Skip to content

Commit

Permalink
Support multiple kinds of timestamp formats
Browse files Browse the repository at this point in the history
Use Case 1: Most timestamps are in unixtime format, but some entries use %iso8601

 time_type mixed
 time_format_fallbacks unixtime,%iso8601

Use Case 2: Most timestamps are in %iso8601 format, but some entries use unixtime

 time_type mixed
 time_format_fallbacks %iso8601,unixtime

With a micro benchmark:

* normal TimeParser
* primary iso8601 and fallback to unixtime
* primary unixtime and fallback to iso8601

  Warming up --------------------------------------
    Normal time parser     1.191M i/100ms
  primary format is %iso8601, secondary is unixtime
                         408.609k i/100ms
  fallback to unixtime   302.240k i/100ms
  Calculating -------------------------------------
    Normal time parser     12.136M (± 2.3%) i/s -     60.731M in   5.007157s
  primary format is %iso8601, secondary is unixtime
                            4.074M (± 0.5%) i/s -     20.430M in   5.015248s
  fallback to unixtime      3.010M (± 0.5%) i/s -     15.112M in   5.020749s

Here is the benchmark program [1]

[1] #3252 (comment)

  # Micro benchmark comparing a plain TimeParser against two MixedTimeParser
  # configurations created through Fluent::TimeMixin::Parser.
  require 'benchmark/ips'
  require 'fluent/time'
  require 'fluent/config'
  require 'fluent/configurable'

  # Minimal configurable host that only exists to obtain time_parser_create.
  class DummyForTimeParser
    include Fluent::Configurable
    include Fluent::TimeMixin::Parser
  end

  # Builds a Fluent::Config::Element from the given name, argument, parameters
  # and child elements.
  def config_element(name = 'test', argument = '', params = {}, elements = [])
    Fluent::Config::Element.new(name, argument, params, elements)
  end

  # a: baseline, a single strptime-style format.
  a = Fluent::TimeParser.new('%d/%b/%Y:%H:%M:%S %z')

  # b: time_format %iso8601 is tried first, 'unixtime' is the fallback.
  i = DummyForTimeParser.new
  i.configure(config_element('parse', '',
                             {'time_type' => 'mixed',
                              'time_format' => '%iso8601',
                              'time_format_fallbacks' => ['unixtime']}))
  b = i.time_parser_create

  # c: no time_format; 'unixtime' is tried first, then %iso8601.
  i2 = DummyForTimeParser.new
  i2.configure(config_element('parse', '', {'time_type' => 'mixed',
                                            'time_format_fallbacks' => ['unixtime', '%iso8601']}))
  c = i2.time_parser_create

  time = Time.now
  # Time#sec is only the second-of-minute (0..59); Time#to_i is the epoch
  # value, which is what a unixtime parser is meant to be fed.
  timesec = "#{time.to_i}".freeze

  Benchmark.ips do |x|
    x.report("Normal time parser") do
      a.parse('28/Feb/2013:12:00:00 +0900'.freeze)
    end

    x.report("primary format is %iso8601, secondary is unixtime") do
      b.parse('2021-01-01T12:00:00+0900'.freeze)
    end

    # Label kept as-is so it matches the recorded results; the input is a
    # unixtime string handled by parser c.
    x.report("fallback to unixtime") do
      c.parse(timesec)
    end
  end

Signed-off-by: Kentaro Hayashi <kenhys@gmail.com>
  • Loading branch information
kenhys committed Mar 5, 2021
1 parent 3b88465 commit a71befd
Show file tree
Hide file tree
Showing 2 changed files with 166 additions and 1 deletion.
58 changes: 57 additions & 1 deletion lib/fluent/time.rb
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,14 @@ def inspect
end

module TimeMixin
TIME_TYPES = ['string', 'unixtime', 'float']
TIME_TYPES = ['string', 'unixtime', 'float', 'mixed']

TIME_PARAMETERS = [
[:time_format, :string, {default: nil}],
[:localtime, :bool, {default: true}], # UTC if :localtime is false and :timezone is nil
[:utc, :bool, {default: false}], # to turn :localtime false
[:timezone, :string, {default: nil}],
[:time_format_fallbacks, :array, {default: []}], # try time_format, then try fallbacks
]
TIME_FULL_PARAMETERS = [
# To avoid to define :time_type twice (in plugin_helper/inject)
Expand Down Expand Up @@ -170,6 +171,12 @@ def configure(conf)
raise Fluent::ConfigError, "both of utc and localtime are specified, use only one of them"
end

if conf.has_key?('time_type') and @time_type == :mixed
if @time_format.nil? and @time_format_fallbacks.empty?
raise Fluent::ConfigError, "time_type is :mixed but time_format and time_format_fallbacks is empty."
end
end

Fluent::Timezone.validate!(@timezone) if @timezone
end
end
Expand All @@ -180,6 +187,7 @@ def self.included(mod)
end

def time_parser_create(type: @time_type, format: @time_format, timezone: @timezone, force_localtime: false)
return MixedTimeParser.new(type, format, @localtime, timezone, @utc, force_localtime, @time_format_fallbacks) if type == :mixed
return NumericTimeParser.new(type) if type != :string
return TimeParser.new(format, true, nil) if force_localtime

Expand Down Expand Up @@ -452,4 +460,52 @@ def format_float(time)
end
end
end

# MixedTimeParser is available when time_type is set to :mixed
#
# It keeps an ordered list of parsers and returns the result of the first
# parser that accepts the given value.
#
# Use Case 1: primary format is specified explicitly in time_format
#  time_type mixed
#  time_format %iso8601
#  time_format_fallbacks unixtime
# Use Case 2: time_format is omitted
#  time_type mixed
#  time_format_fallbacks %iso8601, unixtime
#
class MixedTimeParser < TimeParser # to include TimeParseError
  # Builds one parser for every non-nil entry of [format, *fallbacks], in
  # that order.
  #
  # type            - accepted to match the caller's argument list; not read here
  # format          - primary format (may be nil); tried before the fallbacks
  # localtime       - localtime flag handed to the created parsers
  # timezone        - timezone handed to the created parsers
  # utc             - when truthy, string-format parsers are not created as localtime
  # force_localtime - when truthy, string-format parsers are created with
  #                   localtime = true and no timezone
  # fallbacks       - further formats; 'unixtime' and 'float' select a
  #                   NumericTimeParser, anything else a TimeParser
  def initialize(type, format = nil, localtime = nil, timezone = nil, utc = nil, force_localtime = nil, fallbacks = [])
    @parsers = []
    # Build a new list instead of fallbacks.unshift(format): the caller hands
    # over its configured @time_format_fallbacks, and mutating that array
    # would prepend the format again on every parser creation.
    [format, *fallbacks].each do |fallback|
      next unless fallback
      case fallback
      when 'unixtime', 'float'
        # Hand over the type as a Symbol, like time_parser_create does when it
        # builds a NumericTimeParser from the (symbol) time_type.
        @parsers << NumericTimeParser.new(fallback.to_sym, localtime, timezone)
      else
        if force_localtime
          @parsers << TimeParser.new(fallback, true, nil)
        else
          # Re-evaluated per entry on purpose (it is idempotent); parsers
          # created afterwards receive the narrowed value.
          localtime = localtime && (timezone.nil? && !utc)
          @parsers << TimeParser.new(fallback, localtime, timezone)
        end
      end
    end
  end

  # Tries every configured parser in order and returns the first successful
  # result.
  #
  # Raises TimeParseError when no parser accepts the value; the message lists
  # the classes of the parsers that were tried.
  def parse(value)
    @parsers.each do |parser|
      begin
        # Numeric parsers are only attempted for values Float() accepts.
        Float(value) if parser.class == Fluent::NumericTimeParser
      rescue
        next
      end
      begin
        return parser.parse(value)
      rescue
        # skip TimeParseError
      end
    end
    fallback_class = @parsers.map(&:class).join(",")
    raise TimeParseError, "invalid time format: value = #{value}, even though fallbacks: #{fallback_class}"
  end
end

end
109 changes: 109 additions & 0 deletions test/test_time_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -228,4 +228,113 @@ class DummyForTimeParser
assert_equal_event_time(time, parser.parse("#{time.sec}.#{time.nsec}"))
end
end

# Covers time_type mixed: configuration validation, the order in which the
# primary format and the fallbacks are tried, and the error raised when no
# parser accepts the value.
sub_test_case 'MixedTimeParser fallback' do
  class DummyForTimeParser
    include Fluent::Configurable
    include Fluent::TimeMixin::Parser
  end

  test 'no time_format_fallbacks failure' do
    i = DummyForTimeParser.new
    assert_raise(Fluent::ConfigError.new("time_type is :mixed but time_format and time_format_fallbacks is empty.")) do
      i.configure(config_element('parse', '', {'time_type' => 'mixed'}))
    end
  end

  test 'fallback time format failure' do
    i = DummyForTimeParser.new
    i.configure(config_element('parse', '',
                               {'time_type' => 'mixed',
                                'time_format_fallbacks' => ['%iso8601']}))
    parser = i.time_parser_create
    assert_raise(Fluent::TimeParser::TimeParseError.new("invalid time format: value = INVALID, even though fallbacks: Fluent::TimeParser")) do
      parser.parse("INVALID")
    end
  end

  test 'primary format is unixtime, secondary %iso8601 is used' do
    i = DummyForTimeParser.new
    i.configure(config_element('parse', '',
                               {'time_type' => 'mixed',
                                'time_format' => 'unixtime',
                                'time_format_fallbacks' => ['%iso8601']}))
    parser = i.time_parser_create
    time = event_time('2021-01-01T12:00:00+0900')
    assert_equal_event_time(time, parser.parse('2021-01-01T12:00:00+0900'))
  end

  test 'primary format is %iso8601, secondary unixtime is used' do
    i = DummyForTimeParser.new
    i.configure(config_element('parse', '',
                               {'time_type' => 'mixed',
                                'time_format' => '%iso8601',
                                'time_format_fallbacks' => ['unixtime']}))
    parser = i.time_parser_create
    time = event_time('2021-01-01T12:00:00+0900')
    assert_equal_event_time(time, parser.parse("#{time.sec}"))
  end

  test 'primary format is %iso8601, no secondary is used' do
    i = DummyForTimeParser.new
    i.configure(config_element('parse', '',
                               {'time_type' => 'mixed',
                                'time_format' => '%iso8601'}))
    parser = i.time_parser_create
    time = event_time('2021-01-01T12:00:00+0900')
    assert_equal_event_time(time, parser.parse("2021-01-01T12:00:00+0900"))
  end

  test 'primary format is unixtime, no secondary is used' do
    i = DummyForTimeParser.new
    i.configure(config_element('parse', '',
                               {'time_type' => 'mixed',
                                'time_format' => 'unixtime'}))
    parser = i.time_parser_create
    time = event_time('2021-01-01T12:00:00+0900')
    assert_equal_event_time(time, parser.parse("#{time.sec}"))
  end

  test 'primary format is %iso8601, raise error because of no appropriate secondary' do
    i = DummyForTimeParser.new
    i.configure(config_element('parse', '',
                               {'time_type' => 'mixed',
                                'time_format' => '%iso8601'}))
    parser = i.time_parser_create
    time = event_time('2021-01-01T12:00:00+0900')
    # An exception object (not a bare String) is passed so that both the class
    # and the message are verified; a lone String is only a failure message
    # and would let any exception pass.
    assert_raise(Fluent::TimeParser::TimeParseError.new("invalid time format: value = #{time.sec}, even though fallbacks: Fluent::TimeParser")) do
      parser.parse("#{time.sec}")
    end
  end

  test 'primary format is unixtime, raise error because of no appropriate secondary' do
    i = DummyForTimeParser.new
    i.configure(config_element('parse', '',
                               {'time_type' => 'mixed',
                                'time_format' => 'unixtime'}))
    parser = i.time_parser_create
    # The message embeds the raw string handed to parse, so the expectation
    # uses that same string.
    value = '2021-01-01T12:00:00+0900'
    assert_raise(Fluent::TimeParser::TimeParseError.new("invalid time format: value = #{value}, even though fallbacks: Fluent::NumericTimeParser")) do
      parser.parse(value)
    end
  end

  test 'fallback to unixtime' do
    i = DummyForTimeParser.new
    i.configure(config_element('parse', '', {'time_type' => 'mixed',
                                             'time_format_fallbacks' => ['%iso8601', 'unixtime']}))
    parser = i.time_parser_create
    time = event_time('2021-01-01T12:00:00+0900')
    assert_equal_event_time(Fluent::EventTime.new(time.to_i), parser.parse("#{time.sec}"))
  end

  test 'fallback to %iso8601' do
    i = DummyForTimeParser.new
    i.configure(config_element('parse', '', {'time_type' => 'mixed',
                                             'time_format_fallbacks' => ['unixtime', '%iso8601']}))
    parser = i.time_parser_create
    time = event_time('2021-01-01T12:00:00+0900')
    assert_equal_event_time(time, parser.parse('2021-01-01T12:00:00+0900'))
  end
end
end

0 comments on commit a71befd

Please sign in to comment.