Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve syslog parser #3019

Merged
merged 4 commits into from
Jun 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
115 changes: 58 additions & 57 deletions lib/fluent/plugin/parser_syslog.rb
Original file line number Diff line number Diff line change
Expand Up @@ -63,18 +63,22 @@ class SyslogParser < Parser
# Resets all parser state to its unconfigured default (nil or false) and
# creates the mutex that the regexp parse paths hold while parsing the time.
# NOTE(review): this listing comes from a rendered diff, so it appears to mix
# the older ivars (@space_count, @regexp, @skip_space_count) with their
# per-format replacements — confirm against the merged file.
def initialize
super
@mutex = Mutex.new
@space_count = nil
@regexp = nil
# One cached regexp per message format.
@regexp3164 = nil
@regexp5424 = nil
@regexp_parser = nil
# One time parser per message format.
@time_parser_rfc3164 = nil
@time_parser_rfc5424 = nil
# Per-format space counts for the time string; nil until the setup_time_parser_* methods run.
@space_count_rfc3164 = nil
@space_count_rfc5424 = nil
@skip_space_count = false
@skip_space_count_rfc3164 = false
@skip_space_count_rfc5424 = false
# Fallback parser tried when the primary RFC5424 time parser raises.
@time_parser_rfc5424_without_subseconds = nil
end

def configure(conf)
super

@time_parser_rfc3164 = @time_parser_rfc5424 = nil
@time_parser_rfc5424_without_subseconds = nil
@support_rfc5424_without_subseconds = false
@regexp_parser = @parser_type == :regexp
@regexp = case @message_format
when :rfc3164
Expand All @@ -87,6 +91,7 @@ class << self
alias_method :parse, :parse_rfc3164
end
end
setup_time_parser_3164(@time_format)
RFC3164_WITHOUT_TIME_AND_PRI_REGEXP
when :rfc5424
if @regexp_parser
Expand All @@ -99,27 +104,36 @@ class << self
end
end
@time_format = @rfc5424_time_format unless conf.has_key?('time_format')
@support_rfc5424_without_subseconds = true
@skip_space_count_rfc5424 = @time_format.count(' ').zero?
setup_time_parser_5424(@time_format)
RFC5424_WITHOUT_TIME_AND_PRI_REGEXP
when :auto
class << self
alias_method :parse, :parse_auto
end
@time_parser_rfc3164 = time_parser_create(format: @time_format)
@time_parser_rfc5424 = time_parser_create(format: @rfc5424_time_format)
@skip_space_count_rfc5424 = @rfc5424_time_format.count(' ').zero?
setup_time_parser_3164(@time_format)
setup_time_parser_5424(@rfc5424_time_format)
nil
end

@space_count = @time_format.squeeze(' ').count(' ') + 1
@space_count_rfc5424 = @rfc5424_time_format.squeeze(' ').count(' ') + 1
@time_parser = time_parser_create
@time_parser_rfc5424_without_subseconds = time_parser_create(format: "%Y-%m-%dT%H:%M:%S%z")
if @regexp_parser
@regexp3164 = RFC3164_WITHOUT_TIME_AND_PRI_REGEXP
@regexp5424 = RFC5424_WITHOUT_TIME_AND_PRI_REGEXP
end
end

if ['%b %d %H:%M:%S', '%b %d %H:%M:%S.%N'].include?(@time_format)
@skip_space_count = true
# Builds the RFC3164 time parser and caches the values derived from its
# format string that the RFC3164 parse paths read on every message.
#
# @param time_fmt [String] strptime-style format of the RFC3164 timestamp
# @return [Integer] the stored space count (value of the last assignment)
def setup_time_parser_3164(time_fmt)
  @time_parser_rfc3164 = time_parser_create(format: time_fmt)
  # Assign the flag outright instead of only ever raising it to true, so that
  # calling this again with a different format cannot keep a stale `true`.
  # The two listed formats are the ones parse_rfc3164 handles with its
  # fixed-width "Mmm dd hh:mm:ss" shortcut.
  @skip_space_count_rfc3164 = ['%b %d %H:%M:%S', '%b %d %H:%M:%S.%N'].include?(time_fmt)
  # Number of space-separated pieces in the format; a run of spaces counts once.
  @space_count_rfc3164 = time_fmt.squeeze(' ').count(' ') + 1
end

# Prepares the RFC5424 time parsers and the format-derived values the
# RFC5424 parse paths consult.
#
# Two parsers are created: one for the given format, and one with a fixed
# format lacking fractional seconds, which the parse methods fall back to
# when the first parser raises a time parse error.
#
# @param time_fmt [String] strptime-style format of the RFC5424 timestamp
# @return [Integer] the stored space count (value of the last assignment)
def setup_time_parser_5424(time_fmt)
  @time_parser_rfc5424 = time_parser_create(format: time_fmt)

  without_subseconds_format = "%Y-%m-%dT%H:%M:%S%z"
  @time_parser_rfc5424_without_subseconds = time_parser_create(format: without_subseconds_format)

  # True when the format holds no space at all.
  @skip_space_count_rfc5424 = !time_fmt.include?(' ')

  # Each maximal run of spaces separates two pieces of the format.
  space_runs = time_fmt.scan(/ +/).size
  @space_count_rfc5424 = space_runs + 1
end

# this method is for tests
Expand All @@ -132,18 +146,13 @@ def parse(text)
end

def parse_auto(text, &block)
if REGEXP_DETECT_RFC5424.match(text)
@regexp = RFC5424_WITHOUT_TIME_AND_PRI_REGEXP
@time_parser = @time_parser_rfc5424
@support_rfc5424_without_subseconds = true
if REGEXP_DETECT_RFC5424.match?(text)
if @regexp_parser
parse_rfc5424_regex(text, &block)
else
parse_rfc5424(text, &block)
end
else
@regexp = RFC3164_WITHOUT_TIME_AND_PRI_REGEXP
@time_parser = @time_parser_rfc3164
if @regexp_parser
parse_rfc3164_regex(text, &block)
else
Expand All @@ -152,6 +161,8 @@ def parse_auto(text, &block)
end
end

SPLIT_CHAR = ' '.freeze

def parse_rfc3164_regex(text, &block)
idx = 0
record = {}
Expand All @@ -169,22 +180,22 @@ def parse_rfc3164_regex(text, &block)

i = idx - 1
sq = false
@space_count.times do
while text[i + 1] == ' '.freeze
@space_count_rfc3164.times do
while text[i + 1] == SPLIT_CHAR
sq = true
i += 1
end

i = text.index(' '.freeze, i + 1)
i = text.index(SPLIT_CHAR, i + 1)
end

time_str = sq ? text.slice(idx, i - idx).squeeze(' ') : text.slice(idx, i - idx)
time = @mutex.synchronize { @time_parser.parse(time_str) }
time_str = sq ? text.slice(idx, i - idx).squeeze(SPLIT_CHAR) : text.slice(idx, i - idx)
time = @mutex.synchronize { @time_parser_rfc3164.parse(time_str) }
if @keep_time_key
record['time'] = time_str
end

parse_plain(time, text, i + 1, record, RFC3164_CAPTURES, &block)
parse_plain(@regexp3164, time, text, i + 1, record, RFC3164_CAPTURES, &block)
end

def parse_rfc5424_regex(text, &block)
Expand All @@ -204,40 +215,36 @@ def parse_rfc5424_regex(text, &block)
i = idx - 1
sq = false
@space_count_rfc5424.times {
while text[i + 1] == ' '.freeze
while text[i + 1] == SPLIT_CHAR
sq = true
i += 1
end

i = text.index(' '.freeze, i + 1)
i = text.index(SPLIT_CHAR, i + 1)
}

time_str = sq ? text.slice(idx, i - idx).squeeze(' '.freeze) : text.slice(idx, i - idx)
time_str = sq ? text.slice(idx, i - idx).squeeze(SPLIT_CHAR) : text.slice(idx, i - idx)
time = @mutex.synchronize do
begin
@time_parser.parse(time_str)
@time_parser_rfc5424.parse(time_str)
rescue Fluent::TimeParser::TimeParseError => e
if @support_rfc5424_without_subseconds
log.trace(e)
@time_parser_rfc5424_without_subseconds.parse(time_str)
else
raise
end
log.trace(e)
@time_parser_rfc5424_without_subseconds.parse(time_str)
end
end

if @keep_time_key
record['time'] = time_str
end
parse_plain(time, text, i + 1, record, RFC5424_CAPTURES, &block)
parse_plain(@regexp5424, time, text, i + 1, record, RFC5424_CAPTURES, &block)
end

# @param time [EventTime]
# @param idx [Integer] note: this argument is needed to avoid string creation
# @param record [Hash]
# @param capture_list [Array] for performance
def parse_plain(time, text, idx, record, capture_list, &block)
m = @regexp.match(text, idx)
def parse_plain(re, time, text, idx, record, capture_list, &block)
m = re.match(text, idx)
if m.nil?
yield nil, nil
return
Expand All @@ -262,8 +269,6 @@ def parse_plain(time, text, idx, record, capture_list, &block)
yield time, record
end

SPLIT_CHAR = ' '.freeze

def parse_rfc3164(text, &block)
pri = nil
cursor = 0
Expand All @@ -282,7 +287,7 @@ def parse_rfc3164(text, &block)
end
end

if @skip_space_count
if @skip_space_count_rfc3164
# header part
time_size = 15 # skip Mmm dd hh:mm:ss
time_end = text[cursor + time_size]
Expand All @@ -301,15 +306,15 @@ def parse_rfc3164(text, &block)
else
i = cursor - 1
sq = false
@space_count.times do
while text[i + 1] == ' '.freeze
@space_count_rfc3164.times do
while text[i + 1] == SPLIT_CHAR
sq = true
i += 1
end
i = text.index(' '.freeze, i + 1)
i = text.index(SPLIT_CHAR, i + 1)
end

time_str = sq ? text.slice(idx, i - cursor).squeeze(' '.freeze) : text.slice(cursor, i - cursor)
time_str = sq ? text.slice(idx, i - cursor).squeeze(SPLIT_CHAR) : text.slice(cursor, i - cursor)
cursor = i + 1
end

Expand Down Expand Up @@ -358,7 +363,7 @@ def parse_rfc3164(text, &block)
msg.chomp!
record['message'] = msg

time = @time_parser.parse(time_str)
time = @time_parser_rfc3164.parse(time_str)
record['time'] = time_str if @keep_time_key

yield time, record
Expand Down Expand Up @@ -393,7 +398,7 @@ def parse_rfc5424(text, &block)
else
i = cursor - 1
sq = false
@space_count.times do
@space_count_rfc5424.times do
while text[i + 1] == SPLIT_CHAR
sq = true
i += 1
Expand Down Expand Up @@ -472,19 +477,15 @@ def parse_rfc5424(text, &block)

# message part
if cursor != text.bytesize
msg = text[cursor + 1..-1]
msg = text.slice(cursor + 1, text.bytesize)
msg.chomp!
record['message'] = msg
end

time = begin
@time_parser.parse(time_str)
@time_parser_rfc5424.parse(time_str)
rescue Fluent::TimeParser::TimeParseError => e
if @support_rfc5424_without_subseconds
@time_parser_rfc5424_without_subseconds.parse(time_str)
else
raise
end
@time_parser_rfc5424_without_subseconds.parse(time_str)
end
record['time'] = time_str if @keep_time_key

Expand Down
15 changes: 1 addition & 14 deletions test/plugin/test_parser_syslog.rb
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,6 @@ def test_auto_with_legacy_syslog_message(param)
assert_equal(event_time("Feb 28 00:00:12", format: '%b %d %M:%S:%H'), time)
assert_equal(@expected, record)
end
assert_equal(Fluent::Plugin::SyslogParser::RFC3164_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])
end

data('regexp' => 'regexp', 'string' => 'string')
Expand All @@ -497,7 +496,6 @@ def test_auto_with_legacy_syslog_priority_message(param)
assert_equal(event_time("Feb 28 12:00:00", format: '%b %d %M:%S:%H'), time)
assert_equal(@expected.merge('pri' => 6), record)
end
assert_equal(Fluent::Plugin::SyslogParser::RFC3164_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])
end

data('regexp' => 'regexp', 'string' => 'string')
Expand All @@ -517,7 +515,6 @@ def test_parse_with_rfc5424_message(param)
assert_equal 16, record["pri"]
assert_equal "Hi, from Fluentd!", record["message"]
end
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])
end

data('regexp' => 'regexp', 'string' => 'string')
Expand All @@ -537,7 +534,7 @@ def test_parse_with_rfc5424_structured_message(param)
record["extradata"]
assert_equal "Hi, from Fluentd!", record["message"]
end
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])end
end

data('regexp' => 'regexp', 'string' => 'string')
def test_parse_with_both_message_type(param)
Expand All @@ -553,7 +550,6 @@ def test_parse_with_both_message_type(param)
assert_equal(event_time("Feb 28 12:00:00", format: '%b %d %M:%S:%H'), time)
assert_equal(@expected.merge('pri' => 1), record)
end
assert_equal(Fluent::Plugin::SyslogParser::RFC3164_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])

text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd 11111 ID24224 [exampleSDID@20224 iut="3" eventSource="Application" eventID="11211"] Hi, from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -564,21 +560,18 @@ def test_parse_with_both_message_type(param)
record["extradata"]
assert_equal "Hi, from Fluentd!", record["message"]
end
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])

text = '<1>Feb 28 12:00:02 192.168.0.1 fluentd[11111]: [error] Syslog test 2>1'
@parser.instance.parse(text) do |time, record|
assert_equal(event_time("Feb 28 12:00:02", format: '%b %d %M:%S:%H'), time)
assert_equal(@expected.merge('pri' => 1, 'message'=> '[error] Syslog test 2>1'), record)
end
assert_equal(Fluent::Plugin::SyslogParser::RFC3164_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])

text = '<1>Feb 28 12:00:02 192.168.0.1 fluentd[11111]: [error] Syslog test'
@parser.instance.parse(text) do |time, record|
assert_equal(event_time("Feb 28 12:00:02", format: '%b %d %M:%S:%H'), time)
assert_equal(@expected.merge('pri' => 1), record)
end
assert_equal(Fluent::Plugin::SyslogParser::RFC3164_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])

text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - - Hi, from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -588,7 +581,6 @@ def test_parse_with_both_message_type(param)
assert_equal "-", record["extradata"]
assert_equal "Hi, from Fluentd!", record["message"]
end
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])
end

data('regexp' => 'regexp', 'string' => 'string')
Expand All @@ -605,7 +597,6 @@ def test_parse_with_both_message_type_and_priority(param)
assert_equal(event_time("Feb 28 12:00:00", format: '%b %d %M:%S:%H'), time)
assert_equal(@expected.merge('pri' => 6), record)
end
assert_equal(Fluent::Plugin::SyslogParser::RFC3164_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])

text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd 11111 ID24224 [exampleSDID@20224 iut="3" eventSource="Application" eventID="11211"] Hi, from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -616,14 +607,12 @@ def test_parse_with_both_message_type_and_priority(param)
record["extradata"]
assert_equal "Hi, from Fluentd!", record["message"]
end
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])

text = '<16>Feb 28 12:00:02 192.168.0.1 fluentd[11111]: [error] Syslog test'
@parser.instance.parse(text) do |time, record|
assert_equal(event_time("Feb 28 12:00:02", format: '%b %d %M:%S:%H'), time)
assert_equal(@expected.merge('pri' => 16), record)
end
assert_equal(Fluent::Plugin::SyslogParser::RFC3164_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])

text = '<16>1 2017-02-06T13:14:15.003Z 192.168.0.1 fluentd - - - Hi, from Fluentd!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -633,7 +622,6 @@ def test_parse_with_both_message_type_and_priority(param)
assert_equal "-", record["extradata"]
assert_equal "Hi, from Fluentd!", record["message"]
end
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])

text = '<16>1 2017-02-06T13:14:15Z 192.168.0.1 fluentd - - - Hi, from Fluentd without subseconds!'
@parser.instance.parse(text) do |time, record|
Expand All @@ -643,7 +631,6 @@ def test_parse_with_both_message_type_and_priority(param)
assert_equal "-", record["extradata"]
assert_equal "Hi, from Fluentd without subseconds!", record["message"]
end
assert_equal(Fluent::Plugin::SyslogParser::RFC5424_WITHOUT_TIME_AND_PRI_REGEXP, @parser.instance.patterns['format'])
end
end
end