Skip to content

Commit

Permalink
CSV.open: detect BOM by default
Browse files (browse the repository at this point in the history)
Fix GH-301

Reported by Junichi Ito. Thanks!!!
  • Loading branch information
kou committed May 17, 2024
1 parent 4e19f3d commit b706d91
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 1 deletion.
9 changes: 8 additions & 1 deletion lib/csv.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1581,7 +1581,14 @@ def generate_lines(rows, **options)
def open(filename, mode="r", **options)
# wrap a File opened with the remaining +args+ with no newline
# decorator
file_opts = options.dup
file_opts = {}
have_encoding_options = (options.key?(:encoding) or
options.key?(:external_encoding) or
mode.include?(":"))
if not have_encoding_options and Encoding.default_external == Encoding::UTF_8
file_opts[:encoding] = "bom|utf-8"
end
file_opts.merge!(options)
unless file_opts.key?(:newline)
file_opts[:universal_newline] ||= false
end
Expand Down
10 changes: 10 additions & 0 deletions test/csv/interface/test_read.rb
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,16 @@ def test_open_with_newline
end
end

# Rewrites the fixture file with a leading U+FEFF prepended to its current
# contents, then checks that CSV.open (given only col_sep, no encoding
# option) still yields rows equal to @rows.
def test_open_with_bom
  # U+FEFF ZERO WIDTH NO-BREAK SPACE
  byte_order_mark = "\ufeff"
  original_content = @input.read
  # binwrite so the bytes land on disk exactly as built, with no newline or
  # encoding conversion applied on the way out.
  File.binwrite(@input.path, "#{byte_order_mark}#{original_content}")
  @input.rewind
  CSV.open(@input.path, col_sep: "\t") { |csv| assert_equal(@rows, csv.to_a) }
end

def test_parse
assert_equal(@rows,
CSV.parse(@data, col_sep: "\t", row_sep: "\r\n"))
Expand Down

0 comments on commit b706d91

Please sign in to comment.