Skip to content

Commit

Permalink
Also skip the UTF-8 BOM (#381)
Browse files (browse the repository at this point in the history)
Co-authored-by: Martin Tournoij <martin@arp242.net>
  • Loading branch information
kmuto and arp242 committed Jan 28, 2023
1 parent bd94408 commit 1a6ca6e
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
1 change: 1 addition & 0 deletions decode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ func TestDecodeBOM(t *testing.T) {
for _, tt := range [][]byte{
[]byte("\xff\xfea = \"b\""),
[]byte("\xfe\xffa = \"b\""),
[]byte("\xef\xbb\xbfa = \"b\""),
} {
t.Run("", func(t *testing.T) {
var s struct{ A string }
Expand Down
7 changes: 5 additions & 2 deletions parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,12 @@ func parse(data string) (p *parser, err error) {
}()

// Read over BOM; do this here as the lexer calls utf8.DecodeRuneInString()
// which mangles stuff.
if strings.HasPrefix(data, "\xff\xfe") || strings.HasPrefix(data, "\xfe\xff") {
// which mangles stuff. UTF-16 BOM isn't strictly valid, but some tools add
// it anyway.
if strings.HasPrefix(data, "\xff\xfe") || strings.HasPrefix(data, "\xfe\xff") { // UTF-16
data = data[2:]
} else if strings.HasPrefix(data, "\xef\xbb\xbf") { // UTF-8
data = data[3:]
}

// Examine first few bytes for NULL bytes; this probably means it's a UTF-16
Expand Down

0 comments on commit 1a6ca6e

Please sign in to comment.