Skip to content

Commit

Permalink
Fix line-ending backslash whitespace escaping for multiline strings
Browse files Browse the repository at this point in the history
Fixes #372.
  • Loading branch information
cespare committed May 23, 2023
1 parent d56d9f6 commit fb9756b
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 35 deletions.
35 changes: 35 additions & 0 deletions decode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1171,6 +1171,41 @@ func TestDecodeDoubleTags(t *testing.T) {
}
}

// TestDecodeMultilineStringBackslash is a regression test for #372: a
// line-ending backslash inside a multiline basic string must remove the
// backslash, the newline, and all whitespace up to the next non-whitespace
// character, while leaving unrelated whitespace untouched.
func TestDecodeMultilineStringBackslash(t *testing.T) {
	tests := []struct {
		name string
		toml string
		want string
	}{
		{
			name: "backslash joins two lines",
			toml: `
s = """
this should \
be two
lines of text.
"""`,
			want: "this should be two\nlines of text.\n",
		},
		{
			// The indentation before "a" and "b" follows a line-ending
			// backslash and must be swallowed. The single space before
			// "c" follows a plain newline and must be preserved; it is
			// what produces the " c" in want.
			name: "backslash at start and end of string",
			toml: `
s = """\
  a\
  b
 c\
"""`,
			want: "ab\n c",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Decode into a fresh value so one case cannot mask another.
			var s struct{ S string }
			if _, err := Decode(tt.toml, &s); err != nil {
				t.Fatal(err)
			}
			// %q makes whitespace and newline differences visible.
			if s.S != tt.want {
				t.Errorf("\nhave: %q\nwant: %q", s.S, tt.want)
			}
		})
	}
}

// errorContains checks if the error message in have contains the text in
// want.
//
Expand Down
76 changes: 41 additions & 35 deletions parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ func (p *parser) value(it item, parentIsArray bool) (interface{}, tomlType) {
case itemString:
return p.replaceEscapes(it, it.val), p.typeOfPrimitive(it)
case itemMultilineString:
return p.replaceEscapes(it, stripFirstNewline(p.stripEscapedNewlines(it.val))), p.typeOfPrimitive(it)
return p.replaceEscapes(it, p.stripEscapedNewlines(stripFirstNewline(it.val))), p.typeOfPrimitive(it)
case itemRawString:
return it.val, p.typeOfPrimitive(it)
case itemRawMultilineString:
Expand Down Expand Up @@ -681,49 +681,55 @@ func stripFirstNewline(s string) string {
return s
}

// Remove newlines inside triple-quoted strings if a line ends with "\".
// stripEscapedNewlines removes whitespace after line-ending backslashes in
// multiline strings.
//
// A line-ending backslash is an unescaped \ followed only by whitespace until
// the next newline. After a line-ending backslash, all whitespace is removed
// until the next non-whitespace character.
func (p *parser) stripEscapedNewlines(s string) string {
split := strings.Split(s, "\n")
if len(split) < 1 {
return s
}

escNL := false // Keep track of the last non-blank line was escaped.
for i, line := range split {
line = strings.TrimRight(line, " \t\r")

if len(line) == 0 || line[len(line)-1] != '\\' {
split[i] = strings.TrimRight(split[i], "\r")
if !escNL && i != len(split)-1 {
split[i] += "\n"
}
continue
var b strings.Builder
var i int
for {
ix := strings.Index(s[i:], `\`)
// fmt.Printf("s: %q, i: %d\n", s, i)
if ix < 0 {
b.WriteString(s)
return b.String()
}
i += ix

escBS := true
for j := len(line) - 1; j >= 0 && line[j] == '\\'; j-- {
escBS = !escBS
if len(s) > i+1 && s[i+1] == '\\' {
// Escaped backslash.
i += 2
continue
}
if escNL {
line = strings.TrimLeft(line, " \t\r")
// Scan until the next non-whitespace.
j := i + 1
whitespaceLoop:
for ; j < len(s); j++ {
switch s[j] {
case ' ', '\t', '\r', '\n':
default:
break whitespaceLoop
}
}
escNL = !escBS

if escBS {
split[i] += "\n"
if j == i+1 {
// Not a whitespace escape.
i++
continue
}

if i == len(split)-1 {
p.panicf("invalid escape: '\\ '")
}

split[i] = line[:len(line)-1] // Remove \
if len(split)-1 > i {
split[i+1] = strings.TrimLeft(split[i+1], " \t\r")
if !strings.Contains(s[i:j], "\n") {
// This is not a line-ending backslash.
// (It's a bad escape sequence, but we can let
// replaceEscapes catch it.)
i++
continue
}
b.WriteString(s[:i])
s = s[j:]
i = 0
}
return strings.Join(split, "")
}

func (p *parser) replaceEscapes(it item, str string) string {
Expand Down

0 comments on commit fb9756b

Please sign in to comment.