Skip to content

Commit

Permalink
Positions are now mostly correct
Browse files Browse the repository at this point in the history
Still a few suboptimal ones:

	––– string-bad-escape.toml –––––––––––––––––––––––––––––––––––––––––––
	toml: error: invalid escape character 'a'; only the following escape characters are allowed: \b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX
		at line 1; start 18; length 24
		last key parsed was "invalid-escape"

	1 | invalid-escape = "This string has a bad \a escape character."
				^^^^^^^^^^^^^^^^^^^^^^^^

	––– string-bad-multiline.toml ––––––––––––––––––––––––––––––––––––––––
	toml: error: strings cannot contain newlines
		at line 1; start 7; length 0
		last key parsed was "multi"

	1 | multi = "first line
			^

	––– string-literal-multiline-quotes-1.toml –––––––––––––––––––––––––––
	toml: error: unexpected "''''''"
		at line 1; start 7; length 20
		last key parsed was "a"

	1 | a = '''6 apostrophes: ''''''
			^^^^^^^^^^^^^^^^^^^^

	––– string-wrong-close.toml ––––––––––––––––––––––––––––––––––––––––––
	toml: error: strings cannot contain newlines
		at line 1; start 18; length 0
		last key parsed was "bad-ending-quote"

	1 | bad-ending-quote = "double and single'
				^
  • Loading branch information
arp242 committed Jun 27, 2021
1 parent 9c821eb commit 5a19592
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 102 deletions.
59 changes: 37 additions & 22 deletions error.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,18 @@ import (
//
// For example invalid TOML syntax, duplicate keys, etc.
type ParseError struct {
Message string
Line int
Pos int // Byte offset
LastKey string // Last parsed key, may be blank.
Input string
Message string
Position Position
LastKey string // Last parsed key, may be blank.
Input string
}

func (pe ParseError) Error() string {
if pe.LastKey == "" {
return fmt.Sprintf("toml: line %d: %s", pe.Line, pe.Message)
return fmt.Sprintf("toml: %s: %s", pe.Position, pe.Message)
}
return fmt.Sprintf("toml: line %d (last key parsed '%s'): %s",
pe.Line, pe.LastKey, pe.Message)
return fmt.Sprintf("toml: %s (last key parsed '%s'): %s",
pe.Position, pe.LastKey, pe.Message)
}

// Clang error:
Expand All @@ -43,18 +42,26 @@ func (pe ParseError) Error() string {
//
// For more information about this error, try `rustc --explain E0425`.

// ––– array-mixed-types-arrays-and-ints.toml –––––––––––––––––––––––––––
// toml: error: Array contains values of type 'Integer' and 'Array', but arrays must be homogeneous.
// at line 1; column 1-15; byte offset 15
// last key parsed was "arrays-and-ints"
//
// 1 | arrays-and-ints = [1, ["Arrays are not integers."]]
// ^^^^^^^^^^^^^^^
//
// The marker is placed under the key because the parser doesn't use the lexer's position.
func (pe ParseError) ExtError() string {
if pe.Input == "" {
return pe.Error()
}

lines := strings.Split(pe.Input, "\n")
var line, pos, col int
var pos, col int
for i := range lines {
ll := len(lines[i]) + 1 // +1 for the removed newline
if pos+ll >= pe.Pos {
line = i
col = pe.Pos - pos - 1
if pos+ll >= pe.Position.Start {
col = pe.Position.Start - pos
if col < 0 { // Should never happen, but just in case.
col = 0
}
Expand All @@ -66,24 +73,32 @@ func (pe ParseError) ExtError() string {
b := new(strings.Builder)
//fmt.Fprintf(b, "toml: error on line %d: %s\n", line, pe.Message)
fmt.Fprintf(b, "toml: error: %s\n", pe.Message)
fmt.Fprintf(b, " on line %d", line+1)
//fmt.Fprintf(b, " on line %d", pe.Position.Line)
fmt.Fprintf(b, " %s\n", pe.Position)
if pe.LastKey != "" {
fmt.Fprintf(b, "; last key parsed was %q", pe.LastKey)
fmt.Fprintf(b, " last key parsed was %q", pe.LastKey)
}
b.WriteString("\n\n")

if line > 1 {
fmt.Fprintf(b, "% 6d | %s\n", line-1, lines[line-2])
if pe.Position.Line > 2 {
fmt.Fprintf(b, "% 6d | %s\n", pe.Position.Line-2, lines[pe.Position.Line-3])
}
if line > 0 {
fmt.Fprintf(b, "% 6d | %s\n", line, lines[line-1])
if pe.Position.Line > 1 {
fmt.Fprintf(b, "% 6d | %s\n", pe.Position.Line-1, lines[pe.Position.Line-2])
}

l := pe.Position.Len - 1
if l < 0 {
l = 0
}

fmt.Fprintf(b, "% 6d | %s\n", line+1, lines[line])
fmt.Fprintf(b, "% 9s%s^\n", "", strings.Repeat(" ", col))
fmt.Fprintf(b, "% 6d | %s\n", pe.Position.Line, lines[pe.Position.Line-1])
fmt.Fprintf(b, "% 9s%s%s\n", "",
strings.Repeat(" ", col),
strings.Repeat("^", l+1))

// if len(lines)-1 > line && lines[line+1] != "" {
// fmt.Fprintf(b, "% 6d | %s\n", line+1, lines[line+1])
// if len(lines)-1 > pe.Position.Line && lines[pe.Position.Line+1] != "" {
// fmt.Fprintf(b, "% 6d | %s\n", pe.Position.Line+1, lines[pe.Position.Line+1])
// }

return b.String()
Expand Down
3 changes: 3 additions & 0 deletions error_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ func TestParseError(t *testing.T) {
if !strings.HasSuffix(f.Name(), ".toml") {
continue
}
if f.Name() != "datetime-no-secs.toml" {
//continue
}

if f.Name() == "string-multiline-escape-space.toml" {
continue
Expand Down
116 changes: 74 additions & 42 deletions lex.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,35 +59,43 @@ const (

type stateFn func(lx *lexer) stateFn

// Position describes where in the input an item or error is located.
type Position struct {
	Line  int // Line number, starting at 1.
	Start int // Start of error, as byte offset starting at 0.
	Len   int // Length in bytes; may be 0.
}

// String renders the position as "at line L; start S; length N", the form
// embedded in parse error messages.
func (p Position) String() string {
	const layout = "at line %d; start %d; length %d"
	return fmt.Sprintf(layout, p.Line, p.Start, p.Len)
}

type lexer struct {
input string
start int
pos int
line int
state stateFn
items chan item

// Allow for backing up up to four runes.
// This is necessary because TOML contains 3-rune tokens (""" and ''').
input string
start int
pos int
itemPos Position
state stateFn
items chan item

// Allow for backing up up to four runes. This is necessary because TOML
// contains 3-rune tokens (""" and ''').
prevWidths [4]int
nprev int // how many of prevWidths are in use
// If we emit an eof, we can still back up, but it is not OK to call
// next again.
atEOF bool
nprev int // how many of prevWidths are in use
atEOF bool // If we emit an eof, we can still back up, but it is not OK to call next again.

// A stack of state functions used to maintain context.
// The idea is to reuse parts of the state machine in various places.
// For example, values can appear at the top level or within arbitrarily
// nested arrays. The last state on the stack is used after a value has
// been lexed. Similarly for comments.
//
// The idea is to reuse parts of the state machine in various places. For
// example, values can appear at the top level or within arbitrarily nested
// arrays. The last state on the stack is used after a value has been lexed.
// Similarly for comments.
stack []stateFn
}

type item struct {
typ itemType
val string
line int
pos int
typ itemType
val string
pos Position
}

func (lx *lexer) nextItem() item {
Expand All @@ -97,18 +105,19 @@ func (lx *lexer) nextItem() item {
return item
default:
lx.state = lx.state(lx)
//fmt.Printf(" STATE %-24s current: %-10q stack: %s\n", lx.state, lx.current(), lx.stack)
//fmt.Printf(" STATE %-24s current: %-10q %d:%d-%d stack: %s\n",
// lx.state, lx.current(), lx.itemPos.Line, lx.itemPos.Start, lx.itemPos.Len, lx.stack)
}
}
}

func lex(input string) *lexer {
lx := &lexer{
input: input,
state: lexTop,
line: 1,
items: make(chan item, 10),
stack: make([]stateFn, 0, 10),
input: input,
state: lexTop,
itemPos: Position{Line: 1},
items: make(chan item, 10),
stack: make([]stateFn, 0, 10),
}
return lx
}
Expand All @@ -131,12 +140,16 @@ func (lx *lexer) current() string {
}

func (lx *lexer) emit(typ itemType) {
lx.items <- item{typ, lx.current(), lx.line, lx.pos}
lx.itemPos.Start = lx.start
lx.itemPos.Len = lx.pos - lx.start
lx.items <- item{typ, lx.current(), lx.itemPos}
lx.start = lx.pos
}

func (lx *lexer) emitTrim(typ itemType) {
lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line, lx.pos}
lx.itemPos.Start = lx.start
lx.itemPos.Len = lx.pos - lx.start
lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.itemPos}
lx.start = lx.pos
}

Expand All @@ -150,7 +163,7 @@ func (lx *lexer) next() (r rune) {
}

if lx.input[lx.pos] == '\n' {
lx.line++
lx.itemPos.Line++
}
lx.prevWidths[3] = lx.prevWidths[2]
lx.prevWidths[2] = lx.prevWidths[1]
Expand All @@ -161,7 +174,7 @@ func (lx *lexer) next() (r rune) {

r, w := utf8.DecodeRuneInString(lx.input[lx.pos:])
if r == utf8.RuneError {
lx.errorf("invalid UTF-8 byte at position %d (line %d): 0x%02x", lx.pos, lx.line, lx.input[lx.pos])
lx.errorf("invalid UTF-8 byte at %d: 0x%02x", lx.itemPos, lx.input[lx.pos])
return utf8.RuneError
}

Expand Down Expand Up @@ -189,9 +202,10 @@ func (lx *lexer) backup() {
lx.prevWidths[1] = lx.prevWidths[2]
lx.prevWidths[2] = lx.prevWidths[3]
lx.nprev--

lx.pos -= w
if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
lx.line--
lx.itemPos.Line--
}
}

Expand Down Expand Up @@ -228,7 +242,25 @@ func (lx *lexer) skip(pred func(rune) bool) {
// Note that any value that is a character is escaped if it's a special
// character (newlines, tabs, etc.).
func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
lx.items <- item{itemError, fmt.Sprintf(format, values...), lx.line, lx.pos}
if lx.atEOF {
return lx.errorfPrevline(format, values...)
}

lx.itemPos.Start = lx.start
lx.itemPos.Len = lx.pos - lx.start
lx.items <- item{itemError, fmt.Sprintf(format, values...), lx.itemPos}
return nil
}

// errorfPrevline emits an error item like errorf, but reports it one line
// earlier: the line number is decremented, the length is set to 0, and the
// start offset is advanced by one rather than recomputed from lx.start.
//
// The intent is that unexpected EOF or newline errors are not shown on a new
// blank line.
//
// It always returns nil.
func (lx *lexer) errorfPrevline(format string, values ...interface{}) stateFn {
	msg := fmt.Sprintf(format, values...)

	// Adjust the shared item position in place; the three fields are
	// independent of each other.
	lx.itemPos.Start++
	lx.itemPos.Len = 0
	lx.itemPos.Line--

	lx.items <- item{typ: itemError, val: msg, pos: lx.itemPos}
	return nil
}

Expand Down Expand Up @@ -537,8 +569,7 @@ func lexArrayValue(lx *lexer) stateFn {
// the next value (or the end of the array): it ignores whitespace and newlines
// and expects either a ',' or a ']'.
func lexArrayValueEnd(lx *lexer) stateFn {
r := lx.next()
switch {
switch r := lx.next(); {
case isWhitespace(r) || isNL(r):
return lexSkip(lx, lexArrayValueEnd)
case r == commentStart:
Expand All @@ -549,10 +580,11 @@ func lexArrayValueEnd(lx *lexer) stateFn {
return lexArrayValue // move on to the next value
case r == arrayEnd:
return lexArrayEnd
default:
return lx.errorf(
"expected a comma or array terminator %q, but got %s instead",
arrayEnd, runeOrEOF(r))
}
return lx.errorf(
"expected a comma or array terminator %q, but got %s instead",
arrayEnd, runeOrEOF(r))
}

// lexArrayEnd finishes the lexing of an array.
Expand All @@ -571,7 +603,7 @@ func lexInlineTableValue(lx *lexer) stateFn {
case isWhitespace(r):
return lexSkip(lx, lexInlineTableValue)
case isNL(r):
return lx.errorf("newlines not allowed within inline tables")
return lx.errorfPrevline("newlines not allowed within inline tables")
case r == commentStart:
lx.push(lexInlineTableValue)
return lexCommentStart
Expand All @@ -593,7 +625,7 @@ func lexInlineTableValueEnd(lx *lexer) stateFn {
case isWhitespace(r):
return lexSkip(lx, lexInlineTableValueEnd)
case isNL(r):
return lx.errorf("newlines not allowed within inline tables")
return lx.errorfPrevline("newlines not allowed within inline tables")
case r == commentStart:
lx.push(lexInlineTableValueEnd)
return lexCommentStart
Expand Down Expand Up @@ -638,7 +670,7 @@ func lexString(lx *lexer) stateFn {
case isControl(r) || r == '\r':
return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
case isNL(r):
return lx.errorf("strings cannot contain newlines")
return lx.errorfPrevline("strings cannot contain newlines")
case r == '\\':
lx.push(lexString)
return lexStringEscape
Expand Down Expand Up @@ -714,7 +746,7 @@ func lexRawString(lx *lexer) stateFn {
case isControl(r) || r == '\r':
return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
case isNL(r):
return lx.errorf("strings cannot contain newlines")
return lx.errorfPrevline("strings cannot contain newlines")
case r == rawStringEnd:
lx.backup()
lx.emit(itemRawString)
Expand Down

0 comments on commit 5a19592

Please sign in to comment.