Support capturing all tokens into the AST.
This includes tokens elided by Elide(), but not tokens elided by the
Lexer.

See #108.
alecthomas committed Nov 26, 2020 · 1 parent 36441bd · commit f9c3ae4
Showing 7 changed files with 134 additions and 43 deletions.
2 changes: 2 additions & 0 deletions v2/README.md
@@ -370,6 +370,8 @@ There are a few areas where Participle can provide useful feedback to users of y
    populated from the nearest matching token.
 4. Any node in the AST containing a field `EndPos lexer.Position` will be
    automatically populated from the token at the end of the node.
+5. Any node in the AST containing a field `Tokens []lexer.Token` will be automatically
+   populated with _all_ tokens captured by the node, _including_ elided tokens.
 
 These related pieces of information can be combined to provide fairly comprehensive error reporting.
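
For illustration only (not part of this commit), a node that opts into all three automatic fields might look like the sketch below; it assumes `github.com/alecthomas/participle/v2/lexer` is imported, and the grammar fragment is hypothetical.

type Greeting struct {
    Pos    lexer.Position // set from the first token matched by the node
    EndPos lexer.Position // set from the token at the end of the node
    Tokens []lexer.Token  // every token the node matched, including Elide()d ones

    Name string `"hello" @Ident`
}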

45 changes: 34 additions & 11 deletions v2/lexer/peek.go
@@ -5,13 +5,21 @@ type PeekingLexer struct {
     cursor int
     eof    Token
     tokens []Token
+    elide  map[rune]bool
 }
 
 var _ Lexer = &PeekingLexer{}
 
 // Upgrade a Lexer to a PeekingLexer with arbitrary lookahead.
-func Upgrade(lex Lexer) (*PeekingLexer, error) {
-    r := &PeekingLexer{}
+//
+// "elide" is a slice of token types to elide from processing.
+func Upgrade(lex Lexer, elide ...rune) (*PeekingLexer, error) {
+    r := &PeekingLexer{
+        elide: make(map[rune]bool, len(elide)),
+    }
+    for _, rn := range elide {
+        r.elide[rn] = true
+    }
     for {
         t, err := lex.Next()
         if err != nil {
@@ -26,27 +34,42 @@ func Upgrade(lex Lexer) (*PeekingLexer, error) {
     return r, nil
 }
 
-// Cursor position in tokens.
+// Range returns the slice of tokens between the two cursor points.
+func (p *PeekingLexer) Range(start, end int) []Token {
+    return p.tokens[start:end]
+}
+
+// Cursor position in tokens (includes elided tokens).
 func (p *PeekingLexer) Cursor() int {
     return p.cursor
 }
 
 // Next consumes and returns the next token.
 func (p *PeekingLexer) Next() (Token, error) {
-    if p.cursor >= len(p.tokens) {
-        return p.eof, nil
+    for p.cursor < len(p.tokens) {
+        t := p.tokens[p.cursor]
+        p.cursor++
+        if p.elide[t.Type] {
+            continue
+        }
+        return p.tokens[p.cursor-1], nil
     }
-    p.cursor++
-    return p.tokens[p.cursor-1], nil
+    return p.eof, nil
 }
 
 // Peek ahead at the n+1 token. ie. Peek(0) will peek at the next token.
 func (p *PeekingLexer) Peek(n int) (Token, error) {
-    i := p.cursor + n
-    if i >= len(p.tokens) {
-        return p.eof, nil
+    for i := p.cursor; i < len(p.tokens); i++ {
+        t := p.tokens[i]
+        if p.elide[t.Type] {
+            continue
+        }
+        if n == 0 {
+            return t, nil
+        }
+        n--
     }
-    return p.tokens[i], nil
+    return p.eof, nil
 }
 
 // Clone creates a clone of this PeekingLexer at its current token.
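
Taken together, Next and Peek now skip elided token types transparently, while Cursor and Range still address the raw token stream. A minimal usage sketch, not part of the commit: it assumes `github.com/alecthomas/participle/v2/lexer` is imported, that the lexer `lex` produces Ident("hello"), Whitespace(" "), Ident("world"), and that the Whitespace token type is the rune -3.

func rawTokens(lex lexer.Lexer) ([]lexer.Token, error) {
    peeker, err := lexer.Upgrade(lex, -3) // elide Whitespace (-3)
    if err != nil {
        return nil, err
    }
    start := peeker.Cursor()   // the cursor counts elided tokens too
    first, _ := peeker.Next()  // Ident("hello")
    second, _ := peeker.Next() // Ident("world"); the " " token is skipped but the cursor moves past it
    _, _ = first, second
    // Range returns the underlying slice, so the elided " " is included.
    return peeker.Range(start, peeker.Cursor()), nil
}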
5 changes: 4 additions & 1 deletion v2/lexer/peek_test.go
@@ -21,15 +21,18 @@ func (s *staticLexer) Next() (Token, error) {
 
 func TestUpgrade(t *testing.T) {
     t0 := Token{Type: 1, Value: "moo"}
+    ts := Token{Type: 3, Value: " "}
     t1 := Token{Type: 2, Value: "blah"}
-    l, err := Upgrade(&staticLexer{tokens: []Token{t0, t1}})
+    tokens := []Token{t0, ts, t1}
+    l, err := Upgrade(&staticLexer{tokens: tokens}, 3)
     require.NoError(t, err)
     require.Equal(t, t0, mustPeek(t, l, 0))
     require.Equal(t, t0, mustPeek(t, l, 0))
     require.Equal(t, t1, mustPeek(t, l, 1))
     require.Equal(t, t1, mustPeek(t, l, 1))
     require.True(t, mustPeek(t, l, 2).EOF())
     require.True(t, mustPeek(t, l, 3).EOF())
+    require.Equal(t, tokens, l.Range(0, 3))
 }
 
 func mustPeek(t *testing.T, lexer *PeekingLexer, n int) Token {
7 changes: 4 additions & 3 deletions v2/map.go
@@ -79,9 +79,10 @@ func Upper(types ...string) Option {
 
 // Elide drops tokens of the specified types.
 func Elide(types ...string) Option {
-    return Map(func(token lexer.Token) (lexer.Token, error) {
-        return lexer.Token{}, DropToken
-    }, types...)
+    return func(p *Parser) error {
+        p.elide = append(p.elide, types...)
+        return nil
+    }
 }
 
 // Apply a Mapping to all tokens coming out of a Lexer.
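
Elide is therefore no longer a Mapper that drops tokens from the stream; it just records the type names on the Parser, and the PeekingLexer above does the skipping, which is what lets elided tokens still reach `Tokens []lexer.Token` fields. A hedged usage sketch (the `Pair` grammar and lexer rules are illustrative, not from this commit; it assumes the participle/v2, lexer and lexer/stateful packages are imported):

type Pair struct {
    Key   string `@Ident "="`
    Value string `@Ident`
}

var pairParser = participle.MustBuild(&Pair{},
    participle.Lexer(lexer.Must(stateful.NewSimple([]stateful.Rule{
        {"Ident", `\w+`, nil},
        {"Punct", `=`, nil},
        {"Whitespace", `\s+`, nil},
    }))),
    participle.Elide("Whitespace"), // resolved against the lexer's Symbols() when parsing starts
)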
59 changes: 35 additions & 24 deletions v2/nodes.go
@@ -16,6 +16,7 @@ var (
     MaxIterations = 1000000
 
     positionType        = reflect.TypeOf(lexer.Position{})
+    tokensType          = reflect.TypeOf([]lexer.Token{})
     captureType         = reflect.TypeOf((*Capture)(nil)).Elem()
     textUnmarshalerType = reflect.TypeOf((*encoding.TextUnmarshaler)(nil)).Elem()
     parseableType       = reflect.TypeOf((*Parseable)(nil)).Elem()
@@ -71,48 +72,35 @@ func (p *parseable) Parse(ctx *parseContext, parent reflect.Value) (out []reflec
 type strct struct {
     typ              reflect.Type
     expr             node
+    tokensFieldIndex []int
     posFieldIndex    []int
     endPosFieldIndex []int
 }
 
 func newStrct(typ reflect.Type) *strct {
-    var (
-        posFieldIndex    []int
-        endPosFieldIndex []int
-    )
+    s := &strct{
+        typ: typ,
+    }
     field, ok := typ.FieldByName("Pos")
     if ok && field.Type == positionType {
-        posFieldIndex = field.Index
+        s.posFieldIndex = field.Index
     }
     field, ok = typ.FieldByName("EndPos")
     if ok && field.Type == positionType {
-        endPosFieldIndex = field.Index
+        s.endPosFieldIndex = field.Index
     }
-    return &strct{
-        typ:              typ,
-        posFieldIndex:    posFieldIndex,
-        endPosFieldIndex: endPosFieldIndex,
+    field, ok = typ.FieldByName("Tokens")
+    if ok && field.Type == tokensType {
+        s.tokensFieldIndex = field.Index
     }
+    return s
 }
 
 func (s *strct) String() string { return stringer(s) }
 
-func (s *strct) maybeInjectStartToken(token lexer.Token, v reflect.Value) {
-    if s.posFieldIndex == nil {
-        return
-    }
-    v.FieldByIndex(s.posFieldIndex).Set(reflect.ValueOf(token.Pos))
-}
-
-func (s *strct) maybeInjectEndToken(token lexer.Token, v reflect.Value) {
-    if s.endPosFieldIndex == nil {
-        return
-    }
-    v.FieldByIndex(s.endPosFieldIndex).Set(reflect.ValueOf(token.Pos))
-}
-
 func (s *strct) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
     sv := reflect.New(s.typ).Elem()
+    start := ctx.Cursor()
     t, err := ctx.Peek(0)
     if err != nil {
         return nil, err
@@ -125,11 +113,34 @@ func (s *strct) Parse(ctx *parseContext, parent reflect.Va
     } else if out == nil {
         return nil, nil
     }
+    end := ctx.Cursor()
     t, _ = ctx.Peek(0)
     s.maybeInjectEndToken(t, sv)
+    s.maybeInjectTokens(ctx.Range(start, end), sv)
     return []reflect.Value{sv}, ctx.Apply()
 }
 
+func (s *strct) maybeInjectStartToken(token lexer.Token, v reflect.Value) {
+    if s.posFieldIndex == nil {
+        return
+    }
+    v.FieldByIndex(s.posFieldIndex).Set(reflect.ValueOf(token.Pos))
+}
+
+func (s *strct) maybeInjectEndToken(token lexer.Token, v reflect.Value) {
+    if s.endPosFieldIndex == nil {
+        return
+    }
+    v.FieldByIndex(s.endPosFieldIndex).Set(reflect.ValueOf(token.Pos))
+}
+
+func (s *strct) maybeInjectTokens(tokens []lexer.Token, v reflect.Value) {
+    if s.tokensFieldIndex == nil {
+        return
+    }
+    v.FieldByIndex(s.tokensFieldIndex).Set(reflect.ValueOf(tokens))
+}
+
 type groupMatchMode int
 
 const (
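
The capture mechanism in Parse above is deliberately simple: record the PeekingLexer cursor before and after the node is parsed, then ask for that range back; because the cursor counts elided tokens (see peek.go), they end up in the captured slice too. The same idea outside the parser, as a minimal sketch rather than the actual implementation (assumes `github.com/alecthomas/participle/v2/lexer` is imported):

// captureWhile brackets an arbitrary consumption step with Cursor() calls and
// returns every raw token consumed, including ones Next() skipped as elided.
func captureWhile(p *lexer.PeekingLexer, consume func(*lexer.PeekingLexer) error) ([]lexer.Token, error) {
    start := p.Cursor()
    if err := consume(p); err != nil {
        return nil, err
    }
    return p.Range(start, p.Cursor()), nil
}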
22 changes: 18 additions & 4 deletions v2/parser.go
@@ -16,6 +16,7 @@ type Parser struct {
     useLookahead    int
     caseInsensitive map[string]bool
     mappers         []mapperByToken
+    elide           []string
 }
 
 // MustBuild calls Build(grammar, options...) and panics if an error occurs.
@@ -46,9 +47,9 @@ func Build(grammar interface{}, options ...Option) (parser *Parser, err error) {
         }
     }
 
+    symbols := p.lex.Symbols()
     if len(p.mappers) > 0 {
         mappers := map[rune][]Mapper{}
-        symbols := p.lex.Symbols()
         for _, mapper := range p.mappers {
             if len(mapper.symbols) == 0 {
                 mappers[lexer.EOF] = append(mappers[lexer.EOF], mapper.mapper)
@@ -161,7 +162,7 @@ func (p *Parser) ParseReader(filename string, r io.Reader, v interface{}, option
     if err != nil {
         return err
     }
-    peeker, err := lexer.Upgrade(lex)
+    peeker, err := lexer.Upgrade(lex, p.getElidedTypes()...)
     if err != nil {
         return err
     }
@@ -177,7 +178,7 @@ func (p *Parser) ParseString(filename string, s string, v interface{}, options .
     if err != nil {
         return err
     }
-    peeker, err := lexer.Upgrade(lex)
+    peeker, err := lexer.Upgrade(lex, p.getElidedTypes()...)
     if err != nil {
         return err
     }
@@ -193,7 +194,7 @@ func (p *Parser) ParseBytes(filename string, b []byte, v interface{}, options ..
     if err != nil {
         return err
     }
-    peeker, err := lexer.Upgrade(lex)
+    peeker, err := lexer.Upgrade(lex, p.getElidedTypes()...)
     if err != nil {
         return err
     }
@@ -266,3 +267,16 @@ func (p *Parser) rootParseable(ctx *parseContext, parseable Parseable) error {
     }
     return nil
 }
+
+func (p *Parser) getElidedTypes() []rune {
+    symbols := p.lex.Symbols()
+    elideTypes := make([]rune, 0, len(p.elide))
+    for _, elide := range p.elide {
+        rn, ok := symbols[elide]
+        if !ok {
+            panic(fmt.Errorf("Elide() uses unknown token %q", elide))
+        }
+        elideTypes = append(elideTypes, rn)
+    }
+    return elideTypes
+}
37 changes: 37 additions & 0 deletions v2/parser_test.go
@@ -1273,3 +1273,40 @@ func TestNegationWithDisjunction(t *testing.T) {
     require.Equal(t, &[]string{"hello", "world", ","}, ast.EverythingMoreComplex)
 
 }
+
+func TestASTTokens(t *testing.T) {
+    type subject struct {
+        Tokens []lexer.Token
+
+        Word string `@Ident`
+    }
+
+    type hello struct {
+        Tokens []lexer.Token
+
+        Subject subject `"hello" @@`
+    }
+
+    p := mustTestParser(t, &hello{},
+        participle.Elide("Whitespace"),
+        participle.Lexer(lexer.Must(stateful.NewSimple([]stateful.Rule{
+            {"Ident", `\w+`, nil},
+            {"Whitespace", `\s+`, nil},
+        }))))
+    actual := &hello{}
+    err := p.ParseString("", "hello world", actual)
+    require.NoError(t, err)
+    tokens := []lexer.Token{
+        {-2, "hello", lexer.Position{Line: 1, Column: 1}},
+        {-3, " ", lexer.Position{Offset: 5, Line: 1, Column: 6}},
+        {-2, "world", lexer.Position{Offset: 6, Line: 1, Column: 7}},
+    }
+    expected := &hello{
+        Tokens:  tokens,
+        Subject: subject{
+            Tokens: tokens[1:],
+            Word:   "world",
+        },
+    }
+    require.Equal(t, expected, actual)
+}
