Skip to content

Commit

Permalink
Allow lexers to be code-generated from JSON.
Browse files Browse the repository at this point in the history
- Add a CLI tool that can ingest the JSON and dump out the generated code.
- Lexers can now be JSON marshalled.
- Add a goreleaser step for the binary.

As discussed in #213
  • Loading branch information
alecthomas committed Sep 4, 2022
1 parent 49f4822 commit bfe7c69
Show file tree
Hide file tree
Showing 26 changed files with 917 additions and 960 deletions.
17 changes: 17 additions & 0 deletions .github/workflows/release.yml
@@ -0,0 +1,17 @@
# Release workflow: runs `goreleaser release` whenever a tag matching 'v*' is
# pushed.
name: Release
on:
push:
tags:
- 'v*'
jobs:
release:
name: Release
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
with:
# A fetch-depth of 0 requests the full history instead of a shallow clone.
fetch-depth: 0
# Append the Hermit environment variables to $GITHUB_ENV; presumably this is
# what makes the tools under ./bin (e.g. goreleaser) available to later steps.
- run: ./bin/hermit env --raw >> $GITHUB_ENV
- run: goreleaser release
env:
# Token passed to goreleaser via the environment for the GitHub release.
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
37 changes: 37 additions & 0 deletions .goreleaser.yml
@@ -0,0 +1,37 @@
# goreleaser configuration for the `participle` command-line binary built from
# ./cmd/participle and released under alecthomas/participle on GitHub.
project_name: participle
release:
github:
owner: alecthomas
name: participle
brews:
-
install: bin.install "participle"
env:
- CGO_ENABLED=0
builds:
# Target operating systems and architectures for the binary.
- goos:
- linux
- darwin
- windows
goarch:
- arm64
- amd64
- "386"
goarm:
- "6"
dir: ./cmd/participle
main: .
# -X main.version stamps the release version into the `version` variable of
# package main.
ldflags: -s -w -X main.version={{.Version}}
binary: participle
archives:
-
format: tar.gz
name_template: '{{ .Binary }}-{{ .Version }}-{{ .Os }}-{{ .Arch }}{{ if .Arm }}v{{
.Arm }}{{ end }}'
# Extra files shipped inside each archive.
files:
- COPYING
- README*
snapshot:
name_template: SNAPSHOT-{{ .Commit }}
checksum:
name_template: '{{ .ProjectName }}-{{ .Version }}-checksums.txt'
2 changes: 1 addition & 1 deletion COPYING
@@ -1,4 +1,4 @@
Copyright (C) 2017 Alec Thomas
Copyright (C) 2017-2022 Alec Thomas

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
Expand Down
2 changes: 1 addition & 1 deletion _examples/go.mod
Expand Up @@ -3,7 +3,7 @@ module github.com/alecthomas/participle/v2/_examples
go 1.18

require (
github.com/alecthomas/assert/v2 v2.0.3
github.com/alecthomas/assert/v2 v2.1.0
github.com/alecthomas/go-thrift v0.0.0-20170109061633-7914173639b2
github.com/alecthomas/kong v0.6.1
github.com/alecthomas/participle/v2 v2.0.0-alpha11
Expand Down
4 changes: 2 additions & 2 deletions _examples/go.sum
@@ -1,5 +1,5 @@
github.com/alecthomas/assert/v2 v2.0.3 h1:WKqJODfOiQG0nEJKFKzDIG3E29CN2/4zR9XGJzKIkbg=
github.com/alecthomas/assert/v2 v2.0.3/go.mod h1:b/+1DI2Q6NckYi+3mXyH3wFb8qG37K/DuK80n7WefXA=
github.com/alecthomas/assert/v2 v2.1.0 h1:tbredtNcQnoSd3QBhQWI7QZ3XHOVkw1Moklp2ojoH/0=
github.com/alecthomas/assert/v2 v2.1.0/go.mod h1:b/+1DI2Q6NckYi+3mXyH3wFb8qG37K/DuK80n7WefXA=
github.com/alecthomas/go-thrift v0.0.0-20170109061633-7914173639b2 h1:gKv6LPDhF/G3cNribA+kZtNPiPpKabZGLhcJuEtp3ig=
github.com/alecthomas/go-thrift v0.0.0-20170109061633-7914173639b2/go.mod h1:CxCgO+NdpMdi9SsTlGbc0W+/UNxO3I0AabOEJZ3w61w=
github.com/alecthomas/kong v0.6.1 h1:1kNhcFepkR+HmasQpbiKDLylIL8yh5B5y1zPp5bJimA=
Expand Down
9 changes: 0 additions & 9 deletions _examples/thrift/main.go
Expand Up @@ -215,15 +215,6 @@ func main() {

ctx := kong.Parse(&cli)

if cli.Gen {
w, err := os.Create("lexer_gen.go")
ctx.FatalIfErrorf(err)
defer w.Close()
err = lexer.ExperimentalGenerateLexer(w, "main", def)
ctx.FatalIfErrorf(err)
return
}

for _, file := range cli.Files {
r, err := os.Open(file)
ctx.FatalIfErrorf(err, "")
Expand Down
1 change: 1 addition & 0 deletions bin/.goreleaser-1.11.2.pkg
1 change: 1 addition & 0 deletions bin/.jq-1.6.pkg
1 change: 1 addition & 0 deletions bin/goreleaser
3 changes: 3 additions & 0 deletions bin/hermit.hcl
@@ -0,0 +1,3 @@
# Prepend the scripts directory of the Hermit environment to PATH.
env = {
"PATH": "${HERMIT_ENV}/scripts:${PATH}",
}
1 change: 1 addition & 0 deletions bin/jq
108 changes: 76 additions & 32 deletions lexer/codegen.go → cmd/participle/gen_lexer_cmd.go
@@ -1,33 +1,81 @@
package lexer
package main

import (
"encoding/json"
"fmt"
"io"
"os"
"regexp"
"regexp/syntax"
"sort"
"text/template"
"unicode/utf8"

"github.com/alecthomas/participle/v2/lexer"
)

// genLexerCmd holds the flags and positional arguments of the command that
// generates Go lexer code from a JSON lexer definition.
type genLexerCmd struct {
	// Name is handed to the code generator, which splices it into the
	// identifiers of the generated lexer.
	Name string `help:"Name of the lexer."`

	// Output is the path of the file that should receive the generated code.
	Output string `short:"o" help:"Output file."`

	// Package is the Go package name used for the generated code.
	Package string `arg:"" required:"" help:"Go package for generated code."`

	// Lexer is the path of the JSON lexer definition; "-" (the default)
	// selects stdin.
	Lexer string `arg:"" required:"" default:"-" type:"existingfile" help:"JSON representation of a Participle lexer."`
}

// Help returns the long-form help text for the lexer generation command
// (presumably surfaced by kong as the command's detailed help).
func (c *genLexerCmd) Help() string {
	return `
Generates Go code implementing the given JSON representation of a lexer. The
generated code should in general be around 10x faster and produce zero garbage
per token.
`
}

// Run executes the lexer generation command.
//
// It decodes a JSON set of lexer rules, builds a lexer definition from them
// and emits the generated Go source. The JSON is read from the file named by
// c.Lexer, or from stdin when c.Lexer is "-". The generated code is written
// to the file named by c.Output, or to stdout when c.Output is empty.
//
// It returns the first error encountered while opening, decoding,
// constructing the lexer, generating code, or closing the output file.
func (c *genLexerCmd) Run() error {
	in := os.Stdin
	if c.Lexer != "-" {
		f, err := os.Open(c.Lexer)
		if err != nil {
			return err
		}
		defer f.Close()
		in = f
	}

	rules := lexer.Rules{}
	if err := json.NewDecoder(in).Decode(&rules); err != nil {
		return fmt.Errorf("decoding lexer rules from %q: %w", c.Lexer, err)
	}
	def, err := lexer.New(rules)
	if err != nil {
		return fmt.Errorf("constructing lexer: %w", err)
	}

	if c.Output == "" {
		return generateLexer(os.Stdout, c.Package, def, c.Name)
	}

	// Honour -o/--output. The file is only created once the input has been
	// validated, so a bad definition does not truncate an existing file.
	out, err := os.Create(c.Output)
	if err != nil {
		return err
	}
	if err := generateLexer(out, c.Package, def, c.Name); err != nil {
		// Best effort: the generation error is the one worth reporting.
		_ = out.Close()
		return err
	}
	// A failed Close may mean the generated code never reached the disk.
	return out.Close()
}

var codegenBackrefRe = regexp.MustCompile(`(\\+)(\d)`)

var codegenTemplate *template.Template = template.Must(template.New("lexgen").Funcs(template.FuncMap{
"IsPush": func(r Rule) string {
if p, ok := r.Action.(ActionPush); ok {
"IsPush": func(r lexer.Rule) string {
if p, ok := r.Action.(lexer.ActionPush); ok {
return p.State
}
return ""
},
"IsPop": func(r Rule) bool {
_, ok := r.Action.(ActionPop)
"IsPop": func(r lexer.Rule) bool {
_, ok := r.Action.(lexer.ActionPop)
return ok
},
"IsReturn": func(r Rule) bool {
return r == ReturnRule
"IsReturn": func(r lexer.Rule) bool {
return r == lexer.ReturnRule
},
"OrderRules": orderRules,
"HaveBackrefs": func(def *StatefulDefinition, state string) bool {
"HaveBackrefs": func(def *lexer.StatefulDefinition, state string) bool {
for _, rule := range def.Rules()[state] {
if codegenBackrefRe.MatchString(rule.Pattern) {
return true
Expand All @@ -51,35 +99,35 @@ import (
var _ syntax.Op
var Lexer lexer.Definition = definitionImpl{}
var {{.Name}}Lexer lexer.Definition = lexer{{.Name}}DefinitionImpl{}
type definitionImpl struct {}
type lexer{{.Name}}DefinitionImpl struct {}
func (definitionImpl) Symbols() map[string]lexer.TokenType {
func (lexer{{.Name}}DefinitionImpl) Symbols() map[string]lexer.TokenType {
return map[string]lexer.TokenType{
{{- range $sym, $rn := .Def.Symbols}}
"{{$sym}}": {{$rn}},
{{- end}}
}
}
func (definitionImpl) LexString(filename string, s string) (lexer.Lexer, error) {
return &lexerImpl{
func (lexer{{.Name}}DefinitionImpl) LexString(filename string, s string) (lexer.Lexer, error) {
return &lexer{{.Name}}Impl{
s: s,
pos: lexer.Position{
Filename: filename,
Line: 1,
Column: 1,
},
states: []lexerState{lexerState{name: "Root"}},
states: []lexer{{.Name}}State{lexer{{.Name}}State{name: "Root"}},
}, nil
}
func (d definitionImpl) LexBytes(filename string, b []byte) (lexer.Lexer, error) {
func (d lexer{{.Name}}DefinitionImpl) LexBytes(filename string, b []byte) (lexer.Lexer, error) {
return d.LexString(filename, string(b))
}
func (d definitionImpl) Lex(filename string, r io.Reader) (lexer.Lexer, error) {
func (d lexer{{.Name}}DefinitionImpl) Lex(filename string, r io.Reader) (lexer.Lexer, error) {
s := &strings.Builder{}
_, err := io.Copy(s, r)
if err != nil {
Expand All @@ -88,19 +136,19 @@ func (d definitionImpl) Lex(filename string, r io.Reader) (lexer.Lexer, error) {
return d.LexString(filename, s.String())
}
type lexerState struct {
type lexer{{.Name}}State struct {
name string
groups []string
}
type lexerImpl struct {
type lexer{{.Name}}Impl struct {
s string
p int
pos lexer.Position
states []lexerState
states []lexer{{.Name}}State
}
func (l *lexerImpl) Next() (lexer.Token, error) {
func (l *lexer{{.Name}}Impl) Next() (lexer.Token, error) {
if l.p == len(l.s) {
return lexer.EOFToken(l.pos), nil
}
Expand All @@ -122,7 +170,7 @@ func (l *lexerImpl) Next() (lexer.Token, error) {
if true {
{{- end}}
{{- if .|IsPush}}
l.states = append(l.states, lexerState{name: "{{.|IsPush}}"{{if HaveBackrefs $.Def $state.Name}}, groups: l.sgroups(groups){{end}}})
l.states = append(l.states, lexer{{.Name}}State{name: "{{.|IsPush}}"{{if HaveBackrefs $.Def $state.Name}}, groups: l.sgroups(groups){{end}}})
{{- else if (or (.|IsPop) (.|IsReturn))}}
l.states = l.states[:len(l.states)-1]
{{- if .|IsReturn}}
Expand Down Expand Up @@ -154,7 +202,7 @@ func (l *lexerImpl) Next() (lexer.Token, error) {
}, nil
}
func (l *lexerImpl) sgroups(match []int) []string {
func (l *lexer{{.Name}}Impl) sgroups(match []int) []string {
sgroups := make([]string, len(match)/2)
for i := 0; i < len(match)-1; i += 2 {
sgroups[i/2] = l.s[l.p+match[i]:l.p+match[i+1]]
Expand All @@ -164,18 +212,14 @@ func (l *lexerImpl) sgroups(match []int) []string {
`))

// ExperimentalGenerateLexer generates Go code implementing the given stateful lexer.
//
// The generated code should in general be around 10x faster and produce zero garbage per token.
//
// NOTE: This is an experimental interface and subject to change.
func ExperimentalGenerateLexer(w io.Writer, pkg string, def *StatefulDefinition) error {
func generateLexer(w io.Writer, pkg string, def *lexer.StatefulDefinition, name string) error {
type ctx struct {
Package string
Def *StatefulDefinition
Name string
Def *lexer.StatefulDefinition
}
rules := def.Rules()
err := codegenTemplate.Execute(w, ctx{pkg, def})
err := codegenTemplate.Execute(w, ctx{pkg, name, def})
if err != nil {
return err
}
Expand All @@ -201,10 +245,10 @@ func ExperimentalGenerateLexer(w io.Writer, pkg string, def *StatefulDefinition)

type orderedRule struct {
Name string
Rules []Rule
Rules []lexer.Rule
}

func orderRules(rules Rules) []orderedRule {
func orderRules(rules lexer.Rules) []orderedRule {
orderedRules := []orderedRule{}
for name, rules := range rules {
orderedRules = append(orderedRules, orderedRule{
Expand Down
10 changes: 10 additions & 0 deletions cmd/participle/go.mod
@@ -0,0 +1,10 @@
module github.com/alecthomas/participle/v2/cmd/participle

go 1.18

require (
github.com/alecthomas/kong v0.6.1
github.com/alecthomas/participle/v2 v2.0.0-00010101000000-000000000000
)

replace github.com/alecthomas/participle/v2 => ../..
17 changes: 17 additions & 0 deletions cmd/participle/go.sum
@@ -0,0 +1,17 @@
github.com/alecthomas/assert/v2 v2.1.0 h1:tbredtNcQnoSd3QBhQWI7QZ3XHOVkw1Moklp2ojoH/0=
github.com/alecthomas/kong v0.6.1 h1:1kNhcFepkR+HmasQpbiKDLylIL8yh5B5y1zPp5bJimA=
github.com/alecthomas/kong v0.6.1/go.mod h1:JfHWDzLmbh/puW6I3V7uWenoh56YNVONW+w8eKeUr9I=
github.com/alecthomas/repr v0.0.0-20210801044451-80ca428c5142/go.mod h1:2kn6fqh/zIyPLmm3ugklbEi5hg5wS435eygvNfaDQL8=
github.com/alecthomas/repr v0.1.0 h1:ENn2e1+J3k09gyj2shc0dHr/yjaWSHRlrJ4DPMevDqE=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.2 h1:4jaiDzPyXQvSd7D0EjG45355tLlV3VOECpq10pLC+8s=
github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
22 changes: 22 additions & 0 deletions cmd/participle/main.go
@@ -0,0 +1,22 @@
package main

import "github.com/alecthomas/kong"

// version is exposed to kong as the "version" variable. It stays "dev" unless
// replaced at link time (e.g. with -X main.version=...).
var version = "dev"

// cli describes the command-line grammar: a version flag and the "gen"
// command group, which currently contains only the lexer generator.
var cli struct {
	Version kong.VersionFlag
	Gen     struct {
		Lexer genLexerCmd `cmd:""`
	} `cmd:"" help:"Generate code to accelerate Participle."`
}

// main parses the command line into cli, runs the selected command and hands
// any resulting error to kong's fatal error reporting.
func main() {
	app := kong.Parse(
		&cli,
		kong.Description(`A command-line tool for Participle.`),
		kong.Vars{"version": version},
	)
	app.FatalIfErrorf(app.Run())
}
7 changes: 5 additions & 2 deletions go.mod
Expand Up @@ -3,8 +3,11 @@ module github.com/alecthomas/participle/v2
go 1.18

require (
github.com/alecthomas/assert/v2 v2.0.3
github.com/alecthomas/assert/v2 v2.1.0
github.com/alecthomas/repr v0.1.0
)

require github.com/hexops/gotextdiff v1.0.3 // indirect
require (
github.com/hexops/gotextdiff v1.0.3 // indirect
github.com/mitchellh/mapstructure v1.5.0
)
4 changes: 4 additions & 0 deletions go.sum
@@ -1,6 +1,10 @@
github.com/alecthomas/assert/v2 v2.0.3 h1:WKqJODfOiQG0nEJKFKzDIG3E29CN2/4zR9XGJzKIkbg=
github.com/alecthomas/assert/v2 v2.0.3/go.mod h1:b/+1DI2Q6NckYi+3mXyH3wFb8qG37K/DuK80n7WefXA=
github.com/alecthomas/assert/v2 v2.1.0 h1:tbredtNcQnoSd3QBhQWI7QZ3XHOVkw1Moklp2ojoH/0=
github.com/alecthomas/assert/v2 v2.1.0/go.mod h1:b/+1DI2Q6NckYi+3mXyH3wFb8qG37K/DuK80n7WefXA=
github.com/alecthomas/repr v0.1.0 h1:ENn2e1+J3k09gyj2shc0dHr/yjaWSHRlrJ4DPMevDqE=
github.com/alecthomas/repr v0.1.0/go.mod h1:2kn6fqh/zIyPLmm3ugklbEi5hg5wS435eygvNfaDQL8=
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=

0 comments on commit bfe7c69

Please sign in to comment.