Skip to content

Commit

Permalink
Meld consecutive template string literals
Browse files Browse the repository at this point in the history
When processing a template string, the lexer can emit multiple string
literal tokens for what ought to be a single string literal. This occurs
when the string contains escape sequences, or consecutive characters
which are indistinguishable from escape sequences at tokenization time.

This leads to a confusing AST and causes heuristics about template
expressions to fail. Specifically, when parsing a traversal with an
index, a key value containing an escape symbol will cause the parser to
generate an index expression instead of a traversal.

This commit adds a post-processing step to the template parser to meld
any sequences of string literals into a single string literal. Existing
tests covered the previous misbehaviour (several of which had comments
apologizing for it), and have been updated accordingly.

The new behaviour of the `IsStringLiteral` method of `TemplateExpr` is
covered with a new set of tests.
  • Loading branch information
alisdair committed Jan 27, 2023
1 parent 67270ba commit 5fe5697
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 92 deletions.
41 changes: 41 additions & 0 deletions hclsyntax/expression_template_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -395,3 +395,44 @@ trim`,
}

}

// TestTemplateExprIsStringLiteral checks which template sources are reported
// as plain string literals. Each key of the table is parsed with
// ParseTemplate and the outcome is compared against the expected boolean.
//
// A result that is not a *TemplateExpr is counted as "not a string literal"
// and still compared against the expectation, so a case that expects true
// cannot pass silently when the parser returns some other expression type.
func TestTemplateExprIsStringLiteral(t *testing.T) {
	tests := map[string]bool{
		// A simple string value is a string literal
		"a": true,

		// Strings containing escape characters or escape sequences are
		// tokenized into multiple string literals, but this should be
		// corrected by the parser
		"a$b":        true,
		"a%%b":       true,
		"a\nb":       true,
		"a$${\"b\"}": true,

		// Wrapped values (HIL-like) are not treated as string literals for
		// legacy reasons
		"${1}":     false,
		"${\"b\"}": false,

		// Even template expressions containing only literal values do not
		// count as string literals
		"a${1}":     false,
		"a${\"b\"}": false,
	}
	for input, want := range tests {
		t.Run(input, func(t *testing.T) {
			expr, diags := ParseTemplate([]byte(input), "", hcl.InitialPos)
			if len(diags) != 0 {
				t.Fatalf("unexpected diags: %s", diags.Error())
			}

			// Only a *TemplateExpr can report itself as a string literal;
			// any other expression type leaves got at false.
			got := false
			if tmplExpr, ok := expr.(*TemplateExpr); ok {
				got = tmplExpr.IsStringLiteral()
			}

			if got != want {
				t.Errorf("wrong result\ngot: %#v\nwant: %#v", got, want)
			}
		})
	}
}
32 changes: 32 additions & 0 deletions hclsyntax/parser_template.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ func (p *parser) parseTemplateInner(end TokenType, flushHeredoc bool) ([]Express
if flushHeredoc {
flushHeredocTemplateParts(parts) // Trim off leading spaces on lines per the flush heredoc spec
}
meldConsecutiveStringLiterals(parts)
tp := templateParser{
Tokens: parts.Tokens,
SrcRange: parts.SrcRange,
Expand Down Expand Up @@ -751,6 +752,37 @@ func flushHeredocTemplateParts(parts *templateParts) {
}
}

// meldConsecutiveStringLiterals simplifies the AST output by combining each
// run of adjacent string literal tokens in parts.Tokens into a single string
// literal token. This must be performed after any whitespace trimming
// operations.
//
// The first literal of a run is kept and updated in place: its Val becomes
// the concatenation of every Val in the run, and the end of its SrcRange is
// moved to the end of the last literal in the run. Tokens that are not
// string literals are kept unchanged and in their original order.
func meldConsecutiveStringLiterals(parts *templateParts) {
	if len(parts.Tokens) == 0 {
		return
	}

	// Compact the slice in place in a single pass. melded shares the backing
	// array of parts.Tokens and is never longer than the number of tokens
	// already read, so appending to it cannot overwrite a token that has not
	// been visited yet. This avoids shifting the whole tail of the slice
	// every time two literals are merged.
	melded := parts.Tokens[:1]
	for _, tok := range parts.Tokens[1:] {
		if literal, ok := tok.(*templateLiteralToken); ok {
			if prevLiteral, ok := melded[len(melded)-1].(*templateLiteralToken); ok {
				// The current and previously kept tokens are both literals:
				// fold the current one into the previous one and drop it.
				prevLiteral.Val += literal.Val
				prevLiteral.SrcRange.End = literal.SrcRange.End
				continue
			}
		}

		// Not part of a literal run: keep the token as it is.
		melded = append(melded, tok)
	}
	parts.Tokens = melded
}

type templateParts struct {
Tokens []templateToken
SrcRange hcl.Range
Expand Down
98 changes: 6 additions & 92 deletions hclsyntax/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -738,26 +738,10 @@ block "valid" {}
Expr: &TemplateExpr{
Parts: []Expression{
&LiteralValueExpr{
Val: cty.StringVal("hello "),
Val: cty.StringVal("hello ${true}"),

SrcRange: hcl.Range{
Start: hcl.Pos{Line: 1, Column: 6, Byte: 5},
End: hcl.Pos{Line: 1, Column: 12, Byte: 11},
},
},
&LiteralValueExpr{
Val: cty.StringVal("${"),

SrcRange: hcl.Range{
Start: hcl.Pos{Line: 1, Column: 12, Byte: 11},
End: hcl.Pos{Line: 1, Column: 15, Byte: 14},
},
},
&LiteralValueExpr{
Val: cty.StringVal("true}"),

SrcRange: hcl.Range{
Start: hcl.Pos{Line: 1, Column: 15, Byte: 14},
End: hcl.Pos{Line: 1, Column: 20, Byte: 19},
},
},
Expand Down Expand Up @@ -804,26 +788,10 @@ block "valid" {}
Expr: &TemplateExpr{
Parts: []Expression{
&LiteralValueExpr{
Val: cty.StringVal("hello "),
Val: cty.StringVal("hello %{true}"),

SrcRange: hcl.Range{
Start: hcl.Pos{Line: 1, Column: 6, Byte: 5},
End: hcl.Pos{Line: 1, Column: 12, Byte: 11},
},
},
&LiteralValueExpr{
Val: cty.StringVal("%{"),

SrcRange: hcl.Range{
Start: hcl.Pos{Line: 1, Column: 12, Byte: 11},
End: hcl.Pos{Line: 1, Column: 15, Byte: 14},
},
},
&LiteralValueExpr{
Val: cty.StringVal("true}"),

SrcRange: hcl.Range{
Start: hcl.Pos{Line: 1, Column: 15, Byte: 14},
End: hcl.Pos{Line: 1, Column: 20, Byte: 19},
},
},
Expand Down Expand Up @@ -870,29 +838,10 @@ block "valid" {}
Expr: &TemplateExpr{
Parts: []Expression{
&LiteralValueExpr{
Val: cty.StringVal("hello "),
Val: cty.StringVal("hello $$"),

SrcRange: hcl.Range{
Start: hcl.Pos{Line: 1, Column: 6, Byte: 5},
End: hcl.Pos{Line: 1, Column: 12, Byte: 11},
},
},
// This parses oddly due to how the scanner
// handles escaping of the $ sequence, but it's
// functionally equivalent to a single literal.
&LiteralValueExpr{
Val: cty.StringVal("$"),

SrcRange: hcl.Range{
Start: hcl.Pos{Line: 1, Column: 12, Byte: 11},
End: hcl.Pos{Line: 1, Column: 13, Byte: 12},
},
},
&LiteralValueExpr{
Val: cty.StringVal("$"),

SrcRange: hcl.Range{
Start: hcl.Pos{Line: 1, Column: 13, Byte: 12},
End: hcl.Pos{Line: 1, Column: 14, Byte: 13},
},
},
Expand Down Expand Up @@ -939,18 +888,10 @@ block "valid" {}
Expr: &TemplateExpr{
Parts: []Expression{
&LiteralValueExpr{
Val: cty.StringVal("hello "),
Val: cty.StringVal("hello $"),

SrcRange: hcl.Range{
Start: hcl.Pos{Line: 1, Column: 6, Byte: 5},
End: hcl.Pos{Line: 1, Column: 12, Byte: 11},
},
},
&LiteralValueExpr{
Val: cty.StringVal("$"),

SrcRange: hcl.Range{
Start: hcl.Pos{Line: 1, Column: 12, Byte: 11},
End: hcl.Pos{Line: 1, Column: 13, Byte: 12},
},
},
Expand Down Expand Up @@ -997,29 +938,10 @@ block "valid" {}
Expr: &TemplateExpr{
Parts: []Expression{
&LiteralValueExpr{
Val: cty.StringVal("hello "),
Val: cty.StringVal("hello %%"),

SrcRange: hcl.Range{
Start: hcl.Pos{Line: 1, Column: 6, Byte: 5},
End: hcl.Pos{Line: 1, Column: 12, Byte: 11},
},
},
// This parses oddly due to how the scanner
// handles escaping of the % sequence, but it's
// functionally equivalent to a single literal.
&LiteralValueExpr{
Val: cty.StringVal("%"),

SrcRange: hcl.Range{
Start: hcl.Pos{Line: 1, Column: 12, Byte: 11},
End: hcl.Pos{Line: 1, Column: 13, Byte: 12},
},
},
&LiteralValueExpr{
Val: cty.StringVal("%"),

SrcRange: hcl.Range{
Start: hcl.Pos{Line: 1, Column: 13, Byte: 12},
End: hcl.Pos{Line: 1, Column: 14, Byte: 13},
},
},
Expand Down Expand Up @@ -1066,18 +988,10 @@ block "valid" {}
Expr: &TemplateExpr{
Parts: []Expression{
&LiteralValueExpr{
Val: cty.StringVal("hello "),
Val: cty.StringVal("hello %"),

SrcRange: hcl.Range{
Start: hcl.Pos{Line: 1, Column: 6, Byte: 5},
End: hcl.Pos{Line: 1, Column: 12, Byte: 11},
},
},
&LiteralValueExpr{
Val: cty.StringVal("%"),

SrcRange: hcl.Range{
Start: hcl.Pos{Line: 1, Column: 12, Byte: 11},
End: hcl.Pos{Line: 1, Column: 13, Byte: 12},
},
},
Expand Down

0 comments on commit 5fe5697

Please sign in to comment.