Skip to content

Commit

Permalink
add support for es5-style identifiers (#1349)
Browse files Browse the repository at this point in the history
  • Loading branch information
evanw committed Jun 5, 2021
1 parent 0954cc6 commit 537195a
Show file tree
Hide file tree
Showing 17 changed files with 897 additions and 119 deletions.
18 changes: 18 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,23 @@
# Changelog

## Unreleased

* Add support for ES5-style identifiers ([#1349](https://github.com/evanw/esbuild/issues/1349))

In ES6 and above, an identifier is a character sequence starting with a character in the `ID_Start` Unicode category and followed by zero or more characters in the `ID_Continue` Unicode category, and these categories must be drawn from Unicode version 5.1 or above.

But in ES5, an identifier is a character sequence starting with a character in one of the `Lu, Ll, Lt, Lm, Lo, Nl` Unicode categories and followed by zero or more characters in the `Lu, Ll, Lt, Lm, Lo, Nl, Mn, Mc, Nd, Pc` Unicode categories, and these categories must be drawn from Unicode version 3.0.

Previously esbuild always used the ES6+ identifier validation test. With this release, it uses the ES5 validation test instead when the target environment doesn't support ES6+ identifiers. This identifier validation test is used in decisions related to code printing, such as whether a property access can be printed with dot notation or must fall back to a quoted, bracketed property name. For example:

```
$ echo x.ꓷꓶꓲꓵꓭꓢꓱ | ./esbuild --charset=utf8
x.ꓷꓶꓲꓵꓭꓢꓱ;

$ echo x.ꓷꓶꓲꓵꓭꓢꓱ | ./esbuild --charset=utf8 --target=es5
x["ꓷꓶꓲꓵꓭꓢꓱ"];
```

## 0.12.6

* Improve template literal lowering transformation conformance ([#1327](https://github.com/evanw/esbuild/issues/1327))
Expand Down
4 changes: 2 additions & 2 deletions internal/bundler/linker.go
Expand Up @@ -1646,7 +1646,7 @@ func (c *linkerContext) generateCodeForLazyExport(sourceIndex uint32) {
clone.Properties = append(make([]js_ast.Property, 0, len(clone.Properties)), clone.Properties...)
for i, property := range clone.Properties {
if str, ok := property.Key.Data.(*js_ast.EString); ok &&
(!file.IsEntryPoint() || js_lexer.IsIdentifierUTF16(str.Value) ||
(!file.IsEntryPoint() || js_lexer.IsIdentifierUTF16(str.Value, 0) ||
!c.options.UnsupportedJSFeatures.Has(compat.ArbitraryModuleNamespaceNames)) {
name := js_lexer.UTF16ToString(str.Value)
exportRef := generateExport(name, name, property.ValueOrNil).ref
Expand Down Expand Up @@ -4168,7 +4168,7 @@ func (c *linkerContext) renameSymbolsInChunk(chunk *chunkInfo, filesInOrder []ui
}

// When we're not minifying, just append numbers to symbol names to avoid collisions
r := renamer.NewNumberRenamer(c.graph.Symbols, reservedNames)
r := renamer.NewNumberRenamer(c.graph.Symbols, reservedNames, c.options.UnsupportedJSFeatures)
nestedScopes := make(map[uint32][]*js_ast.Scope)

timer.Begin("Add top-level symbols")
Expand Down
10 changes: 10 additions & 0 deletions internal/compat/js_table.go
Expand Up @@ -79,6 +79,7 @@ const (
TemplateLiteral
TopLevelAwait
UnicodeEscapes
UpdatedIdentifiers
)

func (features JSFeature) Has(feature JSFeature) bool {
Expand Down Expand Up @@ -417,6 +418,15 @@ var jsTable = map[JSFeature]map[Engine][]int{
Node: {4},
Safari: {9},
},
UpdatedIdentifiers: {
Chrome: {58},
Edge: {14},
ES: {2015},
Firefox: {52},
IOS: {14},
Node: {8},
Safari: {14},
},
}

func isVersionLessThan(a []int, b []int) bool {
Expand Down
69 changes: 39 additions & 30 deletions internal/js_lexer/js_lexer.go
Expand Up @@ -20,6 +20,7 @@ import (
"unicode"
"unicode/utf8"

"github.com/evanw/esbuild/internal/compat"
"github.com/evanw/esbuild/internal/js_ast"
"github.com/evanw/esbuild/internal/logger"
)
Expand Down Expand Up @@ -559,34 +560,34 @@ func (lexer *Lexer) maybeExpandEquals() {
}
}

func IsIdentifier(text string) bool {
func IsIdentifier(text string, unsupportedJSFeatures compat.JSFeature) bool {
if len(text) == 0 {
return false
}
for i, codePoint := range text {
if i == 0 {
if !IsIdentifierStart(codePoint) {
if !IsIdentifierStart(codePoint, unsupportedJSFeatures) {
return false
}
} else {
if !IsIdentifierContinue(codePoint) {
if !IsIdentifierContinue(codePoint, unsupportedJSFeatures) {
return false
}
}
}
return true
}

func ForceValidIdentifier(text string) string {
if IsIdentifier(text) {
func ForceValidIdentifier(text string, unsupportedJSFeatures compat.JSFeature) string {
if IsIdentifier(text, unsupportedJSFeatures) {
return text
}
sb := strings.Builder{}

// Identifier start
c, width := utf8.DecodeRuneInString(text)
text = text[width:]
if IsIdentifierStart(c) {
if IsIdentifierStart(c, unsupportedJSFeatures) {
sb.WriteRune(c)
} else {
sb.WriteRune('_')
Expand All @@ -596,7 +597,7 @@ func ForceValidIdentifier(text string) string {
for text != "" {
c, width := utf8.DecodeRuneInString(text)
text = text[width:]
if IsIdentifierContinue(c) {
if IsIdentifierContinue(c, unsupportedJSFeatures) {
sb.WriteRune(c)
} else {
sb.WriteRune('_')
Expand All @@ -607,7 +608,7 @@ func ForceValidIdentifier(text string) string {
}

// This does "IsIdentifier(UTF16ToString(text))" without any allocations
func IsIdentifierUTF16(text []uint16) bool {
func IsIdentifierUTF16(text []uint16, unsupportedJSFeatures compat.JSFeature) bool {
n := len(text)
if n == 0 {
return false
Expand All @@ -622,19 +623,19 @@ func IsIdentifierUTF16(text []uint16) bool {
}
}
if isStart {
if !IsIdentifierStart(r1) {
if !IsIdentifierStart(r1, unsupportedJSFeatures) {
return false
}
} else {
if !IsIdentifierContinue(r1) {
if !IsIdentifierContinue(r1, unsupportedJSFeatures) {
return false
}
}
}
return true
}

func IsIdentifierStart(codePoint rune) bool {
func IsIdentifierStart(codePoint rune, unsupportedJSFeatures compat.JSFeature) bool {
switch codePoint {
case '_', '$',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
Expand All @@ -649,10 +650,14 @@ func IsIdentifierStart(codePoint rune) bool {
return false
}

return unicode.Is(idStart, codePoint)
if unsupportedJSFeatures.Has(compat.UpdatedIdentifiers) {
return unicode.Is(idStartES5, codePoint)
} else {
return unicode.Is(idStart, codePoint)
}
}

func IsIdentifierContinue(codePoint rune) bool {
func IsIdentifierContinue(codePoint rune, unsupportedJSFeatures compat.JSFeature) bool {
switch codePoint {
case '_', '$', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
Expand All @@ -672,7 +677,11 @@ func IsIdentifierContinue(codePoint rune) bool {
return true
}

return unicode.Is(idContinue, codePoint)
if unsupportedJSFeatures.Has(compat.UpdatedIdentifiers) {
return unicode.Is(idContinueES5, codePoint)
} else {
return unicode.Is(idContinue, codePoint)
}
}

// See the "White Space Code Points" table in the ECMAScript standard
Expand Down Expand Up @@ -725,7 +734,7 @@ func RangeOfIdentifier(source logger.Source, loc logger.Loc) logger.Range {
c, _ = utf8.DecodeRuneInString(text[i:])
}

if IsIdentifierStart(c) || c == '\\' {
if IsIdentifierStart(c, 0) || c == '\\' {
// Search for the end of the identifier
for i < len(text) {
c2, width2 := utf8.DecodeRuneInString(text[i:])
Expand All @@ -743,7 +752,7 @@ func RangeOfIdentifier(source logger.Source, loc logger.Loc) logger.Range {
i++
}
}
} else if !IsIdentifierContinue(c2) {
} else if !IsIdentifierContinue(c2, 0) {
return logger.Range{Loc: loc, Len: int32(i)}
} else {
i += width2
Expand Down Expand Up @@ -1001,9 +1010,9 @@ func (lexer *Lexer) NextInsideJSXElement() {
continue
}

if IsIdentifierStart(lexer.codePoint) {
if IsIdentifierStart(lexer.codePoint, 0) {
lexer.step()
for IsIdentifierContinue(lexer.codePoint) || lexer.codePoint == '-' {
for IsIdentifierContinue(lexer.codePoint, 0) || lexer.codePoint == '-' {
lexer.step()
}

Expand All @@ -1013,9 +1022,9 @@ func (lexer *Lexer) NextInsideJSXElement() {
// can't use this feature to reference JavaScript identifiers.
if lexer.codePoint == ':' {
lexer.step()
if IsIdentifierStart(lexer.codePoint) {
if IsIdentifierStart(lexer.codePoint, 0) {
lexer.step()
for IsIdentifierContinue(lexer.codePoint) || lexer.codePoint == '-' {
for IsIdentifierContinue(lexer.codePoint, 0) || lexer.codePoint == '-' {
lexer.step()
}
} else {
Expand Down Expand Up @@ -1073,11 +1082,11 @@ func (lexer *Lexer) Next() {
if lexer.codePoint == '\\' {
lexer.Identifier, _ = lexer.scanIdentifierWithEscapes(privateIdentifier)
} else {
if !IsIdentifierStart(lexer.codePoint) {
if !IsIdentifierStart(lexer.codePoint, 0) {
lexer.SyntaxError()
}
lexer.step()
for IsIdentifierContinue(lexer.codePoint) {
for IsIdentifierContinue(lexer.codePoint, 0) {
lexer.step()
}
if lexer.codePoint == '\\' {
Expand Down Expand Up @@ -1592,7 +1601,7 @@ func (lexer *Lexer) Next() {
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z':
lexer.step()
for IsIdentifierContinue(lexer.codePoint) {
for IsIdentifierContinue(lexer.codePoint, 0) {
lexer.step()
}
if lexer.codePoint == '\\' {
Expand All @@ -1619,9 +1628,9 @@ func (lexer *Lexer) Next() {
continue
}

if IsIdentifierStart(lexer.codePoint) {
if IsIdentifierStart(lexer.codePoint, 0) {
lexer.step()
for IsIdentifierContinue(lexer.codePoint) {
for IsIdentifierContinue(lexer.codePoint, 0) {
lexer.step()
}
if lexer.codePoint == '\\' {
Expand Down Expand Up @@ -1692,7 +1701,7 @@ func (lexer *Lexer) scanIdentifierWithEscapes(kind identifierKind) (string, T) {
}

// Stop when we reach the end of the identifier
if !IsIdentifierContinue(lexer.codePoint) {
if !IsIdentifierContinue(lexer.codePoint, 0) {
break
}
lexer.step()
Expand All @@ -1711,7 +1720,7 @@ func (lexer *Lexer) scanIdentifierWithEscapes(kind identifierKind) (string, T) {
if kind == privateIdentifier {
identifier = identifier[1:] // Skip over the "#"
}
if !IsIdentifier(identifier) {
if !IsIdentifier(identifier, 0) {
lexer.addRangeError(logger.Range{Loc: logger.Loc{Start: int32(lexer.start)}, Len: int32(lexer.end - lexer.start)},
fmt.Sprintf("Invalid identifier: %q", text))
}
Expand Down Expand Up @@ -2029,7 +2038,7 @@ func (lexer *Lexer) parseNumericLiteralOrDot() {
}

// Identifiers can't occur immediately after numbers
if IsIdentifierStart(lexer.codePoint) {
if IsIdentifierStart(lexer.codePoint, 0) {
lexer.SyntaxError()
}
}
Expand Down Expand Up @@ -2058,7 +2067,7 @@ func (lexer *Lexer) ScanRegExp() {
case '/':
lexer.step()
bits := uint32(0)
for IsIdentifierContinue(lexer.codePoint) {
for IsIdentifierContinue(lexer.codePoint, 0) {
switch lexer.codePoint {
case 'g', 'i', 'm', 's', 'u', 'y':
bit := uint32(1) << uint32(lexer.codePoint-'a')
Expand Down Expand Up @@ -2528,7 +2537,7 @@ func hasPrefixWithWordBoundary(text string, prefix string) bool {
return true
}
c, _ := utf8.DecodeRuneInString(text[p:])
if !IsIdentifierContinue(c) {
if !IsIdentifierContinue(c, 0) {
return true
}
}
Expand Down

0 comments on commit 537195a

Please sign in to comment.