add support for es5-style identifiers (#1349)

evanw · Jun 5, 2021 · 537195a · 537195a
1 parent 0954cc6
commit 537195a
Show file tree

Hide file tree

Showing 17 changed files with 897 additions and 119 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,23 @@
 # Changelog
 
+## Unreleased
+
+* Add support for ES5-style identifiers ([#1349](https://github.com/evanw/esbuild/issues/1349))
+
+    In ES6 and above, an identifier is a character sequence starting with a character that has the `ID_Start` Unicode property and followed by zero or more characters that have the `ID_Continue` Unicode property, and these properties must be drawn from Unicode version 5.1 or above.
+
+    But in ES5, an identifier is a character sequence starting with a character in one of the `Lu, Ll, Lt, Lm, Lo, Nl` Unicode categories and followed by zero or more characters in the `Lu, Ll, Lt, Lm, Lo, Nl, Mn, Mc, Nd, Pc` Unicode categories, and these categories must be drawn from Unicode version 3.0.
+
+    Previously esbuild always used the ES6+ identifier validation test, but with this release it will use the ES5 validation test when the target environment doesn't support ES6+ identifiers. This identifier validation test is used in decisions related to code printing. For example:
+
+    ```
+    $ echo x.ꓷꓶꓲꓵꓭꓢꓱ | ./esbuild --charset=utf8
+    x.ꓷꓶꓲꓵꓭꓢꓱ;
+
+    $ echo x.ꓷꓶꓲꓵꓭꓢꓱ | ./esbuild --charset=utf8 --target=es5
+    x["ꓷꓶꓲꓵꓭꓢꓱ"];
+    ```
+
 ## 0.12.6
 
 * Improve template literal lowering transformation conformance ([#1327](https://github.com/evanw/esbuild/issues/1327))

diff --git a/internal/bundler/linker.go b/internal/bundler/linker.go
@@ -1646,7 +1646,7 @@ func (c *linkerContext) generateCodeForLazyExport(sourceIndex uint32) {
 		clone.Properties = append(make([]js_ast.Property, 0, len(clone.Properties)), clone.Properties...)
 		for i, property := range clone.Properties {
 			if str, ok := property.Key.Data.(*js_ast.EString); ok &&
-				(!file.IsEntryPoint() || js_lexer.IsIdentifierUTF16(str.Value) ||
+				(!file.IsEntryPoint() || js_lexer.IsIdentifierUTF16(str.Value, 0) ||
 					!c.options.UnsupportedJSFeatures.Has(compat.ArbitraryModuleNamespaceNames)) {
 				name := js_lexer.UTF16ToString(str.Value)
 				exportRef := generateExport(name, name, property.ValueOrNil).ref
@@ -4168,7 +4168,7 @@ func (c *linkerContext) renameSymbolsInChunk(chunk *chunkInfo, filesInOrder []ui
 	}
 
 	// When we're not minifying, just append numbers to symbol names to avoid collisions
-	r := renamer.NewNumberRenamer(c.graph.Symbols, reservedNames)
+	r := renamer.NewNumberRenamer(c.graph.Symbols, reservedNames, c.options.UnsupportedJSFeatures)
 	nestedScopes := make(map[uint32][]*js_ast.Scope)
 
 	timer.Begin("Add top-level symbols")

diff --git a/internal/compat/js_table.go b/internal/compat/js_table.go
@@ -79,6 +79,7 @@ const (
 	TemplateLiteral
 	TopLevelAwait
 	UnicodeEscapes
+	UpdatedIdentifiers
 )
 
 func (features JSFeature) Has(feature JSFeature) bool {
@@ -417,6 +418,15 @@ var jsTable = map[JSFeature]map[Engine][]int{
 		Node:    {4},
 		Safari:  {9},
 	},
+	UpdatedIdentifiers: {
+		Chrome:  {58},
+		Edge:    {14},
+		ES:      {2015},
+		Firefox: {52},
+		IOS:     {14},
+		Node:    {8},
+		Safari:  {14},
+	},
 }
 
 func isVersionLessThan(a []int, b []int) bool {

diff --git a/internal/js_lexer/js_lexer.go b/internal/js_lexer/js_lexer.go
@@ -20,6 +20,7 @@ import (
 	"unicode"
 	"unicode/utf8"
 
+	"github.com/evanw/esbuild/internal/compat"
 	"github.com/evanw/esbuild/internal/js_ast"
 	"github.com/evanw/esbuild/internal/logger"
 )
@@ -559,34 +560,34 @@ func (lexer *Lexer) maybeExpandEquals() {
 	}
 }
 
-func IsIdentifier(text string) bool {
+func IsIdentifier(text string, unsupportedJSFeatures compat.JSFeature) bool {
 	if len(text) == 0 {
 		return false
 	}
 	for i, codePoint := range text {
 		if i == 0 {
-			if !IsIdentifierStart(codePoint) {
+			if !IsIdentifierStart(codePoint, unsupportedJSFeatures) {
 				return false
 			}
 		} else {
-			if !IsIdentifierContinue(codePoint) {
+			if !IsIdentifierContinue(codePoint, unsupportedJSFeatures) {
 				return false
 			}
 		}
 	}
 	return true
 }
 
-func ForceValidIdentifier(text string) string {
-	if IsIdentifier(text) {
+func ForceValidIdentifier(text string, unsupportedJSFeatures compat.JSFeature) string {
+	if IsIdentifier(text, unsupportedJSFeatures) {
 		return text
 	}
 	sb := strings.Builder{}
 
 	// Identifier start
 	c, width := utf8.DecodeRuneInString(text)
 	text = text[width:]
-	if IsIdentifierStart(c) {
+	if IsIdentifierStart(c, unsupportedJSFeatures) {
 		sb.WriteRune(c)
 	} else {
 		sb.WriteRune('_')
@@ -596,7 +597,7 @@ func ForceValidIdentifier(text string) string {
 	for text != "" {
 		c, width := utf8.DecodeRuneInString(text)
 		text = text[width:]
-		if IsIdentifierContinue(c) {
+		if IsIdentifierContinue(c, unsupportedJSFeatures) {
 			sb.WriteRune(c)
 		} else {
 			sb.WriteRune('_')
@@ -607,7 +608,7 @@ func ForceValidIdentifier(text string) string {
 }
 
 // This does "IsIdentifier(UTF16ToString(text))" without any allocations
-func IsIdentifierUTF16(text []uint16) bool {
+func IsIdentifierUTF16(text []uint16, unsupportedJSFeatures compat.JSFeature) bool {
 	n := len(text)
 	if n == 0 {
 		return false
@@ -622,19 +623,19 @@ func IsIdentifierUTF16(text []uint16) bool {
 			}
 		}
 		if isStart {
-			if !IsIdentifierStart(r1) {
+			if !IsIdentifierStart(r1, unsupportedJSFeatures) {
 				return false
 			}
 		} else {
-			if !IsIdentifierContinue(r1) {
+			if !IsIdentifierContinue(r1, unsupportedJSFeatures) {
 				return false
 			}
 		}
 	}
 	return true
 }
 
-func IsIdentifierStart(codePoint rune) bool {
+func IsIdentifierStart(codePoint rune, unsupportedJSFeatures compat.JSFeature) bool {
 	switch codePoint {
 	case '_', '$',
 		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
@@ -649,10 +650,14 @@ func IsIdentifierStart(codePoint rune) bool {
 		return false
 	}
 
-	return unicode.Is(idStart, codePoint)
+	if unsupportedJSFeatures.Has(compat.UpdatedIdentifiers) {
+		return unicode.Is(idStartES5, codePoint)
+	} else {
+		return unicode.Is(idStart, codePoint)
+	}
 }
 
-func IsIdentifierContinue(codePoint rune) bool {
+func IsIdentifierContinue(codePoint rune, unsupportedJSFeatures compat.JSFeature) bool {
 	switch codePoint {
 	case '_', '$', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
 		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
@@ -672,7 +677,11 @@ func IsIdentifierContinue(codePoint rune) bool {
 		return true
 	}
 
-	return unicode.Is(idContinue, codePoint)
+	if unsupportedJSFeatures.Has(compat.UpdatedIdentifiers) {
+		return unicode.Is(idContinueES5, codePoint)
+	} else {
+		return unicode.Is(idContinue, codePoint)
+	}
 }
 
 // See the "White Space Code Points" table in the ECMAScript standard
@@ -725,7 +734,7 @@ func RangeOfIdentifier(source logger.Source, loc logger.Loc) logger.Range {
 		c, _ = utf8.DecodeRuneInString(text[i:])
 	}
 
-	if IsIdentifierStart(c) || c == '\\' {
+	if IsIdentifierStart(c, 0) || c == '\\' {
 		// Search for the end of the identifier
 		for i < len(text) {
 			c2, width2 := utf8.DecodeRuneInString(text[i:])
@@ -743,7 +752,7 @@ func RangeOfIdentifier(source logger.Source, loc logger.Loc) logger.Range {
 						i++
 					}
 				}
-			} else if !IsIdentifierContinue(c2) {
+			} else if !IsIdentifierContinue(c2, 0) {
 				return logger.Range{Loc: loc, Len: int32(i)}
 			} else {
 				i += width2
@@ -1001,9 +1010,9 @@ func (lexer *Lexer) NextInsideJSXElement() {
 				continue
 			}
 
-			if IsIdentifierStart(lexer.codePoint) {
+			if IsIdentifierStart(lexer.codePoint, 0) {
 				lexer.step()
-				for IsIdentifierContinue(lexer.codePoint) || lexer.codePoint == '-' {
+				for IsIdentifierContinue(lexer.codePoint, 0) || lexer.codePoint == '-' {
 					lexer.step()
 				}
 
@@ -1013,9 +1022,9 @@ func (lexer *Lexer) NextInsideJSXElement() {
 				// can't use this feature to reference JavaScript identifiers.
 				if lexer.codePoint == ':' {
 					lexer.step()
-					if IsIdentifierStart(lexer.codePoint) {
+					if IsIdentifierStart(lexer.codePoint, 0) {
 						lexer.step()
-						for IsIdentifierContinue(lexer.codePoint) || lexer.codePoint == '-' {
+						for IsIdentifierContinue(lexer.codePoint, 0) || lexer.codePoint == '-' {
 							lexer.step()
 						}
 					} else {
@@ -1073,11 +1082,11 @@ func (lexer *Lexer) Next() {
 				if lexer.codePoint == '\\' {
 					lexer.Identifier, _ = lexer.scanIdentifierWithEscapes(privateIdentifier)
 				} else {
-					if !IsIdentifierStart(lexer.codePoint) {
+					if !IsIdentifierStart(lexer.codePoint, 0) {
 						lexer.SyntaxError()
 					}
 					lexer.step()
-					for IsIdentifierContinue(lexer.codePoint) {
+					for IsIdentifierContinue(lexer.codePoint, 0) {
 						lexer.step()
 					}
 					if lexer.codePoint == '\\' {
@@ -1592,7 +1601,7 @@ func (lexer *Lexer) Next() {
 			'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
 			'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z':
 			lexer.step()
-			for IsIdentifierContinue(lexer.codePoint) {
+			for IsIdentifierContinue(lexer.codePoint, 0) {
 				lexer.step()
 			}
 			if lexer.codePoint == '\\' {
@@ -1619,9 +1628,9 @@ func (lexer *Lexer) Next() {
 				continue
 			}
 
-			if IsIdentifierStart(lexer.codePoint) {
+			if IsIdentifierStart(lexer.codePoint, 0) {
 				lexer.step()
-				for IsIdentifierContinue(lexer.codePoint) {
+				for IsIdentifierContinue(lexer.codePoint, 0) {
 					lexer.step()
 				}
 				if lexer.codePoint == '\\' {
@@ -1692,7 +1701,7 @@ func (lexer *Lexer) scanIdentifierWithEscapes(kind identifierKind) (string, T) {
 		}
 
 		// Stop when we reach the end of the identifier
-		if !IsIdentifierContinue(lexer.codePoint) {
+		if !IsIdentifierContinue(lexer.codePoint, 0) {
 			break
 		}
 		lexer.step()
@@ -1711,7 +1720,7 @@ func (lexer *Lexer) scanIdentifierWithEscapes(kind identifierKind) (string, T) {
 	if kind == privateIdentifier {
 		identifier = identifier[1:] // Skip over the "#"
 	}
-	if !IsIdentifier(identifier) {
+	if !IsIdentifier(identifier, 0) {
 		lexer.addRangeError(logger.Range{Loc: logger.Loc{Start: int32(lexer.start)}, Len: int32(lexer.end - lexer.start)},
 			fmt.Sprintf("Invalid identifier: %q", text))
 	}
@@ -2029,7 +2038,7 @@ func (lexer *Lexer) parseNumericLiteralOrDot() {
 	}
 
 	// Identifiers can't occur immediately after numbers
-	if IsIdentifierStart(lexer.codePoint) {
+	if IsIdentifierStart(lexer.codePoint, 0) {
 		lexer.SyntaxError()
 	}
 }
@@ -2058,7 +2067,7 @@ func (lexer *Lexer) ScanRegExp() {
 		case '/':
 			lexer.step()
 			bits := uint32(0)
-			for IsIdentifierContinue(lexer.codePoint) {
+			for IsIdentifierContinue(lexer.codePoint, 0) {
 				switch lexer.codePoint {
 				case 'g', 'i', 'm', 's', 'u', 'y':
 					bit := uint32(1) << uint32(lexer.codePoint-'a')
@@ -2528,7 +2537,7 @@ func hasPrefixWithWordBoundary(text string, prefix string) bool {
 			return true
 		}
 		c, _ := utf8.DecodeRuneInString(text[p:])
-		if !IsIdentifierContinue(c) {
+		if !IsIdentifierContinue(c, 0) {
 			return true
 		}
 	}