diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a890c6d138..42ac39fb09a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,22 +2,27 @@ ## Unreleased -* Add support for ES5-style identifiers ([#1349](https://github.com/evanw/esbuild/issues/1349)) +* Quote object properties that are modern Unicode identifiers ([#1349](https://github.com/evanw/esbuild/issues/1349)) In ES6 and above, an identifier is a character sequence starting with a character in the `ID_Start` Unicode category and followed by zero or more characters in the `ID_Continue` Unicode category, and these categories must be drawn from Unicode version 5.1 or above. - But in ES5, an identifier is a character sequence starting with a character in one of the `Lu, Ll, Lt, Lm, Lo, Nl` Unicode categories and followed by zero or more characters in the `Lu, Ll, Lt, Lm, Lo, Nl, Mn, Mc, Nd, Pc` Unicode categories, and these categories must be drawn from Unicode version 3.0. + But in ES5, an identifier is a character sequence starting with a character in one of the `Lu, Ll, Lt, Lm, Lo, Nl` Unicode categories and followed by zero or more characters in the `Lu, Ll, Lt, Lm, Lo, Nl, Mn, Mc, Nd, Pc` Unicode categories, and these categories must be drawn from Unicode version 3.0 or above. - Previously esbuild always used the ES6+ identifier validation test but with this release, it will use the ES5 validation test when the target environment doesn't support ES6+ identifiers. This identifier validation test is used in decisions related to code printing. For example: + Previously esbuild always used the ES6+ identifier validation test when deciding whether to use an identifier or a quoted string to encode an object property but with this release, it will use the ES5 validation test instead: - ``` - $ echo x.ꓷꓶꓲꓵꓭꓢꓱ | ./esbuild --charset=utf8 - x.ꓷꓶꓲꓵꓭꓢꓱ; + ```js + // Original code + x.ꓷꓶꓲꓵꓭꓢꓱ = { ꓷꓶꓲꓵꓭꓢꓱ: y }; - $ echo x.ꓷꓶꓲꓵꓭꓢꓱ | ./esbuild --charset=utf8 --target=es5 - x["ꓷꓶꓲꓵꓭꓢꓱ"]; + // Old output + x.ꓷꓶꓲꓵꓭꓢꓱ = { ꓷꓶꓲꓵꓭꓢꓱ: y }; + + // New output + x["ꓷꓶꓲꓵꓭꓢꓱ"] = { "ꓷꓶꓲꓵꓭꓢꓱ": y }; ``` + This approach should ensure maximum compatibility with all JavaScript environments that support ES5 and above. Note that this means minified files containing Unicode properties may be slightly larger than before. + ## 0.12.6 * Improve template literal lowering transformation conformance ([#1327](https://github.com/evanw/esbuild/issues/1327)) diff --git a/internal/bundler/linker.go b/internal/bundler/linker.go index 495084ac30b..15fbe7e6e59 100644 --- a/internal/bundler/linker.go +++ b/internal/bundler/linker.go @@ -1660,7 +1660,7 @@ func (c *linkerContext) generateCodeForLazyExport(sourceIndex uint32) { clone.Properties = append(make([]js_ast.Property, 0, len(clone.Properties)), clone.Properties...) for i, property := range clone.Properties { if str, ok := property.Key.Data.(*js_ast.EString); ok && - (!file.IsEntryPoint() || js_lexer.IsIdentifierUTF16(str.Value, 0) || + (!file.IsEntryPoint() || js_lexer.IsIdentifierUTF16(str.Value) || !c.options.UnsupportedJSFeatures.Has(compat.ArbitraryModuleNamespaceNames)) { name := js_lexer.UTF16ToString(str.Value) exportRef := generateExport(name, name, property.ValueOrNil).ref @@ -4178,7 +4178,7 @@ func (c *linkerContext) renameSymbolsInChunk(chunk *chunkInfo, filesInOrder []ui } // When we're not minifying, just append numbers to symbol names to avoid collisions - r := renamer.NewNumberRenamer(c.graph.Symbols, reservedNames, c.options.UnsupportedJSFeatures) + r := renamer.NewNumberRenamer(c.graph.Symbols, reservedNames) nestedScopes := make(map[uint32][]*js_ast.Scope) timer.Begin("Add top-level symbols") @@ -4705,7 +4705,7 @@ func (c *linkerContext) generateGlobalNamePrefix() string { join = ";" } - if js_printer.CanQuoteIdentifier(prefix, c.options.UnsupportedJSFeatures, c.options.ASCIIOnly) { + if js_printer.CanEscapeIdentifier(prefix, c.options.UnsupportedJSFeatures, c.options.ASCIIOnly) { if c.options.ASCIIOnly { prefix = string(js_printer.QuoteIdentifier(nil, prefix, c.options.UnsupportedJSFeatures)) } @@ -4717,7 +4717,7 @@ func (c *linkerContext) generateGlobalNamePrefix() string { for _, name := range c.options.GlobalName[1:] { oldPrefix := prefix - if js_printer.CanQuoteIdentifier(name, c.options.UnsupportedJSFeatures, c.options.ASCIIOnly) { + if js_printer.CanEscapeIdentifier(name, c.options.UnsupportedJSFeatures, c.options.ASCIIOnly) { if c.options.ASCIIOnly { name = string(js_printer.QuoteIdentifier(nil, name, c.options.UnsupportedJSFeatures)) } diff --git a/internal/compat/js_table.go b/internal/compat/js_table.go index f1c80ca1de7..425a8f896ca 100644 --- a/internal/compat/js_table.go +++ b/internal/compat/js_table.go @@ -79,7 +79,6 @@ const ( TemplateLiteral TopLevelAwait UnicodeEscapes - UpdatedIdentifiers ) func (features JSFeature) Has(feature JSFeature) bool { @@ -418,15 +417,6 @@ var jsTable = map[JSFeature]map[Engine][]int{ Node: {4}, Safari: {9}, }, - UpdatedIdentifiers: { - Chrome: {58}, - Edge: {14}, - ES: {2015}, - Firefox: {52}, - IOS: {14}, - Node: {8}, - Safari: {14}, - }, } func isVersionLessThan(a []int, b []int) bool { diff --git a/internal/js_lexer/js_lexer.go b/internal/js_lexer/js_lexer.go index d66dcc8c3cb..b169ab92357 100644 --- a/internal/js_lexer/js_lexer.go +++ b/internal/js_lexer/js_lexer.go @@ -20,7 +20,6 @@ import ( "unicode" "unicode/utf8" - "github.com/evanw/esbuild/internal/compat" "github.com/evanw/esbuild/internal/js_ast" "github.com/evanw/esbuild/internal/logger" ) @@ -560,17 +559,17 @@ func (lexer *Lexer) maybeExpandEquals() { } } -func IsIdentifier(text string, unsupportedJSFeatures compat.JSFeature) bool { +func IsIdentifier(text string) bool { if len(text) == 0 { return false } for i, codePoint := range text { if i == 0 { - if !IsIdentifierStart(codePoint, unsupportedJSFeatures) { + if !IsIdentifierStart(codePoint) { return false } } else { - if !IsIdentifierContinue(codePoint, unsupportedJSFeatures) { + if !IsIdentifierContinue(codePoint) { return false } } @@ -578,8 +577,26 @@ func IsIdentifier(text string, unsupportedJSFeatures compat.JSFeature) bool { return true } -func ForceValidIdentifier(text string, unsupportedJSFeatures compat.JSFeature) string { - if IsIdentifier(text, unsupportedJSFeatures) { +func IsIdentifierES5(text string) bool { + if len(text) == 0 { + return false + } + for i, codePoint := range text { + if i == 0 { + if !IsIdentifierStartES5(codePoint) { + return false + } + } else { + if !IsIdentifierContinueES5(codePoint) { + return false + } + } + } + return true +} + +func ForceValidIdentifier(text string) string { + if IsIdentifier(text) { return text } sb := strings.Builder{} @@ -587,7 +604,7 @@ func ForceValidIdentifier(text string, unsupportedJSFeatures compat.JSFeature) s // Identifier start c, width := utf8.DecodeRuneInString(text) text = text[width:] - if IsIdentifierStart(c, unsupportedJSFeatures) { + if IsIdentifierStart(c) { sb.WriteRune(c) } else { sb.WriteRune('_') @@ -597,7 +614,7 @@ func ForceValidIdentifier(text string, unsupportedJSFeatures compat.JSFeature) s for text != "" { c, width := utf8.DecodeRuneInString(text) text = text[width:] - if IsIdentifierContinue(c, unsupportedJSFeatures) { + if IsIdentifierContinue(c) { sb.WriteRune(c) } else { sb.WriteRune('_') @@ -608,7 +625,35 @@ func ForceValidIdentifier(text string, unsupportedJSFeatures compat.JSFeature) s } // This does "IsIdentifier(UTF16ToString(text))" without any allocations -func IsIdentifierUTF16(text []uint16, unsupportedJSFeatures compat.JSFeature) bool { +func IsIdentifierUTF16(text []uint16) bool { + n := len(text) + if n == 0 { + return false + } + for i := 0; i < n; i++ { + isStart := i == 0 + r1 := rune(text[i]) + if r1 >= 0xD800 && r1 <= 0xDBFF && i+1 < n { + if r2 := rune(text[i+1]); r2 >= 0xDC00 && r2 <= 0xDFFF { + r1 = (r1 << 10) + r2 + (0x10000 - (0xD800 << 10) - 0xDC00) + i++ + } + } + if isStart { + if !IsIdentifierStart(r1) { + return false + } + } else { + if !IsIdentifierContinue(r1) { + return false + } + } + } + return true +} + +// This does "IsIdentifierES5(UTF16ToString(text))" without any allocations +func IsIdentifierES5UTF16(text []uint16) bool { n := len(text) if n == 0 { return false @@ -623,11 +668,11 @@ func IsIdentifierUTF16(text []uint16, unsupportedJSFeatures compat.JSFeature) bo } } if isStart { - if !IsIdentifierStart(r1, unsupportedJSFeatures) { + if !IsIdentifierStartES5(r1) { return false } } else { - if !IsIdentifierContinue(r1, unsupportedJSFeatures) { + if !IsIdentifierContinueES5(r1) { return false } } @@ -635,7 +680,7 @@ func IsIdentifierUTF16(text []uint16, unsupportedJSFeatures compat.JSFeature) bo return true } -func IsIdentifierStart(codePoint rune, unsupportedJSFeatures compat.JSFeature) bool { +func IsIdentifierStart(codePoint rune) bool { switch codePoint { case '_', '$', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', @@ -650,14 +695,10 @@ func IsIdentifierStart(codePoint rune, unsupportedJSFeatures compat.JSFeature) b return false } - if unsupportedJSFeatures.Has(compat.UpdatedIdentifiers) { - return unicode.Is(idStartES5, codePoint) - } else { - return unicode.Is(idStart, codePoint) - } + return unicode.Is(idStart, codePoint) } -func IsIdentifierContinue(codePoint rune, unsupportedJSFeatures compat.JSFeature) bool { +func IsIdentifierContinue(codePoint rune) bool { switch codePoint { case '_', '$', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', @@ -677,11 +718,48 @@ func IsIdentifierContinue(codePoint rune, unsupportedJSFeatures compat.JSFeature return true } - if unsupportedJSFeatures.Has(compat.UpdatedIdentifiers) { - return unicode.Is(idContinueES5, codePoint) - } else { - return unicode.Is(idContinue, codePoint) + return unicode.Is(idContinue, codePoint) +} + +func IsIdentifierStartES5(codePoint rune) bool { + switch codePoint { + case '_', '$', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z': + return true + } + + // All ASCII identifier start code points are listed above + if codePoint < 0x7F { + return false } + + return unicode.Is(idStartES5, codePoint) +} + +func IsIdentifierContinueES5(codePoint rune) bool { + switch codePoint { + case '_', '$', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z': + return true + } + + // All ASCII identifier start code points are listed above + if codePoint < 0x7F { + return false + } + + // ZWNJ and ZWJ are allowed in identifiers + if codePoint == 0x200C || codePoint == 0x200D { + return true + } + + return unicode.Is(idContinueES5, codePoint) } // See the "White Space Code Points" table in the ECMAScript standard @@ -734,7 +812,7 @@ func RangeOfIdentifier(source logger.Source, loc logger.Loc) logger.Range { c, _ = utf8.DecodeRuneInString(text[i:]) } - if IsIdentifierStart(c, 0) || c == '\\' { + if IsIdentifierStart(c) || c == '\\' { // Search for the end of the identifier for i < len(text) { c2, width2 := utf8.DecodeRuneInString(text[i:]) @@ -752,7 +830,7 @@ func RangeOfIdentifier(source logger.Source, loc logger.Loc) logger.Range { i++ } } - } else if !IsIdentifierContinue(c2, 0) { + } else if !IsIdentifierContinue(c2) { return logger.Range{Loc: loc, Len: int32(i)} } else { i += width2 @@ -1010,9 +1088,9 @@ func (lexer *Lexer) NextInsideJSXElement() { continue } - if IsIdentifierStart(lexer.codePoint, 0) { + if IsIdentifierStart(lexer.codePoint) { lexer.step() - for IsIdentifierContinue(lexer.codePoint, 0) || lexer.codePoint == '-' { + for IsIdentifierContinue(lexer.codePoint) || lexer.codePoint == '-' { lexer.step() } @@ -1022,9 +1100,9 @@ func (lexer *Lexer) NextInsideJSXElement() { // can't use this feature to reference JavaScript identifiers. if lexer.codePoint == ':' { lexer.step() - if IsIdentifierStart(lexer.codePoint, 0) { + if IsIdentifierStart(lexer.codePoint) { lexer.step() - for IsIdentifierContinue(lexer.codePoint, 0) || lexer.codePoint == '-' { + for IsIdentifierContinue(lexer.codePoint) || lexer.codePoint == '-' { lexer.step() } } else { @@ -1082,11 +1160,11 @@ func (lexer *Lexer) Next() { if lexer.codePoint == '\\' { lexer.Identifier, _ = lexer.scanIdentifierWithEscapes(privateIdentifier) } else { - if !IsIdentifierStart(lexer.codePoint, 0) { + if !IsIdentifierStart(lexer.codePoint) { lexer.SyntaxError() } lexer.step() - for IsIdentifierContinue(lexer.codePoint, 0) { + for IsIdentifierContinue(lexer.codePoint) { lexer.step() } if lexer.codePoint == '\\' { @@ -1601,7 +1679,7 @@ func (lexer *Lexer) Next() { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z': lexer.step() - for IsIdentifierContinue(lexer.codePoint, 0) { + for IsIdentifierContinue(lexer.codePoint) { lexer.step() } if lexer.codePoint == '\\' { @@ -1628,9 +1706,9 @@ func (lexer *Lexer) Next() { continue } - if IsIdentifierStart(lexer.codePoint, 0) { + if IsIdentifierStart(lexer.codePoint) { lexer.step() - for IsIdentifierContinue(lexer.codePoint, 0) { + for IsIdentifierContinue(lexer.codePoint) { lexer.step() } if lexer.codePoint == '\\' { @@ -1701,7 +1779,7 @@ func (lexer *Lexer) scanIdentifierWithEscapes(kind identifierKind) (string, T) { } // Stop when we reach the end of the identifier - if !IsIdentifierContinue(lexer.codePoint, 0) { + if !IsIdentifierContinue(lexer.codePoint) { break } lexer.step() @@ -1720,7 +1798,7 @@ func (lexer *Lexer) scanIdentifierWithEscapes(kind identifierKind) (string, T) { if kind == privateIdentifier { identifier = identifier[1:] // Skip over the "#" } - if !IsIdentifier(identifier, 0) { + if !IsIdentifier(identifier) { lexer.addRangeError(logger.Range{Loc: logger.Loc{Start: int32(lexer.start)}, Len: int32(lexer.end - lexer.start)}, fmt.Sprintf("Invalid identifier: %q", text)) } @@ -2038,7 +2116,7 @@ func (lexer *Lexer) parseNumericLiteralOrDot() { } // Identifiers can't occur immediately after numbers - if IsIdentifierStart(lexer.codePoint, 0) { + if IsIdentifierStart(lexer.codePoint) { lexer.SyntaxError() } } @@ -2067,7 +2145,7 @@ func (lexer *Lexer) ScanRegExp() { case '/': lexer.step() bits := uint32(0) - for IsIdentifierContinue(lexer.codePoint, 0) { + for IsIdentifierContinue(lexer.codePoint) { switch lexer.codePoint { case 'g', 'i', 'm', 's', 'u', 'y': bit := uint32(1) << uint32(lexer.codePoint-'a') @@ -2537,7 +2615,7 @@ func hasPrefixWithWordBoundary(text string, prefix string) bool { return true } c, _ := utf8.DecodeRuneInString(text[p:]) - if !IsIdentifierContinue(c, 0) { + if !IsIdentifierContinue(c) { return true } } diff --git a/internal/js_lexer/unicode.go b/internal/js_lexer/unicode.go index 49ff38fd64a..6dbb0900515 100644 --- a/internal/js_lexer/unicode.go +++ b/internal/js_lexer/unicode.go @@ -225,11 +225,8 @@ var idStartES5 = &unicode.RangeTable{ {Lo: 0x212a, Hi: 0x212d, Stride: 1}, {Lo: 0x212f, Hi: 0x2131, Stride: 1}, {Lo: 0x2133, Hi: 0x2139, Stride: 1}, - {Lo: 0x2160, Hi: 0x2183, Stride: 1}, - {Lo: 0x3005, Hi: 0x3007, Stride: 1}, - {Lo: 0x3021, Hi: 0x3029, Stride: 1}, + {Lo: 0x3005, Hi: 0x3006, Stride: 1}, {Lo: 0x3031, Hi: 0x3035, Stride: 1}, - {Lo: 0x3038, Hi: 0x303a, Stride: 1}, {Lo: 0x3041, Hi: 0x3094, Stride: 1}, {Lo: 0x309d, Hi: 0x309e, Stride: 1}, {Lo: 0x30a1, Hi: 0x30fa, Stride: 1}, @@ -575,11 +572,9 @@ var idContinueES5 = &unicode.RangeTable{ {Lo: 0x212a, Hi: 0x212d, Stride: 1}, {Lo: 0x212f, Hi: 0x2131, Stride: 1}, {Lo: 0x2133, Hi: 0x2139, Stride: 1}, - {Lo: 0x2160, Hi: 0x2183, Stride: 1}, - {Lo: 0x3005, Hi: 0x3007, Stride: 1}, - {Lo: 0x3021, Hi: 0x302f, Stride: 1}, + {Lo: 0x3005, Hi: 0x3006, Stride: 1}, + {Lo: 0x302a, Hi: 0x302f, Stride: 1}, {Lo: 0x3031, Hi: 0x3035, Stride: 1}, - {Lo: 0x3038, Hi: 0x303a, Stride: 1}, {Lo: 0x3041, Hi: 0x3094, Stride: 1}, {Lo: 0x3099, Hi: 0x309a, Stride: 1}, {Lo: 0x309d, Hi: 0x309e, Stride: 1}, diff --git a/internal/js_parser/js_parser.go b/internal/js_parser/js_parser.go index 4c9319c24c5..a7d52b3c954 100644 --- a/internal/js_parser/js_parser.go +++ b/internal/js_parser/js_parser.go @@ -9107,7 +9107,7 @@ func (p *parser) visitAndAppendStmt(stmts []js_ast.Stmt, stmt js_ast.Stmt) []js_ value.ValueOrNil = js_ast.Expr{Loc: value.Loc, Data: js_ast.EUndefinedShared} } - if p.options.mangleSyntax && js_lexer.IsIdentifier(name, 0) { + if p.options.mangleSyntax && js_lexer.IsIdentifier(name) { // "Enum.Name = value" assignTarget = js_ast.Assign( js_ast.Expr{Loc: value.Loc, Data: &js_ast.EDot{ @@ -9561,7 +9561,7 @@ func (p *parser) visitClass(nameScopeLoc logger.Loc, class *js_ast.Class) js_ast // "class {['x'] = y}" => "class {x = y}" if p.options.mangleSyntax && property.IsComputed { - if str, ok := key.Data.(*js_ast.EString); ok && js_lexer.IsIdentifierUTF16(str.Value, 0) { + if str, ok := key.Data.(*js_ast.EString); ok && js_lexer.IsIdentifierUTF16(str.Value) { isInvalidConstructor := false if js_lexer.UTF16EqualsString(str.Value, "constructor") { if !property.IsMethod { @@ -9829,22 +9829,11 @@ func (p *parser) checkForUnrepresentableIdentifier(loc logger.Loc, name string) } if !p.unrepresentableIdentifiers[name] { p.unrepresentableIdentifiers[name] = true - where, notes := p.prettyPrintTargetEnvironment(compat.UpdatedIdentifiers) + where, notes := p.prettyPrintTargetEnvironment(compat.UnicodeEscapes) r := js_lexer.RangeOfIdentifier(p.source, loc) p.log.AddRangeErrorWithNotes(&p.tracker, r, fmt.Sprintf("%q cannot be escaped in %s but you "+ "can set the charset to \"utf8\" to allow unescaped Unicode characters", name, where), notes) } - } else if p.options.unsupportedJSFeatures.Has(compat.UpdatedIdentifiers) && !strings.HasPrefix(name, "#") && - !js_lexer.IsIdentifier(name, p.options.unsupportedJSFeatures) { - if p.unrepresentableIdentifiers == nil { - p.unrepresentableIdentifiers = make(map[string]bool) - } - if !p.unrepresentableIdentifiers[name] { - p.unrepresentableIdentifiers[name] = true - where, notes := p.prettyPrintTargetEnvironment(compat.UpdatedIdentifiers) - r := js_lexer.RangeOfIdentifier(p.source, loc) - p.log.AddRangeErrorWithNotes(&p.tracker, r, fmt.Sprintf("%q is not considered a valid identifier in %s", name, where), notes) - } } } @@ -11275,7 +11264,7 @@ func (p *parser) visitExprInOut(expr js_ast.Expr, in exprIn) (js_ast.Expr, exprO // "a['b']" => "a.b" if p.options.mangleSyntax { - if str, ok := e.Index.Data.(*js_ast.EString); ok && js_lexer.IsIdentifierUTF16(str.Value, 0) { + if str, ok := e.Index.Data.(*js_ast.EString); ok && js_lexer.IsIdentifierUTF16(str.Value) { dot := &js_ast.EDot{ Target: e.Target, Name: js_lexer.UTF16ToString(str.Value), @@ -11759,7 +11748,7 @@ func (p *parser) visitExprInOut(expr js_ast.Expr, in exprIn) (js_ast.Expr, exprO // "{['x']: y}" => "{x: y}" if p.options.mangleSyntax && property.IsComputed { - if str, ok := key.Data.(*js_ast.EString); ok && js_lexer.IsIdentifierUTF16(str.Value, 0) && !js_lexer.UTF16EqualsString(str.Value, "__proto__") { + if str, ok := key.Data.(*js_ast.EString); ok && js_lexer.IsIdentifierUTF16(str.Value) && !js_lexer.UTF16EqualsString(str.Value, "__proto__") { property.IsComputed = false } } @@ -13873,7 +13862,7 @@ func ParseJSXExpr(text string, kind JSXExprKind) (config.JSXExpr, bool) { // Try a property chain parts := strings.Split(text, ".") for _, part := range parts { - if !js_lexer.IsIdentifier(part, 0) { + if !js_lexer.IsIdentifier(part) { parts = nil break } diff --git a/internal/js_parser/js_parser_test.go b/internal/js_parser/js_parser_test.go index 1df90c115aa..ddc6866f893 100644 --- a/internal/js_parser/js_parser_test.go +++ b/internal/js_parser/js_parser_test.go @@ -4237,7 +4237,7 @@ func TestJSX(t *testing.T) { // Unicode tests expectPrintedJSX(t, "<\U00020000/>", "/* @__PURE__ */ React.createElement(\U00020000, null);\n") expectPrintedJSX(t, "\U00020000", "/* @__PURE__ */ React.createElement(\"a\", null, \"\U00020000\");\n") - expectPrintedJSX(t, "", "/* @__PURE__ */ React.createElement(\"a\", {\n \U00020000: 0\n});\n") + expectPrintedJSX(t, "", "/* @__PURE__ */ React.createElement(\"a\", {\n \"\U00020000\": 0\n});\n") // Comment tests expectParseErrorJSX(t, "", ": error: Expected \"*/\" to terminate multi-line comment\n: note: The multi-line comment starts here\n") @@ -4659,30 +4659,30 @@ func TestASCIIOnly(t *testing.T) { expectPrintedTargetASCII(t, 5, "'𐀀'", "\"\\uD800\\uDC00\";\n") expectPrinted(t, "x.π", "x.π;\n") - expectPrinted(t, "x.𐀀", "x.𐀀;\n") + expectPrinted(t, "x.𐀀", "x[\"𐀀\"];\n") expectPrintedASCII(t, "x.π", "x.\\u03C0;\n") - expectPrintedASCII(t, "x.𐀀", "x.\\u{10000};\n") + expectPrintedASCII(t, "x.𐀀", "x[\"\\u{10000}\"];\n") expectPrintedTargetASCII(t, 5, "x.π", "x.\\u03C0;\n") expectPrintedTargetASCII(t, 5, "x.𐀀", "x[\"\\uD800\\uDC00\"];\n") expectPrinted(t, "x?.π", "x?.π;\n") - expectPrinted(t, "x?.𐀀", "x?.𐀀;\n") + expectPrinted(t, "x?.𐀀", "x?.[\"𐀀\"];\n") expectPrintedASCII(t, "x?.π", "x?.\\u03C0;\n") - expectPrintedASCII(t, "x?.𐀀", "x?.\\u{10000};\n") + expectPrintedASCII(t, "x?.𐀀", "x?.[\"\\u{10000}\"];\n") expectPrintedTargetASCII(t, 5, "x?.π", "x == null ? void 0 : x.\\u03C0;\n") expectPrintedTargetASCII(t, 5, "x?.𐀀", "x == null ? void 0 : x[\"\\uD800\\uDC00\"];\n") expectPrinted(t, "0 .π", "0 .π;\n") - expectPrinted(t, "0 .𐀀", "0 .𐀀;\n") + expectPrinted(t, "0 .𐀀", "0[\"𐀀\"];\n") expectPrintedASCII(t, "0 .π", "0 .\\u03C0;\n") - expectPrintedASCII(t, "0 .𐀀", "0 .\\u{10000};\n") + expectPrintedASCII(t, "0 .𐀀", "0[\"\\u{10000}\"];\n") expectPrintedTargetASCII(t, 5, "0 .π", "0 .\\u03C0;\n") expectPrintedTargetASCII(t, 5, "0 .𐀀", "0[\"\\uD800\\uDC00\"];\n") expectPrinted(t, "0?.π", "0?.π;\n") - expectPrinted(t, "0?.𐀀", "0?.𐀀;\n") + expectPrinted(t, "0?.𐀀", "0?.[\"𐀀\"];\n") expectPrintedASCII(t, "0?.π", "0?.\\u03C0;\n") - expectPrintedASCII(t, "0?.𐀀", "0?.\\u{10000};\n") + expectPrintedASCII(t, "0?.𐀀", "0?.[\"\\u{10000}\"];\n") expectPrintedTargetASCII(t, 5, "0?.π", "0 == null ? void 0 : 0 .\\u03C0;\n") expectPrintedTargetASCII(t, 5, "0?.𐀀", "0 == null ? void 0 : 0[\"\\uD800\\uDC00\"];\n") @@ -4694,16 +4694,16 @@ func TestASCIIOnly(t *testing.T) { expectPrintedTargetASCII(t, 5, "import '𐀀'", "import \"\\uD800\\uDC00\";\n") expectPrinted(t, "({π: 0})", "({ π: 0 });\n") - expectPrinted(t, "({𐀀: 0})", "({ 𐀀: 0 });\n") + expectPrinted(t, "({𐀀: 0})", "({ \"𐀀\": 0 });\n") expectPrintedASCII(t, "({π: 0})", "({ \\u03C0: 0 });\n") - expectPrintedASCII(t, "({𐀀: 0})", "({ \\u{10000}: 0 });\n") + expectPrintedASCII(t, "({𐀀: 0})", "({ \"\\u{10000}\": 0 });\n") expectPrintedTargetASCII(t, 5, "({π: 0})", "({ \\u03C0: 0 });\n") expectPrintedTargetASCII(t, 5, "({𐀀: 0})", "({ \"\\uD800\\uDC00\": 0 });\n") expectPrinted(t, "({π})", "({ π });\n") - expectPrinted(t, "({𐀀})", "({ 𐀀 });\n") + expectPrinted(t, "({𐀀})", "({ \"𐀀\": 𐀀 });\n") expectPrintedASCII(t, "({π})", "({ \\u03C0 });\n") - expectPrintedASCII(t, "({𐀀})", "({ \\u{10000} });\n") + expectPrintedASCII(t, "({𐀀})", "({ \"\\u{10000}\": \\u{10000} });\n") expectPrintedTargetASCII(t, 5, "({π})", "({ \\u03C0: \\u03C0 });\n") expectParseErrorTargetASCII(t, 5, "({𐀀})", es5) @@ -4777,24 +4777,3 @@ func TestASCIIOnly(t *testing.T) { expectPrintedTargetASCII(t, 5, "export var π", "export var \\u03C0;\n") expectParseErrorTargetASCII(t, 5, "export var 𐀀", es5) } - -func TestUpdatedIdentifiers(t *testing.T) { - // Some context: The text "ꓷꓶꓲꓵꓭꓢꓱ" is all non-BMP code points and is a valid - // identifier in ES6+ but not in ES5. It must either be quoted or forbidden - // when it's used in ES5. - - expectPrinted(t, "x.ꓷꓶꓲꓵꓭꓢꓱ", "x.ꓷꓶꓲꓵꓭꓢꓱ;\n") - expectPrinted(t, "var ꓷꓶꓲꓵꓭꓢꓱ", "var ꓷꓶꓲꓵꓭꓢꓱ;\n") - expectPrintedTarget(t, 5, "x.ꓷꓶꓲꓵꓭꓢꓱ", "x[\"ꓷꓶꓲꓵꓭꓢꓱ\"];\n") - expectPrintedTarget(t, 5, "x = {ꓷꓶꓲꓵꓭꓢꓱ: 0}", "x = { \"ꓷꓶꓲꓵꓭꓢꓱ\": 0 };\n") - expectParseErrorTarget(t, 5, "ꓷꓶꓲꓵꓭꓢꓱ", - ": error: \"ꓷꓶꓲꓵꓭꓢꓱ\" is not considered a valid identifier in the configured target environment\n") - expectParseErrorTarget(t, 5, "var ꓷꓶꓲꓵꓭꓢꓱ", - ": error: \"ꓷꓶꓲꓵꓭꓢꓱ\" is not considered a valid identifier in the configured target environment\n") - - expectPrintedJSX(t, "", "/* @__PURE__ */ React.createElement(\"x\", {\n ꓷꓶꓲꓵꓭꓢꓱ: true\n});\n") - expectPrintedJSX(t, "<ꓷꓶꓲꓵꓭꓢꓱ/>", "/* @__PURE__ */ React.createElement(ꓷꓶꓲꓵꓭꓢꓱ, null);\n") - expectPrintedTargetJSX(t, 5, "", "/* @__PURE__ */ React.createElement(\"x\", {\n \"ꓷꓶꓲꓵꓭꓢꓱ\": true\n});\n") - expectParseErrorTargetJSX(t, 5, "<ꓷꓶꓲꓵꓭꓢꓱ/>", - ": error: \"ꓷꓶꓲꓵꓭꓢꓱ\" is not considered a valid identifier in the configured target environment\n") -} diff --git a/internal/js_parser/ts_parser.go b/internal/js_parser/ts_parser.go index 582e2c09aea..6485eaf31d6 100644 --- a/internal/js_parser/ts_parser.go +++ b/internal/js_parser/ts_parser.go @@ -842,7 +842,7 @@ func (p *parser) parseTypeScriptEnumStmt(loc logger.Loc, opts parseStmtOpts) js_ p.lexer.Next() // Identifiers can be referenced by other values - if !opts.isTypeScriptDeclare && js_lexer.IsIdentifierUTF16(value.Name, 0) { + if !opts.isTypeScriptDeclare && js_lexer.IsIdentifierUTF16(value.Name) { value.Ref = p.declareSymbol(js_ast.SymbolOther, value.Loc, js_lexer.UTF16ToString(value.Name)) } diff --git a/internal/js_printer/js_printer.go b/internal/js_printer/js_printer.go index 4f541c5b9e9..d9cf7ca79e2 100644 --- a/internal/js_printer/js_printer.go +++ b/internal/js_printer/js_printer.go @@ -805,7 +805,7 @@ func (p *printer) printSymbol(ref js_ast.Ref) { } func (p *printer) printClauseAlias(alias string) { - if js_lexer.IsIdentifier(alias, p.options.UnsupportedFeatures) { + if js_lexer.IsIdentifier(alias) { p.printSpaceBeforeIdentifier() p.printIdentifier(alias) } else { @@ -813,20 +813,31 @@ func (p *printer) printClauseAlias(alias string) { } } -func CanQuoteIdentifier(name string, unsupportedJSFeatures compat.JSFeature, asciiOnly bool) bool { - return js_lexer.IsIdentifier(name, unsupportedJSFeatures) && (!asciiOnly || +// Note: The functions below check whether something can be printed as an +// identifier or if it needs to be quoted (e.g. "x.y" vs. "x['y']") using the +// ES5 identifier validity test to maximize cross-platform portability. Even +// though newer JavaScript environments can handle more Unicode characters, +// there isn't a published document that says which Unicode versions are +// supported by which browsers. Even if a character is considered valid in the +// latest version of Unicode, we don't know if the browser we're targeting +// contains an older version of Unicode or not. So for safety, we quote +// anything that isn't guaranteed to be compatible with ES5, the oldest +// JavaScript language target that we support. + +func CanEscapeIdentifier(name string, unsupportedJSFeatures compat.JSFeature, asciiOnly bool) bool { + return js_lexer.IsIdentifierES5(name) && (!asciiOnly || !unsupportedJSFeatures.Has(compat.UnicodeEscapes) || !js_lexer.ContainsNonBMPCodePoint(name)) } func (p *printer) canPrintIdentifier(name string) bool { - return js_lexer.IsIdentifier(name, p.options.UnsupportedFeatures) && (!p.options.ASCIIOnly || + return js_lexer.IsIdentifierES5(name) && (!p.options.ASCIIOnly || !p.options.UnsupportedFeatures.Has(compat.UnicodeEscapes) || !js_lexer.ContainsNonBMPCodePoint(name)) } func (p *printer) canPrintIdentifierUTF16(name []uint16) bool { - return js_lexer.IsIdentifierUTF16(name, p.options.UnsupportedFeatures) && (!p.options.ASCIIOnly || + return js_lexer.IsIdentifierES5UTF16(name) && (!p.options.ASCIIOnly || !p.options.UnsupportedFeatures.Has(compat.UnicodeEscapes) || !js_lexer.ContainsNonBMPCodePointUTF16(name)) } @@ -1058,7 +1069,7 @@ func (p *printer) printSemicolonIfNeeded() { func (p *printer) printSpaceBeforeIdentifier() { buffer := p.js n := len(buffer) - if n > 0 && (js_lexer.IsIdentifierContinue(rune(buffer[n-1]), 0) || n == p.prevRegExpEnd) { + if n > 0 && (js_lexer.IsIdentifierContinue(rune(buffer[n-1])) || n == p.prevRegExpEnd) { p.print(" ") } } @@ -1807,18 +1818,22 @@ func (p *printer) printExpr(expr js_ast.Expr, level js_ast.L, flags printExprFla flags &= ^hasNonOptionalChainParent } p.printExpr(e.Target, js_ast.LPostfix, flags) - if e.OptionalChain == js_ast.OptionalChainStart { - p.print("?") - } if p.canPrintIdentifier(e.Name) { if e.OptionalChain != js_ast.OptionalChainStart && p.prevNumEnd == len(p.js) { // "1.toString" is a syntax error, so print "1 .toString" instead p.print(" ") } - p.print(".") + if e.OptionalChain == js_ast.OptionalChainStart { + p.print("?.") + } else { + p.print(".") + } p.addSourceMapping(e.NameLoc) p.printIdentifier(e.Name) } else { + if e.OptionalChain == js_ast.OptionalChainStart { + p.print("?.") + } p.print("[") p.addSourceMapping(e.NameLoc) p.printQuotedUTF8(e.Name, true /* allowBacktick */) diff --git a/internal/js_printer/js_printer_test.go b/internal/js_printer/js_printer_test.go index 94087a15f4d..48c3570ddd6 100644 --- a/internal/js_printer/js_printer_test.go +++ b/internal/js_printer/js_printer_test.go @@ -829,12 +829,12 @@ func TestASCIIOnly(t *testing.T) { expectPrintedASCII(t, "var \\u{10000}", "var \\u{10000};\n") expectPrintedTargetASCII(t, 2015, "'𐀀'", "\"\\u{10000}\";\n") expectPrintedTargetASCII(t, 5, "'𐀀'", "\"\\uD800\\uDC00\";\n") - expectPrintedTargetASCII(t, 2015, "x.𐀀", "x.\\u{10000};\n") + expectPrintedTargetASCII(t, 2015, "x.𐀀", "x[\"\\u{10000}\"];\n") expectPrintedTargetASCII(t, 5, "x.𐀀", "x[\"\\uD800\\uDC00\"];\n") // Escapes should use consistent case expectPrintedASCII(t, "var \\u{100a} = {\\u100A: '\\u100A'}", "var \\u100A = { \\u100A: \"\\u100A\" };\n") - expectPrintedASCII(t, "var \\u{1000a} = {\\u{1000A}: '\\u{1000A}'}", "var \\u{1000A} = { \\u{1000A}: \"\\u{1000A}\" };\n") + expectPrintedASCII(t, "var \\u{1000a} = {\\u{1000A}: '\\u{1000A}'}", "var \\u{1000A} = { \"\\u{1000A}\": \"\\u{1000A}\" };\n") // These characters should always be escaped expectPrinted(t, "let x = '\u2028'", "let x = \"\\u2028\";\n") diff --git a/internal/renamer/renamer.go b/internal/renamer/renamer.go index 04cf254e963..866a817599b 100644 --- a/internal/renamer/renamer.go +++ b/internal/renamer/renamer.go @@ -8,7 +8,6 @@ import ( "sync/atomic" "github.com/evanw/esbuild/internal/ast" - "github.com/evanw/esbuild/internal/compat" "github.com/evanw/esbuild/internal/js_ast" "github.com/evanw/esbuild/internal/js_lexer" ) @@ -387,13 +386,12 @@ func (a slotAndCountArray) Less(i int, j int) bool { // NumberRenamer type NumberRenamer struct { - symbols js_ast.SymbolMap - names [][]string - root numberScope - unsupportedJSFeatures compat.JSFeature + symbols js_ast.SymbolMap + names [][]string + root numberScope } -func NewNumberRenamer(symbols js_ast.SymbolMap, reservedNames map[string]uint32, unsupportedJSFeatures compat.JSFeature) *NumberRenamer { +func NewNumberRenamer(symbols js_ast.SymbolMap, reservedNames map[string]uint32) *NumberRenamer { return &NumberRenamer{ symbols: symbols, names: make([][]string, len(symbols.SymbolsForSource)), @@ -439,7 +437,7 @@ func (r *NumberRenamer) assignName(scope *numberScope, ref js_ast.Ref) { } // Compute a new name - name := scope.findUnusedName(originalName, r.unsupportedJSFeatures) + name := scope.findUnusedName(originalName) // Store the new name if inner == nil { @@ -536,8 +534,8 @@ func (s *numberScope) findNameUse(name string) nameUse { } } -func (s *numberScope) findUnusedName(name string, unsupportedJSFeatures compat.JSFeature) string { - name = js_lexer.ForceValidIdentifier(name, unsupportedJSFeatures) +func (s *numberScope) findUnusedName(name string) string { + name = js_lexer.ForceValidIdentifier(name) if use := s.findNameUse(name); use != nameUnused { // If the name is already in use, generate a new name by appending a number diff --git a/internal/resolver/tsconfig_json.go b/internal/resolver/tsconfig_json.go index fefa6b8e7cf..9897732e928 100644 --- a/internal/resolver/tsconfig_json.go +++ b/internal/resolver/tsconfig_json.go @@ -234,7 +234,7 @@ func parseMemberExpressionForJSX(log logger.Log, source *logger.Source, tracker } parts := strings.Split(text, ".") for _, part := range parts { - if !js_lexer.IsIdentifier(part, 0) { + if !js_lexer.IsIdentifier(part) { warnRange := source.RangeOfString(loc) log.AddRangeWarning(tracker, warnRange, fmt.Sprintf("Invalid JSX member expression: %q", text)) return nil diff --git a/pkg/api/api_impl.go b/pkg/api/api_impl.go index 5fe3d9301c1..89a4e9d612e 100644 --- a/pkg/api/api_impl.go +++ b/pkg/api/api_impl.go @@ -438,7 +438,7 @@ func validateDefines( for key, value := range defines { // The key must be a dot-separated identifier list for _, part := range strings.Split(key, ".") { - if !js_lexer.IsIdentifier(part, 0) { + if !js_lexer.IsIdentifier(part) { if part == key { log.AddError(nil, logger.Loc{}, fmt.Sprintf("The define key %q must be a valid identifier", key)) } else { @@ -449,7 +449,7 @@ func validateDefines( } // Allow substituting for an identifier - if js_lexer.IsIdentifier(value, 0) { + if js_lexer.IsIdentifier(value) { if _, ok := js_lexer.Keywords[value]; !ok { name := value // The closure must close over a variable inside the loop rawDefines[key] = config.DefineData{ @@ -538,7 +538,7 @@ func validateDefines( for _, key := range pureFns { // The key must be a dot-separated identifier list for _, part := range strings.Split(key, ".") { - if !js_lexer.IsIdentifier(part, 0) { + if !js_lexer.IsIdentifier(part) { log.AddError(nil, logger.Loc{}, fmt.Sprintf("Invalid pure function: %q", key)) continue } diff --git a/scripts/compat-table.js b/scripts/compat-table.js index 98cc53952d1..a29e12ff466 100644 --- a/scripts/compat-table.js +++ b/scripts/compat-table.js @@ -37,7 +37,6 @@ const features = { 'class': { target: 'Class' }, 'generators': { target: 'Generator' }, 'Unicode code point escapes': { target: 'UnicodeEscapes' }, - 'Updated identifier syntax': { target: 'UpdatedIdentifiers' }, // >ES6 features 'exponentiation (**) operator': { target: 'ExponentOperator' }, @@ -141,7 +140,6 @@ mergeVersions('ObjectExtensions', { es2015: true }) mergeVersions('RestArgument', { es2015: true }) mergeVersions('TemplateLiteral', { es2015: true }) mergeVersions('UnicodeEscapes', { es2015: true }) -mergeVersions('UpdatedIdentifiers', { es2015: true }) // >ES6 features mergeVersions('ExponentOperator', { es2016: true }) diff --git a/scripts/gen-unicode-table.js b/scripts/gen-unicode-table.js index eaffeb2947d..cc3ee65483a 100644 --- a/scripts/gen-unicode-table.js +++ b/scripts/gen-unicode-table.js @@ -18,7 +18,12 @@ const idStartES5 = [].concat( require('@unicode/unicode-3.0.0/General_Category/Titlecase_Letter/code-points'), require('@unicode/unicode-3.0.0/General_Category/Modifier_Letter/code-points'), require('@unicode/unicode-3.0.0/General_Category/Other_Letter/code-points'), - require('@unicode/unicode-3.0.0/General_Category/Letter_Number/code-points'), + + // The "letter number" category is not included because old versions of Safari + // had a bug where they didn't include it. This means it does not match ES5. + // We need to make sure we escape these characters so Safari can read them. + // See https://github.com/evanw/esbuild/issues/1349 for more information. + // require('@unicode/unicode-3.0.0/General_Category/Letter_Number/code-points'), ).sort((a, b) => a - b) // UnicodeCombiningMark: any character in the Unicode categories “Non-spacing mark (Mn)” diff --git a/scripts/js-api-tests.js b/scripts/js-api-tests.js index 7cb22e841de..899987f1835 100644 --- a/scripts/js-api-tests.js +++ b/scripts/js-api-tests.js @@ -3351,8 +3351,8 @@ let transformTests = { assert.strictEqual(globals.π["π 𐀀"].𐀀["𐀀 π"].default, 123) assert.strictEqual(code.slice(0, code.indexOf('(() => {\n')), `var \\u03C0 = \\u03C0 || {}; \\u03C0["\\u03C0 \\uD800\\uDC00"] = \\u03C0["\\u03C0 \\uD800\\uDC00"] || {}; -\\u03C0["\\u03C0 \\uD800\\uDC00"].\\u{10000} = \\u03C0["\\u03C0 \\uD800\\uDC00"].\\u{10000} || {}; -\\u03C0["\\u03C0 \\uD800\\uDC00"].\\u{10000}["\\uD800\\uDC00 \\u03C0"] = `) +\\u03C0["\\u03C0 \\uD800\\uDC00"]["\\uD800\\uDC00"] = \\u03C0["\\u03C0 \\uD800\\uDC00"]["\\uD800\\uDC00"] || {}; +\\u03C0["\\u03C0 \\uD800\\uDC00"]["\\uD800\\uDC00"]["\\uD800\\uDC00 \\u03C0"] = `) }, async iifeGlobalNameUnicodeNoEscape({ esbuild }) { @@ -3364,8 +3364,8 @@ let transformTests = { assert.strictEqual(code.slice(0, code.indexOf('(() => {\n')), `var π = π || {}; π["π 𐀀"] = π["π 𐀀"] || {}; -π["π 𐀀"].𐀀 = π["π 𐀀"].𐀀 || {}; -π["π 𐀀"].𐀀["𐀀 π"] = `) +π["π 𐀀"]["𐀀"] = π["π 𐀀"]["𐀀"] || {}; +π["π 𐀀"]["𐀀"]["𐀀 π"] = `) }, async jsx({ esbuild }) {