From f15f98d632f497e6b6ff5a73dcad1a3382acbf3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= Date: Wed, 28 Aug 2019 09:03:59 -0400 Subject: [PATCH 1/5] perf: replace lookahead by lookaheadCharCode --- packages/babel-parser/src/parser/expression.js | 2 +- packages/babel-parser/src/parser/statement.js | 9 ++++++--- packages/babel-parser/src/plugins/typescript/index.js | 9 +++++++-- packages/babel-parser/src/tokenizer/index.js | 9 +++++++++ 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/packages/babel-parser/src/parser/expression.js b/packages/babel-parser/src/parser/expression.js index ae60b1746ba7..2e656847a9d0 100644 --- a/packages/babel-parser/src/parser/expression.js +++ b/packages/babel-parser/src/parser/expression.js @@ -590,7 +590,7 @@ export default class ExpressionParser extends LValParser { } else if (this.match(tt.questionDot)) { this.expectPlugin("optionalChaining"); state.optionalChainMember = true; - if (noCalls && this.lookahead().type === tt.parenL) { + if (noCalls && this.lookaheadCharCode() === charCodes.leftParenthesis) { state.stop = true; return base; } diff --git a/packages/babel-parser/src/parser/statement.js b/packages/babel-parser/src/parser/statement.js index 3e950d26578b..e8ba0ce11eaa 100644 --- a/packages/babel-parser/src/parser/statement.js +++ b/packages/babel-parser/src/parser/statement.js @@ -170,7 +170,7 @@ export default class StatementParser extends ExpressionParser { case tt._for: return this.parseForStatement(node); case tt._function: - if (this.lookahead().type === tt.dot) break; + if (this.lookaheadCharCode() === charCodes.dot) break; if (context) { if (this.state.strict) { this.raise( @@ -223,8 +223,11 @@ export default class StatementParser extends ExpressionParser { return this.parseEmptyStatement(node); case tt._export: case tt._import: { - const nextToken = this.lookahead(); - if (nextToken.type === tt.parenL || nextToken.type === tt.dot) { + const nextTokenCharCode = this.lookaheadCharCode(); + if ( + nextTokenCharCode === charCodes.leftParenthesis || + nextTokenCharCode === charCodes.dot + ) { break; } diff --git a/packages/babel-parser/src/plugins/typescript/index.js b/packages/babel-parser/src/plugins/typescript/index.js index 5d6922d329eb..1f2260b1b9da 100644 --- a/packages/babel-parser/src/plugins/typescript/index.js +++ b/packages/babel-parser/src/plugins/typescript/index.js @@ -19,6 +19,7 @@ import { BIND_CLASS, } from "../../util/scopeflags"; import TypeScriptScopeHandler from "./scope"; +import * as charCodes from "charcodes"; type TsModifier = | "readonly" @@ -657,7 +658,10 @@ export default (superClass: Class): Class => : this.match(tt._null) ? "TSNullKeyword" : keywordTypeFromName(this.state.value); - if (type !== undefined && this.lookahead().type !== tt.dot) { + if ( + type !== undefined && + this.lookaheadCharCode() !== charCodes.dot + ) { const node: N.TsKeywordType = this.startNode(); this.next(); return this.finishNode(node, type); @@ -1203,7 +1207,8 @@ export default (superClass: Class): Class => tsIsExternalModuleReference(): boolean { return ( - this.isContextual("require") && this.lookahead().type === tt.parenL + this.isContextual("require") && + this.lookaheadCharCode() === charCodes.leftParenthesis ); } diff --git a/packages/babel-parser/src/tokenizer/index.js b/packages/babel-parser/src/tokenizer/index.js index ffb8cba8ca6f..fdf280a6d9ff 100644 --- a/packages/babel-parser/src/tokenizer/index.js +++ b/packages/babel-parser/src/tokenizer/index.js @@ -17,6 +17,7 @@ import { import State from "./state"; const VALID_REGEX_FLAGS = new Set(["g", "m", "s", "i", "y", "u"]); +const skipWhiteSpace = /(?:\s|\/\/.*|\/\*[^]*?\*\/)*/g; // The following character codes are forbidden from being // an immediate sibling of NumericLiteralSeparator _ @@ -168,6 +169,14 @@ export default class Tokenizer extends LocationParser { return curr; } + lookaheadCharCode(): number { + const thisTokEnd = this.state.pos; + skipWhiteSpace.lastIndex = thisTokEnd; + const skip = skipWhiteSpace.exec(this.input); + const next = thisTokEnd + skip[0].length; + return this.input.charCodeAt(next); + } + // Toggle strict mode. Re-reads the next number or string to please // pedantic tests (`"use strict"; 010;` should fail). From 8244a7ab52dd264eb7555c8ad70623214ce5c7b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= Date: Wed, 28 Aug 2019 11:29:09 -0400 Subject: [PATCH 2/5] fix: flow ignore --- packages/babel-parser/src/tokenizer/index.js | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/babel-parser/src/tokenizer/index.js b/packages/babel-parser/src/tokenizer/index.js index fdf280a6d9ff..f5f835db4536 100644 --- a/packages/babel-parser/src/tokenizer/index.js +++ b/packages/babel-parser/src/tokenizer/index.js @@ -173,6 +173,7 @@ export default class Tokenizer extends LocationParser { const thisTokEnd = this.state.pos; skipWhiteSpace.lastIndex = thisTokEnd; const skip = skipWhiteSpace.exec(this.input); + // $FlowIgnore: The skipWhiteSpace ensures to match any string const next = thisTokEnd + skip[0].length; return this.input.charCodeAt(next); } From 4f6dcc42c33dcf26bdce004554ee6a8a94303e12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= Date: Wed, 28 Aug 2019 17:36:26 -0400 Subject: [PATCH 3/5] refactor: add nextTokenStart method --- packages/babel-parser/src/parser/statement.js | 20 ++++--------------- packages/babel-parser/src/tokenizer/index.js | 11 ++++++---- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/packages/babel-parser/src/parser/statement.js b/packages/babel-parser/src/parser/statement.js index e8ba0ce11eaa..a761e6dfc77b 100644 --- a/packages/babel-parser/src/parser/statement.js +++ b/packages/babel-parser/src/parser/statement.js @@ -8,7 +8,7 @@ import { isIdentifierStart, keywordRelationalOperator, } from "../util/identifier"; -import { lineBreak, skipWhiteSpace } from "../util/whitespace"; +import { lineBreak } from "../util/whitespace"; import * as charCodes from "charcodes"; import { BIND_CLASS, @@ -105,10 +105,7 @@ export default class StatementParser extends ExpressionParser { if (!this.isContextual("let")) { return false; } - skipWhiteSpace.lastIndex = this.state.pos; - const skip = skipWhiteSpace.exec(this.input); - // $FlowIgnore - const next = this.state.pos + skip[0].length; + const next = this.nextTokenStart(); const nextCh = this.input.charCodeAt(next); // For ambiguous cases, determine if a LexicalDeclaration (or only a // Statement) is allowed here. If context is not empty then only a Statement @@ -1760,18 +1757,9 @@ export default class StatementParser extends ExpressionParser { isAsyncFunction(): boolean { if (!this.isContextual("async")) return false; - - const { pos } = this.state; - - skipWhiteSpace.lastIndex = pos; - const skip = skipWhiteSpace.exec(this.input); - - if (!skip || !skip.length) return false; - - const next = pos + skip[0].length; - + const next = this.nextTokenStart(); return ( - !lineBreak.test(this.input.slice(pos, next)) && + !lineBreak.test(this.input.slice(this.state.pos, next)) && this.input.slice(next, next + 8) === "function" && (next + 8 === this.length || !isIdentifierChar(this.input.charCodeAt(next + 8))) diff --git a/packages/babel-parser/src/tokenizer/index.js b/packages/babel-parser/src/tokenizer/index.js index f5f835db4536..a15e34c3607d 100644 --- a/packages/babel-parser/src/tokenizer/index.js +++ b/packages/babel-parser/src/tokenizer/index.js @@ -13,11 +13,11 @@ import { lineBreakG, isNewLine, isWhitespace, + skipWhiteSpace, } from "../util/whitespace"; import State from "./state"; const VALID_REGEX_FLAGS = new Set(["g", "m", "s", "i", "y", "u"]); -const skipWhiteSpace = /(?:\s|\/\/.*|\/\*[^]*?\*\/)*/g; // The following character codes are forbidden from being // an immediate sibling of NumericLiteralSeparator _ @@ -169,13 +169,16 @@ export default class Tokenizer extends LocationParser { return curr; } - lookaheadCharCode(): number { + nextTokenStart(): number { const thisTokEnd = this.state.pos; skipWhiteSpace.lastIndex = thisTokEnd; const skip = skipWhiteSpace.exec(this.input); // $FlowIgnore: The skipWhiteSpace ensures to match any string - const next = thisTokEnd + skip[0].length; - return this.input.charCodeAt(next); + return thisTokEnd + skip[0].length; + } + + lookaheadCharCode(): number { + return this.input.charCodeAt(this.nextTokenStart()); } // Toggle strict mode. Re-reads the next number or string to please From 19690e8b91f5edc174f734ad0cfc81dbc8a35bda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= Date: Thu, 29 Aug 2019 14:21:31 -0400 Subject: [PATCH 4/5] refactor: duplicated isNewLine code --- packages/babel-parser/src/tokenizer/index.js | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/packages/babel-parser/src/tokenizer/index.js b/packages/babel-parser/src/tokenizer/index.js index a15e34c3607d..453e7b6d53e5 100644 --- a/packages/babel-parser/src/tokenizer/index.js +++ b/packages/babel-parser/src/tokenizer/index.js @@ -280,13 +280,7 @@ export default class Tokenizer extends LocationParser { const startLoc = this.state.curPosition(); let ch = this.input.charCodeAt((this.state.pos += startSkip)); if (this.state.pos < this.length) { - while ( - ch !== charCodes.lineFeed && - ch !== charCodes.carriageReturn && - ch !== charCodes.lineSeparator && - ch !== charCodes.paragraphSeparator && - ++this.state.pos < this.length - ) { + while (!isNewLine(ch) && ++this.state.pos < this.length) { ch = this.input.charCodeAt(this.state.pos); } } @@ -452,13 +446,7 @@ export default class Tokenizer extends LocationParser { let ch = this.input.charCodeAt(this.state.pos); if (ch !== charCodes.exclamationMark) return false; - while ( - ch !== charCodes.lineFeed && - ch !== charCodes.carriageReturn && - ch !== charCodes.lineSeparator && - ch !== charCodes.paragraphSeparator && - ++this.state.pos < this.length - ) { + while (!isNewLine(ch) && ++this.state.pos < this.length) { ch = this.input.charCodeAt(this.state.pos); } From 31a05b8fa18365e1d85766c75375058f92bd1594 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= Date: Thu, 29 Aug 2019 15:45:27 -0400 Subject: [PATCH 5/5] refactor: remove lookahead usage from babylon core --- packages/babel-parser/src/parser/statement.js | 16 +++++------- packages/babel-parser/src/parser/util.js | 26 ++++++++++++++++--- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/packages/babel-parser/src/parser/statement.js b/packages/babel-parser/src/parser/statement.js index a761e6dfc77b..4abe3945ba5d 100644 --- a/packages/babel-parser/src/parser/statement.js +++ b/packages/babel-parser/src/parser/statement.js @@ -1738,11 +1738,11 @@ export default class StatementParser extends ExpressionParser { maybeParseExportDeclaration(node: N.Node): boolean { if (this.shouldParseExportDeclaration()) { if (this.isContextual("async")) { - const next = this.lookahead(); + const next = this.nextTokenStart(); // export async; - if (next.type !== tt._function) { - this.unexpected(next.start, `Unexpected token, expected "function"`); + if (!this.isUnparsedContextual(next, "function")) { + this.unexpected(next, `Unexpected token, expected "function"`); } } @@ -1760,9 +1760,7 @@ export default class StatementParser extends ExpressionParser { const next = this.nextTokenStart(); return ( !lineBreak.test(this.input.slice(this.state.pos, next)) && - this.input.slice(next, next + 8) === "function" && - (next + 8 === this.length || - !isIdentifierChar(this.input.charCodeAt(next + 8))) + this.isUnparsedContextual(next, "function") ); } @@ -1824,10 +1822,10 @@ export default class StatementParser extends ExpressionParser { return false; } - const lookahead = this.lookahead(); + const next = this.nextTokenStart(); return ( - lookahead.type === tt.comma || - (lookahead.type === tt.name && lookahead.value === "from") + this.input.charCodeAt(next) === charCodes.comma || + this.isUnparsedContextual(next, "from") ); } diff --git a/packages/babel-parser/src/parser/util.js b/packages/babel-parser/src/parser/util.js index bd6139a2480a..678bb6b18379 100644 --- a/packages/babel-parser/src/parser/util.js +++ b/packages/babel-parser/src/parser/util.js @@ -4,6 +4,8 @@ import { types as tt, type TokenType } from "../tokenizer/types"; import Tokenizer from "../tokenizer"; import type { Node } from "../types"; import { lineBreak, skipWhiteSpace } from "../util/whitespace"; +import { isIdentifierChar } from "../util/identifier"; +import * as charCodes from "charcodes"; const literal = /^('|")((?:\\?.)*?)\1/; @@ -26,8 +28,15 @@ export default class UtilParser extends Tokenizer { } isLookaheadRelational(op: "<" | ">"): boolean { - const l = this.lookahead(); - return l.type === tt.relational && l.value === op; + const next = this.nextTokenStart(); + if (this.input.charAt(next) === op) { + if (next + 1 === this.input.length) { + return true; + } + const afterNext = this.input.charCodeAt(next + 1); + return afterNext !== op.charCodeAt(0) && afterNext !== charCodes.equalsTo; + } + return false; } // TODO @@ -60,9 +69,18 @@ export default class UtilParser extends Tokenizer { ); } + isUnparsedContextual(nameStart: number, name: string): boolean { + const nameEnd = nameStart + name.length; + return ( + this.input.slice(nameStart, nameEnd) === name && + (nameEnd === this.input.length || + !isIdentifierChar(this.input.charCodeAt(nameEnd))) + ); + } + isLookaheadContextual(name: string): boolean { - const l = this.lookahead(); - return l.type === tt.name && l.value === name; + const next = this.nextTokenStart(); + return this.isUnparsedContextual(next, name); } // Consumes contextual keyword if possible.