Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve template tokenizing #13919

Merged
22 changes: 22 additions & 0 deletions benchmark/babel-parser/many-nested-block-elements/bench.mjs
@@ -0,0 +1,22 @@
import Benchmark from "benchmark";
import baseline from "@babel-baseline/parser";
import current from "@babel/parser";
import { report } from "../../util.mjs";

// Shared Benchmark.js suite; benchCases() registers every case on it.
const suite = new Benchmark.Suite();
/**
 * Build the benchmark source: a literal `0` wrapped in `length` nested
 * block statements, e.g. `{{0}}` for a length of 2.
 */
function createInput(length) {
  const opening = "{".repeat(length);
  const closing = "}".repeat(length);
  return [opening, "0", closing].join("");
}
/**
 * Register one benchmark case per nesting depth on the shared `suite`.
 *
 * @param name label prefix identifying the parser build (e.g. "baseline")
 * @param implementation object exposing `parse(input, options)`
 * @param options forwarded unchanged to `implementation.parse`; may be omitted
 */
function benchCases(name, implementation, options) {
  for (const length of [128, 256, 512, 1024]) {
    // Build the input once, outside the measured callback, so only parsing
    // is timed.
    const input = createInput(length);
    // This benchmark nests block statements (`{ ... }`), not template
    // elements, so the label must say so to keep the report unambiguous.
    suite.add(`${name} ${length} nested block elements`, () => {
      implementation.parse(input, options);
    });
  }
}

// Register the same cases for both parser builds, baseline first.
for (const [label, parser] of [
  ["baseline", baseline],
  ["current", current],
]) {
  benchCases(label, parser);
}

// Report after every completed cycle, then start the run.
suite.on("cycle", report).run();
@@ -0,0 +1,25 @@
import Benchmark from "benchmark";
import baseline from "@babel-baseline/parser";
import current from "@babel/parser";
import { report } from "../../util.mjs";

// Shared Benchmark.js suite; benchCases() registers every case on it.
const suite = new Benchmark.Suite();
/**
 * Build the benchmark source: `length` nested `<t>` elements, each with one
 * expression attribute (`a={x}`) and one expression child (`{y}`), followed
 * by the matching closing tags.
 */
function createInput(length) {
  const openingTags = "<t a={x}>{y}".repeat(length);
  const closingTags = "</t>".repeat(length);
  return openingTags + closingTags;
}
/**
 * Register one benchmark case per nesting depth on the shared `suite`.
 *
 * @param name label prefix identifying the parser build (e.g. "baseline")
 * @param implementation object exposing `parse(input, options)`
 * @param options forwarded unchanged to `implementation.parse`
 */
function benchCases(name, implementation, options) {
  [128, 256, 512, 1024].forEach(depth => {
    // Input is built once, outside the measured callback.
    const source = createInput(depth);
    const title = `${name} ${depth} nested jsx elements with one attribute and text`;
    suite.add(title, () => {
      implementation.parse(source, options);
    });
  });
}

// Register the same cases for both parser builds, baseline first; each call
// receives its own options object enabling the "jsx" plugin.
for (const [label, parser] of [
  ["baseline", baseline],
  ["current", current],
]) {
  benchCases(label, parser, { plugins: ["jsx"] });
}

// Report after every completed cycle, then start the run.
suite.on("cycle", report).run();
22 changes: 22 additions & 0 deletions benchmark/babel-parser/many-nested-template-elements/bench.mjs
@@ -0,0 +1,22 @@
import Benchmark from "benchmark";
import baseline from "@babel-baseline/parser";
import current from "@babel/parser";
import { report } from "../../util.mjs";

// Shared Benchmark.js suite; benchCases() registers every case on it.
const suite = new Benchmark.Suite();
/**
 * Build the benchmark source: `length` template literals nested inside each
 * other's substitution, with a literal `0` at the innermost level.
 */
function createInput(length) {
  // Plain string literals are used on purpose: the pieces contain backticks
  // and "${", which would be awkward inside a template literal.
  const openers = "` ${".repeat(length);
  const closers = "}`".repeat(length);
  return [openers, "0", closers].join("");
}
/**
 * Register one benchmark case per nesting depth on the shared `suite`.
 *
 * @param name label prefix identifying the parser build (e.g. "baseline")
 * @param implementation object exposing `parse(input, options)`
 * @param options forwarded unchanged to `implementation.parse`; may be omitted
 */
function benchCases(name, implementation, options) {
  [128, 256, 512, 1024].forEach(depth => {
    // Input is built once, outside the measured callback.
    const source = createInput(depth);
    const title = `${name} ${depth} nested template elements`;
    suite.add(title, () => {
      implementation.parse(source, options);
    });
  });
}

// Register the same cases for both parser builds, baseline first.
for (const [label, parser] of [
  ["baseline", baseline],
  ["current", current],
]) {
  benchCases(label, parser);
}

// Report after every completed cycle, then start the run.
suite.on("cycle", report).run();
23 changes: 23 additions & 0 deletions benchmark/babel-parser/many-template-elements/bench.mjs
@@ -0,0 +1,23 @@
import Benchmark from "benchmark";
import baseline from "@babel-baseline/parser";
import current from "@babel/parser";
import { report } from "../../util.mjs";

// Shared Benchmark.js suite; benchCases() registers every case on it.
const suite = new Benchmark.Suite();
/**
 * Build the benchmark source: a single template literal containing `length`
 * repetitions of a space followed by the substitution `${0}`.
 */
function createInput(length) {
  const backtick = "`";
  const elements = " ${0}".repeat(length);
  return [backtick, elements, backtick].join("");
}
/**
 * Register one benchmark case per element count on the shared `suite`.
 *
 * @param name label prefix identifying the parser build (e.g. "baseline")
 * @param implementation object exposing `parse(input, options)`
 * @param options forwarded unchanged to `implementation.parse`; may be omitted
 */
function benchCases(name, implementation, options) {
  [128, 256, 512, 1024].forEach(count => {
    // Input is built once, outside the measured callback.
    const source = createInput(count);
    const title = `${name} ${count} template elements`;
    suite.add(title, () => {
      implementation.parse(source, options);
    });
  });
}

// Parse a one-element template with the current build before any case is
// registered. NOTE(review): presumably a smoke check / warm-up; only
// `current` gets it, not `baseline` — confirm that this is intentional.
current.parse(createInput(1));

// Register the same cases for both parser builds, baseline first.
for (const [label, parser] of [
  ["baseline", baseline],
  ["current", current],
]) {
  benchCases(label, parser);
}

// Report after every completed cycle, then start the run.
suite.on("cycle", report).run();
29 changes: 14 additions & 15 deletions eslint/babel-eslint-parser/src/convert/convertTokens.cjs
Expand Up @@ -71,13 +71,6 @@ function convertTemplateType(tokens, tl) {
templateTokens.push(token);
break;

case tl.eof:
if (curlyBrace) {
result.push(curlyBrace);
}

break;

default:
if (curlyBrace) {
result.push(curlyBrace);
Expand Down Expand Up @@ -186,6 +179,8 @@ function convertToken(token, source, tl) {
token.value = `${token.value}n`;
} else if (label === tl.privateName) {
token.type = "PrivateIdentifier";
} else if (label === tl.templateNonTail || label === tl.templateTail) {
token.type = "Template";
}

if (typeof token.type !== "string") {
Expand All @@ -196,22 +191,26 @@ function convertToken(token, source, tl) {

module.exports = function convertTokens(tokens, code, tl) {
const result = [];

const withoutComments = convertTemplateType(tokens, tl).filter(
t => t.type !== "CommentLine" && t.type !== "CommentBlock",
);
for (let i = 0, { length } = withoutComments; i < length; i++) {
const token = withoutComments[i];
const templateTypeMergedTokens = process.env.BABEL_8_BREAKING
? tokens
: convertTemplateType(tokens, tl);
// The last token is always tt.eof and should be skipped
for (let i = 0, { length } = templateTypeMergedTokens; i < length - 1; i++) {
const token = templateTypeMergedTokens[i];
const tokenType = token.type;
if (tokenType === "CommentLine" || tokenType === "CommentBlock") {
continue;
}

if (!process.env.BABEL_8_BREAKING) {
// Babel 8 already produces a single token

if (
ESLINT_VERSION >= 8 &&
i + 1 < length &&
token.type.label === tl.hash
tokenType.label === tl.hash
) {
const nextToken = withoutComments[i + 1];
const nextToken = templateTypeMergedTokens[i + 1];

// We must disambiguate private identifier from the hack pipes topic token
if (nextToken.type.label === tl.name && token.end === nextToken.start) {
Expand Down
67 changes: 41 additions & 26 deletions packages/babel-parser/src/parser/expression.js
Expand Up @@ -27,6 +27,7 @@ import {
tokenIsPostfix,
tokenIsPrefix,
tokenIsRightAssociative,
tokenIsTemplate,
tokenKeywordOrIdentifierIsKeyword,
tokenLabelName,
tokenOperatorPrecedence,
Expand All @@ -43,7 +44,7 @@ import {
isIdentifierStart,
canBeReservedWord,
} from "../util/identifier";
import { Position } from "../util/location";
import { Position, createPositionWithColumnOffset } from "../util/location";
import * as charCodes from "charcodes";
import {
BIND_OUTSIDE,
Expand Down Expand Up @@ -706,9 +707,10 @@ export default class ExpressionParser extends LValParser {
noCalls: ?boolean,
state: N.ParseSubscriptState,
): N.Expression {
if (!noCalls && this.eat(tt.doubleColon)) {
const { type } = this.state;
if (!noCalls && type === tt.doubleColon) {
return this.parseBind(base, startPos, startLoc, noCalls, state);
} else if (this.match(tt.backQuote)) {
} else if (tokenIsTemplate(type)) {
return this.parseTaggedTemplateExpression(
base,
startPos,
Expand All @@ -719,7 +721,7 @@ export default class ExpressionParser extends LValParser {

let optional = false;

if (this.match(tt.questionDot)) {
if (type === tt.questionDot) {
if (noCalls && this.lookaheadCharCode() === charCodes.leftParenthesis) {
// stop at `?.` when parsing `new a?.()`
state.stop = true;
Expand Down Expand Up @@ -801,6 +803,7 @@ export default class ExpressionParser extends LValParser {
): N.Expression {
const node = this.startNodeAt(startPos, startLoc);
node.object = base;
this.next(); // eat '::'
node.callee = this.parseNoCallExpr();
state.stop = true;
return this.parseSubscripts(
Expand Down Expand Up @@ -1153,7 +1156,8 @@ export default class ExpressionParser extends LValParser {
case tt._new:
return this.parseNewOrNewTarget();

case tt.backQuote:
case tt.templateNonTail:
case tt.templateTail:
return this.parseTemplate(false);

// BindExpression[Yield]
Expand Down Expand Up @@ -1831,37 +1835,47 @@ export default class ExpressionParser extends LValParser {
// Parse template expression.

parseTemplateElement(isTagged: boolean): N.TemplateElement {
const elem = this.startNode();
if (this.state.value === null) {
const { start, end, value } = this.state;
const elemStart = start + 1;
const elem = this.startNodeAt(
elemStart,
createPositionWithColumnOffset(this.state.startLoc, 1),
);
if (value === null) {
if (!isTagged) {
this.raise(this.state.start + 1, Errors.InvalidEscapeSequenceTemplate);
this.raise(start + 2, Errors.InvalidEscapeSequenceTemplate);
}
}

const isTail = this.match(tt.templateTail);
const endOffset = isTail ? -1 : -2;
const elemEnd = end + endOffset;
elem.value = {
raw: this.input
.slice(this.state.start, this.state.end)
.replace(/\r\n?/g, "\n"),
cooked: this.state.value,
raw: this.input.slice(elemStart, elemEnd).replace(/\r\n?/g, "\n"),
cooked: value === null ? null : value.slice(1, endOffset),
};
elem.tail = isTail;
this.next();
elem.tail = this.match(tt.backQuote);
return this.finishNode(elem, "TemplateElement");
this.finishNode(elem, "TemplateElement");
this.resetEndLocation(
elem,
elemEnd,
createPositionWithColumnOffset(this.state.lastTokEndLoc, endOffset),
);
return elem;
}

// https://tc39.es/ecma262/#prod-TemplateLiteral
parseTemplate(isTagged: boolean): N.TemplateLiteral {
const node = this.startNode();
this.next();
node.expressions = [];
let curElt = this.parseTemplateElement(isTagged);
node.quasis = [curElt];
while (!curElt.tail) {
this.expect(tt.dollarBraceL);
node.expressions.push(this.parseTemplateSubstitution());
this.expect(tt.braceR);
this.readTemplateContinuation();
node.quasis.push((curElt = this.parseTemplateElement(isTagged)));
}
this.next();
return this.finishNode(node, "TemplateLiteral");
}

Expand Down Expand Up @@ -2680,21 +2694,22 @@ export default class ExpressionParser extends LValParser {
}

isAmbiguousAwait(): boolean {
if (this.hasPrecedingLineBreak()) return true;
const { type } = this.state;
return (
this.hasPrecedingLineBreak() ||
// All the following expressions are ambiguous:
// await + 0, await - 0, await ( 0 ), await [ 0 ], await / 0 /u, await ``
this.match(tt.plusMin) ||
this.match(tt.parenL) ||
this.match(tt.bracketL) ||
this.match(tt.backQuote) ||
type === tt.plusMin ||
type === tt.parenL ||
type === tt.bracketL ||
tokenIsTemplate(type) ||
// Sometimes the tokenizer generates tt.slash for regexps, and this is
// handled by parseExprAtom
this.match(tt.regexp) ||
this.match(tt.slash) ||
type === tt.regexp ||
type === tt.slash ||
// This code could be parsed both as a modulo operator or as an intrinsic:
// await %x(0)
(this.hasPlugin("v8intrinsic") && this.match(tt.modulo))
(this.hasPlugin("v8intrinsic") && type === tt.modulo)
);
}

Expand Down