Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement support for the "regexp unicode sets" proposal #14091

Merged
merged 2 commits into from Feb 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 10 additions & 0 deletions packages/babel-core/src/parser/util/missing-plugin-helper.ts
Expand Up @@ -201,6 +201,16 @@ const pluginNameMap = {
url: "https://git.io/JvKp3",
},
},
regexpUnicodeSets: {
syntax: {
name: "@babel/plugin-syntax-unicode-sets-regex",
url: "https://git.io/J9GTd",
},
transform: {
name: "@babel/plugin-proposal-unicode-sets-regex",
url: "https://git.io/J9GTQ",
},
},
throwExpressions: {
syntax: {
name: "@babel/plugin-syntax-throw-expressions",
Expand Down
Expand Up @@ -19,7 +19,7 @@
],
"dependencies": {
"@babel/helper-annotate-as-pure": "workspace:^",
"regexpu-core": "^4.7.1"
"regexpu-core": "^5.0.1"
},
"peerDependencies": {
"@babel/core": "^7.0.0"
Expand Down
Expand Up @@ -3,6 +3,8 @@ export const FEATURES = Object.freeze({
dotAllFlag: 1 << 1,
unicodePropertyEscape: 1 << 2,
namedCaptureGroups: 1 << 3,
unicodeSetsFlag_syntax: 1 << 4,
unicodeSetsFlag: 1 << 5,
});

// We can't use a symbol because this needs to always be the same, even if
Expand Down
48 changes: 15 additions & 33 deletions packages/babel-helper-create-regexp-features-plugin/src/index.ts
@@ -1,29 +1,10 @@
import rewritePattern from "regexpu-core";
import {
featuresKey,
FEATURES,
enableFeature,
runtimeKey,
hasFeature,
} from "./features";
import { generateRegexpuOptions } from "./util";
import { featuresKey, FEATURES, enableFeature, runtimeKey } from "./features";
import { generateRegexpuOptions, canSkipRegexpu, transformFlags } from "./util";

import { types as t } from "@babel/core";
import annotateAsPure from "@babel/helper-annotate-as-pure";

type RegExpFlags = "i" | "g" | "m" | "s" | "u" | "y";

/**
 * Remove the given flag from a RegExpLiteral-like node, mutating it in place.
 *
 * The flag is passed to `String.prototype.replace` as a string pattern, so
 * only its first occurrence is removed; when the flag is absent, `node.flags`
 * is left unchanged.
 *
 * @param node - The node whose `flags` string is rewritten.
 * @param flag - The single flag character to strip from `node.flags`.
 */
function pullFlag(node: { flags: string }, flag: RegExpFlags): void {
  node.flags = node.flags.replace(flag, "");
}

declare const PACKAGE_JSON: { name: string; version: string };

// Note: Versions are represented as an integer. e.g. 7.1.5 is represented
Expand All @@ -39,9 +20,13 @@ export function createRegExpFeaturePlugin({
name,
feature,
options = {} as any,
manipulateOptions = (() => {}) as (opts: any, parserOpts: any) => void,
}) {
return {
name,

manipulateOptions,

pre() {
const { file } = this;
const features = file.get(featuresKey) ?? 0;
Expand Down Expand Up @@ -70,20 +55,21 @@ export function createRegExpFeaturePlugin({
const { file } = this;
const features = file.get(featuresKey);
const runtime = file.get(runtimeKey) ?? true;
const regexpuOptions = generateRegexpuOptions(node, features);
if (regexpuOptions === null) {
return;
}

const regexpuOptions = generateRegexpuOptions(features);
if (canSkipRegexpu(node, regexpuOptions)) return;

const namedCaptureGroups = {};
if (regexpuOptions.namedGroup) {
if (regexpuOptions.namedGroups === "transform") {
regexpuOptions.onNamedGroup = (name, index) => {
namedCaptureGroups[name] = index;
};
}

node.pattern = rewritePattern(node.pattern, node.flags, regexpuOptions);

if (
regexpuOptions.namedGroup &&
regexpuOptions.namedGroups === "transform" &&
Object.keys(namedCaptureGroups).length > 0 &&
runtime &&
!isRegExpTest(path)
Expand All @@ -96,12 +82,8 @@ export function createRegExpFeaturePlugin({

path.replaceWith(call);
}
if (hasFeature(features, FEATURES.unicodeFlag)) {
pullFlag(node, "u");
}
if (hasFeature(features, FEATURES.dotAllFlag)) {
pullFlag(node, "s");
}

node.flags = transformFlags(regexpuOptions, node.flags);
},
},
};
Expand Down
104 changes: 58 additions & 46 deletions packages/babel-helper-create-regexp-features-plugin/src/util.ts
@@ -1,65 +1,77 @@
import type { types as t } from "@babel/core";
import { FEATURES, hasFeature } from "./features";

type RegexpuOptions = {
useUnicodeFlag: boolean;
unicodeFlag: "transform" | false;
unicodeSetsFlag: "transform" | "parse" | false;
dotAllFlag: "transform" | false;
unicodePropertyEscapes: "transform" | false;
namedGroups: "transform" | false;
onNamedGroup: (name: string, index: number) => void;
namedGroup: boolean;
unicodePropertyEscape: boolean;
dotAllFlag: boolean;
lookbehind: boolean;
};

export function generateRegexpuOptions(node, features): RegexpuOptions | null {
let useUnicodeFlag = false,
dotAllFlag = false,
unicodePropertyEscape = false,
namedGroup = false;
/**
 * Build the options object handed to regexpu-core from the set of enabled
 * features.
 *
 * @param toTransform - Feature set, combined from `FEATURES` entries
 *   (presumably a bitmask tested by `hasFeature` — confirm in ./features).
 * @returns For each regexpu option, the mode to use when the matching
 *   feature is enabled, or `false` when it is not. `onNamedGroup` is a no-op
 *   placeholder.
 */
export function generateRegexpuOptions(toTransform: number): RegexpuOptions {
  // Type-level marker: passing it as `Stability` allows the "parse" mode
  // in addition to "transform".
  type Experimental = 1;

  // Resolve a single feature to its mode, or `false` when it is not enabled.
  function modeFor<Stability extends 0 | 1 = 0>(
    featureName: keyof typeof FEATURES,
    enabledMode:
      | "transform"
      | (Stability extends 0 ? never : "parse") = "transform",
  ) {
    const enabled = hasFeature(toTransform, FEATURES[featureName]);
    return enabled ? enabledMode : false;
  }

  const unicodeFlag = modeFor("unicodeFlag");
  // The full transform takes precedence; otherwise fall back to "parse"
  // when only the syntax feature is enabled.
  const unicodeSetsFlag =
    modeFor<Experimental>("unicodeSetsFlag") ||
    modeFor<Experimental>("unicodeSetsFlag_syntax", "parse");
  const dotAllFlag = modeFor("dotAllFlag");
  const unicodePropertyEscapes = modeFor("unicodePropertyEscape");
  const namedGroups = modeFor("namedCaptureGroups");

  return {
    unicodeFlag,
    unicodeSetsFlag,
    dotAllFlag,
    unicodePropertyEscapes,
    namedGroups,
    onNamedGroup: () => {},
  };
}

export function canSkipRegexpu(
node: t.RegExpLiteral,
options: RegexpuOptions,
): boolean {
const { flags, pattern } = node;
const flagsIncludesU = flags.includes("u");

if (flagsIncludesU) {
if (!hasFeature(features, FEATURES.unicodeFlag)) {
useUnicodeFlag = true;
}
if (flags.includes("v")) {
if (options.unicodeSetsFlag === "transform") return false;
}

if (flags.includes("u")) {
if (options.unicodeFlag === "transform") return false;
if (
hasFeature(features, FEATURES.unicodePropertyEscape) &&
options.unicodePropertyEscapes === "transform" &&
/\\[pP]{/.test(pattern)
) {
unicodePropertyEscape = true;
return false;
}
}

if (hasFeature(features, FEATURES.dotAllFlag) && flags.indexOf("s") >= 0) {
dotAllFlag = true;
if (flags.includes("s")) {
if (options.dotAllFlag === "transform") return false;
}
if (
hasFeature(features, FEATURES.namedCaptureGroups) &&
/\(\?<(?![=!])/.test(pattern)
) {
namedGroup = true;

if (options.namedGroups === "transform" && /\(\?<(?![=!])/.test(pattern)) {
return false;
}
if (
!namedGroup &&
!unicodePropertyEscape &&
!dotAllFlag &&
(!flagsIncludesU || useUnicodeFlag)
) {
return null;

return true;
}

export function transformFlags(regexpuOptions: RegexpuOptions, flags: string) {
if (regexpuOptions.unicodeSetsFlag === "transform") {
flags = flags.replace("v", "u");
}
// Now we have to feed regexpu-core the regex
if (flagsIncludesU && flags.indexOf("s") >= 0) {
// When flags includes u, `config.unicode` will be enabled even if `u` is supported natively.
// In this case we have to enable dotAllFlag, otherwise `rewritePattern(/./su)` will return
// incorrect result
// https://github.com/mathiasbynens/regexpu-core/blob/v4.6.0/rewrite-pattern.js#L191
dotAllFlag = true;
if (regexpuOptions.unicodeFlag === "transform") {
flags = flags.replace("u", "");
}
return {
useUnicodeFlag,
onNamedGroup: () => {},
namedGroup,
unicodePropertyEscape,
dotAllFlag,
lookbehind: true,
};
if (regexpuOptions.dotAllFlag === "transform") {
flags = flags.replace("s", "");
}
return flags;
}
@@ -1 +1 @@
/([0-9]{4})/;
/(\d{4})/;
2 changes: 2 additions & 0 deletions packages/babel-parser/src/parser/error-message.js
Expand Up @@ -76,6 +76,8 @@ export const ErrorMessages = makeErrorTemplates(
ImportCallArity: "`import()` requires exactly %0.",
ImportCallNotNewExpression: "Cannot use new with import(...).",
ImportCallSpreadArgument: "`...` is not allowed in `import()`.",
IncompatibleRegExpUVFlags:
"The 'u' and 'v' regular expression flags cannot be enabled at the same time.",
InvalidBigIntLiteral: "Invalid BigIntLiteral.",
InvalidCodePoint: "Code point out of bounds.",
InvalidCoverInitializedName: "Invalid shorthand property initializer.",
Expand Down
26 changes: 19 additions & 7 deletions packages/babel-parser/src/tokenizer/index.js
Expand Up @@ -38,6 +38,8 @@ const VALID_REGEX_FLAGS = new Set([
charCodes.lowercaseY,
charCodes.lowercaseU,
charCodes.lowercaseD,
// This is only valid when using the regexpUnicodeSets plugin
charCodes.lowercaseV,
]);

// The following character codes are forbidden from being
Expand Down Expand Up @@ -1086,22 +1088,32 @@ export default class Tokenizer extends ParserErrors {

let mods = "";

const nextPos = () =>
// (pos + 1) + 1 - start
createPositionWithColumnOffset(startLoc, pos + 2 - start);

while (pos < this.length) {
const cp = this.codePointAtPos(pos);
// It doesn't matter if cp > 0xffff, the loop will either throw or break because we check on cp
const char = String.fromCharCode(cp);

if (VALID_REGEX_FLAGS.has(cp)) {
if (cp === charCodes.lowercaseV) {
this.expectPlugin("regexpUnicodeSets", nextPos());

if (mods.includes("u")) {
this.raise(Errors.IncompatibleRegExpUVFlags, { at: nextPos() });
}
} else if (cp === charCodes.lowercaseU) {
if (mods.includes("v")) {
this.raise(Errors.IncompatibleRegExpUVFlags, { at: nextPos() });
}
}
if (mods.includes(char)) {
// (pos + 1) + 1 - start
this.raise(Errors.DuplicateRegExpFlags, {
at: createPositionWithColumnOffset(startLoc, pos + 2 - start),
});
this.raise(Errors.DuplicateRegExpFlags, { at: nextPos() });
}
} else if (isIdentifierChar(cp) || cp === charCodes.backslash) {
this.raise(Errors.MalformedRegExpFlags, {
at: createPositionWithColumnOffset(startLoc, pos + 2 - start),
});
this.raise(Errors.MalformedRegExpFlags, { at: nextPos() });
} else {
break;
}
Expand Down
@@ -0,0 +1 @@
/a/v;
@@ -0,0 +1,3 @@
{
"throws": "This experimental syntax requires enabling the parser plugin: \"regexpUnicodeSets\". (1:4)"
}
@@ -0,0 +1 @@
/a/v;
@@ -0,0 +1,26 @@
{
"type": "File",
"start":0,"end":5,"loc":{"start":{"line":1,"column":0},"end":{"line":1,"column":5}},
"program": {
"type": "Program",
"start":0,"end":5,"loc":{"start":{"line":1,"column":0},"end":{"line":1,"column":5}},
"sourceType": "script",
"interpreter": null,
"body": [
{
"type": "ExpressionStatement",
"start":0,"end":5,"loc":{"start":{"line":1,"column":0},"end":{"line":1,"column":5}},
"expression": {
"type": "RegExpLiteral",
"start":0,"end":4,"loc":{"start":{"line":1,"column":0},"end":{"line":1,"column":4}},
"extra": {
"raw": "/a/v"
},
"pattern": "a",
"flags": "v"
}
}
],
"directives": []
}
}
@@ -0,0 +1,3 @@
{
"plugins": ["regexpUnicodeSets"]
}
@@ -0,0 +1 @@
/a/ugv;
@@ -0,0 +1,29 @@
{
"type": "File",
"start":0,"end":7,"loc":{"start":{"line":1,"column":0},"end":{"line":1,"column":7}},
"errors": [
"SyntaxError: The 'u' and 'v' regular expression flags cannot be enabled at the same time. (1:6)"
],
"program": {
"type": "Program",
"start":0,"end":7,"loc":{"start":{"line":1,"column":0},"end":{"line":1,"column":7}},
"sourceType": "script",
"interpreter": null,
"body": [
{
"type": "ExpressionStatement",
"start":0,"end":7,"loc":{"start":{"line":1,"column":0},"end":{"line":1,"column":7}},
"expression": {
"type": "RegExpLiteral",
"start":0,"end":6,"loc":{"start":{"line":1,"column":0},"end":{"line":1,"column":6}},
"extra": {
"raw": "/a/ugv"
},
"pattern": "a",
"flags": "ugv"
}
}
],
"directives": []
}
}
@@ -0,0 +1 @@
/a/vu;