babel · nicolo-ribaudo · Feb 1, 2022 · Dec 31, 2021 · Jan 10, 2022
diff --git a/packages/babel-core/src/parser/util/missing-plugin-helper.ts b/packages/babel-core/src/parser/util/missing-plugin-helper.ts
@@ -201,6 +201,16 @@ const pluginNameMap = {
       url: "https://git.io/JvKp3",
     },
   },
+  regexpUnicodeSets: {
+    syntax: {
+      name: "@babel/plugin-syntax-unicode-sets-regex",
+      url: "https://git.io/J9GTd",
+    },
+    transform: {
+      name: "@babel/plugin-proposal-unicode-sets-regex",
+      url: "https://git.io/J9GTQ",
+    },
+  },
   throwExpressions: {
     syntax: {
       name: "@babel/plugin-syntax-throw-expressions",

diff --git a/packages/babel-helper-create-regexp-features-plugin/package.json b/packages/babel-helper-create-regexp-features-plugin/package.json
@@ -19,7 +19,7 @@
   ],
   "dependencies": {
     "@babel/helper-annotate-as-pure": "workspace:^",
-    "regexpu-core": "^4.7.1"
+    "regexpu-core": "^5.0.1"
   },
   "peerDependencies": {
     "@babel/core": "^7.0.0"

diff --git a/packages/babel-helper-create-regexp-features-plugin/src/features.ts b/packages/babel-helper-create-regexp-features-plugin/src/features.ts
@@ -3,6 +3,8 @@ export const FEATURES = Object.freeze({
   dotAllFlag: 1 << 1,
   unicodePropertyEscape: 1 << 2,
   namedCaptureGroups: 1 << 3,
+  unicodeSetsFlag_syntax: 1 << 4,
+  unicodeSetsFlag: 1 << 5,
 });
 
 // We can't use a symbol because this needs to always be the same, even if

diff --git a/packages/babel-helper-create-regexp-features-plugin/src/index.ts b/packages/babel-helper-create-regexp-features-plugin/src/index.ts
@@ -1,29 +1,10 @@
 import rewritePattern from "regexpu-core";
-import {
-  featuresKey,
-  FEATURES,
-  enableFeature,
-  runtimeKey,
-  hasFeature,
-} from "./features";
-import { generateRegexpuOptions } from "./util";
+import { featuresKey, FEATURES, enableFeature, runtimeKey } from "./features";
+import { generateRegexpuOptions, canSkipRegexpu, transformFlags } from "./util";
 
 import { types as t } from "@babel/core";
 import annotateAsPure from "@babel/helper-annotate-as-pure";
 
-type RegExpFlags = "i" | "g" | "m" | "s" | "u" | "y";
-
-/**
- * Remove given flag from given RegExpLiteral node
- *
- * @param {RegExpLiteral} node
- * @param {RegExpFlags} flag
- * @returns {void}
- */
-function pullFlag(node, flag: RegExpFlags): void {
-  node.flags = node.flags.replace(flag, "");
-}
-
 declare const PACKAGE_JSON: { name: string; version: string };
 
 // Note: Versions are represented as an integer. e.g. 7.1.5 is represented
@@ -39,9 +20,13 @@ export function createRegExpFeaturePlugin({
   name,
   feature,
   options = {} as any,
+  manipulateOptions = (() => {}) as (opts: any, parserOpts: any) => void,
 }) {
   return {
     name,
+
+    manipulateOptions,
+
     pre() {
       const { file } = this;
       const features = file.get(featuresKey) ?? 0;
@@ -70,20 +55,21 @@ export function createRegExpFeaturePlugin({
         const { file } = this;
         const features = file.get(featuresKey);
         const runtime = file.get(runtimeKey) ?? true;
-        const regexpuOptions = generateRegexpuOptions(node, features);
-        if (regexpuOptions === null) {
-          return;
-        }
+
+        const regexpuOptions = generateRegexpuOptions(features);
+        if (canSkipRegexpu(node, regexpuOptions)) return;
+
         const namedCaptureGroups = {};
-        if (regexpuOptions.namedGroup) {
+        if (regexpuOptions.namedGroups === "transform") {
           regexpuOptions.onNamedGroup = (name, index) => {
             namedCaptureGroups[name] = index;
           };
         }
+
         node.pattern = rewritePattern(node.pattern, node.flags, regexpuOptions);
 
         if (
-          regexpuOptions.namedGroup &&
+          regexpuOptions.namedGroups === "transform" &&
           Object.keys(namedCaptureGroups).length > 0 &&
           runtime &&
           !isRegExpTest(path)
@@ -96,12 +82,8 @@ export function createRegExpFeaturePlugin({
 
           path.replaceWith(call);
         }
-        if (hasFeature(features, FEATURES.unicodeFlag)) {
-          pullFlag(node, "u");
-        }
-        if (hasFeature(features, FEATURES.dotAllFlag)) {
-          pullFlag(node, "s");
-        }
+
+        node.flags = transformFlags(regexpuOptions, node.flags);
       },
     },
   };

diff --git a/packages/babel-helper-create-regexp-features-plugin/src/util.ts b/packages/babel-helper-create-regexp-features-plugin/src/util.ts
@@ -1,65 +1,77 @@
+import type { types as t } from "@babel/core";
 import { FEATURES, hasFeature } from "./features";
 
 type RegexpuOptions = {
-  useUnicodeFlag: boolean;
+  unicodeFlag: "transform" | false;
+  unicodeSetsFlag: "transform" | "parse" | false;
+  dotAllFlag: "transform" | false;
+  unicodePropertyEscapes: "transform" | false;
+  namedGroups: "transform" | false;
   onNamedGroup: (name: string, index: number) => void;
-  namedGroup: boolean;
-  unicodePropertyEscape: boolean;
-  dotAllFlag: boolean;
-  lookbehind: boolean;
 };
 
-export function generateRegexpuOptions(node, features): RegexpuOptions | null {
-  let useUnicodeFlag = false,
-    dotAllFlag = false,
-    unicodePropertyEscape = false,
-    namedGroup = false;
+export function generateRegexpuOptions(toTransform: number): RegexpuOptions {
+  type Experimental = 1;
+
+  const feat = <Stability extends 0 | 1 = 0>(
+    name: keyof typeof FEATURES,
+    ok: "transform" | (Stability extends 0 ? never : "parse") = "transform",
+  ) => {
+    return hasFeature(toTransform, FEATURES[name]) ? ok : false;
+  };
+
+  return {
+    unicodeFlag: feat("unicodeFlag"),
+    unicodeSetsFlag:
+      feat<Experimental>("unicodeSetsFlag") ||
+      feat<Experimental>("unicodeSetsFlag_syntax", "parse"),
+    dotAllFlag: feat("dotAllFlag"),
+    unicodePropertyEscapes: feat("unicodePropertyEscape"),
+    namedGroups: feat("namedCaptureGroups"),
+    onNamedGroup: () => {},
+  };
+}
+
+export function canSkipRegexpu(
+  node: t.RegExpLiteral,
+  options: RegexpuOptions,
+): boolean {
   const { flags, pattern } = node;
-  const flagsIncludesU = flags.includes("u");
 
-  if (flagsIncludesU) {
-    if (!hasFeature(features, FEATURES.unicodeFlag)) {
-      useUnicodeFlag = true;
-    }
+  if (flags.includes("v")) {
+    if (options.unicodeSetsFlag === "transform") return false;
+  }
+
+  if (flags.includes("u")) {
+    if (options.unicodeFlag === "transform") return false;
     if (
-      hasFeature(features, FEATURES.unicodePropertyEscape) &&
+      options.unicodePropertyEscapes === "transform" &&
       /\\[pP]{/.test(pattern)
     ) {
-      unicodePropertyEscape = true;
+      return false;
     }
   }
 
-  if (hasFeature(features, FEATURES.dotAllFlag) && flags.indexOf("s") >= 0) {
-    dotAllFlag = true;
+  if (flags.includes("s")) {
+    if (options.dotAllFlag === "transform") return false;
   }
-  if (
-    hasFeature(features, FEATURES.namedCaptureGroups) &&
-    /\(\?<(?![=!])/.test(pattern)
-  ) {
-    namedGroup = true;
+
+  if (options.namedGroups === "transform" && /\(\?<(?![=!])/.test(pattern)) {
+    return false;
   }
-  if (
-    !namedGroup &&
-    !unicodePropertyEscape &&
-    !dotAllFlag &&
-    (!flagsIncludesU || useUnicodeFlag)
-  ) {
-    return null;
+
+  return true;
+}
+
+export function transformFlags(regexpuOptions: RegexpuOptions, flags: string) {
+  if (regexpuOptions.unicodeSetsFlag === "transform") {
+    flags = flags.replace("v", "u");
   }
-  // Now we have to feed regexpu-core the regex
-  if (flagsIncludesU && flags.indexOf("s") >= 0) {
-    // When flags includes u, `config.unicode` will be enabled even if `u` is supported natively.
-    // In this case we have to enable dotAllFlag, otherwise `rewritePattern(/./su)` will return
-    // incorrect result
-    // https://github.com/mathiasbynens/regexpu-core/blob/v4.6.0/rewrite-pattern.js#L191
-    dotAllFlag = true;
+  if (regexpuOptions.unicodeFlag === "transform") {
+    flags = flags.replace("u", "");
   }
-  return {
-    useUnicodeFlag,
-    onNamedGroup: () => {},
-    namedGroup,
-    unicodePropertyEscape,
-    dotAllFlag,
-    lookbehind: true,
-  };
+  if (regexpuOptions.dotAllFlag === "transform") {
+    flags = flags.replace("s", "");
+  }
+  return flags;
 }
diff --git a/...es/plugin-transform-named-capturing-groups-regex/runtime-false-not-overwritten/output.mjs b/...es/plugin-transform-named-capturing-groups-regex/runtime-false-not-overwritten/output.mjs
@@ -1 +1 @@
-/([0-9]{4})/;
+/(\d{4})/;
diff --git a/packages/babel-parser/src/parser/error-message.js b/packages/babel-parser/src/parser/error-message.js
@@ -76,6 +76,8 @@ export const ErrorMessages = makeErrorTemplates(
     ImportCallArity: "`import()` requires exactly %0.",
     ImportCallNotNewExpression: "Cannot use new with import(...).",
     ImportCallSpreadArgument: "`...` is not allowed in `import()`.",
+    IncompatibleRegExpUVFlags:
+      "The 'u' and 'v' regular expression flags cannot be enabled at the same time.",
     InvalidBigIntLiteral: "Invalid BigIntLiteral.",
     InvalidCodePoint: "Code point out of bounds.",
     InvalidCoverInitializedName: "Invalid shorthand property initializer.",

diff --git a/packages/babel-parser/src/tokenizer/index.js b/packages/babel-parser/src/tokenizer/index.js
@@ -38,6 +38,8 @@ const VALID_REGEX_FLAGS = new Set([
   charCodes.lowercaseY,
   charCodes.lowercaseU,
   charCodes.lowercaseD,
+  // This is only valid when using the regexpUnicodeSets plugin
+  charCodes.lowercaseV,
 ]);
 
 // The following character codes are forbidden from being
@@ -1086,22 +1088,32 @@ export default class Tokenizer extends ParserErrors {
 
     let mods = "";
 
+    const nextPos = () =>
+      // (pos + 1) + 1 - start
+      createPositionWithColumnOffset(startLoc, pos + 2 - start);
+
     while (pos < this.length) {
       const cp = this.codePointAtPos(pos);
       // It doesn't matter if cp > 0xffff, the loop will either throw or break because we check on cp
       const char = String.fromCharCode(cp);
 
       if (VALID_REGEX_FLAGS.has(cp)) {
+        if (cp === charCodes.lowercaseV) {
+          this.expectPlugin("regexpUnicodeSets", nextPos());
+
+          if (mods.includes("u")) {
+            this.raise(Errors.IncompatibleRegExpUVFlags, { at: nextPos() });
+          }
+        } else if (cp === charCodes.lowercaseU) {
+          if (mods.includes("v")) {
+            this.raise(Errors.IncompatibleRegExpUVFlags, { at: nextPos() });
+          }
+        }
         if (mods.includes(char)) {
-          // (pos + 1) + 1 - start
-          this.raise(Errors.DuplicateRegExpFlags, {
-            at: createPositionWithColumnOffset(startLoc, pos + 2 - start),
-          });
+          this.raise(Errors.DuplicateRegExpFlags, { at: nextPos() });
         }
       } else if (isIdentifierChar(cp) || cp === charCodes.backslash) {
-        this.raise(Errors.MalformedRegExpFlags, {
-          at: createPositionWithColumnOffset(startLoc, pos + 2 - start),
-        });
+        this.raise(Errors.MalformedRegExpFlags, { at: nextPos() });
       } else {
         break;
       }

diff --git a/packages/babel-parser/test/fixtures/experimental/_no-plugin/regexp-unicode-sets/input.js b/packages/babel-parser/test/fixtures/experimental/_no-plugin/regexp-unicode-sets/input.js
@@ -0,0 +1 @@
+/a/v;
diff --git a/packages/babel-parser/test/fixtures/experimental/_no-plugin/regexp-unicode-sets/options.json b/packages/babel-parser/test/fixtures/experimental/_no-plugin/regexp-unicode-sets/options.json
@@ -0,0 +1,3 @@
+{
+  "throws": "This experimental syntax requires enabling the parser plugin: \"regexpUnicodeSets\". (1:4)"
+}
diff --git a/packages/babel-parser/test/fixtures/experimental/regexp-unicode-sets/basic/input.js b/packages/babel-parser/test/fixtures/experimental/regexp-unicode-sets/basic/input.js
@@ -0,0 +1 @@
+/a/v;
diff --git a/packages/babel-parser/test/fixtures/experimental/regexp-unicode-sets/basic/output.json b/packages/babel-parser/test/fixtures/experimental/regexp-unicode-sets/basic/output.json
@@ -0,0 +1,26 @@
+{
+  "type": "File",
+  "start":0,"end":5,"loc":{"start":{"line":1,"column":0},"end":{"line":1,"column":5}},
+  "program": {
+    "type": "Program",
+    "start":0,"end":5,"loc":{"start":{"line":1,"column":0},"end":{"line":1,"column":5}},
+    "sourceType": "script",
+    "interpreter": null,
+    "body": [
+      {
+        "type": "ExpressionStatement",
+        "start":0,"end":5,"loc":{"start":{"line":1,"column":0},"end":{"line":1,"column":5}},
+        "expression": {
+          "type": "RegExpLiteral",
+          "start":0,"end":4,"loc":{"start":{"line":1,"column":0},"end":{"line":1,"column":4}},
+          "extra": {
+            "raw": "/a/v"
+          },
+          "pattern": "a",
+          "flags": "v"
+        }
+      }
+    ],
+    "directives": []
+  }
+}
diff --git a/packages/babel-parser/test/fixtures/experimental/regexp-unicode-sets/options.json b/packages/babel-parser/test/fixtures/experimental/regexp-unicode-sets/options.json
@@ -0,0 +1,3 @@
+{
+  "plugins": ["regexpUnicodeSets"]
+}
diff --git a/packages/babel-parser/test/fixtures/experimental/regexp-unicode-sets/uv-error/input.js b/packages/babel-parser/test/fixtures/experimental/regexp-unicode-sets/uv-error/input.js
@@ -0,0 +1 @@
+/a/ugv;
diff --git a/packages/babel-parser/test/fixtures/experimental/regexp-unicode-sets/uv-error/output.json b/packages/babel-parser/test/fixtures/experimental/regexp-unicode-sets/uv-error/output.json
@@ -0,0 +1,29 @@
+{
+  "type": "File",
+  "start":0,"end":7,"loc":{"start":{"line":1,"column":0},"end":{"line":1,"column":7}},
+  "errors": [
+    "SyntaxError: The 'u' and 'v' regular expression flags cannot be enabled at the same time. (1:6)"
+  ],
+  "program": {
+    "type": "Program",
+    "start":0,"end":7,"loc":{"start":{"line":1,"column":0},"end":{"line":1,"column":7}},
+    "sourceType": "script",
+    "interpreter": null,
+    "body": [
+      {
+        "type": "ExpressionStatement",
+        "start":0,"end":7,"loc":{"start":{"line":1,"column":0},"end":{"line":1,"column":7}},
+        "expression": {
+          "type": "RegExpLiteral",
+          "start":0,"end":6,"loc":{"start":{"line":1,"column":0},"end":{"line":1,"column":6}},
+          "extra": {
+            "raw": "/a/ugv"
+          },
+          "pattern": "a",
+          "flags": "ugv"
+        }
+      }
+    ],
+    "directives": []
+  }
+}
diff --git a/packages/babel-parser/test/fixtures/experimental/regexp-unicode-sets/vu-error/input.js b/packages/babel-parser/test/fixtures/experimental/regexp-unicode-sets/vu-error/input.js
@@ -0,0 +1 @@
+/a/vu;