eslint · mdjermanovic · Jul 28, 2023 · Jul 26, 2023 · Jul 27, 2023 · Jul 27, 2023
@@ -6,7 +6,12 @@
 "use strict";
 
 const astUtils = require("./utils/ast-utils");
+const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp");
 
+/**
+ * @typedef {import('@eslint-community/regexpp').AST.CharacterClass} CharacterClass
+ * @typedef {import('@eslint-community/regexpp').AST.ExpressionCharacterClass} ExpressionCharacterClass
+ */
 //------------------------------------------------------------------------------
 // Rule Definition
 //------------------------------------------------------------------------------
@@ -28,55 +33,17 @@ const VALID_STRING_ESCAPES = union(new Set("\\nrvtbfux"), astUtils.LINEBREAKS);
 const REGEX_GENERAL_ESCAPES = new Set("\\bcdDfnpPrsStvwWxu0123456789]");
 const REGEX_NON_CHARCLASS_ESCAPES = union(REGEX_GENERAL_ESCAPES, new Set("^/.$*+?[{}|()Bk"));
 
-/**
- * Parses a regular expression into a list of characters with character class info.
- * @param {string} regExpText The raw text used to create the regular expression
- * @returns {Object[]} A list of characters, each with info on escaping and whether they're in a character class.
- * @example
- *
- * parseRegExp("a\\b[cd-]");
- *
- * // returns:
- * [
- *     { text: "a", index: 0, escaped: false, inCharClass: false, startsCharClass: false, endsCharClass: false },
- *     { text: "b", index: 2, escaped: true, inCharClass: false, startsCharClass: false, endsCharClass: false },
- *     { text: "c", index: 4, escaped: false, inCharClass: true, startsCharClass: true, endsCharClass: false },
- *     { text: "d", index: 5, escaped: false, inCharClass: true, startsCharClass: false, endsCharClass: false },
- *     { text: "-", index: 6, escaped: false, inCharClass: true, startsCharClass: false, endsCharClass: false }
- * ];
- *
+/*
+ * Set of characters that require escaping in character classes in `unicodeSets` mode.
+ * ( ) [ ] { } / - \ | are ClassSetSyntaxCharacter
  */
-function parseRegExp(regExpText) {
-    const charList = [];
+const REGEX_CLASSSET_CHARACTER_ESCAPES = union(REGEX_GENERAL_ESCAPES, new Set("q/[{}|()-"));
 
-    regExpText.split("").reduce((state, char, index) => {
-        if (!state.escapeNextChar) {
-            if (char === "\\") {
-                return Object.assign(state, { escapeNextChar: true });
-            }
-            if (char === "[" && !state.inCharClass) {
-                return Object.assign(state, { inCharClass: true, startingCharClass: true });
-            }
-            if (char === "]" && state.inCharClass) {
-                if (charList.length && charList[charList.length - 1].inCharClass) {
-                    charList[charList.length - 1].endsCharClass = true;
-                }
-                return Object.assign(state, { inCharClass: false, startingCharClass: false });
-            }
-        }
-        charList.push({
-            text: char,
-            index,
-            escaped: state.escapeNextChar,
-            inCharClass: state.inCharClass,
-            startsCharClass: state.startingCharClass,
-            endsCharClass: false
-        });
-        return Object.assign(state, { escapeNextChar: false, startingCharClass: false });
-    }, { escapeNextChar: false, inCharClass: false, startingCharClass: false });
-
-    return charList;
-}
+/*
+ * A single character set of ClassSetReservedDoublePunctuator.
+ * && !! ## $$ %% ** ++ ,, .. :: ;; << == >> ?? @@ ^^ `` ~~ are ClassSetReservedDoublePunctuator
+ */
+const REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR = new Set("!#$%&*+,.:;<=>?@^`~");
 
 /** @type {import('../shared/types').Rule} */
 module.exports = {
@@ -103,15 +70,17 @@ module.exports = {
 
     create(context) {
         const sourceCode = context.sourceCode;
+        const parser = new RegExpParser();
 
         /**
          * Reports a node
          * @param {ASTNode} node The node to report
          * @param {number} startOffset The backslash's offset from the start of the node
          * @param {string} character The uselessly escaped character (not including the backslash)
+         * @param {boolean} [disableEscapeBackslashSuggest] `true` if escapeBackslash suggestion should be turned off.
          * @returns {void}
          */
-        function report(node, startOffset, character) {
+        function report(node, startOffset, character, disableEscapeBackslashSuggest) {
             const rangeStart = node.range[0] + startOffset;
             const range = [rangeStart, rangeStart + 1];
             const start = sourceCode.getLocFromIndex(rangeStart);
@@ -134,12 +103,16 @@ module.exports = {
                             return fixer.removeRange(range);
                         }
                     },
-                    {
-                        messageId: "escapeBackslash",
-                        fix(fixer) {
-                            return fixer.insertTextBeforeRange(range, "\\");
-                        }
-                    }
+                    ...disableEscapeBackslashSuggest
+                        ? []
+                        : [
+                            {
+                                messageId: "escapeBackslash",
+                                fix(fixer) {
+                                    return fixer.insertTextBeforeRange(range, "\\");
+                                }
+                            }
+                        ]
                 ]
             });
         }
@@ -182,6 +155,133 @@ module.exports = {
             }
         }
 
+        /**
+         * Checks if the escape character in given regexp is unnecessary.
+         * @private
+         * @param {ASTNode} node node to validate.
+         * @returns {void}
+         */
+        function validateRegExp(node) {
+            const { pattern, flags } = node.regex;
+            let patternNode;
+            const unicode = flags.includes("u");
+            const unicodeSets = flags.includes("v");
+
+            try {
+                patternNode = parser.parsePattern(pattern, 0, pattern.length, { unicode, unicodeSets });
+            } catch {
+
+                // Ignore regular expressions with syntax errors
+                return;
+            }
+
+            /** @type {(CharacterClass | ExpressionCharacterClass)[]} */
+            const characterClassStack = [];
+
+            visitRegExpAST(patternNode, {
+                onCharacterClassEnter: characterClassNode => characterClassStack.unshift(characterClassNode),
+                onCharacterClassLeave: () => characterClassStack.shift(),
+                onExpressionCharacterClassEnter: characterClassNode => characterClassStack.unshift(characterClassNode),
+                onExpressionCharacterClassLeave: () => characterClassStack.shift(),
+                onCharacterEnter(characterNode) {
+                    if (!characterNode.raw.startsWith("\\")) {
+
+                        // It's not an escaped character.
+                        return;
+                    }
+
+                    const escapedChar = characterNode.raw.slice(1);
+
+                    if (escapedChar !== String.fromCodePoint(characterNode.value)) {
+
+                        // It's a valid escape.
+                        return;
+                    }
+                    let allowedEscapes;
+
+                    if (characterClassStack.length) {
+                        allowedEscapes = unicodeSets ? REGEX_CLASSSET_CHARACTER_ESCAPES : REGEX_GENERAL_ESCAPES;
+                    } else {
+                        allowedEscapes = REGEX_NON_CHARCLASS_ESCAPES;
+                    }
+                    if (allowedEscapes.has(escapedChar)) {
+                        return;
+                    }
+
+                    const reportedIndex = characterNode.start + 1;
+                    let disableEscapeBackslashSuggest = false;
+
+                    if (characterClassStack.length) {
+                        const characterClassNode = characterClassStack[0];
+
+                        if (escapedChar === "^") {
+
+                            /*
+                             * The '^' character is also a special case; it must always be escaped outside of character classes, but
+                             * it only needs to be escaped in character classes if it's at the beginning of the character class. To
+                             * account for this, consider it to be a valid escape character outside of character classes, and filter
+                             * out '^' characters that appear at the start of a character class.
+                             */
+                            if (characterClassNode.start + 1 === characterNode.start) {
+
+                                return;
+                            }
+                        }
+                        if (!unicodeSets) {
+                            if (escapedChar === "-") {
+
+                                /*
+                                 * The '-' character is a special case, because it's only valid to escape it if it's in a character
+                                 * class, and is not at either edge of the character class. To account for this, don't consider '-'
+                                 * characters to be valid in general, and filter out '-' characters that appear in the middle of a
+                                 * character class.
+                                 */
+                                if (characterClassNode.start + 1 !== characterNode.start && characterNode.end !== characterClassNode.end - 1) {
+
+                                    return;
+                                }
+                            }
+                        } else { // unicodeSets mode
+                            if (REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR.has(escapedChar)) {
+
+                                // Escaping is valid if it is a ClassSetReservedDoublePunctuator.
+                                if (pattern[characterNode.end] === escapedChar) {
+                                    return;
+                                }
+                                if (pattern[characterNode.start - 1] === escapedChar) {
+                                    if (escapedChar !== "^") {
+                                        return;
+                                    }
+
+                                    // If the previous character is a `negate` caret(`^`), escape to caret is unnecessary.
+
+                                    if (!characterClassNode.negate) {
+                                        return;
+                                    }
+                                    const negateCaretIndex = characterClassNode.start + 1;
+
+                                    if (negateCaretIndex < characterNode.start - 1) {
+                                        return;
+                                    }
+                                }
+                            }
+
+                            if (characterNode.parent.type === "ClassIntersection" || characterNode.parent.type === "ClassSubtraction") {
+                                disableEscapeBackslashSuggest = true;
+                            }
+                        }
+                    }
+
+                    report(
+                        node,
+                        reportedIndex,
+                        escapedChar,
+                        disableEscapeBackslashSuggest
+                    );
+                }
+            });
+        }
+
         /**
          * Checks if a node has an escape.
          * @param {ASTNode} node node to check.
@@ -220,32 +320,7 @@ module.exports = {
                     validateString(node, match);
                 }
             } else if (node.regex) {
-                parseRegExp(node.regex.pattern)
-
-                    /*
-                     * The '-' character is a special case, because it's only valid to escape it if it's in a character
-                     * class, and is not at either edge of the character class. To account for this, don't consider '-'
-                     * characters to be valid in general, and filter out '-' characters that appear in the middle of a
-                     * character class.
-                     */
-                    .filter(charInfo => !(charInfo.text === "-" && charInfo.inCharClass && !charInfo.startsCharClass && !charInfo.endsCharClass))
-
-                    /*
-                     * The '^' character is also a special case; it must always be escaped outside of character classes, but
-                     * it only needs to be escaped in character classes if it's at the beginning of the character class. To
-                     * account for this, consider it to be a valid escape character outside of character classes, and filter
-                     * out '^' characters that appear at the start of a character class.
-                     */
-                    .filter(charInfo => !(charInfo.text === "^" && charInfo.startsCharClass))
-
-                    // Filter out characters that aren't escaped.
-                    .filter(charInfo => charInfo.escaped)
-
-                    // Filter out characters that are valid to escape, based on their position in the regular expression.
-                    .filter(charInfo => !(charInfo.inCharClass ? REGEX_GENERAL_ESCAPES : REGEX_NON_CHARCLASS_ESCAPES).has(charInfo.text))
-
-                    // Report all the remaining characters.
-                    .forEach(charInfo => report(node, charInfo.index, charInfo.text));
+                validateRegExp(node);
             }
 
         }