diff --git a/docs/rules/no-useless-backreference.md b/docs/rules/no-useless-backreference.md new file mode 100644 index 00000000000..d2360b39567 --- /dev/null +++ b/docs/rules/no-useless-backreference.md @@ -0,0 +1,130 @@ +# Disallow useless backreferences in regular expressions (no-useless-backreference) + +In JavaScript regular expressions, it's syntactically valid to define a backreference to a group that belongs to another alternative part of the pattern, a backreference to a group that appears after the backreference, a backreference to a group that contains that backreference, or a backreference to a group that is inside a negative lookaround. However, by the specification, in any of these cases the backreference always ends up matching only zero-length (the empty string), regardless of the context in which the backreference and the group appear. + +Backreferences that always successfully match zero-length and cannot match anything else are useless. They are basically ignored and can be removed without changing the behavior of the regular expression. + +```js +var regex = /^(?:(a)|\1b)$/; + +regex.test("a"); // true +regex.test("b"); // true! +regex.test("ab"); // false + +var equivalentRegex = /^(?:(a)|b)$/; + +equivalentRegex.test("a"); // true +equivalentRegex.test("b"); // true +equivalentRegex.test("ab"); // false +``` + +Useless backreference is a possible error in the code. It usually indicates that the regular expression does not work as intended. + +## Rule Details + +This rule aims to detect and disallow the following backreferences in regular expression: + +* Backreference to a group that is in another alternative, e.g., `/(a)|\1b/`. In such constructed regular expression, the backreference is expected to match what's been captured in, at that point, a non-participating group. +* Backreference to a group that appears later in the pattern, e.g., `/\1(a)/`. The group hasn't captured anything yet, and ECMAScript doesn't support forward references. Inside lookbehinds, which match backward, the opposite applies and this rule disallows backreference to a group that appears before in the same lookbehind, e.g., `/(?<=(a)\1)b/`. +* Backreference to a group from within the same group, e.g., `/(\1)/`. Similar to the previous, the group hasn't captured anything yet, and ECMAScript doesn't support nested references. +* Backreference to a group that is in a negative lookaround, if the backreference isn't in the same negative lookaround, e.g., `/a(?!(b)).\1/`. A negative lookaround (lookahead or lookbehind) succeeds only if its pattern cannot match, meaning that the group has failed. + +By the ECMAScript specification, all backreferences listed above are valid, always succeed to match zero-length, and cannot match anything else. Consequently, they don't produce parsing or runtime errors, but also don't affect the behavior of their regular expressions. They are syntactically valid but useless. + +This might be surprising to developers coming from other languages where some of these backreferences can be used in a meaningful way. + +```js +// in some other languages, this pattern would successfully match "aab" + +/^(?:(a)(?=a)|\1b)+$/.test("aab"); // false +``` + +Examples of **incorrect** code for this rule: + +```js +/*eslint no-useless-backreference: "error"*/ + +/^(?:(a)|\1b)$/; // reference to (a) into another alternative + +/^(?:(a)|b(?:c|\1))$/; // reference to (a) into another alternative + +/^(?:a|b(?:(c)|\1))$/; // reference to (c) into another alternative + +/\1(a)/; // forward reference to (a) + +RegExp('(a)\\2(b)'); // forward reference to (b) + +/(?:a)(b)\2(c)/; // forward reference to (c) + +/\k(?a)/; // forward reference to (?a) + +/(?<=(a)\1)b/; // backward reference to (a) from within the same lookbehind + +/(?(.)b\1)/; // nested reference to (?(.)b\1) + +/a(?!(b)).\1/; // reference to (b) into a negative lookahead + +/(?a)\k/; // reference to (?a) + +/(?<=\1(a))b/; // reference to (a), correctly before the group as they're in the same lookbehind + +/(?<=(a))b\1/; // reference to (a), correctly after the group as the backreference isn't in the lookbehind + +new RegExp('(.)\\1'); // reference to (.) + +/^(?:(a)\1)$/; // reference to (a) + +/^((a)\2)$/; // reference to (a) + +/a(?(.)b\2)/; // reference to (.) + +/a(?!(b|c)\1)./; // reference to (b|c), correct as it's from within the same negative lookahead + +/(? require("./no-unused-labels"), "no-unused-vars": () => require("./no-unused-vars"), "no-use-before-define": () => require("./no-use-before-define"), + "no-useless-backreference": () => require("./no-useless-backreference"), "no-useless-call": () => require("./no-useless-call"), "no-useless-catch": () => require("./no-useless-catch"), "no-useless-computed-key": () => require("./no-useless-computed-key"), diff --git a/lib/rules/no-useless-backreference.js b/lib/rules/no-useless-backreference.js new file mode 100644 index 00000000000..8a6fbe14daa --- /dev/null +++ b/lib/rules/no-useless-backreference.js @@ -0,0 +1,193 @@ +/** + * @fileoverview Rule to disallow useless backreferences in regular expressions + * @author Milos Djermanovic + */ + +"use strict"; + +//------------------------------------------------------------------------------ +// Requirements +//------------------------------------------------------------------------------ + +const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("eslint-utils"); +const { RegExpParser, visitRegExpAST } = require("regexpp"); +const lodash = require("lodash"); + +//------------------------------------------------------------------------------ +// Helpers +//------------------------------------------------------------------------------ + +const parser = new RegExpParser(); + +/** + * Finds the path from the given `regexpp` AST node to the root node. + * @param {regexpp.Node} node Node. + * @returns {regexpp.Node[]} Array that starts with the given node and ends with the root node. + */ +function getPathToRoot(node) { + const path = []; + let current = node; + + do { + path.push(current); + current = current.parent; + } while (current); + + return path; +} + +/** + * Determines whether the given `regexpp` AST node is a lookaround node. + * @param {regexpp.Node} node Node. + * @returns {boolean} `true` if it is a lookaround node. + */ +function isLookaround(node) { + return node.type === "Assertion" && + (node.kind === "lookahead" || node.kind === "lookbehind"); +} + +/** + * Determines whether the given `regexpp` AST node is a negative lookaround node. + * @param {regexpp.Node} node Node. + * @returns {boolean} `true` if it is a negative lookaround node. + */ +function isNegativeLookaround(node) { + return isLookaround(node) && node.negate; +} + +//------------------------------------------------------------------------------ +// Rule Definition +//------------------------------------------------------------------------------ + +module.exports = { + meta: { + type: "problem", + + docs: { + description: "disallow useless backreferences in regular expressions", + category: "Possible Errors", + recommended: false, + url: "https://eslint.org/docs/rules/no-useless-backreference" + }, + + schema: [], + + messages: { + nested: "Backreference '{{ bref }}' will be ignored. It references group '{{ group }}' from within that group.", + forward: "Backreference '{{ bref }}' will be ignored. It references group '{{ group }}' which appears later in the pattern.", + backward: "Backreference '{{ bref }}' will be ignored. It references group '{{ group }}' which appears before in the same lookbehind.", + disjunctive: "Backreference '{{ bref }}' will be ignored. It references group '{{ group }}' which is in another alternative.", + intoNegativeLookaround: "Backreference '{{ bref }}' will be ignored. It references group '{{ group }}' which is in a negative lookaround." + } + }, + + create(context) { + + /** + * Checks and reports useless backreferences in the given regular expression. + * @param {ASTNode} node Node that represents regular expression. A regex literal or RegExp constructor call. + * @param {string} pattern Regular expression pattern. + * @param {string} flags Regular expression flags. + * @returns {void} + */ + function checkRegex(node, pattern, flags) { + let regExpAST; + + try { + regExpAST = parser.parsePattern(pattern, 0, pattern.length, flags.includes("u")); + } catch (e) { + + // Ignore regular expressions with syntax errors + return; + } + + visitRegExpAST(regExpAST, { + onBackreferenceEnter(bref) { + const group = bref.resolved, + brefPath = getPathToRoot(bref), + groupPath = getPathToRoot(group); + let messageId = null; + + if (brefPath.includes(group)) { + + // group is bref's ancestor => bref is nested ('nested reference') => group hasn't matched yet when bref starts to match. + messageId = "nested"; + } else { + + // Start from the root to find the lowest common ancestor. + let i = brefPath.length - 1, + j = groupPath.length - 1; + + do { + i--; + j--; + } while (brefPath[i] === groupPath[j]); + + const indexOfLowestCommonAncestor = j + 1, + groupCut = groupPath.slice(0, indexOfLowestCommonAncestor), + commonPath = groupPath.slice(indexOfLowestCommonAncestor), + lowestCommonLookaround = commonPath.find(isLookaround), + isMatchingBackward = lowestCommonLookaround && lowestCommonLookaround.kind === "lookbehind"; + + if (!isMatchingBackward && bref.end <= group.start) { + + // bref is left, group is right ('forward reference') => group hasn't matched yet when bref starts to match. + messageId = "forward"; + } else if (isMatchingBackward && group.end <= bref.start) { + + // the opposite of the previous when the regex is matching backward in a lookbehind context. + messageId = "backward"; + } else if (lodash.last(groupCut).type === "Alternative") { + + // group's and bref's ancestor nodes below the lowest common ancestor are sibling alternatives => they're disjunctive. + messageId = "disjunctive"; + } else if (groupCut.some(isNegativeLookaround)) { + + // group is in a negative lookaround which isn't bref's ancestor => group has already failed when bref starts to match. + messageId = "intoNegativeLookaround"; + } + } + + if (messageId) { + context.report({ + node, + messageId, + data: { + bref: bref.raw, + group: group.raw + } + }); + } + } + }); + } + + return { + "Literal[regex]"(node) { + const { pattern, flags } = node.regex; + + checkRegex(node, pattern, flags); + }, + Program() { + const scope = context.getScope(), + tracker = new ReferenceTracker(scope), + traceMap = { + RegExp: { + [CALL]: true, + [CONSTRUCT]: true + } + }; + + for (const { node } of tracker.iterateGlobalReferences(traceMap)) { + const [patternNode, flagsNode] = node.arguments, + pattern = getStringIfConstant(patternNode, scope), + flags = getStringIfConstant(flagsNode, scope); + + if (typeof pattern === "string") { + checkRegex(node, pattern, flags || ""); + } + } + } + }; + } +}; diff --git a/tests/lib/rules/no-useless-backreference.js b/tests/lib/rules/no-useless-backreference.js new file mode 100644 index 00000000000..d51d5bf76e7 --- /dev/null +++ b/tests/lib/rules/no-useless-backreference.js @@ -0,0 +1,513 @@ +/** + * @fileoverview Tests for the no-useless-backreference rule + * @author Milos Djermanovic + */ + +"use strict"; + +//------------------------------------------------------------------------------ +// Requirements +//------------------------------------------------------------------------------ + +const rule = require("../../../lib/rules/no-useless-backreference"); +const { RuleTester } = require("../../../lib/rule-tester"); + +//------------------------------------------------------------------------------ +// Tests +//------------------------------------------------------------------------------ + +const ruleTester = new RuleTester({ parserOptions: { ecmaVersion: 2018 } }); + +ruleTester.run("no-useless-backreference", rule, { + valid: [ + + // not a regular expression + String.raw`'\1(a)'`, + String.raw`regExp('\\1(a)')`, + String.raw`new Regexp('\\1(a)', 'u')`, + String.raw`RegExp.foo('\\1(a)', 'u')`, + String.raw`new foo.RegExp('\\1(a)')`, + + // unknown pattern + String.raw`RegExp(p)`, + String.raw`new RegExp(p, 'u')`, + String.raw`RegExp('\\1(a)' + suffix)`, + "new RegExp(`${prefix}\\\\1(a)`)", + + // not the global RegExp + String.raw`let RegExp; new RegExp('\\1(a)');`, + String.raw`function foo() { var RegExp; RegExp('\\1(a)', 'u'); }`, + String.raw`function foo(RegExp) { new RegExp('\\1(a)'); }`, + String.raw`if (foo) { const RegExp = bar; RegExp('\\1(a)'); }`, + String.raw`/* globals RegExp:off */ new RegExp('\\1(a)');`, + { + code: String.raw`RegExp('\\1(a)');`, + globals: { RegExp: "off" } + }, + + // no capturing groups + String.raw`/(?:)/`, + String.raw`/(?:a)/`, + String.raw`new RegExp('')`, + String.raw`RegExp('(?:a)|(?:b)*')`, + String.raw`/^ab|[cd].\n$/`, + + // no backreferences + String.raw`/(a)/`, + String.raw`RegExp('(a)|(b)')`, + String.raw`new RegExp('\\n\\d(a)')`, + String.raw`/\0(a)/`, + String.raw`/\0(a)/u`, + String.raw`/(?<=(a))(b)(?=(c))/`, + String.raw`/(?a)/`, + + // not really a backreference + String.raw`RegExp('\1(a)')`, // string octal escape + String.raw`RegExp('\\\\1(a)')`, // escaped backslash + String.raw`/\\1(a)/`, // // escaped backslash + String.raw`/\1/`, // group 1 doesn't exist, this is a regex octal escape + String.raw`/^\1$/`, // group 1 doesn't exist, this is a regex octal escape + String.raw`/\2(a)/`, // group 2 doesn't exist, this is a regex octal escape + String.raw`/\1(?:a)/`, // group 1 doesn't exist, this is a regex octal escape + String.raw`/\1(?=a)/`, // group 1 doesn't exist, this is a regex octal escape + String.raw`/\1(?!a)/`, // group 1 doesn't exist, this is a regex octal escape + String.raw`/^[\1](a)$/`, // \N in a character class is a regex octal escape + String.raw`new RegExp('[\\1](a)')`, // \N in a character class is a regex octal escape + String.raw`/\11(a)/`, // regex octal escape \11, regex matches "\x09a" + String.raw`/\k(a)/`, // without the 'u' flag and any named groups this isn't a syntax error, matches "ka" + String.raw`/^(a)\1\2$/`, // \1 is a backreference, \2 is an octal escape sequence. + + // Valid backreferences: correct position, after the group + String.raw`/(a)\1/`, + String.raw`/(a).\1/`, + String.raw`RegExp('(a)\\1(b)')`, + String.raw`/(a)(b)\2(c)/`, + String.raw`/(?a)\k/`, + String.raw`new RegExp('(.)\\1')`, + String.raw`RegExp('(a)\\1(?:b)')`, + String.raw`/(a)b\1/`, + String.raw`/((a)\2)/`, + String.raw`/((a)b\2c)/`, + String.raw`/^(?:(a)\1)$/`, + String.raw`/^((a)\2)$/`, + String.raw`/^(((a)\3))|b$/`, + String.raw`/a(?(.)b\2)/`, + String.raw`/(a)?(b)*(\1)(c)/`, + String.raw`/(a)?(b)*(\2)(c)/`, + String.raw`/(?<=(a))b\1/`, + String.raw`/(?<=(?=(a)\1))b/`, + + // Valid backreferences: correct position before the group when they're both in the same lookbehind + String.raw`/(?', 'u')`, // \1 would be an error, but \k produces syntax error because of the u flag + String.raw`new RegExp('\\k(?a)\\k')` // \k would be an error, but \k produces syntax error because group doesn't exist + ], + + invalid: [ + + // full message tests + { + code: String.raw`/(b)(\2a)/`, + errors: [{ + message: String.raw`Backreference '\2' will be ignored. It references group '(\2a)' from within that group.`, + type: "Literal" + }] + }, + { + code: String.raw`/\k(?a)/`, + errors: [{ + message: String.raw`Backreference '\k' will be ignored. It references group '(?a)' which appears later in the pattern.`, + type: "Literal" + }] + }, + { + code: String.raw`RegExp('(a|bc)|\\1')`, + errors: [{ + message: String.raw`Backreference '\1' will be ignored. It references group '(a|bc)' which is in another alternative.`, + type: "CallExpression" + }] + }, + { + code: String.raw`new RegExp('(?!(?\\n))\\1')`, + errors: [{ + message: String.raw`Backreference '\1' will be ignored. It references group '(?\n)' which is in a negative lookaround.`, + type: "NewExpression" + }] + }, + { + code: String.raw`/(?(.)b\1)/`, + errors: [{ messageId: "nested", data: { bref: String.raw`\1`, group: String.raw`(?(.)b\1)` }, type: "Literal" }] + }, + { + code: String.raw`/a(?\k)b/`, + errors: [{ messageId: "nested", data: { bref: String.raw`\k`, group: String.raw`(?\k)` }, type: "Literal" }] + }, + { + code: String.raw`/^(\1)*$/`, + errors: [{ messageId: "nested", data: { bref: String.raw`\1`, group: String.raw`(\1)` }, type: "Literal" }] + }, + { + code: String.raw`/^(?:a)(?:((?:\1)))*$/`, + errors: [{ messageId: "nested", data: { bref: String.raw`\1`, group: String.raw`((?:\1))` }, type: "Literal" }] + }, + { + code: String.raw`/(?!(\1))/`, + errors: [{ messageId: "nested", data: { bref: String.raw`\1`, group: String.raw`(\1)` }, type: "Literal" }] + }, + { + code: String.raw`/a|(b\1c)/`, + errors: [{ messageId: "nested", data: { bref: String.raw`\1`, group: String.raw`(b\1c)` }, type: "Literal" }] + }, + { + code: String.raw`/(a|(\1))/`, + errors: [{ messageId: "nested", data: { bref: String.raw`\1`, group: String.raw`(a|(\1))` }, type: "Literal" }] + }, + { + code: String.raw`/(a|(\2))/`, + errors: [{ messageId: "nested", data: { bref: String.raw`\2`, group: String.raw`(\2)` }, type: "Literal" }] + }, + { + code: String.raw`/(?:a|(\1))/`, + errors: [{ messageId: "nested", data: { bref: String.raw`\1`, group: String.raw`(\1)` }, type: "Literal" }] + }, + { + code: String.raw`/(a)?(b)*(\3)/`, + errors: [{ messageId: "nested", data: { bref: String.raw`\3`, group: String.raw`(\3)` }, type: "Literal" }] + }, + { + code: String.raw`/(?<=(a\1))b/`, + errors: [{ messageId: "nested", data: { bref: String.raw`\1`, group: String.raw`(a\1)` }, type: "Literal" }] + }, + + // forward + { + code: String.raw`/\1(a)/`, + errors: [{ messageId: "forward", data: { bref: String.raw`\1`, group: String.raw`(a)` }, type: "Literal" }] + }, + { + code: String.raw`/\1.(a)/`, + errors: [{ messageId: "forward", data: { bref: String.raw`\1`, group: String.raw`(a)` }, type: "Literal" }] + }, + { + code: String.raw`/(?:\1)(?:(a))/`, + errors: [{ messageId: "forward", data: { bref: String.raw`\1`, group: String.raw`(a)` }, type: "Literal" }] + }, + { + code: String.raw`/(?:\1)(?:((a)))/`, + errors: [{ messageId: "forward", data: { bref: String.raw`\1`, group: String.raw`((a))` }, type: "Literal" }] + }, + { + code: String.raw`/(?:\2)(?:((a)))/`, + errors: [{ messageId: "forward", data: { bref: String.raw`\2`, group: String.raw`(a)` }, type: "Literal" }] + }, + { + code: String.raw`/(?:\1)(?:((?:a)))/`, + errors: [{ messageId: "forward", data: { bref: String.raw`\1`, group: String.raw`((?:a))` }, type: "Literal" }] + }, + { + code: String.raw`/(\2)(a)/`, + errors: [{ messageId: "forward", data: { bref: String.raw`\2`, group: String.raw`(a)` }, type: "Literal" }] + }, + { + code: String.raw`RegExp('(a)\\2(b)')`, + errors: [{ messageId: "forward", data: { bref: String.raw`\2`, group: String.raw`(b)` }, type: "CallExpression" }] + }, + { + code: String.raw`/(?:a)(b)\2(c)/`, + errors: [{ messageId: "forward", data: { bref: String.raw`\2`, group: String.raw`(c)` }, type: "Literal" }] + }, + { + code: String.raw`/\k(?a)/`, + errors: [{ messageId: "forward", data: { bref: String.raw`\k`, group: String.raw`(?a)` }, type: "Literal" }] + }, + { + code: String.raw`/(?:a(b)\2)(c)/`, + errors: [{ messageId: "forward", data: { bref: String.raw`\2`, group: String.raw`(c)` }, type: "Literal" }] + }, + { + code: String.raw`new RegExp('(a)(b)\\3(c)')`, + errors: [{ messageId: "forward", data: { bref: String.raw`\3`, group: String.raw`(c)` }, type: "NewExpression" }] + }, + { + code: String.raw`/\1(?<=(a))./`, + errors: [{ messageId: "forward", data: { bref: String.raw`\1`, group: String.raw`(a)` }, type: "Literal" }] + }, + { + code: String.raw`/\1(?