Skip to content

Commit

Permalink
feat: no-useless-escape support v flag (#17420)
Browse files Browse the repository at this point in the history
* feat: `no-useless-escape` support `v` flag

* fix: false positives for `/[\^]/v`

* fix: remove unnecessary `if` statement

* fix: incorrect suggestion
  • Loading branch information
ota-meshi committed Jul 28, 2023
1 parent 0aa0bc3 commit 1a2f966
Show file tree
Hide file tree
Showing 2 changed files with 888 additions and 81 deletions.
235 changes: 155 additions & 80 deletions lib/rules/no-useless-escape.js
Expand Up @@ -6,7 +6,12 @@
"use strict";

const astUtils = require("./utils/ast-utils");
const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp");

/**
* @typedef {import('@eslint-community/regexpp').AST.CharacterClass} CharacterClass
* @typedef {import('@eslint-community/regexpp').AST.ExpressionCharacterClass} ExpressionCharacterClass
*/
//------------------------------------------------------------------------------
// Rule Definition
//------------------------------------------------------------------------------
Expand All @@ -28,55 +33,17 @@ const VALID_STRING_ESCAPES = union(new Set("\\nrvtbfux"), astUtils.LINEBREAKS);
const REGEX_GENERAL_ESCAPES = new Set("\\bcdDfnpPrsStvwWxu0123456789]");
const REGEX_NON_CHARCLASS_ESCAPES = union(REGEX_GENERAL_ESCAPES, new Set("^/.$*+?[{}|()Bk"));

/**
* Parses a regular expression into a list of characters with character class info.
* @param {string} regExpText The raw text used to create the regular expression
* @returns {Object[]} A list of characters, each with info on escaping and whether they're in a character class.
* @example
*
* parseRegExp("a\\b[cd-]");
*
* // returns:
* [
* { text: "a", index: 0, escaped: false, inCharClass: false, startsCharClass: false, endsCharClass: false },
* { text: "b", index: 2, escaped: true, inCharClass: false, startsCharClass: false, endsCharClass: false },
* { text: "c", index: 4, escaped: false, inCharClass: true, startsCharClass: true, endsCharClass: false },
* { text: "d", index: 5, escaped: false, inCharClass: true, startsCharClass: false, endsCharClass: false },
* { text: "-", index: 6, escaped: false, inCharClass: true, startsCharClass: false, endsCharClass: false }
* ];
*
/*
* Set of characters that require escaping in character classes in `unicodeSets` mode.
* ( ) [ ] { } / - \ | are ClassSetSyntaxCharacter
*/
function parseRegExp(regExpText) {
const charList = [];
const REGEX_CLASSSET_CHARACTER_ESCAPES = union(REGEX_GENERAL_ESCAPES, new Set("q/[{}|()-"));

regExpText.split("").reduce((state, char, index) => {
if (!state.escapeNextChar) {
if (char === "\\") {
return Object.assign(state, { escapeNextChar: true });
}
if (char === "[" && !state.inCharClass) {
return Object.assign(state, { inCharClass: true, startingCharClass: true });
}
if (char === "]" && state.inCharClass) {
if (charList.length && charList[charList.length - 1].inCharClass) {
charList[charList.length - 1].endsCharClass = true;
}
return Object.assign(state, { inCharClass: false, startingCharClass: false });
}
}
charList.push({
text: char,
index,
escaped: state.escapeNextChar,
inCharClass: state.inCharClass,
startsCharClass: state.startingCharClass,
endsCharClass: false
});
return Object.assign(state, { escapeNextChar: false, startingCharClass: false });
}, { escapeNextChar: false, inCharClass: false, startingCharClass: false });

return charList;
}
/*
* A single character set of ClassSetReservedDoublePunctuator.
* && !! ## $$ %% ** ++ ,, .. :: ;; << == >> ?? @@ ^^ `` ~~ are ClassSetReservedDoublePunctuator
*/
const REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR = new Set("!#$%&*+,.:;<=>?@^`~");

/** @type {import('../shared/types').Rule} */
module.exports = {
Expand All @@ -103,15 +70,17 @@ module.exports = {

create(context) {
const sourceCode = context.sourceCode;
const parser = new RegExpParser();

/**
* Reports a node
* @param {ASTNode} node The node to report
* @param {number} startOffset The backslash's offset from the start of the node
* @param {string} character The uselessly escaped character (not including the backslash)
* @param {boolean} [disableEscapeBackslashSuggest] `true` if escapeBackslash suggestion should be turned off.
* @returns {void}
*/
function report(node, startOffset, character) {
function report(node, startOffset, character, disableEscapeBackslashSuggest) {
const rangeStart = node.range[0] + startOffset;
const range = [rangeStart, rangeStart + 1];
const start = sourceCode.getLocFromIndex(rangeStart);
Expand All @@ -134,12 +103,16 @@ module.exports = {
return fixer.removeRange(range);
}
},
{
messageId: "escapeBackslash",
fix(fixer) {
return fixer.insertTextBeforeRange(range, "\\");
}
}
...disableEscapeBackslashSuggest
? []
: [
{
messageId: "escapeBackslash",
fix(fixer) {
return fixer.insertTextBeforeRange(range, "\\");
}
}
]
]
});
}
Expand Down Expand Up @@ -182,6 +155,133 @@ module.exports = {
}
}

/**
* Checks if the escape character in given regexp is unnecessary.
* @private
* @param {ASTNode} node node to validate.
* @returns {void}
*/
function validateRegExp(node) {
const { pattern, flags } = node.regex;
let patternNode;
const unicode = flags.includes("u");
const unicodeSets = flags.includes("v");

try {
patternNode = parser.parsePattern(pattern, 0, pattern.length, { unicode, unicodeSets });
} catch {

// Ignore regular expressions with syntax errors
return;
}

/** @type {(CharacterClass | ExpressionCharacterClass)[]} */
const characterClassStack = [];

visitRegExpAST(patternNode, {
onCharacterClassEnter: characterClassNode => characterClassStack.unshift(characterClassNode),
onCharacterClassLeave: () => characterClassStack.shift(),
onExpressionCharacterClassEnter: characterClassNode => characterClassStack.unshift(characterClassNode),
onExpressionCharacterClassLeave: () => characterClassStack.shift(),
onCharacterEnter(characterNode) {
if (!characterNode.raw.startsWith("\\")) {

// It's not an escaped character.
return;
}

const escapedChar = characterNode.raw.slice(1);

if (escapedChar !== String.fromCodePoint(characterNode.value)) {

// It's a valid escape.
return;
}
let allowedEscapes;

if (characterClassStack.length) {
allowedEscapes = unicodeSets ? REGEX_CLASSSET_CHARACTER_ESCAPES : REGEX_GENERAL_ESCAPES;
} else {
allowedEscapes = REGEX_NON_CHARCLASS_ESCAPES;
}
if (allowedEscapes.has(escapedChar)) {
return;
}

const reportedIndex = characterNode.start + 1;
let disableEscapeBackslashSuggest = false;

if (characterClassStack.length) {
const characterClassNode = characterClassStack[0];

if (escapedChar === "^") {

/*
* The '^' character is also a special case; it must always be escaped outside of character classes, but
* it only needs to be escaped in character classes if it's at the beginning of the character class. To
* account for this, consider it to be a valid escape character outside of character classes, and filter
* out '^' characters that appear at the start of a character class.
*/
if (characterClassNode.start + 1 === characterNode.start) {

return;
}
}
if (!unicodeSets) {
if (escapedChar === "-") {

/*
* The '-' character is a special case, because it's only valid to escape it if it's in a character
* class, and is not at either edge of the character class. To account for this, don't consider '-'
* characters to be valid in general, and filter out '-' characters that appear in the middle of a
* character class.
*/
if (characterClassNode.start + 1 !== characterNode.start && characterNode.end !== characterClassNode.end - 1) {

return;
}
}
} else { // unicodeSets mode
if (REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR.has(escapedChar)) {

// Escaping is valid if it is a ClassSetReservedDoublePunctuator.
if (pattern[characterNode.end] === escapedChar) {
return;
}
if (pattern[characterNode.start - 1] === escapedChar) {
if (escapedChar !== "^") {
return;
}

// If the previous character is a `negate` caret(`^`), escape to caret is unnecessary.

if (!characterClassNode.negate) {
return;
}
const negateCaretIndex = characterClassNode.start + 1;

if (negateCaretIndex < characterNode.start - 1) {
return;
}
}
}

if (characterNode.parent.type === "ClassIntersection" || characterNode.parent.type === "ClassSubtraction") {
disableEscapeBackslashSuggest = true;
}
}
}

report(
node,
reportedIndex,
escapedChar,
disableEscapeBackslashSuggest
);
}
});
}

/**
* Checks if a node has an escape.
* @param {ASTNode} node node to check.
Expand Down Expand Up @@ -220,32 +320,7 @@ module.exports = {
validateString(node, match);
}
} else if (node.regex) {
parseRegExp(node.regex.pattern)

/*
* The '-' character is a special case, because it's only valid to escape it if it's in a character
* class, and is not at either edge of the character class. To account for this, don't consider '-'
* characters to be valid in general, and filter out '-' characters that appear in the middle of a
* character class.
*/
.filter(charInfo => !(charInfo.text === "-" && charInfo.inCharClass && !charInfo.startsCharClass && !charInfo.endsCharClass))

/*
* The '^' character is also a special case; it must always be escaped outside of character classes, but
* it only needs to be escaped in character classes if it's at the beginning of the character class. To
* account for this, consider it to be a valid escape character outside of character classes, and filter
* out '^' characters that appear at the start of a character class.
*/
.filter(charInfo => !(charInfo.text === "^" && charInfo.startsCharClass))

// Filter out characters that aren't escaped.
.filter(charInfo => charInfo.escaped)

// Filter out characters that are valid to escape, based on their position in the regular expression.
.filter(charInfo => !(charInfo.inCharClass ? REGEX_GENERAL_ESCAPES : REGEX_NON_CHARCLASS_ESCAPES).has(charInfo.text))

// Report all the remaining characters.
.forEach(charInfo => report(node, charInfo.index, charInfo.text));
validateRegExp(node);
}

}
Expand Down

0 comments on commit 1a2f966

Please sign in to comment.