Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: no-useless-escape support v flag #17420

Merged
merged 4 commits into from Jul 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
235 changes: 155 additions & 80 deletions lib/rules/no-useless-escape.js
Expand Up @@ -6,7 +6,12 @@
"use strict";

const astUtils = require("./utils/ast-utils");
const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp");

/**
* @typedef {import('@eslint-community/regexpp').AST.CharacterClass} CharacterClass
* @typedef {import('@eslint-community/regexpp').AST.ExpressionCharacterClass} ExpressionCharacterClass
*/
//------------------------------------------------------------------------------
// Rule Definition
//------------------------------------------------------------------------------
Expand All @@ -28,55 +33,17 @@ const VALID_STRING_ESCAPES = union(new Set("\\nrvtbfux"), astUtils.LINEBREAKS);
const REGEX_GENERAL_ESCAPES = new Set("\\bcdDfnpPrsStvwWxu0123456789]");
const REGEX_NON_CHARCLASS_ESCAPES = union(REGEX_GENERAL_ESCAPES, new Set("^/.$*+?[{}|()Bk"));

/**
* Parses a regular expression into a list of characters with character class info.
* @param {string} regExpText The raw text used to create the regular expression
* @returns {Object[]} A list of characters, each with info on escaping and whether they're in a character class.
* @example
*
* parseRegExp("a\\b[cd-]");
*
* // returns:
* [
* { text: "a", index: 0, escaped: false, inCharClass: false, startsCharClass: false, endsCharClass: false },
* { text: "b", index: 2, escaped: true, inCharClass: false, startsCharClass: false, endsCharClass: false },
* { text: "c", index: 4, escaped: false, inCharClass: true, startsCharClass: true, endsCharClass: false },
* { text: "d", index: 5, escaped: false, inCharClass: true, startsCharClass: false, endsCharClass: false },
* { text: "-", index: 6, escaped: false, inCharClass: true, startsCharClass: false, endsCharClass: false }
* ];
*
/*
* Set of characters that require escaping in character classes in `unicodeSets` mode.
* ( ) [ ] { } / - \ | are ClassSetSyntaxCharacter
*/
function parseRegExp(regExpText) {
const charList = [];
const REGEX_CLASSSET_CHARACTER_ESCAPES = union(REGEX_GENERAL_ESCAPES, new Set("q/[{}|()-"));
mdjermanovic marked this conversation as resolved.
Show resolved Hide resolved

regExpText.split("").reduce((state, char, index) => {
if (!state.escapeNextChar) {
if (char === "\\") {
return Object.assign(state, { escapeNextChar: true });
}
if (char === "[" && !state.inCharClass) {
return Object.assign(state, { inCharClass: true, startingCharClass: true });
}
if (char === "]" && state.inCharClass) {
if (charList.length && charList[charList.length - 1].inCharClass) {
charList[charList.length - 1].endsCharClass = true;
}
return Object.assign(state, { inCharClass: false, startingCharClass: false });
}
}
charList.push({
text: char,
index,
escaped: state.escapeNextChar,
inCharClass: state.inCharClass,
startsCharClass: state.startingCharClass,
endsCharClass: false
});
return Object.assign(state, { escapeNextChar: false, startingCharClass: false });
}, { escapeNextChar: false, inCharClass: false, startingCharClass: false });

return charList;
}
/*
* A single character set of ClassSetReservedDoublePunctuator.
* && !! ## $$ %% ** ++ ,, .. :: ;; << == >> ?? @@ ^^ `` ~~ are ClassSetReservedDoublePunctuator
*/
const REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR = new Set("!#$%&*+,.:;<=>?@^`~");

/** @type {import('../shared/types').Rule} */
module.exports = {
Expand All @@ -103,15 +70,17 @@ module.exports = {

create(context) {
const sourceCode = context.sourceCode;
const parser = new RegExpParser();

/**
* Reports a node
* @param {ASTNode} node The node to report
* @param {number} startOffset The backslash's offset from the start of the node
* @param {string} character The uselessly escaped character (not including the backslash)
* @param {boolean} [disableEscapeBackslashSuggest] `true` if escapeBackslash suggestion should be turned off.
* @returns {void}
*/
function report(node, startOffset, character) {
function report(node, startOffset, character, disableEscapeBackslashSuggest) {
const rangeStart = node.range[0] + startOffset;
const range = [rangeStart, rangeStart + 1];
const start = sourceCode.getLocFromIndex(rangeStart);
Expand All @@ -134,12 +103,16 @@ module.exports = {
return fixer.removeRange(range);
}
},
{
messageId: "escapeBackslash",
fix(fixer) {
return fixer.insertTextBeforeRange(range, "\\");
}
}
...disableEscapeBackslashSuggest
? []
: [
{
messageId: "escapeBackslash",
fix(fixer) {
return fixer.insertTextBeforeRange(range, "\\");
}
}
]
]
});
}
Expand Down Expand Up @@ -182,6 +155,133 @@ module.exports = {
}
}

/**
* Checks if the escape character in given regexp is unnecessary.
* @private
* @param {ASTNode} node node to validate.
* @returns {void}
*/
function validateRegExp(node) {
const { pattern, flags } = node.regex;
let patternNode;
const unicode = flags.includes("u");
const unicodeSets = flags.includes("v");

try {
patternNode = parser.parsePattern(pattern, 0, pattern.length, { unicode, unicodeSets });
} catch {

// Ignore regular expressions with syntax errors
return;
}

/** @type {(CharacterClass | ExpressionCharacterClass)[]} */
const characterClassStack = [];

visitRegExpAST(patternNode, {
onCharacterClassEnter: characterClassNode => characterClassStack.unshift(characterClassNode),
onCharacterClassLeave: () => characterClassStack.shift(),
onExpressionCharacterClassEnter: characterClassNode => characterClassStack.unshift(characterClassNode),
onExpressionCharacterClassLeave: () => characterClassStack.shift(),
onCharacterEnter(characterNode) {
if (!characterNode.raw.startsWith("\\")) {

// It's not an escaped character.
return;
}

const escapedChar = characterNode.raw.slice(1);

if (escapedChar !== String.fromCodePoint(characterNode.value)) {

// It's a valid escape.
return;
}
let allowedEscapes;

if (characterClassStack.length) {
allowedEscapes = unicodeSets ? REGEX_CLASSSET_CHARACTER_ESCAPES : REGEX_GENERAL_ESCAPES;
} else {
allowedEscapes = REGEX_NON_CHARCLASS_ESCAPES;
}
if (allowedEscapes.has(escapedChar)) {
return;
}

const reportedIndex = characterNode.start + 1;
let disableEscapeBackslashSuggest = false;

if (characterClassStack.length) {
const characterClassNode = characterClassStack[0];

if (escapedChar === "^") {

/*
* The '^' character is also a special case; it must always be escaped outside of character classes, but
* it only needs to be escaped in character classes if it's at the beginning of the character class. To
* account for this, consider it to be a valid escape character outside of character classes, and filter
* out '^' characters that appear at the start of a character class.
*/
if (characterClassNode.start + 1 === characterNode.start) {

return;
}
}
if (!unicodeSets) {
if (escapedChar === "-") {

/*
* The '-' character is a special case, because it's only valid to escape it if it's in a character
* class, and is not at either edge of the character class. To account for this, don't consider '-'
* characters to be valid in general, and filter out '-' characters that appear in the middle of a
* character class.
*/
if (characterClassNode.start + 1 !== characterNode.start && characterNode.end !== characterClassNode.end - 1) {

return;
}
}
} else { // unicodeSets mode
if (REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR.has(escapedChar)) {

// Escaping is valid if it is a ClassSetReservedDoublePunctuator.
if (pattern[characterNode.end] === escapedChar) {
return;
}
if (pattern[characterNode.start - 1] === escapedChar) {
if (escapedChar !== "^") {
return;
}

// If the previous character is a `negate` caret(`^`), escape to caret is unnecessary.

if (!characterClassNode.negate) {
return;
}
const negateCaretIndex = characterClassNode.start + 1;

if (negateCaretIndex < characterNode.start - 1) {
return;
}
}
}

if (characterNode.parent.type === "ClassIntersection" || characterNode.parent.type === "ClassSubtraction") {
disableEscapeBackslashSuggest = true;
}
}
}

report(
node,
reportedIndex,
escapedChar,
disableEscapeBackslashSuggest
);
}
});
}

/**
* Checks if a node has an escape.
* @param {ASTNode} node node to check.
Expand Down Expand Up @@ -220,32 +320,7 @@ module.exports = {
validateString(node, match);
}
} else if (node.regex) {
parseRegExp(node.regex.pattern)

/*
* The '-' character is a special case, because it's only valid to escape it if it's in a character
* class, and is not at either edge of the character class. To account for this, don't consider '-'
* characters to be valid in general, and filter out '-' characters that appear in the middle of a
* character class.
*/
.filter(charInfo => !(charInfo.text === "-" && charInfo.inCharClass && !charInfo.startsCharClass && !charInfo.endsCharClass))

/*
* The '^' character is also a special case; it must always be escaped outside of character classes, but
* it only needs to be escaped in character classes if it's at the beginning of the character class. To
* account for this, consider it to be a valid escape character outside of character classes, and filter
* out '^' characters that appear at the start of a character class.
*/
.filter(charInfo => !(charInfo.text === "^" && charInfo.startsCharClass))

// Filter out characters that aren't escaped.
.filter(charInfo => charInfo.escaped)

// Filter out characters that are valid to escape, based on their position in the regular expression.
.filter(charInfo => !(charInfo.inCharClass ? REGEX_GENERAL_ESCAPES : REGEX_NON_CHARCLASS_ESCAPES).has(charInfo.text))

// Report all the remaining characters.
.forEach(charInfo => report(node, charInfo.index, charInfo.text));
validateRegExp(node);
}

}
Expand Down