Skip to content

Commit

Permalink
feat: add Unicode flag suggestion in no-misleading-character-class (#…
Browse files Browse the repository at this point in the history
…15867)

* refactor: Simplify kinds tracking with a Set

* feat: Add fixer for missing regex unicode flag

* test: Update tests

* refactor: Change fix to suggestion

* Revert "test: Update tests"

* Address review comments

* Add tests

* Update lib/rules/no-misleading-character-class.js

Co-authored-by: Milos Djermanovic <milos.djermanovic@gmail.com>

* add suggestions:null assertions

* update docs

* add more tests

* fix edge cases when inserting flags argument

* validate pattern for regex literals

* validate pattern for regex constructor calls

Co-authored-by: Mathias Rasmussen <mathiasvr@gmail.com>
  • Loading branch information
mdjermanovic and mathiasvr committed May 20, 2022
1 parent c686e4c commit cab0c22
Show file tree
Hide file tree
Showing 3 changed files with 472 additions and 71 deletions.
2 changes: 2 additions & 0 deletions docs/src/rules/no-misleading-character-class.md
Expand Up @@ -7,6 +7,8 @@ rule_type: problem

<!--RECOMMENDED-->

<!--SUGGESTIONS-->

Disallows characters which are made with multiple code points in character class syntax.

Unicode includes the characters which are made with multiple code points.
Expand Down
107 changes: 90 additions & 17 deletions lib/rules/no-misleading-character-class.js
Expand Up @@ -4,13 +4,16 @@
"use strict";

const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("eslint-utils");
const { RegExpParser, visitRegExpAST } = require("regexpp");
const { RegExpValidator, RegExpParser, visitRegExpAST } = require("regexpp");
const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
const astUtils = require("./utils/ast-utils.js");

//------------------------------------------------------------------------------
// Helpers
//------------------------------------------------------------------------------

const REGEXPP_LATEST_ECMA_VERSION = 2022;

/**
* Iterate character sequences of the given nodes.
*
Expand Down Expand Up @@ -109,35 +112,32 @@ module.exports = {
url: "https://eslint.org/docs/rules/no-misleading-character-class"
},

hasSuggestions: true,

schema: [],

messages: {
surrogatePairWithoutUFlag: "Unexpected surrogate pair in character class. Use 'u' flag.",
combiningClass: "Unexpected combined character in character class.",
emojiModifier: "Unexpected modified Emoji in character class.",
regionalIndicatorSymbol: "Unexpected national flag in character class.",
zwj: "Unexpected joined character sequence in character class."
zwj: "Unexpected joined character sequence in character class.",
suggestUnicodeFlag: "Add unicode 'u' flag to regex."
}
},
create(context) {
const sourceCode = context.getSourceCode();
const parser = new RegExpParser();

/**
* Verify a given regular expression.
* @param {Node} node The node to report.
* @param {string} pattern The regular expression pattern to verify.
* @param {string} flags The flags of the regular expression.
* @param {Function} unicodeFixer Fixer for missing "u" flag.
* @returns {void}
*/
function verify(node, pattern, flags) {
const has = {
surrogatePairWithoutUFlag: false,
combiningClass: false,
variationSelector: false,
emojiModifier: false,
regionalIndicatorSymbol: false,
zwj: false
};
function verify(node, pattern, flags, unicodeFixer) {
let patternNode;

try {
Expand All @@ -153,26 +153,75 @@ module.exports = {
return;
}

const foundKinds = new Set();

visitRegExpAST(patternNode, {
onCharacterClassEnter(ccNode) {
for (const chars of iterateCharacterSequence(ccNode.elements)) {
for (const kind of kinds) {
has[kind] = has[kind] || hasCharacterSequence[kind](chars);
if (hasCharacterSequence[kind](chars)) {
foundKinds.add(kind);
}
}
}
}
});

for (const kind of kinds) {
if (has[kind]) {
context.report({ node, messageId: kind });
for (const kind of foundKinds) {
let suggest;

if (kind === "surrogatePairWithoutUFlag") {
suggest = [{
messageId: "suggestUnicodeFlag",
fix: unicodeFixer
}];
}

context.report({
node,
messageId: kind,
suggest
});
}
}

/**
 * Determines whether a regular expression pattern still validates once the
 * `u` (unicode) flag is applied.
 * @param {string} pattern The regular expression pattern to verify.
 * @returns {boolean} `true` when the pattern validates with the `u` flag.
 * `false` when validation throws, or when the configured ecmaVersion is not a
 * number or is `<= 5` (unknown, or too old to support the `u` flag).
 */
function isValidWithUnicodeFlag(pattern) {
    const configuredVersion = context.parserOptions.ecmaVersion;
    const lacksUnicodeSupport = typeof configuredVersion !== "number" || configuredVersion <= 5;

    // An unknown ecmaVersion, or one without the 'u' flag, can never accept the flag.
    if (lacksUnicodeSupport) {
        return false;
    }

    // NOTE(review): the 2009 offset presumably maps an edition number (6, 7, ...)
    // to its year form; the result is capped at REGEXPP_LATEST_ECMA_VERSION.
    const validatorVersion = Math.min(configuredVersion + 2009, REGEXPP_LATEST_ECMA_VERSION);
    const unicodeValidator = new RegExpValidator({ ecmaVersion: validatorVersion });

    let validWithFlag = true;

    try {

        // start and end are left undefined; the last argument enables the u flag.
        unicodeValidator.validatePattern(pattern, undefined, undefined, /* uFlag = */ true);
    } catch {
        validWithFlag = false;
    }

    return validWithFlag;
}

return {
"Literal[regex]"(node) {
verify(node, node.regex.pattern, node.regex.flags);
verify(node, node.regex.pattern, node.regex.flags, fixer => {
if (!isValidWithUnicodeFlag(node.regex.pattern)) {
return null;
}

return fixer.insertTextAfter(node, "u");
});
},
"Program"() {
const scope = context.getScope();
Expand All @@ -191,7 +240,31 @@ module.exports = {
const flags = getStringIfConstant(flagsNode, scope);

if (typeof pattern === "string") {
verify(node, pattern, flags || "");
verify(node, pattern, flags || "", fixer => {

if (!isValidWithUnicodeFlag(pattern)) {
return null;
}

if (node.arguments.length === 1) {
const penultimateToken = sourceCode.getLastToken(node, { skip: 1 }); // skip closing parenthesis

return fixer.insertTextAfter(
penultimateToken,
astUtils.isCommaToken(penultimateToken)
? ' "u",'
: ', "u"'
);
}

if ((flagsNode.type === "Literal" && typeof flagsNode.value === "string") || flagsNode.type === "TemplateLiteral") {
const range = [flagsNode.range[0], flagsNode.range[1] - 1];

return fixer.insertTextAfterRange(range, "u");
}

return null;
});
}
}
}
Expand Down

0 comments on commit cab0c22

Please sign in to comment.