Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add Unicode flag suggestion in no-misleading-character-class #15867

Merged
merged 13 commits into from May 20, 2022
2 changes: 2 additions & 0 deletions docs/src/rules/no-misleading-character-class.md
Expand Up @@ -7,6 +7,8 @@ rule_type: problem

<!--RECOMMENDED-->

<!--SUGGESTIONS-->

Disallows characters that are made up of multiple code points in character class syntax.

Unicode includes characters that are made up of multiple code points.
Expand Down
107 changes: 90 additions & 17 deletions lib/rules/no-misleading-character-class.js
Expand Up @@ -4,13 +4,16 @@
"use strict";

const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("eslint-utils");
const { RegExpParser, visitRegExpAST } = require("regexpp");
const { RegExpValidator, RegExpParser, visitRegExpAST } = require("regexpp");
const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
const astUtils = require("./utils/ast-utils.js");

//------------------------------------------------------------------------------
// Helpers
//------------------------------------------------------------------------------

const REGEXPP_LATEST_ECMA_VERSION = 2022;

/**
* Iterate character sequences of the given nodes.
*
Expand Down Expand Up @@ -109,35 +112,32 @@ module.exports = {
url: "https://eslint.org/docs/rules/no-misleading-character-class"
},

hasSuggestions: true,

schema: [],

messages: {
surrogatePairWithoutUFlag: "Unexpected surrogate pair in character class. Use 'u' flag.",
combiningClass: "Unexpected combined character in character class.",
emojiModifier: "Unexpected modified Emoji in character class.",
regionalIndicatorSymbol: "Unexpected national flag in character class.",
zwj: "Unexpected joined character sequence in character class."
zwj: "Unexpected joined character sequence in character class.",
suggestUnicodeFlag: "Add unicode 'u' flag to regex."
}
},
create(context) {
const sourceCode = context.getSourceCode();
const parser = new RegExpParser();

/**
* Verify a given regular expression.
* @param {Node} node The node to report.
* @param {string} pattern The regular expression pattern to verify.
* @param {string} flags The flags of the regular expression.
* @param {Function} unicodeFixer Fixer for missing "u" flag.
* @returns {void}
*/
function verify(node, pattern, flags) {
const has = {
surrogatePairWithoutUFlag: false,
combiningClass: false,
variationSelector: false,
emojiModifier: false,
regionalIndicatorSymbol: false,
zwj: false
};
function verify(node, pattern, flags, unicodeFixer) {
let patternNode;

try {
Expand All @@ -153,26 +153,75 @@ module.exports = {
return;
}

const foundKinds = new Set();

visitRegExpAST(patternNode, {
onCharacterClassEnter(ccNode) {
for (const chars of iterateCharacterSequence(ccNode.elements)) {
for (const kind of kinds) {
has[kind] = has[kind] || hasCharacterSequence[kind](chars);
if (hasCharacterSequence[kind](chars)) {
foundKinds.add(kind);
}
}
}
}
});

for (const kind of kinds) {
if (has[kind]) {
context.report({ node, messageId: kind });
for (const kind of foundKinds) {
let suggest;

if (kind === "surrogatePairWithoutUFlag") {
suggest = [{
messageId: "suggestUnicodeFlag",
fix: unicodeFixer
}];
}

context.report({
node,
messageId: kind,
suggest
});
}
}

/**
 * Determines whether a regular expression pattern remains valid once the `u` flag is applied.
 * @param {string} pattern The regular expression pattern to check.
 * @returns {boolean} `true` when the validator accepts the pattern in Unicode mode.
 * `false` when the validator rejects it, or when the configured ecmaVersion
 * is not a number or is 5 or lower (no `u` flag support).
 */
function isValidWithUnicodeFlag(pattern) {
    const configuredVersion = context.parserOptions.ecmaVersion;
    const lacksUnicodeFlagSupport = typeof configuredVersion !== "number" || configuredVersion <= 5;

    // Without a known ecmaVersion above 5, the 'u' flag cannot be assumed to be available.
    if (lacksUnicodeFlagSupport) {
        return false;
    }

    // The validator is given a year-based version: the configured value plus 2009,
    // capped at the latest year declared for regexpp in this file.
    const yearBasedVersion = Math.min(configuredVersion + 2009, REGEXPP_LATEST_ECMA_VERSION);
    const unicodeValidator = new RegExpValidator({ ecmaVersion: yearBasedVersion });

    try {

        // The fourth argument turns on Unicode mode; start and end offsets are left at their defaults.
        unicodeValidator.validatePattern(pattern, undefined, undefined, true);
        return true;
    } catch {
        return false;
    }
}

return {
"Literal[regex]"(node) {
verify(node, node.regex.pattern, node.regex.flags);
verify(node, node.regex.pattern, node.regex.flags, fixer => {
if (!isValidWithUnicodeFlag(node.regex.pattern)) {
return null;
}

return fixer.insertTextAfter(node, "u");
});
},
"Program"() {
const scope = context.getScope();
Expand All @@ -191,7 +240,31 @@ module.exports = {
const flags = getStringIfConstant(flagsNode, scope);

if (typeof pattern === "string") {
verify(node, pattern, flags || "");
verify(node, pattern, flags || "", fixer => {

if (!isValidWithUnicodeFlag(pattern)) {
return null;
}

if (node.arguments.length === 1) {
const penultimateToken = sourceCode.getLastToken(node, { skip: 1 }); // skip closing parenthesis

return fixer.insertTextAfter(
penultimateToken,
astUtils.isCommaToken(penultimateToken)
? ' "u",'
: ', "u"'
);
}

if ((flagsNode.type === "Literal" && typeof flagsNode.value === "string") || flagsNode.type === "TemplateLiteral") {
const range = [flagsNode.range[0], flagsNode.range[1] - 1];

return fixer.insertTextAfterRange(range, "u");
}

return null;
});
}
}
}
Expand Down