Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: check Unicode code point escapes in no-control-regex #15862

Merged
merged 1 commit into from May 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
17 changes: 10 additions & 7 deletions docs/src/rules/no-control-regex.md
Expand Up @@ -19,6 +19,7 @@ The following elements of regular expression patterns are considered possible er

* Hexadecimal character escapes from `\x00` to `\x1F`.
* Unicode character escapes from `\u0000` to `\u001F`.
* Unicode code point escapes from `\u{0}` to `\u{1F}`.
* Unescaped raw characters from U+0000 to U+001F.

Control escapes such as `\t` and `\n` are allowed by this rule.
Expand All @@ -32,8 +33,9 @@ var pattern1 = /\x00/;
var pattern2 = /\x0C/;
var pattern3 = /\x1F/;
var pattern4 = /\u000C/;
var pattern5 = new RegExp("\x0C"); // raw U+000C character in the pattern
var pattern6 = new RegExp("\\x0C"); // \x0C pattern
var pattern5 = /\u{C}/u;
var pattern6 = new RegExp("\x0C"); // raw U+000C character in the pattern
var pattern7 = new RegExp("\\x0C"); // \x0C pattern
```

Examples of **correct** code for this rule:
Expand All @@ -43,11 +45,12 @@ Examples of **correct** code for this rule:

var pattern1 = /\x20/;
var pattern2 = /\u0020/;
var pattern3 = /\t/;
var pattern4 = /\n/;
var pattern5 = new RegExp("\x20");
var pattern6 = new RegExp("\\t");
var pattern7 = new RegExp("\\n");
var pattern3 = /\u{20}/u;
var pattern4 = /\t/;
var pattern5 = /\n/;
var pattern6 = new RegExp("\x20");
var pattern7 = new RegExp("\\t");
var pattern8 = new RegExp("\\n");
```

## Known Limitations
Expand Down
33 changes: 23 additions & 10 deletions lib/rules/no-control-regex.js
Expand Up @@ -30,10 +30,12 @@ const collector = new (class {
}
}

collectControlChars(regexpStr) {
collectControlChars(regexpStr, flags) {
const uFlag = typeof flags === "string" && flags.includes("u");

try {
this._source = regexpStr;
this._validator.validatePattern(regexpStr); // Call onCharacter hook
this._validator.validatePattern(regexpStr, void 0, void 0, uFlag); // Call onCharacter hook
} catch {

// Ignore syntax errors in RegExp.
Expand Down Expand Up @@ -68,32 +70,43 @@ module.exports = {

/**
* Get the regex expression
* @param {ASTNode} node node to evaluate
* @returns {RegExp|null} Regex if found else null
* @param {ASTNode} node `Literal` node to evaluate
* @returns {{ pattern: string, flags: string | null } | null} Regex if found (the given node is either a regex literal
* or a string literal that is the pattern argument of a `RegExp(...)` or `new RegExp(...)` call). Otherwise `null`. If the flags
* argument is missing or is not a string literal, the `flags` property will be `null`.
* @private
*/
function getRegExpPattern(node) {
function getRegExp(node) {
// Regex literal nodes expose their details on `node.regex`.
if (node.regex) {
return node.regex.pattern;
return node.regex;
}
// Otherwise, accept only a string literal that is the first argument of `RegExp(...)` or `new RegExp(...)`.
if (typeof node.value === "string" &&
(node.parent.type === "NewExpression" || node.parent.type === "CallExpression") &&
node.parent.callee.type === "Identifier" &&
node.parent.callee.name === "RegExp" &&
node.parent.arguments[0] === node
) {
return node.value;
const pattern = node.value;
// Flags are taken only from a second argument that is itself a string literal; in every other case they are `null`.
const flags =
node.parent.arguments.length > 1 &&
node.parent.arguments[1].type === "Literal" &&
typeof node.parent.arguments[1].value === "string"
? node.parent.arguments[1].value
: null;

return { pattern, flags };
}

// Neither a regex literal nor the pattern argument of a RegExp call.
return null;
}

return {
Literal(node) {
const pattern = getRegExpPattern(node);
const regExp = getRegExp(node);

if (pattern) {
const controlCharacters = collector.collectControlChars(pattern);
if (regExp) {
const { pattern, flags } = regExp;
const controlCharacters = collector.collectControlChars(pattern, flags);

if (controlCharacters.length > 0) {
context.report({
Expand Down
41 changes: 40 additions & 1 deletion tests/lib/rules/no-control-regex.js
Expand Up @@ -26,7 +26,14 @@ ruleTester.run("no-control-regex", rule, {
"var regex = RegExp('x1f')",
"new RegExp('[')",
"RegExp('[')",
"new (function foo(){})('\\x1f')"
"new (function foo(){})('\\x1f')",
{ code: String.raw`/\u{20}/u`, parserOptions: { ecmaVersion: 2015 } },
String.raw`/\u{1F}/`,
String.raw`/\u{1F}/g`,
String.raw`new RegExp("\\u{20}", "u")`,
String.raw`new RegExp("\\u{1F}")`,
String.raw`new RegExp("\\u{1F}", "g")`,
String.raw`new RegExp("\\u{1F}", flags)` // when flags are unknown, this rule assumes there's no `u` flag
],
invalid: [
{ code: String.raw`var regex = /\x1f/`, errors: [{ messageId: "unexpected", data: { controlChars: "\\x1f" }, type: "Literal" }] },
Expand All @@ -46,6 +53,38 @@ ruleTester.run("no-control-regex", rule, {
code: String.raw`var regex = /(?<\u{1d49c}>.)\x1f/`,
parserOptions: { ecmaVersion: 2020 },
errors: [{ messageId: "unexpected", data: { controlChars: "\\x1f" }, type: "Literal" }]
},
{
code: String.raw`new RegExp("\\u001F", flags)`,
errors: [{ messageId: "unexpected", data: { controlChars: "\\x1f" }, type: "Literal" }]
},
{
code: String.raw`/\u{1111}*\x1F/u`,
parserOptions: { ecmaVersion: 2015 },
errors: [{ messageId: "unexpected", data: { controlChars: "\\x1f" }, type: "Literal" }]
},
{
code: String.raw`new RegExp("\\u{1111}*\\x1F", "u")`,
parserOptions: { ecmaVersion: 2015 },
errors: [{ messageId: "unexpected", data: { controlChars: "\\x1f" }, type: "Literal" }]
},
{
code: String.raw`/\u{1F}/u`,
parserOptions: { ecmaVersion: 2015 },
errors: [{ messageId: "unexpected", data: { controlChars: "\\x1f" }, type: "Literal" }]
},
{
code: String.raw`/\u{1F}/gui`,
parserOptions: { ecmaVersion: 2015 },
errors: [{ messageId: "unexpected", data: { controlChars: "\\x1f" }, type: "Literal" }]
},
{
code: String.raw`new RegExp("\\u{1F}", "u")`,
errors: [{ messageId: "unexpected", data: { controlChars: "\\x1f" }, type: "Literal" }]
},
{
code: String.raw`new RegExp("\\u{1F}", "gui")`,
errors: [{ messageId: "unexpected", data: { controlChars: "\\x1f" }, type: "Literal" }]
}
]
});