Skip to content

Commit

Permalink
feat: check Unicode code point escapes in no-control-regex (#15862)
Browse files Browse the repository at this point in the history
Fixes #15809
  • Loading branch information
mdjermanovic committed May 20, 2022
1 parent 36287c0 commit 38ae956
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 18 deletions.
17 changes: 10 additions & 7 deletions docs/src/rules/no-control-regex.md
Expand Up @@ -22,6 +22,7 @@ The following elements of regular expression patterns are considered possible er

* Hexadecimal character escapes from `\x00` to `\x1F`.
* Unicode character escapes from `\u0000` to `\u001F`.
* Unicode code point escapes from `\u{0}` to `\u{1F}`.
* Unescaped raw characters from U+0000 to U+001F.

Control escapes such as `\t` and `\n` are allowed by this rule.
Expand All @@ -35,8 +36,9 @@ var pattern1 = /\x00/;
var pattern2 = /\x0C/;
var pattern3 = /\x1F/;
var pattern4 = /\u000C/;
var pattern5 = new RegExp("\x0C"); // raw U+000C character in the pattern
var pattern6 = new RegExp("\\x0C"); // \x0C pattern
var pattern5 = /\u{C}/u;
var pattern6 = new RegExp("\x0C"); // raw U+000C character in the pattern
var pattern7 = new RegExp("\\x0C"); // \x0C pattern
```

Examples of **correct** code for this rule:
Expand All @@ -46,11 +48,12 @@ Examples of **correct** code for this rule:

var pattern1 = /\x20/;
var pattern2 = /\u0020/;
var pattern3 = /\t/;
var pattern4 = /\n/;
var pattern5 = new RegExp("\x20");
var pattern6 = new RegExp("\\t");
var pattern7 = new RegExp("\\n");
var pattern3 = /\u{20}/u;
var pattern4 = /\t/;
var pattern5 = /\n/;
var pattern6 = new RegExp("\x20");
var pattern7 = new RegExp("\\t");
var pattern8 = new RegExp("\\n");
```

## Known Limitations
Expand Down
33 changes: 23 additions & 10 deletions lib/rules/no-control-regex.js
Expand Up @@ -30,10 +30,12 @@ const collector = new (class {
}
}

collectControlChars(regexpStr) {
collectControlChars(regexpStr, flags) {
const uFlag = typeof flags === "string" && flags.includes("u");

try {
this._source = regexpStr;
this._validator.validatePattern(regexpStr); // Call onCharacter hook
this._validator.validatePattern(regexpStr, void 0, void 0, uFlag); // Call onCharacter hook
} catch {

// Ignore syntax errors in RegExp.
Expand Down Expand Up @@ -68,32 +70,43 @@ module.exports = {

/**
* Get the pattern and flags of the regular expression represented by the given node
* @param {ASTNode} node node to evaluate
* @returns {RegExp|null} Regex if found else null
* @param {ASTNode} node `Literal` node to evaluate
* @returns {{ pattern: string, flags: string | null } | null} Regex if found (the given node is either a regex literal
* or a string literal that is the pattern argument of a RegExp constructor call). Otherwise `null`. If flags cannot be determined,
* the `flags` property will be `null`.
* @private
*/
function getRegExpPattern(node) {
function getRegExp(node) {
if (node.regex) {
return node.regex.pattern;
return node.regex;
}
if (typeof node.value === "string" &&
(node.parent.type === "NewExpression" || node.parent.type === "CallExpression") &&
node.parent.callee.type === "Identifier" &&
node.parent.callee.name === "RegExp" &&
node.parent.arguments[0] === node
) {
return node.value;
const pattern = node.value;
const flags =
node.parent.arguments.length > 1 &&
node.parent.arguments[1].type === "Literal" &&
typeof node.parent.arguments[1].value === "string"
? node.parent.arguments[1].value
: null;

return { pattern, flags };
}

return null;
}

return {
Literal(node) {
const pattern = getRegExpPattern(node);
const regExp = getRegExp(node);

if (pattern) {
const controlCharacters = collector.collectControlChars(pattern);
if (regExp) {
const { pattern, flags } = regExp;
const controlCharacters = collector.collectControlChars(pattern, flags);

if (controlCharacters.length > 0) {
context.report({
Expand Down
41 changes: 40 additions & 1 deletion tests/lib/rules/no-control-regex.js
Expand Up @@ -26,7 +26,14 @@ ruleTester.run("no-control-regex", rule, {
"var regex = RegExp('x1f')",
"new RegExp('[')",
"RegExp('[')",
"new (function foo(){})('\\x1f')"
"new (function foo(){})('\\x1f')",
{ code: String.raw`/\u{20}/u`, parserOptions: { ecmaVersion: 2015 } },
String.raw`/\u{1F}/`,
String.raw`/\u{1F}/g`,
String.raw`new RegExp("\\u{20}", "u")`,
String.raw`new RegExp("\\u{1F}")`,
String.raw`new RegExp("\\u{1F}", "g")`,
String.raw`new RegExp("\\u{1F}", flags)` // when flags are unknown, this rule assumes there's no `u` flag
],
invalid: [
{ code: String.raw`var regex = /\x1f/`, errors: [{ messageId: "unexpected", data: { controlChars: "\\x1f" }, type: "Literal" }] },
Expand All @@ -46,6 +53,38 @@ ruleTester.run("no-control-regex", rule, {
code: String.raw`var regex = /(?<\u{1d49c}>.)\x1f/`,
parserOptions: { ecmaVersion: 2020 },
errors: [{ messageId: "unexpected", data: { controlChars: "\\x1f" }, type: "Literal" }]
},
{
code: String.raw`new RegExp("\\u001F", flags)`,
errors: [{ messageId: "unexpected", data: { controlChars: "\\x1f" }, type: "Literal" }]
},
{
code: String.raw`/\u{1111}*\x1F/u`,
parserOptions: { ecmaVersion: 2015 },
errors: [{ messageId: "unexpected", data: { controlChars: "\\x1f" }, type: "Literal" }]
},
{
code: String.raw`new RegExp("\\u{1111}*\\x1F", "u")`,
parserOptions: { ecmaVersion: 2015 },
errors: [{ messageId: "unexpected", data: { controlChars: "\\x1f" }, type: "Literal" }]
},
{
code: String.raw`/\u{1F}/u`,
parserOptions: { ecmaVersion: 2015 },
errors: [{ messageId: "unexpected", data: { controlChars: "\\x1f" }, type: "Literal" }]
},
{
code: String.raw`/\u{1F}/gui`,
parserOptions: { ecmaVersion: 2015 },
errors: [{ messageId: "unexpected", data: { controlChars: "\\x1f" }, type: "Literal" }]
},
{
code: String.raw`new RegExp("\\u{1F}", "u")`,
errors: [{ messageId: "unexpected", data: { controlChars: "\\x1f" }, type: "Literal" }]
},
{
code: String.raw`new RegExp("\\u{1F}", "gui")`,
errors: [{ messageId: "unexpected", data: { controlChars: "\\x1f" }, type: "Literal" }]
}
]
});

0 comments on commit 38ae956

Please sign in to comment.