Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tests: Improved detection of empty patterns #3058

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion components/prism-asciidoc.js
Expand Up @@ -35,7 +35,7 @@
pattern: /^\|={3,}(?:(?:\r?\n|\r(?!\n)).*)*?(?:\r?\n|\r)\|={3,}$/m,
inside: {
'specifiers': {
pattern: /(?!\|)(?:(?:(?:\d+(?:\.\d+)?|\.\d+)[+*])?(?:[<^>](?:\.[<^>])?|\.[<^>])?[a-z]*)(?=\|)/,
pattern: /(?:(?:(?:\d+(?:\.\d+)?|\.\d+)[+*](?:[<^>](?:\.[<^>])?|\.[<^>])?|[<^>](?:\.[<^>])?|\.[<^>])[a-z]*|[a-z]+)(?=\|)/,
alias: 'attr-value'
},
'punctuation': {
Expand Down
2 changes: 1 addition & 1 deletion components/prism-asciidoc.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion components/prism-promql.js
Expand Up @@ -47,7 +47,7 @@
lookbehind: true,
inside: {
'label-key': {
pattern: /\b[^,]*\b/,
pattern: /\b[^,]+\b/,
alias: 'attr-name',
},
'punctuation': /[(),]/
Expand Down
2 changes: 1 addition & 1 deletion components/prism-promql.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Expand Up @@ -59,6 +59,7 @@
"npm-run-all": "^4.1.5",
"pump": "^3.0.0",
"refa": "^0.9.1",
"regexp-ast-analysis": "^0.2.4",
"regexpp": "^3.2.0",
"scslre": "^0.1.6",
"simple-git": "^1.107.0",
Expand Down
100 changes: 44 additions & 56 deletions tests/pattern-tests.js
Expand Up @@ -12,6 +12,7 @@ const { transform, combineTransformers, getIntersectionWordSets, JS, Words, NFA,
const scslre = require('scslre');
const path = require('path');
const { argv } = require('yargs');
const RAA = require('regexp-ast-analysis');

/**
* A map from language id to a list of code snippets in that language.
Expand Down Expand Up @@ -130,6 +131,7 @@ function testPatterns(Prism, mainLanguage) {
* @property {string} name
* @property {any} parent
* @property {boolean} lookbehind Whether the first capturing group of the pattern is a Prism lookbehind group.
* @property {CapturingGroup | undefined} lookbehindGroup
* @property {{ key: string, value: any }[]} path
* @property {(message: string) => void} reportError
*/
Expand Down Expand Up @@ -163,14 +165,17 @@ function testPatterns(Prism, mainLanguage) {
}

const parent = path.length > 1 ? path[path.length - 2].value : undefined;
const lookbehind = key === 'pattern' && parent && !!parent.lookbehind;
const lookbehindGroup = lookbehind ? getFirstCapturingGroup(ast.pattern) : undefined;
callback({
pattern: value,
ast,
tokenPath,
name: key,
parent,
path,
lookbehind: key === 'pattern' && parent && !!parent.lookbehind,
lookbehind,
lookbehindGroup,
reportError: message => errors.push(message)
});
} catch (error) {
Expand Down Expand Up @@ -231,9 +236,10 @@ function testPatterns(Prism, mainLanguage) {


it('- should not match the empty string', function () {
forEachPattern(({ pattern, tokenPath }) => {
forEachPattern(({ ast, pattern, tokenPath }) => {
// test for empty string
assert.notMatch('', pattern, `${tokenPath}: ${pattern} should not match the empty string.\n\n`
const empty = RAA.isPotentiallyZeroLength(ast.pattern.alternatives);
assert.isFalse(empty, `${tokenPath}: ${pattern} should not match the empty string.\n\n`
+ `Patterns that do match the empty string can potentially cause infinitely many empty tokens. `
+ `Make sure that all patterns always consume at least one character.`);
});
Expand All @@ -256,47 +262,37 @@ function testPatterns(Prism, mainLanguage) {
});

it('- should not have lookbehind groups that can be preceded by other some characters', function () {
forEachPattern(({ ast, tokenPath, lookbehind }) => {
if (!lookbehind) {
return;
forEachPattern(({ tokenPath, lookbehindGroup }) => {
if (lookbehindGroup && !isFirstMatch(lookbehindGroup)) {
assert.fail(`${tokenPath}: The lookbehind group ${lookbehindGroup.raw} might be preceded by some characters.\n\n`
+ `Prism assumes that the lookbehind group, if captured, is the first thing matched by the regex. `
+ `If characters might precede the lookbehind group (e.g. /a?(b)c/), then Prism cannot correctly apply the lookbehind correctly in all cases.\n`
+ `To fix this, either remove the preceding characters or include them in the lookbehind group.`);
}
forEachCapturingGroup(ast.pattern, ({ group, number }) => {
if (number === 1 && !isFirstMatch(group)) {
assert.fail(`${tokenPath}: The lookbehind group ${group.raw} might be preceded by some characters.\n\n`
+ `Prism assumes that the lookbehind group, if captured, is the first thing matched by the regex. `
+ `If characters might precede the lookbehind group (e.g. /a?(b)c/), then Prism cannot correctly apply the lookbehind correctly in all cases.\n`
+ `To fix this, either remove the preceding characters or include them in the lookbehind group.`);
}
});
});
});

it('- should not have lookbehind groups that only have zero-width alternatives', function () {
forEachPattern(({ ast, tokenPath, lookbehind, reportError }) => {
if (!lookbehind) {
return;
forEachPattern(({ tokenPath, lookbehindGroup, reportError }) => {
if (lookbehindGroup && RAA.isZeroLength(lookbehindGroup)) {
const groupContent = lookbehindGroup.raw.substr(1, lookbehindGroup.raw.length - 2);
const replacement = lookbehindGroup.alternatives.length === 1 ? groupContent : `(?:${groupContent})`;
reportError(`${tokenPath}: The lookbehind group ${lookbehindGroup.raw} does not consume characters.\n\n`
+ `Therefor it is not necessary to use a lookbehind group.\n`
+ `To fix this, replace the lookbehind group with ${replacement} and remove the 'lookbehind' property.`);
}
forEachCapturingGroup(ast.pattern, ({ group, number }) => {
if (number === 1 && isAlwaysZeroWidth(group)) {
const groupContent = group.raw.substr(1, group.raw.length - 2);
const replacement = group.alternatives.length === 1 ? groupContent : `(?:${groupContent})`;
reportError(`${tokenPath}: The lookbehind group ${group.raw} does not consume characters.\n\n`
+ `Therefor it is not necessary to use a lookbehind group.\n`
+ `To fix this, replace the lookbehind group with ${replacement} and remove the 'lookbehind' property.`);
}
});
});
});

it('- should not have unused capturing groups', function () {
forEachPattern(({ ast, tokenPath, lookbehind, reportError }) => {
forEachPattern(({ ast, tokenPath, lookbehindGroup, reportError }) => {
forEachCapturingGroup(ast.pattern, ({ group, number }) => {
const isLookbehindGroup = lookbehind && number === 1;
const isLookbehindGroup = group === lookbehindGroup;
if (group.references.length === 0 && !isLookbehindGroup) {
const fixes = [];
fixes.push(`Make this group a non-capturing group ('(?:...)' instead of '(...)'). (It's usually this option.)`);
fixes.push(`Reference this group with a backreference (use '\\${number}' for this).`);
if (number === 1 && !lookbehind) {
if (number === 1 && !lookbehindGroup) {
if (isFirstMatch(group)) {
fixes.push(`Add a 'lookbehind: true' declaration.`);
} else {
Expand Down Expand Up @@ -392,28 +388,26 @@ function testPatterns(Prism, mainLanguage) {


/**
* Returns whether the given element will always have zero width meaning that it doesn't consume characters.
* Returns the first capturing group in the given pattern.
*
* @param {Element} element
* @returns {boolean}
* @param {Pattern} pattern
* @returns {CapturingGroup | undefined}
*/
function isAlwaysZeroWidth(element) {
switch (element.type) {
case 'Assertion':
// assertions == ^, $, \b, lookarounds
return true;
case 'Quantifier':
return element.max === 0 || isAlwaysZeroWidth(element.element);
case 'CapturingGroup':
case 'Group':
// every element in every alternative has to be of zero length
return element.alternatives.every(alt => alt.elements.every(isAlwaysZeroWidth));
case 'Backreference':
// on if the group referred to is of zero length
return isAlwaysZeroWidth(element.resolved);
default:
return false; // what's left are characters
function getFirstCapturingGroup(pattern) {
let cap = undefined;

try {
visitRegExpAST(pattern, {
onCapturingGroupEnter(node) {
cap = node;
throw new Error('stop');
}
});
} catch (error) {
// ignore errors
}

return cap;
}

/**
Expand All @@ -427,7 +421,7 @@ function isFirstMatch(element) {
switch (parent.type) {
case 'Alternative': {
// all elements before this element have to of zero length
if (!parent.elements.slice(0, parent.elements.indexOf(element)).every(isAlwaysZeroWidth)) {
if (!parent.elements.slice(0, parent.elements.indexOf(element)).every(RAA.isZeroLength)) {
return false;
}
const grandParent = parent.parent;
Expand Down Expand Up @@ -457,13 +451,7 @@ function isFirstMatch(element) {
* @returns {boolean}
*/
function underAStar(node) {
if (node.type === 'Quantifier' && node.max > 10) {
return true;
} else if (node.parent) {
return underAStar(node.parent);
} else {
return false;
}
return RAA.getEffectiveMaximumRepetition(node) > 10;
}

/**
Expand Down