Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better error messages for pattern tests #2364

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
114 changes: 70 additions & 44 deletions tests/pattern-tests.js
Expand Up @@ -81,7 +81,7 @@ function testPatterns(Prism) {
BFS(Prism.languages, path => {
const { key, value } = path[path.length - 1];

let tokenPath = '<languages>';
let tokenPath = 'Prism.languages';
for (const { key } of path) {
if (!key) {
// do nothing
Expand Down Expand Up @@ -172,11 +172,46 @@ function testPatterns(Prism) {
}
}

/**
* Returns whether the given element will always at the start of the whole match.
*
* @param {Element} element
* @returns {boolean}
*/
function isFirstMatch(element) {
const parent = element.parent;
switch (parent.type) {
case 'Alternative':
// all elements before this element have to of zero length
if (!parent.elements.slice(0, parent.elements.indexOf(element)).every(isAlwaysZeroWidth)) {
return false;
}
const grandParent = parent.parent;
if (grandParent.type === 'Pattern') {
return true;
} else {
return isFirstMatch(grandParent);
}

case 'Quantifier':
if (parent.max >= 2) {
return false;
} else {
return isFirstMatch(parent);
}

default:
throw new Error(`Internal error: The given node should not be a '${element.type}'.`);
}
}


it('- should not match the empty string', function () {
forEachPattern(({ pattern, tokenPath }) => {
// test for empty string
assert.notMatch('', pattern, `Token ${tokenPath}: ${pattern} should not match the empty string.`);
assert.notMatch('', pattern, `${tokenPath}: ${pattern} should not match the empty string.\n\n`
+ `Patterns that do match the empty string can potentially cause infinitely many empty tokens. `
+ `Make sure that all patterns always consume at least one character.`);
});
});

Expand All @@ -187,54 +222,26 @@ function testPatterns(Prism) {
forEachCapturingGroup(ast.pattern, () => { hasCapturingGroup = true; });

if (!hasCapturingGroup) {
assert.fail(`Token ${tokenPath}: The pattern is set to 'lookbehind: true' but does not have a capturing group.`);
assert.fail(`${tokenPath}: The pattern is set to 'lookbehind: true' but does not have a capturing group.\n\n`
+ `Prism lookbehind groups use the captured text of the first capturing group to simulate a lookbehind. `
+ `Without a capturing group, a lookbehind is not possible.\n`
+ `To fix this, either add a capturing group for the lookbehind or remove the 'lookbehind' property.`);
}
}
});
});

it('- should not have lookbehind groups that can be preceded by other some characters', function () {
/**
* Returns whether the given element will always match the start of the string.
*
* @param {Element} element
* @returns {boolean}
*/
function isFirstMatch(element) {
const parent = element.parent;
switch (parent.type) {
case 'Alternative':
// all elements before this element have to of zero length
if (!parent.elements.slice(0, parent.elements.indexOf(element)).every(isAlwaysZeroWidth)) {
return false;
}
const grandParent = parent.parent;
if (grandParent.type === 'Pattern') {
return true;
} else {
return isFirstMatch(grandParent);
}

case 'Quantifier':
if (parent.max >= 2) {
return false;
} else {
return isFirstMatch(parent);
}

default:
throw new Error(`Internal error: The given node should not be a '${element.type}'.`);
}
}

forEachPattern(({ ast, tokenPath, lookbehind }) => {
if (!lookbehind) {
return;
}
forEachCapturingGroup(ast.pattern, ({ group, number }) => {
if (number === 1 && !isFirstMatch(group)) {
assert.fail(`Token ${tokenPath}: `
+ `The lookbehind group (if matched) always has to be at index 0 relative to the whole match.`);
assert.fail(`${tokenPath}: The lookbehind group ${group.raw} might be preceded by some characters.\n\n`
+ `Prism assumes that the lookbehind group, if captured, is the first thing matched by the regex. `
+ `If characters might precede the lookbehind group (e.g. /a?(b)c/), then Prism cannot correctly apply the lookbehind correctly in all cases.\n`
+ `To fix this, either remove the preceding characters or include them in the lookbehind group.`);
}
});
});
Expand All @@ -249,9 +256,9 @@ function testPatterns(Prism) {
if (number === 1 && isAlwaysZeroWidth(group)) {
const groupContent = group.raw.substr(1, group.raw.length - 2);
const replacement = group.alternatives.length === 1 ? groupContent : `(?:${groupContent})`;
reportError(`Token ${tokenPath}: The lookbehind group ${group.raw} does not consume characters. `
+ `Therefor it is not necessary to use a lookbehind group. `
+ `Replacing the lookbehind group with: ${replacement}`);
reportError(`${tokenPath}: The lookbehind group ${group.raw} does not consume characters.\n\n`
+ `Therefor it is not necessary to use a lookbehind group.\n`
+ `To fix this, replace the lookbehind group with ${replacement} and remove the 'lookbehind' property.`);
}
});
});
Expand All @@ -262,7 +269,22 @@ function testPatterns(Prism) {
forEachCapturingGroup(ast.pattern, ({ group, number }) => {
const isLookbehindGroup = lookbehind && number === 1;
if (group.references.length === 0 && !isLookbehindGroup) {
reportError(`Token ${tokenPath}: Unused capturing group ${group.raw}. All capturing groups have to be either referenced or used as a Prism lookbehind group.`);
const fixes = [];
fixes.push(`Make this group a non-capturing group ('(?:...)' instead of '(...)'). (It's usually this option.)`);
fixes.push(`Reference this group with a backreference (use '\\${number}' for this).`);
if (number === 1 && !lookbehind) {
if (isFirstMatch(group)) {
fixes.push(`Add a 'lookbehind: true' declaration.`);
} else {
fixes.push(`Add a 'lookbehind: true' declaration. (This group is not a valid lookbehind group because it can be preceded by some characters.)`);
}
}

reportError(`${tokenPath}: Unused capturing group ${group.raw}.\n\n`
+ `Unused capturing groups generally degrade the performance of regular expressions. `
+ `They might also be a sign that a backreference is incorrect or that a 'lookbehind: true' declaration in missing.\n`
+ `To fix this, do one of the following:\n`
+ fixes.map(f => '- ' + f).join('\n'));
}
});
});
Expand All @@ -272,7 +294,8 @@ function testPatterns(Prism) {
const niceName = /^[a-z][a-z\d]*(?:[-_][a-z\d]+)*$/;
function testName(name, desc = 'token name') {
if (!niceName.test(name)) {
assert.fail(`The ${desc} '${name}' does not match ${niceName}`);
assert.fail(`The ${desc} '${name}' does not match ${niceName}.\n\n`
+ `To fix this, choose a name that matches the above regular expression.`);
}
}

Expand Down Expand Up @@ -305,7 +328,10 @@ function testPatterns(Prism) {
visitRegExpAST(ast.pattern, {
onCharacterEnter(node) {
if (/^\\(?:[1-9]|\d{2,})$/.test(node.raw)) {
reportError(`Token ${tokenPath}: Octal escape ${node.raw}.`);
reportError(`${tokenPath}: Octal escape ${node.raw}.\n\n`
+ `Octal escapes can be confused with backreferences, so please do not use them.\n`
+ `To fix this, use a different escape method. `
+ `Note that this could also be an invalid backreference, so be sure to carefully analyse the pattern.`);
}
}
});
Expand Down