diff --git a/tests/pattern-tests.js b/tests/pattern-tests.js index beb33c7b11..b35fdb9b6a 100644 --- a/tests/pattern-tests.js +++ b/tests/pattern-tests.js @@ -81,7 +81,7 @@ function testPatterns(Prism) { BFS(Prism.languages, path => { const { key, value } = path[path.length - 1]; - let tokenPath = ''; + let tokenPath = 'Prism.languages'; for (const { key } of path) { if (!key) { // do nothing @@ -172,11 +172,46 @@ function testPatterns(Prism) { } } + /** + * Returns whether the given element will always at the start of the whole match. + * + * @param {Element} element + * @returns {boolean} + */ + function isFirstMatch(element) { + const parent = element.parent; + switch (parent.type) { + case 'Alternative': + // all elements before this element have to of zero length + if (!parent.elements.slice(0, parent.elements.indexOf(element)).every(isAlwaysZeroWidth)) { + return false; + } + const grandParent = parent.parent; + if (grandParent.type === 'Pattern') { + return true; + } else { + return isFirstMatch(grandParent); + } + + case 'Quantifier': + if (parent.max >= 2) { + return false; + } else { + return isFirstMatch(parent); + } + + default: + throw new Error(`Internal error: The given node should not be a '${element.type}'.`); + } + } + it('- should not match the empty string', function () { forEachPattern(({ pattern, tokenPath }) => { // test for empty string - assert.notMatch('', pattern, `Token ${tokenPath}: ${pattern} should not match the empty string.`); + assert.notMatch('', pattern, `${tokenPath}: ${pattern} should not match the empty string.\n\n` + + `Patterns that do match the empty string can potentially cause infinitely many empty tokens. ` + + `Make sure that all patterns always consume at least one character.`); }); }); @@ -187,54 +222,26 @@ function testPatterns(Prism) { forEachCapturingGroup(ast.pattern, () => { hasCapturingGroup = true; }); if (!hasCapturingGroup) { - assert.fail(`Token ${tokenPath}: The pattern is set to 'lookbehind: true' but does not have a capturing group.`); + assert.fail(`${tokenPath}: The pattern is set to 'lookbehind: true' but does not have a capturing group.\n\n` + + `Prism lookbehind groups use the captured text of the first capturing group to simulate a lookbehind. ` + + `Without a capturing group, a lookbehind is not possible.\n` + + `To fix this, either add a capturing group for the lookbehind or remove the 'lookbehind' property.`); } } }); }); it('- should not have lookbehind groups that can be preceded by other some characters', function () { - /** - * Returns whether the given element will always match the start of the string. - * - * @param {Element} element - * @returns {boolean} - */ - function isFirstMatch(element) { - const parent = element.parent; - switch (parent.type) { - case 'Alternative': - // all elements before this element have to of zero length - if (!parent.elements.slice(0, parent.elements.indexOf(element)).every(isAlwaysZeroWidth)) { - return false; - } - const grandParent = parent.parent; - if (grandParent.type === 'Pattern') { - return true; - } else { - return isFirstMatch(grandParent); - } - - case 'Quantifier': - if (parent.max >= 2) { - return false; - } else { - return isFirstMatch(parent); - } - - default: - throw new Error(`Internal error: The given node should not be a '${element.type}'.`); - } - } - forEachPattern(({ ast, tokenPath, lookbehind }) => { if (!lookbehind) { return; } forEachCapturingGroup(ast.pattern, ({ group, number }) => { if (number === 1 && !isFirstMatch(group)) { - assert.fail(`Token ${tokenPath}: ` - + `The lookbehind group (if matched) always has to be at index 0 relative to the whole match.`); + assert.fail(`${tokenPath}: The lookbehind group ${group.raw} might be preceded by some characters.\n\n` + + `Prism assumes that the lookbehind group, if captured, is the first thing matched by the regex. ` + + `If characters might precede the lookbehind group (e.g. /a?(b)c/), then Prism cannot correctly apply the lookbehind correctly in all cases.\n` + + `To fix this, either remove the preceding characters or include them in the lookbehind group.`); } }); }); @@ -249,9 +256,9 @@ function testPatterns(Prism) { if (number === 1 && isAlwaysZeroWidth(group)) { const groupContent = group.raw.substr(1, group.raw.length - 2); const replacement = group.alternatives.length === 1 ? groupContent : `(?:${groupContent})`; - reportError(`Token ${tokenPath}: The lookbehind group ${group.raw} does not consume characters. ` - + `Therefor it is not necessary to use a lookbehind group. ` - + `Replacing the lookbehind group with: ${replacement}`); + reportError(`${tokenPath}: The lookbehind group ${group.raw} does not consume characters.\n\n` + + `Therefor it is not necessary to use a lookbehind group.\n` + + `To fix this, replace the lookbehind group with ${replacement} and remove the 'lookbehind' property.`); } }); }); @@ -262,7 +269,22 @@ function testPatterns(Prism) { forEachCapturingGroup(ast.pattern, ({ group, number }) => { const isLookbehindGroup = lookbehind && number === 1; if (group.references.length === 0 && !isLookbehindGroup) { - reportError(`Token ${tokenPath}: Unused capturing group ${group.raw}. All capturing groups have to be either referenced or used as a Prism lookbehind group.`); + const fixes = []; + fixes.push(`Make this group a non-capturing group ('(?:...)' instead of '(...)'). (It's usually this option.)`); + fixes.push(`Reference this group with a backreference (use '\\${number}' for this).`); + if (number === 1 && !lookbehind) { + if (isFirstMatch(group)) { + fixes.push(`Add a 'lookbehind: true' declaration.`); + } else { + fixes.push(`Add a 'lookbehind: true' declaration. (This group is not a valid lookbehind group because it can be preceded by some characters.)`); + } + } + + reportError(`${tokenPath}: Unused capturing group ${group.raw}.\n\n` + + `Unused capturing groups generally degrade the performance of regular expressions. ` + + `They might also be a sign that a backreference is incorrect or that a 'lookbehind: true' declaration in missing.\n` + + `To fix this, do one of the following:\n` + + fixes.map(f => '- ' + f).join('\n')); } }); }); @@ -272,7 +294,8 @@ function testPatterns(Prism) { const niceName = /^[a-z][a-z\d]*(?:[-_][a-z\d]+)*$/; function testName(name, desc = 'token name') { if (!niceName.test(name)) { - assert.fail(`The ${desc} '${name}' does not match ${niceName}`); + assert.fail(`The ${desc} '${name}' does not match ${niceName}.\n\n` + + `To fix this, choose a name that matches the above regular expression.`); } } @@ -305,7 +328,10 @@ function testPatterns(Prism) { visitRegExpAST(ast.pattern, { onCharacterEnter(node) { if (/^\\(?:[1-9]|\d{2,})$/.test(node.raw)) { - reportError(`Token ${tokenPath}: Octal escape ${node.raw}.`); + reportError(`${tokenPath}: Octal escape ${node.raw}.\n\n` + + `Octal escapes can be confused with backreferences, so please do not use them.\n` + + `To fix this, use a different escape method. ` + + `Note that this could also be an invalid backreference, so be sure to carefully analyse the pattern.`); } } });