From 5333e28106b8438f76b07b855c0c5fe4b3570947 Mon Sep 17 00:00:00 2001 From: Michael Schmidt Date: Tue, 19 Oct 2021 19:34:41 +0200 Subject: [PATCH] Added regex coverage (#3138) --- .github/workflows/test.yml | 13 ++ package.json | 1 + tests/coverage.js | 260 +++++++++++++++++++++++++++++++++ tests/helper/test-case.js | 44 +++++- tests/helper/test-discovery.js | 34 ++--- tests/helper/util.js | 30 +++- tests/pattern-tests.js | 32 +--- tests/run.js | 30 ++-- 8 files changed, 371 insertions(+), 73 deletions(-) create mode 100644 tests/coverage.js diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c8de6ee801..f8f4941b8b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -57,3 +57,16 @@ jobs: node-version: 14.x - run: npm ci - run: npm run lint:ci + + coverage: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Use Node.js 14.x + uses: actions/setup-node@v1 + with: + node-version: 14.x + - run: npm ci + - run: npm run regex-coverage diff --git a/package.json b/package.json index 1961040965..e15083f94c 100755 --- a/package.json +++ b/package.json @@ -14,6 +14,7 @@ "lint": "eslint . --cache", "lint:fix": "npm run lint -- --fix", "lint:ci": "eslint . 
--max-warnings 0", + "regex-coverage": "mocha tests/coverage.js", "test:aliases": "mocha tests/aliases-test.js", "test:core": "mocha tests/core/**/*.js", "test:dependencies": "mocha tests/dependencies-test.js", diff --git a/tests/coverage.js b/tests/coverage.js new file mode 100644 index 0000000000..cd7175789f --- /dev/null +++ b/tests/coverage.js @@ -0,0 +1,260 @@ +'use strict'; + +const TestDiscovery = require('./helper/test-discovery'); +const TestCase = require('./helper/test-case'); +const PrismLoader = require('./helper/prism-loader'); +const { BFS, BFSPathToPrismTokenPath } = require('./helper/util'); +const { assert } = require('chai'); +const components = require('../components.json'); +const ALL_LANGUAGES = [...Object.keys(components.languages).filter(k => k !== 'meta')]; + + +describe('Pattern test coverage', function () { + /** + * @type {Map} + * @typedef PatternData + * @property {RegExp} pattern + * @property {string} language + * @property {Set} from + * @property {RegExpExecArray[]} matches + */ + const patterns = new Map(); + + /** + * @param {string | string[]} languages + * @returns {import("./helper/prism-loader").Prism} + */ + function createInstance(languages) { + const Prism = PrismLoader.createInstance(languages); + + BFS(Prism.languages, (path, object) => { + const { key, value } = path[path.length - 1]; + const tokenPath = BFSPathToPrismTokenPath(path); + + if (Object.prototype.toString.call(value) == '[object RegExp]') { + const regex = makeGlobal(value); + object[key] = regex; + + const patternKey = String(regex); + let data = patterns.get(patternKey); + if (!data) { + data = { + pattern: regex, + language: path[1].key, + from: new Set([tokenPath]), + matches: [] + }; + patterns.set(patternKey, data); + } else { + data.from.add(tokenPath); + } + + regex.exec = string => { + let match = RegExp.prototype.exec.call(regex, string); + if (match) { + data.matches.push(match); + } + return match; + }; + } + }); + + return Prism; + } + + 
describe('Register all patterns', function () { + it('all', function () { + this.slow(10 * 1000); + // This will cause ALL regexes of Prism to be registered in the patterns map. + // (Languages that don't have any tests can't be caught otherwise.) + createInstance(ALL_LANGUAGES); + }); + }); + + describe('Run all language tests', function () { + // define tests for all tests in all languages in the test suite + for (const [languageIdentifier, files] of TestDiscovery.loadAllTests()) { + it(languageIdentifier, function () { + this.timeout(10 * 1000); + + for (const filePath of files) { + try { + TestCase.run({ + languageIdentifier, + filePath, + updateMode: 'none', + createInstance + }); + } catch (error) { + // we don't care about whether the test succeeds, + // we just want to gather usage data + } + } + }); + } + }); + + describe('Coverage', function () { + for (const language of ALL_LANGUAGES) { + describe(language, function () { + it(`- should cover all patterns`, function () { + const untested = getAllOf(language).filter(d => d.matches.length === 0); + if (untested.length === 0) { + return; + } + + const problems = untested.map(data => { + return formatProblem(data, [ + 'This pattern is completely untested. Add test files that match this pattern.' 
+ ]); + }); + + assert.fail([ + `${problems.length} pattern(s) are untested:\n` + + 'You can learn more about writing tests at https://prismjs.com/test-suite.html#writing-tests', + ...problems + ].join('\n\n')); + }); + + it(`- should exhaustively cover all keywords in keyword lists`, function () { + const problems = []; + + for (const data of getAllOf(language)) { + if (data.matches.length === 0) { + // don't report the same pattern twice + continue; + } + + const keywords = getKeywordList(data.pattern); + if (!keywords) { + continue; + } + const keywordCount = keywords.size; + + data.matches.forEach(([m]) => { + if (data.pattern.ignoreCase) { + m = m.toUpperCase(); + } + keywords.delete(m); + }); + + if (keywords.size > 0) { + problems.push(formatProblem(data, [ + `Add test files to test all keywords. The following keywords (${keywords.size}/${keywordCount}) are untested:`, + ...[...keywords].map(k => ` ${k}`) + ])); + } + } + + if (problems.length === 0) { + return; + } + + assert.fail([ + `${problems.length} keyword list(s) are not exhaustively tested:\n` + + 'You can learn more about writing tests at https://prismjs.com/test-suite.html#writing-tests', + ...problems + ].join('\n\n')); + }); + }); + } + }); + + /** + * @param {string} language + * @returns {PatternData[]} + */ + function getAllOf(language) { + return [...patterns.values()].filter(d => d.language === language); + } + + /** + * @param {string} string + * @param {number} maxLength + * @returns {string} + */ + function short(string, maxLength) { + if (string.length > maxLength) { + return string.slice(0, maxLength - 1) + '…'; + } else { + return string; + } + } + + /** + * If the given pattern string describes a keyword list, all keywords will be returned. Otherwise, `null` will be + * returned. + * + * @param {RegExp} pattern + * @returns {Set | null} + */ + function getKeywordList(pattern) { + // Right now, only keyword lists of the form /\b(?:foo|bar)\b/ are supported. 
+ // In the future, we might want to convert these regexes to NFAs and iterate all words to cover more complex + // keyword lists and even operator and punctuation lists. + + let source = pattern.source.replace(/^\\b|\\b$/g, ''); + if (source.startsWith('(?:') && source.endsWith(')')) { + source = source.slice('(?:'.length, source.length - ')'.length); + } + + if (/^\w+(?:\|\w+)*$/.test(source)) { + if (pattern.ignoreCase) { + source = source.toUpperCase(); + } + return new Set(source.split(/\|/g)); + } else { + return null; + } + } + + /** + * @param {Iterable} occurrences + * @returns {{ origin: string; otherOccurrences: string[] }} + */ + function splitOccurrences(occurrences) { + const all = [...occurrences]; + return { + origin: all[0], + otherOccurrences: all.slice(1), + }; + } + + /** + * @param {PatternData} data + * @param {string[]} messageLines + * @returns {string} + */ + function formatProblem(data, messageLines) { + const { origin, otherOccurrences } = splitOccurrences(data.from); + + const lines = [ + `${origin}:`, + short(String(data.pattern), 100), + '', + ...messageLines, + ]; + + if (otherOccurrences.length) { + lines.push( + '', + 'Other occurrences of this pattern:', + ...otherOccurrences.map(o => `- ${o}`) + ); + } + + return lines.join('\n '); + } +}); + +/** + * @param {RegExp} regex + * @returns {RegExp} + */ +function makeGlobal(regex) { + if (regex.global) { + return regex; + } else { + return RegExp(regex.source, regex.flags + 'g'); + } +} diff --git a/tests/helper/test-case.js b/tests/helper/test-case.js index 3454d77d1c..d69e2c3d98 100644 --- a/tests/helper/test-case.js +++ b/tests/helper/test-case.js @@ -1,6 +1,7 @@ 'use strict'; const fs = require('fs'); +const path = require('path'); const { assert } = require('chai'); const Prettier = require('prettier'); const PrismLoader = require('./prism-loader'); @@ -11,6 +12,12 @@ const TokenStreamTransformer = require('./token-stream-transformer'); * @typedef 
{import("../../components/prism-core.js")} Prism */ +/** + * @param {string[]} languages + * @returns {Prism} + */ +const defaultCreateInstance = (languages) => PrismLoader.createInstance(languages); + /** * Handles parsing and printing of a test case file. * @@ -297,6 +304,29 @@ class HighlightHTMLRunner { module.exports = { TestCaseFile, + /** + * Runs the given test file and asserts the result. + * + * This function will determine what kind of test files the given file is and call the appropriate method to run the + * test. + * + * @param {RunOptions} options + * @returns {void} + * + * @typedef RunOptions + * @property {string} languageIdentifier + * @property {string} filePath + * @property {"none" | "insert" | "update"} updateMode + * @property {(languages: string[]) => Prism} [createInstance] + */ + run(options) { + if (path.extname(options.filePath) === '.test') { + this.runTestCase(options.languageIdentifier, options.filePath, options.updateMode, options.createInstance); + } else { + this.runTestsWithHooks(options.languageIdentifier, require(options.filePath), options.createInstance); + } + }, + /** * Runs the given test case file and asserts the result * @@ -312,13 +342,16 @@ module.exports = { * @param {string} languageIdentifier * @param {string} filePath * @param {"none" | "insert" | "update"} updateMode + * @param {(languages: string[]) => Prism} [createInstance] */ - runTestCase(languageIdentifier, filePath, updateMode) { + runTestCase(languageIdentifier, filePath, updateMode, createInstance = defaultCreateInstance) { + let runner; if (/\.html\.test$/i.test(filePath)) { - this.runTestCaseWithRunner(languageIdentifier, filePath, updateMode, new HighlightHTMLRunner()); + runner = new HighlightHTMLRunner(); } else { - this.runTestCaseWithRunner(languageIdentifier, filePath, updateMode, new TokenizeJSONRunner()); + runner = new TokenizeJSONRunner(); } + this.runTestCaseWithRunner(languageIdentifier, filePath, updateMode, runner, createInstance); }, /** 
@@ -326,13 +359,14 @@ module.exports = { * @param {string} filePath * @param {"none" | "insert" | "update"} updateMode * @param {Runner} runner + * @param {(languages: string[]) => Prism} createInstance * @template T */ - runTestCaseWithRunner(languageIdentifier, filePath, updateMode, runner) { + runTestCaseWithRunner(languageIdentifier, filePath, updateMode, runner, createInstance) { const testCase = TestCaseFile.readFromFile(filePath); const usedLanguages = this.parseLanguageNames(languageIdentifier); - const Prism = PrismLoader.createInstance(usedLanguages.languages); + const Prism = createInstance(usedLanguages.languages); // the first language is the main language to highlight const actualValue = runner.run(Prism, testCase.code, usedLanguages.mainLanguage); diff --git a/tests/helper/test-discovery.js b/tests/helper/test-discovery.js index 4d97873972..3e374a393c 100644 --- a/tests/helper/test-discovery.js +++ b/tests/helper/test-discovery.js @@ -3,41 +3,37 @@ const fs = require('fs'); const path = require('path'); +const LANGUAGES_DIR = path.join(__dirname, '..', 'languages'); + module.exports = { /** * Loads the list of all available tests * - * @param {string} rootDir - * @returns {Object} + * @param {string} [rootDir] + * @returns {Map} */ loadAllTests(rootDir) { - /** @type {Object.} */ - const testSuite = {}; - - for (const language of this.getAllDirectories(rootDir)) { - testSuite[language] = this.getAllFiles(path.join(rootDir, language)); - } + rootDir = rootDir || LANGUAGES_DIR; - return testSuite; + return new Map(this.getAllDirectories(rootDir).map(language => { + return [language, this.getAllFiles(path.join(rootDir, language))]; + })); }, /** * Loads the list of available tests that match the given languages * - * @param {string} rootDir * @param {string|string[]} languages - * @returns {Object} + * @param {string} [rootDir] + * @returns {Map} */ - loadSomeTests(rootDir, languages) { - /** @type {Object.} */ - const testSuite = {}; - - for (const 
language of this.getSomeDirectories(rootDir, languages)) { - testSuite[language] = this.getAllFiles(path.join(rootDir, language)); - } + loadSomeTests(languages, rootDir) { + rootDir = rootDir || LANGUAGES_DIR; - return testSuite; + return new Map(this.getSomeDirectories(rootDir, languages).map(language => { + return [language, this.getAllFiles(path.join(rootDir, language))]; + })); }, diff --git a/tests/helper/util.js b/tests/helper/util.js index da3a18dd36..e4c72879fc 100644 --- a/tests/helper/util.js +++ b/tests/helper/util.js @@ -19,7 +19,7 @@ module.exports = { * Performs a breadth-first search on the given start element. * * @param {any} start - * @param {(path: { key: string, value: any }[]) => void} callback + * @param {(path: { key: string, value: any }[], obj: Record) => void} callback */ BFS(start, callback) { const visited = new Set(); @@ -28,8 +28,6 @@ module.exports = { [{ key: null, value: start }] ]; - callback(toVisit[0]); - while (toVisit.length > 0) { /** @type {{ key: string, value: any }[][]} */ const newToVisit = []; @@ -43,7 +41,7 @@ module.exports = { const value = obj[key]; path.push({ key, value }); - callback(path); + callback(path, obj); if (Array.isArray(value) || Object.prototype.toString.call(value) == '[object Object]') { newToVisit.push([...path]); @@ -58,6 +56,30 @@ module.exports = { } }, + /** + * Given the `BFS` path given to `BFS` callbacks, this will return the Prism language token path of the current + * value (e.g. `Prism.languages.xml.tag.pattern`). 
+ * + * @param {readonly{ key: string, value: any }[]} path + * @param {string} [root] + * @returns {string} + */ + BFSPathToPrismTokenPath(path, root = 'Prism.languages') { + let tokenPath = root; + for (const { key } of path) { + if (!key) { + // do nothing + } else if (/^\d+$/.test(key)) { + tokenPath += `[${key}]`; + } else if (/^[a-z]\w*$/i.test(key)) { + tokenPath += `.${key}`; + } else { + tokenPath += `[${JSON.stringify(key)}]`; + } + } + return tokenPath; + }, + /** * Returns the AST of a given pattern. * diff --git a/tests/pattern-tests.js b/tests/pattern-tests.js index 2dc9ee4c8b..91ce21eddf 100644 --- a/tests/pattern-tests.js +++ b/tests/pattern-tests.js @@ -5,7 +5,7 @@ const { assert } = require('chai'); const PrismLoader = require('./helper/prism-loader'); const TestDiscovery = require('./helper/test-discovery'); const TestCase = require('./helper/test-case'); -const { BFS, parseRegex } = require('./helper/util'); +const { BFS, BFSPathToPrismTokenPath, parseRegex } = require('./helper/util'); const { languages } = require('../components.json'); const { visitRegExpAST } = require('regexpp'); const { transform, combineTransformers, getIntersectionWordSets, JS, Words, NFA, Transformers } = require('refa'); @@ -19,8 +19,8 @@ const RAA = require('regexp-ast-analysis'); * @type {Map} */ const testSnippets = new Map(); -const testSuite = TestDiscovery.loadAllTests(__dirname + '/languages'); -for (const languageIdentifier in testSuite) { +const testSuite = TestDiscovery.loadAllTests(); +for (const [languageIdentifier, files] of testSuite) { const lang = TestCase.parseLanguageNames(languageIdentifier).mainLanguage; let snippets = testSnippets.get(lang); if (snippets === undefined) { @@ -28,7 +28,7 @@ for (const languageIdentifier in testSuite) { testSnippets.set(lang, snippets); } - for (const file of testSuite[languageIdentifier]) { + for (const file of files) { snippets.push(TestCase.TestCaseFile.readFromFile(file).code); } } @@ -90,27 +90,6 @@ function 
testPatterns(Prism, mainLanguage) { .filter(lang => lang in Prism.languages); } - /** - * @param {string} root - * @param {Parameters[1]>[0]} path - * @returns {string} - */ - function BFSPathToString(root, path) { - let pathStr = root; - for (const { key } of path) { - if (!key) { - // do nothing - } else if (/^\d+$/.test(key)) { - pathStr += `[${key}]`; - } else if (/^[a-z]\w*$/i.test(key)) { - pathStr += `.${key}`; - } else { - pathStr += `[${JSON.stringify(key)}]`; - } - } - return pathStr; - } - /** * Invokes the given function on every pattern in `Prism.languages`. * @@ -146,10 +125,9 @@ function testPatterns(Prism, mainLanguage) { BFS(root, path => { const { key, value } = path[path.length - 1]; + const tokenPath = BFSPathToPrismTokenPath(path, rootStr); visited.add(value); - const tokenPath = BFSPathToString(rootStr, path); - if (Object.prototype.toString.call(value) == '[object RegExp]') { try { let ast; diff --git a/tests/run.js b/tests/run.js index f030fbc3c7..390d771d33 100644 --- a/tests/run.js +++ b/tests/run.js @@ -8,29 +8,23 @@ const { argv } = require('yargs'); const testSuite = (argv.language) - ? TestDiscovery.loadSomeTests(__dirname + '/languages', argv.language) + ? 
TestDiscovery.loadSomeTests(argv.language) // load complete test suite - : TestDiscovery.loadAllTests(__dirname + '/languages'); + : TestDiscovery.loadAllTests(); const update = !!argv.update; // define tests for all tests in all languages in the test suite -for (const language in testSuite) { - if (!testSuite.hasOwnProperty(language)) { - continue; - } +for (const [languageIdentifier, files] of testSuite) { + describe("Testing language '" + languageIdentifier + "'", function () { + this.timeout(10000); - (function (language, testFiles) { - describe("Testing language '" + language + "'", function () { - this.timeout(10000); + for (const filePath of files) { + const fileName = path.basename(filePath, path.extname(filePath)); - for (const filePath of testFiles) { - const fileName = path.basename(filePath, path.extname(filePath)); - - it("– should pass test case '" + fileName + "'", function () { - TestCase.runTestCase(language, filePath, update ? 'update' : 'insert'); - }); - } - }); - }(language, testSuite[language])); + it("– should pass test case '" + fileName + "'", function () { + TestCase.runTestCase(languageIdentifier, filePath, update ? 'update' : 'insert'); + }); + } + }); }