diff --git a/package-lock.json b/package-lock.json index 5c522b8..e0e6e26 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,7 +12,6 @@ "async": "^3.2.1", "diff-match-patch": "^1.0.5", "kleur": "^4.1.4", - "parse5-sax-parser": "^6.0.1", "remark-parse": "^10.0.1", "supports-color": "^9.1.0", "unified": "^10.1.1" @@ -1804,19 +1803,6 @@ "url": "https://github.com/sponsors/wooorm" } }, - "node_modules/parse5": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/parse5/-/parse5-6.0.1.tgz", - "integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==" - }, - "node_modules/parse5-sax-parser": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/parse5-sax-parser/-/parse5-sax-parser-6.0.1.tgz", - "integrity": "sha512-kXX+5S81lgESA0LsDuGjAlBybImAChYRMT+/uKCEXFBFOeEhS52qUCydGhU3qLRD8D9DVjaUo821WK7DM4iCeg==", - "dependencies": { - "parse5": "^6.0.1" - } - }, "node_modules/path-exists": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", @@ -3594,19 +3580,6 @@ "is-hexadecimal": "^2.0.0" } }, - "parse5": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/parse5/-/parse5-6.0.1.tgz", - "integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==" - }, - "parse5-sax-parser": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/parse5-sax-parser/-/parse5-sax-parser-6.0.1.tgz", - "integrity": "sha512-kXX+5S81lgESA0LsDuGjAlBybImAChYRMT+/uKCEXFBFOeEhS52qUCydGhU3qLRD8D9DVjaUo821WK7DM4iCeg==", - "requires": { - "parse5": "^6.0.1" - } - }, "path-exists": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", diff --git a/package.json b/package.json index 2cb5c76..15a4ab5 100644 --- a/package.json +++ b/package.json @@ -40,7 +40,6 @@ "async": "^3.2.1", "diff-match-patch": "^1.0.5", "kleur": "^4.1.4", - "parse5-sax-parser": "^6.0.1", "remark-parse": "^10.0.1", "supports-color": "^9.1.0", "unified": "^10.1.1" diff --git a/src/main.js b/src/main.js index 6e7b6ca..481c3cf 100644 --- a/src/main.js +++ b/src/main.js @@ -5,7 +5,6 @@ import remarkParse from 'remark-parse' import async from 'async' import supportsColor from 'supports-color' import color from 'kleur' -import saxParser from 'parse5-sax-parser' import { exec } from 'child_process' import DMP from 'diff-match-patch' @@ -338,15 +337,99 @@ const runTests = (queue, options) => { } } -const extractHtmlComments = function(input){ - var comments, x$, p; - comments = []; - x$ = p = new saxParser(); - x$.on('comment', function(it){ - return comments.push(it.text); - }); - x$.end(input); - return comments; +const extractHtmlComments = function(input, nodePositionInMarkdown){ + // Reference: https://html.spec.whatwg.org/#comments + // + // Comments are generally ``, where `stuff` is disallowed from + // containing the ending delimiter. However, the comment delimiters may also + // occur inside CDATA blocks, where we do *not* want to parse them. + + const comments = [] + + const CDATA_OPENER = '' + const COMMENT_OPENER = '' + const IN_CDATA = Symbol('parser in CDATA') + const IN_COMMENT = Symbol('parser in comment') + const BASE = Symbol('parser in base state') + let state = BASE + let nextIndex = 0 + let done = false + + while (!done) { + const rest = input.slice(nextIndex) + + switch (state) { + case BASE: + // Parse the rest of whichever we see first. CDATA "swallows" + // comments, and vice-versa. + const cdataIndex = rest.indexOf(CDATA_OPENER) + const commentIndex = rest.indexOf(COMMENT_OPENER) + + if (cdataIndex === -1 && commentIndex === -1) { // No more of either; done + done = true + } else if (cdataIndex === -1 && commentIndex >= 0) { // Comment only + state = IN_COMMENT + nextIndex += commentIndex + } else if (cdataIndex >= 0 && commentIndex === -1) { // CDATA only + state = IN_CDATA + nextIndex += cdataIndex + } else { // Matched both. Go with the earlier one. + if (cdataIndex < commentIndex) { // CDATA earlier + state = IN_CDATA + nextIndex += cdataIndex + } else { // Comment earlier + state = IN_COMMENT + nextIndex += commentIndex + } + } + break + + case IN_COMMENT: { + // Parse end of comment + const closerIndex = rest.indexOf(COMMENT_CLOSER) + if (closerIndex >= 0) { + comments.push(rest.slice(0, closerIndex)) + nextIndex += closerIndex + state = BASE + } else { + // Unterminated comment + const openerIndex = input.slice(nextIndex) + const line = input.slice(0, nextIndex).split('\n').length + + nodePositionInMarkdown.start.line - 1 + parsingError(`'' where appropriate.` + + `\nCheck that '-->' doesn't occur anywhere unexpected.` + }) + } + break + } + + case IN_CDATA: { + // Parse end of CDATA + const closerIndex = rest.indexOf(CDATA_CLOSER) + if (closerIndex >= 0) { + nextIndex += closerIndex + state = BASE + } else { + // Unterminated CDATA + const line = input.slice(0, nextIndex).split('\n').length + + nodePositionInMarkdown.start.line - 1 + parsingError(`''` + + ` where appropriate.` + + `\nCheck that ']]>' doesn't occur anywhere unexpected.` + }) + } + break + } + } + } + + return comments }; /* @@ -462,7 +545,7 @@ const parseAndRunTests = (text, options={jobs: 1}) => { const visitMarkdownNode = (node) => { if (node.type === 'html') { - extractHtmlComments(node.value).forEach((comment) => { + extractHtmlComments(node.value, node.position).forEach((comment) => { // Optional whitespace, followed by '!test', more optional whitespace, // then the commands we actually care about. diff --git a/test.ls b/test.ls index c1c773f..f0df4c9 100644 --- a/test.ls +++ b/test.ls @@ -258,6 +258,66 @@ txm-expect do """ +txm-expect do + name: "using << heredocs in comments works" + input: """ + + + + + doesn't matter + + + + YEAH + + """ + expect-stdout: """ + TAP version 13 + 1..1 + ok 1 test name + + # 1/1 passed + # OK + + """ + +txm-expect do + name: "comment inside CDATA is not parsed" + input: """ + + ]]> + """ + expect-stdout: /1..0\n# no tests/ + +txm-expect do + name: "unterminated comment" + expect-exit: 2 + input: """ + + + true + + """ + expect-stdout: /unterminated HTML CDATA[\s\S]*line 2/ + txm-expect do name: "no program specified" input: """