diff --git a/README.md b/README.md index dd36002a1..6226d6031 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ NPM Version Downloads Downloads total + Coverage

diff --git a/package.json b/package.json index b62373591..cf89fd7da 100644 --- a/package.json +++ b/package.json @@ -68,6 +68,11 @@ "^(parse5[^/]*)/dist/(.*?)(?:\\.js)?$": "<rootDir>/packages/$1/lib/$2", "^(parse5[^/]*)$": "<rootDir>/packages/$1/lib/index.ts", "^(.*)\\.js$": "$1" - } + }, + "coveragePathIgnorePatterns": [ + "node_modules", + "bench", + "test" + ] } } diff --git a/packages/parse5-html-rewriting-stream/test/rewriting-stream.test.ts b/packages/parse5-html-rewriting-stream/test/rewriting-stream.test.ts index 5c7892a61..104a469b9 100644 --- a/packages/parse5-html-rewriting-stream/test/rewriting-stream.test.ts +++ b/packages/parse5-html-rewriting-stream/test/rewriting-stream.test.ts @@ -202,6 +202,33 @@ describe('RewritingStream', () => { }) ); + it( + 'rewrite doctype (no public id)', + createRewriterTest({ + src: srcHtml, + expected: outdent` + + + + + + + +

Hey ya
+ + + `, + assignTokenHandlers: (rewriter) => { + rewriter.on('doctype', (token) => { + token.publicId = null; + token.systemId = 'hey'; + + rewriter.emitDoctype(token); + }); + }, + }) + ); + it( 'emit multiple', createRewriterTest({ @@ -210,7 +237,7 @@ describe('RewritingStream', () => { - + @@ -221,6 +248,11 @@ describe('RewritingStream', () => { assignTokenHandlers: (rewriter) => { rewriter.on('startTag', (token) => { rewriter.emitRaw(''); + + if (token.tagName === 'head') { + token.selfClosing = true; + } + rewriter.emitStartTag(token); rewriter.emitRaw(''); }); diff --git a/packages/parse5-htmlparser2-tree-adapter/lib/index.ts b/packages/parse5-htmlparser2-tree-adapter/lib/index.ts index 4a56897a7..5d67230cb 100644 --- a/packages/parse5-htmlparser2-tree-adapter/lib/index.ts +++ b/packages/parse5-htmlparser2-tree-adapter/lib/index.ts @@ -149,9 +149,9 @@ export const adapter: TreeAdapter = { adapter.appendChild(document, doctypeNode); } - doctypeNode['x-name'] = name ?? undefined; - doctypeNode['x-publicId'] = publicId ?? undefined; - doctypeNode['x-systemId'] = systemId ?? 
undefined; + doctypeNode['x-name'] = name; + doctypeNode['x-publicId'] = publicId; + doctypeNode['x-systemId'] = systemId; }, setDocumentMode(document: Document, mode: html.DOCUMENT_MODE): void { diff --git a/packages/parse5-parser-stream/test/utils/parse-chunked.ts b/packages/parse5-parser-stream/test/utils/parse-chunked.ts index f219bc012..402a5b2d9 100644 --- a/packages/parse5-parser-stream/test/utils/parse-chunked.ts +++ b/packages/parse5-parser-stream/test/utils/parse-chunked.ts @@ -17,9 +17,6 @@ export function parseChunked( parserStream.parser.tokenizer.preprocessor.bufferWaterline = 8; for (let i = 0; i < chunks.length - 1; i++) { - if (typeof chunks[i] !== 'string') { - throw new TypeError('Expected chunk to be a string'); - } parserStream.write(chunks[i]); } diff --git a/packages/parse5-sax-parser/test/sax-parser.test.ts b/packages/parse5-sax-parser/test/sax-parser.test.ts index 20ee3e6da..b37101008 100644 --- a/packages/parse5-sax-parser/test/sax-parser.test.ts +++ b/packages/parse5-sax-parser/test/sax-parser.test.ts @@ -137,4 +137,21 @@ describe('SAX parser', () => { assert.throws(() => stream.write(buf), TypeError); }); + + it('Should treat NULL characters as normal text', async () => { + const parser = new SAXParser(); + let foundText = false; + + parser.on('text', ({ text }) => { + foundText = true; + assert.strictEqual(text, '\0'); + }); + + parser.write('\0'); + parser.end(); + + await finished(parser); + + assert.strictEqual(foundText, true); + }); }); diff --git a/packages/parse5/lib/parser/formatting-element-list.test.ts b/packages/parse5/lib/parser/formatting-element-list.test.ts index cd3f9af48..3960e694a 100644 --- a/packages/parse5/lib/parser/formatting-element-list.test.ts +++ b/packages/parse5/lib/parser/formatting-element-list.test.ts @@ -142,6 +142,10 @@ generateTestsForEachTreeAdapter('FormattingElementList', (treeAdapter) => { list.clearToLastMarker(); assert.strictEqual(list.entries.length, 2); + + list.clearToLastMarker(); + + 
assert.strictEqual(list.entries.length, 0); }); test('Remove entry', () => { diff --git a/packages/parse5/lib/parser/formatting-element-list.ts b/packages/parse5/lib/parser/formatting-element-list.ts index f59b7fed6..f8e4224e8 100644 --- a/packages/parse5/lib/parser/formatting-element-list.ts +++ b/packages/parse5/lib/parser/formatting-element-list.ts @@ -127,6 +127,11 @@ export class FormattingElementList { } } + /** + * Clears the list of formatting elements up to the last marker. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-list-of-active-formatting-elements-up-to-the-last-marker + */ clearToLastMarker(): void { const markerIdx = this.entries.indexOf(MARKER); diff --git a/packages/parse5/lib/parser/open-element-stack.test.ts b/packages/parse5/lib/parser/open-element-stack.test.ts index 2ebbebe43..68742197a 100644 --- a/packages/parse5/lib/parser/open-element-stack.test.ts +++ b/packages/parse5/lib/parser/open-element-stack.test.ts @@ -317,6 +317,8 @@ generateTestsForEachTreeAdapter('open-element-stack', (treeAdapter) => { test('Has numbered header in scope', () => { const stack = new OpenElementStack(treeAdapter.createDocument(), treeAdapter, stackHandler); + assert.ok(stack.hasNumberedHeaderInScope()); + stack.push(createElement(TN.HTML), $.HTML); stack.push(createElement(TN.DIV), $.DIV); assert.ok(!stack.hasNumberedHeaderInScope()); @@ -337,6 +339,8 @@ generateTestsForEachTreeAdapter('open-element-stack', (treeAdapter) => { test('Has element in list item scope', () => { const stack = new OpenElementStack(treeAdapter.createDocument(), treeAdapter, stackHandler); + assert.ok(stack.hasInListItemScope($.P)); + stack.push(createElement(TN.HTML), $.HTML); stack.push(createElement(TN.DIV), $.DIV); assert.ok(!stack.hasInListItemScope($.P)); @@ -353,6 +357,8 @@ generateTestsForEachTreeAdapter('open-element-stack', (treeAdapter) => { test('Has element in button scope', () => { const stack = new OpenElementStack(treeAdapter.createDocument(), 
treeAdapter, stackHandler); + assert.ok(stack.hasInButtonScope($.P)); + stack.push(createElement(TN.HTML), $.HTML); stack.push(createElement(TN.DIV), $.DIV); assert.ok(!stack.hasInButtonScope($.P)); @@ -406,6 +412,8 @@ generateTestsForEachTreeAdapter('open-element-stack', (treeAdapter) => { test('Has element in select scope', () => { const stack = new OpenElementStack(treeAdapter.createDocument(), treeAdapter, stackHandler); + assert.ok(stack.hasInSelectScope($.P)); + stack.push(createElement(TN.HTML), $.HTML); stack.push(createElement(TN.DIV), $.DIV); assert.ok(!stack.hasInSelectScope($.P)); diff --git a/packages/parse5/lib/tokenizer/index.test.ts b/packages/parse5/lib/tokenizer/index.test.ts index ec478affe..e5fea0686 100644 --- a/packages/parse5/lib/tokenizer/index.test.ts +++ b/packages/parse5/lib/tokenizer/index.test.ts @@ -1,5 +1,6 @@ import { Tokenizer } from 'parse5'; import { generateTokenizationTests } from 'parse5-test-utils/utils/generate-tokenization-tests.js'; +import * as assert from 'node:assert'; const dataPath = new URL('../../../../test/data/html5lib-tests/tokenizer', import.meta.url); const tokenizerOpts = { @@ -7,3 +8,44 @@ const tokenizerOpts = { }; generateTokenizationTests('Tokenizer', dataPath.pathname, (handler) => new Tokenizer(tokenizerOpts, handler)); + +function noop(): void { + // Noop +} + +describe('Tokenizer methods', () => { + it('should pause and resume', () => { + let count = 0; + const tokenizer = new Tokenizer(tokenizerOpts, { + onComment(t): void { + assert.strictEqual(t.data, 'INIT'); + assert.strictEqual(count++, 0); + + tokenizer.pause(); + tokenizer.write('<!doctype foo>', false); + }, + onDoctype(t): void { + assert.strictEqual(t.name, 'foo'); + assert.strictEqual(count++, 2); + + expect(() => tokenizer.resume()).toThrow('Parser was already resumed'); + tokenizer.write('<next>', true); + }, + onStartTag(t): void { + assert.strictEqual(count++, 3); + assert.strictEqual(t.tagName, 'next'); + }, + onEndTag: noop, + onEof: noop, + onCharacter: 
noop, + onNullCharacter: noop, + onWhitespaceCharacter: noop, + }); + + tokenizer.write('<!--INIT-->', false); + assert.strictEqual(count++, 1); + expect(tokenizer).toHaveProperty('paused', true); + + tokenizer.resume(); + }); +}); diff --git a/packages/parse5/lib/tokenizer/index.ts b/packages/parse5/lib/tokenizer/index.ts index db330d4d7..1581b9758 100644 --- a/packages/parse5/lib/tokenizer/index.ts +++ b/packages/parse5/lib/tokenizer/index.ts @@ -132,7 +132,6 @@ const enum State { AMBIGUOUS_AMPERSAND, NUMERIC_CHARACTER_REFERENCE, HEXADEMICAL_CHARACTER_REFERENCE_START, - DECIMAL_CHARACTER_REFERENCE_START, HEXADEMICAL_CHARACTER_REFERENCE, DECIMAL_CHARACTER_REFERENCE, NUMERIC_CHARACTER_REFERENCE_END, @@ -993,10 +992,6 @@ export class Tokenizer { this._stateHexademicalCharacterReferenceStart(cp); break; } - case State.DECIMAL_CHARACTER_REFERENCE_START: { - this._stateDecimalCharacterReferenceStart(cp); - break; - } case State.HEXADEMICAL_CHARACTER_REFERENCE: { this._stateHexademicalCharacterReference(cp); break; @@ -3029,9 +3024,16 @@ export class Tokenizer { if (cp === $.LATIN_SMALL_X || cp === $.LATIN_CAPITAL_X) { this.state = State.HEXADEMICAL_CHARACTER_REFERENCE_START; + } + // Inlined decimal character reference start state + else if (isAsciiDigit(cp)) { + this.state = State.DECIMAL_CHARACTER_REFERENCE; + this._stateDecimalCharacterReference(cp); } else { - this.state = State.DECIMAL_CHARACTER_REFERENCE_START; - this._stateDecimalCharacterReferenceStart(cp); + this._err(ERR.absenceOfDigitsInNumericCharacterReference); + this._flushCodePointConsumedAsCharacterReference($.AMPERSAND); + this._flushCodePointConsumedAsCharacterReference($.NUMBER_SIGN); + this._reconsumeInState(this.returnState); } } @@ -3050,20 +3052,6 @@ export class Tokenizer { } } - // Decimal character reference start state - //------------------------------------------------------------------ - private _stateDecimalCharacterReferenceStart(cp: number): void { - if (isAsciiDigit(cp)) { - this.state = 
State.DECIMAL_CHARACTER_REFERENCE; - this._stateDecimalCharacterReference(cp); - } else { - this._err(ERR.absenceOfDigitsInNumericCharacterReference); - this._flushCodePointConsumedAsCharacterReference($.AMPERSAND); - this._flushCodePointConsumedAsCharacterReference($.NUMBER_SIGN); - this._reconsumeInState(this.returnState); - } - } - // Hexademical character reference state //------------------------------------------------------------------ private _stateHexademicalCharacterReference(cp: number): void {