diff --git a/README.md b/README.md
index dd36002a1..6226d6031 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,7 @@
+
diff --git a/package.json b/package.json
index b62373591..cf89fd7da 100644
--- a/package.json
+++ b/package.json
@@ -68,6 +68,11 @@
"^(parse5[^/]*)/dist/(.*?)(?:\\.js)?$": "/packages/$1/lib/$2",
"^(parse5[^/]*)$": "/packages/$1/lib/index.ts",
"^(.*)\\.js$": "$1"
- }
+ },
+ "coveragePathIgnorePatterns": [
+ "node_modules",
+ "bench",
+ "test"
+ ]
}
}
diff --git a/packages/parse5-html-rewriting-stream/test/rewriting-stream.test.ts b/packages/parse5-html-rewriting-stream/test/rewriting-stream.test.ts
index 5c7892a61..104a469b9 100644
--- a/packages/parse5-html-rewriting-stream/test/rewriting-stream.test.ts
+++ b/packages/parse5-html-rewriting-stream/test/rewriting-stream.test.ts
@@ -202,6 +202,33 @@ describe('RewritingStream', () => {
})
);
+ it(
+ 'rewrite doctype (no public id)',
+ createRewriterTest({
+ src: srcHtml,
+ expected: outdent`
+
+
+
+
+
+
+
+ Hey ya
+
+
+ `,
+ assignTokenHandlers: (rewriter) => {
+ rewriter.on('doctype', (token) => {
+ token.publicId = null;
+ token.systemId = 'hey';
+ // Re-emit the doctype with its public id cleared and its system id replaced.
+ rewriter.emitDoctype(token);
+ });
+ },
+ })
+ );
+
it(
'emit multiple',
createRewriterTest({
@@ -210,7 +237,7 @@ describe('RewritingStream', () => {
-
+
@@ -221,6 +248,11 @@ describe('RewritingStream', () => {
assignTokenHandlers: (rewriter) => {
rewriter.on('startTag', (token) => {
rewriter.emitRaw('');
+
+ if (token.tagName === 'head') {
+ token.selfClosing = true;
+ }
+
rewriter.emitStartTag(token);
rewriter.emitRaw('');
});
diff --git a/packages/parse5-htmlparser2-tree-adapter/lib/index.ts b/packages/parse5-htmlparser2-tree-adapter/lib/index.ts
index 4a56897a7..5d67230cb 100644
--- a/packages/parse5-htmlparser2-tree-adapter/lib/index.ts
+++ b/packages/parse5-htmlparser2-tree-adapter/lib/index.ts
@@ -149,9 +149,9 @@ export const adapter: TreeAdapter = {
adapter.appendChild(document, doctypeNode);
}
- doctypeNode['x-name'] = name ?? undefined;
- doctypeNode['x-publicId'] = publicId ?? undefined;
- doctypeNode['x-systemId'] = systemId ?? undefined;
+ doctypeNode['x-name'] = name;
+ doctypeNode['x-publicId'] = publicId;
+ doctypeNode['x-systemId'] = systemId;
},
setDocumentMode(document: Document, mode: html.DOCUMENT_MODE): void {
diff --git a/packages/parse5-parser-stream/test/utils/parse-chunked.ts b/packages/parse5-parser-stream/test/utils/parse-chunked.ts
index f219bc012..402a5b2d9 100644
--- a/packages/parse5-parser-stream/test/utils/parse-chunked.ts
+++ b/packages/parse5-parser-stream/test/utils/parse-chunked.ts
@@ -17,9 +17,6 @@ export function parseChunked(
parserStream.parser.tokenizer.preprocessor.bufferWaterline = 8;
for (let i = 0; i < chunks.length - 1; i++) {
- if (typeof chunks[i] !== 'string') {
- throw new TypeError('Expected chunk to be a string');
- }
parserStream.write(chunks[i]);
}
diff --git a/packages/parse5-sax-parser/test/sax-parser.test.ts b/packages/parse5-sax-parser/test/sax-parser.test.ts
index 20ee3e6da..b37101008 100644
--- a/packages/parse5-sax-parser/test/sax-parser.test.ts
+++ b/packages/parse5-sax-parser/test/sax-parser.test.ts
@@ -137,4 +137,21 @@ describe('SAX parser', () => {
assert.throws(() => stream.write(buf), TypeError);
});
+
+ it('Should treat NULL characters as normal text', async () => {
+ const parser = new SAXParser();
+ let foundText = false;
+ // Record that the handler ran and check that the NULL character arrives unchanged.
+ parser.on('text', ({ text }) => {
+ foundText = true;
+ assert.strictEqual(text, '\0');
+ });
+ // Feed a lone NULL character, then end the stream.
+ parser.write('\0');
+ parser.end();
+ // Wait for `finished(parser)` to settle before inspecting the flag.
+ await finished(parser);
+ // Fails if the 'text' handler was never invoked.
+ assert.strictEqual(foundText, true);
+ });
});
diff --git a/packages/parse5/lib/parser/formatting-element-list.test.ts b/packages/parse5/lib/parser/formatting-element-list.test.ts
index cd3f9af48..3960e694a 100644
--- a/packages/parse5/lib/parser/formatting-element-list.test.ts
+++ b/packages/parse5/lib/parser/formatting-element-list.test.ts
@@ -142,6 +142,10 @@ generateTestsForEachTreeAdapter('FormattingElementList', (treeAdapter) => {
list.clearToLastMarker();
assert.strictEqual(list.entries.length, 2);
+
+ list.clearToLastMarker();
+
+ assert.strictEqual(list.entries.length, 0);
});
test('Remove entry', () => {
diff --git a/packages/parse5/lib/parser/formatting-element-list.ts b/packages/parse5/lib/parser/formatting-element-list.ts
index f59b7fed6..f8e4224e8 100644
--- a/packages/parse5/lib/parser/formatting-element-list.ts
+++ b/packages/parse5/lib/parser/formatting-element-list.ts
@@ -127,6 +127,11 @@ export class FormattingElementList {
}
}
+ /**
+ * Clears the list of formatting elements up to the last marker.
+ *
+ * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-list-of-active-formatting-elements-up-to-the-last-marker
+ */
clearToLastMarker(): void {
const markerIdx = this.entries.indexOf(MARKER);
diff --git a/packages/parse5/lib/parser/open-element-stack.test.ts b/packages/parse5/lib/parser/open-element-stack.test.ts
index 2ebbebe43..68742197a 100644
--- a/packages/parse5/lib/parser/open-element-stack.test.ts
+++ b/packages/parse5/lib/parser/open-element-stack.test.ts
@@ -317,6 +317,8 @@ generateTestsForEachTreeAdapter('open-element-stack', (treeAdapter) => {
test('Has numbered header in scope', () => {
const stack = new OpenElementStack(treeAdapter.createDocument(), treeAdapter, stackHandler);
+ assert.ok(stack.hasNumberedHeaderInScope());
+
stack.push(createElement(TN.HTML), $.HTML);
stack.push(createElement(TN.DIV), $.DIV);
assert.ok(!stack.hasNumberedHeaderInScope());
@@ -337,6 +339,8 @@ generateTestsForEachTreeAdapter('open-element-stack', (treeAdapter) => {
test('Has element in list item scope', () => {
const stack = new OpenElementStack(treeAdapter.createDocument(), treeAdapter, stackHandler);
+ assert.ok(stack.hasInListItemScope($.P));
+
stack.push(createElement(TN.HTML), $.HTML);
stack.push(createElement(TN.DIV), $.DIV);
assert.ok(!stack.hasInListItemScope($.P));
@@ -353,6 +357,8 @@ generateTestsForEachTreeAdapter('open-element-stack', (treeAdapter) => {
test('Has element in button scope', () => {
const stack = new OpenElementStack(treeAdapter.createDocument(), treeAdapter, stackHandler);
+ assert.ok(stack.hasInButtonScope($.P));
+
stack.push(createElement(TN.HTML), $.HTML);
stack.push(createElement(TN.DIV), $.DIV);
assert.ok(!stack.hasInButtonScope($.P));
@@ -406,6 +412,8 @@ generateTestsForEachTreeAdapter('open-element-stack', (treeAdapter) => {
test('Has element in select scope', () => {
const stack = new OpenElementStack(treeAdapter.createDocument(), treeAdapter, stackHandler);
+ assert.ok(stack.hasInSelectScope($.P));
+
stack.push(createElement(TN.HTML), $.HTML);
stack.push(createElement(TN.DIV), $.DIV);
assert.ok(!stack.hasInSelectScope($.P));
diff --git a/packages/parse5/lib/tokenizer/index.test.ts b/packages/parse5/lib/tokenizer/index.test.ts
index ec478affe..e5fea0686 100644
--- a/packages/parse5/lib/tokenizer/index.test.ts
+++ b/packages/parse5/lib/tokenizer/index.test.ts
@@ -1,5 +1,6 @@
import { Tokenizer } from 'parse5';
import { generateTokenizationTests } from 'parse5-test-utils/utils/generate-tokenization-tests.js';
+import * as assert from 'node:assert';
const dataPath = new URL('../../../../test/data/html5lib-tests/tokenizer', import.meta.url);
const tokenizerOpts = {
@@ -7,3 +8,44 @@ const tokenizerOpts = {
};
generateTokenizationTests('Tokenizer', dataPath.pathname, (handler) => new Tokenizer(tokenizerOpts, handler));
+
+function noop(): void {
+ // Intentionally empty: stands in for tokenizer callbacks that the test in this file does not inspect.
+}
+
+describe('Tokenizer methods', () => {
+    it('should pause and resume', () => {
+        let count = 0; // Step counter: each assertion pins where in the sequence it must run.
+        const tokenizer = new Tokenizer(tokenizerOpts, {
+            onComment(t): void {
+                assert.strictEqual(t.data, 'INIT');
+                assert.strictEqual(count++, 0);
+                // Pause from inside a callback; the doctype written next must not be delivered until resume().
+                tokenizer.pause();
+                tokenizer.write('<!doctype foo>', false);
+            },
+            onDoctype(t): void {
+                assert.strictEqual(t.name, 'foo');
+                assert.strictEqual(count++, 2);
+                // The tokenizer is already running again here, so a second resume() must throw.
+                expect(() => tokenizer.resume()).toThrow('Parser was already resumed');
+                tokenizer.write('<next>', true);
+            },
+            onStartTag(t): void {
+                assert.strictEqual(count++, 3);
+                assert.strictEqual(t.tagName, 'next');
+            },
+            onEndTag: noop,
+            onEof: noop,
+            onCharacter: noop,
+            onNullCharacter: noop,
+            onWhitespaceCharacter: noop,
+        });
+        // The comment token triggers onComment (step 0), which pauses the tokenizer.
+        tokenizer.write('<!--INIT-->', false);
+        assert.strictEqual(count++, 1);
+        expect(tokenizer).toHaveProperty('paused', true);
+        // Resuming delivers the pending doctype (step 2) and then the start tag (step 3).
+        tokenizer.resume();
+    });
+});
diff --git a/packages/parse5/lib/tokenizer/index.ts b/packages/parse5/lib/tokenizer/index.ts
index db330d4d7..1581b9758 100644
--- a/packages/parse5/lib/tokenizer/index.ts
+++ b/packages/parse5/lib/tokenizer/index.ts
@@ -132,7 +132,6 @@ const enum State {
AMBIGUOUS_AMPERSAND,
NUMERIC_CHARACTER_REFERENCE,
HEXADEMICAL_CHARACTER_REFERENCE_START,
- DECIMAL_CHARACTER_REFERENCE_START,
HEXADEMICAL_CHARACTER_REFERENCE,
DECIMAL_CHARACTER_REFERENCE,
NUMERIC_CHARACTER_REFERENCE_END,
@@ -993,10 +992,6 @@ export class Tokenizer {
this._stateHexademicalCharacterReferenceStart(cp);
break;
}
- case State.DECIMAL_CHARACTER_REFERENCE_START: {
- this._stateDecimalCharacterReferenceStart(cp);
- break;
- }
case State.HEXADEMICAL_CHARACTER_REFERENCE: {
this._stateHexademicalCharacterReference(cp);
break;
@@ -3029,9 +3024,16 @@ export class Tokenizer {
if (cp === $.LATIN_SMALL_X || cp === $.LATIN_CAPITAL_X) {
this.state = State.HEXADEMICAL_CHARACTER_REFERENCE_START;
+ }
+ // Inlined decimal character reference start state
+ else if (isAsciiDigit(cp)) {
+ this.state = State.DECIMAL_CHARACTER_REFERENCE;
+ this._stateDecimalCharacterReference(cp);
} else {
- this.state = State.DECIMAL_CHARACTER_REFERENCE_START;
- this._stateDecimalCharacterReferenceStart(cp);
+ this._err(ERR.absenceOfDigitsInNumericCharacterReference);
+ this._flushCodePointConsumedAsCharacterReference($.AMPERSAND);
+ this._flushCodePointConsumedAsCharacterReference($.NUMBER_SIGN);
+ this._reconsumeInState(this.returnState);
}
}
@@ -3050,20 +3052,6 @@ export class Tokenizer {
}
}
- // Decimal character reference start state
- //------------------------------------------------------------------
- private _stateDecimalCharacterReferenceStart(cp: number): void {
- if (isAsciiDigit(cp)) {
- this.state = State.DECIMAL_CHARACTER_REFERENCE;
- this._stateDecimalCharacterReference(cp);
- } else {
- this._err(ERR.absenceOfDigitsInNumericCharacterReference);
- this._flushCodePointConsumedAsCharacterReference($.AMPERSAND);
- this._flushCodePointConsumedAsCharacterReference($.NUMBER_SIGN);
- this._reconsumeInState(this.returnState);
- }
- }
-
// Hexademical character reference state
//------------------------------------------------------------------
private _stateHexademicalCharacterReference(cp: number): void {