Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: Improve code coverage #517

Merged
merged 5 commits into from Apr 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 1 addition & 2 deletions .gitignore
Expand Up @@ -3,8 +3,7 @@
.vscode
node_modules
docs/build
docs/05_api_reference.md
packages/*/dist/
test/dist/
.DS_Store
tsconfig.tsbuildinfo
coverage/
1 change: 1 addition & 0 deletions README.md
Expand Up @@ -13,6 +13,7 @@
<a href="https://www.npmjs.com/package/parse5"><img alt="NPM Version" src="https://img.shields.io/npm/v/parse5.svg"></a>
<a href="https://npmjs.org/package/parse5"><img alt="Downloads" src="http://img.shields.io/npm/dm/parse5.svg"></a>
<a href="https://npmjs.org/package/parse5"><img alt="Downloads total" src="http://img.shields.io/npm/dt/parse5.svg"></a>
<a href="https://coveralls.io/github/inikulin/parse5"><img alt="Coverage" src="https://img.shields.io/coveralls/github/inikulin/parse5/master"></a>
</p>

<p align="center">
Expand Down
7 changes: 6 additions & 1 deletion package.json
Expand Up @@ -68,6 +68,11 @@
"^(parse5[^/]*)/dist/(.*?)(?:\\.js)?$": "<rootDir>/packages/$1/lib/$2",
"^(parse5[^/]*)$": "<rootDir>/packages/$1/lib/index.ts",
"^(.*)\\.js$": "$1"
}
},
"coveragePathIgnorePatterns": [
"node_modules",
"bench",
"test"
]
}
}
Expand Up @@ -202,6 +202,33 @@ describe('RewritingStream', () => {
})
);

it(
'rewrite doctype (no public id)',
createRewriterTest({
src: srcHtml,
expected: outdent`
<!DOCTYPE html SYSTEM "hey">
<html>
<!-- comment1 -->
<head /// 123>
</head>
<!-- comment2 -->
<body =123>
<div>Hey ya</div>
</body>
</html>
`,
assignTokenHandlers: (rewriter) => {
rewriter.on('doctype', (token) => {
token.publicId = null;
token.systemId = 'hey';

rewriter.emitDoctype(token);
});
},
})
);

it(
'emit multiple',
createRewriterTest({
Expand All @@ -210,7 +237,7 @@ describe('RewritingStream', () => {
<!DOCTYPE html "">
<wrap><html></wrap>
<!-- comment1 -->
<wrap><head 123=""></wrap>
<wrap><head 123=""/></wrap>
</head>
<!-- comment2 -->
<wrap><body =123=""></wrap>
Expand All @@ -221,6 +248,11 @@ describe('RewritingStream', () => {
assignTokenHandlers: (rewriter) => {
rewriter.on('startTag', (token) => {
rewriter.emitRaw('<wrap>');

if (token.tagName === 'head') {
token.selfClosing = true;
}

rewriter.emitStartTag(token);
rewriter.emitRaw('</wrap>');
});
Expand Down
6 changes: 3 additions & 3 deletions packages/parse5-htmlparser2-tree-adapter/lib/index.ts
Expand Up @@ -149,9 +149,9 @@ export const adapter: TreeAdapter<Htmlparser2TreeAdapterMap> = {
adapter.appendChild(document, doctypeNode);
}

doctypeNode['x-name'] = name ?? undefined;
fb55 marked this conversation as resolved.
Show resolved Hide resolved
doctypeNode['x-publicId'] = publicId ?? undefined;
doctypeNode['x-systemId'] = systemId ?? undefined;
doctypeNode['x-name'] = name;
doctypeNode['x-publicId'] = publicId;
doctypeNode['x-systemId'] = systemId;
},

setDocumentMode(document: Document, mode: html.DOCUMENT_MODE): void {
Expand Down
3 changes: 0 additions & 3 deletions packages/parse5-parser-stream/test/utils/parse-chunked.ts
Expand Up @@ -17,9 +17,6 @@ export function parseChunked<T extends TreeAdapterTypeMap>(
parserStream.parser.tokenizer.preprocessor.bufferWaterline = 8;

for (let i = 0; i < chunks.length - 1; i++) {
if (typeof chunks[i] !== 'string') {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TypeScript validates this, we don't need this check.

throw new TypeError('Expected chunk to be a string');
}
parserStream.write(chunks[i]);
}

Expand Down
17 changes: 17 additions & 0 deletions packages/parse5-sax-parser/test/sax-parser.test.ts
Expand Up @@ -137,4 +137,21 @@ describe('SAX parser', () => {

assert.throws(() => stream.write(buf), TypeError);
});

it('Should treat NULL characters as normal text', async () => {
const parser = new SAXParser();
let foundText = false;

parser.on('text', ({ text }) => {
foundText = true;
assert.strictEqual(text, '\0');
});

parser.write('\0');
parser.end();

await finished(parser);

assert.strictEqual(foundText, true);
});
});
4 changes: 4 additions & 0 deletions packages/parse5/lib/parser/formatting-element-list.test.ts
Expand Up @@ -142,6 +142,10 @@ generateTestsForEachTreeAdapter('FormattingElementList', (treeAdapter) => {
list.clearToLastMarker();

assert.strictEqual(list.entries.length, 2);

list.clearToLastMarker();

assert.strictEqual(list.entries.length, 0);
});

test('Remove entry', () => {
Expand Down
5 changes: 5 additions & 0 deletions packages/parse5/lib/parser/formatting-element-list.ts
Expand Up @@ -127,6 +127,11 @@ export class FormattingElementList<T extends TreeAdapterTypeMap> {
}
}

/**
* Clears the list of formatting elements up to the last marker.
*
* @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-list-of-active-formatting-elements-up-to-the-last-marker
*/
clearToLastMarker(): void {
const markerIdx = this.entries.indexOf(MARKER);

Expand Down
8 changes: 8 additions & 0 deletions packages/parse5/lib/parser/open-element-stack.test.ts
Expand Up @@ -317,6 +317,8 @@ generateTestsForEachTreeAdapter('open-element-stack', (treeAdapter) => {
test('Has numbered header in scope', () => {
const stack = new OpenElementStack(treeAdapter.createDocument(), treeAdapter, stackHandler);

assert.ok(stack.hasNumberedHeaderInScope());
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The empty states won't be used in the parser, but we can test them here.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤔 Why does hasNumberedHeaderInScope exist if parse5 doesn’t use it? Should we deprecate it and remove it in a major then?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If these functions are used, can we instead add unit tests (input/output) of parsing behavior that depends on the result of these functions?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hasNumberedHeaderInScope is used by the parser. The only thing that the parser cannot reach is the default return value, eg. for an empty stack.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean this early return

causing to never run?

Could you change that early return to a break in that case, to fix the coverage?

Not sure about the algorithm: but it seems off that the default for that function i true? I’d expect, with such a function name, to exit early with true, and otherwise return false?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The relevant part of the spec is https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-scope

The algorithm explicitly doesn't consider the possibility of an empty stack:

This will never fail, since the loop will always terminate in the previous step if the top of the stack — an html element — is reached.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then I recommend using a c8 ignore (or similar), with this as an explanation:

/* Note: the algorithm <link> never fails */
/* c8 ignore next 123 */


stack.push(createElement(TN.HTML), $.HTML);
stack.push(createElement(TN.DIV), $.DIV);
assert.ok(!stack.hasNumberedHeaderInScope());
Expand All @@ -337,6 +339,8 @@ generateTestsForEachTreeAdapter('open-element-stack', (treeAdapter) => {
test('Has element in list item scope', () => {
const stack = new OpenElementStack(treeAdapter.createDocument(), treeAdapter, stackHandler);

assert.ok(stack.hasInListItemScope($.P));

stack.push(createElement(TN.HTML), $.HTML);
stack.push(createElement(TN.DIV), $.DIV);
assert.ok(!stack.hasInListItemScope($.P));
Expand All @@ -353,6 +357,8 @@ generateTestsForEachTreeAdapter('open-element-stack', (treeAdapter) => {
test('Has element in button scope', () => {
const stack = new OpenElementStack(treeAdapter.createDocument(), treeAdapter, stackHandler);

assert.ok(stack.hasInButtonScope($.P));

stack.push(createElement(TN.HTML), $.HTML);
stack.push(createElement(TN.DIV), $.DIV);
assert.ok(!stack.hasInButtonScope($.P));
Expand Down Expand Up @@ -406,6 +412,8 @@ generateTestsForEachTreeAdapter('open-element-stack', (treeAdapter) => {
test('Has element in select scope', () => {
const stack = new OpenElementStack(treeAdapter.createDocument(), treeAdapter, stackHandler);

assert.ok(stack.hasInSelectScope($.P));

stack.push(createElement(TN.HTML), $.HTML);
stack.push(createElement(TN.DIV), $.DIV);
assert.ok(!stack.hasInSelectScope($.P));
Expand Down
42 changes: 42 additions & 0 deletions packages/parse5/lib/tokenizer/index.test.ts
@@ -1,9 +1,51 @@
import { Tokenizer } from 'parse5';
import { generateTokenizationTests } from 'parse5-test-utils/utils/generate-tokenization-tests.js';
import * as assert from 'node:assert';

const dataPath = new URL('../../../../test/data/html5lib-tests/tokenizer', import.meta.url);
const tokenizerOpts = {
sourceCodeLocationInfo: true,
};

generateTokenizationTests('Tokenizer', dataPath.pathname, (handler) => new Tokenizer(tokenizerOpts, handler));

function noop(): void {
// Noop
}

describe('Tokenizer methods', () => {
it('should pause and resume', () => {
let count = 0;
const tokenizer = new Tokenizer(tokenizerOpts, {
onComment(t): void {
assert.strictEqual(t.data, 'INIT');
assert.strictEqual(count++, 0);

tokenizer.pause();
tokenizer.write('<!doctype foo>', false);
},
onDoctype(t): void {
assert.strictEqual(t.name, 'foo');
assert.strictEqual(count++, 2);

expect(() => tokenizer.resume()).toThrow('Parser was already resumed');
tokenizer.write('<next>', true);
},
onStartTag(t): void {
assert.strictEqual(count++, 3);
assert.strictEqual(t.tagName, 'next');
},
onEndTag: noop,
fb55 marked this conversation as resolved.
Show resolved Hide resolved
onEof: noop,
onCharacter: noop,
onNullCharacter: noop,
onWhitespaceCharacter: noop,
});

tokenizer.write('<!--INIT-->', false);
assert.strictEqual(count++, 1);
expect(tokenizer).toHaveProperty('paused', true);

tokenizer.resume();
});
});
30 changes: 9 additions & 21 deletions packages/parse5/lib/tokenizer/index.ts
Expand Up @@ -132,7 +132,6 @@ const enum State {
AMBIGUOUS_AMPERSAND,
NUMERIC_CHARACTER_REFERENCE,
HEXADEMICAL_CHARACTER_REFERENCE_START,
DECIMAL_CHARACTER_REFERENCE_START,
fb55 marked this conversation as resolved.
Show resolved Hide resolved
HEXADEMICAL_CHARACTER_REFERENCE,
DECIMAL_CHARACTER_REFERENCE,
NUMERIC_CHARACTER_REFERENCE_END,
Expand Down Expand Up @@ -993,10 +992,6 @@ export class Tokenizer {
this._stateHexademicalCharacterReferenceStart(cp);
break;
}
case State.DECIMAL_CHARACTER_REFERENCE_START: {
this._stateDecimalCharacterReferenceStart(cp);
break;
}
case State.HEXADEMICAL_CHARACTER_REFERENCE: {
this._stateHexademicalCharacterReference(cp);
break;
Expand Down Expand Up @@ -3029,9 +3024,16 @@ export class Tokenizer {

if (cp === $.LATIN_SMALL_X || cp === $.LATIN_CAPITAL_X) {
this.state = State.HEXADEMICAL_CHARACTER_REFERENCE_START;
}
// Inlined decimal character reference start state
else if (isAsciiDigit(cp)) {
this.state = State.DECIMAL_CHARACTER_REFERENCE;
this._stateDecimalCharacterReference(cp);
} else {
this.state = State.DECIMAL_CHARACTER_REFERENCE_START;
this._stateDecimalCharacterReferenceStart(cp);
this._err(ERR.absenceOfDigitsInNumericCharacterReference);
this._flushCodePointConsumedAsCharacterReference($.AMPERSAND);
this._flushCodePointConsumedAsCharacterReference($.NUMBER_SIGN);
this._reconsumeInState(this.returnState);
}
}

Expand All @@ -3050,20 +3052,6 @@ export class Tokenizer {
}
}

// Decimal character reference start state
//------------------------------------------------------------------
private _stateDecimalCharacterReferenceStart(cp: number): void {
fb55 marked this conversation as resolved.
Show resolved Hide resolved
if (isAsciiDigit(cp)) {
this.state = State.DECIMAL_CHARACTER_REFERENCE;
this._stateDecimalCharacterReference(cp);
} else {
this._err(ERR.absenceOfDigitsInNumericCharacterReference);
this._flushCodePointConsumedAsCharacterReference($.AMPERSAND);
this._flushCodePointConsumedAsCharacterReference($.NUMBER_SIGN);
this._reconsumeInState(this.returnState);
}
}

// Hexademical character reference state
//------------------------------------------------------------------
private _stateHexademicalCharacterReference(cp: number): void {
Expand Down