Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(parser-stream): Support parsing fragments #487

Merged
merged 3 commits into from Apr 3, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
22 changes: 17 additions & 5 deletions packages/parse5-parser-stream/lib/index.ts
Expand Up @@ -30,18 +30,31 @@ import type { DefaultTreeAdapterMap } from 'parse5/dist/tree-adapters/default.js
*
*/
export class ParserStream<T extends TreeAdapterTypeMap = DefaultTreeAdapterMap> extends Writable {
/**
 * Builds a `ParserStream` whose underlying parser is a fragment parser.
 *
 * @param fragmentContext Forwarded to `Parser.getFragmentParser` as the fragment context.
 * @param options Parsing options; handed to both the fragment parser and the stream.
 * @returns A stream wrapping the newly created fragment parser.
 */
static getFragmentStream<T extends TreeAdapterTypeMap>(
    fragmentContext?: T['parentNode'] | null,
    options?: ParserOptions<T>
): ParserStream<T> {
    // The fragment parser is created first and then injected into the stream.
    return new ParserStream<T>(options, Parser.getFragmentParser<T>(fragmentContext, options));
}

private lastChunkWritten = false;
private writeCallback: undefined | (() => void) = undefined;

public parser: Parser<T>;
private pendingHtmlInsertions: string[] = [];
/** The resulting document node. */
public document: T['document'];
public get document(): T['document'] {
return this.parser.document;
}
/**
 * Returns the parsed fragment by delegating to the underlying parser's `getFragment()`.
 *
 * NOTE(review): presumably only meaningful for streams created via
 * `getFragmentStream` — confirm against `Parser.getFragment`.
 */
public getFragment(): T['documentFragment'] {
return this.parser.getFragment();
}

/**
* @param options Parsing options.
*/
constructor(options?: ParserOptions<T>) {
constructor(options?: ParserOptions<T>, public parser: Parser<T> = new Parser(options)) {
super({ decodeStrings: false });

const resume = (): void => {
Expand All @@ -68,8 +81,7 @@ export class ParserStream<T extends TreeAdapterTypeMap = DefaultTreeAdapterMap>
}
};

this.parser = new Parser(options, undefined, undefined, scriptHandler);
this.document = this.parser.document;
this.parser.scriptHandler = scriptHandler;
}

//WritableStream implementation
Expand Down
4 changes: 2 additions & 2 deletions packages/parse5-parser-stream/test/location-info.test.ts
Expand Up @@ -5,7 +5,7 @@ import { parseChunked } from './utils/parse-chunked.js';

generateLocationInfoParserTests('location-info', (input, opts) =>
// NOTE: because of performance use bigger chunks here
parseChunked(input, opts, 100, 400)
parseChunked({ input }, opts, 100, 400)
);

generateTestsForEachTreeAdapter('location-info', (treeAdapter) => {
Expand All @@ -17,7 +17,7 @@ generateTestsForEachTreeAdapter('location-info', (treeAdapter) => {
sourceCodeLocationInfo: true,
};

const document = parseChunked(html, opts).node;
const document = parseChunked({ input: html }, opts).node;
const htmlEl = treeAdapter.getChildNodes(document)[0];
const headEl = treeAdapter.getChildNodes(htmlEl)[0];
const bodyEl = treeAdapter.getChildNodes(htmlEl)[1];
Expand Down
18 changes: 16 additions & 2 deletions packages/parse5-parser-stream/test/parser-stream.test.ts
Expand Up @@ -4,8 +4,22 @@ import { generateParsingTests } from 'parse5-test-utils/utils/generate-parsing-t
import { parseChunked } from './utils/parse-chunked.js';
import { finished } from 'parse5-test-utils/utils/common.js';

generateParsingTests('ParserStream', 'ParserStream', { skipFragments: true }, (test, opts) =>
parseChunked(test.input, opts)
generateParsingTests(
'ParserStream',
'ParserStream',
{
expectErrors: [
//NOTE: Foreign content behaviour was updated in the HTML spec.
//The old test suite still tests the old behaviour.
fb55 marked this conversation as resolved.
Show resolved Hide resolved
'269.foreign-fragment',
'270.foreign-fragment',
'307.foreign-fragment',
'309.foreign-fragment',
'316.foreign-fragment',
'317.foreign-fragment',
],
},
(test, opts) => parseChunked(test, opts)
);

describe('ParserStream', () => {
Expand Down
11 changes: 7 additions & 4 deletions packages/parse5-parser-stream/test/scripting.test.ts
Expand Up @@ -14,14 +14,14 @@ generateParsingTests(
'ParserStream - Scripting',
'ParserStream - Scripting',
{
skipFragments: true,
withoutErrors: true,
suitePath,
},
async (test, opts) => {
const chunks = makeChunks(test.input);
const parser = new ParserStream(opts);
const { document } = parser;
const parser = test.fragmentContext
? ParserStream.getFragmentStream(test.fragmentContext, opts)
: new ParserStream(opts);

parser.on('script', async (scriptElement, documentWrite, resume) => {
const scriptTextNode = opts.treeAdapter.getChildNodes(scriptElement)[0];
Expand All @@ -48,7 +48,10 @@ generateParsingTests(

await finished(parser);

return { node: document };
return {
node: test.fragmentContext ? parser.getFragment() : parser.document,
chunks,
};
}
);

Expand Down
14 changes: 8 additions & 6 deletions packages/parse5-parser-stream/test/utils/parse-chunked.ts
Expand Up @@ -3,14 +3,16 @@ import type { TreeAdapterTypeMap } from 'parse5/dist/tree-adapters/interface.js'
import { ParserStream } from '../../lib/index.js';
import { makeChunks } from 'parse5-test-utils/utils/common.js';

export function parseChunked(
html: string,
opts: ParserOptions<TreeAdapterTypeMap>,
export function parseChunked<T extends TreeAdapterTypeMap>(
test: { input: string; fragmentContext?: T['parentNode'] },
opts: ParserOptions<T>,
minChunkSize?: number,
maxChunkSize?: number
): { node: TreeAdapterTypeMap['document']; chunks: string[] } {
const parserStream = new ParserStream(opts);
const chunks = makeChunks(html, minChunkSize, maxChunkSize);
const parserStream = test.fragmentContext
? ParserStream.getFragmentStream(test.fragmentContext, opts)
: new ParserStream(opts);
const chunks = makeChunks(test.input, minChunkSize, maxChunkSize);

// NOTE: set small waterline for testing purposes
parserStream.parser.tokenizer.preprocessor.bufferWaterline = 8;
Expand All @@ -25,7 +27,7 @@ export function parseChunked(
parserStream.end(chunks[chunks.length - 1]);

return {
node: parserStream.document,
node: test.fragmentContext ? parserStream.getFragment() : parserStream.document,
chunks,
};
}
6 changes: 5 additions & 1 deletion packages/parse5/lib/index.ts
Expand Up @@ -75,5 +75,9 @@ export function parseFragment<T extends TreeAdapterTypeMap = DefaultTreeAdapterM
fragmentContext = null;
}

return Parser.parseFragment(html as string, fragmentContext, options);
const parser = Parser.getFragmentParser(fragmentContext, options);

parser.tokenizer.write(html as string, true);

return parser.getFragment();
}
53 changes: 0 additions & 53 deletions packages/parse5/lib/parser/index.test.ts
@@ -1,13 +1,8 @@
import * as assert from 'node:assert';
import * as parse5 from 'parse5';
import { jest } from '@jest/globals';
import { Parser, ParserOptions } from './index.js';
import type { TreeAdapterTypeMap } from './../tree-adapters/interface.js';
import { generateParsingTests } from 'parse5-test-utils/utils/generate-parsing-tests.js';
import { treeAdapters } from 'parse5-test-utils/utils/common.js';
import { NAMESPACES as NS } from '../common/html.js';

const origParseFragment = Parser.parseFragment;

generateParsingTests(
'parser',
Expand Down Expand Up @@ -55,54 +50,6 @@ describe('parser', () => {
assert.strictEqual(document.childNodes[0].data, '!DOCTYPE html SYSTEM "about:legacy-compat"');
});

describe('Regression - Incorrect arguments fallback for the parser.parseFragment (GH-82, GH-83)', () => {
beforeEach(() => {
Parser.parseFragment = function <T extends TreeAdapterTypeMap>(
html: string,
fragmentContext?: T['element'],
options?: ParserOptions<T>
): {
html: string;
fragmentContext: T['element'] | null | undefined;
options: ParserOptions<T> | undefined;
} {
return {
html,
fragmentContext,
options,
};
};
});

afterEach(() => {
Parser.parseFragment = origParseFragment;
});

it('parses correctly', () => {
const fragmentContext = treeAdapters.default.createElement('div', NS.HTML, []);
const html = '<script></script>';
const opts = { sourceCodeLocationInfo: true };

let args: any = parse5.parseFragment(fragmentContext, html, opts);

expect(args).toHaveProperty('fragmentContext', fragmentContext);
expect(args).toHaveProperty('html', html);
assert.ok(args.options.sourceCodeLocationInfo);

args = parse5.parseFragment(html, opts);

assert.ok(!args.fragmentContext);
expect(args).toHaveProperty('html', html);
assert.ok(args.options.sourceCodeLocationInfo);

args = parse5.parseFragment(html);

assert.ok(!args.fragmentContext);
expect(args).toHaveProperty('html', html);
assert.ok(!args.options);
});
});

describe("Regression - Don't inherit from Object when creating collections (GH-119)", () => {
beforeEach(() => {
/*eslint-disable no-extend-native*/
Expand Down
16 changes: 9 additions & 7 deletions packages/parse5/lib/parser/index.ts
Expand Up @@ -165,11 +165,10 @@ export class Parser<T extends TreeAdapterTypeMap> implements TokenHandler, Stack
return parser.document;
}

public static parseFragment<T extends TreeAdapterTypeMap>(
html: string,
public static getFragmentParser<T extends TreeAdapterTypeMap>(
fragmentContext?: T['parentNode'] | null,
options?: ParserOptions<T>
): T['documentFragment'] {
): Parser<T> {
const opts: Required<ParserOptions<T>> = {
...defaultParserOptions,
...options,
Expand All @@ -194,12 +193,15 @@ export class Parser<T extends TreeAdapterTypeMap> implements TokenHandler, Stack
parser._insertFakeRootElement();
parser._resetInsertionMode();
parser._findFormInFragmentContext();
parser.tokenizer.write(html, true);

const rootElement = opts.treeAdapter.getFirstChild(documentMock) as T['parentNode'];
const fragment = opts.treeAdapter.createDocumentFragment();
return parser;
}

public getFragment(): T['documentFragment'] {
const rootElement = this.treeAdapter.getFirstChild(this.document) as T['parentNode'];
const fragment = this.treeAdapter.createDocumentFragment();

parser._adoptNodes(rootElement, fragment);
this._adoptNodes(rootElement, fragment);

return fragment;
}
Expand Down
7 changes: 2 additions & 5 deletions test/utils/generate-parsing-tests.ts
Expand Up @@ -137,19 +137,16 @@ export function generateParsingTests(
name: string,
prefix: string,
{
skipFragments,
withoutErrors,
expectErrors: expectError = [],
suitePath = treePath,
}: { skipFragments?: boolean; withoutErrors?: boolean; expectErrors?: string[]; suitePath?: URL },
}: { withoutErrors?: boolean; expectErrors?: string[]; suitePath?: URL },
parse: ParseMethod<TreeAdapterTypeMap>
): void {
generateTestsForEachTreeAdapter(name, (treeAdapter) => {
const errorsToExpect = new Set(expectError);

for (const test of loadTreeConstructionTestData(suitePath, treeAdapter).filter(
(test) => !skipFragments || !test.fragmentContext
)) {
for (const test of loadTreeConstructionTestData(suitePath, treeAdapter)) {
const expectError = errorsToExpect.delete(`${test.idx}.${test.setName}`);

it(
Expand Down