Skip to content

Commit

Permalink
feat(parser-stream): Support parsing fragments (inikulin#487)
Browse files Browse the repository at this point in the history
  • Loading branch information
fb55 authored and jmbpwtw committed Feb 16, 2023
1 parent 15ab3c3 commit 4d85a48
Show file tree
Hide file tree
Showing 9 changed files with 67 additions and 86 deletions.
22 changes: 17 additions & 5 deletions packages/parse5-parser-stream/lib/index.ts
Expand Up @@ -30,18 +30,31 @@ import type { DefaultTreeAdapterMap } from 'parse5/dist/tree-adapters/default.js
*
*/
export class ParserStream<T extends TreeAdapterTypeMap = DefaultTreeAdapterMap> extends Writable {
/**
 * Creates a `ParserStream` that is backed by a fragment parser.
 *
 * @param fragmentContext Context node forwarded to `Parser.getFragmentParser`; may be omitted or `null`.
 * @param options Parsing options, handed both to the fragment parser and to the stream constructor.
 * @returns A new stream wrapping the freshly created fragment parser.
 */
static getFragmentStream<T extends TreeAdapterTypeMap>(
    fragmentContext?: T['parentNode'] | null,
    options?: ParserOptions<T>
): ParserStream<T> {
    // Build the fragment parser first, then hand it to the stream in place of the default parser.
    return new ParserStream(options, Parser.getFragmentParser(fragmentContext, options));
}

private lastChunkWritten = false;
private writeCallback: undefined | (() => void) = undefined;

public parser: Parser<T>;
private pendingHtmlInsertions: string[] = [];
/** The resulting document node. */
public document: T['document'];
/** The document node currently held by the underlying parser. */
public get document(): T['document'] {
    const { parser } = this;
    return parser.document;
}
/**
 * Returns the parsed fragment by delegating to the underlying parser's `getFragment()`.
 *
 * NOTE(review): presumably only meaningful for streams created via `getFragmentStream` — confirm.
 *
 * @returns The document fragment produced by the parser.
 */
public getFragment(): T['documentFragment'] {
    const { parser } = this;
    return parser.getFragment();
}

/**
* @param options Parsing options.
*/
constructor(options?: ParserOptions<T>) {
constructor(options?: ParserOptions<T>, public parser: Parser<T> = new Parser(options)) {
super({ decodeStrings: false });

const resume = (): void => {
Expand All @@ -68,8 +81,7 @@ export class ParserStream<T extends TreeAdapterTypeMap = DefaultTreeAdapterMap>
}
};

this.parser = new Parser(options, undefined, undefined, scriptHandler);
this.document = this.parser.document;
this.parser.scriptHandler = scriptHandler;
}

//WritableStream implementation
Expand Down
4 changes: 2 additions & 2 deletions packages/parse5-parser-stream/test/location-info.test.ts
Expand Up @@ -5,7 +5,7 @@ import { parseChunked } from './utils/parse-chunked.js';

generateLocationInfoParserTests('location-info', (input, opts) =>
// NOTE: use bigger chunks here for performance reasons
parseChunked(input, opts, 100, 400)
parseChunked({ input }, opts, 100, 400)
);

generateTestsForEachTreeAdapter('location-info', (treeAdapter) => {
Expand All @@ -17,7 +17,7 @@ generateTestsForEachTreeAdapter('location-info', (treeAdapter) => {
sourceCodeLocationInfo: true,
};

const document = parseChunked(html, opts).node;
const document = parseChunked({ input: html }, opts).node;
const htmlEl = treeAdapter.getChildNodes(document)[0];
const headEl = treeAdapter.getChildNodes(htmlEl)[0];
const bodyEl = treeAdapter.getChildNodes(htmlEl)[1];
Expand Down
18 changes: 16 additions & 2 deletions packages/parse5-parser-stream/test/parser-stream.test.ts
Expand Up @@ -4,8 +4,22 @@ import { generateParsingTests } from 'parse5-test-utils/utils/generate-parsing-t
import { parseChunked } from './utils/parse-chunked.js';
import { finished } from 'parse5-test-utils/utils/common.js';

generateParsingTests('ParserStream', 'ParserStream', { skipFragments: true }, (test, opts) =>
parseChunked(test.input, opts)
generateParsingTests(
'ParserStream',
'ParserStream',
{
expectErrors: [
//TODO(GH-448): Foreign content behaviour was updated in the HTML spec.
//The old test suite still tests the old behaviour.
'269.foreign-fragment',
'270.foreign-fragment',
'307.foreign-fragment',
'309.foreign-fragment',
'316.foreign-fragment',
'317.foreign-fragment',
],
},
(test, opts) => parseChunked(test, opts)
);

describe('ParserStream', () => {
Expand Down
11 changes: 7 additions & 4 deletions packages/parse5-parser-stream/test/scripting.test.ts
Expand Up @@ -14,14 +14,14 @@ generateParsingTests(
'ParserStream - Scripting',
'ParserStream - Scripting',
{
skipFragments: true,
withoutErrors: true,
suitePath,
},
async (test, opts) => {
const chunks = makeChunks(test.input);
const parser = new ParserStream(opts);
const { document } = parser;
const parser = test.fragmentContext
? ParserStream.getFragmentStream(test.fragmentContext, opts)
: new ParserStream(opts);

parser.on('script', async (scriptElement, documentWrite, resume) => {
const scriptTextNode = opts.treeAdapter.getChildNodes(scriptElement)[0];
Expand All @@ -48,7 +48,10 @@ generateParsingTests(

await finished(parser);

return { node: document };
return {
node: test.fragmentContext ? parser.getFragment() : parser.document,
chunks,
};
}
);

Expand Down
14 changes: 8 additions & 6 deletions packages/parse5-parser-stream/test/utils/parse-chunked.ts
Expand Up @@ -3,14 +3,16 @@ import type { TreeAdapterTypeMap } from 'parse5/dist/tree-adapters/interface.js'
import { ParserStream } from '../../lib/index.js';
import { makeChunks } from 'parse5-test-utils/utils/common.js';

export function parseChunked(
html: string,
opts: ParserOptions<TreeAdapterTypeMap>,
export function parseChunked<T extends TreeAdapterTypeMap>(
test: { input: string; fragmentContext?: T['parentNode'] },
opts: ParserOptions<T>,
minChunkSize?: number,
maxChunkSize?: number
): { node: TreeAdapterTypeMap['document']; chunks: string[] } {
const parserStream = new ParserStream(opts);
const chunks = makeChunks(html, minChunkSize, maxChunkSize);
const parserStream = test.fragmentContext
? ParserStream.getFragmentStream(test.fragmentContext, opts)
: new ParserStream(opts);
const chunks = makeChunks(test.input, minChunkSize, maxChunkSize);

// NOTE: set a small waterline for testing purposes
parserStream.parser.tokenizer.preprocessor.bufferWaterline = 8;
Expand All @@ -25,7 +27,7 @@ export function parseChunked(
parserStream.end(chunks[chunks.length - 1]);

return {
node: parserStream.document,
node: test.fragmentContext ? parserStream.getFragment() : parserStream.document,
chunks,
};
}
6 changes: 5 additions & 1 deletion packages/parse5/lib/index.ts
Expand Up @@ -75,5 +75,9 @@ export function parseFragment<T extends TreeAdapterTypeMap = DefaultTreeAdapterM
fragmentContext = null;
}

return Parser.parseFragment(html as string, fragmentContext, options);
const parser = Parser.getFragmentParser(fragmentContext, options);

parser.tokenizer.write(html as string, true);

return parser.getFragment();
}
55 changes: 1 addition & 54 deletions packages/parse5/lib/parser/index.test.ts
@@ -1,20 +1,15 @@
import * as assert from 'node:assert';
import * as parse5 from 'parse5';
import { jest } from '@jest/globals';
import { Parser, ParserOptions } from './index.js';
import type { TreeAdapterTypeMap } from './../tree-adapters/interface.js';
import { generateParsingTests } from 'parse5-test-utils/utils/generate-parsing-tests.js';
import { treeAdapters } from 'parse5-test-utils/utils/common.js';
import { NAMESPACES as NS } from '../common/html.js';

const origParseFragment = Parser.parseFragment;

generateParsingTests(
'parser',
'Parser',
{
expectErrors: [
//NOTE: Foreign content behaviour was updated in the HTML spec.
//TODO(GH-448): Foreign content behaviour was updated in the HTML spec.
//The old test suite still tests the old behaviour.
'269.foreign-fragment',
'270.foreign-fragment',
Expand Down Expand Up @@ -55,54 +50,6 @@ describe('parser', () => {
assert.strictEqual(document.childNodes[0].data, '!DOCTYPE html SYSTEM "about:legacy-compat"');
});

describe('Regression - Incorrect arguments fallback for the parser.parseFragment (GH-82, GH-83)', () => {
beforeEach(() => {
Parser.parseFragment = function <T extends TreeAdapterTypeMap>(
html: string,
fragmentContext?: T['element'],
options?: ParserOptions<T>
): {
html: string;
fragmentContext: T['element'] | null | undefined;
options: ParserOptions<T> | undefined;
} {
return {
html,
fragmentContext,
options,
};
};
});

afterEach(() => {
Parser.parseFragment = origParseFragment;
});

it('parses correctly', () => {
const fragmentContext = treeAdapters.default.createElement('div', NS.HTML, []);
const html = '<script></script>';
const opts = { sourceCodeLocationInfo: true };

let args: any = parse5.parseFragment(fragmentContext, html, opts);

expect(args).toHaveProperty('fragmentContext', fragmentContext);
expect(args).toHaveProperty('html', html);
assert.ok(args.options.sourceCodeLocationInfo);

args = parse5.parseFragment(html, opts);

assert.ok(!args.fragmentContext);
expect(args).toHaveProperty('html', html);
assert.ok(args.options.sourceCodeLocationInfo);

args = parse5.parseFragment(html);

assert.ok(!args.fragmentContext);
expect(args).toHaveProperty('html', html);
assert.ok(!args.options);
});
});

describe("Regression - Don't inherit from Object when creating collections (GH-119)", () => {
beforeEach(() => {
/*eslint-disable no-extend-native*/
Expand Down
16 changes: 9 additions & 7 deletions packages/parse5/lib/parser/index.ts
Expand Up @@ -165,11 +165,10 @@ export class Parser<T extends TreeAdapterTypeMap> implements TokenHandler, Stack
return parser.document;
}

public static parseFragment<T extends TreeAdapterTypeMap>(
html: string,
public static getFragmentParser<T extends TreeAdapterTypeMap>(
fragmentContext?: T['parentNode'] | null,
options?: ParserOptions<T>
): T['documentFragment'] {
): Parser<T> {
const opts: Required<ParserOptions<T>> = {
...defaultParserOptions,
...options,
Expand All @@ -194,12 +193,15 @@ export class Parser<T extends TreeAdapterTypeMap> implements TokenHandler, Stack
parser._insertFakeRootElement();
parser._resetInsertionMode();
parser._findFormInFragmentContext();
parser.tokenizer.write(html, true);

const rootElement = opts.treeAdapter.getFirstChild(documentMock) as T['parentNode'];
const fragment = opts.treeAdapter.createDocumentFragment();
return parser;
}

public getFragment(): T['documentFragment'] {
const rootElement = this.treeAdapter.getFirstChild(this.document) as T['parentNode'];
const fragment = this.treeAdapter.createDocumentFragment();

parser._adoptNodes(rootElement, fragment);
this._adoptNodes(rootElement, fragment);

return fragment;
}
Expand Down
7 changes: 2 additions & 5 deletions test/utils/generate-parsing-tests.ts
Expand Up @@ -137,19 +137,16 @@ export function generateParsingTests(
name: string,
prefix: string,
{
skipFragments,
withoutErrors,
expectErrors: expectError = [],
suitePath = treePath,
}: { skipFragments?: boolean; withoutErrors?: boolean; expectErrors?: string[]; suitePath?: URL },
}: { withoutErrors?: boolean; expectErrors?: string[]; suitePath?: URL },
parse: ParseMethod<TreeAdapterTypeMap>
): void {
generateTestsForEachTreeAdapter(name, (treeAdapter) => {
const errorsToExpect = new Set(expectError);

for (const test of loadTreeConstructionTestData(suitePath, treeAdapter).filter(
(test) => !skipFragments || !test.fragmentContext
)) {
for (const test of loadTreeConstructionTestData(suitePath, treeAdapter)) {
const expectError = errorsToExpect.delete(`${test.idx}.${test.setName}`);

it(
Expand Down

0 comments on commit 4d85a48

Please sign in to comment.