From ffa6722fdf5c276dacc53cdacf6bc4af1c00b6db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Mon, 17 Jan 2022 23:58:38 +0000 Subject: [PATCH 1/2] refactor(parser): Remove `_bootstrap` method --- packages/parse5-parser-stream/lib/index.ts | 9 +- packages/parse5/lib/index.ts | 8 +- packages/parse5/lib/parser/index.test.ts | 20 ++- packages/parse5/lib/parser/index.ts | 143 ++++++++---------- .../generate-parser-feedback-test/index.ts | 24 +-- 5 files changed, 98 insertions(+), 106 deletions(-) diff --git a/packages/parse5-parser-stream/lib/index.ts b/packages/parse5-parser-stream/lib/index.ts index 35ab6f472..677b59956 100644 --- a/packages/parse5-parser-stream/lib/index.ts +++ b/packages/parse5-parser-stream/lib/index.ts @@ -1,5 +1,5 @@ import { Writable } from 'node:stream'; -import { Parser, ParserOptions } from 'parse5/dist/parser/index.js'; +import { Parser, ParserOptions, defaultParserOptions } from 'parse5/dist/parser/index.js'; import type { TreeAdapterTypeMap } from 'parse5/dist/tree-adapters/interface.js'; import type { DefaultTreeAdapterMap } from 'parse5/dist/tree-adapters/default.js'; @@ -42,10 +42,9 @@ export class ParserStream constructor(options?: ParserOptions) { super({ decodeStrings: false }); - this.parser = new Parser(options); - - this.document = this.parser.treeAdapter.createDocument(); - this.parser._bootstrap(this.document, null); + const opts = { ...defaultParserOptions, ...options }; + this.parser = new Parser(opts); + this.document = this.parser.document; } //WritableStream implementation diff --git a/packages/parse5/lib/index.ts b/packages/parse5/lib/index.ts index 1e770816e..1f1061fdb 100644 --- a/packages/parse5/lib/index.ts +++ b/packages/parse5/lib/index.ts @@ -29,9 +29,7 @@ export function parse( html: string, options?: ParserOptions ): T['document'] { - const parser = new Parser(options); - - return parser.parse(html); + return Parser.parse(html, options); } /** @@ -77,9 +75,7 @@ export function parseFragment ({ node: test.fragmentContext @@ -25,21 +25,25 @@ describe('parser', () => { describe('Regression - Incorrect arguments fallback for the parser.parseFragment (GH-82, GH-83)', () => { beforeEach(() => { - Parser.prototype.parseFragment = function ( - this: Parser, + Parser.parseFragment = function ( html: string, - fragmentContext?: T['element'] - ): { html: string; fragmentContext: T['element'] | null | undefined; options: ParserOptions } { + fragmentContext?: T['element'], + options?: ParserOptions + ): { + html: string; + fragmentContext: T['element'] | null | undefined; + options: ParserOptions | undefined; + } { return { html, fragmentContext, - options: this.options, + options, }; }; }); afterEach(() => { - Parser.prototype.parseFragment = origParseFragment; + Parser.parseFragment = origParseFragment; }); it('parses correctly', () => { @@ -63,7 +67,7 @@ describe('parser', () => { assert.ok(!args.fragmentContext); expect(args).toHaveProperty('html', html); - assert.ok(!args.options.sourceCodeLocationInfo); + assert.ok(!args.options); }); }); diff --git a/packages/parse5/lib/parser/index.ts b/packages/parse5/lib/parser/index.ts index 3ddb4905b..ce6cf9824 100644 --- a/packages/parse5/lib/parser/index.ts +++ b/packages/parse5/lib/parser/index.ts @@ -82,7 +82,7 @@ export interface ParserOptions { * * @default `true` */ - scriptingEnabled?: boolean | undefined; + scriptingEnabled?: boolean; /** * Enables source code location information. When enabled, each node (except the root node) @@ -94,14 +94,14 @@ export interface ParserOptions { * * @default `false` */ - sourceCodeLocationInfo?: boolean | undefined; + sourceCodeLocationInfo?: boolean; /** * Specifies the resulting tree format. * * @default `treeAdapters.default` */ - treeAdapter?: TreeAdapter | undefined; + treeAdapter?: TreeAdapter; /** * Callback for parse errors. @@ -111,20 +111,24 @@ export interface ParserOptions { onParseError?: ParserErrorHandler | null; } +export const defaultParserOptions = { + scriptingEnabled: true, + sourceCodeLocationInfo: false, + treeAdapter: defaultTreeAdapter, + onParseError: null, +}; + //Parser export class Parser { - options: ParserOptions; treeAdapter: TreeAdapter; private onParseError: ParserErrorHandler | null; private currentToken: Token | null = null; - constructor(options?: ParserOptions) { - this.options = { - scriptingEnabled: true, - sourceCodeLocationInfo: false, - ...options, - }; - + public constructor( + public options: Required>, + public document: T['document'] = options.treeAdapter.createDocument(), + public fragmentContext: T['element'] | null = null + ) { this.treeAdapter = this.options.treeAdapter ??= defaultTreeAdapter as TreeAdapter; this.onParseError = this.options.onParseError ??= null; @@ -132,65 +136,90 @@ export class Parser { if (this.onParseError) { this.options.sourceCodeLocationInfo = true; } + + this.tokenizer = new Tokenizer(this.options); + this.activeFormattingElements = new FormattingElementList(this.treeAdapter); + + this.fragmentContextID = fragmentContext ? getTagID(this.treeAdapter.getTagName(fragmentContext)) : $.UNKNOWN; + this._setContextModes(fragmentContext ?? document, this.fragmentContextID); + + this.openElements = new OpenElementStack( + this.document, + this.treeAdapter, + this.onItemPush.bind(this), + this.onItemPop.bind(this) + ); } // API - public parse(html: string): T['document'] { - const document = this.treeAdapter.createDocument(); + public static parse(html: string, options?: ParserOptions): T['document'] { + const opts = { + ...defaultParserOptions, + ...options, + }; - this._bootstrap(document, null); - this.tokenizer.write(html, true); - this._runParsingLoop(null); + const parser = new this(opts); - return document; + parser.tokenizer.write(html, true); + parser._runParsingLoop(null); + + return parser.document; } - public parseFragment(html: string, fragmentContext?: T['parentNode'] | null): T['documentFragment'] { + public static parseFragment( + html: string, + fragmentContext?: T['parentNode'] | null, + options?: ParserOptions + ): T['documentFragment'] { + const opts: Required> = { + ...defaultParserOptions, + ...options, + }; + //NOTE: use