Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(parser): Remove _bootstrap method #384

Merged
merged 2 commits into from Feb 24, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 4 additions & 5 deletions packages/parse5-parser-stream/lib/index.ts
@@ -1,5 +1,5 @@
import { Writable } from 'node:stream';
import { Parser, ParserOptions } from 'parse5/dist/parser/index.js';
import { Parser, ParserOptions, defaultParserOptions } from 'parse5/dist/parser/index.js';
import type { TreeAdapterTypeMap } from 'parse5/dist/tree-adapters/interface.js';
import type { DefaultTreeAdapterMap } from 'parse5/dist/tree-adapters/default.js';

Expand Down Expand Up @@ -42,10 +42,9 @@ export class ParserStream<T extends TreeAdapterTypeMap = DefaultTreeAdapterMap>
constructor(options?: ParserOptions<T>) {
super({ decodeStrings: false });

this.parser = new Parser(options);

this.document = this.parser.treeAdapter.createDocument();
this.parser._bootstrap(this.document, null);
const opts = { ...defaultParserOptions, ...options };
this.parser = new Parser(opts);
fb55 marked this conversation as resolved.
Show resolved Hide resolved
this.document = this.parser.document;
}

//WritableStream implementation
Expand Down
8 changes: 2 additions & 6 deletions packages/parse5/lib/index.ts
Expand Up @@ -29,9 +29,7 @@ export function parse<T extends TreeAdapterTypeMap = DefaultTreeAdapterMap>(
html: string,
options?: ParserOptions<T>
): T['document'] {
const parser = new Parser(options);

return parser.parse(html);
return Parser.parse(html, options);
}

/**
Expand Down Expand Up @@ -77,9 +75,7 @@ export function parseFragment<T extends TreeAdapterTypeMap = DefaultTreeAdapterM
fragmentContext = null;
}

const parser = new Parser(options);

return parser.parseFragment(html as string, fragmentContext);
return Parser.parseFragment(html as string, fragmentContext, options);
}

/**
Expand Down
20 changes: 12 additions & 8 deletions packages/parse5/lib/parser/index.test.ts
Expand Up @@ -6,7 +6,7 @@ import { generateParsingTests } from 'parse5-test-utils/utils/generate-parsing-t
import { treeAdapters } from 'parse5-test-utils/utils/common.js';
import { NAMESPACES as NS } from '../common/html.js';

const origParseFragment = Parser.prototype.parseFragment;
const origParseFragment = Parser.parseFragment;

generateParsingTests('parser', 'Parser', {}, (test, opts) => ({
node: test.fragmentContext
Expand All @@ -25,21 +25,25 @@ describe('parser', () => {

describe('Regression - Incorrect arguments fallback for the parser.parseFragment (GH-82, GH-83)', () => {
beforeEach(() => {
Parser.prototype.parseFragment = function <T extends TreeAdapterTypeMap>(
this: Parser<T>,
Parser.parseFragment = function <T extends TreeAdapterTypeMap>(
html: string,
fragmentContext?: T['element']
): { html: string; fragmentContext: T['element'] | null | undefined; options: ParserOptions<T> } {
fragmentContext?: T['element'],
options?: ParserOptions<T>
): {
html: string;
fragmentContext: T['element'] | null | undefined;
options: ParserOptions<T> | undefined;
} {
return {
html,
fragmentContext,
options: this.options,
options,
};
};
});

afterEach(() => {
Parser.prototype.parseFragment = origParseFragment;
Parser.parseFragment = origParseFragment;
});

it('parses correctly', () => {
Expand All @@ -63,7 +67,7 @@ describe('parser', () => {

assert.ok(!args.fragmentContext);
expect(args).toHaveProperty('html', html);
assert.ok(!args.options.sourceCodeLocationInfo);
assert.ok(!args.options);
});
});

Expand Down
143 changes: 67 additions & 76 deletions packages/parse5/lib/parser/index.ts
Expand Up @@ -82,7 +82,7 @@ export interface ParserOptions<T extends TreeAdapterTypeMap> {
*
* @default `true`
*/
scriptingEnabled?: boolean | undefined;
scriptingEnabled?: boolean;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it intentional that the value can’t be undefined, but that the field can be missing?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These values can still be explicitly undefined — I am honestly not sure why the | undefined was here in the first place (this was copied from the previous @types package). My suspicion would be an older TS version?

The only difference is that now Required<> works with the options.

Copy link
Collaborator

@wooorm wooorm Feb 13, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know that DefinitelyTyped was adding explicit | undefined to types, when they had ?. They‘re planning to pull them apart: DefinitelyTyped/DefinitelyTyped@b24e5f6.

My recommendation is to add | undefined if the value can be set to undefined. And to use ? when the field can be missing. And to use both, for both.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh, thanks for digging this up. Looks like the option is now called exactOptionalPropertyTypes and is explicitly opt-in (no longer enabled with strict): microsoft/TypeScript#44626

My intuition is that users enabling this flag wouldn't want to be able to pass undefined here.


/**
* Enables source code location information. When enabled, each node (except the root node)
Expand All @@ -94,14 +94,14 @@ export interface ParserOptions<T extends TreeAdapterTypeMap> {
*
* @default `false`
*/
sourceCodeLocationInfo?: boolean | undefined;
sourceCodeLocationInfo?: boolean;

/**
* Specifies the resulting tree format.
*
* @default `treeAdapters.default`
*/
treeAdapter?: TreeAdapter<T> | undefined;
treeAdapter?: TreeAdapter<T>;

/**
* Callback for parse errors.
Expand All @@ -111,86 +111,115 @@ export interface ParserOptions<T extends TreeAdapterTypeMap> {
onParseError?: ParserErrorHandler | null;
}

export const defaultParserOptions = {
scriptingEnabled: true,
sourceCodeLocationInfo: false,
treeAdapter: defaultTreeAdapter,
onParseError: null,
};

//Parser
export class Parser<T extends TreeAdapterTypeMap> {
options: ParserOptions<T>;
treeAdapter: TreeAdapter<T>;
private onParseError: ParserErrorHandler | null;
private currentToken: Token | null = null;

constructor(options?: ParserOptions<T>) {
this.options = {
scriptingEnabled: true,
sourceCodeLocationInfo: false,
...options,
};

public constructor(
public options: Required<ParserOptions<T>>,
public document: T['document'] = options.treeAdapter.createDocument(),
public fragmentContext: T['element'] | null = null
) {
this.treeAdapter = this.options.treeAdapter ??= defaultTreeAdapter as TreeAdapter<T>;
this.onParseError = this.options.onParseError ??= null;

// Always enable location info if we report parse errors.
if (this.onParseError) {
this.options.sourceCodeLocationInfo = true;
}

this.tokenizer = new Tokenizer(this.options);
this.activeFormattingElements = new FormattingElementList(this.treeAdapter);

this.fragmentContextID = fragmentContext ? getTagID(this.treeAdapter.getTagName(fragmentContext)) : $.UNKNOWN;
this._setContextModes(fragmentContext ?? document, this.fragmentContextID);

this.openElements = new OpenElementStack(
this.document,
this.treeAdapter,
this.onItemPush.bind(this),
this.onItemPop.bind(this)
);
}

// API
public parse(html: string): T['document'] {
const document = this.treeAdapter.createDocument();
public static parse<T extends TreeAdapterTypeMap>(html: string, options?: ParserOptions<T>): T['document'] {
const opts = {
...defaultParserOptions,
...options,
};

this._bootstrap(document, null);
this.tokenizer.write(html, true);
this._runParsingLoop(null);
const parser = new this(opts);

return document;
parser.tokenizer.write(html, true);
parser._runParsingLoop(null);

return parser.document;
}

public parseFragment(html: string, fragmentContext?: T['parentNode'] | null): T['documentFragment'] {
public static parseFragment<T extends TreeAdapterTypeMap>(
html: string,
fragmentContext?: T['parentNode'] | null,
options?: ParserOptions<T>
): T['documentFragment'] {
const opts: Required<ParserOptions<T>> = {
...defaultParserOptions,
...options,
};

//NOTE: use <template> element as a fragment context if context element was not provided,
//so we will parse in "forgiving" manner
fragmentContext ??= this.treeAdapter.createElement(TN.TEMPLATE, NS.HTML, []);
fragmentContext ??= opts.treeAdapter.createElement(TN.TEMPLATE, NS.HTML, []);

//NOTE: create fake element which will be used as 'document' for fragment parsing.
//This is important for jsdom there 'document' can't be recreated, therefore
//fragment parsing causes messing of the main `document`.
const documentMock = this.treeAdapter.createElement('documentmock', NS.HTML, []);
const documentMock = opts.treeAdapter.createElement('documentmock', NS.HTML, []);

this._bootstrap(documentMock, fragmentContext);
const parser = new this(opts, documentMock, fragmentContext);

if (this.fragmentContextID === $.TEMPLATE) {
this.tmplInsertionModeStack.unshift(InsertionMode.IN_TEMPLATE);
if (parser.fragmentContextID === $.TEMPLATE) {
parser.tmplInsertionModeStack.unshift(InsertionMode.IN_TEMPLATE);
}

this._initTokenizerForFragmentParsing();
this._insertFakeRootElement();
this._resetInsertionMode();
this._findFormInFragmentContext();
this.tokenizer.write(html, true);
this._runParsingLoop(null);
parser._initTokenizerForFragmentParsing();
parser._insertFakeRootElement();
parser._resetInsertionMode();
parser._findFormInFragmentContext();
parser.tokenizer.write(html, true);
parser._runParsingLoop(null);

const rootElement = this.treeAdapter.getFirstChild(documentMock) as T['parentNode'];
const fragment = this.treeAdapter.createDocumentFragment();
const rootElement = opts.treeAdapter.getFirstChild(documentMock) as T['parentNode'];
const fragment = opts.treeAdapter.createDocumentFragment();

this._adoptNodes(rootElement, fragment);
parser._adoptNodes(rootElement, fragment);

return fragment;
}

tokenizer!: Tokenizer;
tokenizer: Tokenizer;

stopped = false;
insertionMode = InsertionMode.INITIAL;
originalInsertionMode = InsertionMode.INITIAL;

document!: T['document'];
fragmentContext!: T['element'] | null;
fragmentContextID = $.UNKNOWN;
fragmentContextID: $;

headElement: null | T['element'] = null;
formElement: null | T['element'] = null;
pendingScript: null | T['element'] = null;

openElements!: OpenElementStack<T>;
activeFormattingElements!: FormattingElementList<T>;
openElements: OpenElementStack<T>;
activeFormattingElements: FormattingElementList<T>;
private _considerForeignContent = false;

/**
Expand All @@ -206,44 +235,6 @@ export class Parser<T extends TreeAdapterTypeMap> {
skipNextNewLine = false;
fosterParentingEnabled = false;

//Bootstrap parser
_bootstrap(document: T['document'], fragmentContext: T['element'] | null): void {
this.tokenizer = new Tokenizer(this.options);

this.stopped = false;

this.insertionMode = InsertionMode.INITIAL;
this.originalInsertionMode = InsertionMode.INITIAL;

this.document = document;
this.fragmentContext = fragmentContext;
this.fragmentContextID = fragmentContext ? getTagID(this.treeAdapter.getTagName(fragmentContext)) : $.UNKNOWN;
this._setContextModes(fragmentContext ?? document, this.fragmentContextID);

this.headElement = null;
this.formElement = null;
this.pendingScript = null;
this.currentToken = null;

this.openElements = new OpenElementStack(
this.document,
this.treeAdapter,
this.onItemPush.bind(this),
this.onItemPop.bind(this)
);

this.activeFormattingElements = new FormattingElementList(this.treeAdapter);

this.tmplInsertionModeStack.length = 0;

this.pendingCharacterTokens.length = 0;
this.hasNonWhitespacePendingCharacterToken = false;

this.framesetOk = true;
this.skipNextNewLine = false;
this.fosterParentingEnabled = false;
}

//Errors
_err(token: Token, code: ERR, beforeToken?: boolean): void {
if (!this.onParseError) return;
Expand Down
24 changes: 13 additions & 11 deletions scripts/generate-parser-feedback-test/index.ts
Expand Up @@ -6,6 +6,7 @@ import { convertTokenToHtml5Lib } from 'parse5-test-utils/utils/generate-tokeniz
import { parseDatFile } from 'parse5-test-utils/utils/parse-dat-file.js';
import { addSlashes } from 'parse5-test-utils/utils/common.js';
import { TokenType, Token } from '../../packages/parse5/dist/common/token.js';
import type { TreeAdapterTypeMap } from '../../packages/parse5/dist/tree-adapters/interface.js';

// eslint-disable-next-line no-console
main().catch(console.error);
Expand Down Expand Up @@ -42,21 +43,22 @@ function appendToken(dest: Token[], token: Token): void {

function collectParserTokens(html: string): ReturnType<typeof convertTokenToHtml5Lib>[] {
const tokens: Token[] = [];
const parser = new Parser();

parser._processInputToken = function (token): void {
Parser.prototype._processInputToken.call(this, token);
class ExtendedParser<T extends TreeAdapterTypeMap> extends Parser<T> {
override _processInputToken(token: Token): void {
super._processInputToken(token);

// NOTE: Needed to split attributes of duplicate <html> and <body>
// which are otherwise merged as per tree constructor spec
if (token.type === TokenType.START_TAG) {
token.attrs = [...token.attrs];
}
// NOTE: Needed to split attributes of duplicate <html> and <body>
// which are otherwise merged as per tree constructor spec
if (token.type === TokenType.START_TAG) {
token.attrs = [...token.attrs];
}

appendToken(tokens, token);
};
appendToken(tokens, token);
}
}

parser.parse(html);
ExtendedParser.parse(html);

return tokens.map((token) => convertTokenToHtml5Lib(token));
}
Expand Down