Skip to content
This repository has been archived by the owner on Mar 24, 2022. It is now read-only.

Commit

Permalink
refactor(parser): Remove _bootstrap method (inikulin#384)
Browse files Browse the repository at this point in the history
  • Loading branch information
fb55 committed Mar 2, 2022
1 parent cb2265d commit c31654a
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 102 deletions.
4 changes: 1 addition & 3 deletions packages/parse5-parser-stream/lib/index.ts
Expand Up @@ -43,9 +43,7 @@ export class ParserStream<T extends TreeAdapterTypeMap = DefaultTreeAdapterMap>
super({ decodeStrings: false });

this.parser = new Parser(options);

this.document = this.parser.treeAdapter.createDocument();
this.parser._bootstrap(this.document, null);
this.document = this.parser.document;
}

//WritableStream implementation
Expand Down
8 changes: 2 additions & 6 deletions packages/parse5/lib/index.ts
Expand Up @@ -29,9 +29,7 @@ export function parse<T extends TreeAdapterTypeMap = DefaultTreeAdapterMap>(
html: string,
options?: ParserOptions<T>
): T['document'] {
const parser = new Parser(options);

return parser.parse(html);
return Parser.parse(html, options);
}

/**
Expand Down Expand Up @@ -77,7 +75,5 @@ export function parseFragment<T extends TreeAdapterTypeMap = DefaultTreeAdapterM
fragmentContext = null;
}

const parser = new Parser(options);

return parser.parseFragment(html as string, fragmentContext);
return Parser.parseFragment(html as string, fragmentContext, options);
}
20 changes: 12 additions & 8 deletions packages/parse5/lib/parser/index.test.ts
Expand Up @@ -8,7 +8,7 @@ import { treeAdapters } from 'parse5-test-utils/utils/common.js';
import { NAMESPACES as NS } from '../common/html.js';
import { isElementNode } from '../tree-adapters/default.js';

const origParseFragment = Parser.prototype.parseFragment;
const origParseFragment = Parser.parseFragment;

generateParsingTests('parser', 'Parser', {}, (test, opts) => ({
node: test.fragmentContext
Expand All @@ -27,21 +27,25 @@ describe('parser', () => {

describe('Regression - Incorrect arguments fallback for the parser.parseFragment (GH-82, GH-83)', () => {
beforeEach(() => {
Parser.prototype.parseFragment = function <T extends TreeAdapterTypeMap>(
this: Parser<T>,
Parser.parseFragment = function <T extends TreeAdapterTypeMap>(
html: string,
fragmentContext?: T['element']
): { html: string; fragmentContext: T['element'] | null | undefined; options: ParserOptions<T> } {
fragmentContext?: T['element'],
options?: ParserOptions<T>
): {
html: string;
fragmentContext: T['element'] | null | undefined;
options: ParserOptions<T> | undefined;
} {
return {
html,
fragmentContext,
options: this.options,
options,
};
};
});

afterEach(() => {
Parser.prototype.parseFragment = origParseFragment;
Parser.parseFragment = origParseFragment;
});

it('parses correctly', () => {
Expand All @@ -65,7 +69,7 @@ describe('parser', () => {

assert.ok(!args.fragmentContext);
expect(args).toHaveProperty('html', html);
assert.ok(!args.options.sourceCodeLocationInfo);
assert.ok(!args.options);
});
});

Expand Down
143 changes: 69 additions & 74 deletions packages/parse5/lib/parser/index.ts
Expand Up @@ -82,7 +82,7 @@ export interface ParserOptions<T extends TreeAdapterTypeMap> {
*
* @default `true`
*/
scriptingEnabled?: boolean | undefined;
scriptingEnabled?: boolean;

/**
* Enables source code location information. When enabled, each node (except the root node)
Expand All @@ -94,14 +94,14 @@ export interface ParserOptions<T extends TreeAdapterTypeMap> {
*
* @default `false`
*/
sourceCodeLocationInfo?: boolean | undefined;
sourceCodeLocationInfo?: boolean;

/**
* Specifies the resulting tree format.
*
* @default `treeAdapters.default`
*/
treeAdapter?: TreeAdapter<T> | undefined;
treeAdapter?: TreeAdapter<T>;

/**
* Callback for parse errors.
Expand All @@ -111,86 +111,119 @@ export interface ParserOptions<T extends TreeAdapterTypeMap> {
onParseError?: ParserErrorHandler | null;
}

// Baseline option values. They are spread first and the caller-supplied
// options are spread on top, so any option the caller omits falls back to
// the value listed here. The values mirror the `@default` tags documented
// on `ParserOptions`.
const defaultParserOptions = {
scriptingEnabled: true,
sourceCodeLocationInfo: false,
treeAdapter: defaultTreeAdapter,
onParseError: null,
};

//Parser
export class Parser<T extends TreeAdapterTypeMap> {
options: ParserOptions<T>;
treeAdapter: TreeAdapter<T>;
private onParseError: ParserErrorHandler | null;
private currentToken: Token | null = null;
public options: Required<ParserOptions<T>>;
public document: T['document'];

constructor(options?: ParserOptions<T>) {
public constructor(
options?: ParserOptions<T>,
document?: T['document'],
public fragmentContext: T['element'] | null = null
) {
this.options = {
scriptingEnabled: true,
sourceCodeLocationInfo: false,
...defaultParserOptions,
...options,
};

this.treeAdapter = this.options.treeAdapter ??= defaultTreeAdapter as TreeAdapter<T>;
this.onParseError = this.options.onParseError ??= null;
this.treeAdapter = this.options.treeAdapter;
this.onParseError = this.options.onParseError;

// Always enable location info if we report parse errors.
if (this.onParseError) {
this.options.sourceCodeLocationInfo = true;
}

this.document = document ?? this.treeAdapter.createDocument();

this.tokenizer = new Tokenizer(this.options);
this.activeFormattingElements = new FormattingElementList(this.treeAdapter);

this.fragmentContextID = fragmentContext ? getTagID(this.treeAdapter.getTagName(fragmentContext)) : $.UNKNOWN;
this._setContextModes(fragmentContext ?? this.document, this.fragmentContextID);

this.openElements = new OpenElementStack(
this.document,
this.treeAdapter,
this.onItemPush.bind(this),
this.onItemPop.bind(this)
);
}

// API
public parse(html: string): T['document'] {
const document = this.treeAdapter.createDocument();
public static parse<T extends TreeAdapterTypeMap>(html: string, options?: ParserOptions<T>): T['document'] {
const parser = new this(options);

this._bootstrap(document, null);
this.tokenizer.write(html, true);
this._runParsingLoop(null);
parser.tokenizer.write(html, true);
parser._runParsingLoop(null);

return document;
return parser.document;
}

public parseFragment(html: string, fragmentContext?: T['parentNode'] | null): T['documentFragment'] {
public static parseFragment<T extends TreeAdapterTypeMap>(
html: string,
fragmentContext?: T['parentNode'] | null,
options?: ParserOptions<T>
): T['documentFragment'] {
const opts: Required<ParserOptions<T>> = {
...defaultParserOptions,
...options,
};

//NOTE: use <template> element as a fragment context if context element was not provided,
//so we will parse in "forgiving" manner
fragmentContext ??= this.treeAdapter.createElement(TN.TEMPLATE, NS.HTML, []);
fragmentContext ??= opts.treeAdapter.createElement(TN.TEMPLATE, NS.HTML, []);

//NOTE: create a fake element which will be used as the 'document' for fragment parsing.
//This is important for jsdom, where 'document' can't be recreated, so parsing a
//fragment directly into it would mess up the main `document`.
const documentMock = this.treeAdapter.createElement('documentmock', NS.HTML, []);
const documentMock = opts.treeAdapter.createElement('documentmock', NS.HTML, []);

this._bootstrap(documentMock, fragmentContext);
const parser = new this(opts, documentMock, fragmentContext);

if (this.fragmentContextID === $.TEMPLATE) {
this.tmplInsertionModeStack.unshift(InsertionMode.IN_TEMPLATE);
if (parser.fragmentContextID === $.TEMPLATE) {
parser.tmplInsertionModeStack.unshift(InsertionMode.IN_TEMPLATE);
}

this._initTokenizerForFragmentParsing();
this._insertFakeRootElement();
this._resetInsertionMode();
this._findFormInFragmentContext();
this.tokenizer.write(html, true);
this._runParsingLoop(null);
parser._initTokenizerForFragmentParsing();
parser._insertFakeRootElement();
parser._resetInsertionMode();
parser._findFormInFragmentContext();
parser.tokenizer.write(html, true);
parser._runParsingLoop(null);

const rootElement = this.treeAdapter.getFirstChild(documentMock) as T['parentNode'];
const fragment = this.treeAdapter.createDocumentFragment();
const rootElement = opts.treeAdapter.getFirstChild(documentMock) as T['parentNode'];
const fragment = opts.treeAdapter.createDocumentFragment();

this._adoptNodes(rootElement, fragment);
parser._adoptNodes(rootElement, fragment);

return fragment;
}

tokenizer!: Tokenizer;
tokenizer: Tokenizer;

stopped = false;
insertionMode = InsertionMode.INITIAL;
originalInsertionMode = InsertionMode.INITIAL;

document!: T['document'];
fragmentContext!: T['element'] | null;
fragmentContextID = $.UNKNOWN;
fragmentContextID: $;

headElement: null | T['element'] = null;
formElement: null | T['element'] = null;
pendingScript: null | T['element'] = null;

openElements!: OpenElementStack<T>;
activeFormattingElements!: FormattingElementList<T>;
openElements: OpenElementStack<T>;
activeFormattingElements: FormattingElementList<T>;
private _considerForeignContent = false;

/**
Expand All @@ -206,44 +239,6 @@ export class Parser<T extends TreeAdapterTypeMap> {
skipNextNewLine = false;
fosterParentingEnabled = false;

//Bootstrap parser
_bootstrap(document: T['document'], fragmentContext: T['element'] | null): void {
this.tokenizer = new Tokenizer(this.options);

this.stopped = false;

this.insertionMode = InsertionMode.INITIAL;
this.originalInsertionMode = InsertionMode.INITIAL;

this.document = document;
this.fragmentContext = fragmentContext;
this.fragmentContextID = fragmentContext ? getTagID(this.treeAdapter.getTagName(fragmentContext)) : $.UNKNOWN;
this._setContextModes(fragmentContext ?? document, this.fragmentContextID);

this.headElement = null;
this.formElement = null;
this.pendingScript = null;
this.currentToken = null;

this.openElements = new OpenElementStack(
this.document,
this.treeAdapter,
this.onItemPush.bind(this),
this.onItemPop.bind(this)
);

this.activeFormattingElements = new FormattingElementList(this.treeAdapter);

this.tmplInsertionModeStack.length = 0;

this.pendingCharacterTokens.length = 0;
this.hasNonWhitespacePendingCharacterToken = false;

this.framesetOk = true;
this.skipNextNewLine = false;
this.fosterParentingEnabled = false;
}

//Errors
_err(token: Token, code: ERR, beforeToken?: boolean): void {
if (!this.onParseError) return;
Expand Down
24 changes: 13 additions & 11 deletions scripts/generate-parser-feedback-test/index.ts
Expand Up @@ -6,6 +6,7 @@ import { convertTokenToHtml5Lib } from 'parse5-test-utils/utils/generate-tokeniz
import { parseDatFile } from 'parse5-test-utils/utils/parse-dat-file.js';
import { addSlashes } from 'parse5-test-utils/utils/common.js';
import { TokenType, Token } from '../../packages/parse5/dist/common/token.js';
import type { TreeAdapterTypeMap } from '../../packages/parse5/dist/tree-adapters/interface.js';

// eslint-disable-next-line no-console
main().catch(console.error);
Expand Down Expand Up @@ -42,21 +43,22 @@ function appendToken(dest: Token[], token: Token): void {

function collectParserTokens(html: string): ReturnType<typeof convertTokenToHtml5Lib>[] {
const tokens: Token[] = [];
const parser = new Parser();

parser._processInputToken = function (token): void {
Parser.prototype._processInputToken.call(this, token);
class ExtendedParser<T extends TreeAdapterTypeMap> extends Parser<T> {
override _processInputToken(token: Token): void {
super._processInputToken(token);

// NOTE: Needed to split attributes of duplicate <html> and <body>
// which are otherwise merged as per tree constructor spec
if (token.type === TokenType.START_TAG) {
token.attrs = [...token.attrs];
}
// NOTE: Needed to split attributes of duplicate <html> and <body>
// which are otherwise merged as per tree constructor spec
if (token.type === TokenType.START_TAG) {
token.attrs = [...token.attrs];
}

appendToken(tokens, token);
};
appendToken(tokens, token);
}
}

parser.parse(html);
ExtendedParser.parse(html);

return tokens.map((token) => convertTokenToHtml5Lib(token));
}
Expand Down

0 comments on commit c31654a

Please sign in to comment.