From 347206c279c7667f0729ac2fa3c7258c7110b292 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Sat, 13 Nov 2021 16:12:28 +0000 Subject: [PATCH] refactor: Convert most of the parser to TS --- packages/parse5-parser-stream/lib/index.ts | 4 +- .../lib/parser-feedback-simulator.ts | 2 +- packages/parse5/lib/common/doctype.ts | 2 +- packages/parse5/lib/common/token.ts | 2 +- .../error-reporting/parser-mixin.js | 2 +- .../{parser-mixin.js => parser-mixin.ts} | 53 +- .../lib/parser/formatting-element-list.ts | 12 +- .../parse5/lib/parser/{index.js => index.ts} | 741 ++++++++++-------- packages/parse5/lib/tokenizer/index.ts | 2 +- .../parse5/lib/tree-adapters/interface.ts | 2 +- .../test/location-info-tokenizer.test.ts | 4 +- 11 files changed, 468 insertions(+), 358 deletions(-) rename packages/parse5/lib/extensions/location-info/{parser-mixin.js => parser-mixin.ts} (78%) rename packages/parse5/lib/parser/{index.js => index.ts} (78%) diff --git a/packages/parse5-parser-stream/lib/index.ts b/packages/parse5-parser-stream/lib/index.ts index fd894246a..d9207a658 100644 --- a/packages/parse5-parser-stream/lib/index.ts +++ b/packages/parse5-parser-stream/lib/index.ts @@ -7,7 +7,7 @@ export class ParserStream extends Writable { writeCallback: null | (() => void) = null; pausedByScript = false; - parser: Parser; + parser: Parser; pendingHtmlInsertions: string[] = []; document: T['document']; @@ -70,7 +70,7 @@ export class ParserStream extends Writable { } } - _scriptHandler(scriptElement: any) { + _scriptHandler(scriptElement: T['element']) { if (this.listenerCount('script') > 0) { this.pausedByScript = true; this.emit('script', scriptElement, this._documentWrite, this._resume); diff --git a/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts b/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts index c85b64c91..f6ae7d87a 100644 --- a/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts +++ b/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts @@ -22,7 +22,7 @@ export class ParserFeedbackSimulator { } getNextToken(): Token { - const token = this.tokenizer.getNextToken()!; + const token = this.tokenizer.getNextToken(); if (token.type === Tokenizer.START_TAG_TOKEN) { this._handleStartTagToken(token); diff --git a/packages/parse5/lib/common/doctype.ts b/packages/parse5/lib/common/doctype.ts index 55a7e0028..a5242be45 100644 --- a/packages/parse5/lib/common/doctype.ts +++ b/packages/parse5/lib/common/doctype.ts @@ -99,7 +99,7 @@ export function isConforming(token: DoctypeToken): boolean { ); } -export function getDocumentMode(token: DoctypeToken): string | null { +export function getDocumentMode(token: DoctypeToken): DOCUMENT_MODE { if (token.name !== VALID_DOCTYPE_NAME) { return DOCUMENT_MODE.QUIRKS; } diff --git a/packages/parse5/lib/common/token.ts b/packages/parse5/lib/common/token.ts index 1b96486dd..9a5bc5471 100644 --- a/packages/parse5/lib/common/token.ts +++ b/packages/parse5/lib/common/token.ts @@ -61,7 +61,7 @@ export interface CommentToken extends TokenBase { data: string; } -interface EOFToken extends TokenBase { +export interface EOFToken extends TokenBase { readonly type: TokenType.EOF; } diff --git a/packages/parse5/lib/extensions/error-reporting/parser-mixin.js b/packages/parse5/lib/extensions/error-reporting/parser-mixin.js index 3773ec2ad..fcc2ed9b1 100644 --- a/packages/parse5/lib/extensions/error-reporting/parser-mixin.js +++ b/packages/parse5/lib/extensions/error-reporting/parser-mixin.js @@ -40,7 +40,7 @@ export class ErrorReportingParserMixin extends ErrorReportingMixinBase { }, _err(code, options) { - mxn.locBeforeToken = options && options.beforeToken; + mxn.locBeforeToken = options?.beforeToken; mxn._reportError(code); }, }; diff --git a/packages/parse5/lib/extensions/location-info/parser-mixin.js b/packages/parse5/lib/extensions/location-info/parser-mixin.ts similarity index 78% rename from packages/parse5/lib/extensions/location-info/parser-mixin.js rename to packages/parse5/lib/extensions/location-info/parser-mixin.ts index a7b182699..6bce109d3 100644 --- a/packages/parse5/lib/extensions/location-info/parser-mixin.js +++ b/packages/parse5/lib/extensions/location-info/parser-mixin.ts @@ -1,24 +1,31 @@ +import { CommentToken, DoctypeToken, CharacterToken } from './../../common/token'; import { Mixin } from '../../utils/mixin.js'; import { Tokenizer } from '../../tokenizer/index.js'; import { LocationInfoTokenizerMixin } from './tokenizer-mixin.js'; import { LocationInfoOpenElementStackMixin } from './open-element-stack-mixin.js'; import * as HTML from '../../common/html.js'; +import type { TreeAdapter, TreeAdapterTypeMap } from './../../tree-adapters/interface'; +import type { Parser } from '../../parser/index.js'; +import type { PositionTrackingPreprocessorMixin } from './../position-tracking/preprocessor-mixin'; +import type { Token, Location, TagToken } from '../../common/token.js'; //Aliases const $ = HTML.TAG_NAMES; -export class LocationInfoParserMixin extends Mixin { - constructor(parser) { +export class LocationInfoParserMixin extends Mixin> { + treeAdapter: TreeAdapter; + posTracker: PositionTrackingPreprocessorMixin | null = null; + lastStartTagToken: null | TagToken = null; + lastFosterParentingLocation: null | ReturnType['_findFosterParentingLocation']> = null; + currentToken: Token | null = null; + + constructor(parser: Parser) { super(parser); this.treeAdapter = parser.treeAdapter; - this.posTracker = null; - this.lastStartTagToken = null; - this.lastFosterParentingLocation = null; - this.currentToken = null; } - _setStartLocation(element) { + _setStartLocation(element: T['element']) { let loc = null; if (this.lastStartTagToken) { @@ -31,7 +38,7 @@ export class LocationInfoParserMixin extends Mixin { this.treeAdapter.setNodeSourceCodeLocation(element, loc); } - _setEndLocation(element, closingToken) { + _setEndLocation(element: T['element'], closingToken: TagToken) { const loc = this.treeAdapter.getNodeSourceCodeLocation(element); if (loc && closingToken.location) { @@ -41,7 +48,7 @@ export class LocationInfoParserMixin extends Mixin { // NOTE: For cases like

- First 'p' closes without a closing // tag and for cases like

- 'p' closes without a closing tag. const isClosingEndTag = closingToken.type === Tokenizer.END_TAG_TOKEN && tn === closingToken.tagName; - const endLoc = {}; + const endLoc: Location = {}; if (isClosingEndTag) { endLoc.endTag = { ...ctLoc }; endLoc.endLine = ctLoc.endLine; @@ -57,9 +64,9 @@ export class LocationInfoParserMixin extends Mixin { } } - _getOverriddenMethods(mxn, orig) { + override _getOverriddenMethods(mxn: LocationInfoParserMixin, orig: Parser) { return { - _bootstrap(document, fragmentContext) { + _bootstrap(this: Parser, document, fragmentContext) { orig._bootstrap.call(this, document, fragmentContext); mxn.lastStartTagToken = null; @@ -77,7 +84,7 @@ export class LocationInfoParserMixin extends Mixin { }); }, - _runParsingLoop(scriptHandler) { + _runParsingLoop(this: Parser, scriptHandler) { orig._runParsingLoop.call(this, scriptHandler); // NOTE: generate location info for elements @@ -88,12 +95,12 @@ export class LocationInfoParserMixin extends Mixin { }, //Token processing - _processTokenInForeignContent(token) { + _processTokenInForeignContent(this: Parser, token: Token) { mxn.currentToken = token; orig._processTokenInForeignContent.call(this, token); }, - _processToken(token) { + _processToken(this: Parser, token: Token) { mxn.currentToken = token; orig._processToken.call(this, token); @@ -116,7 +123,7 @@ export class LocationInfoParserMixin extends Mixin { }, //Doctype - _setDocumentType(token) { + _setDocumentType(this: Parser, token: DoctypeToken) { orig._setDocumentType.call(this, token); const documentChildren = this.treeAdapter.getChildNodes(this.document); @@ -128,7 +135,7 @@ export class LocationInfoParserMixin extends Mixin { }, //Elements - _attachElementToTree(element) { + _attachElementToTree(this: Parser, element: T['element']) { //NOTE: _attachElementToTree is called from _appendElement, _insertElement and _insertTemplate methods. //So we will use token location stored in this methods for the element. mxn._setStartLocation(element); @@ -136,17 +143,17 @@ export class LocationInfoParserMixin extends Mixin { orig._attachElementToTree.call(this, element); }, - _appendElement(token, namespaceURI) { + _appendElement(this: Parser, token: TagToken, namespaceURI: HTML.NAMESPACES) { mxn.lastStartTagToken = token; orig._appendElement.call(this, token, namespaceURI); }, - _insertElement(token, namespaceURI) { + _insertElement(this: Parser, token: TagToken, namespaceURI: HTML.NAMESPACES) { mxn.lastStartTagToken = token; orig._insertElement.call(this, token, namespaceURI); }, - _insertTemplate(token) { + _insertTemplate(this: Parser, token: TagToken) { mxn.lastStartTagToken = token; orig._insertTemplate.call(this, token); @@ -155,13 +162,13 @@ export class LocationInfoParserMixin extends Mixin { this.treeAdapter.setNodeSourceCodeLocation(tmplContent, null); }, - _insertFakeRootElement() { + _insertFakeRootElement(this: Parser) { orig._insertFakeRootElement.call(this); this.treeAdapter.setNodeSourceCodeLocation(this.openElements.current, null); }, //Comments - _appendCommentNode(token, parent) { + _appendCommentNode(this: Parser, token: CommentToken, parent: T['parentNode']) { orig._appendCommentNode.call(this, token, parent); const children = this.treeAdapter.getChildNodes(parent); @@ -171,7 +178,7 @@ export class LocationInfoParserMixin extends Mixin { }, //Text - _findFosterParentingLocation() { + _findFosterParentingLocation(this: Parser) { //NOTE: store last foster parenting location, so we will be able to find inserted text //in case of foster parenting mxn.lastFosterParentingLocation = orig._findFosterParentingLocation.call(this); @@ -179,7 +186,7 @@ export class LocationInfoParserMixin extends Mixin { return mxn.lastFosterParentingLocation; }, - _insertCharacters(token) { + _insertCharacters(this: Parser, token: CharacterToken) { orig._insertCharacters.call(this, token); const hasFosterParent = this._shouldFosterParentOnInsertion(); diff --git a/packages/parse5/lib/parser/formatting-element-list.ts b/packages/parse5/lib/parser/formatting-element-list.ts index e6a185f71..f3dec514b 100644 --- a/packages/parse5/lib/parser/formatting-element-list.ts +++ b/packages/parse5/lib/parser/formatting-element-list.ts @@ -1,4 +1,4 @@ -import type { Attribute, Token } from '../common/token.js'; +import type { Attribute, TagToken } from '../common/token.js'; import type { TreeAdapterTypeMap } from './../tree-adapters/interface'; //Const @@ -13,13 +13,13 @@ interface MarkerEntry { type: EntryType.Marker; } -interface ElementEntry { +export interface ElementEntry { type: EntryType.Element; element: T['element']; - token: Token; + token: TagToken; } -type Entry = MarkerEntry | ElementEntry; +export type Entry = MarkerEntry | ElementEntry; //List of formatting elements export class FormattingElementList { @@ -90,7 +90,7 @@ export class FormattingElementList { this.length++; } - pushElement(element: T['element'], token: Token) { + pushElement(element: T['element'], token: TagToken) { this._ensureNoahArkCondition(element); this.entries.push({ @@ -102,7 +102,7 @@ export class FormattingElementList { this.length++; } - insertElementAfterBookmark(element: T['element'], token: Token) { + insertElementAfterBookmark(element: T['element'], token: TagToken) { const bookmarkIdx = this.entries.lastIndexOf(this.bookmark!); this.entries.splice(bookmarkIdx + 1, 0, { diff --git a/packages/parse5/lib/parser/index.js b/packages/parse5/lib/parser/index.ts similarity index 78% rename from packages/parse5/lib/parser/index.js rename to packages/parse5/lib/parser/index.ts index 1b3a8c8af..ae5e6080f 100644 --- a/packages/parse5/lib/parser/index.js +++ b/packages/parse5/lib/parser/index.ts @@ -1,6 +1,6 @@ import { Tokenizer } from '../tokenizer/index.js'; import { OpenElementStack } from './open-element-stack.js'; -import { FormattingElementList } from './formatting-element-list.js'; +import { FormattingElementList, ElementEntry } from './formatting-element-list.js'; import { LocationInfoParserMixin } from '../extensions/location-info/parser-mixin.js'; import { ErrorReportingParserMixin } from '../extensions/error-reporting/parser-mixin.js'; import { Mixin } from '../utils/mixin.js'; @@ -10,6 +10,9 @@ import * as foreignContent from '../common/foreign-content.js'; import { ERR } from '../common/error-codes.js'; import * as unicode from '../common/unicode.js'; import * as HTML from '../common/html.js'; +import type { TreeAdapter, TreeAdapterTypeMap } from './../tree-adapters/interface'; +import type { ParserError } from './../extensions/error-reporting/mixin-base'; +import { Token, CommentToken, CharacterToken, TagToken, DoctypeToken, EOFToken } from './../common/token'; //Aliases const $ = HTML.TAG_NAMES; @@ -24,60 +27,74 @@ const AA_OUTER_LOOP_ITER = 8; const AA_INNER_LOOP_ITER = 3; //Insertion modes -const INITIAL_MODE = 'INITIAL_MODE'; -const BEFORE_HTML_MODE = 'BEFORE_HTML_MODE'; -const BEFORE_HEAD_MODE = 'BEFORE_HEAD_MODE'; -const IN_HEAD_MODE = 'IN_HEAD_MODE'; -const IN_HEAD_NO_SCRIPT_MODE = 'IN_HEAD_NO_SCRIPT_MODE'; -const AFTER_HEAD_MODE = 'AFTER_HEAD_MODE'; -const IN_BODY_MODE = 'IN_BODY_MODE'; -const TEXT_MODE = 'TEXT_MODE'; -const IN_TABLE_MODE = 'IN_TABLE_MODE'; -const IN_TABLE_TEXT_MODE = 'IN_TABLE_TEXT_MODE'; -const IN_CAPTION_MODE = 'IN_CAPTION_MODE'; -const IN_COLUMN_GROUP_MODE = 'IN_COLUMN_GROUP_MODE'; -const IN_TABLE_BODY_MODE = 'IN_TABLE_BODY_MODE'; -const IN_ROW_MODE = 'IN_ROW_MODE'; -const IN_CELL_MODE = 'IN_CELL_MODE'; -const IN_SELECT_MODE = 'IN_SELECT_MODE'; -const IN_SELECT_IN_TABLE_MODE = 'IN_SELECT_IN_TABLE_MODE'; -const IN_TEMPLATE_MODE = 'IN_TEMPLATE_MODE'; -const AFTER_BODY_MODE = 'AFTER_BODY_MODE'; -const IN_FRAMESET_MODE = 'IN_FRAMESET_MODE'; -const AFTER_FRAMESET_MODE = 'AFTER_FRAMESET_MODE'; -const AFTER_AFTER_BODY_MODE = 'AFTER_AFTER_BODY_MODE'; -const AFTER_AFTER_FRAMESET_MODE = 'AFTER_AFTER_FRAMESET_MODE'; +enum InsertionMode { + INITIAL = 'INITIAL_MODE', + BEFORE_HTML = 'BEFORE_HTML_MODE', + BEFORE_HEAD = 'BEFORE_HEAD_MODE', + IN_HEAD = 'IN_HEAD_MODE', + IN_HEAD_NO_SCRIPT = 'IN_HEAD_NO_SCRIPT_MODE', + AFTER_HEAD = 'AFTER_HEAD_MODE', + IN_BODY = 'IN_BODY_MODE', + TEXT = 'TEXT_MODE', + IN_TABLE = 'IN_TABLE_MODE', + IN_TABLE_TEXT = 'IN_TABLE_TEXT_MODE', + IN_CAPTION = 'IN_CAPTION_MODE', + IN_COLUMN_GROUP = 'IN_COLUMN_GROUP_MODE', + IN_TABLE_BODY = 'IN_TABLE_BODY_MODE', + IN_ROW = 'IN_ROW_MODE', + IN_CELL = 'IN_CELL_MODE', + IN_SELECT = 'IN_SELECT_MODE', + IN_SELECT_IN_TABLE = 'IN_SELECT_IN_TABLE_MODE', + IN_TEMPLATE = 'IN_TEMPLATE_MODE', + AFTER_BODY = 'AFTER_BODY_MODE', + IN_FRAMESET = 'IN_FRAMESET_MODE', + AFTER_FRAMESET = 'AFTER_FRAMESET_MODE', + AFTER_AFTER_BODY = 'AFTER_AFTER_BODY_MODE', + AFTER_AFTER_FRAMESET = 'AFTER_AFTER_FRAMESET_MODE', +} //Insertion mode reset map const INSERTION_MODE_RESET_MAP = new Map([ - [$.TR, IN_ROW_MODE], - [$.TBODY, IN_TABLE_BODY_MODE], - [$.THEAD, IN_TABLE_BODY_MODE], - [$.TFOOT, IN_TABLE_BODY_MODE], - [$.CAPTION, IN_CAPTION_MODE], - [$.COLGROUP, IN_COLUMN_GROUP_MODE], - [$.TABLE, IN_TABLE_MODE], - [$.BODY, IN_BODY_MODE], - [$.FRAMESET, IN_FRAMESET_MODE], + [$.TR, InsertionMode.IN_ROW], + [$.TBODY, InsertionMode.IN_TABLE_BODY], + [$.THEAD, InsertionMode.IN_TABLE_BODY], + [$.TFOOT, InsertionMode.IN_TABLE_BODY], + [$.CAPTION, InsertionMode.IN_CAPTION], + [$.COLGROUP, InsertionMode.IN_COLUMN_GROUP], + [$.TABLE, InsertionMode.IN_TABLE], + [$.BODY, InsertionMode.IN_BODY], + [$.FRAMESET, InsertionMode.IN_FRAMESET], ]); //Template insertion mode switch map -const TEMPLATE_INSERTION_MODE_SWITCH_MAP = new Map([ - [$.CAPTION, IN_TABLE_MODE], - [$.COLGROUP, IN_TABLE_MODE], - [$.TBODY, IN_TABLE_MODE], - [$.TFOOT, IN_TABLE_MODE], - [$.THEAD, IN_TABLE_MODE], - [$.COL, IN_COLUMN_GROUP_MODE], - [$.TR, IN_TABLE_BODY_MODE], - [$.TD, IN_ROW_MODE], - [$.TH, IN_ROW_MODE], +const TEMPLATE_INSERTION_MODE_SWITCH_MAP = new Map([ + [$.CAPTION, InsertionMode.IN_TABLE], + [$.COLGROUP, InsertionMode.IN_TABLE], + [$.TBODY, InsertionMode.IN_TABLE], + [$.TFOOT, InsertionMode.IN_TABLE], + [$.THEAD, InsertionMode.IN_TABLE], + [$.COL, InsertionMode.IN_COLUMN_GROUP], + [$.TR, InsertionMode.IN_TABLE_BODY], + [$.TD, InsertionMode.IN_ROW], + [$.TH, InsertionMode.IN_ROW], ]); //Token handlers map for insertion modes -const TOKEN_HANDLERS = new Map([ +const TOKEN_HANDLERS = new Map< + InsertionMode, + { + [Tokenizer.CHARACTER_TOKEN]: (p: Parser, token: CharacterToken) => void; + [Tokenizer.NULL_CHARACTER_TOKEN]: (p: Parser, token: CharacterToken) => void; + [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: (p: Parser, token: CharacterToken) => void; + [Tokenizer.COMMENT_TOKEN]: (p: Parser, token: CommentToken) => void; + [Tokenizer.DOCTYPE_TOKEN]: (p: Parser, token: DoctypeToken) => void; + [Tokenizer.START_TAG_TOKEN]: (p: Parser, token: TagToken) => void; + [Tokenizer.END_TAG_TOKEN]: (p: Parser, token: TagToken) => void; + [Tokenizer.EOF_TOKEN]: (p: Parser, token: EOFToken) => void; + } +>([ [ - INITIAL_MODE, + InsertionMode.INITIAL, { [Tokenizer.CHARACTER_TOKEN]: tokenInInitialMode, [Tokenizer.NULL_CHARACTER_TOKEN]: tokenInInitialMode, @@ -90,7 +107,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - BEFORE_HTML_MODE, + InsertionMode.BEFORE_HTML, { [Tokenizer.CHARACTER_TOKEN]: tokenBeforeHtml, [Tokenizer.NULL_CHARACTER_TOKEN]: tokenBeforeHtml, @@ -103,7 +120,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - BEFORE_HEAD_MODE, + InsertionMode.BEFORE_HEAD, { [Tokenizer.CHARACTER_TOKEN]: tokenBeforeHead, [Tokenizer.NULL_CHARACTER_TOKEN]: tokenBeforeHead, @@ -116,7 +133,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - IN_HEAD_MODE, + InsertionMode.IN_HEAD, { [Tokenizer.CHARACTER_TOKEN]: tokenInHead, [Tokenizer.NULL_CHARACTER_TOKEN]: tokenInHead, @@ -129,7 +146,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - IN_HEAD_NO_SCRIPT_MODE, + InsertionMode.IN_HEAD_NO_SCRIPT, { [Tokenizer.CHARACTER_TOKEN]: tokenInHeadNoScript, [Tokenizer.NULL_CHARACTER_TOKEN]: tokenInHeadNoScript, @@ -142,7 +159,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - AFTER_HEAD_MODE, + InsertionMode.AFTER_HEAD, { [Tokenizer.CHARACTER_TOKEN]: tokenAfterHead, [Tokenizer.NULL_CHARACTER_TOKEN]: tokenAfterHead, @@ -155,7 +172,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - IN_BODY_MODE, + InsertionMode.IN_BODY, { [Tokenizer.CHARACTER_TOKEN]: characterInBody, [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken, @@ -168,7 +185,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - TEXT_MODE, + InsertionMode.TEXT, { [Tokenizer.CHARACTER_TOKEN]: insertCharacters, [Tokenizer.NULL_CHARACTER_TOKEN]: insertCharacters, @@ -181,7 +198,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - IN_TABLE_MODE, + InsertionMode.IN_TABLE, { [Tokenizer.CHARACTER_TOKEN]: characterInTable, [Tokenizer.NULL_CHARACTER_TOKEN]: characterInTable, @@ -194,7 +211,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - IN_TABLE_TEXT_MODE, + InsertionMode.IN_TABLE_TEXT, { [Tokenizer.CHARACTER_TOKEN]: characterInTableText, [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken, @@ -207,7 +224,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - IN_CAPTION_MODE, + InsertionMode.IN_CAPTION, { [Tokenizer.CHARACTER_TOKEN]: characterInBody, [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken, @@ -220,7 +237,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - IN_COLUMN_GROUP_MODE, + InsertionMode.IN_COLUMN_GROUP, { [Tokenizer.CHARACTER_TOKEN]: tokenInColumnGroup, [Tokenizer.NULL_CHARACTER_TOKEN]: tokenInColumnGroup, @@ -233,7 +250,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - IN_TABLE_BODY_MODE, + InsertionMode.IN_TABLE_BODY, { [Tokenizer.CHARACTER_TOKEN]: characterInTable, [Tokenizer.NULL_CHARACTER_TOKEN]: characterInTable, @@ -246,7 +263,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - IN_ROW_MODE, + InsertionMode.IN_ROW, { [Tokenizer.CHARACTER_TOKEN]: characterInTable, [Tokenizer.NULL_CHARACTER_TOKEN]: characterInTable, @@ -259,7 +276,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - IN_CELL_MODE, + InsertionMode.IN_CELL, { [Tokenizer.CHARACTER_TOKEN]: characterInBody, [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken, @@ -272,7 +289,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - IN_SELECT_MODE, + InsertionMode.IN_SELECT, { [Tokenizer.CHARACTER_TOKEN]: insertCharacters, [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken, @@ -285,7 +302,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - IN_SELECT_IN_TABLE_MODE, + InsertionMode.IN_SELECT_IN_TABLE, { [Tokenizer.CHARACTER_TOKEN]: insertCharacters, [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken, @@ -298,7 +315,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - IN_TEMPLATE_MODE, + InsertionMode.IN_TEMPLATE, { [Tokenizer.CHARACTER_TOKEN]: characterInBody, [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken, @@ -311,7 +328,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - AFTER_BODY_MODE, + InsertionMode.AFTER_BODY, { [Tokenizer.CHARACTER_TOKEN]: tokenAfterBody, [Tokenizer.NULL_CHARACTER_TOKEN]: tokenAfterBody, @@ -324,7 +341,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - IN_FRAMESET_MODE, + InsertionMode.IN_FRAMESET, { [Tokenizer.CHARACTER_TOKEN]: ignoreToken, [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken, @@ -337,7 +354,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - AFTER_FRAMESET_MODE, + InsertionMode.AFTER_FRAMESET, { [Tokenizer.CHARACTER_TOKEN]: ignoreToken, [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken, @@ -350,7 +367,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - AFTER_AFTER_BODY_MODE, + InsertionMode.AFTER_AFTER_BODY, { [Tokenizer.CHARACTER_TOKEN]: tokenAfterAfterBody, [Tokenizer.NULL_CHARACTER_TOKEN]: tokenAfterAfterBody, @@ -363,7 +380,7 @@ const TOKEN_HANDLERS = new Map([ }, ], [ - AFTER_AFTER_FRAMESET_MODE, + InsertionMode.AFTER_AFTER_FRAMESET, { [Tokenizer.CHARACTER_TOKEN]: ignoreToken, [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken, @@ -377,22 +394,61 @@ const TOKEN_HANDLERS = new Map([ ], ]); -const TOKEN_HANDLER_IN_BODY = TOKEN_HANDLERS.get(IN_BODY_MODE); +const TOKEN_HANDLER_IN_BODY = TOKEN_HANDLERS.get(InsertionMode.IN_BODY); + +const TABLE_STRUCTURE_TAGS = new Set([$.TABLE, $.TBODY, $.TFOOT, $.THEAD, $.TR]); + +export interface ParserOptions { + /** + * The [scripting flag](https://html.spec.whatwg.org/multipage/parsing.html#scripting-flag). If set + * to `true`, `noscript` element content will be parsed as text. + * + * **Default:** `true` + */ + scriptingEnabled?: boolean | undefined; + + /** + * Enables source code location information. When enabled, each node (except the root node) + * will have a `sourceCodeLocation` property. If the node is not an empty element, `sourceCodeLocation` will + * be a {@link ElementLocation} object, otherwise it will be {@link Location}. + * If the element was implicitly created by the parser (as part of + * [tree correction](https://html.spec.whatwg.org/multipage/syntax.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser)), + * its `sourceCodeLocation` property will be `undefined`. + * + * **Default:** `false` + */ + sourceCodeLocationInfo?: boolean | undefined; + + /** + * Specifies the resulting tree format. + * + * **Default:** `treeAdapters.default` + */ + treeAdapter?: TreeAdapter | undefined; +} -const TABLE_STRUCTURE_TAGS = new Set([$.TABLE, $.TBODY, $.TFOOT, $.THEAD, $.TR]); +interface InternalParserOptions extends ParserOptions { + treeAdapter: TreeAdapter; + + onParseError: ((err: ParserError) => void) | null; +} //Parser -export class Parser { - constructor(options) { +export class Parser { + options: InternalParserOptions; + treeAdapter: TreeAdapter; + pendingScript: null | T['element']; + + constructor(options?: ParserOptions) { this.options = { scriptingEnabled: true, sourceCodeLocationInfo: false, onParseError: null, - treeAdapter: defaultTreeAdapter, + treeAdapter: defaultTreeAdapter as TreeAdapter, ...options, }; - this.treeAdapter = this.options.treeAdapter; + this.treeAdapter = this.options.treeAdapter!; this.pendingScript = null; if (this.options.sourceCodeLocationInfo) { @@ -405,7 +461,7 @@ export class Parser { } // API - parse(html) { + parse(html: string) { const document = this.treeAdapter.createDocument(); this._bootstrap(document, null); @@ -415,7 +471,7 @@ export class Parser { return document; } - parseFragment(html, fragmentContext) { + parseFragment(html: string, fragmentContext?: T['element']) { //NOTE: use