diff --git a/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts b/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts index 73b4221e0..e7019d879 100644 --- a/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts +++ b/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts @@ -64,13 +64,13 @@ export class ParserFeedbackSimulator { private _enterNamespace(namespace: NS): void { this.namespaceStack.unshift(namespace); this.inForeignContent = namespace !== NS.HTML; - this.tokenizer.allowCDATA = this.inForeignContent; + this.tokenizer.inForeignNode = this.inForeignContent; } private _leaveCurrentNamespace(): void { this.namespaceStack.shift(); this.inForeignContent = this.namespaceStack[0] !== NS.HTML; - this.tokenizer.allowCDATA = this.inForeignContent; + this.tokenizer.inForeignNode = this.inForeignContent; } //Token handlers diff --git a/packages/parse5/lib/parser/index.ts b/packages/parse5/lib/parser/index.ts index b3045620a..e6ad91d92 100644 --- a/packages/parse5/lib/parser/index.ts +++ b/packages/parse5/lib/parser/index.ts @@ -224,7 +224,8 @@ export class Parser { openElements: OpenElementStack; activeFormattingElements: FormattingElementList; - private _considerForeignContent = false; + /** Indicates that the current node is not an element in the HTML namespace */ + private currentNotInHTML = false; /** * The template insertion mode stack is maintained from the left. @@ -262,30 +263,13 @@ export class Parser { while (!this.stopped) { const token = this.tokenizer.getNextToken(); - if (token.type === TokenType.HIBERNATION) { - break; - } - - if (this.skipNextNewLine) { - this.skipNextNewLine = false; - - if ( - token.type === TokenType.WHITESPACE_CHARACTER && - token.chars.charCodeAt(0) === unicode.CODE_POINTS.LINE_FEED - ) { - if (token.chars.length === 1) { - continue; - } + this._processToken(token); - token.chars = token.chars.substr(1); - } + if (token.type === TokenType.START_TAG && token.selfClosing && !token.ackSelfClosing) { + this._err(token, ERR.nonVoidHtmlElementStartTagWithTrailingSolidus); } - this.currentToken = token; - - this._processInputToken(token); - - if (scriptHandler !== null && this.pendingScript) { + if (!this.tokenizer.active || (scriptHandler !== null && this.pendingScript)) { break; } } @@ -341,8 +325,8 @@ export class Parser { private _setContextModes(current: T['parentNode'], tid: number): void { const isHTML = current === this.document || this.treeAdapter.getNamespaceURI(current) === NS.HTML; - this._considerForeignContent = !isHTML; - this.tokenizer.allowCDATA = !isHTML && !this._isIntegrationPoint(tid, current); + this.currentNotInHTML = !isHTML; + this.tokenizer.inForeignNode = !isHTML && !this._isIntegrationPoint(tid, current); } _switchToTextParsing( @@ -545,27 +529,31 @@ export class Parser { const ctLoc = closingToken.location; const tn = this.treeAdapter.getTagName(element); - // NOTE: For cases like

- First 'p' closes without a closing - // tag and for cases like

- 'p' closes without a closing tag. - const isClosingEndTag = closingToken.type === TokenType.END_TAG && tn === closingToken.tagName; - const endLoc: Partial = {}; - if (isClosingEndTag) { - endLoc.endTag = { ...ctLoc }; - endLoc.endLine = ctLoc.endLine; - endLoc.endCol = ctLoc.endCol; - endLoc.endOffset = ctLoc.endOffset; - } else { - endLoc.endLine = ctLoc.startLine; - endLoc.endCol = ctLoc.startCol; - endLoc.endOffset = ctLoc.startOffset; - } + const endLoc: Partial = + // NOTE: For cases like

- First 'p' closes without a closing + // tag and for cases like

- 'p' closes without a closing tag. + closingToken.type === TokenType.END_TAG && tn === closingToken.tagName + ? { + endTag: { ...ctLoc }, + endLine: ctLoc.endLine, + endCol: ctLoc.endCol, + endOffset: ctLoc.endOffset, + } + : { + endLine: ctLoc.startLine, + endCol: ctLoc.startCol, + endOffset: ctLoc.startOffset, + }; this.treeAdapter.updateNodeSourceCodeLocation(element, endLoc); } } //Token processing - private _shouldProcessTokenInForeignContent(token: Token): boolean { + private shouldProcessStartTagTokenInForeignContent(token: TagToken): boolean { + // Check that neither current === document, or ns === NS.HTML + if (!this.currentNotInHTML) return false; + let current: T['parentNode']; let currentTagId: number; @@ -576,180 +564,58 @@ export class Parser { ({ current, currentTagId } = this.openElements); } - const ns = this.treeAdapter.getNamespaceURI(current); - - //NOTE: We won't get here with current === document, or ns === NS.HTML - if ( - token.type === TokenType.START_TAG && token.tagID === $.SVG && this.treeAdapter.getTagName(current) === TN.ANNOTATION_XML && - ns === NS.MATHML - ) { - return false; - } - - const isCharacterToken = - token.type === TokenType.CHARACTER || - token.type === TokenType.NULL_CHARACTER || - token.type === TokenType.WHITESPACE_CHARACTER; - - const isMathMLTextStartTag = - token.type === TokenType.START_TAG && token.tagID !== $.MGLYPH && token.tagID !== $.MALIGNMARK; - - if ((isMathMLTextStartTag || isCharacterToken) && this._isIntegrationPoint(currentTagId, current, NS.MATHML)) { - return false; - } - - if ( - (token.type === TokenType.START_TAG || isCharacterToken) && - this._isIntegrationPoint(currentTagId, current, NS.HTML) + this.treeAdapter.getNamespaceURI(current) === NS.MATHML ) { return false; } - return token.type !== TokenType.EOF; + return ( + // Check that `current` is not an integration point for HTML or MathML elements. + this.tokenizer.inForeignNode || + // If it _is_ an integration point, then we might have to check that it is not an HTML + // integration point. + ((token.tagID === $.MGLYPH || token.tagID === $.MALIGNMARK) && + !this._isIntegrationPoint(currentTagId, current, NS.HTML)) + ); } _processToken(token: Token): void { - switch (this.insertionMode) { - case InsertionMode.INITIAL: { - modeInitial(this, token); - break; - } - case InsertionMode.BEFORE_HTML: { - modeBeforeHtml(this, token); - break; - } - case InsertionMode.BEFORE_HEAD: { - modeBeforeHead(this, token); - break; - } - case InsertionMode.IN_HEAD: { - modeInHead(this, token); - break; - } - case InsertionMode.IN_HEAD_NO_SCRIPT: { - modeInHeadNoScript(this, token); - break; - } - case InsertionMode.AFTER_HEAD: { - modeAfterHead(this, token); - break; - } - case InsertionMode.IN_BODY: { - modeInBody(this, token); - break; - } - case InsertionMode.TEXT: { - modeText(this, token); - break; - } - case InsertionMode.IN_TABLE: { - modeInTable(this, token); - break; - } - case InsertionMode.IN_TABLE_TEXT: { - modeInTableText(this, token); - break; - } - case InsertionMode.IN_CAPTION: { - modeInCaption(this, token); - break; - } - case InsertionMode.IN_COLUMN_GROUP: { - modeInColumnGroup(this, token); - break; - } - case InsertionMode.IN_TABLE_BODY: { - modeInTableBody(this, token); - break; - } - case InsertionMode.IN_ROW: { - modeInRow(this, token); - break; - } - case InsertionMode.IN_CELL: { - modeInCell(this, token); - break; - } - case InsertionMode.IN_SELECT: { - modeInSelect(this, token); - break; - } - case InsertionMode.IN_SELECT_IN_TABLE: { - modeInSelectInTable(this, token); - break; - } - case InsertionMode.IN_TEMPLATE: { - modeInTemplate(this, token); - break; - } - case InsertionMode.AFTER_BODY: { - modeAfterBody(this, token); - break; - } - case InsertionMode.IN_FRAMESET: { - modeInFrameset(this, token); - break; - } - case InsertionMode.AFTER_FRAMESET: { - modeAfterFrameset(this, token); - break; - } - case InsertionMode.AFTER_AFTER_BODY: { - modeAfterAfterBody(this, token); - break; - } - case InsertionMode.AFTER_AFTER_FRAMESET: { - modeAfterAfterFrameset(this, token); - break; - } - default: - // Do nothing - } - } - - _processTokenInForeignContent(token: Token): void { switch (token.type) { case TokenType.CHARACTER: { - characterInForeignContent(this, token); + this.onCharacter(token); break; } case TokenType.NULL_CHARACTER: { - nullCharacterInForeignContent(this, token); + this.onNullCharacter(token); break; } - case TokenType.WHITESPACE_CHARACTER: { - this._insertCharacters(token); + case TokenType.COMMENT: { + this.onComment(token); break; } - case TokenType.COMMENT: { - appendComment(this, token); + case TokenType.DOCTYPE: { + this.onDoctype(token); break; } case TokenType.START_TAG: { - startTagInForeignContent(this, token); + this.onStartTag(token); break; } case TokenType.END_TAG: { - endTagInForeignContent(this, token); + this.onEndTag(token); + break; + } + case TokenType.EOF: { + this.onEof(token); + break; + } + case TokenType.WHITESPACE_CHARACTER: { + this.onWhitespaceCharacter(token); break; } - default: - // Do nothing - } - } - - _processInputToken(token: Token): void { - if (this._considerForeignContent && this._shouldProcessTokenInForeignContent(token)) { - this._processTokenInForeignContent(token); - } else { - this._processToken(token); - } - - if (token.type === TokenType.START_TAG && token.selfClosing && !token.ackSelfClosing) { - this._err(token, ERR.nonVoidHtmlElementStartTagWithTrailingSolidus); } } @@ -918,6 +784,442 @@ export class Parser { return SPECIAL_ELEMENTS[ns].has(id); } + + onCharacter(token: CharacterToken): void { + this.skipNextNewLine = false; + + if (this.tokenizer.inForeignNode) { + characterInForeignContent(this, token); + return; + } + + switch (this.insertionMode) { + case InsertionMode.INITIAL: + tokenInInitialMode(this, token); + break; + case InsertionMode.BEFORE_HTML: + tokenBeforeHtml(this, token); + break; + case InsertionMode.BEFORE_HEAD: + tokenBeforeHead(this, token); + break; + case InsertionMode.IN_HEAD: + tokenInHead(this, token); + break; + case InsertionMode.IN_HEAD_NO_SCRIPT: + tokenInHeadNoScript(this, token); + break; + case InsertionMode.AFTER_HEAD: + tokenAfterHead(this, token); + break; + case InsertionMode.IN_BODY: + case InsertionMode.IN_CAPTION: + case InsertionMode.IN_CELL: + case InsertionMode.IN_TEMPLATE: + characterInBody(this, token); + break; + case InsertionMode.TEXT: + case InsertionMode.IN_SELECT: + case InsertionMode.IN_SELECT_IN_TABLE: + this._insertCharacters(token); + break; + case InsertionMode.IN_TABLE: + case InsertionMode.IN_TABLE_BODY: + case InsertionMode.IN_ROW: + characterInTable(this, token); + break; + case InsertionMode.IN_TABLE_TEXT: + characterInTableText(this, token); + break; + case InsertionMode.IN_COLUMN_GROUP: + tokenInColumnGroup(this, token); + break; + case InsertionMode.AFTER_BODY: + tokenAfterBody(this, token); + break; + case InsertionMode.AFTER_AFTER_BODY: + tokenAfterAfterBody(this, token); + break; + default: + // Do nothing + } + } + onNullCharacter(token: CharacterToken): void { + this.skipNextNewLine = false; + + if (this.tokenizer.inForeignNode) { + nullCharacterInForeignContent(this, token); + return; + } + + switch (this.insertionMode) { + case InsertionMode.INITIAL: + tokenInInitialMode(this, token); + break; + case InsertionMode.BEFORE_HTML: + tokenBeforeHtml(this, token); + break; + case InsertionMode.BEFORE_HEAD: + tokenBeforeHead(this, token); + break; + case InsertionMode.IN_HEAD: + tokenInHead(this, token); + break; + case InsertionMode.IN_HEAD_NO_SCRIPT: + tokenInHeadNoScript(this, token); + break; + case InsertionMode.AFTER_HEAD: + tokenAfterHead(this, token); + break; + case InsertionMode.TEXT: + this._insertCharacters(token); + break; + case InsertionMode.IN_TABLE: + case InsertionMode.IN_TABLE_BODY: + case InsertionMode.IN_ROW: + characterInTable(this, token); + break; + case InsertionMode.IN_COLUMN_GROUP: + tokenInColumnGroup(this, token); + break; + case InsertionMode.AFTER_BODY: + tokenAfterBody(this, token); + break; + case InsertionMode.AFTER_AFTER_BODY: + tokenAfterAfterBody(this, token); + break; + default: + // Do nothing + } + } + onComment(token: CommentToken): void { + this.skipNextNewLine = false; + + if (this.currentNotInHTML) { + appendComment(this, token); + return; + } + + switch (this.insertionMode) { + case InsertionMode.INITIAL: + case InsertionMode.BEFORE_HTML: + case InsertionMode.BEFORE_HEAD: + case InsertionMode.IN_HEAD: + case InsertionMode.IN_HEAD_NO_SCRIPT: + case InsertionMode.AFTER_HEAD: + case InsertionMode.IN_BODY: + case InsertionMode.IN_TABLE: + case InsertionMode.IN_CAPTION: + case InsertionMode.IN_COLUMN_GROUP: + case InsertionMode.IN_TABLE_BODY: + case InsertionMode.IN_ROW: + case InsertionMode.IN_CELL: + case InsertionMode.IN_SELECT: + case InsertionMode.IN_SELECT_IN_TABLE: + case InsertionMode.IN_TEMPLATE: + case InsertionMode.IN_FRAMESET: + case InsertionMode.AFTER_FRAMESET: + appendComment(this, token); + break; + case InsertionMode.IN_TABLE_TEXT: + tokenInTableText(this, token); + break; + case InsertionMode.AFTER_BODY: + appendCommentToRootHtmlElement(this, token); + break; + case InsertionMode.AFTER_AFTER_BODY: + case InsertionMode.AFTER_AFTER_FRAMESET: + appendCommentToDocument(this, token); + break; + default: + // Do nothing + } + } + onDoctype(token: DoctypeToken): void { + this.skipNextNewLine = false; + switch (this.insertionMode) { + case InsertionMode.INITIAL: + doctypeInInitialMode(this, token); + break; + case InsertionMode.BEFORE_HEAD: + case InsertionMode.IN_HEAD: + case InsertionMode.IN_HEAD_NO_SCRIPT: + case InsertionMode.AFTER_HEAD: + this._err(token, ERR.misplacedDoctype); + break; + case InsertionMode.IN_TABLE_TEXT: + tokenInTableText(this, token); + break; + default: + // Do nothing + } + } + onStartTag(token: TagToken): void { + this.skipNextNewLine = false; + this.currentToken = token; + + if (this.shouldProcessStartTagTokenInForeignContent(token)) { + startTagInForeignContent(this, token); + } else { + this._startTagOutsideForeignContent(token); + } + } + _startTagOutsideForeignContent(token: TagToken): void { + switch (this.insertionMode) { + case InsertionMode.INITIAL: + tokenInInitialMode(this, token); + break; + case InsertionMode.BEFORE_HTML: + startTagBeforeHtml(this, token); + break; + case InsertionMode.BEFORE_HEAD: + startTagBeforeHead(this, token); + break; + case InsertionMode.IN_HEAD: + startTagInHead(this, token); + break; + case InsertionMode.IN_HEAD_NO_SCRIPT: + startTagInHeadNoScript(this, token); + break; + case InsertionMode.AFTER_HEAD: + startTagAfterHead(this, token); + break; + case InsertionMode.IN_BODY: + startTagInBody(this, token); + break; + case InsertionMode.IN_TABLE: + startTagInTable(this, token); + break; + case InsertionMode.IN_TABLE_TEXT: + tokenInTableText(this, token); + break; + case InsertionMode.IN_CAPTION: + startTagInCaption(this, token); + break; + case InsertionMode.IN_COLUMN_GROUP: + startTagInColumnGroup(this, token); + break; + case InsertionMode.IN_TABLE_BODY: + startTagInTableBody(this, token); + break; + case InsertionMode.IN_ROW: + startTagInRow(this, token); + break; + case InsertionMode.IN_CELL: + startTagInCell(this, token); + break; + case InsertionMode.IN_SELECT: + startTagInSelect(this, token); + break; + case InsertionMode.IN_SELECT_IN_TABLE: + startTagInSelectInTable(this, token); + break; + case InsertionMode.IN_TEMPLATE: + startTagInTemplate(this, token); + break; + case InsertionMode.AFTER_BODY: + startTagAfterBody(this, token); + break; + case InsertionMode.IN_FRAMESET: + startTagInFrameset(this, token); + break; + case InsertionMode.AFTER_FRAMESET: + startTagAfterFrameset(this, token); + break; + case InsertionMode.AFTER_AFTER_BODY: + startTagAfterAfterBody(this, token); + break; + case InsertionMode.AFTER_AFTER_FRAMESET: + startTagAfterAfterFrameset(this, token); + break; + default: + // Do nothing + } + } + onEndTag(token: TagToken): void { + this.skipNextNewLine = false; + this.currentToken = token; + + if (this.currentNotInHTML) { + endTagInForeignContent(this, token); + } else { + this._endTagOutsideForeignContent(token); + } + } + _endTagOutsideForeignContent(token: TagToken): void { + switch (this.insertionMode) { + case InsertionMode.INITIAL: + tokenInInitialMode(this, token); + break; + case InsertionMode.BEFORE_HTML: + endTagBeforeHtml(this, token); + break; + case InsertionMode.BEFORE_HEAD: + endTagBeforeHead(this, token); + break; + case InsertionMode.IN_HEAD: + endTagInHead(this, token); + break; + case InsertionMode.IN_HEAD_NO_SCRIPT: + endTagInHeadNoScript(this, token); + break; + case InsertionMode.AFTER_HEAD: + endTagAfterHead(this, token); + break; + case InsertionMode.IN_BODY: + endTagInBody(this, token); + break; + case InsertionMode.TEXT: + endTagInText(this, token); + break; + case InsertionMode.IN_TABLE: + endTagInTable(this, token); + break; + case InsertionMode.IN_TABLE_TEXT: + tokenInTableText(this, token); + break; + case InsertionMode.IN_CAPTION: + endTagInCaption(this, token); + break; + case InsertionMode.IN_COLUMN_GROUP: + endTagInColumnGroup(this, token); + break; + case InsertionMode.IN_TABLE_BODY: + endTagInTableBody(this, token); + break; + case InsertionMode.IN_ROW: + endTagInRow(this, token); + break; + case InsertionMode.IN_CELL: + endTagInCell(this, token); + break; + case InsertionMode.IN_SELECT: + endTagInSelect(this, token); + break; + case InsertionMode.IN_SELECT_IN_TABLE: + endTagInSelectInTable(this, token); + break; + case InsertionMode.IN_TEMPLATE: + endTagInTemplate(this, token); + break; + case InsertionMode.AFTER_BODY: + endTagAfterBody(this, token); + break; + case InsertionMode.IN_FRAMESET: + endTagInFrameset(this, token); + break; + case InsertionMode.AFTER_FRAMESET: + endTagAfterFrameset(this, token); + break; + case InsertionMode.AFTER_AFTER_BODY: + tokenAfterAfterBody(this, token); + break; + default: + // Do nothing + } + } + onEof(token: EOFToken): void { + switch (this.insertionMode) { + case InsertionMode.INITIAL: + tokenInInitialMode(this, token); + break; + case InsertionMode.BEFORE_HTML: + tokenBeforeHtml(this, token); + break; + case InsertionMode.BEFORE_HEAD: + tokenBeforeHead(this, token); + break; + case InsertionMode.IN_HEAD: + tokenInHead(this, token); + break; + case InsertionMode.IN_HEAD_NO_SCRIPT: + tokenInHeadNoScript(this, token); + break; + case InsertionMode.AFTER_HEAD: + tokenAfterHead(this, token); + break; + case InsertionMode.IN_BODY: + case InsertionMode.IN_TABLE: + case InsertionMode.IN_CAPTION: + case InsertionMode.IN_COLUMN_GROUP: + case InsertionMode.IN_TABLE_BODY: + case InsertionMode.IN_ROW: + case InsertionMode.IN_CELL: + case InsertionMode.IN_SELECT: + case InsertionMode.IN_SELECT_IN_TABLE: + eofInBody(this, token); + break; + case InsertionMode.TEXT: + eofInText(this, token); + break; + case InsertionMode.IN_TABLE_TEXT: + tokenInTableText(this, token); + break; + case InsertionMode.IN_TEMPLATE: + eofInTemplate(this, token); + break; + case InsertionMode.AFTER_BODY: + case InsertionMode.IN_FRAMESET: + case InsertionMode.AFTER_FRAMESET: + case InsertionMode.AFTER_AFTER_BODY: + case InsertionMode.AFTER_AFTER_FRAMESET: + stopParsing(this, token); + break; + default: + // Do nothing + } + } + onWhitespaceCharacter(token: CharacterToken): void { + if (this.skipNextNewLine) { + this.skipNextNewLine = false; + + if (token.chars.charCodeAt(0) === unicode.CODE_POINTS.LINE_FEED) { + if (token.chars.length === 1) { + return; + } + + token.chars = token.chars.substr(1); + } + } + + if (this.tokenizer.inForeignNode) { + this._insertCharacters(token); + return; + } + + switch (this.insertionMode) { + case InsertionMode.IN_HEAD: + case InsertionMode.IN_HEAD_NO_SCRIPT: + case InsertionMode.AFTER_HEAD: + case InsertionMode.TEXT: + case InsertionMode.IN_COLUMN_GROUP: + case InsertionMode.IN_SELECT: + case InsertionMode.IN_SELECT_IN_TABLE: + case InsertionMode.IN_FRAMESET: + case InsertionMode.AFTER_FRAMESET: + this._insertCharacters(token); + break; + case InsertionMode.IN_BODY: + case InsertionMode.IN_CAPTION: + case InsertionMode.IN_CELL: + case InsertionMode.IN_TEMPLATE: + case InsertionMode.AFTER_BODY: + case InsertionMode.AFTER_AFTER_BODY: + case InsertionMode.AFTER_AFTER_FRAMESET: + whitespaceCharacterInBody(this, token); + break; + case InsertionMode.IN_TABLE: + case InsertionMode.IN_TABLE_BODY: + case InsertionMode.IN_ROW: + characterInTable(this, token); + break; + case InsertionMode.IN_TABLE_TEXT: + whitespaceCharacterInTableText(this, token); + break; + default: + // Do nothing + } + } } //Adoption agency algorithm @@ -1122,26 +1424,6 @@ function stopParsing(p: Parser, token: EOFToken // The "initial" insertion mode //------------------------------------------------------------------ -function modeInitial(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.COMMENT: { - appendComment(p, token); - break; - } - case TokenType.DOCTYPE: { - doctypeInInitialMode(p, token); - break; - } - case TokenType.WHITESPACE_CHARACTER: { - // Ignore token - break; - } - default: { - tokenInInitialMode(p, token); - } - } -} - function doctypeInInitialMode(p: Parser, token: DoctypeToken): void { p._setDocumentType(token); @@ -1160,36 +1442,11 @@ function tokenInInitialMode(p: Parser, token: T p._err(token, ERR.missingDoctype, true); p.treeAdapter.setDocumentMode(p.document, DOCUMENT_MODE.QUIRKS); p.insertionMode = InsertionMode.BEFORE_HTML; - modeBeforeHtml(p, token); + p._processToken(token); } // The "before html" insertion mode //------------------------------------------------------------------ -function modeBeforeHtml(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.CHARACTER: - case TokenType.NULL_CHARACTER: - case TokenType.EOF: { - tokenBeforeHtml(p, token); - break; - } - case TokenType.COMMENT: { - appendComment(p, token); - break; - } - case TokenType.START_TAG: { - startTagBeforeHtml(p, token); - break; - } - case TokenType.END_TAG: { - endTagBeforeHtml(p, token); - break; - } - default: - // Do nothing - } -} - function startTagBeforeHtml(p: Parser, token: TagToken): void { if (token.tagID === $.HTML) { p._insertElement(token, NS.HTML); @@ -1210,40 +1467,11 @@ function endTagBeforeHtml(p: Parser, token: Tag function tokenBeforeHtml(p: Parser, token: Token): void { p._insertFakeRootElement(); p.insertionMode = InsertionMode.BEFORE_HEAD; - modeBeforeHead(p, token); + p._processToken(token); } // The "before head" insertion mode //------------------------------------------------------------------ -function modeBeforeHead(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.CHARACTER: - case TokenType.NULL_CHARACTER: - case TokenType.EOF: { - tokenBeforeHead(p, token); - break; - } - case TokenType.COMMENT: { - appendComment(p, token); - break; - } - case TokenType.DOCTYPE: { - p._err(token, ERR.misplacedDoctype); - break; - } - case TokenType.START_TAG: { - startTagBeforeHead(p, token); - break; - } - case TokenType.END_TAG: { - endTagBeforeHead(p, token); - break; - } - default: - // Do nothing - } -} - function startTagBeforeHead(p: Parser, token: TagToken): void { switch (token.tagID) { case $.HTML: { @@ -1276,44 +1504,11 @@ function tokenBeforeHead(p: Parser, token: Toke p._insertFakeElement(TN.HEAD, $.HEAD); p.headElement = p.openElements.current; p.insertionMode = InsertionMode.IN_HEAD; - modeInHead(p, token); + p._processToken(token); } // The "in head" insertion mode //------------------------------------------------------------------ -function modeInHead(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.CHARACTER: - case TokenType.NULL_CHARACTER: - case TokenType.EOF: { - tokenInHead(p, token); - break; - } - case TokenType.WHITESPACE_CHARACTER: { - p._insertCharacters(token); - break; - } - case TokenType.COMMENT: { - appendComment(p, token); - break; - } - case TokenType.DOCTYPE: { - p._err(token, ERR.misplacedDoctype); - break; - } - case TokenType.START_TAG: { - startTagInHead(p, token); - break; - } - case TokenType.END_TAG: { - endTagInHead(p, token); - break; - } - default: - // Do nothing - } -} - function startTagInHead(p: Parser, token: TagToken): void { switch (token.tagID) { case $.HTML: { @@ -1408,44 +1603,11 @@ function endTagInHead(p: Parser, token: TagToke function tokenInHead(p: Parser, token: Token): void { p.openElements.pop(); p.insertionMode = InsertionMode.AFTER_HEAD; - modeAfterHead(p, token); + p._processToken(token); } // The "in head no script" insertion mode //------------------------------------------------------------------ -function modeInHeadNoScript(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.CHARACTER: - case TokenType.NULL_CHARACTER: - case TokenType.EOF: { - tokenInHeadNoScript(p, token); - break; - } - case TokenType.WHITESPACE_CHARACTER: { - p._insertCharacters(token); - break; - } - case TokenType.COMMENT: { - appendComment(p, token); - break; - } - case TokenType.DOCTYPE: { - p._err(token, ERR.misplacedDoctype); - break; - } - case TokenType.START_TAG: { - startTagInHeadNoScript(p, token); - break; - } - case TokenType.END_TAG: { - endTagInHeadNoScript(p, token); - break; - } - default: - // Do nothing - } -} - function startTagInHeadNoScript(p: Parser, token: TagToken): void { switch (token.tagID) { case $.HTML: { @@ -1495,44 +1657,11 @@ function tokenInHeadNoScript(p: Parser, token: p._err(token, errCode); p.openElements.pop(); p.insertionMode = InsertionMode.IN_HEAD; - modeInHead(p, token); + p._processToken(token); } // The "after head" insertion mode //------------------------------------------------------------------ -function modeAfterHead(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.CHARACTER: - case TokenType.NULL_CHARACTER: - case TokenType.EOF: { - tokenAfterHead(p, token); - break; - } - case TokenType.WHITESPACE_CHARACTER: { - p._insertCharacters(token); - break; - } - case TokenType.COMMENT: { - appendComment(p, token); - break; - } - case TokenType.DOCTYPE: { - p._err(token, ERR.misplacedDoctype); - break; - } - case TokenType.START_TAG: { - startTagAfterHead(p, token); - break; - } - case TokenType.END_TAG: { - endTagAfterHead(p, token); - break; - } - default: - // Do nothing - } -} - function startTagAfterHead(p: Parser, token: TagToken): void { switch (token.tagID) { case $.HTML: { @@ -2208,7 +2337,7 @@ function bodyEndTagInBody(p: Parser, token: Tag function htmlEndTagInBody(p: Parser, token: TagToken): void { if (p.openElements.hasInScope($.BODY)) { p.insertionMode = InsertionMode.AFTER_BODY; - modeAfterBody(p, token); + endTagAfterBody(p, token); } } @@ -2420,27 +2549,6 @@ function eofInBody(p: Parser, token: EOFToken): // The "text" insertion mode //------------------------------------------------------------------ -function modeText(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.CHARACTER: - case TokenType.NULL_CHARACTER: - case TokenType.WHITESPACE_CHARACTER: { - p._insertCharacters(token); - break; - } - case TokenType.END_TAG: { - endTagInText(p, token); - break; - } - case TokenType.EOF: { - eofInText(p, token); - break; - } - default: - // Do nothing - } -} - function endTagInText(p: Parser, token: TagToken): void { if (token.tagID === $.SCRIPT) { p.pendingScript = p.openElements.current; @@ -2454,47 +2562,29 @@ function eofInText(p: Parser, token: EOFToken): p._err(token, ERR.eofInElementThatCanContainOnlyText); p.openElements.pop(); p.insertionMode = p.originalInsertionMode; - p._processToken(token); + p.onEof(token); } // The "in table" insertion mode //------------------------------------------------------------------ -function modeInTable(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.CHARACTER: - case TokenType.NULL_CHARACTER: - case TokenType.WHITESPACE_CHARACTER: { - characterInTable(p, token); - break; - } - case TokenType.COMMENT: { - appendComment(p, token); - break; - } - case TokenType.START_TAG: { - startTagInTable(p, token); - break; - } - case TokenType.END_TAG: { - endTagInTable(p, token); - break; - } - case TokenType.EOF: { - eofInBody(p, token); - break; - } - default: - // Do nothing - } -} - function characterInTable(p: Parser, token: CharacterToken): void { if (TABLE_STRUCTURE_TAGS.has(p.openElements.currentTagId)) { - p.pendingCharacterTokens = []; + p.pendingCharacterTokens.length = 0; p.hasNonWhitespacePendingCharacterToken = false; p.originalInsertionMode = p.insertionMode; p.insertionMode = InsertionMode.IN_TABLE_TEXT; - modeInTableText(p, token); + + switch (token.type) { + case TokenType.CHARACTER: { + characterInTableText(p, token); + break; + } + case TokenType.WHITESPACE_CHARACTER: { + whitespaceCharacterInTableText(p, token); + break; + } + // Ignore null + } } else { tokenInTable(p, token); } @@ -2517,7 +2607,7 @@ function colStartTagInTable(p: Parser, token: T p.openElements.clearBackToTableContext(); p._insertFakeElement(TN.COLGROUP, $.COLGROUP); p.insertionMode = InsertionMode.IN_COLUMN_GROUP; - modeInColumnGroup(p, token); + startTagInColumnGroup(p, token); } function tbodyStartTagInTable(p: Parser, token: TagToken): void { @@ -2530,14 +2620,14 @@ function tdStartTagInTable(p: Parser, token: Ta p.openElements.clearBackToTableContext(); p._insertFakeElement(TN.TBODY, $.TBODY); p.insertionMode = InsertionMode.IN_TABLE_BODY; - modeInTableBody(p, token); + startTagInTableBody(p, token); } function tableStartTagInTable(p: Parser, token: TagToken): void { if (p.openElements.hasInTableScope($.TABLE)) { p.openElements.popUntilTagNamePopped($.TABLE); p._resetInsertionMode(); - p._processToken(token); + p.onStartTag(token); } } @@ -2653,26 +2743,6 @@ function tokenInTable(p: Parser, token: Token): // The "in table text" insertion mode //------------------------------------------------------------------ -function modeInTableText(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.CHARACTER: { - characterInTableText(p, token); - break; - } - case TokenType.WHITESPACE_CHARACTER: { - whitespaceCharacterInTableText(p, token); - break; - } - case TokenType.NULL_CHARACTER: { - // Ignore token - break; - } - default: { - tokenInTableText(p, token); - } - } -} - function whitespaceCharacterInTableText(p: Parser, token: CharacterToken): void { p.pendingCharacterTokens.push(token); } @@ -2701,37 +2771,6 @@ function tokenInTableText(p: Parser, token: Tok // The "in caption" insertion mode //------------------------------------------------------------------ -function modeInCaption(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.CHARACTER: { - characterInBody(p, token); - break; - } - case TokenType.WHITESPACE_CHARACTER: { - whitespaceCharacterInBody(p, token); - break; - } - case TokenType.COMMENT: { - appendComment(p, token); - break; - } - case TokenType.START_TAG: { - startTagInCaption(p, token); - break; - } - case TokenType.END_TAG: { - endTagInCaption(p, token); - break; - } - case TokenType.EOF: { - eofInBody(p, token); - break; - } - default: - // Do nothing - } -} - const TABLE_VOID_ELEMENTS = new Set([$.CAPTION, $.COL, $.COLGROUP, $.TBODY, $.TD, $.TFOOT, $.TH, $.THEAD, $.TR]); function startTagInCaption(p: Parser, token: TagToken): void { @@ -2743,7 +2782,7 @@ function startTagInCaption(p: Parser, token: Ta p.openElements.popUntilTagNamePopped($.CAPTION); p.activeFormattingElements.clearToLastMarker(); p.insertionMode = InsertionMode.IN_TABLE; - modeInTable(p, token); + startTagInTable(p, token); } } else { startTagInBody(p, token); @@ -2763,7 +2802,7 @@ function endTagInCaption(p: Parser, token: TagT p.insertionMode = InsertionMode.IN_TABLE; if (tn === $.TABLE) { - modeInTable(p, token); + endTagInTable(p, token); } } break; @@ -2789,38 +2828,6 @@ function endTagInCaption(p: Parser, token: TagT // The "in column group" insertion mode //------------------------------------------------------------------ -function modeInColumnGroup(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.CHARACTER: - case TokenType.NULL_CHARACTER: { - tokenInColumnGroup(p, token); - break; - } - case TokenType.WHITESPACE_CHARACTER: { - p._insertCharacters(token); - break; - } - case TokenType.COMMENT: { - appendComment(p, token); - break; - } - case TokenType.START_TAG: { - startTagInColumnGroup(p, token); - break; - } - case TokenType.END_TAG: { - endTagInColumnGroup(p, token); - break; - } - case TokenType.EOF: { - eofInBody(p, token); - break; - } - default: - // Do nothing - } -} - function startTagInColumnGroup(p: Parser, token: TagToken): void { switch (token.tagID) { case $.HTML: { @@ -2865,45 +2872,16 @@ function endTagInColumnGroup(p: Parser, token: } } -function tokenInColumnGroup(p: Parser, token: Token): void { - if (p.openElements.currentTagId === $.COLGROUP) { - p.openElements.pop(); - p.insertionMode = InsertionMode.IN_TABLE; - modeInTable(p, token); - } -} - -// The "in table body" insertion mode -//------------------------------------------------------------------ -function modeInTableBody(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.CHARACTER: - case TokenType.NULL_CHARACTER: - case TokenType.WHITESPACE_CHARACTER: { - characterInTable(p, token); - break; - } - case TokenType.COMMENT: { - appendComment(p, token); - break; - } - case TokenType.START_TAG: { - startTagInTableBody(p, token); - break; - } - case TokenType.END_TAG: { - endTagInTableBody(p, token); - break; - } - case TokenType.EOF: { - eofInBody(p, token); - break; - } - default: - // Do nothing - } -} - +function tokenInColumnGroup(p: Parser, token: Token): void { + if (p.openElements.currentTagId === $.COLGROUP) { + p.openElements.pop(); + p.insertionMode = InsertionMode.IN_TABLE; + p._processToken(token); + } +} + +// The "in table body" insertion mode +//------------------------------------------------------------------ function startTagInTableBody(p: Parser, token: TagToken): void { switch (token.tagID) { case $.TR: { @@ -2917,7 +2895,7 @@ function startTagInTableBody(p: Parser, token: p.openElements.clearBackToTableBodyContext(); p._insertFakeElement(TN.TR, $.TR); p.insertionMode = InsertionMode.IN_ROW; - modeInRow(p, token); + startTagInRow(p, token); break; } case $.CAPTION: @@ -2930,7 +2908,7 @@ function startTagInTableBody(p: Parser, token: p.openElements.clearBackToTableBodyContext(); p.openElements.pop(); p.insertionMode = InsertionMode.IN_TABLE; - modeInTable(p, token); + startTagInTable(p, token); } break; } @@ -2959,7 +2937,7 @@ function endTagInTableBody(p: Parser, token: Ta p.openElements.clearBackToTableBodyContext(); p.openElements.pop(); p.insertionMode = InsertionMode.IN_TABLE; - modeInTable(p, token); + endTagInTable(p, token); } break; } @@ -2982,35 +2960,6 @@ function endTagInTableBody(p: Parser, token: Ta // The "in row" insertion mode //------------------------------------------------------------------ -function modeInRow(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.CHARACTER: - case TokenType.NULL_CHARACTER: - case TokenType.WHITESPACE_CHARACTER: { - characterInTable(p, token); - break; - } - case TokenType.COMMENT: { - appendComment(p, token); - break; - } - case TokenType.START_TAG: { - startTagInRow(p, token); - break; - } - case TokenType.END_TAG: { - endTagInRow(p, token); - break; - } - case TokenType.EOF: { - eofInBody(p, token); - break; - } - default: - // Do nothing - } -} - function startTagInRow(p: Parser, token: TagToken): void { switch (token.tagID) { case $.TH: @@ -3032,7 +2981,7 @@ function startTagInRow(p: Parser, token: TagTok p.openElements.clearBackToTableRowContext(); p.openElements.pop(); p.insertionMode = InsertionMode.IN_TABLE_BODY; - modeInTableBody(p, token); + startTagInTableBody(p, token); } break; } @@ -3057,7 +3006,7 @@ function endTagInRow(p: Parser, token: TagToken p.openElements.clearBackToTableRowContext(); p.openElements.pop(); p.insertionMode = InsertionMode.IN_TABLE_BODY; - modeInTableBody(p, token); + endTagInTableBody(p, token); } break; } @@ -3068,7 +3017,7 @@ function endTagInRow(p: Parser, token: TagToken p.openElements.clearBackToTableRowContext(); p.openElements.pop(); p.insertionMode = InsertionMode.IN_TABLE_BODY; - modeInTableBody(p, token); + endTagInTableBody(p, token); } break; } @@ -3089,44 +3038,13 @@ function endTagInRow(p: Parser, token: TagToken // The "in cell" insertion mode //------------------------------------------------------------------ -function modeInCell(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.CHARACTER: { - characterInBody(p, token); - break; - } - case TokenType.WHITESPACE_CHARACTER: { - whitespaceCharacterInBody(p, token); - break; - } - case TokenType.COMMENT: { - appendComment(p, token); - break; - } - case TokenType.START_TAG: { - startTagInCell(p, token); - break; - } - case TokenType.END_TAG: { - endTagInCell(p, token); - break; - } - case TokenType.EOF: { - eofInBody(p, token); - break; - } - default: - // Do nothing - } -} - function startTagInCell(p: Parser, token: TagToken): void { const tn = token.tagID; if (TABLE_VOID_ELEMENTS.has(tn)) { if (p.openElements.hasInTableScope($.TD) || p.openElements.hasInTableScope($.TH)) { p._closeTableCell(); - p._processToken(token); + startTagInRow(p, token); } } else { startTagInBody(p, token); @@ -3154,7 +3072,7 @@ function endTagInCell(p: Parser, token: TagToke case $.TR: { if (p.openElements.hasInTableScope(tn)) { p._closeTableCell(); - p._processToken(token); + endTagInRow(p, token); } break; } @@ -3174,34 +3092,6 @@ function endTagInCell(p: Parser, token: TagToke // The "in select" insertion mode //------------------------------------------------------------------ -function modeInSelect(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.CHARACTER: - case TokenType.WHITESPACE_CHARACTER: { - p._insertCharacters(token); - break; - } - case TokenType.COMMENT: { - appendComment(p, token); - break; - } - case TokenType.START_TAG: { - startTagInSelect(p, token); - break; - } - case TokenType.END_TAG: { - endTagInSelect(p, token); - break; - } - case TokenType.EOF: { - eofInBody(p, token); - break; - } - default: - // Do nothing - } -} - function startTagInSelect(p: Parser, token: TagToken): void { switch (token.tagID) { case $.HTML: { @@ -3237,7 +3127,7 @@ function startTagInSelect(p: Parser, token: Tag p._resetInsertionMode(); if (token.tagID !== $.SELECT) { - p._processToken(token); + p.onStartTag(token); } } break; @@ -3292,34 +3182,6 @@ function endTagInSelect(p: Parser, token: TagTo // The "in select in table" insertion mode //------------------------------------------------------------------ -function modeInSelectInTable(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.CHARACTER: - case TokenType.WHITESPACE_CHARACTER: { - p._insertCharacters(token); - break; - } - case TokenType.COMMENT: { - appendComment(p, token); - break; - } - case TokenType.START_TAG: { - startTagInSelectInTable(p, token); - break; - } - case TokenType.END_TAG: { - endTagInSelectInTable(p, token); - break; - } - case TokenType.EOF: { - eofInBody(p, token); - break; - } - default: - // Do nothing - } -} - function startTagInSelectInTable(p: Parser, token: TagToken): void { const tn = token.tagID; @@ -3335,7 +3197,7 @@ function startTagInSelectInTable(p: Parser, tok ) { p.openElements.popUntilTagNamePopped($.SELECT); p._resetInsertionMode(); - p._processToken(token); + p.onStartTag(token); } else { startTagInSelect(p, token); } @@ -3357,7 +3219,7 @@ function endTagInSelectInTable(p: Parser, token if (p.openElements.hasInTableScope(tn)) { p.openElements.popUntilTagNamePopped($.SELECT); p._resetInsertionMode(); - p._processToken(token); + p.onEndTag(token); } } else { endTagInSelect(p, token); @@ -3366,37 +3228,6 @@ function endTagInSelectInTable(p: Parser, token // The "in template" insertion mode //------------------------------------------------------------------ -function modeInTemplate(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.CHARACTER: { - characterInBody(p, token); - break; - } - case TokenType.WHITESPACE_CHARACTER: { - whitespaceCharacterInBody(p, token); - break; - } - case TokenType.COMMENT: { - appendComment(p, token); - break; - } - case TokenType.START_TAG: { - startTagInTemplate(p, token); - break; - } - case TokenType.END_TAG: { - endTagInTemplate(p, token); - break; - } - case TokenType.EOF: { - eofInTemplate(p, token); - break; - } - default: - // Do nothing - } -} - function startTagInTemplate(p: Parser, token: TagToken): void { switch (token.tagID) { // First, handle tags that can start without a mode change @@ -3421,28 +3252,28 @@ function startTagInTemplate(p: Parser, token: T case $.THEAD: p.tmplInsertionModeStack[0] = InsertionMode.IN_TABLE; p.insertionMode = InsertionMode.IN_TABLE; - modeInTable(p, token); + startTagInTable(p, token); break; case $.COL: p.tmplInsertionModeStack[0] = InsertionMode.IN_COLUMN_GROUP; p.insertionMode = InsertionMode.IN_COLUMN_GROUP; - modeInColumnGroup(p, token); + startTagInColumnGroup(p, token); break; case $.TR: p.tmplInsertionModeStack[0] = InsertionMode.IN_TABLE_BODY; p.insertionMode = InsertionMode.IN_TABLE_BODY; - modeInTableBody(p, token); + startTagInTableBody(p, token); break; case $.TD: case $.TH: p.tmplInsertionModeStack[0] = InsertionMode.IN_ROW; p.insertionMode = InsertionMode.IN_ROW; - modeInRow(p, token); + startTagInRow(p, token); break; default: p.tmplInsertionModeStack[0] = InsertionMode.IN_BODY; p.insertionMode = InsertionMode.IN_BODY; - modeInBody(p, token); + startTagInBody(p, token); } } @@ -3458,7 +3289,7 @@ function eofInTemplate(p: Parser, token: EOFTok p.activeFormattingElements.clearToLastMarker(); p.tmplInsertionModeStack.shift(); p._resetInsertionMode(); - p._processToken(token); + p.onEof(token); } else { stopParsing(p, token); } @@ -3466,38 +3297,6 @@ function eofInTemplate(p: Parser, token: EOFTok // The "after body" insertion mode //------------------------------------------------------------------ -function modeAfterBody(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.CHARACTER: - case TokenType.NULL_CHARACTER: { - tokenAfterBody(p, token); - break; - } - case TokenType.WHITESPACE_CHARACTER: { - whitespaceCharacterInBody(p, token); - break; - } - case TokenType.COMMENT: { - appendCommentToRootHtmlElement(p, token); - break; - } - case TokenType.START_TAG: { - startTagAfterBody(p, token); - break; - } - case TokenType.END_TAG: { - endTagAfterBody(p, token); - break; - } - case TokenType.EOF: { - stopParsing(p, token); - break; - } - default: - // Do nothing - } -} - function startTagAfterBody(p: Parser, token: TagToken): void { if (token.tagID === $.HTML) { startTagInBody(p, token); @@ -3529,33 +3328,6 @@ function tokenAfterBody(p: Parser, token: Token // The "in frameset" insertion mode //------------------------------------------------------------------ -function modeInFrameset(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.WHITESPACE_CHARACTER: { - p._insertCharacters(token); - break; - } - case TokenType.COMMENT: { - appendComment(p, token); - break; - } - case TokenType.START_TAG: { - startTagInFrameset(p, token); - break; - } - case TokenType.END_TAG: { - endTagInFrameset(p, token); - break; - } - case TokenType.EOF: { - stopParsing(p, token); - break; - } - default: - // Do nothing - } -} - function startTagInFrameset(p: Parser, token: TagToken): void { switch (token.tagID) { case $.HTML: { @@ -3592,33 +3364,6 @@ function endTagInFrameset(p: Parser, token: Tag // The "after frameset" insertion mode //------------------------------------------------------------------ -function modeAfterFrameset(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.WHITESPACE_CHARACTER: { - p._insertCharacters(token); - break; - } - case TokenType.COMMENT: { - appendComment(p, token); - break; - } - case TokenType.START_TAG: { - startTagAfterFrameset(p, token); - break; - } - case TokenType.END_TAG: { - endTagAfterFrameset(p, token); - break; - } - case TokenType.EOF: { - stopParsing(p, token); - break; - } - default: - // Do nothing - } -} - function startTagAfterFrameset(p: Parser, token: TagToken): void { switch (token.tagID) { case $.HTML: { @@ -3642,35 +3387,6 @@ function endTagAfterFrameset(p: Parser, token: // The "after after body" insertion mode //------------------------------------------------------------------ -function modeAfterAfterBody(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.CHARACTER: - case TokenType.NULL_CHARACTER: - case TokenType.END_TAG: { - tokenAfterAfterBody(p, token); - break; - } - case TokenType.WHITESPACE_CHARACTER: { - whitespaceCharacterInBody(p, token); - break; - } - case TokenType.COMMENT: { - appendCommentToDocument(p, token); - break; - } - case TokenType.START_TAG: { - startTagAfterAfterBody(p, token); - break; - } - case TokenType.EOF: { - stopParsing(p, token); - break; - } - default: - // Do nothing - } -} - function startTagAfterAfterBody(p: Parser, token: TagToken): void { if (token.tagID === $.HTML) { startTagInBody(p, token); @@ -3686,29 +3402,6 @@ function tokenAfterAfterBody(p: Parser, token: // The "after after frameset" insertion mode //------------------------------------------------------------------ -function modeAfterAfterFrameset(p: Parser, token: Token): void { - switch (token.type) { - case TokenType.WHITESPACE_CHARACTER: { - whitespaceCharacterInBody(p, token); - break; - } - case TokenType.COMMENT: { - appendCommentToDocument(p, token); - break; - } - case TokenType.START_TAG: { - startTagAfterAfterFrameset(p, token); - break; - } - case TokenType.EOF: { - stopParsing(p, token); - break; - } - default: - // Do nothing - } -} - function startTagAfterAfterFrameset(p: Parser, token: TagToken): void { switch (token.tagID) { case $.HTML: { @@ -3745,7 +3438,7 @@ function startTagInForeignContent(p: Parser, to p.openElements.pop(); } - p._processToken(token); + p._startTagOutsideForeignContent(token); } else { const current = p._getAdjustedCurrentElement(); const currentNs = p.treeAdapter.getNamespaceURI(current); @@ -3774,7 +3467,7 @@ function endTagInForeignContent(p: Parser, toke const element = p.openElements.items[i]; if (p.treeAdapter.getNamespaceURI(element) === NS.HTML) { - p._processToken(token); + p._endTagOutsideForeignContent(token); break; } diff --git a/packages/parse5/lib/tokenizer/index.ts b/packages/parse5/lib/tokenizer/index.ts index eead9ec90..8b5ed5fdf 100644 --- a/packages/parse5/lib/tokenizer/index.ts +++ b/packages/parse5/lib/tokenizer/index.ts @@ -209,7 +209,13 @@ export class Tokenizer { private tokenQueue: Token[] = []; - public allowCDATA = false; + /** + * Indicates that the current adjusted node exists, is not an element in the HTML namespace, + * and that it is not an integration point for either MathML or HTML. + * + * @see {@link https://html.spec.whatwg.org/multipage/parsing.html#tree-construction} + */ + public inForeignNode = false; public lastStartTagName = ''; public active = false; @@ -1961,7 +1967,7 @@ export class Tokenizer { this.currentLocation = this.getCurrentLocation($$.DOCTYPE.length + 1); this.state = State.DOCTYPE; } else if (this._consumeSequenceIfMatch($$.CDATA_START, true)) { - if (this.allowCDATA) { + if (this.inForeignNode) { this.state = State.CDATA_SECTION; } else { this._err(ERR.cdataInHtmlContent); diff --git a/packages/parse5/lib/tokenizer/tokenizer-location-info.test.ts b/packages/parse5/lib/tokenizer/tokenizer-location-info.test.ts index e45a464ce..2df4e46bf 100644 --- a/packages/parse5/lib/tokenizer/tokenizer-location-info.test.ts +++ b/packages/parse5/lib/tokenizer/tokenizer-location-info.test.ts @@ -92,7 +92,7 @@ it('Location Info (Tokenizer)', () => { { initialMode: TokenizerMode.DATA, lastStartTagName: '', - allowCDATA: true, + inForeignNode: true, htmlChunks: ['', '', '', ' \n'], }, ]; @@ -111,7 +111,7 @@ it('Location Info (Tokenizer)', () => { tokenizer.preprocessor.bufferWaterline = 8; tokenizer.state = testCase.initialMode; tokenizer.lastStartTagName = testCase.lastStartTagName; - tokenizer.allowCDATA = !!testCase.allowCDATA; + tokenizer.inForeignNode = !!testCase.inForeignNode; for (let token = tokenizer.getNextToken(), j = 0; token.type !== TokenType.EOF; ) { if (token.type === TokenType.HIBERNATION) {