Skip to content

Commit

Permalink
refactor(parser): Consume tokenizer events (#419)
Browse files Browse the repository at this point in the history
  • Loading branch information
fb55 committed Mar 2, 2022
1 parent 9d76f78 commit 1615a5e
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 196 deletions.
6 changes: 1 addition & 5 deletions packages/parse5/lib/common/token.ts
Expand Up @@ -96,13 +96,9 @@ export interface EOFToken extends TokenBase {
readonly type: TokenType.EOF;
}

interface HibernationToken extends TokenBase {
readonly type: TokenType.HIBERNATION;
}

/**
 * Token carrying character data. The `type` field tells apart the three
 * character flavours: regular, NULL and whitespace characters.
 */
export interface CharacterToken extends TokenBase {
type: TokenType.CHARACTER | TokenType.NULL_CHARACTER | TokenType.WHITESPACE_CHARACTER;
/** The string payload of this token. */
chars: string;
}

export type Token = DoctypeToken | TagToken | CommentToken | EOFToken | HibernationToken | CharacterToken;
export type Token = DoctypeToken | TagToken | CommentToken | EOFToken | CharacterToken;
45 changes: 28 additions & 17 deletions packages/parse5/lib/parser/index.ts
@@ -1,5 +1,4 @@
import { TokenizerMode } from '../tokenizer/index.js';
import { QueuedTokenizer } from '../tokenizer/queued.js';
import { TokenHandler, Tokenizer, TokenizerMode } from '../tokenizer/index.js';
import { OpenElementStack } from './open-element-stack.js';
import { FormattingElementList, ElementEntry, EntryType } from './formatting-element-list.js';
import * as defaultTreeAdapter from '../tree-adapters/default.js';
Expand Down Expand Up @@ -120,9 +119,9 @@ const defaultParserOptions = {
};

//Parser
export class Parser<T extends TreeAdapterTypeMap> {
export class Parser<T extends TreeAdapterTypeMap> implements TokenHandler {
treeAdapter: TreeAdapter<T>;
private onParseError: ParserErrorHandler | null;
onParseError: ParserErrorHandler | null;
private currentToken: Token | null = null;
public options: Required<ParserOptions<T>>;
public document: T['document'];
Expand All @@ -147,7 +146,7 @@ export class Parser<T extends TreeAdapterTypeMap> {

this.document = document ?? this.treeAdapter.createDocument();

this.tokenizer = new QueuedTokenizer(this.options);
this.tokenizer = new Tokenizer(this.options, this);
this.activeFormattingElements = new FormattingElementList(this.treeAdapter);

this.fragmentContextID = fragmentContext ? getTagID(this.treeAdapter.getTagName(fragmentContext)) : $.UNKNOWN;
Expand Down Expand Up @@ -211,7 +210,8 @@ export class Parser<T extends TreeAdapterTypeMap> {
return fragment;
}

tokenizer: QueuedTokenizer;
tokenizer: Tokenizer;

stopped = false;
insertionMode = InsertionMode.INITIAL;
originalInsertionMode = InsertionMode.INITIAL;
Expand Down Expand Up @@ -261,13 +261,7 @@ export class Parser<T extends TreeAdapterTypeMap> {
//Parsing loop
private _runParsingLoop(scriptHandler: null | ((scriptElement: T['element']) => void)): void {
while (!this.stopped) {
const token = this.tokenizer.getNextToken();

this._processToken(token);

if (token.type === TokenType.START_TAG && token.selfClosing && !token.ackSelfClosing) {
this._err(token, ERR.nonVoidHtmlElementStartTagWithTrailingSolidus);
}
this.tokenizer.getNextToken();

if (!this.tokenizer.active || (scriptHandler !== null && this.pendingScript)) {
break;
Expand Down Expand Up @@ -601,7 +595,7 @@ export class Parser<T extends TreeAdapterTypeMap> {
break;
}
case TokenType.START_TAG: {
this.onStartTag(token);
this._processStartTag(token);
break;
}
case TokenType.END_TAG: {
Expand Down Expand Up @@ -958,6 +952,23 @@ export class Parser<T extends TreeAdapterTypeMap> {
this.skipNextNewLine = false;
this.currentToken = token;

this._processStartTag(token);

if (token.selfClosing && !token.ackSelfClosing) {
this._err(token, ERR.nonVoidHtmlElementStartTagWithTrailingSolidus);
}
}
/**
* Processes a given start tag.
*
* `onStartTag` checks if a self-closing tag was recognized. When a token
* is moved in between multiple insertion modes, this check for self-closing
* could lead to false positives. To avoid this, `_processStartTag` is used
* for nested calls.
*
* @param token The token to process.
*/
_processStartTag(token: TagToken): void {
if (this.shouldProcessStartTagTokenInForeignContent(token)) {
startTagInForeignContent(this, token);
} else {
Expand Down Expand Up @@ -2627,7 +2638,7 @@ function tableStartTagInTable<T extends TreeAdapterTypeMap>(p: Parser<T>, token:
if (p.openElements.hasInTableScope($.TABLE)) {
p.openElements.popUntilTagNamePopped($.TABLE);
p._resetInsertionMode();
p.onStartTag(token);
p._processStartTag(token);
}
}

Expand Down Expand Up @@ -3127,7 +3138,7 @@ function startTagInSelect<T extends TreeAdapterTypeMap>(p: Parser<T>, token: Tag
p._resetInsertionMode();

if (token.tagID !== $.SELECT) {
p.onStartTag(token);
p._processStartTag(token);
}
}
break;
Expand Down Expand Up @@ -3197,7 +3208,7 @@ function startTagInSelectInTable<T extends TreeAdapterTypeMap>(p: Parser<T>, tok
) {
p.openElements.popUntilTagNamePopped($.SELECT);
p._resetInsertionMode();
p.onStartTag(token);
p._processStartTag(token);
} else {
startTagInSelect(p, token);
}
Expand Down
105 changes: 0 additions & 105 deletions packages/parse5/lib/tokenizer/queued.ts

This file was deleted.

35 changes: 25 additions & 10 deletions packages/parse5/lib/tokenizer/tokenizer-location-info.test.ts
@@ -1,7 +1,6 @@
import * as assert from 'node:assert';
import { Tokenizer, TokenizerMode } from './index.js';
import { SinglePathHandler } from './queued.js';
import { Location, EOFToken, Token } from '../common/token.js';
import { Tokenizer, TokenizerMode, TokenHandler } from './index.js';
import { Location, EOFToken, CharacterToken, DoctypeToken, TagToken, CommentToken } from '../common/token.js';
import { getSubstringByLineCol, normalizeNewLine } from 'parse5-test-utils/utils/common.js';

interface LocationInfoTestCase {
Expand All @@ -11,22 +10,17 @@ interface LocationInfoTestCase {
}

/** Receives events and immediately compares them against the expected values. */
class LocationInfoHandler extends SinglePathHandler {
class LocationInfoHandler implements TokenHandler {
public sawEof = false;
/** The index of the last html chunk. */
private idx = 0;
/** All of the lines in the input. */
private lines: string[];

constructor(private testCase: LocationInfoTestCase, private html: string) {
super();
this.lines = html.split(/\r?\n/g);
}

protected handleToken(token: Token): void {
this.validateLocation(token.location);
}

private validateLocation(location: Location | null): void {
assert.ok(location);

Expand All @@ -45,7 +39,28 @@ class LocationInfoHandler extends SinglePathHandler {
this.idx += 1;
}

override onEof({ location }: EOFToken): void {
/** Comment token event: forwards the token's location to `validateLocation`. */
onComment(token: CommentToken): void {
    this.validateLocation(token.location);
}
/** Doctype token event: forwards the token's location to `validateLocation`. */
onDoctype(token: DoctypeToken): void {
    this.validateLocation(token.location);
}
/** Start tag token event: forwards the token's location to `validateLocation`. */
onStartTag(token: TagToken): void {
    this.validateLocation(token.location);
}
/** End tag token event: forwards the token's location to `validateLocation`. */
onEndTag(token: TagToken): void {
    this.validateLocation(token.location);
}
/** Character token event: forwards the token's location to `validateLocation`. */
onCharacter(token: CharacterToken): void {
    this.validateLocation(token.location);
}
/** NULL character token event: forwards the token's location to `validateLocation`. */
onNullCharacter(token: CharacterToken): void {
    this.validateLocation(token.location);
}
/** Whitespace character token event: forwards the token's location to `validateLocation`. */
onWhitespaceCharacter(token: CharacterToken): void {
    this.validateLocation(token.location);
}
onEof({ location }: EOFToken): void {
assert.ok(location);
assert.strictEqual(location.endOffset, location.startOffset);
assert.strictEqual(location.endOffset, this.html.length);
Expand Down

0 comments on commit 1615a5e

Please sign in to comment.