Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(parser): Consume tokenizer events #419

Merged
merged 6 commits into from Mar 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 1 addition & 5 deletions packages/parse5/lib/common/token.ts
Expand Up @@ -96,13 +96,9 @@ export interface EOFToken extends TokenBase {
readonly type: TokenType.EOF;
}

/** Token variant whose `type` is fixed to `TokenType.HIBERNATION`; it declares no fields of its own beyond those inherited from `TokenBase`. */
interface HibernationToken extends TokenBase {
readonly type: TokenType.HIBERNATION;
}

/** Token variant for character data; `type` is one of the three character token types. */
export interface CharacterToken extends TokenBase {
// Discriminates between regular, NULL, and whitespace character tokens.
type: TokenType.CHARACTER | TokenType.NULL_CHARACTER | TokenType.WHITESPACE_CHARACTER;
// String payload of the token (presumably the run of characters it represents; confirm against the tokenizer).
chars: string;
}

export type Token = DoctypeToken | TagToken | CommentToken | EOFToken | HibernationToken | CharacterToken;
export type Token = DoctypeToken | TagToken | CommentToken | EOFToken | CharacterToken;
45 changes: 28 additions & 17 deletions packages/parse5/lib/parser/index.ts
@@ -1,5 +1,4 @@
import { TokenizerMode } from '../tokenizer/index.js';
import { QueuedTokenizer } from '../tokenizer/queued.js';
import { TokenHandler, Tokenizer, TokenizerMode } from '../tokenizer/index.js';
import { OpenElementStack } from './open-element-stack.js';
import { FormattingElementList, ElementEntry, EntryType } from './formatting-element-list.js';
import * as defaultTreeAdapter from '../tree-adapters/default.js';
Expand Down Expand Up @@ -120,9 +119,9 @@ const defaultParserOptions = {
};

//Parser
export class Parser<T extends TreeAdapterTypeMap> {
export class Parser<T extends TreeAdapterTypeMap> implements TokenHandler {
treeAdapter: TreeAdapter<T>;
private onParseError: ParserErrorHandler | null;
onParseError: ParserErrorHandler | null;
private currentToken: Token | null = null;
public options: Required<ParserOptions<T>>;
public document: T['document'];
Expand All @@ -147,7 +146,7 @@ export class Parser<T extends TreeAdapterTypeMap> {

this.document = document ?? this.treeAdapter.createDocument();

this.tokenizer = new QueuedTokenizer(this.options);
this.tokenizer = new Tokenizer(this.options, this);
this.activeFormattingElements = new FormattingElementList(this.treeAdapter);

this.fragmentContextID = fragmentContext ? getTagID(this.treeAdapter.getTagName(fragmentContext)) : $.UNKNOWN;
Expand Down Expand Up @@ -211,7 +210,8 @@ export class Parser<T extends TreeAdapterTypeMap> {
return fragment;
}

tokenizer: QueuedTokenizer;
tokenizer: Tokenizer;

stopped = false;
insertionMode = InsertionMode.INITIAL;
originalInsertionMode = InsertionMode.INITIAL;
Expand Down Expand Up @@ -261,13 +261,7 @@ export class Parser<T extends TreeAdapterTypeMap> {
//Parsing loop
private _runParsingLoop(scriptHandler: null | ((scriptElement: T['element']) => void)): void {
while (!this.stopped) {
const token = this.tokenizer.getNextToken();

this._processToken(token);

if (token.type === TokenType.START_TAG && token.selfClosing && !token.ackSelfClosing) {
this._err(token, ERR.nonVoidHtmlElementStartTagWithTrailingSolidus);
}
this.tokenizer.getNextToken();

if (!this.tokenizer.active || (scriptHandler !== null && this.pendingScript)) {
break;
Expand Down Expand Up @@ -601,7 +595,7 @@ export class Parser<T extends TreeAdapterTypeMap> {
break;
}
case TokenType.START_TAG: {
this.onStartTag(token);
this._processStartTag(token);
break;
}
case TokenType.END_TAG: {
Expand Down Expand Up @@ -958,6 +952,23 @@ export class Parser<T extends TreeAdapterTypeMap> {
this.skipNextNewLine = false;
this.currentToken = token;

this._processStartTag(token);

if (token.selfClosing && !token.ackSelfClosing) {
this._err(token, ERR.nonVoidHtmlElementStartTagWithTrailingSolidus);
}
}
/**
* Processes a given start tag.
*
* `onStartTag` checks if a self-closing tag was recognized. When a token
* is moved between multiple insertion modes, this check for self-closing
* could lead to false positives. To avoid this, `_processStartTag` is used
* for nested calls.
*
* @param token The token to process.
*/
_processStartTag(token: TagToken): void {
if (this.shouldProcessStartTagTokenInForeignContent(token)) {
startTagInForeignContent(this, token);
} else {
Expand Down Expand Up @@ -2627,7 +2638,7 @@ function tableStartTagInTable<T extends TreeAdapterTypeMap>(p: Parser<T>, token:
if (p.openElements.hasInTableScope($.TABLE)) {
p.openElements.popUntilTagNamePopped($.TABLE);
p._resetInsertionMode();
p.onStartTag(token);
p._processStartTag(token);
}
}

Expand Down Expand Up @@ -3127,7 +3138,7 @@ function startTagInSelect<T extends TreeAdapterTypeMap>(p: Parser<T>, token: Tag
p._resetInsertionMode();

if (token.tagID !== $.SELECT) {
p.onStartTag(token);
p._processStartTag(token);
}
}
break;
Expand Down Expand Up @@ -3197,7 +3208,7 @@ function startTagInSelectInTable<T extends TreeAdapterTypeMap>(p: Parser<T>, tok
) {
p.openElements.popUntilTagNamePopped($.SELECT);
p._resetInsertionMode();
p.onStartTag(token);
p._processStartTag(token);
} else {
startTagInSelect(p, token);
}
Expand Down
105 changes: 0 additions & 105 deletions packages/parse5/lib/tokenizer/queued.ts

This file was deleted.

35 changes: 25 additions & 10 deletions packages/parse5/lib/tokenizer/tokenizer-location-info.test.ts
@@ -1,7 +1,6 @@
import * as assert from 'node:assert';
import { Tokenizer, TokenizerMode } from './index.js';
import { SinglePathHandler } from './queued.js';
import { Location, EOFToken, Token } from '../common/token.js';
import { Tokenizer, TokenizerMode, TokenHandler } from './index.js';
import { Location, EOFToken, CharacterToken, DoctypeToken, TagToken, CommentToken } from '../common/token.js';
import { getSubstringByLineCol, normalizeNewLine } from 'parse5-test-utils/utils/common.js';

interface LocationInfoTestCase {
Expand All @@ -11,22 +10,17 @@ interface LocationInfoTestCase {
}

/** Receives events and immediately compares them against the expected values. */
class LocationInfoHandler extends SinglePathHandler {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SinglePathHandler was merged into LocationInfoHandler.

class LocationInfoHandler implements TokenHandler {
public sawEof = false;
/** The index of the last html chunk. */
private idx = 0;
/** All of the lines in the input. */
private lines: string[];

constructor(private testCase: LocationInfoTestCase, private html: string) {
super();
this.lines = html.split(/\r?\n/g);
}

protected handleToken(token: Token): void {
this.validateLocation(token.location);
}

private validateLocation(location: Location | null): void {
assert.ok(location);

Expand All @@ -45,7 +39,28 @@ class LocationInfoHandler extends SinglePathHandler {
this.idx += 1;
}

override onEof({ location }: EOFToken): void {
// Token callbacks: each one forwards the token's `location` to `validateLocation`.

/** Validates the location of a comment token. */
onComment({ location }: CommentToken): void {
this.validateLocation(location);
}
/** Validates the location of a doctype token. */
onDoctype({ location }: DoctypeToken): void {
this.validateLocation(location);
}
/** Validates the location of a start tag token. */
onStartTag({ location }: TagToken): void {
this.validateLocation(location);
}
/** Validates the location of an end tag token. */
onEndTag({ location }: TagToken): void {
this.validateLocation(location);
}
/** Validates the location of a character token. */
onCharacter({ location }: CharacterToken): void {
this.validateLocation(location);
}
/** Validates the location of a NULL character token. */
onNullCharacter({ location }: CharacterToken): void {
this.validateLocation(location);
}
/** Validates the location of a whitespace character token. */
onWhitespaceCharacter({ location }: CharacterToken): void {
this.validateLocation(location);
}
onEof({ location }: EOFToken): void {
assert.ok(location);
assert.strictEqual(location.endOffset, location.startOffset);
assert.strictEqual(location.endOffset, this.html.length);
Expand Down