Skip to content

Commit

Permalink
Update generate parser feedback test script
Browse files Browse the repository at this point in the history
  • Loading branch information
fb55 committed Feb 27, 2022
1 parent 91dea83 commit 3dac52b
Showing 1 changed file with 82 additions and 59 deletions.
141 changes: 82 additions & 59 deletions scripts/generate-parser-feedback-test/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ import * as defaultTreeAdapter from 'parse5/dist/tree-adapters/default.js';
import { HtmlLibToken } from 'parse5-test-utils/utils/generate-tokenization-tests.js';
import { parseDatFile } from 'parse5-test-utils/utils/parse-dat-file.js';
import { addSlashes } from 'parse5-test-utils/utils/common.js';
import { TokenType, Token } from 'parse5/dist/common/token.js';
import type { TreeAdapterTypeMap } from 'parse5/dist/tree-adapters/interface.js';
import { CharacterToken, CommentToken, DoctypeToken, TagToken } from '../../packages/parse5/dist/common/token.js';
import type { TreeAdapterTypeMap } from '../../packages/parse5/dist/tree-adapters/interface.js';

// eslint-disable-next-line no-console
main().catch(console.error);
Expand All @@ -23,78 +23,101 @@ function main(): Promise<void[]> {
return Promise.all(convertPromises);
}

/**
 * Adds `token` to the end of `dest`, collapsing runs of character data.
 *
 * - EOF tokens are dropped.
 * - NULL_CHARACTER and WHITESPACE_CHARACTER tokens are retagged as CHARACTER
 *   (the passed-in token object itself is modified).
 * - When the current last element of `dest` is a CHARACTER token, the new
 *   token's `chars` are concatenated onto it and nothing is pushed.
 *
 * @param dest - Accumulated token list; mutated in place.
 * @param token - Token to record; its `type` may be rewritten.
 */
function appendToken(dest: Token[], token: Token): void {
    switch (token.type) {
        case TokenType.EOF:
            // End-of-file markers are never recorded.
            return;

        case TokenType.NULL_CHARACTER:
        case TokenType.WHITESPACE_CHARACTER:
            // Treat every flavour of character data as a plain CHARACTER token.
            token.type = TokenType.CHARACTER;
            break;

        default:
            break;
    }

    if (token.type === TokenType.CHARACTER) {
        const previous = dest[dest.length - 1];

        // Extend the preceding character token rather than starting a new one.
        if (previous?.type === TokenType.CHARACTER) {
            previous.chars += token.chars;
            return;
        }
    }

    dest.push(token);
}

function convertTokenToHtml5Lib(token: Token): HtmlLibToken {
switch (token.type) {
case TokenType.CHARACTER:
case TokenType.NULL_CHARACTER:
case TokenType.WHITESPACE_CHARACTER:
return ['Character', token.chars];

case TokenType.START_TAG: {
const reformatedAttrs = Object.fromEntries(token.attrs.map(({ name, value }) => [name, value]));
const startTagEntry: HtmlLibToken = ['StartTag', token.tagName, reformatedAttrs];

if (token.selfClosing) {
startTagEntry.push(true);
}
function collectParserTokens(html: string): HtmlLibToken[] {
const tokens: HtmlLibToken[] = [];

return startTagEntry;
}
class ExtendedParser<T extends TreeAdapterTypeMap> extends Parser<T> {
private isTopLevel = true;
/**
* We only want to add tokens once. We guard against recursive calls
* using the `isTopLevel` flag.
*/
private guardTopLevel(fn: () => void, getToken: () => HtmlLibToken): void {
const { isTopLevel } = this;
this.isTopLevel = false;

case TokenType.END_TAG:
// NOTE: parser feedback simulator can produce adjusted SVG
// tag names for end tag tokens so we need to lower case it
return ['EndTag', token.tagName.toLowerCase()];
fn();

case TokenType.COMMENT:
return ['Comment', token.data];
if (isTopLevel) {
this.isTopLevel = true;

case TokenType.DOCTYPE:
return ['DOCTYPE', token.name, token.publicId, token.systemId, !token.forceQuirks];
const token = getToken();

default:
throw new TypeError(`Unrecognized token type: ${token.type}`);
}
}
if (token[0] === 'Character') {
if (token[1] == null || token[1].length === 0) {
return;
}

function collectParserTokens(html: string): HtmlLibToken[] {
const tokens: Token[] = [];
const lastToken = tokens[tokens.length - 1];

class ExtendedParser<T extends TreeAdapterTypeMap> extends Parser<T> {
override _processInputToken(token: Token): void {
super._processInputToken(token);
if (lastToken?.[0] === 'Character') {
lastToken[1] += token[1];
return;
}
}

// NOTE: Needed to split attributes of duplicate <html> and <body>
// which are otherwise merged as per tree constructor spec
if (token.type === TokenType.START_TAG) {
token.attrs = [...token.attrs];
tokens.push(token);
}
}

appendToken(tokens, token);
/**
 * Comment handler: hands `guardTopLevel` one callback that forwards the
 * token to the base parser and one that builds the `['Comment', data]` entry.
 */
override onComment(token: CommentToken): void {
    const forward = () => super.onComment(token);
    const toEntry = (): HtmlLibToken => ['Comment', token.data];

    this.guardTopLevel(forward, toEntry);
}
/**
 * Doctype handler: hands `guardTopLevel` one callback that forwards the
 * token to the base parser and one that builds the `DOCTYPE` entry.
 * The last element of the entry is the negation of `forceQuirks`.
 */
override onDoctype(token: DoctypeToken): void {
    const forward = () => super.onDoctype(token);
    const toEntry = (): HtmlLibToken => [
        'DOCTYPE',
        token.name,
        token.publicId,
        token.systemId,
        !token.forceQuirks,
    ];

    this.guardTopLevel(forward, toEntry);
}
/**
 * Start-tag handler: hands `guardTopLevel` one callback that forwards the
 * token to the base parser and one that builds the `StartTag` entry.
 *
 * The entry is `['StartTag', tagName, attrs]`, where `attrs` is an object
 * mapping each attribute name to its value; a trailing `true` is appended
 * only when the tag is self-closing.
 */
override onStartTag(token: TagToken): void {
    const toEntry = (): HtmlLibToken => {
        const attrsByName = Object.fromEntries(token.attrs.map((attr) => [attr.name, attr.value]));
        const entry: HtmlLibToken = ['StartTag', token.tagName, attrsByName];

        if (token.selfClosing) {
            entry.push(true);
        }

        return entry;
    };

    this.guardTopLevel(() => super.onStartTag(token), toEntry);
}
/**
 * End-tag handler: hands `guardTopLevel` one callback that forwards the
 * token to the base parser and one that builds the `['EndTag', name]` entry.
 */
override onEndTag(token: TagToken): void {
    const forward = () => super.onEndTag(token);

    // NOTE: the parser feedback simulator can produce adjusted SVG tag
    // names for end tag tokens, so the name is lowercased before recording.
    const toEntry = (): HtmlLibToken => ['EndTag', token.tagName.toLowerCase()];

    this.guardTopLevel(forward, toEntry);
}
/**
 * Character handler: hands `guardTopLevel` one callback that forwards the
 * token to the base parser and one that builds the `['Character', chars]` entry.
 */
override onCharacter(token: CharacterToken): void {
    const forward = () => super.onCharacter(token);
    const toEntry = (): HtmlLibToken => ['Character', token.chars];

    this.guardTopLevel(forward, toEntry);
}
/**
 * Null-character handler: hands `guardTopLevel` one callback that forwards
 * the token to the base parser and one that builds a `['Character', chars]`
 * entry, the same entry shape used for ordinary character tokens.
 */
override onNullCharacter(token: CharacterToken): void {
    const forward = () => super.onNullCharacter(token);
    const toEntry = (): HtmlLibToken => ['Character', token.chars];

    this.guardTopLevel(forward, toEntry);
}
/**
 * Whitespace-character handler: hands `guardTopLevel` one callback that
 * forwards the token to the base parser and one that builds a
 * `['Character', chars]` entry, the same entry shape used for ordinary
 * character tokens.
 */
override onWhitespaceCharacter(token: CharacterToken): void {
    const forward = () => super.onWhitespaceCharacter(token);
    const toEntry = (): HtmlLibToken => ['Character', token.chars];

    this.guardTopLevel(forward, toEntry);
}
}

ExtendedParser.parse(html);

return tokens.map((token) => convertTokenToHtml5Lib(token));
return tokens;
}

function generateParserFeedbackTest(parserTestFile: string): string {
Expand Down

0 comments on commit 3dac52b

Please sign in to comment.