diff --git a/.eslintrc.json b/.eslintrc.json index 628193dac..e3d430edf 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -47,6 +47,8 @@ "@typescript-eslint/no-non-null-assertion": "warn", "@typescript-eslint/no-explicit-any": "warn", "@typescript-eslint/explicit-function-return-type": "error", + "@typescript-eslint/no-duplicate-imports": "error", + "@typescript-eslint/consistent-type-imports": "error", "@typescript-eslint/no-unused-vars": ["error", { "argsIgnorePattern": "^_" }] } diff --git a/bench/memory/sax-parser.js b/bench/memory/sax-parser.js index c7e5526b1..0460035ee 100644 --- a/bench/memory/sax-parser.js +++ b/bench/memory/sax-parser.js @@ -1,7 +1,7 @@ import { readFile } from 'node:fs/promises'; import format from 'human-format'; import memwatch from '@airbnb/node-memwatch'; -import { SAXParser } from 'parse5-sax-parser/dist/index.js'; +import { SAXParser } from '../../packages/parse5-sax-parser/dist/index.js'; import { finished } from 'parse5-test-utils/dist/common.js'; main(); diff --git a/packages/parse5-html-rewriting-stream/lib/index.ts b/packages/parse5-html-rewriting-stream/lib/index.ts index ebbb1660d..a2700e415 100644 --- a/packages/parse5-html-rewriting-stream/lib/index.ts +++ b/packages/parse5-html-rewriting-stream/lib/index.ts @@ -1,7 +1,14 @@ +import { html, type Token } from 'parse5'; +import { + SAXParser, + type EndTag, + type StartTag, + type Doctype, + type Text, + type Comment, + type SaxToken, +} from 'parse5-sax-parser'; import { escapeText, escapeAttribute } from 'entities'; -import type { Location } from 'parse5/dist/common/token.js'; -import { SAXParser, EndTag, StartTag, Doctype, Text, Comment, SaxToken } from 'parse5-sax-parser'; -import { hasUnescapedText } from 'parse5/dist/serializer/index.js'; /** * Streaming [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style HTML rewriter. @@ -65,7 +72,7 @@ export class RewritingStream extends SAXParser { return ''; } - private _getRawHtml(location: Location): string { + private _getRawHtml(location: Token.Location): string { const { droppedBufferSize, html } = this.tokenizer.preprocessor; const start = location.startOffset - droppedBufferSize; const end = location.endOffset - droppedBufferSize; @@ -130,7 +137,8 @@ export class RewritingStream extends SAXParser { /** Emits a serialized text token into the output stream. */ public emitText({ text }: Text): void { this.push( - !this.parserFeedbackSimulator.inForeignContent && hasUnescapedText(this.tokenizer.lastStartTagName, true) + !this.parserFeedbackSimulator.inForeignContent && + html.hasUnescapedText(this.tokenizer.lastStartTagName, true) ? text : escapeText(text) ); diff --git a/packages/parse5-html-rewriting-stream/package.json b/packages/parse5-html-rewriting-stream/package.json index f9fd05121..9a1f3b045 100644 --- a/packages/parse5-html-rewriting-stream/package.json +++ b/packages/parse5-html-rewriting-stream/package.json @@ -18,7 +18,9 @@ ], "license": "MIT", "main": "dist/index.js", - "exports": "dist/index.js", + "module": "dist/index.js", + "types": "dist/index.d.ts", + "exports": "./dist/index.js", "dependencies": { "entities": "^4.2.0", "parse5": "^6.0.1", diff --git a/packages/parse5-html-rewriting-stream/test/rewriting-stream.test.ts b/packages/parse5-html-rewriting-stream/test/rewriting-stream.test.ts index 202c96860..5c7892a61 100644 --- a/packages/parse5-html-rewriting-stream/test/rewriting-stream.test.ts +++ b/packages/parse5-html-rewriting-stream/test/rewriting-stream.test.ts @@ -2,8 +2,12 @@ import * as assert from 'node:assert'; import { outdent } from 'outdent'; import { RewritingStream } from '../lib/index.js'; import { loadSAXParserTestData } from 'parse5-test-utils/utils/load-sax-parser-test-data.js'; -import { getStringDiffMsg, writeChunkedToStream, WritableStreamStub } from 'parse5-test-utils/utils/common.js'; -import { finished } from 'parse5-test-utils/utils/common.js'; +import { + finished, + getStringDiffMsg, + writeChunkedToStream, + WritableStreamStub, +} from 'parse5-test-utils/utils/common.js'; const srcHtml = outdent` diff --git a/packages/parse5-htmlparser2-tree-adapter/lib/index.ts b/packages/parse5-htmlparser2-tree-adapter/lib/index.ts index 3fd3d9254..6f6d08df5 100644 --- a/packages/parse5-htmlparser2-tree-adapter/lib/index.ts +++ b/packages/parse5-htmlparser2-tree-adapter/lib/index.ts @@ -1,10 +1,7 @@ -import * as doctype from 'parse5/dist/common/doctype.js'; -import { DOCUMENT_MODE, NAMESPACES as NS } from 'parse5/dist/common/html.js'; -import type { Attribute, ElementLocation } from 'parse5/dist/common/token.js'; -import type { TreeAdapter, TreeAdapterTypeMap } from 'parse5/dist/tree-adapters/interface.js'; +import { type TreeAdapterTypeMap, type TreeAdapter, type Token, html } from 'parse5'; import { - Node, - NodeWithChildren, + type Node, + type NodeWithChildren, Element, Document, ProcessingInstruction, @@ -33,6 +30,33 @@ function createTextNode(value: string): Text { return new Text(value); } +function enquoteDoctypeId(id: string): string { + const quote = id.includes('"') ? "'" : '"'; + + return quote + id + quote; +} + +/** @internal */ +export function serializeDoctypeContent(name: string, publicId: string, systemId: string): string { + let str = '!DOCTYPE '; + + if (name) { + str += name; + } + + if (publicId) { + str += ` PUBLIC ${enquoteDoctypeId(publicId)}`; + } else if (systemId) { + str += ' SYSTEM'; + } + + if (systemId) { + str += ` ${enquoteDoctypeId(systemId)}`; + } + + return str; +} + export const adapter: TreeAdapter = { // Re-exports from domhandler isCommentNode: isComment, @@ -42,7 +66,7 @@ export const adapter: TreeAdapter = { //Node construction createDocument(): Document { const node = new Document([]); - node['x-mode'] = DOCUMENT_MODE.NO_QUIRKS; + node['x-mode'] = html.DOCUMENT_MODE.NO_QUIRKS; return node; }, @@ -50,7 +74,7 @@ export const adapter: TreeAdapter = { return new Document([]); }, - createElement(tagName: string, namespaceURI: NS, attrs: Attribute[]): Element { + createElement(tagName: string, namespaceURI: html.NS, attrs: Token.Attribute[]): Element { const attribs = Object.create(null); const attribsNamespace = Object.create(null); const attribsPrefix = Object.create(null); @@ -112,7 +136,7 @@ export const adapter: TreeAdapter = { }, setDocumentType(document: Document, name: string, publicId: string, systemId: string): void { - const data = doctype.serializeContent(name, publicId, systemId); + const data = serializeDoctypeContent(name, publicId, systemId); let doctypeNode = document.children.find( (node): node is ProcessingInstruction => isDirective(node) && node.name === '!doctype' ); @@ -129,12 +153,12 @@ export const adapter: TreeAdapter = { doctypeNode['x-systemId'] = systemId ?? undefined; }, - setDocumentMode(document: Document, mode: DOCUMENT_MODE): void { + setDocumentMode(document: Document, mode: html.DOCUMENT_MODE): void { document['x-mode'] = mode; }, - getDocumentMode(document: Document): DOCUMENT_MODE { - return document['x-mode'] as DOCUMENT_MODE; + getDocumentMode(document: Document): html.DOCUMENT_MODE { + return document['x-mode'] as html.DOCUMENT_MODE; }, detachNode(node: Node): void { @@ -178,7 +202,7 @@ export const adapter: TreeAdapter = { } }, - adoptAttributes(recipient: Element, attrs: Attribute[]): void { + adoptAttributes(recipient: Element, attrs: Token.Attribute[]): void { for (let i = 0; i < attrs.length; i++) { const attrName = attrs[i].name; @@ -203,7 +227,7 @@ export const adapter: TreeAdapter = { return node.parent; }, - getAttrList(element: Element): Attribute[] { + getAttrList(element: Element): Token.Attribute[] { return element.attributes; }, @@ -212,8 +236,8 @@ export const adapter: TreeAdapter = { return element.name; }, - getNamespaceURI(element: Element): NS { - return element.namespace as NS; + getNamespaceURI(element: Element): html.NS { + return element.namespace as html.NS; }, getTextNodeContent(textNode: Text): string { @@ -243,7 +267,7 @@ export const adapter: TreeAdapter = { }, // Source code location - setNodeSourceCodeLocation(node: Node, location: ElementLocation | null): void { + setNodeSourceCodeLocation(node: Node, location: Token.ElementLocation | null): void { if (location) { node.startIndex = location.startOffset; node.endIndex = location.endOffset; @@ -252,11 +276,11 @@ export const adapter: TreeAdapter = { node.sourceCodeLocation = location; }, - getNodeSourceCodeLocation(node: Node): ElementLocation | null | undefined { + getNodeSourceCodeLocation(node: Node): Token.ElementLocation | null | undefined { return node.sourceCodeLocation; }, - updateNodeSourceCodeLocation(node: Node, endLocation: ElementLocation): void { + updateNodeSourceCodeLocation(node: Node, endLocation: Token.ElementLocation): void { if (endLocation.endOffset != null) node.endIndex = endLocation.endOffset; node.sourceCodeLocation = { diff --git a/packages/parse5-htmlparser2-tree-adapter/package.json b/packages/parse5-htmlparser2-tree-adapter/package.json index 9aaddbc3b..8d86c29ee 100644 --- a/packages/parse5-htmlparser2-tree-adapter/package.json +++ b/packages/parse5-htmlparser2-tree-adapter/package.json @@ -15,6 +15,9 @@ ], "license": "MIT", "main": "dist/index.js", + "module": "dist/index.js", + "types": "dist/index.d.ts", + "exports": "./dist/index.js", "dependencies": { "domhandler": "^4.3.1", "parse5": "^6.0.1" diff --git a/packages/parse5-parser-stream/lib/index.ts b/packages/parse5-parser-stream/lib/index.ts index 74b737664..699005ee4 100644 --- a/packages/parse5-parser-stream/lib/index.ts +++ b/packages/parse5-parser-stream/lib/index.ts @@ -1,7 +1,5 @@ import { Writable } from 'node:stream'; -import { Parser, ParserOptions } from 'parse5/dist/parser/index.js'; -import type { TreeAdapterTypeMap } from 'parse5/dist/tree-adapters/interface.js'; -import type { DefaultTreeAdapterMap } from 'parse5/dist/tree-adapters/default.js'; +import { Parser, type ParserOptions, type TreeAdapterTypeMap, type DefaultTreeAdapterMap } from 'parse5'; /* eslint-disable unicorn/consistent-function-scoping -- The rule seems to be broken here. */ diff --git a/packages/parse5-parser-stream/package.json b/packages/parse5-parser-stream/package.json index 8eb736045..45309a9a5 100644 --- a/packages/parse5-parser-stream/package.json +++ b/packages/parse5-parser-stream/package.json @@ -15,6 +15,9 @@ ], "license": "MIT", "main": "dist/index.js", + "module": "dist/index.js", + "types": "dist/index.d.ts", + "exports": "./dist/index.js", "dependencies": { "parse5": "^6.0.1" }, diff --git a/packages/parse5-parser-stream/test/scripting.test.ts b/packages/parse5-parser-stream/test/scripting.test.ts index a0f166c8a..953ef6e5a 100644 --- a/packages/parse5-parser-stream/test/scripting.test.ts +++ b/packages/parse5-parser-stream/test/scripting.test.ts @@ -1,8 +1,7 @@ import { ParserStream } from '../lib/index.js'; import { generateParsingTests } from 'parse5-test-utils/utils/generate-parsing-tests.js'; -import { makeChunks, generateTestsForEachTreeAdapter } from 'parse5-test-utils/utils/common.js'; +import { makeChunks, generateTestsForEachTreeAdapter, finished } from 'parse5-test-utils/utils/common.js'; import { runInNewContext } from 'node:vm'; -import { finished } from 'parse5-test-utils/utils/common.js'; function pause(): Promise { return new Promise((resolve) => setTimeout(resolve, 5)); diff --git a/packages/parse5-parser-stream/test/utils/parse-chunked.ts b/packages/parse5-parser-stream/test/utils/parse-chunked.ts index 90370a2bf..f219bc012 100644 --- a/packages/parse5-parser-stream/test/utils/parse-chunked.ts +++ b/packages/parse5-parser-stream/test/utils/parse-chunked.ts @@ -1,5 +1,4 @@ -import type { ParserOptions } from 'parse5'; -import type { TreeAdapterTypeMap } from 'parse5/dist/tree-adapters/interface.js'; +import type { ParserOptions, TreeAdapterTypeMap } from 'parse5'; import { ParserStream } from '../../lib/index.js'; import { makeChunks } from 'parse5-test-utils/utils/common.js'; diff --git a/packages/parse5-plain-text-conversion-stream/lib/index.ts b/packages/parse5-plain-text-conversion-stream/lib/index.ts index 8ba6a37ab..f07946e6c 100644 --- a/packages/parse5-plain-text-conversion-stream/lib/index.ts +++ b/packages/parse5-plain-text-conversion-stream/lib/index.ts @@ -1,7 +1,7 @@ -import type { ParserOptions } from 'parse5'; +import { type ParserOptions, type TreeAdapterTypeMap, html } from 'parse5'; import { ParserStream } from 'parse5-parser-stream'; -import { TAG_ID as $, TAG_NAMES as TN } from 'parse5/dist/common/html.js'; -import type { TreeAdapterTypeMap } from 'parse5/dist/tree-adapters/interface.js'; + +const { TAG_ID: $, TAG_NAMES: TN } = html; /** * Converts plain text files into HTML document as required by [HTML specification](https://html.spec.whatwg.org/#read-text). diff --git a/packages/parse5-plain-text-conversion-stream/package.json b/packages/parse5-plain-text-conversion-stream/package.json index 9fa538fb3..ad62b3e4a 100644 --- a/packages/parse5-plain-text-conversion-stream/package.json +++ b/packages/parse5-plain-text-conversion-stream/package.json @@ -18,6 +18,9 @@ ], "license": "MIT", "main": "dist/index.js", + "module": "dist/index.js", + "types": "dist/index.d.ts", + "exports": "./dist/index.js", "dependencies": { "parse5": "^6.0.1", "parse5-parser-stream": "^6.0.1" diff --git a/packages/parse5-plain-text-conversion-stream/test/plain-text-conversion-stream.test.ts b/packages/parse5-plain-text-conversion-stream/test/plain-text-conversion-stream.test.ts index fa61da664..da8e28201 100644 --- a/packages/parse5-plain-text-conversion-stream/test/plain-text-conversion-stream.test.ts +++ b/packages/parse5-plain-text-conversion-stream/test/plain-text-conversion-stream.test.ts @@ -1,5 +1,5 @@ import * as assert from 'node:assert'; -import * as parse5 from 'parse5'; +import { serialize } from 'parse5'; import { PlainTextConversionStream } from '../lib/index.js'; import { generateTestsForEachTreeAdapter } from 'parse5-test-utils/utils/common.js'; @@ -12,7 +12,7 @@ generateTestsForEachTreeAdapter('plain-test-conversion-stream', (treeAdapter) => converter.write('\u0000'); converter.end(''); - const result = parse5.serialize(converter.document, { treeAdapter }); + const result = serialize(converter.document, { treeAdapter }); assert.strictEqual( result, diff --git a/packages/parse5-sax-parser/lib/index.ts b/packages/parse5-sax-parser/lib/index.ts index e16c5a72b..8b51209ff 100644 --- a/packages/parse5-sax-parser/lib/index.ts +++ b/packages/parse5-sax-parser/lib/index.ts @@ -1,13 +1,5 @@ import { Transform } from 'node:stream'; -import type { Tokenizer, TokenHandler } from 'parse5/dist/tokenizer/index.js'; -import type { - Attribute, - Location, - TagToken, - CommentToken, - DoctypeToken, - CharacterToken, -} from 'parse5/dist/common/token.js'; +import type { Tokenizer, TokenHandler, Token } from 'parse5'; import { DevNullStream } from './dev-null-stream.js'; import { ParserFeedbackSimulator } from './parser-feedback-simulator.js'; @@ -135,7 +127,7 @@ export class SAXParser extends Transform implements TokenHandler { } /** @internal */ - onCharacter({ chars, location }: CharacterToken): void { + onCharacter({ chars, location }: Token.CharacterToken): void { if (this.pendingText === null) { this.pendingText = { text: chars, sourceCodeLocation: location }; } else { @@ -158,12 +150,12 @@ export class SAXParser extends Transform implements TokenHandler { } /** @internal */ - onWhitespaceCharacter(token: CharacterToken): void { + onWhitespaceCharacter(token: Token.CharacterToken): void { this.onCharacter(token); } /** @internal */ - onNullCharacter(token: CharacterToken): void { + onNullCharacter(token: Token.CharacterToken): void { this.onCharacter(token); } @@ -174,7 +166,7 @@ export class SAXParser extends Transform implements TokenHandler { } /** @internal */ - onStartTag(token: TagToken): void { + onStartTag(token: Token.TagToken): void { this._emitPendingText(); const startTag: StartTag = { @@ -187,7 +179,7 @@ export class SAXParser extends Transform implements TokenHandler { } /** @internal */ - onEndTag(token: TagToken): void { + onEndTag(token: Token.TagToken): void { this._emitPendingText(); const endTag: EndTag = { @@ -198,7 +190,7 @@ export class SAXParser extends Transform implements TokenHandler { } /** @internal */ - onDoctype(token: DoctypeToken): void { + onDoctype(token: Token.DoctypeToken): void { this._emitPendingText(); const doctype: Doctype = { @@ -211,7 +203,7 @@ export class SAXParser extends Transform implements TokenHandler { } /** @internal */ - onComment(token: CommentToken): void { + onComment(token: Token.CommentToken): void { this._emitPendingText(); const comment: Comment = { @@ -245,14 +237,14 @@ export class SAXParser extends Transform implements TokenHandler { export interface SaxToken { /** Source code location info. Available if location info is enabled via {@link SAXParserOptions}. */ - sourceCodeLocation?: Location | null; + sourceCodeLocation?: Token.Location | null; } export interface StartTag extends SaxToken { /** Tag name */ tagName: string; /** List of attributes */ - attrs: Attribute[]; + attrs: Token.Attribute[]; /** Indicates if the tag is self-closing */ selfClosing: boolean; } diff --git a/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts b/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts index 06f9d31aa..116637dd1 100644 --- a/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts +++ b/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts @@ -1,30 +1,40 @@ -import { Tokenizer, TokenizerOptions, TokenizerMode, TokenHandler } from 'parse5/dist/tokenizer/index.js'; -import { TokenType, TagToken, CommentToken, DoctypeToken, CharacterToken, EOFToken } from 'parse5/dist/common/token.js'; -import * as foreignContent from 'parse5/dist/common/foreign-content.js'; -import * as unicode from 'parse5/dist/common/unicode.js'; -import { TAG_ID as $, TAG_NAMES as TN, NAMESPACES as NS, getTagID } from 'parse5/dist/common/html.js'; - -//ParserFeedbackSimulator -//Simulates adjustment of the Tokenizer which performed by standard parser during tree construction. +import { + Tokenizer, + type TokenizerOptions, + TokenizerMode, + type TokenHandler, + Token, + foreignContent, + html, +} from 'parse5'; + +const $ = html.TAG_ID; + +const REPLACEMENT_CHARACTER = '\uFFFD'; +const LINE_FEED_CODE_POINT = 0x0a; + +/** + * Simulates adjustments of the Tokenizer which are performed by the standard parser during tree construction. + */ export class ParserFeedbackSimulator implements TokenHandler { - private namespaceStack: NS[] = []; + private namespaceStack: html.NS[] = []; public inForeignContent = false; public skipNextNewLine = false; public tokenizer: Tokenizer; constructor(options: TokenizerOptions, private handler: TokenHandler) { this.tokenizer = new Tokenizer(options, this); - this._enterNamespace(NS.HTML); + this._enterNamespace(html.NS.HTML); } /** @internal */ - onNullCharacter(token: CharacterToken): void { + onNullCharacter(token: Token.CharacterToken): void { this.skipNextNewLine = false; if (this.inForeignContent) { this.handler.onCharacter({ - type: TokenType.CHARACTER, - chars: unicode.REPLACEMENT_CHARACTER, + type: Token.TokenType.CHARACTER, + chars: REPLACEMENT_CHARACTER, location: token.location, }); } else { @@ -33,8 +43,8 @@ export class ParserFeedbackSimulator implements TokenHandler { } /** @internal */ - onWhitespaceCharacter(token: CharacterToken): void { - if (this.skipNextNewLine && token.chars.charCodeAt(0) === unicode.CODE_POINTS.LINE_FEED) { + onWhitespaceCharacter(token: Token.CharacterToken): void { + if (this.skipNextNewLine && token.chars.charCodeAt(0) === LINE_FEED_CODE_POINT) { this.skipNextNewLine = false; if (token.chars.length === 1) { @@ -48,44 +58,44 @@ export class ParserFeedbackSimulator implements TokenHandler { } /** @internal */ - onCharacter(token: CharacterToken): void { + onCharacter(token: Token.CharacterToken): void { this.skipNextNewLine = false; this.handler.onCharacter(token); } /** @internal */ - onComment(token: CommentToken): void { + onComment(token: Token.CommentToken): void { this.skipNextNewLine = false; this.handler.onComment(token); } /** @internal */ - onDoctype(token: DoctypeToken): void { + onDoctype(token: Token.DoctypeToken): void { this.skipNextNewLine = false; this.handler.onDoctype(token); } /** @internal */ - onEof(token: EOFToken): void { + onEof(token: Token.EOFToken): void { this.skipNextNewLine = false; this.handler.onEof(token); } //Namespace stack mutations - private _enterNamespace(namespace: NS): void { + private _enterNamespace(namespace: html.NS): void { this.namespaceStack.unshift(namespace); - this.inForeignContent = namespace !== NS.HTML; + this.inForeignContent = namespace !== html.NS.HTML; this.tokenizer.inForeignNode = this.inForeignContent; } private _leaveCurrentNamespace(): void { this.namespaceStack.shift(); - this.inForeignContent = this.namespaceStack[0] !== NS.HTML; + this.inForeignContent = this.namespaceStack[0] !== html.NS.HTML; this.tokenizer.inForeignNode = this.inForeignContent; } //Token handlers - private _ensureTokenizerMode(tn: $): void { + private _ensureTokenizerMode(tn: html.TAG_ID): void { switch (tn) { case $.TEXTAREA: case $.TITLE: { @@ -115,16 +125,16 @@ export class ParserFeedbackSimulator implements TokenHandler { } /** @internal */ - onStartTag(token: TagToken): void { + onStartTag(token: Token.TagToken): void { let tn = token.tagID; switch (tn) { case $.SVG: { - this._enterNamespace(NS.SVG); + this._enterNamespace(html.NS.SVG); break; } case $.MATH: { - this._enterNamespace(NS.MATHML); + this._enterNamespace(html.NS.MATHML); break; } default: @@ -137,9 +147,9 @@ export class ParserFeedbackSimulator implements TokenHandler { } else { const currentNs = this.namespaceStack[0]; - if (currentNs === NS.MATHML) { + if (currentNs === html.NS.MATHML) { foreignContent.adjustTokenMathMLAttrs(token); - } else if (currentNs === NS.SVG) { + } else if (currentNs === html.NS.SVG) { foreignContent.adjustTokenSVGTagName(token); foreignContent.adjustTokenSVGAttrs(token); } @@ -149,7 +159,7 @@ export class ParserFeedbackSimulator implements TokenHandler { tn = token.tagID; if (!token.selfClosing && foreignContent.isIntegrationPoint(tn, currentNs, token.attrs)) { - this._enterNamespace(NS.HTML); + this._enterNamespace(html.NS.HTML); } } } else { @@ -161,7 +171,7 @@ export class ParserFeedbackSimulator implements TokenHandler { break; } case $.IMAGE: { - token.tagName = TN.IMG; + token.tagName = html.TAG_NAMES.IMG; token.tagID = $.IMG; break; } @@ -176,17 +186,17 @@ export class ParserFeedbackSimulator implements TokenHandler { } /** @internal */ - onEndTag(token: TagToken): void { + onEndTag(token: Token.TagToken): void { let tn = token.tagID; if (!this.inForeignContent) { const previousNs = this.namespaceStack[1]; - if (previousNs === NS.SVG) { + if (previousNs === html.NS.SVG) { const adjustedTagName = foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP.get(token.tagName); if (adjustedTagName) { - tn = getTagID(adjustedTagName); + tn = html.getTagID(adjustedTagName); } } @@ -195,14 +205,14 @@ export class ParserFeedbackSimulator implements TokenHandler { this._leaveCurrentNamespace(); } } else if ( - (tn === $.SVG && this.namespaceStack[0] === NS.SVG) || - (tn === $.MATH && this.namespaceStack[0] === NS.MATHML) + (tn === $.SVG && this.namespaceStack[0] === html.NS.SVG) || + (tn === $.MATH && this.namespaceStack[0] === html.NS.MATHML) ) { this._leaveCurrentNamespace(); } // NOTE: adjust end tag name as well for consistency - if (this.namespaceStack[0] === NS.SVG) { + if (this.namespaceStack[0] === html.NS.SVG) { foreignContent.adjustTokenSVGTagName(token); } diff --git a/packages/parse5-sax-parser/package.json b/packages/parse5-sax-parser/package.json index c4bd18b7c..f0ce2db63 100644 --- a/packages/parse5-sax-parser/package.json +++ b/packages/parse5-sax-parser/package.json @@ -16,6 +16,9 @@ ], "license": "MIT", "main": "dist/index.js", + "module": "dist/index.js", + "types": "dist/index.d.ts", + "exports": "./dist/index.js", "dependencies": { "parse5": "^6.0.1" }, diff --git a/packages/parse5-sax-parser/test/location-info.test.ts b/packages/parse5-sax-parser/test/location-info.test.ts index e546f60ec..dba662dc6 100644 --- a/packages/parse5-sax-parser/test/location-info.test.ts +++ b/packages/parse5-sax-parser/test/location-info.test.ts @@ -2,9 +2,9 @@ import * as assert from 'node:assert'; import { SAXParser } from '../lib/index.js'; import { loadSAXParserTestData } from 'parse5-test-utils/utils/load-sax-parser-test-data.js'; import { writeChunkedToStream } from 'parse5-test-utils/utils/common.js'; -import type { Location } from 'parse5/dist/common/token.js'; +import type { Token } from 'parse5'; -function assertLocation({ sourceCodeLocation }: { sourceCodeLocation: Location }): void { +function assertLocation({ sourceCodeLocation }: { sourceCodeLocation: Token.Location }): void { assert.strictEqual(typeof sourceCodeLocation.startLine, 'number'); assert.strictEqual(typeof sourceCodeLocation.startCol, 'number'); assert.strictEqual(typeof sourceCodeLocation.startOffset, 'number'); diff --git a/packages/parse5-sax-parser/test/sax-parser.test.ts b/packages/parse5-sax-parser/test/sax-parser.test.ts index 11a7d8754..20ee3e6da 100644 --- a/packages/parse5-sax-parser/test/sax-parser.test.ts +++ b/packages/parse5-sax-parser/test/sax-parser.test.ts @@ -1,9 +1,10 @@ import * as assert from 'node:assert'; import * as fs from 'node:fs'; -import { finished } from 'parse5-test-utils/utils/common.js'; -import { SAXParser, SAXParserOptions } from '../lib/index.js'; +import type { SAXParserOptions } from '../lib/index.js'; +import { SAXParser } from '../lib/index.js'; import { loadSAXParserTestData } from 'parse5-test-utils/utils/load-sax-parser-test-data.js'; import { + finished, getStringDiffMsg, writeChunkedToStream, removeNewLines, diff --git a/packages/parse5/lib/common/doctype.ts b/packages/parse5/lib/common/doctype.ts index d57b7f9e8..a4c1b9382 100644 --- a/packages/parse5/lib/common/doctype.ts +++ b/packages/parse5/lib/common/doctype.ts @@ -1,5 +1,5 @@ import { DOCUMENT_MODE } from './html.js'; -import { DoctypeToken } from './token.js'; +import type { DoctypeToken } from './token.js'; //Const const VALID_DOCTYPE_NAME = 'html'; @@ -84,12 +84,6 @@ const LIMITED_QUIRKS_WITH_SYSTEM_ID_PUBLIC_ID_PREFIXES = [ ]; //Utils -function enquoteDoctypeId(id: string): string { - const quote = id.includes('"') ? "'" : '"'; - - return quote + id + quote; -} - function hasPrefix(publicId: string, prefixes: string[]): boolean { return prefixes.some((prefix) => publicId.startsWith(prefix)); } @@ -139,23 +133,3 @@ export function getDocumentMode(token: DoctypeToken): DOCUMENT_MODE { return DOCUMENT_MODE.NO_QUIRKS; } - -export function serializeContent(name: string, publicId: string, systemId: string): string { - let str = '!DOCTYPE '; - - if (name) { - str += name; - } - - if (publicId) { - str += ` PUBLIC ${enquoteDoctypeId(publicId)}`; - } else if (systemId) { - str += ' SYSTEM'; - } - - if (systemId) { - str += ` ${enquoteDoctypeId(systemId)}`; - } - - return str; -} diff --git a/packages/parse5/lib/common/error-codes.ts b/packages/parse5/lib/common/error-codes.ts index cff89bfd9..e8387848f 100644 --- a/packages/parse5/lib/common/error-codes.ts +++ b/packages/parse5/lib/common/error-codes.ts @@ -1,4 +1,4 @@ -import { Location } from './token.js'; +import type { Location } from './token.js'; export interface ParserError extends Location { code: ERR; diff --git a/packages/parse5/lib/common/foreign-content.ts b/packages/parse5/lib/common/foreign-content.ts index 6ad04506f..891e83329 100644 --- a/packages/parse5/lib/common/foreign-content.ts +++ b/packages/parse5/lib/common/foreign-content.ts @@ -1,4 +1,4 @@ -import { TAG_ID as $, NAMESPACES as NS, ATTRS, getTagID } from './html.js'; +import { TAG_ID as $, NS, ATTRS, getTagID } from './html.js'; import type { TagToken, Attribute } from './token.js'; //MIME types diff --git a/packages/parse5/lib/common/html.ts b/packages/parse5/lib/common/html.ts index 51e1ced6b..48e27317f 100644 --- a/packages/parse5/lib/common/html.ts +++ b/packages/parse5/lib/common/html.ts @@ -1,4 +1,5 @@ -export enum NAMESPACES { +/** All valid namespaces in HTML. */ +export enum NS { HTML = 'http://www.w3.org/1999/xhtml', MATHML = 'http://www.w3.org/1998/Math/MathML', SVG = 'http://www.w3.org/2000/svg', @@ -7,8 +8,6 @@ export enum NAMESPACES { XMLNS = 'http://www.w3.org/2000/xmlns/', } -const NS = NAMESPACES; - export enum ATTRS { TYPE = 'type', ACTION = 'action', @@ -461,7 +460,7 @@ export function getTagID(tagName: string): TAG_ID { const $ = TAG_ID; -export const SPECIAL_ELEMENTS: Record> = { +export const SPECIAL_ELEMENTS: Record> = { [NS.HTML]: new Set([ $.ADDRESS, $.APPLET, @@ -555,3 +554,17 @@ export const SPECIAL_ELEMENTS: Record> = { export function isNumberedHeader(tn: TAG_ID): boolean { return tn === $.H1 || tn === $.H2 || tn === $.H3 || tn === $.H4 || tn === $.H5 || tn === $.H6; } + +const UNESCAPED_TEXT = new Set([ + TAG_NAMES.STYLE, + TAG_NAMES.SCRIPT, + TAG_NAMES.XMP, + TAG_NAMES.IFRAME, + TAG_NAMES.NOEMBED, + TAG_NAMES.NOFRAMES, + TAG_NAMES.PLAINTEXT, +]); + +export function hasUnescapedText(tn: string, scriptingEnabled: boolean): boolean { + return UNESCAPED_TEXT.has(tn) || (scriptingEnabled && tn === TAG_NAMES.NOSCRIPT); +} diff --git a/packages/parse5/lib/index.ts b/packages/parse5/lib/index.ts index c94f3aa5a..807d6712e 100644 --- a/packages/parse5/lib/index.ts +++ b/packages/parse5/lib/index.ts @@ -1,10 +1,22 @@ -import { Parser, ParserOptions } from './parser/index.js'; +import { Parser, type ParserOptions } from './parser/index.js'; import type { DefaultTreeAdapterMap } from './tree-adapters/default.js'; import type { TreeAdapterTypeMap } from './tree-adapters/interface.js'; -export { ParserOptions } from './parser/index.js'; -export { serialize, serializeOuter, SerializerOptions } from './serializer/index.js'; +export { type DefaultTreeAdapterMap, defaultTreeAdapter } from './tree-adapters/default.js'; +export type { TreeAdapter, TreeAdapterTypeMap } from './tree-adapters/interface.js'; +export { type ParserOptions, /** @internal */ Parser } from './parser/index.js'; +export { serialize, serializeOuter, type SerializerOptions } from './serializer/index.js'; +export type { ParserError } from './common/error-codes.js'; + +/** @internal */ +export * as foreignContent from './common/foreign-content.js'; +/** @internal */ +export * as html from './common/html.js'; +/** @internal */ +export * as Token from './common/token.js'; +/** @internal */ +export { Tokenizer, TokenizerOptions, TokenizerMode, TokenHandler } from './tokenizer/index.js'; // Shorthands diff --git a/packages/parse5/lib/parser/formatting-element-list.test.ts b/packages/parse5/lib/parser/formatting-element-list.test.ts index e848a4851..cd3f9af48 100644 --- a/packages/parse5/lib/parser/formatting-element-list.test.ts +++ b/packages/parse5/lib/parser/formatting-element-list.test.ts @@ -1,6 +1,6 @@ import * as assert from 'node:assert'; -import { TAG_NAMES as $, NAMESPACES as NS, getTagID } from '../common/html.js'; -import { TagToken, TokenType } from '../common/token.js'; +import { TAG_NAMES as $, NS, getTagID } from '../common/html.js'; +import { type TagToken, TokenType } from '../common/token.js'; import { FormattingElementList, EntryType } from './formatting-element-list.js'; import { generateTestsForEachTreeAdapter } from 'parse5-test-utils/utils/common.js'; diff --git a/packages/parse5/lib/parser/index.test.ts b/packages/parse5/lib/parser/index.test.ts index 60bec2c7c..fdbc092c3 100644 --- a/packages/parse5/lib/parser/index.test.ts +++ b/packages/parse5/lib/parser/index.test.ts @@ -1,5 +1,5 @@ import * as assert from 'node:assert'; -import * as parse5 from 'parse5'; +import { parseFragment, parse } from 'parse5'; import { jest } from '@jest/globals'; import { generateParsingTests } from 'parse5-test-utils/utils/generate-parsing-tests.js'; import { treeAdapters } from 'parse5-test-utils/utils/common.js'; @@ -20,9 +20,7 @@ generateParsingTests( ], }, (test, opts) => ({ - node: test.fragmentContext - ? parse5.parseFragment(test.fragmentContext, test.input, opts) - : parse5.parse(test.input, opts), + node: test.fragmentContext ? parseFragment(test.fragmentContext, test.input, opts) : parse(test.input, opts), }) ); @@ -35,16 +33,14 @@ generateParsingTests( expectErrors: ['505.search-element', '506.search-element'], }, (test, opts) => ({ - node: test.fragmentContext - ? parse5.parseFragment(test.fragmentContext, test.input, opts) - : parse5.parse(test.input, opts), + node: test.fragmentContext ? parseFragment(test.fragmentContext, test.input, opts) : parse(test.input, opts), }) ); describe('parser', () => { it('Regression - HTML5 Legacy Doctype Misparsed with htmlparser2 tree adapter (GH-45)', () => { const html = 'Hi there!'; - const document = parse5.parse(html, { treeAdapter: treeAdapters.htmlparser2 }); + const document = parse(html, { treeAdapter: treeAdapters.htmlparser2 }); assert.ok(treeAdapters.htmlparser2.isDocumentTypeNode(document.childNodes[0])); assert.strictEqual(document.childNodes[0].data, '!DOCTYPE html SYSTEM "about:legacy-compat"'); @@ -64,7 +60,7 @@ describe('parser', () => { }); it('parses correctly', () => { - const fragment = parse5.parseFragment('
', { + const fragment = parseFragment('
', { treeAdapter: treeAdapters.htmlparser2, }); @@ -74,7 +70,7 @@ describe('parser', () => { }); it('Regression - DOCTYPE empty fields (GH-236)', () => { - const document = parse5.parse(''); + const document = parse(''); const doctype = document.childNodes[0]; expect(doctype).toHaveProperty('name', ''); @@ -86,7 +82,7 @@ describe('parser', () => { it('should support onItemPush and onItemPop', () => { const onItemPush = jest.fn(); const onItemPop = jest.fn(); - const document = parse5.parse('

', { + const document = parse('

', { treeAdapter: { ...treeAdapters.default, onItemPush, diff --git a/packages/parse5/lib/parser/index.ts b/packages/parse5/lib/parser/index.ts index bb38706d5..00fb7af8c 100644 --- a/packages/parse5/lib/parser/index.ts +++ b/packages/parse5/lib/parser/index.ts @@ -1,15 +1,15 @@ -import { TokenHandler, Tokenizer, TokenizerMode } from '../tokenizer/index.js'; -import { OpenElementStack, StackHandler } from './open-element-stack.js'; -import { FormattingElementList, ElementEntry, EntryType } from './formatting-element-list.js'; -import { defaultTreeAdapter, DefaultTreeAdapterMap } from '../tree-adapters/default.js'; +import { Tokenizer, TokenizerMode, type TokenHandler } from '../tokenizer/index.js'; +import { OpenElementStack, type StackHandler } from './open-element-stack.js'; +import { FormattingElementList, EntryType, type ElementEntry } from './formatting-element-list.js'; +import { defaultTreeAdapter, type DefaultTreeAdapterMap } from '../tree-adapters/default.js'; import * as doctype from '../common/doctype.js'; import * as foreignContent from '../common/foreign-content.js'; -import { ERR, ParserErrorHandler } from '../common/error-codes.js'; +import { ERR, type ParserErrorHandler } from '../common/error-codes.js'; import * as unicode from '../common/unicode.js'; import { TAG_ID as $, TAG_NAMES as TN, - NAMESPACES as NS, + NS, ATTRS, SPECIAL_ELEMENTS, DOCUMENT_MODE, @@ -20,14 +20,14 @@ import type { TreeAdapter, TreeAdapterTypeMap } from '../tree-adapters/interface import { TokenType, getTokenAttr, - Token, - CommentToken, - CharacterToken, - TagToken, - DoctypeToken, - EOFToken, - LocationWithAttributes, - ElementLocation, + type Token, + type CommentToken, + type CharacterToken, + type TagToken, + type DoctypeToken, + type EOFToken, + type LocationWithAttributes, + type ElementLocation, } from '../common/token.js'; //Misc constants diff --git a/packages/parse5/lib/parser/open-element-stack.test.ts b/packages/parse5/lib/parser/open-element-stack.test.ts index ba0b7d9b9..2ebbebe43 100644 --- a/packages/parse5/lib/parser/open-element-stack.test.ts +++ b/packages/parse5/lib/parser/open-element-stack.test.ts @@ -1,5 +1,5 @@ import * as assert from 'node:assert'; -import { TAG_ID as $, TAG_NAMES as TN, NAMESPACES as NS } from '../common/html.js'; +import { TAG_ID as $, TAG_NAMES as TN, NS } from '../common/html.js'; import { OpenElementStack } from './open-element-stack.js'; import type { TreeAdapterTypeMap } from '../tree-adapters/interface'; import { generateTestsForEachTreeAdapter } from 'parse5-test-utils/utils/common.js'; diff --git a/packages/parse5/lib/parser/open-element-stack.ts b/packages/parse5/lib/parser/open-element-stack.ts index 0df1fb366..9572b44fe 100644 --- a/packages/parse5/lib/parser/open-element-stack.ts +++ b/packages/parse5/lib/parser/open-element-stack.ts @@ -1,4 +1,4 @@ -import { TAG_ID as $, NAMESPACES as NS, isNumberedHeader } from '../common/html.js'; +import { TAG_ID as $, NS, isNumberedHeader } from '../common/html.js'; import type { TreeAdapter, TreeAdapterTypeMap } from '../tree-adapters/interface'; //Element utils diff --git a/packages/parse5/lib/parser/parser-location-info.test.ts b/packages/parse5/lib/parser/parser-location-info.test.ts index 82d7c3fdd..e9cee7333 100644 --- a/packages/parse5/lib/parser/parser-location-info.test.ts +++ b/packages/parse5/lib/parser/parser-location-info.test.ts @@ -1,20 +1,16 @@ import * as assert from 'node:assert'; import { outdent } from 'outdent'; -import * as parse5 from 'parse5'; +import { type ParserOptions, type TreeAdapterTypeMap, parse, parseFragment } from 'parse5'; import { generateLocationInfoParserTests, assertStartTagLocation, assertNodeLocation, } from 'parse5-test-utils/utils/generate-location-info-parser-tests.js'; import { generateTestsForEachTreeAdapter, treeAdapters } from 'parse5-test-utils/utils/common.js'; -import { TreeAdapterTypeMap } from 'parse5/dist/tree-adapters/interface.js'; -generateLocationInfoParserTests( - 'location-info-parser', - (input: string, opts: parse5.ParserOptions) => ({ - node: parse5.parse(input, opts), - }) -); +generateLocationInfoParserTests('location-info-parser', (input: string, opts: ParserOptions) => ({ + node: parse(input, opts), +})); generateTestsForEachTreeAdapter('location-info-parser', (treeAdapter) => { test('Regression - Incorrect LocationInfo.endOffset for implicitly closed

element (GH-109)', () => { @@ -25,7 +21,7 @@ generateTestsForEachTreeAdapter('location-info-parser', (treeAdapter) => { sourceCodeLocationInfo: true, }; - const fragment = parse5.parseFragment(html, opts); + const fragment = parseFragment(html, opts); const firstP = treeAdapter.getChildNodes(fragment)[0]; const firstPLocation = treeAdapter.getNodeSourceCodeLocation(firstP); @@ -41,7 +37,7 @@ generateTestsForEachTreeAdapter('location-info-parser', (treeAdapter) => { sourceCodeLocationInfo: true, }; - const fragment = parse5.parseFragment(html, opts); + const fragment = parseFragment(html, opts); const firstChild = treeAdapter.getChildNodes(fragment)[0]; const location = treeAdapter.getNodeSourceCodeLocation(firstChild); @@ -57,7 +53,7 @@ generateTestsForEachTreeAdapter('location-info-parser', (treeAdapter) => { sourceCodeLocationInfo: true, }; - const fragment = parse5.parseFragment(html, opts); + const fragment = parseFragment(html, opts); const firstChild = treeAdapter.getChildNodes(fragment)[0]; assert.ok(treeAdapter.getNodeSourceCodeLocation(firstChild)); @@ -72,7 +68,7 @@ generateTestsForEachTreeAdapter('location-info-parser', (treeAdapter) => { }; assert.doesNotThrow(() => { - parse5.parseFragment(html, opts); + parseFragment(html, opts); }); }); @@ -84,7 +80,7 @@ generateTestsForEachTreeAdapter('location-info-parser', (treeAdapter) => { sourceCodeLocationInfo: true, }; - const fragment = parse5.parseFragment(html, opts); + const fragment = parseFragment(html, opts); const firstChild = treeAdapter.getChildNodes(fragment)[0]; assert.ok(treeAdapter.getNodeSourceCodeLocation(firstChild)?.attrs?.['test-attr']); @@ -104,7 +100,7 @@ generateTestsForEachTreeAdapter('location-info-parser', (treeAdapter) => { sourceCodeLocationInfo: true, }; - const document = parse5.parse(html, opts); + const document = parse(html, opts); const htmlEl = treeAdapter.getChildNodes(document)[0]; const bodyEl = treeAdapter.getChildNodes(htmlEl)[1]; const scriptEl = treeAdapter.getChildNodes(bodyEl)[0]; @@ -122,7 +118,7 @@ generateTestsForEachTreeAdapter('location-info-parser', (treeAdapter) => { sourceCodeLocationInfo: true, }; - const fragment = parse5.parseFragment(html, opts); + const fragment = parseFragment(html, opts); const p = treeAdapter.getChildNodes(fragment)[0]; const location = treeAdapter.getNodeSourceCodeLocation(p); @@ -141,7 +137,7 @@ generateTestsForEachTreeAdapter('location-info-parser', (treeAdapter) => { sourceCodeLocationInfo: true, }; - const fragment = parse5.parseFragment(html, opts); + const fragment = parseFragment(html, opts); const svg = treeAdapter.getChildNodes(fragment)[0]; const foreignObject = treeAdapter.getChildNodes(svg)[0]; const location = treeAdapter.getNodeSourceCodeLocation(foreignObject); @@ -161,7 +157,7 @@ generateTestsForEachTreeAdapter('location-info-parser', (treeAdapter) => { sourceCodeLocationInfo: true, }; - const fragment = parse5.parseFragment(html, opts); + const fragment = parseFragment(html, opts); const script = treeAdapter.getChildNodes(fragment)[0]; const location = treeAdapter.getNodeSourceCodeLocation(script); const textLocation = treeAdapter.getNodeSourceCodeLocation(treeAdapter.getChildNodes(script)[0]); @@ -187,7 +183,7 @@ generateTestsForEachTreeAdapter('location-info-parser', (treeAdapter) => { sourceCodeLocationInfo: true, }; - const document = parse5.parse(html, opts); + const document = parse(html, opts); const htmlEl = treeAdapter.getChildNodes(document)[0]; const bodyEl = treeAdapter.getChildNodes(htmlEl)[1]; @@ -218,7 +214,7 @@ generateTestsForEachTreeAdapter('location-info-parser', (treeAdapter) => { sourceCodeLocationInfo: true, }; - const document = parse5.parse(html, opts); + const document = parse(html, opts); const htmlEl = treeAdapter.getChildNodes(document)[0]; const bodyEl = treeAdapter.getChildNodes(htmlEl)[1]; @@ -263,7 +259,7 @@ describe('location-info-parser', () => { }, }; const treeAdapter = { ...treeAdapters.default, ...sourceCodeLocationSetter }; - const document = parse5.parse('Testing location', { + const document = parse('Testing location', { treeAdapter, sourceCodeLocationInfo: true, }); diff --git a/packages/parse5/lib/serializer/index.test.ts b/packages/parse5/lib/serializer/index.test.ts index 573307452..985f5e855 100644 --- a/packages/parse5/lib/serializer/index.test.ts +++ b/packages/parse5/lib/serializer/index.test.ts @@ -1,61 +1,59 @@ import * as assert from 'node:assert'; -import * as parse5 from 'parse5'; +import { html, parse, parseFragment, serialize, serializeOuter, type DefaultTreeAdapterMap } from 'parse5'; import { generateSerializerTests } from 'parse5-test-utils/utils/generate-serializer-tests.js'; import { treeAdapters } from 'parse5-test-utils/utils/common.js'; -import { type Element } from 'parse5/dist/tree-adapters/default.js'; -import { NAMESPACES } from 'parse5/dist/common/html.js'; -generateSerializerTests('serializer', 'Serializer', parse5.serialize); +generateSerializerTests('serializer', 'Serializer', serialize); describe('serializer', () => { describe("Regression - Get text node's parent tagName only if it's an Element node (GH-38)", () => { it('serializes correctly', () => { - const document = parse5.parse(''); + const document = parse(''); const treeAdapter: typeof treeAdapters.default = { ...treeAdapters.default, - getTagName: (element: Element) => { + getTagName: (element: DefaultTreeAdapterMap['element']) => { assert.ok(element.tagName); return treeAdapters.default.getTagName(element); }, }; - parse5.serialize(document, { treeAdapter }); + serialize(document, { treeAdapter }); }); }); describe('serializeOuter', () => { it('serializes outerHTML correctly', () => { - const document = parse5.parseFragment('

'); + const document = parseFragment('
'); const div = document.childNodes[0]; assert.ok(treeAdapters.default.isElementNode(div)); - const html = parse5.serializeOuter(div); + const html = serializeOuter(div); assert.equal(html, '
'); }); }); it('serializes