From 51a7d0939e73603f3b27005fc5e948f476e3eca5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Sun, 16 Jan 2022 00:08:55 +0000 Subject: [PATCH 01/11] Assemble string from serializer --- .../parse5-serializer-stream/lib/index.ts | 11 +- packages/parse5/lib/serializer/index.ts | 131 +++++++++--------- 2 files changed, 69 insertions(+), 73 deletions(-) diff --git a/packages/parse5-serializer-stream/lib/index.ts b/packages/parse5-serializer-stream/lib/index.ts index c0d7863cb..9d1ac5b65 100644 --- a/packages/parse5-serializer-stream/lib/index.ts +++ b/packages/parse5-serializer-stream/lib/index.ts @@ -34,20 +34,11 @@ export class SerializerStream extends Readable { super({ encoding: 'utf8' }); this.serializer = new Serializer(node, options); - - Object.defineProperty(this.serializer, 'html', { - //NOTE: To make `+=` concat operator work properly we define - //getter which always returns empty string - get() { - return ''; - }, - set: (data: string) => this.push(data), - }); } //Readable stream implementation override _read(): void { - this.serializer.serialize(); + this.push(this.serializer.serialize()); this.push(null); } } diff --git a/packages/parse5/lib/serializer/index.ts b/packages/parse5/lib/serializer/index.ts index bf60a7049..09fe95f2f 100644 --- a/packages/parse5/lib/serializer/index.ts +++ b/packages/parse5/lib/serializer/index.ts @@ -1,4 +1,3 @@ -import * as defaultTreeAdapter from '../tree-adapters/default.js'; import * as doctype from '../common/doctype.js'; import { TAG_NAMES as $, NAMESPACES as NS } from '../common/html.js'; import type { TreeAdapter, TreeAdapterTypeMap } from '../tree-adapters/interface'; @@ -48,117 +47,123 @@ export interface SerializerOptions { * * @default `treeAdapters.default` */ - treeAdapter?: TreeAdapter; + treeAdapter: TreeAdapter; } //Serializer export class Serializer { - html = ''; - treeAdapter: TreeAdapter; - - constructor( - private startNode: T['parentNode'], - { 
treeAdapter = defaultTreeAdapter as TreeAdapter }: SerializerOptions - ) { - this.treeAdapter = treeAdapter; - } + constructor(private startNode: T['parentNode'], private options: SerializerOptions) {} //API serialize(): string { - this._serializeChildNodes(this.startNode); - - return this.html; + return this._serializeChildNodes(this.startNode, this.options); } //Internals - private _serializeChildNodes(parentNode: T['parentNode']): void { - const childNodes = this.treeAdapter.getChildNodes(parentNode); + private _serializeChildNodes( + parentNode: T['parentNode'], + options: SerializerOptions + ): string { + let html = ''; + const childNodes = options.treeAdapter.getChildNodes(parentNode); if (childNodes) { for (const currentNode of childNodes) { - if (this.treeAdapter.isElementNode(currentNode)) { - this._serializeElement(currentNode); - } else if (this.treeAdapter.isTextNode(currentNode)) { - this._serializeTextNode(currentNode); - } else if (this.treeAdapter.isCommentNode(currentNode)) { - this._serializeCommentNode(currentNode); - } else if (this.treeAdapter.isDocumentTypeNode(currentNode)) { - this._serializeDocumentTypeNode(currentNode); + if (options.treeAdapter.isElementNode(currentNode)) { + html += this._serializeElement(currentNode, options); + } else if (options.treeAdapter.isTextNode(currentNode)) { + html += this._serializeTextNode(currentNode, options); + } else if (options.treeAdapter.isCommentNode(currentNode)) { + html += this._serializeCommentNode(currentNode, options); + } else if (options.treeAdapter.isDocumentTypeNode(currentNode)) { + html += this._serializeDocumentTypeNode(currentNode, options); } } } - } - private _serializeElement(node: T['element']): void { - const tn = this.treeAdapter.getTagName(node); - const ns = this.treeAdapter.getNamespaceURI(node); - - this.html += `<${tn}`; - this._serializeAttributes(node); - this.html += '>'; - - if (!VOID_ELEMENTS.has(tn)) { - const childNodesHolder = - tn === $.TEMPLATE && ns === NS.HTML ? 
this.treeAdapter.getTemplateContent(node) : node; - - this._serializeChildNodes(childNodesHolder); - this.html += ``; - } + return html; } - private _serializeAttributes(node: T['element']): void { - for (const attr of this.treeAdapter.getAttrList(node)) { - const value = escapeString(attr.value, true); + private _serializeElement(node: T['element'], options: SerializerOptions): string { + const tn = options.treeAdapter.getTagName(node); + const ns = options.treeAdapter.getNamespaceURI(node); + + return `<${tn}${this._serializeAttributes(node, options)}>${ + VOID_ELEMENTS.has(tn) + ? '' + : `${this._serializeChildNodes( + // Get container of the child nodes + tn === $.TEMPLATE && ns === NS.HTML ? options.treeAdapter.getTemplateContent(node) : node, + options + )}` + }`; + } - this.html += ' '; + private _serializeAttributes( + node: T['element'], + { treeAdapter }: SerializerOptions + ): string { + let html = ''; + for (const attr of treeAdapter.getAttrList(node)) { + html += ' '; if (!attr.namespace) { - this.html += attr.name; + html += attr.name; } else switch (attr.namespace) { case NS.XML: { - this.html += `xml:${attr.name}`; + html += `xml:${attr.name}`; break; } case NS.XMLNS: { if (attr.name !== 'xmlns') { - this.html += 'xmlns:'; + html += 'xmlns:'; } - this.html += attr.name; + html += attr.name; break; } case NS.XLINK: { - this.html += `xlink:${attr.name}`; + html += `xlink:${attr.name}`; break; } default: { - this.html += `${attr.prefix}:${attr.name}`; + html += `${attr.prefix}:${attr.name}`; } } - this.html += `="${value}"`; + html += `="${escapeString(attr.value, true)}"`; } + + return html; } - private _serializeTextNode(node: T['textNode']): void { - const content = this.treeAdapter.getTextNodeContent(node); - const parent = this.treeAdapter.getParentNode(node); + private _serializeTextNode( + node: T['textNode'], + { treeAdapter }: SerializerOptions + ): string { + const content = treeAdapter.getTextNodeContent(node); + const parent = 
treeAdapter.getParentNode(node); - this.html += - parent && this.treeAdapter.isElementNode(parent) && UNESCAPED_TEXT.has(this.treeAdapter.getTagName(parent)) - ? content - : escapeString(content, false); + return parent && treeAdapter.isElementNode(parent) && UNESCAPED_TEXT.has(treeAdapter.getTagName(parent)) + ? content + : escapeString(content, false); } - private _serializeCommentNode(node: T['commentNode']): void { - this.html += ``; + private _serializeCommentNode( + node: T['commentNode'], + { treeAdapter }: SerializerOptions + ): string { + return ``; } - private _serializeDocumentTypeNode(node: T['documentType']): void { - const name = this.treeAdapter.getDocumentTypeNodeName(node); + private _serializeDocumentTypeNode( + node: T['documentType'], + { treeAdapter }: SerializerOptions + ): string { + const name = treeAdapter.getDocumentTypeNodeName(node); - this.html += `<${doctype.serializeContent(name, null, null)}>`; + return `<${doctype.serializeContent(name, null, null)}>`; } } From 3cccead476e95a004dc668e7d9427902a913d6f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Sun, 16 Jan 2022 00:18:10 +0000 Subject: [PATCH 02/11] Remove unnecessary `Serializer` class --- .../parse5-serializer-stream/lib/index.ts | 10 +- packages/parse5/lib/index.ts | 10 +- packages/parse5/lib/serializer/index.ts | 191 +++++++++--------- 3 files changed, 100 insertions(+), 111 deletions(-) diff --git a/packages/parse5-serializer-stream/lib/index.ts b/packages/parse5-serializer-stream/lib/index.ts index 9d1ac5b65..6d659102b 100644 --- a/packages/parse5-serializer-stream/lib/index.ts +++ b/packages/parse5-serializer-stream/lib/index.ts @@ -1,5 +1,5 @@ import { Readable } from 'node:stream'; -import { Serializer, SerializerOptions } from 'parse5/dist/serializer/index.js'; +import { serializeChildNodes, SerializerOptions } from 'parse5/dist/serializer/index.js'; import type { TreeAdapterTypeMap } from 
'parse5/dist/tree-adapters/interface.js'; /** @@ -22,23 +22,19 @@ import type { TreeAdapterTypeMap } from 'parse5/dist/tree-adapters/interface.js' * ``` */ export class SerializerStream extends Readable { - private serializer: Serializer; - /** * Streaming AST node to an HTML serializer. A readable stream. * * @param node Node to serialize. * @param options Serialization options. */ - constructor(node: T['parentNode'], options: SerializerOptions) { + constructor(private node: T['parentNode'], private options: SerializerOptions) { super({ encoding: 'utf8' }); - - this.serializer = new Serializer(node, options); } //Readable stream implementation override _read(): void { - this.push(this.serializer.serialize()); + this.push(serializeChildNodes(this.node, this.options)); this.push(null); } } diff --git a/packages/parse5/lib/index.ts b/packages/parse5/lib/index.ts index 1e770816e..44490fd84 100644 --- a/packages/parse5/lib/index.ts +++ b/packages/parse5/lib/index.ts @@ -1,6 +1,7 @@ import { Parser, ParserOptions } from './parser/index.js'; -import { Serializer, SerializerOptions } from './serializer/index.js'; +import { serializeChildNodes, SerializerOptions } from './serializer/index.js'; import type { DefaultTreeAdapterMap } from './tree-adapters/default.js'; +import * as DefaultTreeAdapter from './tree-adapters/default.js'; import type { TreeAdapterTypeMap } from './tree-adapters/interface.js'; export { ParserOptions } from './parser/index.js'; @@ -106,9 +107,8 @@ export function parseFragment( node: T['parentNode'], - options: SerializerOptions + options: Partial> ): string { - const serializer = new Serializer(node, options); - - return serializer.serialize(); + const opts = { treeAdapter: DefaultTreeAdapter, ...options }; + return serializeChildNodes(node, opts); } diff --git a/packages/parse5/lib/serializer/index.ts b/packages/parse5/lib/serializer/index.ts index 09fe95f2f..9b4a1b214 100644 --- a/packages/parse5/lib/serializer/index.ts +++ 
b/packages/parse5/lib/serializer/index.ts @@ -51,120 +51,113 @@ export interface SerializerOptions { } //Serializer -export class Serializer { - constructor(private startNode: T['parentNode'], private options: SerializerOptions) {} - - //API - serialize(): string { - return this._serializeChildNodes(this.startNode, this.options); - } - - //Internals - private _serializeChildNodes( - parentNode: T['parentNode'], - options: SerializerOptions - ): string { - let html = ''; - const childNodes = options.treeAdapter.getChildNodes(parentNode); - - if (childNodes) { - for (const currentNode of childNodes) { - if (options.treeAdapter.isElementNode(currentNode)) { - html += this._serializeElement(currentNode, options); - } else if (options.treeAdapter.isTextNode(currentNode)) { - html += this._serializeTextNode(currentNode, options); - } else if (options.treeAdapter.isCommentNode(currentNode)) { - html += this._serializeCommentNode(currentNode, options); - } else if (options.treeAdapter.isDocumentTypeNode(currentNode)) { - html += this._serializeDocumentTypeNode(currentNode, options); - } +export function serializeChildNodes( + parentNode: T['parentNode'], + options: SerializerOptions +): string { + let html = ''; + const childNodes = options.treeAdapter.getChildNodes(parentNode); + + if (childNodes) { + for (const currentNode of childNodes) { + if (options.treeAdapter.isElementNode(currentNode)) { + html += serializeElement(currentNode, options); + } else if (options.treeAdapter.isTextNode(currentNode)) { + html += serializeTextNode(currentNode, options); + } else if (options.treeAdapter.isCommentNode(currentNode)) { + html += serializeCommentNode(currentNode, options); + } else if (options.treeAdapter.isDocumentTypeNode(currentNode)) { + html += serializeDocumentTypeNode(currentNode, options); } } - - return html; } - private _serializeElement(node: T['element'], options: SerializerOptions): string { - const tn = options.treeAdapter.getTagName(node); - const ns = 
options.treeAdapter.getNamespaceURI(node); - - return `<${tn}${this._serializeAttributes(node, options)}>${ - VOID_ELEMENTS.has(tn) - ? '' - : `${this._serializeChildNodes( - // Get container of the child nodes - tn === $.TEMPLATE && ns === NS.HTML ? options.treeAdapter.getTemplateContent(node) : node, - options - )}` - }`; - } + return html; +} - private _serializeAttributes( - node: T['element'], - { treeAdapter }: SerializerOptions - ): string { - let html = ''; - for (const attr of treeAdapter.getAttrList(node)) { - html += ' '; - - if (!attr.namespace) { - html += attr.name; - } else - switch (attr.namespace) { - case NS.XML: { - html += `xml:${attr.name}`; - break; - } - case NS.XMLNS: { - if (attr.name !== 'xmlns') { - html += 'xmlns:'; - } +export function serializeElement( + node: T['element'], + options: SerializerOptions +): string { + const tn = options.treeAdapter.getTagName(node); + const ns = options.treeAdapter.getNamespaceURI(node); + + return `<${tn}${serializeAttributes(node, options)}>${ + VOID_ELEMENTS.has(tn) + ? '' + : `${serializeChildNodes( + // Get container of the child nodes + tn === $.TEMPLATE && ns === NS.HTML ? 
options.treeAdapter.getTemplateContent(node) : node, + options + )}` + }`; +} - html += attr.name; - break; - } - case NS.XLINK: { - html += `xlink:${attr.name}`; - break; - } - default: { - html += `${attr.prefix}:${attr.name}`; - } +function serializeAttributes( + node: T['element'], + { treeAdapter }: SerializerOptions +): string { + let html = ''; + for (const attr of treeAdapter.getAttrList(node)) { + html += ' '; + + if (!attr.namespace) { + html += attr.name; + } else + switch (attr.namespace) { + case NS.XML: { + html += `xml:${attr.name}`; + break; } + case NS.XMLNS: { + if (attr.name !== 'xmlns') { + html += 'xmlns:'; + } - html += `="${escapeString(attr.value, true)}"`; - } + html += attr.name; + break; + } + case NS.XLINK: { + html += `xlink:${attr.name}`; + break; + } + default: { + html += `${attr.prefix}:${attr.name}`; + } + } - return html; + html += `="${escapeString(attr.value, true)}"`; } - private _serializeTextNode( - node: T['textNode'], - { treeAdapter }: SerializerOptions - ): string { - const content = treeAdapter.getTextNodeContent(node); - const parent = treeAdapter.getParentNode(node); + return html; +} + +function serializeTextNode( + node: T['textNode'], + { treeAdapter }: SerializerOptions +): string { + const content = treeAdapter.getTextNodeContent(node); + const parent = treeAdapter.getParentNode(node); - return parent && treeAdapter.isElementNode(parent) && UNESCAPED_TEXT.has(treeAdapter.getTagName(parent)) - ? content - : escapeString(content, false); - } + return parent && treeAdapter.isElementNode(parent) && UNESCAPED_TEXT.has(treeAdapter.getTagName(parent)) + ? 
content + : escapeString(content, false); +} - private _serializeCommentNode( - node: T['commentNode'], - { treeAdapter }: SerializerOptions - ): string { - return ``; - } +function serializeCommentNode( + node: T['commentNode'], + { treeAdapter }: SerializerOptions +): string { + return ``; +} - private _serializeDocumentTypeNode( - node: T['documentType'], - { treeAdapter }: SerializerOptions - ): string { - const name = treeAdapter.getDocumentTypeNodeName(node); +function serializeDocumentTypeNode( + node: T['documentType'], + { treeAdapter }: SerializerOptions +): string { + const name = treeAdapter.getDocumentTypeNodeName(node); - return `<${doctype.serializeContent(name, null, null)}>`; - } + return `<${doctype.serializeContent(name, null, null)}>`; } // NOTE: used in tests and by rewriting stream From a00379af5937f1dc59d81ae550bf5c9678217eb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Sun, 16 Jan 2022 00:19:26 +0000 Subject: [PATCH 03/11] Simplify `serializeDocumentTypeNode` --- packages/parse5/lib/serializer/index.ts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/packages/parse5/lib/serializer/index.ts b/packages/parse5/lib/serializer/index.ts index 9b4a1b214..9a1c45875 100644 --- a/packages/parse5/lib/serializer/index.ts +++ b/packages/parse5/lib/serializer/index.ts @@ -1,4 +1,3 @@ -import * as doctype from '../common/doctype.js'; import { TAG_NAMES as $, NAMESPACES as NS } from '../common/html.js'; import type { TreeAdapter, TreeAdapterTypeMap } from '../tree-adapters/interface'; @@ -155,9 +154,7 @@ function serializeDocumentTypeNode( node: T['documentType'], { treeAdapter }: SerializerOptions ): string { - const name = treeAdapter.getDocumentTypeNodeName(node); - - return `<${doctype.serializeContent(name, null, null)}>`; + return ``; } // NOTE: used in tests and by rewriting stream From cf0b73148866715295b45166ad064ee84102c9f1 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Sun, 16 Jan 2022 00:26:35 +0000 Subject: [PATCH 04/11] Delay ns lookup --- packages/parse5/lib/serializer/index.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/parse5/lib/serializer/index.ts b/packages/parse5/lib/serializer/index.ts index 9a1c45875..b692cb68e 100644 --- a/packages/parse5/lib/serializer/index.ts +++ b/packages/parse5/lib/serializer/index.ts @@ -79,14 +79,15 @@ export function serializeElement( options: SerializerOptions ): string { const tn = options.treeAdapter.getTagName(node); - const ns = options.treeAdapter.getNamespaceURI(node); return `<${tn}${serializeAttributes(node, options)}>${ VOID_ELEMENTS.has(tn) ? '' : `${serializeChildNodes( // Get container of the child nodes - tn === $.TEMPLATE && ns === NS.HTML ? options.treeAdapter.getTemplateContent(node) : node, + tn === $.TEMPLATE && options.treeAdapter.getNamespaceURI(node) === NS.HTML + ? options.treeAdapter.getTemplateContent(node) + : node, options )}` }`; From 677d83a213eddb0216d08266e900bd1697206aea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Mon, 17 Jan 2022 23:06:22 +0000 Subject: [PATCH 05/11] Move `serialize` fn to serializer file and improve the options type --- .../parse5-serializer-stream/lib/index.ts | 4 +-- packages/parse5/lib/index.ts | 35 ++----------------- 2 files changed, 4 insertions(+), 35 deletions(-) diff --git a/packages/parse5-serializer-stream/lib/index.ts b/packages/parse5-serializer-stream/lib/index.ts index 6d659102b..2cbf9f550 100644 --- a/packages/parse5-serializer-stream/lib/index.ts +++ b/packages/parse5-serializer-stream/lib/index.ts @@ -1,5 +1,5 @@ import { Readable } from 'node:stream'; -import { serializeChildNodes, SerializerOptions } from 'parse5/dist/serializer/index.js'; +import { serialize, type SerializerOptions } from 'parse5/dist/serializer/index.js'; import type { 
TreeAdapterTypeMap } from 'parse5/dist/tree-adapters/interface.js'; /** @@ -34,7 +34,7 @@ export class SerializerStream extends Readable { //Readable stream implementation override _read(): void { - this.push(serializeChildNodes(this.node, this.options)); + this.push(serialize(this.node, this.options)); this.push(null); } } diff --git a/packages/parse5/lib/index.ts b/packages/parse5/lib/index.ts index 44490fd84..25a4fb8b4 100644 --- a/packages/parse5/lib/index.ts +++ b/packages/parse5/lib/index.ts @@ -1,11 +1,10 @@ import { Parser, ParserOptions } from './parser/index.js'; -import { serializeChildNodes, SerializerOptions } from './serializer/index.js'; + import type { DefaultTreeAdapterMap } from './tree-adapters/default.js'; -import * as DefaultTreeAdapter from './tree-adapters/default.js'; import type { TreeAdapterTypeMap } from './tree-adapters/interface.js'; export { ParserOptions } from './parser/index.js'; -export { SerializerOptions } from './serializer/index.js'; +export { serialize, SerializerOptions } from './serializer/index.js'; // Shorthands @@ -82,33 +81,3 @@ export function parseFragmentHi there!'); - * - * // Serializes a document. - * const html = parse5.serialize(document); - * - * // Serializes the element content. - * const str = parse5.serialize(document.childNodes[1]); - * - * console.log(str); //> 'Hi there!' - * ``` - * - * @param node Node to serialize. - * @param options Serialization options. 
- */ -export function serialize( - node: T['parentNode'], - options: Partial> -): string { - const opts = { treeAdapter: DefaultTreeAdapter, ...options }; - return serializeChildNodes(node, opts); -} From 18f170c9b36ae17596bde459b08b1b6e71b9b41c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Mon, 17 Jan 2022 23:07:17 +0000 Subject: [PATCH 06/11] Add `scriptingEnabled` flag to serializer Fixes #332 --- packages/parse5/lib/serializer/index.test.ts | 12 +++ packages/parse5/lib/serializer/index.ts | 78 +++++++++++++------- 2 files changed, 64 insertions(+), 26 deletions(-) diff --git a/packages/parse5/lib/serializer/index.test.ts b/packages/parse5/lib/serializer/index.test.ts index 3fc221817..3ea76d129 100644 --- a/packages/parse5/lib/serializer/index.test.ts +++ b/packages/parse5/lib/serializer/index.test.ts @@ -22,4 +22,16 @@ describe('serializer', () => { parse5.serialize(document, { treeAdapter }); }); }); + + describe('Scripting flag (GH-332)', () => { + it('should serialize with the scripting flag', () => { + const document = parse5.parse('&'); + expect(parse5.serialize(document, { scriptingEnabled: false })).toBe( + '&' + ); + expect(parse5.serialize(document, { scriptingEnabled: true })).toBe( + '&' + ); + }); + }); }); diff --git a/packages/parse5/lib/serializer/index.ts b/packages/parse5/lib/serializer/index.ts index b692cb68e..303976bbd 100644 --- a/packages/parse5/lib/serializer/index.ts +++ b/packages/parse5/lib/serializer/index.ts @@ -1,5 +1,6 @@ import { TAG_NAMES as $, NAMESPACES as NS } from '../common/html.js'; import type { TreeAdapter, TreeAdapterTypeMap } from '../tree-adapters/interface'; +import * as DefaultTreeAdapter from '../tree-adapters/default.js'; //Escaping regexes const AMP_REGEX = /&/g; @@ -29,16 +30,7 @@ const VOID_ELEMENTS = new Set([ $.TRACK, $.WBR, ]); -const UNESCAPED_TEXT = new Set([ - $.STYLE, - $.SCRIPT, - $.XMP, - $.IFRAME, - $.NOEMBED, - $.NOFRAMES, - $.PLAINTEXT, - 
$.NOSCRIPT, -]); +const UNESCAPED_TEXT = new Set([$.STYLE, $.SCRIPT, $.XMP, $.IFRAME, $.NOEMBED, $.NOFRAMES, $.PLAINTEXT]); export interface SerializerOptions { /** @@ -46,13 +38,51 @@ export interface SerializerOptions { * * @default `treeAdapters.default` */ - treeAdapter: TreeAdapter; + treeAdapter?: TreeAdapter; + /** + * The [scripting flag](https://html.spec.whatwg.org/multipage/parsing.html#scripting-flag). If set + * to `true`, `noscript` element content will not be escaped. + * + * @default `true` + */ + scriptingEnabled?: boolean; } -//Serializer -export function serializeChildNodes( - parentNode: T['parentNode'], +type InternalOptions = Required>; + +/** + * Serializes an AST node to an HTML string. + * + * @example + * + * ```js + * const parse5 = require('parse5'); + * + * const document = parse5.parse('Hi there!'); + * + * // Serializes a document. + * const html = parse5.serialize(document); + * + * // Serializes the element content. + * const str = parse5.serialize(document.childNodes[1]); + * + * console.log(str); //> 'Hi there!' + * ``` + * + * @param node Node to serialize. + * @param options Serialization options. 
+ */ +export function serialize( + node: T['parentNode'], options: SerializerOptions +): string { + const opts = { treeAdapter: DefaultTreeAdapter, scriptingEnabled: true, ...options }; + return serializeChildNodes(node, opts); +} + +function serializeChildNodes( + parentNode: T['parentNode'], + options: InternalOptions ): string { let html = ''; const childNodes = options.treeAdapter.getChildNodes(parentNode); @@ -74,10 +104,7 @@ export function serializeChildNodes( return html; } -export function serializeElement( - node: T['element'], - options: SerializerOptions -): string { +function serializeElement(node: T['element'], options: InternalOptions): string { const tn = options.treeAdapter.getTagName(node); return `<${tn}${serializeAttributes(node, options)}>${ @@ -95,7 +122,7 @@ export function serializeElement( function serializeAttributes( node: T['element'], - { treeAdapter }: SerializerOptions + { treeAdapter }: InternalOptions ): string { let html = ''; for (const attr of treeAdapter.getAttrList(node)) { @@ -132,28 +159,27 @@ function serializeAttributes( return html; } -function serializeTextNode( - node: T['textNode'], - { treeAdapter }: SerializerOptions -): string { +function serializeTextNode(node: T['textNode'], options: InternalOptions): string { + const { treeAdapter } = options; const content = treeAdapter.getTextNodeContent(node); const parent = treeAdapter.getParentNode(node); + const parentTn = parent && treeAdapter.isElementNode(parent) && treeAdapter.getTagName(parent); - return parent && treeAdapter.isElementNode(parent) && UNESCAPED_TEXT.has(treeAdapter.getTagName(parent)) + return parentTn && (UNESCAPED_TEXT.has(parentTn) || (options.scriptingEnabled && parentTn === $.NOSCRIPT)) ? 
content : escapeString(content, false); } function serializeCommentNode( node: T['commentNode'], - { treeAdapter }: SerializerOptions + { treeAdapter }: InternalOptions ): string { return ``; } function serializeDocumentTypeNode( node: T['documentType'], - { treeAdapter }: SerializerOptions + { treeAdapter }: InternalOptions ): string { return ``; } From 67dbf7233847e9187c866149762b1bfaf76970d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Mon, 17 Jan 2022 23:15:24 +0000 Subject: [PATCH 07/11] Fix serializing mixed content Fixes #333 --- packages/parse5/lib/serializer/index.test.ts | 15 +++++++++++++++ packages/parse5/lib/serializer/index.ts | 8 +++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/packages/parse5/lib/serializer/index.test.ts b/packages/parse5/lib/serializer/index.test.ts index 3ea76d129..7fbab9585 100644 --- a/packages/parse5/lib/serializer/index.test.ts +++ b/packages/parse5/lib/serializer/index.test.ts @@ -1,5 +1,6 @@ import * as assert from 'node:assert'; import * as parse5 from 'parse5'; +import outdent from 'outdent'; import { generateSerializerTests } from 'parse5-test-utils/utils/generate-serializer-tests.js'; import { treeAdapters } from 'parse5-test-utils/utils/common.js'; import type { Element } from 'parse5/dist/tree-adapters/default'; @@ -34,4 +35,18 @@ describe('serializer', () => { ); }); }); + + describe('Mixed content (GH-333)', () => { + it('should serialize mixed content', () => { + const input = outdent` + + + + + + `; + const document = parse5.parse(input); + expect(parse5.serialize(document)).toContain(input); + }); + }); }); diff --git a/packages/parse5/lib/serializer/index.ts b/packages/parse5/lib/serializer/index.ts index 303976bbd..e02c976f9 100644 --- a/packages/parse5/lib/serializer/index.ts +++ b/packages/parse5/lib/serializer/index.ts @@ -74,7 +74,7 @@ type InternalOptions = Required( node: T['parentNode'], - options: SerializerOptions + 
options?: SerializerOptions ): string { const opts = { treeAdapter: DefaultTreeAdapter, scriptingEnabled: true, ...options }; return serializeChildNodes(node, opts); @@ -108,7 +108,7 @@ function serializeElement(node: T['element'], opti const tn = options.treeAdapter.getTagName(node); return `<${tn}${serializeAttributes(node, options)}>${ - VOID_ELEMENTS.has(tn) + options.treeAdapter.getNamespaceURI(node) === NS.HTML && VOID_ELEMENTS.has(tn) ? '' : `${serializeChildNodes( // Get container of the child nodes @@ -165,7 +165,9 @@ function serializeTextNode(node: T['textNode'], op const parent = treeAdapter.getParentNode(node); const parentTn = parent && treeAdapter.isElementNode(parent) && treeAdapter.getTagName(parent); - return parentTn && (UNESCAPED_TEXT.has(parentTn) || (options.scriptingEnabled && parentTn === $.NOSCRIPT)) + return parentTn && + treeAdapter.getNamespaceURI(parent) === NS.HTML && + (UNESCAPED_TEXT.has(parentTn) || (options.scriptingEnabled && parentTn === $.NOSCRIPT)) ? 
content : escapeString(content, false); } From 844f0724799bffb41753ed0a64bb28f300649164 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Mon, 17 Jan 2022 23:35:19 +0000 Subject: [PATCH 08/11] feat: Add serializeOuter method Fixes #230, #378 Uses the test case from #378, by @joeldenning Co-Authored-By: Joel Denning <5524384+joeldenning@users.noreply.github.com> --- packages/parse5/lib/index.ts | 2 +- packages/parse5/lib/serializer/index.test.ts | 13 +++++++- packages/parse5/lib/serializer/index.ts | 31 +++++++++++++++++++- 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/packages/parse5/lib/index.ts b/packages/parse5/lib/index.ts index 25a4fb8b4..dd3744b2c 100644 --- a/packages/parse5/lib/index.ts +++ b/packages/parse5/lib/index.ts @@ -4,7 +4,7 @@ import type { DefaultTreeAdapterMap } from './tree-adapters/default.js'; import type { TreeAdapterTypeMap } from './tree-adapters/interface.js'; export { ParserOptions } from './parser/index.js'; -export { serialize, SerializerOptions } from './serializer/index.js'; +export { serialize, serializeOuter, SerializerOptions } from './serializer/index.js'; // Shorthands diff --git a/packages/parse5/lib/serializer/index.test.ts b/packages/parse5/lib/serializer/index.test.ts index 7fbab9585..1e6403684 100644 --- a/packages/parse5/lib/serializer/index.test.ts +++ b/packages/parse5/lib/serializer/index.test.ts @@ -3,7 +3,7 @@ import * as parse5 from 'parse5'; import outdent from 'outdent'; import { generateSerializerTests } from 'parse5-test-utils/utils/generate-serializer-tests.js'; import { treeAdapters } from 'parse5-test-utils/utils/common.js'; -import type { Element } from 'parse5/dist/tree-adapters/default'; +import { type Element, isElementNode } from 'parse5/dist/tree-adapters/default'; generateSerializerTests('serializer', 'Serializer', parse5.serialize); @@ -49,4 +49,15 @@ describe('serializer', () => { 
expect(parse5.serialize(document)).toContain(input); }); }); + + describe('serializeOuter', () => { + it('serializes outerHTML correctly', () => { + const document = parse5.parseFragment('
'); + const div = document.childNodes[0]; + assert.ok(isElementNode(div)); + const html = parse5.serializeOuter(div); + + assert.equal(html, '
'); + }); + }); }); diff --git a/packages/parse5/lib/serializer/index.ts b/packages/parse5/lib/serializer/index.ts index e02c976f9..6019a0286 100644 --- a/packages/parse5/lib/serializer/index.ts +++ b/packages/parse5/lib/serializer/index.ts @@ -50,6 +50,8 @@ export interface SerializerOptions { type InternalOptions = Required>; +const defaultOpts = { treeAdapter: DefaultTreeAdapter, scriptingEnabled: true }; + /** * Serializes an AST node to an HTML string. * @@ -76,10 +78,37 @@ export function serialize ): string { - const opts = { treeAdapter: DefaultTreeAdapter, scriptingEnabled: true, ...options }; + const opts = { ...defaultOpts, ...options }; return serializeChildNodes(node, opts); } +/** + * Serializes an AST element node to an HTML string, including the element node. + * + * @example + * + * ```js + * const parse5 = require('parse5'); + * + * const document = parse5.parseFragment('
Hello, world!
'); + * + * // Serializes the
element. + * const html = parse5.serializeOuter(document.childNodes[0]); + * + * console.log(html); //> '
Hello, world!
' + * ``` + * + * @param node Node to serialize. + * @param options Serialization options. + */ +export function serializeOuter( + node: T['element'], + options?: SerializerOptions +): string { + const opts = { ...defaultOpts, ...options }; + return serializeElement(node, opts); +} + function serializeChildNodes( parentNode: T['parentNode'], options: InternalOptions From 9424821eed4bbcd106ac5b5eaec7e8fe051f5f3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Tue, 18 Jan 2022 14:46:45 +0000 Subject: [PATCH 09/11] Move serializer tests to data file --- packages/parse5/lib/serializer/index.test.ts | 27 -------------------- test/data/serialization/tests.json | 11 ++++++++ test/utils/generate-serializer-tests.ts | 3 ++- 3 files changed, 13 insertions(+), 28 deletions(-) diff --git a/packages/parse5/lib/serializer/index.test.ts b/packages/parse5/lib/serializer/index.test.ts index 1e6403684..233419ea4 100644 --- a/packages/parse5/lib/serializer/index.test.ts +++ b/packages/parse5/lib/serializer/index.test.ts @@ -1,6 +1,5 @@ import * as assert from 'node:assert'; import * as parse5 from 'parse5'; -import outdent from 'outdent'; import { generateSerializerTests } from 'parse5-test-utils/utils/generate-serializer-tests.js'; import { treeAdapters } from 'parse5-test-utils/utils/common.js'; import { type Element, isElementNode } from 'parse5/dist/tree-adapters/default'; @@ -24,32 +23,6 @@ describe('serializer', () => { }); }); - describe('Scripting flag (GH-332)', () => { - it('should serialize with the scripting flag', () => { - const document = parse5.parse('&'); - expect(parse5.serialize(document, { scriptingEnabled: false })).toBe( - '&' - ); - expect(parse5.serialize(document, { scriptingEnabled: true })).toBe( - '&' - ); - }); - }); - - describe('Mixed content (GH-333)', () => { - it('should serialize mixed content', () => { - const input = outdent` - - - - - - `; - const document = parse5.parse(input); - 
expect(parse5.serialize(document)).toContain(input); - }); - }); - describe('serializeOuter', () => { it('serializes outerHTML correctly', () => { const document = parse5.parseFragment('
'); diff --git a/test/data/serialization/tests.json b/test/data/serialization/tests.json index f06300fbb..c563f340e 100644 --- a/test/data/serialization/tests.json +++ b/test/data/serialization/tests.json @@ -99,6 +99,12 @@ "input": "", "expected": "" }, + { + "name": "Text nodes escaping -