diff --git a/packages/parse5-html-rewriting-stream/lib/index.ts b/packages/parse5-html-rewriting-stream/lib/index.ts index 1d0694062..ebba66d67 100644 --- a/packages/parse5-html-rewriting-stream/lib/index.ts +++ b/packages/parse5-html-rewriting-stream/lib/index.ts @@ -1,6 +1,6 @@ import type { Location } from 'parse5/dist/common/token.js'; import { SAXParser, EndTag, StartTag, Doctype, Text, Comment, SaxToken } from 'parse5-sax-parser'; -import { escapeString } from 'parse5/dist/serializer/index.js'; +import { hasUnescapedText, escapeString } from 'parse5/dist/serializer/index.js'; /** * Streaming [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style HTML rewriter. @@ -125,7 +125,11 @@ export class RewritingStream extends SAXParser { /** Emits serialized text token into the output stream. */ public emitText({ text }: Text): void { - this.push(escapeString(text, false)); + this.push( + !this.parserFeedbackSimulator.inForeignContent && hasUnescapedText(this.tokenizer.lastStartTagName, true) + ? text + : escapeString(text, false) + ); } /** Emits serialized comment token into the output stream. 
*/ diff --git a/packages/parse5-html-rewriting-stream/test/rewriting-stream.test.ts b/packages/parse5-html-rewriting-stream/test/rewriting-stream.test.ts index c38264fc5..4f21088aa 100644 --- a/packages/parse5-html-rewriting-stream/test/rewriting-stream.test.ts +++ b/packages/parse5-html-rewriting-stream/test/rewriting-stream.test.ts @@ -329,4 +329,21 @@ describe('RewritingStream', () => { expected: LONG_TEXT_WITH_COMMENT, }) ); + + it( + 'Should emit text in script without escaping (GH-339)', + createRewriterTest({ + src: '<script></script>', + expected: '<script>foo && bar</script>', + assignTokenHandlers: (rewriter) => { + // On a script tag, emit the text without escaping + rewriter.on('startTag', (token) => { + rewriter.emitStartTag(token); + if (token.tagName === 'script') { + rewriter.emitText({ text: 'foo && bar' }); + } + }); + }, + }) + ); }); diff --git a/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts b/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts index aa53619e8..06f9d31aa 100644 --- a/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts +++ b/packages/parse5-sax-parser/lib/parser-feedback-simulator.ts @@ -8,7 +8,7 @@ import { TAG_ID as $, TAG_NAMES as TN, NAMESPACES as NS, getTagID } from 'parse5 //Simulates adjustment of the Tokenizer which performed by standard parser during tree construction. 
export class ParserFeedbackSimulator implements TokenHandler { private namespaceStack: NS[] = []; - private inForeignContent = false; + public inForeignContent = false; public skipNextNewLine = false; public tokenizer: Tokenizer; diff --git a/packages/parse5/lib/serializer/index.ts b/packages/parse5/lib/serializer/index.ts index 65a9ca57b..949445f25 100644 --- a/packages/parse5/lib/serializer/index.ts +++ b/packages/parse5/lib/serializer/index.ts @@ -32,6 +32,10 @@ const VOID_ELEMENTS = new Set<string>([ ]); const UNESCAPED_TEXT = new Set<string>([$.STYLE, $.SCRIPT, $.XMP, $.IFRAME, $.NOEMBED, $.NOFRAMES, $.PLAINTEXT]); +export function hasUnescapedText(tn: string, scriptingEnabled: boolean): boolean { + return UNESCAPED_TEXT.has(tn) || (scriptingEnabled && tn === $.NOSCRIPT); +} + export interface SerializerOptions { /** * Specifies input tree format. @@ -197,7 +201,7 @@ function serializeTextNode<T extends TreeAdapterTypeMap>(node: T['textNode'], op return parentTn && treeAdapter.getNamespaceURI(parent) === NS.HTML && - (UNESCAPED_TEXT.has(parentTn) || (options.scriptingEnabled && parentTn === $.NOSCRIPT)) + hasUnescapedText(parentTn, options.scriptingEnabled) ? content : escapeString(content, false); }