Skip to content

Commit

Permalink
fix(rewriting-stream): Don't escape text in special tags (inikulin#434)
Browse files Browse the repository at this point in the history
  • Loading branch information
fb55 authored and jmbpwtw committed Feb 16, 2023
1 parent 0630256 commit f11ea57
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 4 deletions.
8 changes: 6 additions & 2 deletions packages/parse5-html-rewriting-stream/lib/index.ts
@@ -1,6 +1,6 @@
import type { Location } from 'parse5/dist/common/token.js';
import { SAXParser, EndTag, StartTag, Doctype, Text, Comment, SaxToken } from 'parse5-sax-parser';
import { escapeString } from 'parse5/dist/serializer/index.js';
import { hasUnescapedText, escapeString } from 'parse5/dist/serializer/index.js';

/**
* Streaming [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style HTML rewriter.
Expand Down Expand Up @@ -125,7 +125,11 @@ export class RewritingStream extends SAXParser {

/**
 * Emits serialized text token into the output stream.
 *
 * The text is pushed verbatim when the parser feedback simulator is not in
 * foreign content and `hasUnescapedText` accepts the tokenizer's last start tag
 * name; in every other case it is passed through `escapeString` first.
 *
 * @param token - Text token; only its `text` property is read.
 */
public emitText({ text }: Text): void {
// NOTE(review): this view is a diff hunk (@@ -125,7 +125,11), so the single-line
// push directly below appears to be the removed pre-change statement and the
// multi-line push after it its replacement — confirm against the committed file.
this.push(escapeString(text, false));
this.push(
// The second argument (`scriptingEnabled`) is hard-coded to `true` here.
!this.parserFeedbackSimulator.inForeignContent && hasUnescapedText(this.tokenizer.lastStartTagName, true)
? text
: escapeString(text, false)
);
}

/** Emits serialized comment token into the output stream. */
Expand Down
Expand Up @@ -329,4 +329,21 @@ describe('RewritingStream', () => {
expected: LONG_TEXT_WITH_COMMENT,
})
);

// Regression test for GH-339: text emitted right after a <script> start tag
// must reach the output stream as-is ('&&' must not be entity-escaped).
it(
    'Should emit text in script without escaping (GH-339)',
    createRewriterTest({
        src: '<script></script>',
        expected: '<script>foo && bar</script>',
        assignTokenHandlers: (rewriter) => {
            rewriter.on('startTag', (startTag) => {
                // Always forward the start tag itself.
                rewriter.emitStartTag(startTag);

                // Only script tags get the extra text injected.
                if (startTag.tagName !== 'script') {
                    return;
                }

                rewriter.emitText({ text: 'foo && bar' });
            });
        },
    })
);
});
Expand Up @@ -8,7 +8,7 @@ import { TAG_ID as $, TAG_NAMES as TN, NAMESPACES as NS, getTagID } from 'parse5
//Simulates the adjustment of the Tokenizer that is performed by the standard parser during tree construction.
export class ParserFeedbackSimulator implements TokenHandler {
private namespaceStack: NS[] = [];
private inForeignContent = false;
public inForeignContent = false;
public skipNextNewLine = false;
public tokenizer: Tokenizer;

Expand Down
6 changes: 5 additions & 1 deletion packages/parse5/lib/serializer/index.ts
Expand Up @@ -32,6 +32,10 @@ const VOID_ELEMENTS = new Set<string>([
]);
// Tag names whose text content the serializer emits verbatim rather than passing it through `escapeString`.
const UNESCAPED_TEXT = new Set<string>([$.STYLE, $.SCRIPT, $.XMP, $.IFRAME, $.NOEMBED, $.NOFRAMES, $.PLAINTEXT]);

/**
 * Tells whether text inside an element with the given tag name is emitted
 * without escaping.
 *
 * @param tn - Tag name to test.
 * @param scriptingEnabled - When `true`, `$.NOSCRIPT` is also treated as a
 * raw-text tag.
 * @returns `true` if `tn` is in `UNESCAPED_TEXT`, or if `scriptingEnabled` is
 * set and `tn` equals `$.NOSCRIPT`; `false` otherwise.
 */
export function hasUnescapedText(tn: string, scriptingEnabled: boolean): boolean {
    if (UNESCAPED_TEXT.has(tn)) {
        return true;
    }

    // <noscript> content only counts as raw text when scripting is enabled.
    return scriptingEnabled && tn === $.NOSCRIPT;
}

export interface SerializerOptions<T extends TreeAdapterTypeMap> {
/**
* Specifies input tree format.
Expand Down Expand Up @@ -197,7 +201,7 @@ function serializeTextNode<T extends TreeAdapterTypeMap>(node: T['textNode'], op

return parentTn &&
treeAdapter.getNamespaceURI(parent) === NS.HTML &&
(UNESCAPED_TEXT.has(parentTn) || (options.scriptingEnabled && parentTn === $.NOSCRIPT))
hasUnescapedText(parentTn, options.scriptingEnabled)
? content
: escapeString(content, false);
}
Expand Down

0 comments on commit f11ea57

Please sign in to comment.