Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(rewriting-stream): Don't escape text in special tags #434

Merged
merged 1 commit into from Mar 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 6 additions & 2 deletions packages/parse5-html-rewriting-stream/lib/index.ts
@@ -1,6 +1,6 @@
import type { Location } from 'parse5/dist/common/token.js';
import { SAXParser, EndTag, StartTag, Doctype, Text, Comment, SaxToken } from 'parse5-sax-parser';
import { escapeString } from 'parse5/dist/serializer/index.js';
import { hasUnescapedText, escapeString } from 'parse5/dist/serializer/index.js';

/**
* Streaming [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style HTML rewriter.
Expand Down Expand Up @@ -125,7 +125,11 @@ export class RewritingStream extends SAXParser {

/** Emits serialized text token into the output stream. */
public emitText({ text }: Text): void {
this.push(escapeString(text, false));
this.push(
!this.parserFeedbackSimulator.inForeignContent && hasUnescapedText(this.tokenizer.lastStartTagName, true)
? text
: escapeString(text, false)
);
}

/** Emits serialized comment token into the output stream. */
Expand Down
Expand Up @@ -329,4 +329,21 @@ describe('RewritingStream', () => {
expected: LONG_TEXT_WITH_COMMENT,
})
);

it(
    'Should emit text in script without escaping (GH-339)',
    createRewriterTest({
        src: '<script></script>',
        expected: '<script>foo && bar</script>',
        assignTokenHandlers: (rewriter) => {
            rewriter.on('startTag', (startTag) => {
                // Always forward the start tag itself.
                rewriter.emitStartTag(startTag);

                if (startTag.tagName !== 'script') {
                    return;
                }

                // Right after a script start tag, emit text containing `&&`;
                // the expected output has it verbatim (not entity-escaped).
                rewriter.emitText({ text: 'foo && bar' });
            });
        },
    })
);
});
Expand Up @@ -8,7 +8,7 @@ import { TAG_ID as $, TAG_NAMES as TN, NAMESPACES as NS, getTagID } from 'parse5
// Simulates the Tokenizer adjustments that the standard parser performs during tree construction.
export class ParserFeedbackSimulator implements TokenHandler {
private namespaceStack: NS[] = [];
private inForeignContent = false;
public inForeignContent = false;
public skipNextNewLine = false;
public tokenizer: Tokenizer;

Expand Down
6 changes: 5 additions & 1 deletion packages/parse5/lib/serializer/index.ts
Expand Up @@ -32,6 +32,10 @@ const VOID_ELEMENTS = new Set<string>([
]);
const UNESCAPED_TEXT = new Set<string>([$.STYLE, $.SCRIPT, $.XMP, $.IFRAME, $.NOEMBED, $.NOFRAMES, $.PLAINTEXT]);

/**
 * Tells whether the given tag name is one whose text content is left unescaped.
 *
 * @param tn - Tag name to look up.
 * @param scriptingEnabled - When `true`, `noscript` is also counted as such a tag.
 * @returns `true` if `tn` is in `UNESCAPED_TEXT`, or if it is `noscript` and
 * `scriptingEnabled` is set.
 */
export function hasUnescapedText(tn: string, scriptingEnabled: boolean): boolean {
    if (UNESCAPED_TEXT.has(tn)) {
        return true;
    }

    // `noscript` only qualifies when scripting is enabled.
    return scriptingEnabled && tn === $.NOSCRIPT;
}

export interface SerializerOptions<T extends TreeAdapterTypeMap> {
/**
* Specifies input tree format.
Expand Down Expand Up @@ -197,7 +201,7 @@ function serializeTextNode<T extends TreeAdapterTypeMap>(node: T['textNode'], op

return parentTn &&
treeAdapter.getNamespaceURI(parent) === NS.HTML &&
(UNESCAPED_TEXT.has(parentTn) || (options.scriptingEnabled && parentTn === $.NOSCRIPT))
hasUnescapedText(parentTn, options.scriptingEnabled)
? content
: escapeString(content, false);
}
Expand Down