Skip to content

Commit

Permalink
Merge pull request #912 from capricorn86/task/908-domparserparsefroms…
Browse files Browse the repository at this point in the history
…tring-doesnt-decode-html-entities

#908@minor: Adds support for decoding HTML entities to the XML parser.…
  • Loading branch information
capricorn86 committed May 11, 2023
2 parents 2291dc3 + 5b3960a commit a895911
Show file tree
Hide file tree
Showing 13 changed files with 115 additions and 99 deletions.
35 changes: 18 additions & 17 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions packages/happy-dom/package.json
Expand Up @@ -48,14 +48,13 @@
},
"dependencies": {
"css.escape": "^1.5.1",
"he": "^1.2.0",
"entities": "^4.5.0",
"webidl-conversions": "^7.0.0",
"whatwg-encoding": "^2.0.0",
"whatwg-mimetype": "^3.0.0",
"iconv-lite": "^0.6.3"
},
"devDependencies": {
"@types/he": "^1.1.2",
"@types/jest": "^29.4.0",
"@types/node": "^15.6.0",
"@types/node-fetch": "^2.6.1",
Expand Down
@@ -1,26 +1,5 @@
import ICharacterData from './ICharacterData';

/**
 * Replacement table used to turn HTML character references into literal characters.
 *
 * Each entry matches the named, decimal and hexadecimal reference for one character
 * (for example `&lt;`, `&#60;` and `&#x3C;`) and maps it to that character.
 * Entries are meant to be applied in order, one `String.replace()` per entry.
 */
const HTML_ENTITIES = [
	{ regex: /&(?:quot|#34|#x22);/g, value: '"' },
	{ regex: /&(?:apos|#39|#x27);/g, value: "'" },
	{ regex: /&(?:lt|#60|#x3C);/g, value: '<' },
	{ regex: /&(?:gt|#62|#x3E);/g, value: '>' },
	// NOTE(review): the replacement below is reproduced as found; confirm whether it
	// is meant to be a non-breaking space (U+00A0) rather than a regular space.
	{ regex: /&(?:nbsp|#160|#xA0);/g, value: ' ' },
	// Ampersand references must be decoded last and in a single pass. Decoding them
	// earlier would turn an escaped reference such as "&amp;lt;" into "&lt;" and a
	// later entry would then decode it a second time into "<".
	{ regex: /&(?:amp|#38|#x26);/g, value: '&' }
];

/**
* Child node utility.
*/
Expand Down Expand Up @@ -90,20 +69,4 @@ export default class CharacterDataUtility {
): string {
return characterData.data.substring(offset, offset + count);
}

/**
 * Replaces the HTML entities listed in HTML_ENTITIES with their literal characters.
 *
 * Every entry of the table is applied in order, each as a global replace on the
 * result of the previous one.
 *
 * @param html String.
 * @returns Decoded HTML string, or an empty string when the input is falsy.
 */
public static decodeHTMLEntities(html: string): string {
	return html
		? HTML_ENTITIES.reduce(
				(decoded, { regex, value }) => decoded.replace(regex, value),
				html
		  )
		: '';
}
}
12 changes: 7 additions & 5 deletions packages/happy-dom/src/nodes/element/Element.ts
Expand Up @@ -30,7 +30,6 @@ import INamedNodeMap from '../../named-node-map/INamedNodeMap';
import Event from '../../event/Event';
import ElementUtility from './ElementUtility';
import HTMLCollection from './HTMLCollection';
import CharacterDataUtility from '../character-data/CharacterDataUtility';
import EventPhaseEnum from '../../event/EventPhaseEnum';

/**
Expand Down Expand Up @@ -208,7 +207,7 @@ export default class Element extends Node implements IElement {
result += childNode.textContent;
}
}
return CharacterDataUtility.decodeHTMLEntities(result);
return result;
}

/**
Expand Down Expand Up @@ -253,7 +252,7 @@ export default class Element extends Node implements IElement {
* @returns HTML.
*/
public get outerHTML(): string {
return new XMLSerializer().serializeToString(this);
return new XMLSerializer({ escapeEntities: false }).serializeToString(this);
}

/**
Expand Down Expand Up @@ -340,10 +339,13 @@ export default class Element extends Node implements IElement {
* @returns HTML.
*/
public getInnerHTML(options?: { includeShadowRoots?: boolean }): string {
const xmlSerializer = new XMLSerializer();
const xmlSerializer = new XMLSerializer({
includeShadowRoots: options && options.includeShadowRoots,
escapeEntities: false
});
let xml = '';
for (const node of this.childNodes) {
xml += xmlSerializer.serializeToString(node, options);
xml += xmlSerializer.serializeToString(node);
}
return xml;
}
Expand Down
Expand Up @@ -50,10 +50,13 @@ export default class HTMLTemplateElement extends HTMLElement implements IHTMLTem
* @override
*/
public getInnerHTML(options?: { includeShadowRoots?: boolean }): string {
const xmlSerializer = new XMLSerializer();
const xmlSerializer = new XMLSerializer({
includeShadowRoots: options && options.includeShadowRoots,
escapeEntities: false
});
let xml = '';
for (const node of this.content.childNodes) {
xml += xmlSerializer.serializeToString(node, options);
xml += xmlSerializer.serializeToString(node);
}
return xml;
}
Expand Down
4 changes: 3 additions & 1 deletion packages/happy-dom/src/nodes/shadow-root/ShadowRoot.ts
Expand Up @@ -24,7 +24,9 @@ export default class ShadowRoot extends DocumentFragment implements IShadowRoot
* @returns HTML.
*/
public get innerHTML(): string {
const xmlSerializer = new XMLSerializer();
const xmlSerializer = new XMLSerializer({
escapeEntities: false
});
let xml = '';
for (const node of this.childNodes) {
xml += xmlSerializer.serializeToString(node);
Expand Down
26 changes: 18 additions & 8 deletions packages/happy-dom/src/xml-parser/XMLParser.ts
Expand Up @@ -9,7 +9,7 @@ import PlainTextElements from '../config/PlainTextElements';
import IDocumentType from '../nodes/document-type/IDocumentType';
import INode from '../nodes/node/INode';
import IDocumentFragment from '../nodes/document-fragment/IDocumentFragment';
import { decode } from 'he';
import * as Entities from 'entities';

/**
* Markup RegExp.
Expand Down Expand Up @@ -99,7 +99,7 @@ export default class XMLParser {
// Plain text between tags.

currentNode.appendChild(
document.createTextNode(xml.substring(lastIndex, match.index))
document.createTextNode(Entities.decodeHTML(xml.substring(lastIndex, match.index)))
);
}

Expand Down Expand Up @@ -161,13 +161,17 @@ export default class XMLParser {
// Comment.

currentNode.appendChild(
document.createComment((match[6] ? '?' : '') + (match[3] || match[4] || match[6]))
document.createComment(
Entities.decodeHTML((match[6] ? '?' : '') + (match[3] || match[4] || match[6]))
)
);
} else if (match[5]) {
// Exclamation mark comment (usually <!DOCTYPE>).

const exclamationComment = Entities.decodeHTML(match[5]);
currentNode.appendChild(
this.getDocumentTypeNode(document, match[5]) || document.createComment(match[5])
this.getDocumentTypeNode(document, exclamationComment) ||
document.createComment(exclamationComment)
);
} else if (match[6]) {
// Processing instruction (not supported by HTML).
Expand All @@ -176,7 +180,9 @@ export default class XMLParser {
// Plain text between tags, including the match as it is not a valid start or end tag.

currentNode.appendChild(
document.createTextNode(xml.substring(lastIndex, markupRegexp.lastIndex))
document.createTextNode(
Entities.decodeHTML(xml.substring(lastIndex, markupRegexp.lastIndex))
)
);
}

Expand Down Expand Up @@ -205,7 +211,7 @@ export default class XMLParser {

const name = attributeMatch[1] || attributeMatch[5] || attributeMatch[9] || '';
const rawValue = attributeMatch[3] || attributeMatch[7] || '';
const value = rawValue ? decode(rawValue) : '';
const value = rawValue ? Entities.decodeHTMLAttribute(rawValue) : '';
const namespaceURI =
(<IElement>currentNode).tagName === 'SVG' && name === 'xmlns' ? value : null;

Expand Down Expand Up @@ -271,7 +277,9 @@ export default class XMLParser {

// Plain text elements such as <script> and <style> should only contain text.
currentNode.appendChild(
document.createTextNode(xml.substring(startTagIndex, match.index))
document.createTextNode(
Entities.decodeHTML(xml.substring(startTagIndex, match.index))
)
);

stack.pop();
Expand All @@ -289,7 +297,9 @@ export default class XMLParser {
if (lastIndex !== xml.length) {
// Plain text after tags.

currentNode.appendChild(document.createTextNode(xml.substring(lastIndex)));
currentNode.appendChild(
document.createTextNode(Entities.decodeHTML(xml.substring(lastIndex)))
);
}
}

Expand Down

0 comments on commit a895911

Please sign in to comment.