diff --git a/src/nodes/html.ts b/src/nodes/html.ts
index 4d1f21b..10a9a8c 100644
--- a/src/nodes/html.ts
+++ b/src/nodes/html.ts
@@ -133,6 +133,7 @@ export default class HTMLElement extends Node {
return JSON.stringify(attr.replace(/"/g, '"'));
}
+
/**
* Creates an instance of HTMLElement.
* @param keyAttrs id and class attribute
@@ -260,7 +261,7 @@ export default class HTMLElement extends Node {
// Whitespace node, postponed output
currentBlock.prependWhitespace = true;
} else {
- let text = node.text;
+ let text = (node).trimmedText;
if (currentBlock.prependWhitespace) {
text = ` ${text}`;
currentBlock.prependWhitespace = false;
@@ -401,7 +402,7 @@ export default class HTMLElement extends Node {
if ((node as TextNode).isWhitespace) {
return;
}
- node.rawText = node.rawText.trim();
+ node.rawText = (node).trimmedText;
} else if (node.nodeType === NodeType.ELEMENT_NODE) {
(node as HTMLElement).removeWhitespace();
}
diff --git a/src/nodes/text.ts b/src/nodes/text.ts
index 068666a..9b473d7 100644
--- a/src/nodes/text.ts
+++ b/src/nodes/text.ts
@@ -17,6 +17,45 @@ export default class TextNode extends Node {
*/
public nodeType = NodeType.TEXT_NODE;
+ /** Memoized `trimmedText` result; valid only while `rawText === _trimmedSource`. */
+ private _trimmedText?: string;
+ /** The `rawText` value that `_trimmedText` was computed from. */
+ private _trimmedSource?: string;
+
+ /**
+  * Raw text with its leading and trailing whitespace runs removed. A run is
+  * replaced by one space instead when the whitespace character adjacent to
+  * the content is not a line break (CR/LF). Text containing no
+  * non-whitespace character is returned unchanged.
+  *
+  * The result is memoized per `rawText` value, so reassigning `rawText`
+  * never yields a stale result.
+  */
+ public get trimmedText(): string {
+   const text = this.rawText;
+   if (this._trimmedSource === text && this._trimmedText !== undefined) {
+     return this._trimmedText;
+   }
+
+   let trimmed = text;
+   const startPos = text.search(/\S/);
+   if (startPos !== -1) {
+     // A non-whitespace character exists, so this backward scan stops at or after startPos.
+     let endPos = text.length - 1;
+     while (/\s/.test(text.charAt(endPos))) endPos--;
+
+     // Whitespace other than CR/LF, i.e. spacing that sits on the same line as the content.
+     const inlineSpace = /[^\S\r\n]/;
+     const hasLeadingSpace = startPos > 0 && inlineSpace.test(text.charAt(startPos - 1));
+     const hasTrailingSpace = endPos < text.length - 1 && inlineSpace.test(text.charAt(endPos + 1));
+     trimmed = (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
+   }
+
+   this._trimmedSource = text;
+   this._trimmedText = trimmed;
+   return trimmed;
+ }
+
/**
* Get unescaped text value of current node and its children.
* @return {string} text content
diff --git a/test/html.js b/test/html.js
index 6c9b8de..e84c20b 100644
--- a/test/html.js
+++ b/test/html.js
@@ -126,10 +126,10 @@ describe('HTML Parser', function () {
const script = root.firstChild;
const style = root.lastChild;
script.childNodes.should.not.be.empty;
- script.childNodes.should.eql([new TextNode('1', script)]);
+ script.childNodes.should.eql([ new TextNode('1', script) ]);
script.text.should.eql('1');
style.childNodes.should.not.be.empty;
- style.childNodes.should.eql([new TextNode('2&', style)]);
+ style.childNodes.should.eql([ new TextNode('2&', style) ]);
style.text.should.eql('2&');
style.rawText.should.eql('2&');
});
@@ -198,14 +198,25 @@ describe('HTML Parser', function () {
describe('#removeWhitespace()', function () {
it('should remove whitespaces while preserving nodes with content', function () {
- const root = parseHTML(' \r \n \t
123
');
+ const root = parseHTML(' \r \n \t
123
');
+
+ const textNode = new TextNode(' 123 ');
+ textNode.rawText = textNode.trimmedText;
+ textNode.rawText.should.eql(' 123 ');
const p = new HTMLElement('p', {}, '', root);
- p.appendChild(new HTMLElement('h5', {}, ''))
- .appendChild(new TextNode('123'));
+ p
+ .appendChild(new HTMLElement('h5', {}, ''))
+ .appendChild(textNode);
root.firstChild.removeWhitespace().should.eql(p);
});
+
+ it('should preserve legitimate leading/trailing whitespace in TextNode', function () {
+ parseHTML('Hello World!
').removeWhitespace().firstChild.text.should.eql('Hello World!');
+ parseHTML('\t\nHello\n\tWorld!
').removeWhitespace().firstChild.text.should.eql('HelloWorld!');
+ parseHTML('\t\n Hello \n\tWorld!
').removeWhitespace().firstChild.text.should.eql(' Hello World!');
+ });
});
describe('#rawAttributes', function () {