diff --git a/src/nodes/html.ts b/src/nodes/html.ts
index 4d1f21b..10a9a8c 100644
--- a/src/nodes/html.ts
+++ b/src/nodes/html.ts
@@ -133,6 +133,7 @@ export default class HTMLElement extends Node {
 		return JSON.stringify(attr.replace(/"/g, '"'));
 	}
 
+
 	/**
 	 * Creates an instance of HTMLElement.
 	 * @param keyAttrs	id and class attribute
@@ -260,7 +261,7 @@ export default class HTMLElement extends Node {
 						// Whitespace node, postponed output
 						currentBlock.prependWhitespace = true;
 					} else {
-						let text = node.text;
+						let text = (node as TextNode).trimmedText;
 						if (currentBlock.prependWhitespace) {
 							text = ` ${text}`;
 							currentBlock.prependWhitespace = false;
@@ -401,7 +402,7 @@ export default class HTMLElement extends Node {
 				if ((node as TextNode).isWhitespace) {
 					return;
 				}
-				node.rawText = node.rawText.trim();
+				node.rawText = (node as TextNode).trimmedText;
 			} else if (node.nodeType === NodeType.ELEMENT_NODE) {
 				(node as HTMLElement).removeWhitespace();
 			}
diff --git a/src/nodes/text.ts b/src/nodes/text.ts
index 068666a..9b473d7 100644
--- a/src/nodes/text.ts
+++ b/src/nodes/text.ts
@@ -17,6 +17,37 @@ export default class TextNode extends Node {
 	 */
 	public nodeType = NodeType.TEXT_NODE;
 
+	/**
+	 * Raw text with surrounding whitespace trimmed.
+	 *
+	 * If the whitespace character directly adjacent to the content is not a line
+	 * break (CR/LF), it is kept as a single regular space on that side; all other
+	 * leading/trailing whitespace is dropped. Whitespace inside the content is left
+	 * untouched, and empty or whitespace-only text is returned unchanged.
+	 *
+	 * Computed from `rawText` on every access rather than cached, so the result
+	 * cannot go stale when `rawText` is reassigned.
+	 */
+	public get trimmedText(): string {
+		const text = this.rawText;
+
+		// Index of the first non-whitespace character, or -1 when there is none.
+		const startPos = text.search(/\S/);
+		if (startPos === -1) return text;
+
+		// Walk back to the last non-whitespace character; stops at startPos at the latest.
+		const whitespace = /\s/;
+		let endPos = text.length - 1;
+		while (whitespace.test(text.charAt(endPos))) endPos--;
+
+		// Whitespace other than CR/LF counts as a separator worth keeping.
+		const inlineSpace = /[^\S\r\n]/;
+		const hasLeadingSpace = startPos > 0 && inlineSpace.test(text.charAt(startPos - 1));
+		const hasTrailingSpace = endPos < text.length - 1 && inlineSpace.test(text.charAt(endPos + 1));
+
+		return (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
+	}
+
 	/**
 	 * Get unescaped text value of current node and its children.
 	 * @return {string} text content
diff --git a/test/html.js b/test/html.js
index 6c9b8de..e84c20b 100644
--- a/test/html.js
+++ b/test/html.js
@@ -126,10 +126,10 @@ describe('HTML Parser', function () {
 			const script = root.firstChild;
 			const style = root.lastChild;
 			script.childNodes.should.not.be.empty;
-			script.childNodes.should.eql([new TextNode('1', script)]);
+			script.childNodes.should.eql([ new TextNode('1', script) ]);
 			script.text.should.eql('1');
 			style.childNodes.should.not.be.empty;
-			style.childNodes.should.eql([new TextNode('2&', style)]);
+			style.childNodes.should.eql([ new TextNode('2&', style) ]);
 			style.text.should.eql('2&');
 			style.rawText.should.eql('2&');
 		});
@@ -198,14 +198,25 @@ describe('HTML Parser', function () {
 
 	describe('#removeWhitespace()', function () {
 		it('should remove whitespaces while preserving nodes with content', function () {
-			const root = parseHTML('

\r \n \t

123

'); + const root = parseHTML('

\r \n \t

123

'); + + const textNode = new TextNode(' 123 '); + textNode.rawText = textNode.trimmedText; + textNode.rawText.should.eql(' 123 '); const p = new HTMLElement('p', {}, '', root); - p.appendChild(new HTMLElement('h5', {}, '')) - .appendChild(new TextNode('123')); + p + .appendChild(new HTMLElement('h5', {}, '')) + .appendChild(textNode); root.firstChild.removeWhitespace().should.eql(p); }); + + it('should preserve legitimate leading/trailing whitespace in TextNode', function () { + parseHTML('

Hello World!

').removeWhitespace().firstChild.text.should.eql('Hello World!'); + parseHTML('

\t\nHello\n\tWorld!

').removeWhitespace().firstChild.text.should.eql('HelloWorld!'); + parseHTML('

\t\n Hello \n\tWorld!

').removeWhitespace().firstChild.text.should.eql(' Hello World!'); + }); }); describe('#rawAttributes', function () {