Skip to content

Commit

Permalink
Merge pull request #123 from nonara/trim-fix
Browse files (browse the repository at this point in the history)
fix issue #115
  • Loading branch information
taoqf committed May 25, 2021
2 parents 416fc9f + d914efa commit 0632085
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 7 deletions.
5 changes: 3 additions & 2 deletions src/nodes/html.ts
Expand Up @@ -133,6 +133,7 @@ export default class HTMLElement extends Node {

return JSON.stringify(attr.replace(/"/g, '"'));
}

/**
* Creates an instance of HTMLElement.
* @param keyAttrs id and class attribute
Expand Down Expand Up @@ -260,7 +261,7 @@ export default class HTMLElement extends Node {
// Whitespace node, postponed output
currentBlock.prependWhitespace = true;
} else {
let text = node.text;
let text = (<TextNode>node).trimmedText;
if (currentBlock.prependWhitespace) {
text = ` ${text}`;
currentBlock.prependWhitespace = false;
Expand Down Expand Up @@ -401,7 +402,7 @@ export default class HTMLElement extends Node {
if ((node as TextNode).isWhitespace) {
return;
}
node.rawText = node.rawText.trim();
node.rawText = (<TextNode>node).trimmedText;
} else if (node.nodeType === NodeType.ELEMENT_NODE) {
(node as HTMLElement).removeWhitespace();
}
Expand Down
39 changes: 39 additions & 0 deletions src/nodes/text.ts
Expand Up @@ -17,6 +17,45 @@ export default class TextNode extends Node {
*/
public nodeType = NodeType.TEXT_NODE;

private _trimmedText?: string;

/**
 * The `rawText` value that `_trimmedText` was computed from.
 * `rawText` is reassigned by callers, so the cached result is only reused
 * while this still equals the current `rawText`.
 */
private _trimmedTextSource?: string;

/**
 * Returns `rawText` with leading and trailing whitespace removed, keeping at
 * most one regular space (`' '`) on each side.
 *
 * A side keeps its single space only when the whitespace character directly
 * adjacent to the content (immediately before the first, or immediately after
 * the last, non-whitespace character) is neither `\r` nor `\n`.
 *
 * Text that contains no non-whitespace character (including the empty string)
 * is returned unchanged.
 *
 * The result is cached and recomputed whenever `rawText` has changed since the
 * cached value was produced.
 *
 * @returns the trimmed text
 */
public get trimmedText(): string {
	const text = this.rawText;

	// Reuse the cache only if rawText is still the value it was computed from.
	if (this._trimmedText !== undefined && this._trimmedTextSource === text) {
		return this._trimmedText;
	}

	const nonWhitespace = /\S/;
	// Whitespace other than CR/LF: such a neighbour is collapsed to a single space.
	const inlineWhitespace = /[^\S\r\n]/;

	// Whitespace-only / empty input falls through with the text left as-is.
	let trimmed = text;
	const startPos = text.search(nonWhitespace);

	if (startPos !== -1) {
		// Content exists, so scanning backwards is guaranteed to stop at or after startPos.
		let endPos = text.length - 1;
		while (!nonWhitespace.test(text.charAt(endPos))) endPos--;

		const hasLeadingSpace = startPos > 0 && inlineWhitespace.test(text.charAt(startPos - 1));
		const hasTrailingSpace = endPos < text.length - 1 && inlineWhitespace.test(text.charAt(endPos + 1));

		trimmed = (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
	}

	this._trimmedTextSource = text;
	this._trimmedText = trimmed;

	return trimmed;
}

/**
* Get unescaped text value of current node and its children.
* @return {string} text content
Expand Down
21 changes: 16 additions & 5 deletions test/html.js
Expand Up @@ -126,10 +126,10 @@ describe('HTML Parser', function () {
const script = root.firstChild;
const style = root.lastChild;
script.childNodes.should.not.be.empty;
script.childNodes.should.eql([new TextNode('1', script)]);
script.childNodes.should.eql([ new TextNode('1', script) ]);
script.text.should.eql('1');
style.childNodes.should.not.be.empty;
style.childNodes.should.eql([new TextNode('2&amp;', style)]);
style.childNodes.should.eql([ new TextNode('2&amp;', style) ]);
style.text.should.eql('2&');
style.rawText.should.eql('2&amp;');
});
Expand Down Expand Up @@ -198,14 +198,25 @@ describe('HTML Parser', function () {

describe('#removeWhitespace()', function () {
it('should remove whitespaces while preserving nodes with content', function () {
const root = parseHTML('<p> \r \n \t <h5> 123 </h5></p>');
const root = parseHTML('<p> \r \n \t <h5> 123 </h5></p>');

const textNode = new TextNode(' 123 ');
textNode.rawText = textNode.trimmedText;
textNode.rawText.should.eql(' 123 ');

const p = new HTMLElement('p', {}, '', root);
p.appendChild(new HTMLElement('h5', {}, ''))
.appendChild(new TextNode('123'));
p
.appendChild(new HTMLElement('h5', {}, ''))
.appendChild(textNode);

root.firstChild.removeWhitespace().should.eql(p);
});

it('should preserve legitimate leading/trailing whitespace in TextNode', function () {
parseHTML('<p>Hello <em>World</em>!</p>').removeWhitespace().firstChild.text.should.eql('Hello World!');
parseHTML('<p>\t\nHello\n\t<em>World</em>!</p>').removeWhitespace().firstChild.text.should.eql('HelloWorld!');
parseHTML('<p>\t\n Hello \n\t<em>World</em>!</p>').removeWhitespace().firstChild.text.should.eql(' Hello World!');
});
});

describe('#rawAttributes', function () {
Expand Down

0 comments on commit 0632085

Please sign in to comment.