Skip to content

Commit

Permalink
fix issue #115
Browse files Browse the repository at this point in the history
  • Loading branch information
nonara committed May 22, 2021
1 parent 416fc9f commit d4d06ed
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 2 deletions.
38 changes: 37 additions & 1 deletion src/nodes/html.ts
Expand Up @@ -133,6 +133,42 @@ export default class HTMLElement extends Node {

return JSON.stringify(attr.replace(/"/g, '"'));
}

/**
 * Trim the whitespace surrounding a text node's content, keeping at most one
 * plain space on each side.
 *
 * A single `' '` is kept on a side only when the whitespace character directly
 * adjacent to the content on that side is not a line break (`\r` or `\n`).
 * This keeps word separators such as the space in `'Hello <em>'`, while
 * whitespace that touches the content with a line break is removed entirely.
 *
 * Text without any non-whitespace character (including the empty string) is
 * returned unchanged.
 *
 * @param text string to trim
 * @returns the trimmed value
 * @private
 */
private trimTextNodeWhitespace(text: string): string {
	// Built once per call instead of once per scanned character.
	const nonWhitespace = /\S/;
	// Any whitespace character except CR / LF.
	const inlineWhitespace = /[^\S\r\n]/;

	const startPos = text.search(nonWhitespace);
	// No content at all: there is nothing to trim around.
	if (startPos === -1) return text;

	// A non-whitespace character exists at startPos, so this backward scan
	// always stops at an index >= startPos.
	let endPos = text.length - 1;
	while (!nonWhitespace.test(text.charAt(endPos))) endPos--;

	const hasLeadingSpace = startPos > 0 && inlineWhitespace.test(text.charAt(startPos - 1));
	const hasTrailingSpace = endPos < text.length - 1 && inlineWhitespace.test(text.charAt(endPos + 1));

	return (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
}

/**
* Creates an instance of HTMLElement.
* @param keyAttrs id and class attribute
Expand Down Expand Up @@ -401,7 +437,7 @@ export default class HTMLElement extends Node {
if ((node as TextNode).isWhitespace) {
return;
}
node.rawText = node.rawText.trim();
node.rawText = this.trimTextNodeWhitespace(node.rawText);
} else if (node.nodeType === NodeType.ELEMENT_NODE) {
(node as HTMLElement).removeWhitespace();
}
Expand Down
8 changes: 7 additions & 1 deletion test/html.js
Expand Up @@ -198,14 +198,20 @@ describe('HTML Parser', function () {

describe('#removeWhitespace()', function () {
it('should remove whitespaces while preserving nodes with content', function () {
const root = parseHTML('<p> \r \n \t <h5> 123 </h5></p>');
const root = parseHTML('<p> \r \n \t <h5>123</h5></p>');

const p = new HTMLElement('p', {}, '', root);
p.appendChild(new HTMLElement('h5', {}, ''))
.appendChild(new TextNode('123'));

root.firstChild.removeWhitespace().should.eql(p);
});

// Checks that removeWhitespace() keeps a single separating space next to
// text content, but drops whitespace that meets the content at a line break.
it('should preserve legitimate leading/trailing whitespace in TextNode', function () {
// The space after "Hello" separates it from the following <em> and is kept.
parseHTML('<p>Hello <em>World</em>!</p>').removeWhitespace().firstChild.text.should.eql('Hello World!');
// The characters touching "Hello" on both sides are newlines, so nothing is kept.
parseHTML('<p>\t\nHello\n\t<em>World</em>!</p>').removeWhitespace().firstChild.text.should.eql('HelloWorld!');
// Plain spaces touch "Hello" on both sides, so one space is kept on each side
// while the surrounding tab/newline run is dropped.
parseHTML('<p>\t\n Hello \n\t<em>World</em>!</p>').removeWhitespace().firstChild.text.should.eql(' Hello World!');
});
});

describe('#rawAttributes', function () {
Expand Down

0 comments on commit d4d06ed

Please sign in to comment.