Skip to content

Commit

Permalink
fix(HTML parsing): fix HTML parsing issues with nested tags
Browse files Browse the repository at this point in the history
Deeply nested HTML tags and recursive tags broke the HTML parser.

Closes #357, closes #387
  • Loading branch information
tivie committed May 28, 2017
1 parent 813f832 commit 6fbc072
Show file tree
Hide file tree
Showing 11 changed files with 149 additions and 10 deletions.
54 changes: 51 additions & 3 deletions dist/showdown.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion dist/showdown.js.map

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions dist/showdown.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/showdown.min.js.map

Large diffs are not rendered by default.

37 changes: 37 additions & 0 deletions src/helpers.js
Expand Up @@ -273,6 +273,43 @@ showdown.helper.replaceRecursiveRegExp = function (str, replacement, left, right
return finalStr;
};

/**
 * Returns the index within the passed String object of the first occurrence of the specified regex,
 * starting the search at fromIndex. Returns -1 if the value is not found.
 *
 * The search is performed on the substring that begins at fromIndex, so anchors in regex
 * (such as ^) are evaluated relative to that substring, not to the start of str.
 *
 * fromIndex is normalised the way String.prototype.indexOf treats its position argument:
 * missing or non numeric values count as 0, fractions are truncated and the result is
 * clamped to the range [0, str.length].
 *
 * @param {string} str string to search
 * @param {RegExp} regex Regular expression to search
 * @param {int} [fromIndex = 0] Index to start the search
 * @returns {Number} absolute index of the first match in str, or -1 if there is none
 * @throws InvalidArgumentError
 */
showdown.helper.regexIndexOf = function (str, regex, fromIndex) {
  'use strict';
  if (!showdown.helper.isString(str)) {
    throw 'InvalidArgumentError: first parameter of showdown.helper.regexIndexOf function must be a string';
  }
  if (regex instanceof RegExp === false) {
    throw 'InvalidArgumentError: second parameter of showdown.helper.regexIndexOf function must be an instance of RegExp';
  }
  // Normalise the offset once. substring() silently treats a negative start as 0, so adding the
  // raw (negative) fromIndex back to the match position would yield a wrong, possibly negative, index.
  var start = Math.floor(Number(fromIndex)) || 0;
  start = Math.min(Math.max(start, 0), str.length);

  var indexOf = str.substring(start).search(regex);
  // search() is relative to the substring; shift the hit back into str coordinates
  return (indexOf >= 0) ? (indexOf + start) : indexOf;
};

/**
 * Splits the passed string object at the defined index, and returns an array composed of the two substrings.
 * The first element holds the characters before index, the second holds the characters from index onwards.
 *
 * @param {string} str string to split
 * @param {int} index index to split string at
 * @returns {[string,string]}
 * @throws InvalidArgumentError
 */
showdown.helper.splitAtIndex = function (str, index) {
  'use strict';
  if (!showdown.helper.isString(str)) {
    // The message names this function (it previously pointed at regexIndexOf by mistake)
    throw 'InvalidArgumentError: first parameter of showdown.helper.splitAtIndex function must be a string';
  }
  return [str.substring(0, index), str.substring(index)];
};

/**
* Obfuscate an e-mail address through the use of Character Entities,
* transforming ASCII characters into their equivalent decimal or hex entities.
Expand Down
15 changes: 13 additions & 2 deletions src/subParsers/hashHTMLBlocks.js
Expand Up @@ -49,9 +49,20 @@ showdown.subParser('hashHTMLBlocks', function (text, options, globals) {
};

for (var i = 0; i < blockTags.length; ++i) {
text = showdown.helper.replaceRecursiveRegExp(text, repFunc, '^ {0,3}<' + blockTags[i] + '\\b[^>]*>', '</' + blockTags[i] + '>', 'gim');
}

var opTagPos,
rgx1 = new RegExp('^ {0,3}<' + blockTags[i] + '\\b[^>]*>', 'im'),
patLeft = '<' + blockTags[i] + '\\b[^>]*>',
patRight = '</' + blockTags[i] + '>';
// 1. Look for the first position of the first opening HTML tag in the text
while ((opTagPos = showdown.helper.regexIndexOf(text, rgx1)) !== -1) {
//2. Split the text in that position
var subTexts = showdown.helper.splitAtIndex(text, opTagPos);
//3. Match recursively
subTexts[1] = showdown.helper.replaceRecursiveRegExp(subTexts[1], repFunc, patLeft, patRight, 'im');
text = subTexts[0].concat(subTexts[1]);
}
}
// HR SPECIAL CASE
text = text.replace(/(\n {0,3}(<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g,
showdown.subParser('hashElement')(text, options, globals));
Expand Down
12 changes: 12 additions & 0 deletions test/issues/deeply-nested-HTML-blocks.html
@@ -0,0 +1,12 @@
<div>
<div>
<div>
<div>
text
</div>
<div>
text
</div>
</div>
</div>
</div>
12 changes: 12 additions & 0 deletions test/issues/deeply-nested-HTML-blocks.md
@@ -0,0 +1,12 @@
<div>
<div>
<div>
<div>
text
</div>
<div>
text
</div>
</div>
</div>
</div>
3 changes: 3 additions & 0 deletions test/issues/one-line-HTML-input.html
@@ -0,0 +1,3 @@
<div><div>a</div><div>b</div></div>
<pre><code>&lt;div&gt;**foobar**&lt;/div&gt;
</code></pre>
3 changes: 3 additions & 0 deletions test/issues/one-line-HTML-input.md
@@ -0,0 +1,3 @@
<div><div>a</div><div>b</div></div>

<div>**foobar**</div>
13 changes: 13 additions & 0 deletions test/node/showdown.helpers.js
Expand Up @@ -233,3 +233,16 @@ describe('forEach()', function () {
});
});
});

describe('matchRecursiveRegExp()', function () {
  'use strict';

  // Local alias for the helper under test
  var matchRecursive = showdown.helper.matchRecursiveRegExp;

  it('should match nested elements', function () {
    // A div nested inside another div, delimited by the opening and closing tag patterns
    var input = '<div><div>a</div></div>',
        // One match entry: the whole match, its inner content, the opening tag and the closing tag
        expected = [['<div><div>a</div></div>', '<div>a</div>', '<div>', '</div>']],
        matches = matchRecursive(input, '<div\\b[^>]*>', '</div>', 'gim');

    matches.should.deep.equal(expected);
  });

});

0 comments on commit 6fbc072

Please sign in to comment.