feat(hashHTMLSpans): Add support for hashing span elements

This feature enables hashing span elements that should not be touched by showdown. For instance, `<code>` tags in markdown source should not be parsed by showdown, so the text inside them remains unchanged. This is made possible by a new exciting internal feature, matchRecursiveRegExp. Closes #196, Closes #175, Partially reverts 5f043ca
showdownjs · Oct 19, 2015 · 3097bd4 · 3097bd4
1 parent 0416218
commit 3097bd4
Show file tree

Hide file tree

Showing 10 changed files with 222 additions and 7 deletions.
diff --git a/dist/showdown.js b/dist/showdown.js
diff --git a/dist/showdown.js.map b/dist/showdown.js.map
diff --git a/dist/showdown.min.js b/dist/showdown.min.js
diff --git a/dist/showdown.min.js.map b/dist/showdown.min.js.map
diff --git a/src/converter.js b/src/converter.js
@@ -41,8 +41,10 @@ showdown.Converter = function (converterOptions) {
       parserOrder = [
         'githubCodeBlocks',
         'hashHTMLBlocks',
+        'hashHTMLSpans',
         'stripLinkDefinitions',
         'blockGamut',
+        'unhashHTMLSpans',
         'unescapeSpecialChars'
       ];
 
@@ -183,6 +185,7 @@ showdown.Converter = function (converterOptions) {
 
     var globals = {
       gHtmlBlocks:     [],
+      gHtmlSpans:      [],
       gUrls:           {},
       gTitles:         {},
       gDimensions:     {},

diff --git a/src/helpers.js b/src/helpers.js
@@ -106,6 +106,68 @@ showdown.helper.escapeCharacters = function escapeCharacters(text, charsToEscape
   return text;
 };
 
+/**
+ * matchRecursiveRegExp
+ *
+ * (c) 2007 Steven Levithan <stevenlevithan.com>
+ * MIT License
+ *
+ * Accepts a string to search, a left and right format delimiter
+ * as regex patterns, and optional regex flags. Returns an array
+ * of matches, allowing nested instances of left/right delimiters.
+ * Use the "g" flag to return all matches, otherwise only the
+ * first is returned. Be careful to ensure that the left and
+ * right format delimiters produce mutually exclusive matches.
+ * Backreferences are not supported within the right delimiter
+ * due to how it is internally combined with the left delimiter.
+ * When matching strings whose format delimiters are unbalanced
+ * to the left or right, the output is intentionally as a
+ * conventional regex library with recursion support would
+ * produce, e.g. "<<x>" and "<x>>" both produce ["x"] when using
+ * "<" and ">" as the delimiters (both strings contain a single,
+ * balanced instance of "<x>").
+ *
+ * examples:
+ * matchRecursiveRegExp("test", "\\(", "\\)")
+ * returns: []
+ * matchRecursiveRegExp("<t<<e>><s>>t<>", "<", ">", "g")
+ * returns: ["t<<e>><s>", ""]
+ * matchRecursiveRegExp("<div id=\"x\">test</div>", "<div\\b[^>]*>", "</div>", "gi")
+ * returns: ["test"]
+ */
+showdown.helper.matchRecursiveRegExp = function (str, left, right, flags) {
+  'use strict';
+  var	f = flags || '',
+    g = f.indexOf('g') > -1,
+    x = new RegExp(left + '|' + right, f),
+    l = new RegExp(left, f.replace(/g/g, '')),
+    a = [],
+    t, s, m, start, end;
+
+  do {
+    t = 0;
+    while ((m = x.exec(str))) {
+      if (l.test(m[0])) {
+        if (!(t++)) {
+          start = m[0];
+          s = x.lastIndex;
+        }
+      } else if (t) {
+        if (!--t) {
+          end = m[0];
+          var match = str.slice(s, m.index);
+          a.push([start + match + end, match]);
+          if (!g) {
+            return a;
+          }
+        }
+      }
+    }
+  } while (t && (x.lastIndex = s));
+
+  return a;
+};
+
 /**
  * POLYFILLS
  */
@@ -118,6 +180,10 @@ if (showdown.helper.isUndefined(console)) {
     log: function (msg) {
       'use strict';
       alert(msg);
+    },
+    error: function (msg) {
+      'use strict';
+      throw msg;
     }
   };
 }
diff --git a/src/subParsers/codeSpans.js b/src/subParsers/codeSpans.js
@@ -26,13 +26,16 @@
 showdown.subParser('codeSpans', function (text) {
   'use strict';
 
+  /*
   //special case -> literal html code tag
+  // Introduced in commit 5f043ca46d20eb88240c753ae7f7c7429f4ee27
+  // Commented out due to issue #196
   text = text.replace(/(<code[^><]*?>)([^]*?)<\/code>/g, function (wholeMatch, tag, c) {
     c = c.replace(/^([ \t]*)/g, '');	// leading whitespace
     c = c.replace(/[ \t]*$/g, '');	// trailing whitespace
     c = showdown.subParser('encodeCode')(c);
     return tag + c + '</code>';
-  });
+  });*/
 
   /*
    text = text.replace(/

diff --git a/src/subParsers/hashHTMLSpans.js b/src/subParsers/hashHTMLSpans.js
@@ -0,0 +1,26 @@
+/**
+ * Hash span elements that should not be parsed as markdown
+ */
+showdown.subParser('hashHTMLSpans', function (text, config, globals) {
+  'use strict';
+
+  var matches = showdown.helper.matchRecursiveRegExp(text, '<code\\b[^>]*>', '</code>', 'gi');
+
+  for (var i = 0; i < matches.length; ++i) {
+    text = text.replace(matches[i][0], '~L' + (globals.gHtmlSpans.push(matches[i][0]) - 1) + 'L');
+  }
+  return text;
+});
+
+/**
+ * Unhash HTML spans
+ */
+showdown.subParser('unhashHTMLSpans', function (text, config, globals) {
+  'use strict';
+
+  for (var i = 0; i < globals.gHtmlSpans.length; ++i) {
+    text = text.replace('~L' + i + 'L', globals.gHtmlSpans[i]);
+  }
+
+  return text;
+});
diff --git a/test/cases/literal-html-tags.html b/test/cases/literal-html-tags.html
@@ -0,0 +1,9 @@
+<p><code>some **code** yeah</code></p>
+
+<p>some <code>inline **code** block</code></p>
+
+<p><code>some inline **code**</code> block</p>
+
+<p>yo dawg <code start="true">some <code start="false">code</code> inception</code></p>
+
+<div>some **div** yeah</div>
diff --git a/test/cases/literal-html-tags.md b/test/cases/literal-html-tags.md
@@ -0,0 +1,9 @@
+<code>some **code** yeah</code>
+
+some <code>inline **code** block</code>
+
+<code>some inline **code**</code> block
+
+yo dawg <code start="true">some <code start="false">code</code> inception</code>
+
+<div>some **div** yeah</div>