From f310f382c0f2c910d162ce890cd4d7ee42f3dbf3 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Thu, 29 Aug 2019 23:19:54 +0200 Subject: [PATCH 1/7] Fixed the greedy rematching bug --- components/prism-core.js | 75 ++++++++++++++++++++---------------- components/prism-core.min.js | 2 +- prism.js | 75 ++++++++++++++++++++---------------- 3 files changed, 85 insertions(+), 67 deletions(-) diff --git a/components/prism-core.js b/components/prism-core.js index d4566d5530..ced22bae4e 100644 --- a/components/prism-core.js +++ b/components/prism-core.js @@ -277,13 +277,13 @@ var _ = { return Token.stringify(_.util.encode(env.tokens), env.language); }, - matchGrammar: function (text, strarr, grammar, index, startPos, oneshot, target) { + matchGrammar: function (text, strarr, grammar, startIndex, startPos, greedyRematching, rematchCause, rematchReach) { for (var token in grammar) { if(!grammar.hasOwnProperty(token) || !grammar[token]) { continue; } - if (token == target) { + if (token == rematchCause) { return; } @@ -291,23 +291,28 @@ var _ = { patterns = (_.util.type(patterns) === "Array") ? patterns : [patterns]; for (var j = 0; j < patterns.length; ++j) { - var pattern = patterns[j], - inside = pattern.inside, - lookbehind = !!pattern.lookbehind, - greedy = !!pattern.greedy, + var patternObj = patterns[j], + inside = patternObj.inside, + lookbehind = !!patternObj.lookbehind, + greedy = !!patternObj.greedy, lookbehindLength = 0, - alias = pattern.alias; + alias = patternObj.alias; - if (greedy && !pattern.pattern.global) { + if (greedy && !patternObj.pattern.global) { // Without the global flag, lastIndex won't work - var flags = pattern.pattern.toString().match(/[imuy]*$/)[0]; - pattern.pattern = RegExp(pattern.pattern.source, flags + "g"); + var flags = patternObj.pattern.toString().match(/[imuy]*$/)[0]; + patternObj.pattern = RegExp(patternObj.pattern.source, flags + "g"); } - pattern = pattern.pattern || pattern; + /** @type {RegExp} */ + var pattern = patternObj.pattern || patternObj; // Don’t cache length as it changes during the loop - for (var i = index, pos = startPos; i < strarr.length; pos += strarr[i].length, ++i) { + for (var i = startIndex, pos = startPos; i < strarr.length; pos += strarr[i].length, ++i) { + + if (greedyRematching && pos >= rematchReach) { + break; + } var str = strarr[i]; @@ -327,12 +332,16 @@ var _ = { break; } - var from = match.index + (lookbehind ? match[1].length : 0), - to = match.index + match[0].length, - k = i, - p = pos; + var from = match.index + (lookbehind ? match[1].length : 0); + var to = match.index + match[0].length; + var len = strarr.length; - for (var len = strarr.length; k < len && (p < to || (!strarr[k].type && !strarr[k - 1].greedy)); ++k) { + for ( + var k = i, p = pos; + // `!strarr[k].type` checks whether `strarr[k]` is a string + k < len && (p < to || !strarr[k].type); + ++k + ) { p += strarr[k].length; // Move the index i to the element in strarr that is closest to from if (from >= p) { @@ -358,23 +367,26 @@ var _ = { } if (!match) { - if (oneshot) { - break; - } - continue; } - if(lookbehind) { + if (lookbehind) { lookbehindLength = match[1] ? match[1].length : 0; } var from = match.index + lookbehindLength, - match = match[0].slice(lookbehindLength), - to = from + match.length, + matchStr = match[0].slice(lookbehindLength), + to = from + matchStr.length, before = str.slice(0, from), after = str.slice(to); + var reach = pos + str.length; + + if (greedyRematching && reach > rematchReach) { + // expand rematch reach for potential future matches + rematchReach = reach + } + var args = [i, delNum]; if (before) { @@ -383,7 +395,7 @@ var _ = { args.push(before); } - var wrapped = new Token(token, inside? _.tokenize(match, inside) : match, alias, match, greedy); + var wrapped = new Token(token, inside ? _.tokenize(matchStr, inside) : matchStr, alias, matchStr); args.push(wrapped); @@ -393,11 +405,9 @@ var _ = { Array.prototype.splice.apply(strarr, args); - if (delNum != 1) - _.matchGrammar(text, strarr, grammar, i, pos, true, token); - - if (oneshot) - break; + if (delNum != 1) { + _.matchGrammar(text, strarr, grammar, i, pos, true, token, reach); + } } } } @@ -416,7 +426,7 @@ var _ = { delete grammar.rest; } - _.matchGrammar(text, strarr, grammar, 0, 0, false); + _.matchGrammar(text, strarr, grammar, 0, 0); return strarr; }, @@ -450,13 +460,12 @@ var _ = { _self.Prism = _; -function Token(type, content, alias, matchedStr, greedy) { +function Token(type, content, alias, matchedStr) { this.type = type; this.content = content; this.alias = alias; // Copy of the full string this token was created from this.length = (matchedStr || "").length|0; - this.greedy = !!greedy; } Token.stringify = function(o, language) { diff --git a/components/prism-core.min.js b/components/prism-core.min.js index 9b65d451cb..eb5dd5f30b 100644 --- a/components/prism-core.min.js +++ b/components/prism-core.min.js @@ -1 +1 @@ -var _self="undefined"!=typeof window?window:"undefined"!=typeof WorkerGlobalScope&&self instanceof WorkerGlobalScope?self:{},Prism=function(g){var c=/\blang(?:uage)?-([\w-]+)\b/i,a=0,C={manual:g.Prism&&g.Prism.manual,disableWorkerMessageHandler:g.Prism&&g.Prism.disableWorkerMessageHandler,util:{encode:function(e){return e instanceof M?new M(e.type,C.util.encode(e.content),e.alias):Array.isArray(e)?e.map(C.util.encode):e.replace(/&/g,"&").replace(/e.length)return;if(!(k instanceof M)){if(f&&y!=a.length-1){if(c.lastIndex=v,!(x=c.exec(e)))break;for(var b=x.index+(h?x[1].length:0),w=x.index+x[0].length,A=y,P=v,O=a.length;A"+n.content+""},!g.document)return g.addEventListener&&(C.disableWorkerMessageHandler||g.addEventListener("message",function(e){var a=JSON.parse(e.data),n=a.language,t=a.code,r=a.immediateClose;g.postMessage(C.highlight(t,C.languages[n],n)),r&&g.close()},!1)),C;var e=document.currentScript||[].slice.call(document.getElementsByTagName("script")).pop();return e&&(C.filename=e.src,C.manual||e.hasAttribute("data-manual")||("loading"!==document.readyState?window.requestAnimationFrame?window.requestAnimationFrame(C.highlightAll):window.setTimeout(C.highlightAll,16):document.addEventListener("DOMContentLoaded",C.highlightAll))),C}(_self);"undefined"!=typeof module&&module.exports&&(module.exports=Prism),"undefined"!=typeof global&&(global.Prism=Prism); \ No newline at end of file +var _self="undefined"!=typeof window?window:"undefined"!=typeof WorkerGlobalScope&&self instanceof WorkerGlobalScope?self:{},Prism=function(g){var c=/\blang(?:uage)?-([\w-]+)\b/i,a=0,L={manual:g.Prism&&g.Prism.manual,disableWorkerMessageHandler:g.Prism&&g.Prism.disableWorkerMessageHandler,util:{encode:function(e){return e instanceof z?new z(e.type,L.util.encode(e.content),e.alias):Array.isArray(e)?e.map(L.util.encode):e.replace(/&/g,"&").replace(/e.length)return;if(!(w instanceof z)){if(d&&k!=a.length-1){if(v.lastIndex=b,!(j=v.exec(e)))break;for(var A=j.index+(f?j[1].length:0),P=j.index+j[0].length,O=a.length,x=k,N=b;x"+n.content+""},!g.document)return g.addEventListener&&(L.disableWorkerMessageHandler||g.addEventListener("message",function(e){var a=JSON.parse(e.data),n=a.language,t=a.code,r=a.immediateClose;g.postMessage(L.highlight(t,L.languages[n],n)),r&&g.close()},!1)),L;var e=document.currentScript||[].slice.call(document.getElementsByTagName("script")).pop();return e&&(L.filename=e.src,L.manual||e.hasAttribute("data-manual")||("loading"!==document.readyState?window.requestAnimationFrame?window.requestAnimationFrame(L.highlightAll):window.setTimeout(L.highlightAll,16):document.addEventListener("DOMContentLoaded",L.highlightAll))),L}(_self);"undefined"!=typeof module&&module.exports&&(module.exports=Prism),"undefined"!=typeof global&&(global.Prism=Prism); \ No newline at end of file diff --git a/prism.js b/prism.js index 2391e8b75a..dc734f0176 100644 --- a/prism.js +++ b/prism.js @@ -282,13 +282,13 @@ var _ = { return Token.stringify(_.util.encode(env.tokens), env.language); }, - matchGrammar: function (text, strarr, grammar, index, startPos, oneshot, target) { + matchGrammar: function (text, strarr, grammar, startIndex, startPos, greedyRematching, rematchCause, rematchReach) { for (var token in grammar) { if(!grammar.hasOwnProperty(token) || !grammar[token]) { continue; } - if (token == target) { + if (token == rematchCause) { return; } @@ -296,23 +296,28 @@ var _ = { patterns = (_.util.type(patterns) === "Array") ? patterns : [patterns]; for (var j = 0; j < patterns.length; ++j) { - var pattern = patterns[j], - inside = pattern.inside, - lookbehind = !!pattern.lookbehind, - greedy = !!pattern.greedy, + var patternObj = patterns[j], + inside = patternObj.inside, + lookbehind = !!patternObj.lookbehind, + greedy = !!patternObj.greedy, lookbehindLength = 0, - alias = pattern.alias; + alias = patternObj.alias; - if (greedy && !pattern.pattern.global) { + if (greedy && !patternObj.pattern.global) { // Without the global flag, lastIndex won't work - var flags = pattern.pattern.toString().match(/[imuy]*$/)[0]; - pattern.pattern = RegExp(pattern.pattern.source, flags + "g"); + var flags = patternObj.pattern.toString().match(/[imuy]*$/)[0]; + patternObj.pattern = RegExp(patternObj.pattern.source, flags + "g"); } - pattern = pattern.pattern || pattern; + /** @type {RegExp} */ + var pattern = patternObj.pattern || patternObj; // Don’t cache length as it changes during the loop - for (var i = index, pos = startPos; i < strarr.length; pos += strarr[i].length, ++i) { + for (var i = startIndex, pos = startPos; i < strarr.length; pos += strarr[i].length, ++i) { + + if (greedyRematching && pos >= rematchReach) { + break; + } var str = strarr[i]; @@ -332,12 +337,16 @@ var _ = { break; } - var from = match.index + (lookbehind ? match[1].length : 0), - to = match.index + match[0].length, - k = i, - p = pos; + var from = match.index + (lookbehind ? match[1].length : 0); + var to = match.index + match[0].length; + var len = strarr.length; - for (var len = strarr.length; k < len && (p < to || (!strarr[k].type && !strarr[k - 1].greedy)); ++k) { + for ( + var k = i, p = pos; + // `!strarr[k].type` checks whether `strarr[k]` is a string + k < len && (p < to || !strarr[k].type); + ++k + ) { p += strarr[k].length; // Move the index i to the element in strarr that is closest to from if (from >= p) { @@ -363,23 +372,26 @@ var _ = { } if (!match) { - if (oneshot) { - break; - } - continue; } - if(lookbehind) { + if (lookbehind) { lookbehindLength = match[1] ? match[1].length : 0; } var from = match.index + lookbehindLength, - match = match[0].slice(lookbehindLength), - to = from + match.length, + matchStr = match[0].slice(lookbehindLength), + to = from + matchStr.length, before = str.slice(0, from), after = str.slice(to); + var reach = pos + str.length; + + if (greedyRematching && reach > rematchReach) { + // expand rematch reach for potential future matches + rematchReach = reach + } + var args = [i, delNum]; if (before) { @@ -388,7 +400,7 @@ var _ = { args.push(before); } - var wrapped = new Token(token, inside? _.tokenize(match, inside) : match, alias, match, greedy); + var wrapped = new Token(token, inside ? _.tokenize(matchStr, inside) : matchStr, alias, matchStr); args.push(wrapped); @@ -398,11 +410,9 @@ var _ = { Array.prototype.splice.apply(strarr, args); - if (delNum != 1) - _.matchGrammar(text, strarr, grammar, i, pos, true, token); - - if (oneshot) - break; + if (delNum != 1) { + _.matchGrammar(text, strarr, grammar, i, pos, true, token, reach); + } } } } @@ -421,7 +431,7 @@ var _ = { delete grammar.rest; } - _.matchGrammar(text, strarr, grammar, 0, 0, false); + _.matchGrammar(text, strarr, grammar, 0, 0); return strarr; }, @@ -455,13 +465,12 @@ var _ = { _self.Prism = _; -function Token(type, content, alias, matchedStr, greedy) { +function Token(type, content, alias, matchedStr) { this.type = type; this.content = content; this.alias = alias; // Copy of the full string this token was created from this.length = (matchedStr || "").length|0; - this.greedy = !!greedy; } Token.stringify = function(o, language) { From ae6da1c0f4f8d6556a480cdbfc6dbc43f55b6a50 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Thu, 29 Aug 2019 23:43:57 +0200 Subject: [PATCH 2/7] Added test --- tests/core/greedy.js | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/core/greedy.js b/tests/core/greedy.js index ac802b38ad..3caad0c168 100644 --- a/tests/core/greedy.js +++ b/tests/core/greedy.js @@ -56,7 +56,6 @@ describe('Greedy matching', function () { }); // https://github.com/PrismJS/prism/issues/1492 - /* it('should correctly rematch tokens', function () { testTokens({ grammar: { @@ -75,8 +74,9 @@ describe('Greedy matching', function () { code: `<'> '' ''\n<"> "" ""`, expected: [ ["c", "<'>"], - ["a", "''"], - ["a", "''"], + " '", + ["a", "' '"], + "'\n", ["c", "<\">"], ["b", "\"\""], @@ -84,5 +84,4 @@ describe('Greedy matching', function () { ] }); }); - */ }); From 5988083407652f919a3bebb0d7e5f9b86aee35a4 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Mon, 2 Sep 2019 20:09:29 +0200 Subject: [PATCH 3/7] Removed unnecessary comment --- tests/core/greedy.js | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/core/greedy.js b/tests/core/greedy.js index 3caad0c168..812828cbff 100644 --- a/tests/core/greedy.js +++ b/tests/core/greedy.js @@ -55,7 +55,6 @@ describe('Greedy matching', function () { }); }); - // https://github.com/PrismJS/prism/issues/1492 it('should correctly rematch tokens', function () { testTokens({ grammar: { From 4184eefecf50c7ddea97f8007e7d3693ebb3b35b Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Tue, 19 May 2020 14:22:39 +0200 Subject: [PATCH 4/7] Reimplemented fix --- components/prism-core.js | 81 ++++++++++++++++++++++-------------- components/prism-core.min.js | 2 +- prism.js | 81 ++++++++++++++++++++++-------------- 3 files changed, 99 insertions(+), 65 deletions(-) diff --git a/components/prism-core.js b/components/prism-core.js index cd5cee1e81..7008f465b1 100644 --- a/components/prism-core.js +++ b/components/prism-core.js @@ -378,13 +378,19 @@ var _ = { _self.Prism = _; -function Token(type, content, alias, matchedStr, greedy) { +/** + * + * @param {string} type + * @param {string | Token | (string | Token)[]} content + * @param {string | string[] | undefined} [alias] + * @param {string | undefined} [matchedStr] + */ +function Token(type, content, alias, matchedStr) { this.type = type; this.content = content; this.alias = alias; // Copy of the full string this token was created from - this.length = (matchedStr || '').length|0; - this.greedy = !!greedy; + this.length = (matchedStr || '').length | 0; } Token.stringify = function stringify(o, language) { @@ -433,10 +439,14 @@ Token.stringify = function stringify(o, language) { * @param {any} grammar * @param {LinkedListNode} startNode * @param {number} startPos - * @param {boolean} [oneshot=false] - * @param {string} [target] + * @param {RematchOptions} [rematch] + * @returns {void} + * + * @typedef RematchOptions + * @property {string} cause + * @property {number} reach */ -function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, target) { +function matchGrammar(text, tokenList, grammar, startNode, startPos, rematch) { for (var token in grammar) { if (!grammar.hasOwnProperty(token) || !grammar[token]) { continue; @@ -446,24 +456,25 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta patterns = Array.isArray(patterns) ? patterns : [patterns]; for (var j = 0; j < patterns.length; ++j) { - if (target && target == token + ',' + j) { + if (rematch && rematch.cause == token + ',' + j) { return; } - var pattern = patterns[j], - inside = pattern.inside, - lookbehind = !!pattern.lookbehind, - greedy = !!pattern.greedy, + var patternObj = patterns[j], + inside = patternObj.inside, + lookbehind = !!patternObj.lookbehind, + greedy = !!patternObj.greedy, lookbehindLength = 0, - alias = pattern.alias; + alias = patternObj.alias; - if (greedy && !pattern.pattern.global) { + if (greedy && !patternObj.pattern.global) { // Without the global flag, lastIndex won't work - var flags = pattern.pattern.toString().match(/[imsuy]*$/)[0]; - pattern.pattern = RegExp(pattern.pattern.source, flags + 'g'); + var flags = patternObj.pattern.toString().match(/[imsuy]*$/)[0]; + patternObj.pattern = RegExp(patternObj.pattern.source, flags + 'g'); } - pattern = pattern.pattern || pattern; + /** @type {RegExp} */ + var pattern = patternObj.pattern || patternObj; for ( // iterate the token list and keep track of the current token/string position var currentNode = startNode.next, pos = startPos; @@ -471,6 +482,10 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta pos += currentNode.value.length, currentNode = currentNode.next ) { + if (rematch && pos >= rematch.reach) { + break; + } + var str = currentNode.value; if (tokenList.length > text.length) { @@ -513,7 +528,7 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta // find the last node which is affected by this match for ( var k = currentNode; - k !== tokenList.tail && (p < to || (typeof k.value === 'string' && !k.prev.value.greedy)); + k !== tokenList.tail && (p < to || typeof k.value === 'string'); k = k.next ) { removeCount++; @@ -531,10 +546,6 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta } if (!match) { - if (oneshot) { - break; - } - continue; } @@ -543,11 +554,16 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta } var from = match.index + lookbehindLength, - match = match[0].slice(lookbehindLength), - to = from + match.length, + matchStr = match[0].slice(lookbehindLength), + to = from + matchStr.length, before = str.slice(0, from), after = str.slice(to); + var reach = pos + str.length; + if (rematch && reach > rematch.reach) { + rematch.reach = reach; + } + var removeFrom = currentNode.prev; if (before) { @@ -557,19 +573,19 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta removeRange(tokenList, removeFrom, removeCount); - var wrapped = new Token(token, inside ? _.tokenize(match, inside) : match, alias, match, greedy); + var wrapped = new Token(token, inside ? _.tokenize(matchStr, inside) : matchStr, alias, matchStr); currentNode = addAfter(tokenList, removeFrom, wrapped); if (after) { addAfter(tokenList, currentNode, after); } - - if (removeCount > 1) - matchGrammar(text, tokenList, grammar, currentNode.prev, pos, true, token + ',' + j); - - if (oneshot) - break; + if (removeCount > 1) { + matchGrammar(text, tokenList, grammar, currentNode.prev, pos, { + cause: token + ',' + j, + reach: reach + }); + } } } } @@ -578,8 +594,9 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta /** * @typedef LinkedListNode * @property {T} value - * @property {LinkedListNode | null} prev The previous node. - * @property {LinkedListNode | null} next The next node. + * @property {LinkedListNode} prev The previous node. + * @property {LinkedListNode} next The next node. + * * @template T */ diff --git a/components/prism-core.min.js b/components/prism-core.min.js index 698c572d7c..31ec1ebbf8 100644 --- a/components/prism-core.min.js +++ b/components/prism-core.min.js @@ -1 +1 @@ -var _self="undefined"!=typeof window?window:"undefined"!=typeof WorkerGlobalScope&&self instanceof WorkerGlobalScope?self:{},Prism=function(u){var c=/\blang(?:uage)?-([\w-]+)\b/i,n=0,C={manual:u.Prism&&u.Prism.manual,disableWorkerMessageHandler:u.Prism&&u.Prism.disableWorkerMessageHandler,util:{encode:function e(n){return n instanceof _?new _(n.type,e(n.content),n.alias):Array.isArray(n)?n.map(e):n.replace(/&/g,"&").replace(/n.length)return;if(!(b instanceof _)){var x=1;if(d&&y!=t.tail.prev){g.lastIndex=k;var w=g.exec(n);if(!w)break;var A=w.index+(h&&w[1]?w[1].length:0),P=w.index+w[0].length,S=k;for(S+=y.value.length;S<=A;)y=y.next,S+=y.value.length;if(S-=y.value.length,k=S,y.value instanceof _)continue;for(var O=y;O!==t.tail&&(S"+a.content+""},!u.document)return u.addEventListener&&(C.disableWorkerMessageHandler||u.addEventListener("message",function(e){var n=JSON.parse(e.data),t=n.language,r=n.code,a=n.immediateClose;u.postMessage(C.highlight(r,C.languages[t],t)),a&&u.close()},!1)),C;var e=C.util.currentScript();function t(){C.manual||C.highlightAll()}if(e&&(C.filename=e.src,e.hasAttribute("data-manual")&&(C.manual=!0)),!C.manual){var r=document.readyState;"loading"===r||"interactive"===r&&e&&e.defer?document.addEventListener("DOMContentLoaded",t):window.requestAnimationFrame?window.requestAnimationFrame(t):window.setTimeout(t,16)}return C}(_self);"undefined"!=typeof module&&module.exports&&(module.exports=Prism),"undefined"!=typeof global&&(global.Prism=Prism); \ No newline at end of file +var _self="undefined"!=typeof window?window:"undefined"!=typeof WorkerGlobalScope&&self instanceof WorkerGlobalScope?self:{},Prism=function(u){var c=/\blang(?:uage)?-([\w-]+)\b/i,n=0,M={manual:u.Prism&&u.Prism.manual,disableWorkerMessageHandler:u.Prism&&u.Prism.disableWorkerMessageHandler,util:{encode:function e(n){return n instanceof W?new W(n.type,e(n.content),n.alias):Array.isArray(n)?n.map(e):n.replace(/&/g,"&").replace(/=i.reach);k+=y.value.length,y=y.next){var b=y.value;if(t.length>n.length)return;if(!(b instanceof W)){var x=1;if(f&&y!=t.tail.prev){m.lastIndex=k;var w=m.exec(n);if(!w)break;var A=w.index+(h&&w[1]?w[1].length:0),P=w.index+w[0].length,S=k;for(S+=y.value.length;S<=A;)y=y.next,S+=y.value.length;if(S-=y.value.length,k=S,y.value instanceof W)continue;for(var O=y;O!==t.tail&&(Si.reach&&(i.reach=L);var C=y.prev;N&&(C=I(t,C,N),k+=N.length),z(t,C,x);var _=new W(o,g?M.tokenize(E,g):E,v,E);y=I(t,C,_),j&&I(t,y,j),1"+a.content+""},!u.document)return u.addEventListener&&(M.disableWorkerMessageHandler||u.addEventListener("message",function(e){var n=JSON.parse(e.data),t=n.language,r=n.code,a=n.immediateClose;u.postMessage(M.highlight(r,M.languages[t],t)),a&&u.close()},!1)),M;var e=M.util.currentScript();function t(){M.manual||M.highlightAll()}if(e&&(M.filename=e.src,e.hasAttribute("data-manual")&&(M.manual=!0)),!M.manual){var r=document.readyState;"loading"===r||"interactive"===r&&e&&e.defer?document.addEventListener("DOMContentLoaded",t):window.requestAnimationFrame?window.requestAnimationFrame(t):window.setTimeout(t,16)}return M}(_self);"undefined"!=typeof module&&module.exports&&(module.exports=Prism),"undefined"!=typeof global&&(global.Prism=Prism); \ No newline at end of file diff --git a/prism.js b/prism.js index f257ebf27c..a456352fa2 100644 --- a/prism.js +++ b/prism.js @@ -383,13 +383,19 @@ var _ = { _self.Prism = _; -function Token(type, content, alias, matchedStr, greedy) { +/** + * + * @param {string} type + * @param {string | Token | (string | Token)[]} content + * @param {string | string[] | undefined} [alias] + * @param {string | undefined} [matchedStr] + */ +function Token(type, content, alias, matchedStr) { this.type = type; this.content = content; this.alias = alias; // Copy of the full string this token was created from - this.length = (matchedStr || '').length|0; - this.greedy = !!greedy; + this.length = (matchedStr || '').length | 0; } Token.stringify = function stringify(o, language) { @@ -438,10 +444,14 @@ Token.stringify = function stringify(o, language) { * @param {any} grammar * @param {LinkedListNode} startNode * @param {number} startPos - * @param {boolean} [oneshot=false] - * @param {string} [target] + * @param {RematchOptions} [rematch] + * @returns {void} + * + * @typedef RematchOptions + * @property {string} cause + * @property {number} reach */ -function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, target) { +function matchGrammar(text, tokenList, grammar, startNode, startPos, rematch) { for (var token in grammar) { if (!grammar.hasOwnProperty(token) || !grammar[token]) { continue; @@ -451,24 +461,25 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta patterns = Array.isArray(patterns) ? patterns : [patterns]; for (var j = 0; j < patterns.length; ++j) { - if (target && target == token + ',' + j) { + if (rematch && rematch.cause == token + ',' + j) { return; } - var pattern = patterns[j], - inside = pattern.inside, - lookbehind = !!pattern.lookbehind, - greedy = !!pattern.greedy, + var patternObj = patterns[j], + inside = patternObj.inside, + lookbehind = !!patternObj.lookbehind, + greedy = !!patternObj.greedy, lookbehindLength = 0, - alias = pattern.alias; + alias = patternObj.alias; - if (greedy && !pattern.pattern.global) { + if (greedy && !patternObj.pattern.global) { // Without the global flag, lastIndex won't work - var flags = pattern.pattern.toString().match(/[imsuy]*$/)[0]; - pattern.pattern = RegExp(pattern.pattern.source, flags + 'g'); + var flags = patternObj.pattern.toString().match(/[imsuy]*$/)[0]; + patternObj.pattern = RegExp(patternObj.pattern.source, flags + 'g'); } - pattern = pattern.pattern || pattern; + /** @type {RegExp} */ + var pattern = patternObj.pattern || patternObj; for ( // iterate the token list and keep track of the current token/string position var currentNode = startNode.next, pos = startPos; @@ -476,6 +487,10 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta pos += currentNode.value.length, currentNode = currentNode.next ) { + if (rematch && pos >= rematch.reach) { + break; + } + var str = currentNode.value; if (tokenList.length > text.length) { @@ -518,7 +533,7 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta // find the last node which is affected by this match for ( var k = currentNode; - k !== tokenList.tail && (p < to || (typeof k.value === 'string' && !k.prev.value.greedy)); + k !== tokenList.tail && (p < to || typeof k.value === 'string'); k = k.next ) { removeCount++; @@ -536,10 +551,6 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta } if (!match) { - if (oneshot) { - break; - } - continue; } @@ -548,11 +559,16 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta } var from = match.index + lookbehindLength, - match = match[0].slice(lookbehindLength), - to = from + match.length, + matchStr = match[0].slice(lookbehindLength), + to = from + matchStr.length, before = str.slice(0, from), after = str.slice(to); + var reach = pos + str.length; + if (rematch && reach > rematch.reach) { + rematch.reach = reach; + } + var removeFrom = currentNode.prev; if (before) { @@ -562,19 +578,19 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta removeRange(tokenList, removeFrom, removeCount); - var wrapped = new Token(token, inside ? _.tokenize(match, inside) : match, alias, match, greedy); + var wrapped = new Token(token, inside ? _.tokenize(matchStr, inside) : matchStr, alias, matchStr); currentNode = addAfter(tokenList, removeFrom, wrapped); if (after) { addAfter(tokenList, currentNode, after); } - - if (removeCount > 1) - matchGrammar(text, tokenList, grammar, currentNode.prev, pos, true, token + ',' + j); - - if (oneshot) - break; + if (removeCount > 1) { + matchGrammar(text, tokenList, grammar, currentNode.prev, pos, { + cause: token + ',' + j, + reach: reach + }); + } } } } @@ -583,8 +599,9 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta /** * @typedef LinkedListNode * @property {T} value - * @property {LinkedListNode | null} prev The previous node. - * @property {LinkedListNode | null} next The next node. + * @property {LinkedListNode} prev The previous node. + * @property {LinkedListNode} next The next node. + * * @template T */ From 0712d2a3b2135114b4370720fc6868b0847133a9 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Tue, 19 May 2020 15:00:41 +0200 Subject: [PATCH 5/7] Removed empty comment line --- components/prism-core.js | 1 - prism.js | 1 - 2 files changed, 2 deletions(-) diff --git a/components/prism-core.js b/components/prism-core.js index 7008f465b1..dca14c339a 100644 --- a/components/prism-core.js +++ b/components/prism-core.js @@ -379,7 +379,6 @@ var _ = { _self.Prism = _; /** - * * @param {string} type * @param {string | Token | (string | Token)[]} content * @param {string | string[] | undefined} [alias] diff --git a/prism.js b/prism.js index a456352fa2..5e44e5ecd1 100644 --- a/prism.js +++ b/prism.js @@ -384,7 +384,6 @@ var _ = { _self.Prism = _; /** - * * @param {string} type * @param {string | Token | (string | Token)[]} content * @param {string | string[] | undefined} [alias] From fec42435ba4754e6e2a6255d8b762424d79950e3 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Tue, 19 May 2020 15:42:03 +0200 Subject: [PATCH 6/7] Changed some comments --- components/prism-core.js | 6 ++++-- prism.js | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/components/prism-core.js b/components/prism-core.js index dca14c339a..0a162f676d 100644 --- a/components/prism-core.js +++ b/components/prism-core.js @@ -580,6 +580,8 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, rematch) { } if (removeCount > 1) { + // at least one Token object was removed, so we have to do some rematching + // this can only happen if the current pattern is greedy matchGrammar(text, tokenList, grammar, currentNode.prev, pos, { cause: token + ',' + j, reach: reach @@ -593,8 +595,8 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, rematch) { /** * @typedef LinkedListNode * @property {T} value - * @property {LinkedListNode} prev The previous node. - * @property {LinkedListNode} next The next node. + * @property {LinkedListNode | null} prev The previous node. + * @property {LinkedListNode | null} next The next node. * * @template T */ diff --git a/prism.js b/prism.js index 5e44e5ecd1..593952a850 100644 --- a/prism.js +++ b/prism.js @@ -585,6 +585,8 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, rematch) { } if (removeCount > 1) { + // at least one Token object was removed, so we have to do some rematching + // this can only happen if the current pattern is greedy matchGrammar(text, tokenList, grammar, currentNode.prev, pos, { cause: token + ',' + j, reach: reach @@ -598,8 +600,8 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, rematch) { /** * @typedef LinkedListNode * @property {T} value - * @property {LinkedListNode} prev The previous node. - * @property {LinkedListNode} next The next node. + * @property {LinkedListNode | null} prev The previous node. + * @property {LinkedListNode | null} next The next node. * * @template T */ From a9edd206ae608a9b0da42d6785d10a46f699739e Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Tue, 19 May 2020 16:48:42 +0200 Subject: [PATCH 7/7] Removed empty comment line --- components/prism-core.js | 1 - prism.js | 1 - 2 files changed, 2 deletions(-) diff --git a/components/prism-core.js b/components/prism-core.js index 0a162f676d..6472512865 100644 --- a/components/prism-core.js +++ b/components/prism-core.js @@ -597,7 +597,6 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, rematch) { * @property {T} value * @property {LinkedListNode | null} prev The previous node. * @property {LinkedListNode | null} next The next node. - * * @template T */ diff --git a/prism.js b/prism.js index 593952a850..e3a8df2775 100644 --- a/prism.js +++ b/prism.js @@ -602,7 +602,6 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, rematch) { * @property {T} value * @property {LinkedListNode | null} prev The previous node. * @property {LinkedListNode | null} next The next node. - * * @template T */