diff --git a/components/prism-core.js b/components/prism-core.js index 4d1c1aefe5..2be3562fb5 100644 --- a/components/prism-core.js +++ b/components/prism-core.js @@ -715,12 +715,11 @@ _self.Prism = _; * @param {string | TokenStream} content See {@link Token#content content} * @param {string|string[]} [alias] The alias(es) of the token. * @param {string} [matchedStr=""] A copy of the full string this token was created from. - * @param {boolean} [greedy=false] Whether the pattern that created this token is greedy or not. Will be removed soon. * @class * @global * @public */ -function Token(type, content, alias, matchedStr, greedy) { +function Token(type, content, alias, matchedStr) { /** * The type of the token. * @@ -748,8 +747,8 @@ function Token(type, content, alias, matchedStr, greedy) { * @public */ this.alias = alias; - this.length = (matchedStr || "").length|0; - this.greedy = !!greedy; + // Copy of the full string this token was created from + this.length = (matchedStr || '').length | 0; } /** @@ -826,11 +825,15 @@ Token.stringify = function stringify(o, language) { * @param {any} grammar * @param {LinkedListNode} startNode * @param {number} startPos - * @param {boolean} [oneshot=false] - * @param {string} [target] + * @param {RematchOptions} [rematch] + * @returns {void} * @private + * + * @typedef RematchOptions + * @property {string} cause + * @property {number} reach */ -function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, target) { +function matchGrammar(text, tokenList, grammar, startNode, startPos, rematch) { for (var token in grammar) { if (!grammar.hasOwnProperty(token) || !grammar[token]) { continue; @@ -840,24 +843,25 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta patterns = Array.isArray(patterns) ? patterns : [patterns]; for (var j = 0; j < patterns.length; ++j) { - if (target && target == token + ',' + j) { + if (rematch && rematch.cause == token + ',' + j) { return; } - var pattern = patterns[j], - inside = pattern.inside, - lookbehind = !!pattern.lookbehind, - greedy = !!pattern.greedy, + var patternObj = patterns[j], + inside = patternObj.inside, + lookbehind = !!patternObj.lookbehind, + greedy = !!patternObj.greedy, lookbehindLength = 0, - alias = pattern.alias; + alias = patternObj.alias; - if (greedy && !pattern.pattern.global) { + if (greedy && !patternObj.pattern.global) { // Without the global flag, lastIndex won't work - var flags = pattern.pattern.toString().match(/[imsuy]*$/)[0]; - pattern.pattern = RegExp(pattern.pattern.source, flags + 'g'); + var flags = patternObj.pattern.toString().match(/[imsuy]*$/)[0]; + patternObj.pattern = RegExp(patternObj.pattern.source, flags + 'g'); } - pattern = pattern.pattern || pattern; + /** @type {RegExp} */ + var pattern = patternObj.pattern || patternObj; for ( // iterate the token list and keep track of the current token/string position var currentNode = startNode.next, pos = startPos; @@ -865,6 +869,10 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta pos += currentNode.value.length, currentNode = currentNode.next ) { + if (rematch && pos >= rematch.reach) { + break; + } + var str = currentNode.value; if (tokenList.length > text.length) { @@ -907,7 +915,7 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta // find the last node which is affected by this match for ( var k = currentNode; - k !== tokenList.tail && (p < to || (typeof k.value === 'string' && !k.prev.value.greedy)); + k !== tokenList.tail && (p < to || typeof k.value === 'string'); k = k.next ) { removeCount++; @@ -925,10 +933,6 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta } if (!match) { - if (oneshot) { - break; - } - continue; } @@ -937,11 +941,16 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta } var from = match.index + lookbehindLength, - match = match[0].slice(lookbehindLength), - to = from + match.length, + matchStr = match[0].slice(lookbehindLength), + to = from + matchStr.length, before = str.slice(0, from), after = str.slice(to); + var reach = pos + str.length; + if (rematch && reach > rematch.reach) { + rematch.reach = reach; + } + var removeFrom = currentNode.prev; if (before) { @@ -951,19 +960,21 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta removeRange(tokenList, removeFrom, removeCount); - var wrapped = new Token(token, inside ? _.tokenize(match, inside) : match, alias, match, greedy); + var wrapped = new Token(token, inside ? _.tokenize(matchStr, inside) : matchStr, alias, matchStr); currentNode = addAfter(tokenList, removeFrom, wrapped); if (after) { addAfter(tokenList, currentNode, after); } - - if (removeCount > 1) - matchGrammar(text, tokenList, grammar, currentNode.prev, pos, true, token + ',' + j); - - if (oneshot) - break; + if (removeCount > 1) { + // at least one Token object was removed, so we have to do some rematching + // this can only happen if the current pattern is greedy + matchGrammar(text, tokenList, grammar, currentNode.prev, pos, { + cause: token + ',' + j, + reach: reach + }); + } } } } diff --git a/components/prism-core.min.js b/components/prism-core.min.js index 5ca85cbb43..267167ed8b 100644 --- a/components/prism-core.min.js +++ b/components/prism-core.min.js @@ -1 +1 @@ -var _self="undefined"!=typeof window?window:"undefined"!=typeof WorkerGlobalScope&&self instanceof WorkerGlobalScope?self:{},Prism=function(u){var c=/\blang(?:uage)?-([\w-]+)\b/i,n=0,C={manual:u.Prism&&u.Prism.manual,disableWorkerMessageHandler:u.Prism&&u.Prism.disableWorkerMessageHandler,util:{encode:function e(n){return n instanceof _?new _(n.type,e(n.content),n.alias):Array.isArray(n)?n.map(e):n.replace(/&/g,"&").replace(/n.length)return;if(!(b instanceof _)){var x=1;if(d&&y!=t.tail.prev){g.lastIndex=k;var w=g.exec(n);if(!w)break;var A=w.index+(h&&w[1]?w[1].length:0),P=w.index+w[0].length,S=k;for(S+=y.value.length;S<=A;)y=y.next,S+=y.value.length;if(S-=y.value.length,k=S,y.value instanceof _)continue;for(var E=y;E!==t.tail&&(S"+a.content+""},!u.document)return u.addEventListener&&(C.disableWorkerMessageHandler||u.addEventListener("message",function(e){var n=JSON.parse(e.data),t=n.language,r=n.code,a=n.immediateClose;u.postMessage(C.highlight(r,C.languages[t],t)),a&&u.close()},!1)),C;var e=C.util.currentScript();function t(){C.manual||C.highlightAll()}if(e&&(C.filename=e.src,e.hasAttribute("data-manual")&&(C.manual=!0)),!C.manual){var r=document.readyState;"loading"===r||"interactive"===r&&e&&e.defer?document.addEventListener("DOMContentLoaded",t):window.requestAnimationFrame?window.requestAnimationFrame(t):window.setTimeout(t,16)}return C}(_self);"undefined"!=typeof module&&module.exports&&(module.exports=Prism),"undefined"!=typeof global&&(global.Prism=Prism); \ No newline at end of file +var _self="undefined"!=typeof window?window:"undefined"!=typeof WorkerGlobalScope&&self instanceof WorkerGlobalScope?self:{},Prism=function(u){var c=/\blang(?:uage)?-([\w-]+)\b/i,n=0,M={manual:u.Prism&&u.Prism.manual,disableWorkerMessageHandler:u.Prism&&u.Prism.disableWorkerMessageHandler,util:{encode:function e(n){return n instanceof W?new W(n.type,e(n.content),n.alias):Array.isArray(n)?n.map(e):n.replace(/&/g,"&").replace(/=l.reach);k+=y.value.length,y=y.next){var b=y.value;if(t.length>n.length)return;if(!(b instanceof W)){var x=1;if(h&&y!=t.tail.prev){m.lastIndex=k;var w=m.exec(n);if(!w)break;var A=w.index+(f&&w[1]?w[1].length:0),P=w.index+w[0].length,S=k;for(S+=y.value.length;S<=A;)y=y.next,S+=y.value.length;if(S-=y.value.length,k=S,y.value instanceof W)continue;for(var E=y;E!==t.tail&&(Sl.reach&&(l.reach=j);var C=y.prev;L&&(C=I(t,C,L),k+=L.length),z(t,C,x);var _=new W(o,g?M.tokenize(O,g):O,v,O);y=I(t,C,_),N&&I(t,y,N),1"+a.content+""},!u.document)return u.addEventListener&&(M.disableWorkerMessageHandler||u.addEventListener("message",function(e){var n=JSON.parse(e.data),t=n.language,r=n.code,a=n.immediateClose;u.postMessage(M.highlight(r,M.languages[t],t)),a&&u.close()},!1)),M;var e=M.util.currentScript();function t(){M.manual||M.highlightAll()}if(e&&(M.filename=e.src,e.hasAttribute("data-manual")&&(M.manual=!0)),!M.manual){var r=document.readyState;"loading"===r||"interactive"===r&&e&&e.defer?document.addEventListener("DOMContentLoaded",t):window.requestAnimationFrame?window.requestAnimationFrame(t):window.setTimeout(t,16)}return M}(_self);"undefined"!=typeof module&&module.exports&&(module.exports=Prism),"undefined"!=typeof global&&(global.Prism=Prism); \ No newline at end of file diff --git a/docs/Token.html b/docs/Token.html index 08ec3b6c90..44437763fd 100644 --- a/docs/Token.html +++ b/docs/Token.html @@ -68,7 +68,7 @@

-

new Token(type, content, aliasopt, matchedStropt, greedyopt)

+

new Token(type, content, aliasopt, matchedStropt)

@@ -80,7 +80,7 @@

new TokenSource:
@@ -311,45 +311,6 @@
Parameters:
- - - - greedy - - - - - -boolean - - - - - - - - - <optional>
- - - - - - - - - - - - false - - - - -

Whether the pattern that created this token is greedy or not. Will be removed soon.

- - - @@ -403,7 +364,7 @@

aliasSource:
@@ -486,7 +447,7 @@

contentSource:
@@ -563,7 +524,7 @@

typeSource:
diff --git a/docs/global.html b/docs/global.html index 04ff438b2b..478976c1a5 100644 --- a/docs/global.html +++ b/docs/global.html @@ -143,7 +143,7 @@

Grammar

Source:
@@ -274,7 +274,7 @@

GrammarToken

Source:
@@ -559,7 +559,7 @@

High
Source:
@@ -713,7 +713,7 @@

HookCallb
Source:
@@ -859,7 +859,7 @@

TokenStream

Source:
diff --git a/docs/prism-core.js.html b/docs/prism-core.js.html index 4f75eae243..644beffbea 100644 --- a/docs/prism-core.js.html +++ b/docs/prism-core.js.html @@ -768,12 +768,11 @@

prism-core.js

* @param {string | TokenStream} content See {@link Token#content content} * @param {string|string[]} [alias] The alias(es) of the token. * @param {string} [matchedStr=""] A copy of the full string this token was created from. - * @param {boolean} [greedy=false] Whether the pattern that created this token is greedy or not. Will be removed soon. * @class * @global * @public */ -function Token(type, content, alias, matchedStr, greedy) { +function Token(type, content, alias, matchedStr) { /** * The type of the token. * @@ -801,8 +800,8 @@

prism-core.js

* @public */ this.alias = alias; - this.length = (matchedStr || "").length|0; - this.greedy = !!greedy; + // Copy of the full string this token was created from + this.length = (matchedStr || '').length | 0; } /** @@ -879,11 +878,15 @@

prism-core.js

* @param {any} grammar * @param {LinkedListNode<string | Token>} startNode * @param {number} startPos - * @param {boolean} [oneshot=false] - * @param {string} [target] + * @param {RematchOptions} [rematch] + * @returns {void} * @private + * + * @typedef RematchOptions + * @property {string} cause + * @property {number} reach */ -function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, target) { +function matchGrammar(text, tokenList, grammar, startNode, startPos, rematch) { for (var token in grammar) { if (!grammar.hasOwnProperty(token) || !grammar[token]) { continue; @@ -893,24 +896,25 @@

prism-core.js

patterns = Array.isArray(patterns) ? patterns : [patterns]; for (var j = 0; j < patterns.length; ++j) { - if (target && target == token + ',' + j) { + if (rematch && rematch.cause == token + ',' + j) { return; } - var pattern = patterns[j], - inside = pattern.inside, - lookbehind = !!pattern.lookbehind, - greedy = !!pattern.greedy, + var patternObj = patterns[j], + inside = patternObj.inside, + lookbehind = !!patternObj.lookbehind, + greedy = !!patternObj.greedy, lookbehindLength = 0, - alias = pattern.alias; + alias = patternObj.alias; - if (greedy && !pattern.pattern.global) { + if (greedy && !patternObj.pattern.global) { // Without the global flag, lastIndex won't work - var flags = pattern.pattern.toString().match(/[imsuy]*$/)[0]; - pattern.pattern = RegExp(pattern.pattern.source, flags + 'g'); + var flags = patternObj.pattern.toString().match(/[imsuy]*$/)[0]; + patternObj.pattern = RegExp(patternObj.pattern.source, flags + 'g'); } - pattern = pattern.pattern || pattern; + /** @type {RegExp} */ + var pattern = patternObj.pattern || patternObj; for ( // iterate the token list and keep track of the current token/string position var currentNode = startNode.next, pos = startPos; @@ -918,6 +922,10 @@

prism-core.js

pos += currentNode.value.length, currentNode = currentNode.next ) { + if (rematch && pos >= rematch.reach) { + break; + } + var str = currentNode.value; if (tokenList.length > text.length) { @@ -960,7 +968,7 @@

prism-core.js

// find the last node which is affected by this match for ( var k = currentNode; - k !== tokenList.tail && (p < to || (typeof k.value === 'string' && !k.prev.value.greedy)); + k !== tokenList.tail && (p < to || typeof k.value === 'string'); k = k.next ) { removeCount++; @@ -978,10 +986,6 @@

prism-core.js

} if (!match) { - if (oneshot) { - break; - } - continue; } @@ -990,11 +994,16 @@

prism-core.js

} var from = match.index + lookbehindLength, - match = match[0].slice(lookbehindLength), - to = from + match.length, + matchStr = match[0].slice(lookbehindLength), + to = from + matchStr.length, before = str.slice(0, from), after = str.slice(to); + var reach = pos + str.length; + if (rematch && reach > rematch.reach) { + rematch.reach = reach; + } + var removeFrom = currentNode.prev; if (before) { @@ -1004,19 +1013,21 @@

prism-core.js

removeRange(tokenList, removeFrom, removeCount); - var wrapped = new Token(token, inside ? _.tokenize(match, inside) : match, alias, match, greedy); + var wrapped = new Token(token, inside ? _.tokenize(matchStr, inside) : matchStr, alias, matchStr); currentNode = addAfter(tokenList, removeFrom, wrapped); if (after) { addAfter(tokenList, currentNode, after); } - - if (removeCount > 1) - matchGrammar(text, tokenList, grammar, currentNode.prev, pos, true, token + ',' + j); - - if (oneshot) - break; + if (removeCount > 1) { + // at least one Token object was removed, so we have to do some rematching + // this can only happen if the current pattern is greedy + matchGrammar(text, tokenList, grammar, currentNode.prev, pos, { + cause: token + ',' + j, + reach: reach + }); + } } } } diff --git a/prism.js b/prism.js index 336152f694..74b8cf894f 100644 --- a/prism.js +++ b/prism.js @@ -720,12 +720,11 @@ _self.Prism = _; * @param {string | TokenStream} content See {@link Token#content content} * @param {string|string[]} [alias] The alias(es) of the token. * @param {string} [matchedStr=""] A copy of the full string this token was created from. - * @param {boolean} [greedy=false] Whether the pattern that created this token is greedy or not. Will be removed soon. * @class * @global * @public */ -function Token(type, content, alias, matchedStr, greedy) { +function Token(type, content, alias, matchedStr) { /** * The type of the token. * @@ -753,8 +752,8 @@ function Token(type, content, alias, matchedStr, greedy) { * @public */ this.alias = alias; - this.length = (matchedStr || "").length|0; - this.greedy = !!greedy; + // Copy of the full string this token was created from + this.length = (matchedStr || '').length | 0; } /** @@ -831,11 +830,15 @@ Token.stringify = function stringify(o, language) { * @param {any} grammar * @param {LinkedListNode} startNode * @param {number} startPos - * @param {boolean} [oneshot=false] - * @param {string} [target] + * @param {RematchOptions} [rematch] + * @returns {void} * @private + * + * @typedef RematchOptions + * @property {string} cause + * @property {number} reach */ -function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, target) { +function matchGrammar(text, tokenList, grammar, startNode, startPos, rematch) { for (var token in grammar) { if (!grammar.hasOwnProperty(token) || !grammar[token]) { continue; @@ -845,24 +848,25 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta patterns = Array.isArray(patterns) ? patterns : [patterns]; for (var j = 0; j < patterns.length; ++j) { - if (target && target == token + ',' + j) { + if (rematch && rematch.cause == token + ',' + j) { return; } - var pattern = patterns[j], - inside = pattern.inside, - lookbehind = !!pattern.lookbehind, - greedy = !!pattern.greedy, + var patternObj = patterns[j], + inside = patternObj.inside, + lookbehind = !!patternObj.lookbehind, + greedy = !!patternObj.greedy, lookbehindLength = 0, - alias = pattern.alias; + alias = patternObj.alias; - if (greedy && !pattern.pattern.global) { + if (greedy && !patternObj.pattern.global) { // Without the global flag, lastIndex won't work - var flags = pattern.pattern.toString().match(/[imsuy]*$/)[0]; - pattern.pattern = RegExp(pattern.pattern.source, flags + 'g'); + var flags = patternObj.pattern.toString().match(/[imsuy]*$/)[0]; + patternObj.pattern = RegExp(patternObj.pattern.source, flags + 'g'); } - pattern = pattern.pattern || pattern; + /** @type {RegExp} */ + var pattern = patternObj.pattern || patternObj; for ( // iterate the token list and keep track of the current token/string position var currentNode = startNode.next, pos = startPos; @@ -870,6 +874,10 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta pos += currentNode.value.length, currentNode = currentNode.next ) { + if (rematch && pos >= rematch.reach) { + break; + } + var str = currentNode.value; if (tokenList.length > text.length) { @@ -912,7 +920,7 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta // find the last node which is affected by this match for ( var k = currentNode; - k !== tokenList.tail && (p < to || (typeof k.value === 'string' && !k.prev.value.greedy)); + k !== tokenList.tail && (p < to || typeof k.value === 'string'); k = k.next ) { removeCount++; @@ -930,10 +938,6 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta } if (!match) { - if (oneshot) { - break; - } - continue; } @@ -942,11 +946,16 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta } var from = match.index + lookbehindLength, - match = match[0].slice(lookbehindLength), - to = from + match.length, + matchStr = match[0].slice(lookbehindLength), + to = from + matchStr.length, before = str.slice(0, from), after = str.slice(to); + var reach = pos + str.length; + if (rematch && reach > rematch.reach) { + rematch.reach = reach; + } + var removeFrom = currentNode.prev; if (before) { @@ -956,19 +965,21 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta removeRange(tokenList, removeFrom, removeCount); - var wrapped = new Token(token, inside ? _.tokenize(match, inside) : match, alias, match, greedy); + var wrapped = new Token(token, inside ? _.tokenize(matchStr, inside) : matchStr, alias, matchStr); currentNode = addAfter(tokenList, removeFrom, wrapped); if (after) { addAfter(tokenList, currentNode, after); } - - if (removeCount > 1) - matchGrammar(text, tokenList, grammar, currentNode.prev, pos, true, token + ',' + j); - - if (oneshot) - break; + if (removeCount > 1) { + // at least one Token object was removed, so we have to do some rematching + // this can only happen if the current pattern is greedy + matchGrammar(text, tokenList, grammar, currentNode.prev, pos, { + cause: token + ',' + j, + reach: reach + }); + } } } } diff --git a/tests/core/greedy.js b/tests/core/greedy.js index ac802b38ad..812828cbff 100644 --- a/tests/core/greedy.js +++ b/tests/core/greedy.js @@ -55,8 +55,6 @@ describe('Greedy matching', function () { }); }); - // https://github.com/PrismJS/prism/issues/1492 - /* it('should correctly rematch tokens', function () { testTokens({ grammar: { @@ -75,8 +73,9 @@ describe('Greedy matching', function () { code: `<'> '' ''\n<"> "" ""`, expected: [ ["c", "<'>"], - ["a", "''"], - ["a", "''"], + " '", + ["a", "' '"], + "'\n", ["c", "<\">"], ["b", "\"\""], @@ -84,5 +83,4 @@ describe('Greedy matching', function () { ] }); }); - */ });