Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Core: Fixed greedy matching bug #2032

Merged
Merged 13 commits on Jul 13, 2020
73 changes: 42 additions & 31 deletions components/prism-core.js
Expand Up @@ -715,12 +715,11 @@ _self.Prism = _;
* @param {string | TokenStream} content See {@link Token#content content}
* @param {string|string[]} [alias] The alias(es) of the token.
* @param {string} [matchedStr=""] A copy of the full string this token was created from.
* @param {boolean} [greedy=false] Whether the pattern that created this token is greedy or not. Will be removed soon.
* @class
* @global
* @public
*/
function Token(type, content, alias, matchedStr, greedy) {
function Token(type, content, alias, matchedStr) {
/**
* The type of the token.
*
Expand Down Expand Up @@ -748,8 +747,8 @@ function Token(type, content, alias, matchedStr, greedy) {
* @public
*/
this.alias = alias;
this.length = (matchedStr || "").length|0;
this.greedy = !!greedy;
// Length of the full string this token was created from
this.length = (matchedStr || '').length | 0;
}

/**
Expand Down Expand Up @@ -826,11 +825,15 @@ Token.stringify = function stringify(o, language) {
* @param {any} grammar
* @param {LinkedListNode<string | Token>} startNode
* @param {number} startPos
* @param {boolean} [oneshot=false]
* @param {string} [target]
* @param {RematchOptions} [rematch]
* @returns {void}
* @private
*
* @typedef RematchOptions
* @property {string} cause
* @property {number} reach
*/
function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, target) {
function matchGrammar(text, tokenList, grammar, startNode, startPos, rematch) {
for (var token in grammar) {
if (!grammar.hasOwnProperty(token) || !grammar[token]) {
continue;
Expand All @@ -840,31 +843,36 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta
patterns = Array.isArray(patterns) ? patterns : [patterns];

for (var j = 0; j < patterns.length; ++j) {
if (target && target == token + ',' + j) {
if (rematch && rematch.cause == token + ',' + j) {
return;
}

var pattern = patterns[j],
inside = pattern.inside,
lookbehind = !!pattern.lookbehind,
greedy = !!pattern.greedy,
var patternObj = patterns[j],
inside = patternObj.inside,
lookbehind = !!patternObj.lookbehind,
greedy = !!patternObj.greedy,
lookbehindLength = 0,
alias = pattern.alias;
alias = patternObj.alias;

if (greedy && !pattern.pattern.global) {
if (greedy && !patternObj.pattern.global) {
// Without the global flag, lastIndex won't work
var flags = pattern.pattern.toString().match(/[imsuy]*$/)[0];
pattern.pattern = RegExp(pattern.pattern.source, flags + 'g');
var flags = patternObj.pattern.toString().match(/[imsuy]*$/)[0];
patternObj.pattern = RegExp(patternObj.pattern.source, flags + 'g');
}

pattern = pattern.pattern || pattern;
/** @type {RegExp} */
var pattern = patternObj.pattern || patternObj;

for ( // iterate the token list and keep track of the current token/string position
var currentNode = startNode.next, pos = startPos;
currentNode !== tokenList.tail;
pos += currentNode.value.length, currentNode = currentNode.next
) {

if (rematch && pos >= rematch.reach) {
break;
}

var str = currentNode.value;

if (tokenList.length > text.length) {
Expand Down Expand Up @@ -907,7 +915,7 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta
// find the last node which is affected by this match
for (
var k = currentNode;
k !== tokenList.tail && (p < to || (typeof k.value === 'string' && !k.prev.value.greedy));
k !== tokenList.tail && (p < to || typeof k.value === 'string');
k = k.next
) {
removeCount++;
Expand All @@ -925,10 +933,6 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta
}

if (!match) {
if (oneshot) {
break;
}

continue;
}

Expand All @@ -937,11 +941,16 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta
}

var from = match.index + lookbehindLength,
match = match[0].slice(lookbehindLength),
to = from + match.length,
matchStr = match[0].slice(lookbehindLength),
to = from + matchStr.length,
before = str.slice(0, from),
after = str.slice(to);

var reach = pos + str.length;
if (rematch && reach > rematch.reach) {
rematch.reach = reach;
}

var removeFrom = currentNode.prev;

if (before) {
Expand All @@ -951,19 +960,21 @@ function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, ta

removeRange(tokenList, removeFrom, removeCount);

var wrapped = new Token(token, inside ? _.tokenize(match, inside) : match, alias, match, greedy);
var wrapped = new Token(token, inside ? _.tokenize(matchStr, inside) : matchStr, alias, matchStr);
currentNode = addAfter(tokenList, removeFrom, wrapped);

if (after) {
addAfter(tokenList, currentNode, after);
}


if (removeCount > 1)
matchGrammar(text, tokenList, grammar, currentNode.prev, pos, true, token + ',' + j);

if (oneshot)
break;
if (removeCount > 1) {
// at least one Token object was removed, so we have to do some rematching
// this can only happen if the current pattern is greedy
matchGrammar(text, tokenList, grammar, currentNode.prev, pos, {
cause: token + ',' + j,
reach: reach
});
}
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion components/prism-core.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.