Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Core: Fixed greedy matching bug #2032

Merged
merged 13 commits on Jul 13, 2020
75 changes: 42 additions & 33 deletions components/prism-core.js
Expand Up @@ -286,7 +286,7 @@ var _ = {
return Token.stringify(_.util.encode(env.tokens), env.language);
},

matchGrammar: function (text, strarr, grammar, index, startPos, oneshot, target) {
matchGrammar: function (text, strarr, grammar, startIndex, startPos, greedyRematching, rematchCause, rematchReach) {
for (var token in grammar) {
if (!grammar.hasOwnProperty(token) || !grammar[token]) {
continue;
Expand All @@ -296,27 +296,32 @@ var _ = {
patterns = Array.isArray(patterns) ? patterns : [patterns];

for (var j = 0; j < patterns.length; ++j) {
if (target && target == token + ',' + j) {
if (rematchCause && rematchCause == token + ',' + j) {
return;
}

var pattern = patterns[j],
inside = pattern.inside,
lookbehind = !!pattern.lookbehind,
greedy = !!pattern.greedy,
var patternObj = patterns[j],
inside = patternObj.inside,
lookbehind = !!patternObj.lookbehind,
greedy = !!patternObj.greedy,
lookbehindLength = 0,
alias = pattern.alias;
alias = patternObj.alias;

if (greedy && !pattern.pattern.global) {
if (greedy && !patternObj.pattern.global) {
// Without the global flag, lastIndex won't work
var flags = pattern.pattern.toString().match(/[imsuy]*$/)[0];
pattern.pattern = RegExp(pattern.pattern.source, flags + 'g');
var flags = patternObj.pattern.toString().match(/[imsuy]*$/)[0];
patternObj.pattern = RegExp(patternObj.pattern.source, flags + 'g');
}

pattern = pattern.pattern || pattern;
/** @type {RegExp} */
var pattern = patternObj.pattern || patternObj;

// Don’t cache length as it changes during the loop
for (var i = index, pos = startPos; i < strarr.length; pos += strarr[i].length, ++i) {
for (var i = startIndex, pos = startPos; i < strarr.length; pos += strarr[i].length, ++i) {

if (greedyRematching && pos >= rematchReach) {
break;
}

var str = strarr[i];

Expand All @@ -336,12 +341,16 @@ var _ = {
break;
}

var from = match.index + (lookbehind && match[1] ? match[1].length : 0),
to = match.index + match[0].length,
k = i,
p = pos;
var from = match.index + (lookbehind && match[1] ? match[1].length : 0);
var to = match.index + match[0].length;
var len = strarr.length;

for (var len = strarr.length; k < len && (p < to || (!strarr[k].type && !strarr[k - 1].greedy)); ++k) {
for (
var k = i, p = pos;
// `!strarr[k].type` checks whether `strarr[k]` is a string
k < len && (p < to || !strarr[k].type);
++k
) {
p += strarr[k].length;
// Move the index i to the element in strarr that is closest to from
if (from >= p) {
Expand All @@ -367,23 +376,26 @@ var _ = {
}

if (!match) {
if (oneshot) {
break;
}

continue;
}

if(lookbehind) {
if (lookbehind) {
lookbehindLength = match[1] ? match[1].length : 0;
}

var from = match.index + lookbehindLength,
match = match[0].slice(lookbehindLength),
to = from + match.length,
matchStr = match[0].slice(lookbehindLength),
to = from + matchStr.length,
before = str.slice(0, from),
after = str.slice(to);

var reach = pos + str.length;

if (greedyRematching && reach > rematchReach) {
// expand rematch reach for potential future matches
rematchReach = reach
}

var args = [i, delNum];

if (before) {
Expand All @@ -392,7 +404,7 @@ var _ = {
args.push(before);
}

var wrapped = new Token(token, inside? _.tokenize(match, inside) : match, alias, match, greedy);
var wrapped = new Token(token, inside ? _.tokenize(matchStr, inside) : matchStr, alias, matchStr);

args.push(wrapped);

Expand All @@ -402,11 +414,9 @@ var _ = {

Array.prototype.splice.apply(strarr, args);

if (delNum != 1)
_.matchGrammar(text, strarr, grammar, i, pos, true, token + ',' + j);

if (oneshot)
break;
if (delNum != 1) {
_.matchGrammar(text, strarr, grammar, i, pos, true, token + ',' + j, reach);
}
}
}
}
Expand All @@ -425,7 +435,7 @@ var _ = {
delete grammar.rest;
}

_.matchGrammar(text, strarr, grammar, 0, 0, false);
_.matchGrammar(text, strarr, grammar, 0, 0);

return strarr;
},
Expand Down Expand Up @@ -459,13 +469,12 @@ var _ = {

_self.Prism = _;

function Token(type, content, alias, matchedStr, greedy) {
function Token(type, content, alias, matchedStr) {
this.type = type;
this.content = content;
this.alias = alias;
// Length of the full string this token was created from
this.length = (matchedStr || '').length|0;
this.greedy = !!greedy;
}

Token.stringify = function(o, language) {
Expand Down
2 changes: 1 addition & 1 deletion components/prism-core.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.