Skip to content

Commit

Permalink
[GR-52906] Re-enable fast-forward past empty mandatory iterations in JS.
Browse files Browse the repository at this point in the history
PullRequest: graal/17362
  • Loading branch information
jirkamarsik committed Mar 26, 2024
2 parents 0fe3734 + 1344773 commit 2a77e42
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 10 deletions.
5 changes: 3 additions & 2 deletions regex/ci/ci.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@
["git", "clone", ["mx", "urlrewrite", "https://github.com/graalvm/js-tests.git"], "../../js-tests"],
["mx", "-p", "../vm", "--dynamicimports", "/graal-js,js-tests", "checkout-downstream", "graal-js", "js-tests"],
# run downstream gate from js-tests suite.
["mx", "-p", "../../js-tests", "sversions"],
["mx", "-p", "../../js-tests", "gate", "--no-warning-as-error", "--all-suites", "--tags", "build,Test262-default,TestV8-default,regex"],
["cd", "../../js-tests"],
["mx", "sversions"],
["mx", "gate", "--no-warning-as-error", "--all-suites", "--tags", "build,Test262-default,TestV8-default,regex"],
],
targets: ["gate"],
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,4 +289,17 @@ public void innerLiteralSurrogates() {
test("\\udf06", "u", "\uD834\uDF06", 0, false);
test("x?\\udf06", "u", "\uD834\uDF06", 0, false);
}

@Test
public void gr52906() {
// Original test case
test("\\b(((.*?)){67108860})\\b|(?=(?=(?!.).\\b(\\d))){0,4}", "yi", "L1O\n\n\n11\n \n\n11\n \uD091 1aa\uFCDB=\n ", 0, true, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1);
// Minimized version
test("(.*?){67108863}", "", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxx", 0, true, 0, 0, 0, 0);

// Linked issue
test("(?=(?=(\\W)\u008e+|\\uC47A|(\\s)))+?|((((?:(\\\u0015)))+?))|(?:\\r|[^]+?[^])|\\3{3,}", "gyim", "", 0, true, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
// Minimized version
test("()\\1{3,}", "", "", 0, true, 0, 0, 0, 0);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@
import com.oracle.truffle.regex.tregex.util.json.Json;
import com.oracle.truffle.regex.tregex.util.json.JsonValue;

import java.util.Arrays;

/**
* Represents a transition of a {@link PureNFA}.
*/
Expand Down Expand Up @@ -122,6 +124,7 @@ public JsonValue toJson(RegexAST ast) {
Json.prop("source", source.getId()),
Json.prop("target", target.getId()),
Json.prop("groupBoundaries", groupBoundaries),
Json.prop("sourceSections", groupBoundaries.indexUpdateSourceSectionsToJson(ast)));
Json.prop("sourceSections", groupBoundaries.indexUpdateSourceSectionsToJson(ast)),
Json.prop("quantifierGuards", Arrays.stream(quantifierGuards).map(QuantifierGuard::toJson)));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
import com.oracle.truffle.regex.tregex.parser.Token.Quantifier;
import com.oracle.truffle.regex.tregex.parser.ast.ConditionalBackReferenceGroup;
import com.oracle.truffle.regex.tregex.parser.flavors.RegexFlavor;
import com.oracle.truffle.regex.tregex.util.json.Json;
import com.oracle.truffle.regex.tregex.util.json.JsonValue;

import java.util.Objects;

Expand Down Expand Up @@ -228,4 +230,9 @@ public String toString() {
return kind + " " + index;
}
}

@TruffleBoundary
public JsonValue toJson() {
return Json.val(toString());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,13 @@ public final class TRegexBacktrackingNFAExecutorNode extends TRegexBacktrackerSu
private static final int FLAG_BACKREF_WITH_NULL_TARGET_FAILS = 1 << 2;
private static final int FLAG_MONITOR_CAPTURE_GROUPS_IN_EMPTY_CHECK = 1 << 3;
private static final int FLAG_TRANSITION_MATCHES_STEP_BY_STEP = 1 << 4;
private static final int FLAG_TRACK_LAST_GROUP = 1 << 5;
private static final int FLAG_RETURNS_FIRST_GROUP = 1 << 6;
private static final int FLAG_MUST_ADVANCE = 1 << 7;
private static final int FLAG_LONE_SURROGATES = 1 << 8;
private static final int FLAG_LOOPBACK_INITIAL_STATE = 1 << 9;
private static final int FLAG_USE_MERGE_EXPLODE = 1 << 10;
private static final int FLAG_EMPTY_CHECKS_ON_MANDATORY_LOOP_ITERATIONS = 1 << 5;
private static final int FLAG_TRACK_LAST_GROUP = 1 << 6;
private static final int FLAG_RETURNS_FIRST_GROUP = 1 << 7;
private static final int FLAG_MUST_ADVANCE = 1 << 8;
private static final int FLAG_LONE_SURROGATES = 1 << 9;
private static final int FLAG_LOOPBACK_INITIAL_STATE = 1 << 10;
private static final int FLAG_USE_MERGE_EXPLODE = 1 << 11;
private static final int FLAG_RECURSIVE_BACK_REFERENCES = 1 << 12;

private final PureNFA nfa;
Expand Down Expand Up @@ -199,6 +200,7 @@ private static int createFlags(RegexAST ast, PureNFA nfa, boolean mustAdvance, R
flags = setFlag(flags, FLAG_BACKREF_WITH_NULL_TARGET_FAILS, ast.getOptions().getFlavor().backreferencesToUnmatchedGroupsFail());
flags = setFlag(flags, FLAG_MONITOR_CAPTURE_GROUPS_IN_EMPTY_CHECK, ast.getOptions().getFlavor().emptyChecksMonitorCaptureGroups());
flags = setFlag(flags, FLAG_TRANSITION_MATCHES_STEP_BY_STEP, ast.getOptions().getFlavor().matchesTransitionsStepByStep());
flags = setFlag(flags, FLAG_EMPTY_CHECKS_ON_MANDATORY_LOOP_ITERATIONS, ast.getOptions().getFlavor().emptyChecksOnMandatoryLoopIterations());
flags = setFlag(flags, FLAG_TRACK_LAST_GROUP, ast.getOptions().getFlavor().usesLastGroupResultField());
flags = setFlag(flags, FLAG_RETURNS_FIRST_GROUP, !isFlagSet(flags, FLAG_FORWARD) && ast.getOptions().getFlavor().lookBehindsRunLeftToRight());
flags = setFlag(flags, FLAG_MUST_ADVANCE, mustAdvance);
Expand Down Expand Up @@ -250,6 +252,10 @@ public boolean isTransitionMatchesStepByStep() {
return isFlagSet(FLAG_TRANSITION_MATCHES_STEP_BY_STEP);
}

public boolean isEmptyChecksOnMandatoryLoopIterations() {
return isFlagSet(FLAG_EMPTY_CHECKS_ON_MANDATORY_LOOP_ITERATIONS);
}

public boolean isTrackLastGroup() {
return isFlagSet(FLAG_TRACK_LAST_GROUP);
}
Expand Down Expand Up @@ -749,7 +755,10 @@ protected boolean transitionMatches(VirtualFrame frame, TRegexBacktrackingNFAExe
case exitZeroWidth:
if (locals.getZeroWidthQuantifierGuardIndex(q) == index &&
(!isMonitorCaptureGroupsInEmptyCheck() || locals.isResultUnmodifiedByZeroWidthQuantifier(q)) &&
(!q.hasIndex() || locals.getQuantifierCount(q) > q.getMin())) {
// In JS, we allow this guard to pass if we are still in the
// optional part of the quantifier. This allows JS to fast-
// forward past all the empty mandatory iterations.
(isEmptyChecksOnMandatoryLoopIterations() || !q.hasIndex() || locals.getQuantifierCount(q) > q.getMin())) {
return false;
}
break;
Expand Down Expand Up @@ -827,6 +836,29 @@ protected void updateState(TRegexBacktrackingNFAExecutorLocals locals, PureNFATr
locals.setZeroWidthQuantifierGuardIndex(q);
locals.setZeroWidthQuantifierResults(q);
break;
case exitZeroWidth:
boolean emptyCheckFailed = locals.getZeroWidthQuantifierGuardIndex(q) == index &&
(!isMonitorCaptureGroupsInEmptyCheck() || locals.isResultUnmodifiedByZeroWidthQuantifier(q));
boolean advancePastOptionalIterations = !isEmptyChecksOnMandatoryLoopIterations() && q.hasIndex() && locals.getQuantifierCount(q) < q.getMin();
if (emptyCheckFailed && advancePastOptionalIterations && !transition.hasCaretGuard() && !transition.hasDollarGuard()) {
// We advance the counter to min - 1 to skip past all but one mandatory
// iteration. We do not skip the last mandatory iteration and set the
// counter to min, because of the way JavaScript regexes are executed. The
// JavaScript flavor does not set matchesTransitionStepByStep and therefore
// all guards are tested against the same original state. In the case of the
// last mandatory iteration, we would like it to be possible to match the
// exitZeroWidth guard followed by the exit guard, so that it is possible to
// hit the exact minimum number of iterations. However, this relies on first
// updating the state with exitZeroWidth and then testing this new state
// with the exit guard. This would mean having to enable
// matchesTransitionStepByStep for JavaScript and implementing this logic in
// tryUpdateState instead, which would lead to degraded performance for JS
// regexps. Instead, we choose to advance the counter to just before the
// last mandatory iteration so that this fast-forwarding behavior does not
// coincide with an exit guard that should pass.
locals.setQuantifierCount(q, q.getMin() - 1);
}
break;
default:
break;
}
Expand Down

0 comments on commit 2a77e42

Please sign in to comment.