From 08adcecc14943e90cb5c0fe05748c5ba9fa9e065 Mon Sep 17 00:00:00 2001 From: cpovirk Date: Thu, 12 Jul 2018 08:08:25 -0700 Subject: [PATCH] Internally, use PatternCompiler from Doubles.tryParse. (Redo of CL 202132002, which was partially rolled back in CL 202139691.) This time including tests to cover the RE2J approach. To avoid the errors of last time, I had to remove the possessive quantifiers under RE2J and replace \p{XDigit} with [0-9a-fA-F] (which is equivalent; see the Pattern Javadoc). ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=204301262 --- .../com/google/common/base/SplitterTest.java | 4 +-- .../com/google/common/base/CommonPattern.java | 6 +++- .../google/common/base/PatternCompiler.java | 6 ++++ .../src/com/google/common/base/Platform.java | 9 ++++-- .../com/google/common/primitives/Doubles.java | 31 ++++++++++++++----- .../com/google/common/base/Platform.java | 4 +-- .../com/google/common/base/SplitterTest.java | 4 +-- .../com/google/common/base/CommonPattern.java | 6 +++- .../google/common/base/PatternCompiler.java | 6 ++++ .../src/com/google/common/base/Platform.java | 9 ++++-- .../com/google/common/primitives/Doubles.java | 31 ++++++++++++++----- 11 files changed, 88 insertions(+), 28 deletions(-) diff --git a/android/guava-tests/test/com/google/common/base/SplitterTest.java b/android/guava-tests/test/com/google/common/base/SplitterTest.java index 65a57e8676a3..b1b19fcce4cd 100644 --- a/android/guava-tests/test/com/google/common/base/SplitterTest.java +++ b/android/guava-tests/test/com/google/common/base/SplitterTest.java @@ -354,7 +354,7 @@ public void testPatternSplitWithDoubleDelimiterOmitEmptyStrings() { @GwtIncompatible // java.util.regex.Pattern @AndroidIncompatible // Bug in older versions of Android we test against, since fixed. public void testPatternSplitLookBehind() { - if (!Platform.usingJdkPatternCompiler()) { + if (!CommonPattern.isPcreLike()) { return; } String toSplit = ":foo::barbaz:"; @@ -491,7 +491,7 @@ public void testSplitterIterableIsLazy_string() { @GwtIncompatible // java.util.regex.Pattern @AndroidIncompatible // not clear that j.u.r.Matcher promises to handle mutations during use public void testSplitterIterableIsLazy_pattern() { - if (!Platform.usingJdkPatternCompiler()) { + if (!CommonPattern.isPcreLike()) { return; } assertSplitterIterableIsLazy(Splitter.onPattern(",")); diff --git a/android/guava/src/com/google/common/base/CommonPattern.java b/android/guava/src/com/google/common/base/CommonPattern.java index 898003271b87..6be5b01408aa 100644 --- a/android/guava/src/com/google/common/base/CommonPattern.java +++ b/android/guava/src/com/google/common/base/CommonPattern.java @@ -33,7 +33,11 @@ abstract class CommonPattern { @Override public abstract String toString(); - static CommonPattern compile(String pattern) { + public static CommonPattern compile(String pattern) { return Platform.compilePattern(pattern); } + + public static boolean isPcreLike() { + return Platform.patternCompilerIsPcreLike(); + } } diff --git a/android/guava/src/com/google/common/base/PatternCompiler.java b/android/guava/src/com/google/common/base/PatternCompiler.java index ca9287a8f04a..813a25f65b86 100644 --- a/android/guava/src/com/google/common/base/PatternCompiler.java +++ b/android/guava/src/com/google/common/base/PatternCompiler.java @@ -29,4 +29,10 @@ interface PatternCompiler { * @throws IllegalArgumentException if the pattern is invalid */ CommonPattern compile(String pattern); + + /** + * Returns {@code true} if the regex implementation behaves like Perl -- notably, by supporting + * possessive quantifiers but also being susceptible to catastrophic backtracking. + */ + boolean isPcreLike(); } diff --git a/android/guava/src/com/google/common/base/Platform.java b/android/guava/src/com/google/common/base/Platform.java index 4b9ee14eb464..8c4b11a78d12 100644 --- a/android/guava/src/com/google/common/base/Platform.java +++ b/android/guava/src/com/google/common/base/Platform.java @@ -70,8 +70,8 @@ static CommonPattern compilePattern(String pattern) { return patternCompiler.compile(pattern); } - static boolean usingJdkPatternCompiler() { - return patternCompiler instanceof JdkPatternCompiler; + static boolean patternCompilerIsPcreLike() { + return patternCompiler.isPcreLike(); } private static PatternCompiler loadPatternCompiler() { @@ -92,5 +92,10 @@ private static final class JdkPatternCompiler implements PatternCompiler { public CommonPattern compile(String pattern) { return new JdkPattern(Pattern.compile(pattern)); } + + @Override + public boolean isPcreLike() { + return true; + } } } diff --git a/android/guava/src/com/google/common/primitives/Doubles.java b/android/guava/src/com/google/common/primitives/Doubles.java index c81bc9b01f24..1a43503669fa 100644 --- a/android/guava/src/com/google/common/primitives/Doubles.java +++ b/android/guava/src/com/google/common/primitives/Doubles.java @@ -33,7 +33,6 @@ import java.util.Comparator; import java.util.List; import java.util.RandomAccess; -import java.util.regex.Pattern; import org.checkerframework.checker.nullness.compatqual.NullableDecl; /** @@ -646,16 +645,32 @@ public String toString() { * that pass this regex are valid -- only a performance hit is incurred, not a semantics bug. */ @GwtIncompatible // regular expressions - static final Pattern FLOATING_POINT_PATTERN = fpPattern(); + static final + java.util.regex.Pattern + FLOATING_POINT_PATTERN = fpPattern(); @GwtIncompatible // regular expressions - private static Pattern fpPattern() { - String decimal = "(?:\\d++(?:\\.\\d*+)?|\\.\\d++)"; - String completeDec = decimal + "(?:[eE][+-]?\\d++)?[fFdD]?"; - String hex = "(?:\\p{XDigit}++(?:\\.\\p{XDigit}*+)?|\\.\\p{XDigit}++)"; - String completeHex = "0[xX]" + hex + "[pP][+-]?\\d++[fFdD]?"; + private static + java.util.regex.Pattern + fpPattern() { + /* + * We use # instead of * for possessive quantifiers. This lets us strip them out when building + * the regex for RE2 (which doesn't support them) but leave them in when building it for + * java.util.regex (where we want them in order to avoid catastrophic backtracking). + */ + String decimal = "(?:\\d+#(?:\\.\\d*#)?|\\.\\d+#)"; + String completeDec = decimal + "(?:[eE][+-]?\\d+#)?[fFdD]?"; + String hex = "(?:[0-9a-fA-F]+#(?:\\.[0-9a-fA-F]*#)?|\\.[0-9a-fA-F]+#)"; + String completeHex = "0[xX]" + hex + "[pP][+-]?\\d+#[fFdD]?"; String fpPattern = "[+-]?(?:NaN|Infinity|" + completeDec + "|" + completeHex + ")"; - return Pattern.compile(fpPattern); + fpPattern = + fpPattern.replace( + "#", + "+" + ); + return + java.util.regex.Pattern + .compile(fpPattern); } /** diff --git a/guava-gwt/src-super/com/google/common/base/super/com/google/common/base/Platform.java b/guava-gwt/src-super/com/google/common/base/super/com/google/common/base/Platform.java index d52b01a9ce1a..7d5de70e8927 100644 --- a/guava-gwt/src-super/com/google/common/base/super/com/google/common/base/Platform.java +++ b/guava-gwt/src-super/com/google/common/base/super/com/google/common/base/Platform.java @@ -75,8 +75,8 @@ static CommonPattern compilePattern(String pattern) { throw new UnsupportedOperationException(); } - static boolean usingJdkPatternCompiler() { - return false; + static boolean patternCompilerIsPcreLike() { + throw new UnsupportedOperationException(); } private Platform() {} diff --git a/guava-tests/test/com/google/common/base/SplitterTest.java b/guava-tests/test/com/google/common/base/SplitterTest.java index 65a57e8676a3..b1b19fcce4cd 100644 --- a/guava-tests/test/com/google/common/base/SplitterTest.java +++ b/guava-tests/test/com/google/common/base/SplitterTest.java @@ -354,7 +354,7 @@ public void testPatternSplitWithDoubleDelimiterOmitEmptyStrings() { @GwtIncompatible // java.util.regex.Pattern @AndroidIncompatible // Bug in older versions of Android we test against, since fixed. public void testPatternSplitLookBehind() { - if (!Platform.usingJdkPatternCompiler()) { + if (!CommonPattern.isPcreLike()) { return; } String toSplit = ":foo::barbaz:"; @@ -491,7 +491,7 @@ public void testSplitterIterableIsLazy_string() { @GwtIncompatible // java.util.regex.Pattern @AndroidIncompatible // not clear that j.u.r.Matcher promises to handle mutations during use public void testSplitterIterableIsLazy_pattern() { - if (!Platform.usingJdkPatternCompiler()) { + if (!CommonPattern.isPcreLike()) { return; } assertSplitterIterableIsLazy(Splitter.onPattern(",")); diff --git a/guava/src/com/google/common/base/CommonPattern.java b/guava/src/com/google/common/base/CommonPattern.java index 898003271b87..6be5b01408aa 100644 --- a/guava/src/com/google/common/base/CommonPattern.java +++ b/guava/src/com/google/common/base/CommonPattern.java @@ -33,7 +33,11 @@ abstract class CommonPattern { @Override public abstract String toString(); - static CommonPattern compile(String pattern) { + public static CommonPattern compile(String pattern) { return Platform.compilePattern(pattern); } + + public static boolean isPcreLike() { + return Platform.patternCompilerIsPcreLike(); + } } diff --git a/guava/src/com/google/common/base/PatternCompiler.java b/guava/src/com/google/common/base/PatternCompiler.java index ca9287a8f04a..813a25f65b86 100644 --- a/guava/src/com/google/common/base/PatternCompiler.java +++ b/guava/src/com/google/common/base/PatternCompiler.java @@ -29,4 +29,10 @@ interface PatternCompiler { * @throws IllegalArgumentException if the pattern is invalid */ CommonPattern compile(String pattern); + + /** + * Returns {@code true} if the regex implementation behaves like Perl -- notably, by supporting + * possessive quantifiers but also being susceptible to catastrophic backtracking. + */ + boolean isPcreLike(); } diff --git a/guava/src/com/google/common/base/Platform.java b/guava/src/com/google/common/base/Platform.java index cbb737a3e9e6..769cd2ce54c7 100644 --- a/guava/src/com/google/common/base/Platform.java +++ b/guava/src/com/google/common/base/Platform.java @@ -70,8 +70,8 @@ static CommonPattern compilePattern(String pattern) { return patternCompiler.compile(pattern); } - static boolean usingJdkPatternCompiler() { - return patternCompiler instanceof JdkPatternCompiler; + static boolean patternCompilerIsPcreLike() { + return patternCompiler.isPcreLike(); } private static PatternCompiler loadPatternCompiler() { @@ -87,5 +87,10 @@ private static final class JdkPatternCompiler implements PatternCompiler { public CommonPattern compile(String pattern) { return new JdkPattern(Pattern.compile(pattern)); } + + @Override + public boolean isPcreLike() { + return true; + } } } diff --git a/guava/src/com/google/common/primitives/Doubles.java b/guava/src/com/google/common/primitives/Doubles.java index a5d5922de082..b318ef7fe993 100644 --- a/guava/src/com/google/common/primitives/Doubles.java +++ b/guava/src/com/google/common/primitives/Doubles.java @@ -35,7 +35,6 @@ import java.util.RandomAccess; import java.util.Spliterator; import java.util.Spliterators; -import java.util.regex.Pattern; import org.checkerframework.checker.nullness.qual.Nullable; /** @@ -653,16 +652,32 @@ public String toString() { * that pass this regex are valid -- only a performance hit is incurred, not a semantics bug. */ @GwtIncompatible // regular expressions - static final Pattern FLOATING_POINT_PATTERN = fpPattern(); + static final + java.util.regex.Pattern + FLOATING_POINT_PATTERN = fpPattern(); @GwtIncompatible // regular expressions - private static Pattern fpPattern() { - String decimal = "(?:\\d++(?:\\.\\d*+)?|\\.\\d++)"; - String completeDec = decimal + "(?:[eE][+-]?\\d++)?[fFdD]?"; - String hex = "(?:\\p{XDigit}++(?:\\.\\p{XDigit}*+)?|\\.\\p{XDigit}++)"; - String completeHex = "0[xX]" + hex + "[pP][+-]?\\d++[fFdD]?"; + private static + java.util.regex.Pattern + fpPattern() { + /* + * We use # instead of * for possessive quantifiers. This lets us strip them out when building + * the regex for RE2 (which doesn't support them) but leave them in when building it for + * java.util.regex (where we want them in order to avoid catastrophic backtracking). + */ + String decimal = "(?:\\d+#(?:\\.\\d*#)?|\\.\\d+#)"; + String completeDec = decimal + "(?:[eE][+-]?\\d+#)?[fFdD]?"; + String hex = "(?:[0-9a-fA-F]+#(?:\\.[0-9a-fA-F]*#)?|\\.[0-9a-fA-F]+#)"; + String completeHex = "0[xX]" + hex + "[pP][+-]?\\d+#[fFdD]?"; String fpPattern = "[+-]?(?:NaN|Infinity|" + completeDec + "|" + completeHex + ")"; - return Pattern.compile(fpPattern); + fpPattern = + fpPattern.replace( + "#", + "+" + ); + return + java.util.regex.Pattern + .compile(fpPattern); } /**