graphql-java · andimarek · Jul 14, 2021 · Jun 28, 2021 · Jun 29, 2021 · Jun 29, 2021
diff --git a/src/main/antlr/GraphqlCommon.g4 b/src/main/antlr/GraphqlCommon.g4
@@ -120,12 +120,12 @@ fragment BlockStringCharacter:
 ExtendedSourceCharacter;
 
 fragment StringCharacter:
-([\u0009\u0020\u0021] | [\u0023-\u005b] | [\u005d-\u{10FFFF}]) |  // this is SoureCharacter without '"' and '\'
+([\u0009\u0020\u0021] | [\u0023-\u005b] | [\u005d-\u{10FFFF}]) |  // this is SourceCharacter without '"' and '\'
 '\\u' EscapedUnicode  |
 '\\' EscapedCharacter;
 
 fragment EscapedCharacter :  ["\\/bfnrt];
-fragment EscapedUnicode : Hex Hex Hex Hex;
+fragment EscapedUnicode : Hex Hex Hex Hex | '{' Hex+ '}'; // exactly four hex digits, or one or more hex digits wrapped in braces
 fragment Hex : [0-9a-fA-F];
 
 

diff --git a/src/main/java/graphql/parser/StringValueParsing.java b/src/main/java/graphql/parser/StringValueParsing.java
@@ -30,7 +30,9 @@ public static String removeIndentation(String rawValue) {
         String[] lines = rawValue.split("\\n");
         Integer commonIndent = null;
         for (int i = 0; i < lines.length; i++) {
-            if (i == 0) continue;
+            if (i == 0) {
+                continue;
+            }
             String line = lines[i];
             int length = line.length();
             int indent = leadingWhitespace(line);
@@ -44,7 +46,9 @@ public static String removeIndentation(String rawValue) {
         if (commonIndent != null) {
             for (int i = 0; i < lineList.size(); i++) {
                 String line = lineList.get(i);
-                if (i == 0) continue;
+                if (i == 0) {
+                    continue;
+                }
                 if (line.length() > commonIndent) {
                     line = line.substring(commonIndent);
                     lineList.set(i, line);
@@ -135,10 +139,7 @@ public static String parseSingleQuotedString(String string) {
                     writer.write('\t');
                     continue;
                 case 'u':
-                    String hexStr = string.substring(i + 1, i + 5);
-                    int codepoint = Integer.parseInt(hexStr, 16);
-                    i += 4;
-                    writer.write(codepoint);
+                    i = UnicodeUtil.parseAndWriteUnicode(writer, string, i);
                     continue;
                 default:
                     Assert.assertShouldNeverHappen();

diff --git a/src/main/java/graphql/parser/UnicodeUtil.java b/src/main/java/graphql/parser/UnicodeUtil.java
@@ -0,0 +1,64 @@
+package graphql.parser;
+
+import graphql.Assert;
+import graphql.Internal;
+
+import java.io.IOException;
+import java.io.StringWriter;
+
+/**
+ * Contains Unicode helpers for parsing StringValue types in the grammar
+ */
+@Internal
+public class UnicodeUtil {
+    public static final int MAX_UNICODE_CODE_POINT = 0x10FFFF; // largest Unicode code point
+
+    /**
+     * Decodes one escape, {@code \\uXXXX} or {@code \\u{X...}}, and writes it to {@code writer}.
+     *
+     * @param writer destination for the decoded character(s)
+     * @param string the text containing the escape
+     * @param i      index of the {@code u} that follows the backslash
+     * @return index of the last character consumed (fourth hex digit or closing brace)
+     * @throws RuntimeException if a braced escape is empty, unterminated or out of range
+     */
+    public static int parseAndWriteUnicode(StringWriter writer, String string, int i) {
+        if (isNotBracedEscape(string, i)) {
+            // Fixed-width form: four hex digits, i.e. one UTF-16 code unit; a surrogate pair
+            // arrives as two consecutive escapes, each written as its own unit.
+            // TODO error checking of invalid values
+            int codeUnit = Integer.parseInt(string.substring(i + 1, i + 5), 16);
+            writer.write(codeUnit);
+            return i + 4;
+        }
+
+        // Braced form: the hex digits sit between '{' (at i + 1) and the next '}'.
+        int startIx = i + 2;
+        int endIndexExclusive = string.indexOf('}', startIx);
+        if (endIndexExclusive <= startIx) {
+            // -1 means no closing brace; == startIx means "\\u{}" with no digits at all
+            throw new RuntimeException("invalid unicode encoding");
+        }
+        int codePoint = Integer.parseInt(string.substring(startIx, endIndexExclusive), 16);
+        if (!isValidUnicodeCodePoint(codePoint)) {
+            throw new RuntimeException("invalid unicode code point");
+        }
+        try {
+            // toChars yields a surrogate pair for code points above the BMP
+            writer.write(Character.toChars(codePoint));
+        } catch (IOException e) {
+            return Assert.assertShouldNeverHappen();
+        }
+        return endIndexExclusive;
+    }
+
+    private static boolean isNotBracedEscape(String string, int i) {
+        return string.charAt(i + 1) != '{';
+    }
+
+    private static boolean isValidUnicodeCodePoint(int value) {
+        // TODO: Add bad surrogate checks
+        // parseInt accepts a leading '-', so the lower bound has to be checked too
+        return value >= 0 && value <= MAX_UNICODE_CODE_POINT;
+    }
+}
diff --git a/src/test/groovy/graphql/parser/StringValueParsingTest.groovy b/src/test/groovy/graphql/parser/StringValueParsingTest.groovy
@@ -40,8 +40,7 @@ class StringValueParsingTest extends Specification {
         parsed == '''"'''
     }
 
-    def "parsing emoji should work"() {
-        // needs surrogate pairs for this emoji
+    def "parsing beer stein as surrogate pair should work"() {
         given:
         def input = '''"\\ud83c\\udf7a"'''
 
@@ -52,18 +51,17 @@ class StringValueParsingTest extends Specification {
         parsed == '''🍺''' // contains the beer icon 	U+1F37A  : http://www.charbase.com/1f37a-unicode-beer-mug
     }
 
-    def "parsing simple unicode should work"() {
+    def "parsing simple unicode should work - Basic Multilingual Plane (BMP)"() {
         given:
-        def input = '''"\\u56fe"'''
+        def input = '''"\\u5564\\u9152"''' // two four-digit escapes, U+5564 and U+9152, each a single BMP character
 
         when:
         String parsed = StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        parsed == '''图'''
+        parsed == '''啤酒'''
     }
 
-
     def "parsing triple quoted string should work"() {
         given:
         def input = '''"""triple quoted"""'''