From a0785c6ffadb847eef7f271c9a43ff9e4068e1b2 Mon Sep 17 00:00:00 2001
From: dondonz <13839920+dondonz@users.noreply.github.com>
Date: Mon, 28 Jun 2021 11:14:44 +1000
Subject: [PATCH 01/15] Add tests to support full Unicode parser

---
 .../parser/StringValueParsingTest.groovy      |  10 +-
 .../parser/UnicodeUtilParserTest.groovy       | 191 ++++++++++++++++++
 2 files changed, 195 insertions(+), 6 deletions(-)
 create mode 100644 src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy

diff --git a/src/test/groovy/graphql/parser/StringValueParsingTest.groovy b/src/test/groovy/graphql/parser/StringValueParsingTest.groovy
index 8044d1cec7..5543dbc339 100644
--- a/src/test/groovy/graphql/parser/StringValueParsingTest.groovy
+++ b/src/test/groovy/graphql/parser/StringValueParsingTest.groovy
@@ -40,8 +40,7 @@ class StringValueParsingTest extends Specification {
         parsed == '''"'''
     }
 
-    def "parsing emoji should work"() {
-        // needs surrogate pairs for this emoji
+    def "parsing beer stein as surrogate pair should work"() {
         given:
         def input = '''"\\ud83c\\udf7a"'''
 
@@ -52,18 +51,17 @@ class StringValueParsingTest extends Specification {
         parsed == '''🍺''' // contains the beer icon 	U+1F37A  : http://www.charbase.com/1f37a-unicode-beer-mug
     }
 
-    def "parsing simple unicode should work"() {
+    def "parsing simple unicode should work - Basic Multilingual Plane (BMP)"() {
         given:
-        def input = '''"\\u56fe"'''
+        def input = '''"\\u5564\\u9152"'''
 
         when:
         String parsed = StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        parsed == '''图'''
+        parsed == '''啤酒'''
     }
 
-
     def "parsing triple quoted string should work"() {
         given:
         def input = '''"""triple quoted"""'''
diff --git a/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy b/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
new file mode 100644
index 0000000000..3a5cba4bcf
--- /dev/null
+++ b/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
@@ -0,0 +1,191 @@
+package graphql.parser
+
+import graphql.language.Document
+import graphql.language.Field
+import graphql.language.OperationDefinition
+import graphql.language.StringValue
+import spock.lang.Specification
+
+class UnicodeUtilParserTest extends Specification {
+    /*
+        Implements RFC to support full Unicode
+        Original RFC https://github.com/graphql/graphql-spec/issues/687
+        RFC spec text https://github.com/graphql/graphql-spec/pull/849
+        RFC JS implementation https://github.com/graphql/graphql-js/pull/3117
+
+        TL;DR
+        Previously, valid SourceCharacters included Unicode scalar values up to and including U+FFFF - the Basic Multilingual Plane (BMP)
+        Now this is changing to incorporate all Unicode scalar values
+        Assert {value} is a within the *Unicode scalar value* range (>= 0x0000 and <= 0xD7FF or >= 0xE000 and <= 0x10FFFF).
+        Practically this means you can have your beer emoji (U+1F37A) in queries as \\u{1F37A}
+    */
+
+    // With this RFC, code points outside the Basic Multilingual Plane can be parsed. For example, emojis
+    // Previously emojis could only be parsed with surrogate pairs. Now they can be parsed with the code point directly
+    def "parsing beer stein as escaped unicode"() {
+        given:
+        def input = '''"\\u{1F37A} hello"'''
+
+        when:
+        String parsed = StringValueParsing.parseSingleQuotedString(input)
+
+        then:
+        parsed == '''🍺 hello''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
+    }
+
+    def "parsing beer mug non escaped"() {
+        given:
+        def input = '''"🍺 hello"'''
+
+        when:
+        String parsed = StringValueParsing.parseSingleQuotedString(input)
+
+        then:
+        parsed == '''🍺 hello''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
+    }
+
+    def "allow braced escaped unicode"() {
+        def input = '''
+              {
+              foo(arg: "\\u{1F37A}")
+               }
+        '''
+
+        when:
+        Document document = Parser.parse(input)
+        OperationDefinition operationDefinition = (document.definitions[0] as OperationDefinition)
+        def field = operationDefinition.getSelectionSet().getSelections()[0] as Field
+        def argValue = field.arguments[0].value as StringValue
+
+        then:
+        argValue.getValue() == "🍺"
+    }
+
+    /*
+        From the RFC:
+        For legacy reasons, a *supplementary character* may be escaped by two
+        fixed-width unicode escape sequences forming a *surrogate pair*. For example
+        the input `"\uD83D\uDCA9"` is a valid {StringValue} which represents the same
+        Unicode text as `"\u{1F4A9}"`. While this legacy form is allowed, it should be
+        avoided as a variable-width unicode escape sequence is a clearer way to encode
+        such code points.
+    */
+    def "allow surrogate pairs escaped unicode"() {
+        def input = '''
+              {
+              foo(arg: "\\ud83c\\udf7a")
+               }
+        '''
+
+        when:
+        Document document = Parser.parse(input)
+        OperationDefinition operationDefinition = (document.definitions[0] as OperationDefinition)
+        def field = operationDefinition.getSelectionSet().getSelections()[0] as Field
+        def argValue = field.arguments[0].value as StringValue
+
+        then:
+        argValue.getValue() == "🍺"
+    }
+
+    /*
+        From the RFC:
+        * If {leadingValue} is >= 0xD800 and <= 0xDBFF (a *Leading Surrogate*):
+        * Assert {trailingValue} is >= 0xDC00 and <= 0xDFFF (a *Trailing Surrogate*).
+        * Return ({leadingValue} - 0xD800) × 0x400 + ({trailingValue} - 0xDC00) + 0x10000.
+     */
+    def "invalid surrogate pair"() {
+        def input = '''
+              {
+              foo(arg: "\\uD83D\\uDBFF")
+               }
+        '''
+
+        when:
+        Document document = Parser.parse(input)
+
+        then:
+        // TODO: Raise exception
+        false
+    }
+
+    def "invalid unicode code point"() {
+        def input = '''
+              {
+              foo(arg: "\\u{fffffff}")
+               }
+        '''
+
+        when:
+        Document document = Parser.parse(input)
+
+        then:
+        // TODO: Raise exception
+        false
+    }
+
+    def "invalid unpaired surrogate" () {
+        def input = '''
+              {
+              foo(arg: "\\uD83D")
+               }
+        '''
+
+        when:
+        Document document = Parser.parse(input)
+
+        then:
+        // TODO: Discuss whether to raise exception
+        false
+    }
+
+    def "invalid code point - too long" () {
+        given:
+        def input = '''"\\u{000000000}"'''
+
+        when:
+        String parsed = StringValueParsing.parseSingleQuotedString(input)
+
+        then:
+        // TODO: Discuss whether to raise exception
+        false
+    }
+
+    /*
+        From the RFC
+        **Byte order mark**
+
+        UnicodeBOM :: "Byte Order Mark (U+FEFF)"
+
+        The *Byte Order Mark* is a special Unicode code point which may appear at the
+        beginning of a file which programs may use to determine the fact that the text
+        stream is Unicode, and what specific encoding has been used.
+
+        As files are often concatenated, a *Byte Order Mark* may appear anywhere within
+        a GraphQL document and is {Ignored}.
+    */
+    def "byte order mark to be ignored" () {
+        // The Byte Order Mark indicates a Unicode stream, and whether the stream is high-endian or low-endian
+        given:
+        def input = '''"hello \\uFEFF\\u4F60\\u597D"'''
+
+        when:
+        String parsed = StringValueParsing.parseSingleQuotedString(input)
+
+        then:
+        parsed == '''hello 你好'''
+    }
+
+    // TODO: How do we want to handle control characters?
+    def "escapes zero byte" () {
+        // TODO: This is a test case from the JS implementation. Do we want to implement this case?
+        given:
+        def input = '''"\\x00"'''
+
+        when:
+        String parsed = StringValueParsing.parseSingleQuotedString(input)
+
+        then:
+        parsed == '''\\u0000'''
+    }
+}
+

From 5594036820d34413d204631c336ba4c4e4b8b4f9 Mon Sep 17 00:00:00 2001
From: dondonz <13839920+dondonz@users.noreply.github.com>
Date: Tue, 29 Jun 2021 10:52:48 +1000
Subject: [PATCH 02/15] Fix Groovy Unicode parser issue

---
 .../graphql/parser/UnicodeUtilParserTest.groovy   | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy b/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
index 3a5cba4bcf..1c1af5098f 100644
--- a/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
+++ b/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
@@ -4,6 +4,7 @@ import graphql.language.Document
 import graphql.language.Field
 import graphql.language.OperationDefinition
 import graphql.language.StringValue
+import spock.lang.Ignore
 import spock.lang.Specification
 
 class UnicodeUtilParserTest extends Specification {
@@ -22,6 +23,7 @@ class UnicodeUtilParserTest extends Specification {
 
     // With this RFC, code points outside the Basic Multilingual Plane can be parsed. For example, emojis
     // Previously emojis could only be parsed with surrogate pairs. Now they can be parsed with the code point directly
+    @Ignore
     def "parsing beer stein as escaped unicode"() {
         given:
         def input = '''"\\u{1F37A} hello"'''
@@ -33,6 +35,7 @@ class UnicodeUtilParserTest extends Specification {
         parsed == '''🍺 hello''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
     }
 
+    @Ignore
     def "parsing beer mug non escaped"() {
         given:
         def input = '''"🍺 hello"'''
@@ -44,6 +47,7 @@ class UnicodeUtilParserTest extends Specification {
         parsed == '''🍺 hello''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
     }
 
+    @Ignore
     def "allow braced escaped unicode"() {
         def input = '''
               {
@@ -65,11 +69,12 @@ class UnicodeUtilParserTest extends Specification {
         From the RFC:
         For legacy reasons, a *supplementary character* may be escaped by two
         fixed-width unicode escape sequences forming a *surrogate pair*. For example
-        the input `"\uD83D\uDCA9"` is a valid {StringValue} which represents the same
-        Unicode text as `"\u{1F4A9}"`. While this legacy form is allowed, it should be
+        the input `"\\uD83D\\uDCA9"` is a valid {StringValue} which represents the same
+        Unicode text as `"\\u{1F4A9}"`. While this legacy form is allowed, it should be
         avoided as a variable-width unicode escape sequence is a clearer way to encode
         such code points.
     */
+    @Ignore
     def "allow surrogate pairs escaped unicode"() {
         def input = '''
               {
@@ -93,6 +98,7 @@ class UnicodeUtilParserTest extends Specification {
         * Assert {trailingValue} is >= 0xDC00 and <= 0xDFFF (a *Trailing Surrogate*).
         * Return ({leadingValue} - 0xD800) × 0x400 + ({trailingValue} - 0xDC00) + 0x10000.
      */
+    @Ignore
     def "invalid surrogate pair"() {
         def input = '''
               {
@@ -108,6 +114,7 @@ class UnicodeUtilParserTest extends Specification {
         false
     }
 
+    @Ignore
     def "invalid unicode code point"() {
         def input = '''
               {
@@ -123,6 +130,7 @@ class UnicodeUtilParserTest extends Specification {
         false
     }
 
+    @Ignore
     def "invalid unpaired surrogate" () {
         def input = '''
               {
@@ -138,6 +146,7 @@ class UnicodeUtilParserTest extends Specification {
         false
     }
 
+    @Ignore
     def "invalid code point - too long" () {
         given:
         def input = '''"\\u{000000000}"'''
@@ -163,6 +172,7 @@ class UnicodeUtilParserTest extends Specification {
         As files are often concatenated, a *Byte Order Mark* may appear anywhere within
         a GraphQL document and is {Ignored}.
     */
+    @Ignore
     def "byte order mark to be ignored" () {
         // The Byte Order Mark indicates a Unicode stream, and whether the stream is high-endian or low-endian
         given:
@@ -176,6 +186,7 @@ class UnicodeUtilParserTest extends Specification {
     }
 
     // TODO: How do we want to handle control characters?
+    @Ignore
     def "escapes zero byte" () {
         // TODO: This is a test case from the JS implementation. Do we want to implement this case?
         given:

From 5b12cbeff757c7280177375814b92a4387db152f Mon Sep 17 00:00:00 2001
From: dondonz <13839920+dondonz@users.noreply.github.com>
Date: Tue, 29 Jun 2021 11:10:37 +1000
Subject: [PATCH 03/15] Add full Unicode to parser, the happy path

---
 src/main/antlr/GraphqlCommon.g4               |  4 +-
 .../graphql/parser/StringValueParsing.java    | 13 ++---
 src/main/java/graphql/parser/UnicodeUtil.java | 51 +++++++++++++++++++
 .../parser/UnicodeUtilParserTest.groovy       |  4 --
 4 files changed, 60 insertions(+), 12 deletions(-)
 create mode 100644 src/main/java/graphql/parser/UnicodeUtil.java

diff --git a/src/main/antlr/GraphqlCommon.g4 b/src/main/antlr/GraphqlCommon.g4
index e7d500cc71..5ace483ff9 100644
--- a/src/main/antlr/GraphqlCommon.g4
+++ b/src/main/antlr/GraphqlCommon.g4
@@ -120,12 +120,12 @@ fragment BlockStringCharacter:
 ExtendedSourceCharacter;
 
 fragment StringCharacter:
-([\u0009\u0020\u0021] | [\u0023-\u005b] | [\u005d-\u{10FFFF}]) |  // this is SoureCharacter without '"' and '\'
+([\u0009\u0020\u0021] | [\u0023-\u005b] | [\u005d-\u{10FFFF}]) |  // this is SourceCharacter without '"' and '\'
 '\\u' EscapedUnicode  |
 '\\' EscapedCharacter;
 
 fragment EscapedCharacter :  ["\\/bfnrt];
-fragment EscapedUnicode : Hex Hex Hex Hex;
+fragment EscapedUnicode : Hex Hex Hex Hex | '{' Hex+ '}';
 fragment Hex : [0-9a-fA-F];
 
 
diff --git a/src/main/java/graphql/parser/StringValueParsing.java b/src/main/java/graphql/parser/StringValueParsing.java
index a3c598c2d6..83b214cab1 100644
--- a/src/main/java/graphql/parser/StringValueParsing.java
+++ b/src/main/java/graphql/parser/StringValueParsing.java
@@ -30,7 +30,9 @@ public static String removeIndentation(String rawValue) {
         String[] lines = rawValue.split("\\n");
         Integer commonIndent = null;
         for (int i = 0; i < lines.length; i++) {
-            if (i == 0) continue;
+            if (i == 0) {
+                continue;
+            }
             String line = lines[i];
             int length = line.length();
             int indent = leadingWhitespace(line);
@@ -44,7 +46,9 @@ public static String removeIndentation(String rawValue) {
         if (commonIndent != null) {
             for (int i = 0; i < lineList.size(); i++) {
                 String line = lineList.get(i);
-                if (i == 0) continue;
+                if (i == 0) {
+                    continue;
+                }
                 if (line.length() > commonIndent) {
                     line = line.substring(commonIndent);
                     lineList.set(i, line);
@@ -135,10 +139,7 @@ public static String parseSingleQuotedString(String string) {
                     writer.write('\t');
                     continue;
                 case 'u':
-                    String hexStr = string.substring(i + 1, i + 5);
-                    int codepoint = Integer.parseInt(hexStr, 16);
-                    i += 4;
-                    writer.write(codepoint);
+                    i = UnicodeUtil.parseAndWriteUnicode(writer, string, i);
                     continue;
                 default:
                     Assert.assertShouldNeverHappen();
diff --git a/src/main/java/graphql/parser/UnicodeUtil.java b/src/main/java/graphql/parser/UnicodeUtil.java
new file mode 100644
index 0000000000..0574ddb6a4
--- /dev/null
+++ b/src/main/java/graphql/parser/UnicodeUtil.java
@@ -0,0 +1,51 @@
+package graphql.parser;
+
+import graphql.Assert;
+import graphql.Internal;
+
+import java.io.IOException;
+import java.io.StringWriter;
+
+/**
+ * Contains Unicode helpers for parsing StringValue types in the grammar
+ */
+@Internal
+public class UnicodeUtil {
+    public static int parseAndWriteUnicode(StringWriter writer, String string, int i) {
+        // Unicode characters can either be:
+        //  - four hex characters in the form \\u597D, or
+        //  - any number of hex characters surrounded by a brace in the form \\u{1F37A}
+
+        // Four hex character only case \\u597D, for code points in the Basic Multilingual Plane (BMP)
+        if (isNotBracedEscape(string, i)) {
+            String hexStr = string.substring(i + 1, i + 5);
+            int codepoint = Integer.parseInt(hexStr, 16);
+            writer.write(codepoint);
+            return i + 4;
+            // TODO error checking of invalid values
+        }
+
+        // Any number of hex characters e.g. \\u{1F37A}, which allows code points outside the Basic Multilingual Plane (BMP)
+        int startIx = i + 2;
+        int endIndexExclusive = startIx;
+        do {
+            if (endIndexExclusive + 1 >= string.length()) {
+                throw new RuntimeException("invalid unicode encoding");
+            }
+        } while (string.charAt(++endIndexExclusive) != '}');
+
+        String hexStr = string.substring(startIx, endIndexExclusive);
+        char[] chars = Character.toChars(Integer.parseInt(hexStr, 16));
+        try {
+            writer.write(chars);
+        } catch (IOException e) {
+            return Assert.assertShouldNeverHappen();
+        }
+        return endIndexExclusive;
+        // TODO error checking of invalid values
+    }
+
+    private static boolean isNotBracedEscape(String string, int i) {
+        return string.charAt(i + 1) != '{';
+    }
+}
diff --git a/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy b/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
index 1c1af5098f..583e9b26e5 100644
--- a/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
+++ b/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
@@ -23,7 +23,6 @@ class UnicodeUtilParserTest extends Specification {
 
     // With this RFC, code points outside the Basic Multilingual Plane can be parsed. For example, emojis
     // Previously emojis could only be parsed with surrogate pairs. Now they can be parsed with the code point directly
-    @Ignore
     def "parsing beer stein as escaped unicode"() {
         given:
         def input = '''"\\u{1F37A} hello"'''
@@ -35,7 +34,6 @@ class UnicodeUtilParserTest extends Specification {
         parsed == '''🍺 hello''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
     }
 
-    @Ignore
     def "parsing beer mug non escaped"() {
         given:
         def input = '''"🍺 hello"'''
@@ -47,7 +45,6 @@ class UnicodeUtilParserTest extends Specification {
         parsed == '''🍺 hello''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
     }
 
-    @Ignore
     def "allow braced escaped unicode"() {
         def input = '''
               {
@@ -74,7 +71,6 @@ class UnicodeUtilParserTest extends Specification {
         avoided as a variable-width unicode escape sequence is a clearer way to encode
         such code points.
     */
-    @Ignore
     def "allow surrogate pairs escaped unicode"() {
         def input = '''
               {

From dc349a0b7f75061680a626d6ea29f69c5a3e6501 Mon Sep 17 00:00:00 2001
From: dondonz <13839920+dondonz@users.noreply.github.com>
Date: Tue, 29 Jun 2021 11:48:41 +1000
Subject: [PATCH 04/15] Add maximum unicode value check

---
 src/main/java/graphql/parser/UnicodeUtil.java | 45 ++++++++++++-------
 .../parser/UnicodeUtilParserTest.groovy       |  9 ++--
 2 files changed, 34 insertions(+), 20 deletions(-)

diff --git a/src/main/java/graphql/parser/UnicodeUtil.java b/src/main/java/graphql/parser/UnicodeUtil.java
index 0574ddb6a4..d90ccdbfcb 100644
--- a/src/main/java/graphql/parser/UnicodeUtil.java
+++ b/src/main/java/graphql/parser/UnicodeUtil.java
@@ -11,6 +11,8 @@
  */
 @Internal
 public class UnicodeUtil {
+    public static int MAX_UNICODE_CODE_POINT = 0x10FFFF;
+
     public static int parseAndWriteUnicode(StringWriter writer, String string, int i) {
         // Unicode characters can either be:
         //  - four hex characters in the form \\u597D, or
@@ -23,29 +25,40 @@ public static int parseAndWriteUnicode(StringWriter writer, String string, int i
             writer.write(codepoint);
             return i + 4;
             // TODO error checking of invalid values
-        }
+        } else {
+            // Any number of hex characters e.g. \\u{1F37A}, which allows code points outside the Basic Multilingual Plane (BMP)
+            int startIx = i + 2;
+            int endIndexExclusive = startIx;
+            do {
+                if (endIndexExclusive + 1 >= string.length()) {
+                    throw new RuntimeException("invalid unicode encoding");
+                }
+            } while (string.charAt(++endIndexExclusive) != '}');
 
-        // Any number of hex characters e.g. \\u{1F37A}, which allows code points outside the Basic Multilingual Plane (BMP)
-        int startIx = i + 2;
-        int endIndexExclusive = startIx;
-        do {
-            if (endIndexExclusive + 1 >= string.length()) {
-                throw new RuntimeException("invalid unicode encoding");
+            String hexStr = string.substring(startIx, endIndexExclusive);
+            Integer hexValue = Integer.parseInt(hexStr, 16);
+            if (isValidUnicodeCodePoint(hexValue)) {
+                char[] chars = Character.toChars(hexValue);
+                try {
+                    writer.write(chars);
+                } catch (IOException e) {
+                    return Assert.assertShouldNeverHappen();
+                }
+                return endIndexExclusive;
+            } else {
+                throw new RuntimeException("invalid unicode code point");
             }
-        } while (string.charAt(++endIndexExclusive) != '}');
-
-        String hexStr = string.substring(startIx, endIndexExclusive);
-        char[] chars = Character.toChars(Integer.parseInt(hexStr, 16));
-        try {
-            writer.write(chars);
-        } catch (IOException e) {
-            return Assert.assertShouldNeverHappen();
         }
-        return endIndexExclusive;
+//        Assert.assertShouldNeverHappen();
         // TODO error checking of invalid values
     }
 
     private static boolean isNotBracedEscape(String string, int i) {
         return string.charAt(i + 1) != '{';
     }
+
+    private static boolean isValidUnicodeCodePoint(Integer value) {
+        // TODO: Add bad surrogate checks
+        return value < MAX_UNICODE_CODE_POINT;
+    }
 }
diff --git a/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy b/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
index 583e9b26e5..0373a35beb 100644
--- a/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
+++ b/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
@@ -4,6 +4,7 @@ import graphql.language.Document
 import graphql.language.Field
 import graphql.language.OperationDefinition
 import graphql.language.StringValue
+import graphql.schema.validation.InvalidSchemaException
 import spock.lang.Ignore
 import spock.lang.Specification
 
@@ -110,7 +111,6 @@ class UnicodeUtilParserTest extends Specification {
         false
     }
 
-    @Ignore
     def "invalid unicode code point"() {
         def input = '''
               {
@@ -122,8 +122,8 @@ class UnicodeUtilParserTest extends Specification {
         Document document = Parser.parse(input)
 
         then:
-        // TODO: Raise exception
-        false
+        Exception e = thrown(Exception)
+        e.message == "invalid unicode code point"
     }
 
     @Ignore
@@ -151,7 +151,7 @@ class UnicodeUtilParserTest extends Specification {
         String parsed = StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        // TODO: Discuss whether to raise exception
+        // TODO: Discuss whether to raise exception. How do we want to treat leading zeroes?
         false
     }
 
@@ -169,6 +169,7 @@ class UnicodeUtilParserTest extends Specification {
         a GraphQL document and is {Ignored}.
     */
     @Ignore
+    // TODO: BOM was previously implemented. Do we want to change the prior implementation?
     def "byte order mark to be ignored" () {
         // The Byte Order Mark indicates a Unicode stream, and whether the stream is high-endian or low-endian
         given:

From c719779320b03beb86fd03696bd647ed3ac0a79b Mon Sep 17 00:00:00 2001
From: dondonz <13839920+dondonz@users.noreply.github.com>
Date: Tue, 29 Jun 2021 11:57:57 +1000
Subject: [PATCH 05/15] Fix typo

---
 src/main/java/graphql/parser/UnicodeUtil.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/graphql/parser/UnicodeUtil.java b/src/main/java/graphql/parser/UnicodeUtil.java
index d90ccdbfcb..b1ab251e83 100644
--- a/src/main/java/graphql/parser/UnicodeUtil.java
+++ b/src/main/java/graphql/parser/UnicodeUtil.java
@@ -59,6 +59,6 @@ private static boolean isNotBracedEscape(String string, int i) {
 
     private static boolean isValidUnicodeCodePoint(Integer value) {
         // TODO: Add bad surrogate checks
-        return value < MAX_UNICODE_CODE_POINT;
+        return value <= MAX_UNICODE_CODE_POINT;
     }
 }

From ecfffc4a4009774a4e9a6082822c9771a37266d9 Mon Sep 17 00:00:00 2001
From: dondonz <13839920+dondonz@users.noreply.github.com>
Date: Thu, 8 Jul 2021 14:46:49 +1000
Subject: [PATCH 06/15] Add tests

---
 .../parser/UnicodeUtilParserTest.groovy       | 214 +++++++++++-------
 1 file changed, 137 insertions(+), 77 deletions(-)

diff --git a/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy b/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
index 0373a35beb..76a68b257a 100644
--- a/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
+++ b/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
@@ -4,26 +4,22 @@ import graphql.language.Document
 import graphql.language.Field
 import graphql.language.OperationDefinition
 import graphql.language.StringValue
-import graphql.schema.validation.InvalidSchemaException
-import spock.lang.Ignore
 import spock.lang.Specification
 
 class UnicodeUtilParserTest extends Specification {
     /*
-        Implements RFC to support full Unicode
-        Original RFC https://github.com/graphql/graphql-spec/issues/687
-        RFC spec text https://github.com/graphql/graphql-spec/pull/849
-        RFC JS implementation https://github.com/graphql/graphql-js/pull/3117
-
-        TL;DR
-        Previously, valid SourceCharacters included Unicode scalar values up to and including U+FFFF - the Basic Multilingual Plane (BMP)
-        Now this is changing to incorporate all Unicode scalar values
-        Assert {value} is a within the *Unicode scalar value* range (>= 0x0000 and <= 0xD7FF or >= 0xE000 and <= 0x10FFFF).
-        Practically this means you can have your beer emoji (U+1F37A) in queries as \\u{1F37A}
+        Implements RFC to support full Unicode https://github.com/graphql/graphql-spec/pull/849
+
+        Key changes
+        * SourceCharacters now include all Unicode scalar values. Previously only included up to U+FFFF (Basic Multilingual Plane).
+        * SourceCharacters now include control characters. Previously certain control characters were excluded.
+        * Surrogate pair validation added.
+
+        Note that "unescaped" Unicode characters such as 🍺 are handled by ANTLR grammar.
+        "Escaped" Unicode characters such as \\u{1F37A} are handled by StringValueParsing.
     */
 
-    // With this RFC, code points outside the Basic Multilingual Plane can be parsed. For example, emojis
-    // Previously emojis could only be parsed with surrogate pairs. Now they can be parsed with the code point directly
+    // With this RFC, escaped code points outside the Basic Multilingual Plane (e.g. emojis) can be parsed.
     def "parsing beer stein as escaped unicode"() {
         given:
         def input = '''"\\u{1F37A} hello"'''
@@ -35,7 +31,7 @@ class UnicodeUtilParserTest extends Specification {
         parsed == '''🍺 hello''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
     }
 
-    def "parsing beer mug non escaped"() {
+    def "parsing beer stein without escaping"() {
         given:
         def input = '''"🍺 hello"'''
 
@@ -47,6 +43,7 @@ class UnicodeUtilParserTest extends Specification {
     }
 
     def "allow braced escaped unicode"() {
+        given:
         def input = '''
               {
               foo(arg: "\\u{1F37A}")
@@ -60,7 +57,7 @@ class UnicodeUtilParserTest extends Specification {
         def argValue = field.arguments[0].value as StringValue
 
         then:
-        argValue.getValue() == "🍺"
+        argValue.getValue() == "🍺" // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
     }
 
     /*
@@ -73,6 +70,7 @@ class UnicodeUtilParserTest extends Specification {
         such code points.
     */
     def "allow surrogate pairs escaped unicode"() {
+        given:
         def input = '''
               {
               foo(arg: "\\ud83c\\udf7a")
@@ -86,114 +84,176 @@ class UnicodeUtilParserTest extends Specification {
         def argValue = field.arguments[0].value as StringValue
 
         then:
-        argValue.getValue() == "🍺"
+        argValue.getValue() == "🍺" // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
     }
 
     /*
-        From the RFC:
+        Valid surrogate pair combinations (from the RFC):
         * If {leadingValue} is >= 0xD800 and <= 0xDBFF (a *Leading Surrogate*):
         * Assert {trailingValue} is >= 0xDC00 and <= 0xDFFF (a *Trailing Surrogate*).
-        * Return ({leadingValue} - 0xD800) × 0x400 + ({trailingValue} - 0xDC00) + 0x10000.
      */
-    @Ignore
-    def "invalid surrogate pair"() {
-        def input = '''
-              {
-              foo(arg: "\\uD83D\\uDBFF")
-               }
-        '''
+    def "invalid surrogate pair - no trailing value"() {
+        given:
+        def input = '''"\\uD83D hello"'''
 
         when:
-        Document document = Parser.parse(input)
+        StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        // TODO: Raise exception
-        false
+        RuntimeException e = thrown(RuntimeException)
+        e.message == "Invalid unicode"
     }
 
-    def "invalid unicode code point"() {
-        def input = '''
-              {
-              foo(arg: "\\u{fffffff}")
-               }
-        '''
+    def "invalid surrogate pair - end of string"() {
+        given:
+        def input = '''"\\uD83D"'''
 
         when:
-        Document document = Parser.parse(input)
+        StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        Exception e = thrown(Exception)
-        e.message == "invalid unicode code point"
+        RuntimeException e = thrown(RuntimeException)
+        e.message == "Invalid unicode"
     }
 
-    @Ignore
-    def "invalid unpaired surrogate" () {
-        def input = '''
-              {
-              foo(arg: "\\uD83D")
-               }
-        '''
+    def "invalid surrogate pair - invalid trailing value"() {
+        given:
+        def input = '''"\\uD83D\\uDBFF"'''
 
         when:
-        Document document = Parser.parse(input)
+        StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        // TODO: Discuss whether to raise exception
-        false
+        RuntimeException e = thrown(RuntimeException)
+        e.message == "Invalid unicode"
     }
 
-    @Ignore
-    def "invalid code point - too long" () {
+    def "invalid surrogate pair - no leading value"() {
         given:
-        def input = '''"\\u{000000000}"'''
+        def input = '''"\\uDC00"'''
 
         when:
-        String parsed = StringValueParsing.parseSingleQuotedString(input)
+        StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        // TODO: Discuss whether to raise exception. How do we want to treat leading zeroes?
-        false
+        RuntimeException e = thrown(RuntimeException)
+        e.message == "Invalid unicode"
     }
 
-    /*
-        From the RFC
-        **Byte order mark**
+    def "invalid surrogate pair - invalid leading value"() {
+        given:
+        def input = '''"\\uD700\\uDC00"'''
 
-        UnicodeBOM :: "Byte Order Mark (U+FEFF)"
+        when:
+        StringValueParsing.parseSingleQuotedString(input)
 
-        The *Byte Order Mark* is a special Unicode code point which may appear at the
-        beginning of a file which programs may use to determine the fact that the text
-        stream is Unicode, and what specific encoding has been used.
+        then:
+        RuntimeException e = thrown(RuntimeException)
+        e.message == "Invalid unicode"
+    }
 
-        As files are often concatenated, a *Byte Order Mark* may appear anywhere within
-        a GraphQL document and is {Ignored}.
-    */
-    @Ignore
-    // TODO: BOM was previously implemented. Do we want to change the prior implementation?
-    def "byte order mark to be ignored" () {
-        // The Byte Order Mark indicates a Unicode stream, and whether the stream is high-endian or low-endian
+    def "valid surrogate pair - leading code with braces"() {
         given:
-        def input = '''"hello \\uFEFF\\u4F60\\u597D"'''
+        def input = '''"hello \\u{d83c}\\udf7a"'''
 
         when:
         String parsed = StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        parsed == '''hello 你好'''
+        parsed == '''hello 🍺''' // contains the beer icon U+1F37 A : http://www.charbase.com/1f37a-unicode-beer-mug
     }
 
-    // TODO: How do we want to handle control characters?
-    @Ignore
-    def "escapes zero byte" () {
-        // TODO: This is a test case from the JS implementation. Do we want to implement this case?
+    def "valid surrogate pair - trailing code with braces"() {
         given:
-        def input = '''"\\x00"'''
+        def input = '''"hello \\ud83c\\u{df7a}"'''
 
         when:
         String parsed = StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        parsed == '''\\u0000'''
+        parsed == '''hello 🍺''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
     }
-}
 
+    def "valid surrogate pair - leading and trailing code with braces"() {
+        given:
+        def input = '''"hello \\u{d83c}\\u{df7a}"'''
+
+        when:
+        String parsed = StringValueParsing.parseSingleQuotedString(input)
+
+        then:
+        parsed == '''hello 🍺''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
+    }
+
+    def "invalid surrogate pair - invalid trailing code without unicode escape 1"() {
+        given:
+        def input = '''"hello \\u{d83c}{df7a}"'''
+
+        when:
+        StringValueParsing.parseSingleQuotedString(input)
+
+        then:
+        RuntimeException e = thrown(RuntimeException)
+        e.message == "Invalid unicode"
+    }
+
+    def "invalid surrogate pair - invalid trailing code without unicode escape 2"() {
+        given:
+        def input = '''"hello \\u{d83c}df7a"'''
+
+        when:
+        StringValueParsing.parseSingleQuotedString(input)
+
+        then:
+        RuntimeException e = thrown(RuntimeException)
+        e.message == "Invalid unicode"
+    }
+
+    def "invalid surrogate pair - invalid leading code"() {
+        given:
+        def input = '''"hello d83c\\u{df7a}"'''
+
+        when:
+        StringValueParsing.parseSingleQuotedString(input)
+
+        then:
+        RuntimeException e = thrown(RuntimeException)
+        e.message == "Invalid unicode"
+    }
+
+    def "invalid surrogate pair - invalid leading value with braces"() {
+        given:
+        def input = '''"\\u{5B57}\\uDC00"'''
+
+        when:
+        StringValueParsing.parseSingleQuotedString(input)
+
+        then:
+        RuntimeException e = thrown(RuntimeException)
+        e.message == "Invalid unicode"
+    }
+
+    def "invalid surrogate pair - invalid trailing value with braces"() {
+        given:
+        def input = '''"\\uD83D\\u{DBFF}"'''
+
+        when:
+        StringValueParsing.parseSingleQuotedString(input)
+
+        then:
+        RuntimeException e = thrown(RuntimeException)
+        e.message == "Invalid unicode"
+    }
+
+    def "invalid unicode code point - value is too high"() {
+        given:
+        def input = '''"\\u{fffffff}"'''
+
+        when:
+        StringValueParsing.parseSingleQuotedString(input)
+
+        then:
+        RuntimeException e = thrown(RuntimeException)
+        e.message == "Invalid unicode"
+    }
+}

From 7f75c6c869fe7bd3410644347391518a38ed97cb Mon Sep 17 00:00:00 2001
From: dondonz <13839920+dondonz@users.noreply.github.com>
Date: Thu, 8 Jul 2021 15:51:36 +1000
Subject: [PATCH 07/15] Add more end of string edge cases

---
 .../parser/UnicodeUtilParserTest.groovy       | 86 +++++++++++++------
 1 file changed, 61 insertions(+), 25 deletions(-)

diff --git a/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy b/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
index 76a68b257a..56a0525d6b 100644
--- a/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
+++ b/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
@@ -7,17 +7,17 @@ import graphql.language.StringValue
 import spock.lang.Specification
 
 class UnicodeUtilParserTest extends Specification {
-    /*
-        Implements RFC to support full Unicode https://github.com/graphql/graphql-spec/pull/849
-
-        Key changes
-        * SourceCharacters now include all Unicode scalar values. Previously only included up to U+FFFF (Basic Multilingual Plane).
-        * SourceCharacters now include control characters. Previously certain control characters were excluded.
-        * Surrogate pair validation added.
-
-        Note that "unescaped" Unicode characters such as 🍺 are handled by ANTLR grammar.
-        "Escaped" Unicode characters such as \\u{1F37A} are handled by StringValueParsing.
-    */
+    /**
+     * Implements RFC to support full Unicode https://github.com/graphql/graphql-spec/pull/849
+     *
+     * Key changes
+     * + SourceCharacters now include all Unicode scalar values. Previously only included up to U+FFFF (Basic Multilingual Plane).
+     * + SourceCharacters now include control characters. Previously certain control characters were excluded.
+     * + Surrogate pair validation added.
+     *
+     * Note that "unescaped" Unicode characters such as 🍺 are handled by ANTLR grammar.
+     * "Escaped" Unicode characters such as \\u{1F37A} are handled by StringValueParsing.
+     */
 
     // With this RFC, escaped code points outside the Basic Multilingual Plane (e.g. emojis) can be parsed.
     def "parsing beer stein as escaped unicode"() {
@@ -60,15 +60,15 @@ class UnicodeUtilParserTest extends Specification {
         argValue.getValue() == "🍺" // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
     }
 
-    /*
-        From the RFC:
-        For legacy reasons, a *supplementary character* may be escaped by two
-        fixed-width unicode escape sequences forming a *surrogate pair*. For example
-        the input `"\\uD83D\\uDCA9"` is a valid {StringValue} which represents the same
-        Unicode text as `"\\u{1F4A9}"`. While this legacy form is allowed, it should be
-        avoided as a variable-width unicode escape sequence is a clearer way to encode
-        such code points.
-    */
+    /**
+     * From the RFC:
+     * For legacy reasons, a *supplementary character* may be escaped by two
+     * fixed-width unicode escape sequences forming a *surrogate pair*. For example
+     * the input `"\\uD83D\\uDCA9"` is a valid {StringValue} which represents the same
+     * Unicode text as `"\\u{1F4A9}"`. While this legacy form is allowed, it should be
+     * avoided as a variable-width unicode escape sequence is a clearer way to encode
+     * such code points.
+     */
     def "allow surrogate pairs escaped unicode"() {
         given:
         def input = '''
@@ -84,13 +84,13 @@ class UnicodeUtilParserTest extends Specification {
         def argValue = field.arguments[0].value as StringValue
 
         then:
-        argValue.getValue() == "🍺" // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
+        argValue.getValue() == "🍺" // contains the beer icon U+1F37 A : http://www.charbase.com/1f37a-unicode-beer-mug
     }
 
-    /*
-        Valid surrogate pair combinations (from the RFC):
-        * If {leadingValue} is >= 0xD800 and <= 0xDBFF (a *Leading Surrogate*):
-        * Assert {trailingValue} is >= 0xDC00 and <= 0xDFFF (a *Trailing Surrogate*).
+    /**
+     * Valid surrogate pair combinations (from the RFC):
+     * + If {leadingValue} is >= 0xD800 and <= 0xDBFF (a *Leading Surrogate*):
+     * + Assert {trailingValue} is >= 0xDC00 and <= 0xDFFF (a *Trailing Surrogate*).
      */
     def "invalid surrogate pair - no trailing value"() {
         given:
@@ -185,6 +185,42 @@ class UnicodeUtilParserTest extends Specification {
         parsed == '''hello 🍺''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
     }
 
+    def "invalid surrogate pair - leading code with only \\ at end of string"() {
+        given:
+        def input = '''"hello \\u{d83c}\\"'''
+
+        when:
+        StringValueParsing.parseSingleQuotedString(input)
+
+        then:
+        RuntimeException e = thrown(RuntimeException)
+        e.message == "Invalid unicode"
+    }
+
+    def "invalid surrogate pair - leading code with only \\u at end of string"() {
+        given:
+        def input = '''"hello \\u{d83c}\\u"'''
+
+        when:
+        StringValueParsing.parseSingleQuotedString(input)
+
+        then:
+        RuntimeException e = thrown(RuntimeException)
+        e.message == "Invalid unicode"
+    }
+
+    def "invalid surrogate pair - trailing code without closing brace"() {
+        given:
+        def input = '''"hello \\u{d83c}\\u{df7a"'''
+
+        when:
+        StringValueParsing.parseSingleQuotedString(input)
+
+        then:
+        RuntimeException e = thrown(RuntimeException)
+        e.message == "Invalid unicode"
+    }
+
     def "invalid surrogate pair - invalid trailing code without unicode escape 1"() {
         given:
         def input = '''"hello \\u{d83c}{df7a}"'''

From 06d334f320314a10fa822471c6eb27e7b1ae766a Mon Sep 17 00:00:00 2001
From: dondonz <13839920+dondonz@users.noreply.github.com>
Date: Thu, 8 Jul 2021 15:53:58 +1000
Subject: [PATCH 08/15] Add surrogate pair validation

---
 src/main/java/graphql/parser/UnicodeUtil.java | 131 ++++++++++++------
 1 file changed, 91 insertions(+), 40 deletions(-)

diff --git a/src/main/java/graphql/parser/UnicodeUtil.java b/src/main/java/graphql/parser/UnicodeUtil.java
index b1ab251e83..4f1b8f193b 100644
--- a/src/main/java/graphql/parser/UnicodeUtil.java
+++ b/src/main/java/graphql/parser/UnicodeUtil.java
@@ -1,64 +1,115 @@
 package graphql.parser;
 
-import graphql.Assert;
 import graphql.Internal;
 
 import java.io.IOException;
 import java.io.StringWriter;
 
+import static graphql.Assert.assertShouldNeverHappen;
+
 /**
  * Contains Unicode helpers for parsing StringValue types in the grammar
  */
 @Internal
 public class UnicodeUtil {
     public static int MAX_UNICODE_CODE_POINT = 0x10FFFF;
+    public static int LEADING_SURROGATE_LOWER_BOUND = 0xD800;
+    public static int LEADING_SURROGATE_UPPER_BOUND = 0xDBFF;
+    public static int TRAILING_SURROGATE_LOWER_BOUND = 0xDC00;
+    public static int TRAILING_SURROGATE_UPPER_BOUND = 0xDFFF;
 
     public static int parseAndWriteUnicode(StringWriter writer, String string, int i) {
-        // Unicode characters can either be:
-        //  - four hex characters in the form \\u597D, or
-        //  - any number of hex characters surrounded by a brace in the form \\u{1F37A}
-
-        // Four hex character only case \\u597D, for code points in the Basic Multilingual Plane (BMP)
-        if (isNotBracedEscape(string, i)) {
-            String hexStr = string.substring(i + 1, i + 5);
-            int codepoint = Integer.parseInt(hexStr, 16);
-            writer.write(codepoint);
-            return i + 4;
-            // TODO error checking of invalid values
-        } else {
-            // Any number of hex characters e.g. \\u{1F37A}, which allows code points outside the Basic Multilingual Plane (BMP)
-            int startIx = i + 2;
-            int endIndexExclusive = startIx;
-            do {
-                if (endIndexExclusive + 1 >= string.length()) {
-                    throw new RuntimeException("invalid unicode encoding");
-                }
-            } while (string.charAt(++endIndexExclusive) != '}');
-
-            String hexStr = string.substring(startIx, endIndexExclusive);
-            Integer hexValue = Integer.parseInt(hexStr, 16);
-            if (isValidUnicodeCodePoint(hexValue)) {
-                char[] chars = Character.toChars(hexValue);
-                try {
-                    writer.write(chars);
-                } catch (IOException e) {
-                    return Assert.assertShouldNeverHappen();
-                }
-                return endIndexExclusive;
-            } else {
-                throw new RuntimeException("invalid unicode code point");
+        // Unicode code points can either be:
+        //  1. Unbraced: four hex characters in the form \\u597D, or
+        //  2. Braced: any number of hex characters surrounded by braces in the form \\u{1F37A}
+
+        // Extract the code point hex digits. Index i points to 'u'
+        int startIndex = isBracedEscape(string, i) ? i + 2 : i + 1;
+        int endIndexExclusive = getEndIndexExclusive(string, i);
+        // Index for parser to continue at, the last character of the escaped unicode character. Either } or hex digit
+        int continueIndex = isBracedEscape(string, i) ? endIndexExclusive : endIndexExclusive - 1;
+
+        String hexStr = string.substring(startIndex, endIndexExclusive);
+        Integer codePoint = Integer.parseInt(hexStr, 16);
+
+        if (isTrailingSurrogateValue(codePoint)) {
+            // A trailing surrogate value must be preceded with a leading surrogate value
+            throw new RuntimeException("Invalid unicode");
+        } else if (isLeadingSurrogateValue(codePoint)) {
+            // A leading surrogate value must be followed by a trailing surrogate value
+            if (!isEscapedUnicode(string, continueIndex + 1)) {
+                throw new RuntimeException("Invalid unicode");
+            }
+
+            // Shift parser ahead to 'u' in second escaped Unicode character
+            i = continueIndex + 2;
+            int trailingStartIndex = isBracedEscape(string, i) ? i + 2 : i + 1;
+            int trailingEndIndexExclusive = getEndIndexExclusive(string, i);
+            String trailingHexStr = string.substring(trailingStartIndex, trailingEndIndexExclusive);
+            Integer trailingCodePoint = Integer.parseInt(trailingHexStr, 16);
+
+            if (isTrailingSurrogateValue(trailingCodePoint)) {
+                writeCodePoint(writer, codePoint);
+                writeCodePoint(writer, trailingCodePoint);
+                continueIndex = isBracedEscape(string, i) ? trailingEndIndexExclusive : trailingEndIndexExclusive - 1;
+                return continueIndex;
             }
+
+            throw new RuntimeException("Invalid unicode");
+        } else if (isValidUnicodeCodePoint(codePoint)) {
+            writeCodePoint(writer, codePoint);
+            return continueIndex;
         }
-//        Assert.assertShouldNeverHappen();
-        // TODO error checking of invalid values
+
+        throw new RuntimeException("Invalid unicode");
     }
 
-    private static boolean isNotBracedEscape(String string, int i) {
-        return string.charAt(i + 1) != '{';
+    private static int getEndIndexExclusive(String string, int i) {
+        // Unbraced case, with exactly 4 hex digits
+        if (string.length() > i + 5 && !isBracedEscape(string, i)) {
+            return i + 5;
+        }
+
+        // Braced case, with any number of hex digits
+        int endIndexExclusive = i + 2;
+        do {
+            if (endIndexExclusive + 1 >= string.length()) {
+                throw new RuntimeException("Invalid unicode");
+            }
+        } while (string.charAt(++endIndexExclusive) != '}');
+
+        return endIndexExclusive;
     }
 
-    private static boolean isValidUnicodeCodePoint(Integer value) {
-        // TODO: Add bad surrogate checks
+    private static boolean isValidUnicodeCodePoint(int value) {
         return value <= MAX_UNICODE_CODE_POINT;
     }
+
+    private static boolean isEscapedUnicode(String string, int index) {
+        if (index + 1 >= string.length()) {
+            throw new RuntimeException("Invalid unicode");
+        }
+        return string.charAt(index) == '\\' && string.charAt(index + 1) == 'u';
+    }
+
+    private static boolean isLeadingSurrogateValue(int value) {
+        return LEADING_SURROGATE_LOWER_BOUND <= value && value <= LEADING_SURROGATE_UPPER_BOUND;
+    }
+
+    private static boolean isTrailingSurrogateValue(int value) {
+        return TRAILING_SURROGATE_LOWER_BOUND <= value && value <= TRAILING_SURROGATE_UPPER_BOUND;
+    }
+
+    private static void writeCodePoint(StringWriter writer, int codepoint) {
+        char[] chars = Character.toChars(codepoint);
+        try {
+            writer.write(chars);
+        } catch (IOException e) {
+            assertShouldNeverHappen();
+        }
+    }
+
+    private static boolean isBracedEscape(String string, int i) {
+        return string.charAt(i + 1) == '{';
+    }
 }

From 5e788a420915176a787f34b16faeb84855d26be7 Mon Sep 17 00:00:00 2001
From: dondonz <13839920+dondonz@users.noreply.github.com>
Date: Fri, 9 Jul 2021 14:27:17 +1000
Subject: [PATCH 09/15] Raise InvalidSyntaxException for invalid Unicode

---
 src/main/java/graphql/parser/UnicodeUtil.java | 16 +++---
 .../parser/UnicodeUtilParserTest.groovy       | 56 +++++++++----------
 2 files changed, 35 insertions(+), 37 deletions(-)

diff --git a/src/main/java/graphql/parser/UnicodeUtil.java b/src/main/java/graphql/parser/UnicodeUtil.java
index 4f1b8f193b..2d131a5c36 100644
--- a/src/main/java/graphql/parser/UnicodeUtil.java
+++ b/src/main/java/graphql/parser/UnicodeUtil.java
@@ -33,12 +33,10 @@ public static int parseAndWriteUnicode(StringWriter writer, String string, int i
         Integer codePoint = Integer.parseInt(hexStr, 16);
 
         if (isTrailingSurrogateValue(codePoint)) {
-            // A trailing surrogate value must be preceded with a leading surrogate value
-            throw new RuntimeException("Invalid unicode");
+            throw new InvalidSyntaxException(null, "Invalid unicode - trailing surrogate must be preceded with a leading surrogate -", null, string.substring(i - 1, continueIndex + 1), null);
         } else if (isLeadingSurrogateValue(codePoint)) {
-            // A leading surrogate value must be followed by a trailing surrogate value
             if (!isEscapedUnicode(string, continueIndex + 1)) {
-                throw new RuntimeException("Invalid unicode");
+                throw new InvalidSyntaxException(null, "Invalid unicode - leading surrogate must be followed by a trailing surrogate -", null, string.substring(i - 1, continueIndex + 1), null);
             }
 
             // Shift parser ahead to 'u' in second escaped Unicode character
@@ -47,21 +45,21 @@ public static int parseAndWriteUnicode(StringWriter writer, String string, int i
             int trailingEndIndexExclusive = getEndIndexExclusive(string, i);
             String trailingHexStr = string.substring(trailingStartIndex, trailingEndIndexExclusive);
             Integer trailingCodePoint = Integer.parseInt(trailingHexStr, 16);
+            continueIndex = isBracedEscape(string, i) ? trailingEndIndexExclusive : trailingEndIndexExclusive - 1;
 
             if (isTrailingSurrogateValue(trailingCodePoint)) {
                 writeCodePoint(writer, codePoint);
                 writeCodePoint(writer, trailingCodePoint);
-                continueIndex = isBracedEscape(string, i) ? trailingEndIndexExclusive : trailingEndIndexExclusive - 1;
                 return continueIndex;
             }
 
-            throw new RuntimeException("Invalid unicode");
+            throw new InvalidSyntaxException(null, "Invalid unicode - leading surrogate must be followed by a trailing surrogate -", null, string.substring(i - 1, continueIndex + 1), null);
         } else if (isValidUnicodeCodePoint(codePoint)) {
             writeCodePoint(writer, codePoint);
             return continueIndex;
         }
 
-        throw new RuntimeException("Invalid unicode");
+        throw new InvalidSyntaxException(null, "Invalid unicode - not a valid code point -", null, string.substring(i - 1, continueIndex + 1), null);
     }
 
     private static int getEndIndexExclusive(String string, int i) {
@@ -74,7 +72,7 @@ private static int getEndIndexExclusive(String string, int i) {
         int endIndexExclusive = i + 2;
         do {
             if (endIndexExclusive + 1 >= string.length()) {
-                throw new RuntimeException("Invalid unicode");
+                throw new InvalidSyntaxException(null, "Invalid unicode - incorrectly formatted escape -", null, string.substring(i - 1, endIndexExclusive), null);
             }
         } while (string.charAt(++endIndexExclusive) != '}');
 
@@ -87,7 +85,7 @@ private static boolean isValidUnicodeCodePoint(int value) {
 
     private static boolean isEscapedUnicode(String string, int index) {
         if (index + 1 >= string.length()) {
-            throw new RuntimeException("Invalid unicode");
+            return false;
         }
         return string.charAt(index) == '\\' && string.charAt(index + 1) == 'u';
     }
diff --git a/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy b/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
index 56a0525d6b..93ab188cf7 100644
--- a/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
+++ b/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
@@ -100,8 +100,8 @@ class UnicodeUtilParserTest extends Specification {
         StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        RuntimeException e = thrown(RuntimeException)
-        e.message == "Invalid unicode"
+        InvalidSyntaxException e = thrown(InvalidSyntaxException)
+        e.message == "Invalid Syntax : Invalid unicode - leading surrogate must be followed by a trailing surrogate - offending token '\\uD83D'"
     }
 
     def "invalid surrogate pair - end of string"() {
@@ -112,8 +112,8 @@ class UnicodeUtilParserTest extends Specification {
         StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        RuntimeException e = thrown(RuntimeException)
-        e.message == "Invalid unicode"
+        InvalidSyntaxException e = thrown(InvalidSyntaxException)
+        e.message == "Invalid Syntax : Invalid unicode - leading surrogate must be followed by a trailing surrogate - offending token '\\uD83D'"
     }
 
     def "invalid surrogate pair - invalid trailing value"() {
@@ -124,8 +124,8 @@ class UnicodeUtilParserTest extends Specification {
         StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        RuntimeException e = thrown(RuntimeException)
-        e.message == "Invalid unicode"
+        InvalidSyntaxException e = thrown(InvalidSyntaxException)
+        e.message == "Invalid Syntax : Invalid unicode - leading surrogate must be followed by a trailing surrogate - offending token '\\uDBFF'"
     }
 
     def "invalid surrogate pair - no leading value"() {
@@ -136,8 +136,8 @@ class UnicodeUtilParserTest extends Specification {
         StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        RuntimeException e = thrown(RuntimeException)
-        e.message == "Invalid unicode"
+        InvalidSyntaxException e = thrown(InvalidSyntaxException)
+        e.message == "Invalid Syntax : Invalid unicode - trailing surrogate must be preceded with a leading surrogate - offending token '\\uDC00'"
     }
 
     def "invalid surrogate pair - invalid leading value"() {
@@ -148,8 +148,8 @@ class UnicodeUtilParserTest extends Specification {
         StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        RuntimeException e = thrown(RuntimeException)
-        e.message == "Invalid unicode"
+        InvalidSyntaxException e = thrown(InvalidSyntaxException)
+        e.message == "Invalid Syntax : Invalid unicode - trailing surrogate must be preceded with a leading surrogate - offending token '\\uDC00'"
     }
 
     def "valid surrogate pair - leading code with braces"() {
@@ -193,8 +193,8 @@ class UnicodeUtilParserTest extends Specification {
         StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        RuntimeException e = thrown(RuntimeException)
-        e.message == "Invalid unicode"
+        InvalidSyntaxException e = thrown(InvalidSyntaxException)
+        e.message == "Invalid Syntax : Invalid unicode - leading surrogate must be followed by a trailing surrogate - offending token '\\u{d83c}'"
     }
 
     def "invalid surrogate pair - leading code with only \\u at end of string"() {
@@ -205,8 +205,8 @@ class UnicodeUtilParserTest extends Specification {
         StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        RuntimeException e = thrown(RuntimeException)
-        e.message == "Invalid unicode"
+        InvalidSyntaxException e = thrown(InvalidSyntaxException)
+        e.message == "Invalid Syntax : Invalid unicode - incorrectly formatted escape - offending token '\\u\"'"
     }
 
     def "invalid surrogate pair - trailing code without closing brace"() {
@@ -217,8 +217,8 @@ class UnicodeUtilParserTest extends Specification {
         StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        RuntimeException e = thrown(RuntimeException)
-        e.message == "Invalid unicode"
+        InvalidSyntaxException e = thrown(InvalidSyntaxException)
+        e.message == "Invalid Syntax : Invalid unicode - incorrectly formatted escape - offending token '\\u{df7a'"
     }
 
     def "invalid surrogate pair - invalid trailing code without unicode escape 1"() {
@@ -229,8 +229,8 @@ class UnicodeUtilParserTest extends Specification {
         StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        RuntimeException e = thrown(RuntimeException)
-        e.message == "Invalid unicode"
+        InvalidSyntaxException e = thrown(InvalidSyntaxException)
+        e.message == "Invalid Syntax : Invalid unicode - leading surrogate must be followed by a trailing surrogate - offending token '\\u{d83c}'"
     }
 
     def "invalid surrogate pair - invalid trailing code without unicode escape 2"() {
@@ -241,8 +241,8 @@ class UnicodeUtilParserTest extends Specification {
         StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        RuntimeException e = thrown(RuntimeException)
-        e.message == "Invalid unicode"
+        InvalidSyntaxException e = thrown(InvalidSyntaxException)
+        e.message == "Invalid Syntax : Invalid unicode - leading surrogate must be followed by a trailing surrogate - offending token '\\u{d83c}'"
     }
 
     def "invalid surrogate pair - invalid leading code"() {
@@ -253,8 +253,8 @@ class UnicodeUtilParserTest extends Specification {
         StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        RuntimeException e = thrown(RuntimeException)
-        e.message == "Invalid unicode"
+        InvalidSyntaxException e = thrown(InvalidSyntaxException)
+        e.message == "Invalid Syntax : Invalid unicode - trailing surrogate must be preceded with a leading surrogate - offending token '\\u{df7a}'"
     }
 
     def "invalid surrogate pair - invalid leading value with braces"() {
@@ -265,8 +265,8 @@ class UnicodeUtilParserTest extends Specification {
         StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        RuntimeException e = thrown(RuntimeException)
-        e.message == "Invalid unicode"
+        InvalidSyntaxException e = thrown(InvalidSyntaxException)
+        e.message == "Invalid Syntax : Invalid unicode - trailing surrogate must be preceded with a leading surrogate - offending token '\\uDC00'"
     }
 
     def "invalid surrogate pair - invalid trailing value with braces"() {
@@ -277,8 +277,8 @@ class UnicodeUtilParserTest extends Specification {
         StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        RuntimeException e = thrown(RuntimeException)
-        e.message == "Invalid unicode"
+        InvalidSyntaxException e = thrown(InvalidSyntaxException)
+        e.message == "Invalid Syntax : Invalid unicode - leading surrogate must be followed by a trailing surrogate - offending token '\\u{DBFF}'"
     }
 
     def "invalid unicode code point - value is too high"() {
@@ -289,7 +289,7 @@ class UnicodeUtilParserTest extends Specification {
         StringValueParsing.parseSingleQuotedString(input)
 
         then:
-        RuntimeException e = thrown(RuntimeException)
-        e.message == "Invalid unicode"
+        InvalidSyntaxException e = thrown(InvalidSyntaxException)
+        e.message == "Invalid Syntax : Invalid unicode - not a valid code point - offending token '\\u{fffffff}'"
     }
 }

From dd290eabdc219ba5e725e0364cddcd2210fa169b Mon Sep 17 00:00:00 2001
From: dondonz <13839920+dondonz@users.noreply.github.com>
Date: Wed, 14 Jul 2021 11:54:02 +1000
Subject: [PATCH 10/15] Update ANTLR grammar with new SourceCharacter
 definition

---
 src/main/antlr/GraphqlCommon.g4 | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/src/main/antlr/GraphqlCommon.g4 b/src/main/antlr/GraphqlCommon.g4
index 5ace483ff9..5ee4110a77 100644
--- a/src/main/antlr/GraphqlCommon.g4
+++ b/src/main/antlr/GraphqlCommon.g4
@@ -117,10 +117,15 @@ StringValue:
 
 fragment BlockStringCharacter:
 '\\"""'|
-ExtendedSourceCharacter;
+SourceCharacter;
 
+// this is SourceCharacter without
+// \u000a New line
+// \u000d Carriage return
+// \u0022 '"'
+// \u005c '\'
 fragment StringCharacter:
-([\u0009\u0020\u0021] | [\u0023-\u005b] | [\u005d-\u{10FFFF}]) |  // this is SourceCharacter without '"' and '\'
+([\u0000-\u0009] | [\u000b\u000c\u000e-\u0021] | [\u0023-\u005b] | [\u005d-\ud7ff] | [\ue000-\u{10ffff}]) |
 '\\u' EscapedUnicode  |
 '\\' EscapedCharacter;
 
@@ -128,20 +133,13 @@ fragment EscapedCharacter :  ["\\/bfnrt];
 fragment EscapedUnicode : Hex Hex Hex Hex | '{' Hex+ '}';
 fragment Hex : [0-9a-fA-F];
 
+// this is the spec definition. Excludes surrogate leading and trailing values.
+fragment SourceCharacter : [\u0000-\ud7ff] | [\ue000-\u{10ffff}];
 
-// this is currently not covered by the spec because we allow all unicode chars
-// u0009 = \t Horizontal tab
-// u000a = \n line feed
-// u000d = \r carriage return
-// u0020 = space
-fragment ExtendedSourceCharacter :[\u0009\u000A\u000D\u0020-\u{10FFFF}];
-fragment ExtendedSourceCharacterWithoutLineFeed :[\u0009\u0020-\u{10FFFF}];
+// CommentChar
+fragment SourceCharacterWithoutLineFeed : [\u0000-\u0009] | [\u000b\u000c\u000e-\ud7ff] | [\ue000-\u{10ffff}];
 
-// this is the spec definition
-// fragment SourceCharacter :[\u0009\u000A\u000D\u0020-\uFFFF];
-
-
-Comment: '#' ExtendedSourceCharacterWithoutLineFeed* -> channel(2);
+Comment: '#' SourceCharacterWithoutLineFeed* -> channel(2);
 
 LF: [\n] -> channel(3);
 CR: [\r] -> channel(3);

From 2287201f77044cdc425b303f13a0219c903f88c1 Mon Sep 17 00:00:00 2001
From: dondonz <13839920+dondonz@users.noreply.github.com>
Date: Wed, 14 Jul 2021 12:04:48 +1000
Subject: [PATCH 11/15] Tidy test name and location

---
 .../groovy/graphql/parser/ParserTest.groovy   | 36 ++++++++++++++++
 ...y => StringValueParsingUnicodeTest.groovy} | 43 ++-----------------
 2 files changed, 39 insertions(+), 40 deletions(-)
 rename src/test/groovy/graphql/parser/{UnicodeUtilParserTest.groovy => StringValueParsingUnicodeTest.groovy} (87%)

diff --git a/src/test/groovy/graphql/parser/ParserTest.groovy b/src/test/groovy/graphql/parser/ParserTest.groovy
index 790491e80b..c39da47874 100644
--- a/src/test/groovy/graphql/parser/ParserTest.groovy
+++ b/src/test/groovy/graphql/parser/ParserTest.groovy
@@ -976,4 +976,40 @@ triple3 : """edge cases \\""" "" " \\"" \\" edge cases"""
         !type.getIgnoredChars().getLeft().isEmpty()
         !type.getIgnoredChars().getRight().isEmpty()
     }
+
+    def "allow braced escaped unicode"() {
+        given:
+        def input = '''
+              {
+              foo(arg: "\\u{1F37A}")
+               }
+        '''
+
+        when:
+        Document document = Parser.parse(input)
+        OperationDefinition operationDefinition = (document.definitions[0] as OperationDefinition)
+        def field = operationDefinition.getSelectionSet().getSelections()[0] as Field
+        def argValue = field.arguments[0].value as StringValue
+
+        then:
+        argValue.getValue() == "🍺" // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
+    }
+
+    def "allow surrogate pairs escaped unicode"() {
+        given:
+        def input = '''
+              {
+              foo(arg: "\\ud83c\\udf7a")
+               }
+        '''
+
+        when:
+        Document document = Parser.parse(input)
+        OperationDefinition operationDefinition = (document.definitions[0] as OperationDefinition)
+        def field = operationDefinition.getSelectionSet().getSelections()[0] as Field
+        def argValue = field.arguments[0].value as StringValue
+
+        then:
+        argValue.getValue() == "🍺" // contains the beer icon U+1F37 A : http://www.charbase.com/1f37a-unicode-beer-mug
+    }
 }
diff --git a/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy b/src/test/groovy/graphql/parser/StringValueParsingUnicodeTest.groovy
similarity index 87%
rename from src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
rename to src/test/groovy/graphql/parser/StringValueParsingUnicodeTest.groovy
index 93ab188cf7..63c59d8011 100644
--- a/src/test/groovy/graphql/parser/UnicodeUtilParserTest.groovy
+++ b/src/test/groovy/graphql/parser/StringValueParsingUnicodeTest.groovy
@@ -6,7 +6,7 @@ import graphql.language.OperationDefinition
 import graphql.language.StringValue
 import spock.lang.Specification
 
-class UnicodeUtilParserTest extends Specification {
+class StringValueParsingUnicodeTest extends Specification {
     /**
      * Implements RFC to support full Unicode https://github.com/graphql/graphql-spec/pull/849
      *
@@ -42,24 +42,6 @@ class UnicodeUtilParserTest extends Specification {
         parsed == '''🍺 hello''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
     }
 
-    def "allow braced escaped unicode"() {
-        given:
-        def input = '''
-              {
-              foo(arg: "\\u{1F37A}")
-               }
-        '''
-
-        when:
-        Document document = Parser.parse(input)
-        OperationDefinition operationDefinition = (document.definitions[0] as OperationDefinition)
-        def field = operationDefinition.getSelectionSet().getSelections()[0] as Field
-        def argValue = field.arguments[0].value as StringValue
-
-        then:
-        argValue.getValue() == "🍺" // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
-    }
-
     /**
      * From the RFC:
      * For legacy reasons, a *supplementary character* may be escaped by two
@@ -68,27 +50,8 @@ class UnicodeUtilParserTest extends Specification {
      * Unicode text as `"\\u{1F4A9}"`. While this legacy form is allowed, it should be
      * avoided as a variable-width unicode escape sequence is a clearer way to encode
      * such code points.
-     */
-    def "allow surrogate pairs escaped unicode"() {
-        given:
-        def input = '''
-              {
-              foo(arg: "\\ud83c\\udf7a")
-               }
-        '''
-
-        when:
-        Document document = Parser.parse(input)
-        OperationDefinition operationDefinition = (document.definitions[0] as OperationDefinition)
-        def field = operationDefinition.getSelectionSet().getSelections()[0] as Field
-        def argValue = field.arguments[0].value as StringValue
-
-        then:
-        argValue.getValue() == "🍺" // contains the beer icon U+1F37 A : http://www.charbase.com/1f37a-unicode-beer-mug
-    }
-
-    /**
-     * Valid surrogate pair combinations (from the RFC):
+     *
+     * Valid surrogate pair combinations:
      * + If {leadingValue} is >= 0xD800 and <= 0xDBFF (a *Leading Surrogate*):
      * + Assert {trailingValue} is >= 0xDC00 and <= 0xDFFF (a *Trailing Surrogate*).
      */

From c93fd5a0d25e8952a68674a8413377a7a139fc7b Mon Sep 17 00:00:00 2001
From: dondonz <13839920+dondonz@users.noreply.github.com>
Date: Wed, 14 Jul 2021 12:35:48 +1000
Subject: [PATCH 12/15] Add source location to Unicode error messages

---
 src/main/java/graphql/parser/AntlrHelper.java |  4 ++
 .../parser/GraphqlAntlrToLanguage.java        |  9 +++--
 .../graphql/parser/StringValueParsing.java    |  5 ++-
 src/main/java/graphql/parser/UnicodeUtil.java | 19 +++++-----
 .../StringValueParsingUnicodeTest.groovy      | 38 +++++++++----------
 5 files changed, 41 insertions(+), 34 deletions(-)

diff --git a/src/main/java/graphql/parser/AntlrHelper.java b/src/main/java/graphql/parser/AntlrHelper.java
index a3fc74d272..4eb52d87ba 100644
--- a/src/main/java/graphql/parser/AntlrHelper.java
+++ b/src/main/java/graphql/parser/AntlrHelper.java
@@ -3,6 +3,7 @@
 import graphql.Internal;
 import graphql.language.SourceLocation;
 import org.antlr.v4.runtime.Token;
+import org.antlr.v4.runtime.tree.TerminalNode;
 
 import java.util.List;
 
@@ -28,6 +29,9 @@ public static SourceLocation createSourceLocation(MultiSourceReader multiSourceR
         return AntlrHelper.createSourceLocation(multiSourceReader, token.getLine(), token.getCharPositionInLine());
     }
 
+    public static SourceLocation createSourceLocation(MultiSourceReader multiSourceReader, TerminalNode terminalNode) {
+        return AntlrHelper.createSourceLocation(multiSourceReader, terminalNode.getSymbol().getLine(), terminalNode.getSymbol().getCharPositionInLine());
+    }
 
     /* grabs 3 lines before and after the syntax error */
     public static String createPreview(MultiSourceReader multiSourceReader, int antrlLine) {
diff --git a/src/main/java/graphql/parser/GraphqlAntlrToLanguage.java b/src/main/java/graphql/parser/GraphqlAntlrToLanguage.java
index 770a590c27..6e9ecfe6c1 100644
--- a/src/main/java/graphql/parser/GraphqlAntlrToLanguage.java
+++ b/src/main/java/graphql/parser/GraphqlAntlrToLanguage.java
@@ -760,13 +760,14 @@ protected Value createValue(GraphqlParser.ValueContext ctx) {
         return assertShouldNeverHappen();
     }
 
-    static String quotedString(TerminalNode terminalNode) {
+    protected String quotedString(TerminalNode terminalNode) {
         boolean multiLine = terminalNode.getText().startsWith("\"\"\"");
         String strText = terminalNode.getText();
+        SourceLocation sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, terminalNode);
         if (multiLine) {
             return parseTripleQuotedString(strText);
         } else {
-            return parseSingleQuotedString(strText);
+            return parseSingleQuotedString(strText, sourceLocation);
         }
     }
 
@@ -839,12 +840,12 @@ protected Description newDescription(GraphqlParser.DescriptionContext descriptio
         }
         String content = terminalNode.getText();
         boolean multiLine = content.startsWith("\"\"\"");
+        SourceLocation sourceLocation = getSourceLocation(descriptionCtx);
         if (multiLine) {
             content = parseTripleQuotedString(content);
         } else {
-            content = parseSingleQuotedString(content);
+            content = parseSingleQuotedString(content, sourceLocation);
         }
-        SourceLocation sourceLocation = getSourceLocation(descriptionCtx);
         return new Description(content, sourceLocation, multiLine);
     }
 
diff --git a/src/main/java/graphql/parser/StringValueParsing.java b/src/main/java/graphql/parser/StringValueParsing.java
index 83b214cab1..4fe1219061 100644
--- a/src/main/java/graphql/parser/StringValueParsing.java
+++ b/src/main/java/graphql/parser/StringValueParsing.java
@@ -2,6 +2,7 @@
 
 import graphql.Assert;
 import graphql.Internal;
+import graphql.language.SourceLocation;
 
 import java.io.StringWriter;
 import java.util.ArrayList;
@@ -102,7 +103,7 @@ private static boolean containsOnlyWhiteSpace(String str) {
         return leadingWhitespace(str) == str.length();
     }
 
-    public static String parseSingleQuotedString(String string) {
+    public static String parseSingleQuotedString(String string, SourceLocation sourceLocation) {
         StringWriter writer = new StringWriter(string.length() - 2);
         int end = string.length() - 1;
         for (int i = 1; i < end; i++) {
@@ -139,7 +140,7 @@ public static String parseSingleQuotedString(String string) {
                     writer.write('\t');
                     continue;
                 case 'u':
-                    i = UnicodeUtil.parseAndWriteUnicode(writer, string, i);
+                    i = UnicodeUtil.parseAndWriteUnicode(writer, string, i, sourceLocation);
                     continue;
                 default:
                     Assert.assertShouldNeverHappen();
diff --git a/src/main/java/graphql/parser/UnicodeUtil.java b/src/main/java/graphql/parser/UnicodeUtil.java
index 2d131a5c36..53d1cefe05 100644
--- a/src/main/java/graphql/parser/UnicodeUtil.java
+++ b/src/main/java/graphql/parser/UnicodeUtil.java
@@ -1,6 +1,7 @@
 package graphql.parser;
 
 import graphql.Internal;
+import graphql.language.SourceLocation;
 
 import java.io.IOException;
 import java.io.StringWriter;
@@ -18,14 +19,14 @@ public class UnicodeUtil {
     public static int TRAILING_SURROGATE_LOWER_BOUND = 0xDC00;
     public static int TRAILING_SURROGATE_UPPER_BOUND = 0xDFFF;
 
-    public static int parseAndWriteUnicode(StringWriter writer, String string, int i) {
+    public static int parseAndWriteUnicode(StringWriter writer, String string, int i, SourceLocation sourceLocation) {
         // Unicode code points can either be:
         //  1. Unbraced: four hex characters in the form \\u597D, or
         //  2. Braced: any number of hex characters surrounded by braces in the form \\u{1F37A}
 
         // Extract the code point hex digits. Index i points to 'u'
         int startIndex = isBracedEscape(string, i) ? i + 2 : i + 1;
-        int endIndexExclusive = getEndIndexExclusive(string, i);
+        int endIndexExclusive = getEndIndexExclusive(string, i, sourceLocation);
         // Index for parser to continue at, the last character of the escaped unicode character. Either } or hex digit
         int continueIndex = isBracedEscape(string, i) ? endIndexExclusive : endIndexExclusive - 1;
 
@@ -33,16 +34,16 @@ public static int parseAndWriteUnicode(StringWriter writer, String string, int i
         Integer codePoint = Integer.parseInt(hexStr, 16);
 
         if (isTrailingSurrogateValue(codePoint)) {
-            throw new InvalidSyntaxException(null, "Invalid unicode - trailing surrogate must be preceded with a leading surrogate -", null, string.substring(i - 1, continueIndex + 1), null);
+            throw new InvalidSyntaxException(sourceLocation, "Invalid unicode - trailing surrogate must be preceded with a leading surrogate -", null, string.substring(i - 1, continueIndex + 1), null);
         } else if (isLeadingSurrogateValue(codePoint)) {
             if (!isEscapedUnicode(string, continueIndex + 1)) {
-                throw new InvalidSyntaxException(null, "Invalid unicode - leading surrogate must be followed by a trailing surrogate -", null, string.substring(i - 1, continueIndex + 1), null);
+                throw new InvalidSyntaxException(sourceLocation, "Invalid unicode - leading surrogate must be followed by a trailing surrogate -", null, string.substring(i - 1, continueIndex + 1), null);
             }
 
             // Shift parser ahead to 'u' in second escaped Unicode character
             i = continueIndex + 2;
             int trailingStartIndex = isBracedEscape(string, i) ? i + 2 : i + 1;
-            int trailingEndIndexExclusive = getEndIndexExclusive(string, i);
+            int trailingEndIndexExclusive = getEndIndexExclusive(string, i, sourceLocation);
             String trailingHexStr = string.substring(trailingStartIndex, trailingEndIndexExclusive);
             Integer trailingCodePoint = Integer.parseInt(trailingHexStr, 16);
             continueIndex = isBracedEscape(string, i) ? trailingEndIndexExclusive : trailingEndIndexExclusive - 1;
@@ -53,16 +54,16 @@ public static int parseAndWriteUnicode(StringWriter writer, String string, int i
                 return continueIndex;
             }
 
-            throw new InvalidSyntaxException(null, "Invalid unicode - leading surrogate must be followed by a trailing surrogate -", null, string.substring(i - 1, continueIndex + 1), null);
+            throw new InvalidSyntaxException(sourceLocation, "Invalid unicode - leading surrogate must be followed by a trailing surrogate -", null, string.substring(i - 1, continueIndex + 1), null);
         } else if (isValidUnicodeCodePoint(codePoint)) {
             writeCodePoint(writer, codePoint);
             return continueIndex;
         }
 
-        throw new InvalidSyntaxException(null, "Invalid unicode - not a valid code point -", null, string.substring(i - 1, continueIndex + 1), null);
+        throw new InvalidSyntaxException(sourceLocation, "Invalid unicode - not a valid code point -", null, string.substring(i - 1, continueIndex + 1), null);
     }
 
-    private static int getEndIndexExclusive(String string, int i) {
+    private static int getEndIndexExclusive(String string, int i, SourceLocation sourceLocation) {
         // Unbraced case, with exactly 4 hex digits
         if (string.length() > i + 5 && !isBracedEscape(string, i)) {
             return i + 5;
@@ -72,7 +73,7 @@ private static int getEndIndexExclusive(String string, int i) {
         int endIndexExclusive = i + 2;
         do {
             if (endIndexExclusive + 1 >= string.length()) {
-                throw new InvalidSyntaxException(null, "Invalid unicode - incorrectly formatted escape -", null, string.substring(i - 1, endIndexExclusive), null);
+                throw new InvalidSyntaxException(sourceLocation, "Invalid unicode - incorrectly formatted escape -", null, string.substring(i - 1, endIndexExclusive), null);
             }
         } while (string.charAt(++endIndexExclusive) != '}');
 
diff --git a/src/test/groovy/graphql/parser/StringValueParsingUnicodeTest.groovy b/src/test/groovy/graphql/parser/StringValueParsingUnicodeTest.groovy
index 63c59d8011..ca8f2035fb 100644
--- a/src/test/groovy/graphql/parser/StringValueParsingUnicodeTest.groovy
+++ b/src/test/groovy/graphql/parser/StringValueParsingUnicodeTest.groovy
@@ -25,7 +25,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"\\u{1F37A} hello"'''
 
         when:
-        String parsed = StringValueParsing.parseSingleQuotedString(input)
+        String parsed = StringValueParsing.parseSingleQuotedString(input, null)
 
         then:
         parsed == '''🍺 hello''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
@@ -36,7 +36,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"🍺 hello"'''
 
         when:
-        String parsed = StringValueParsing.parseSingleQuotedString(input)
+        String parsed = StringValueParsing.parseSingleQuotedString(input, null)
 
         then:
         parsed == '''🍺 hello''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
@@ -60,7 +60,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"\\uD83D hello"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input)
+        StringValueParsing.parseSingleQuotedString(input, null)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -72,7 +72,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"\\uD83D"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input)
+        StringValueParsing.parseSingleQuotedString(input, null)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -84,7 +84,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"\\uD83D\\uDBFF"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input)
+        StringValueParsing.parseSingleQuotedString(input, null)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -96,7 +96,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"\\uDC00"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input)
+        StringValueParsing.parseSingleQuotedString(input, null)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -108,7 +108,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"\\uD700\\uDC00"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input)
+        StringValueParsing.parseSingleQuotedString(input, null)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -120,7 +120,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"hello \\u{d83c}\\udf7a"'''
 
         when:
-        String parsed = StringValueParsing.parseSingleQuotedString(input)
+        String parsed = StringValueParsing.parseSingleQuotedString(input, null)
 
         then:
         parsed == '''hello 🍺''' // contains the beer icon U+1F37 A : http://www.charbase.com/1f37a-unicode-beer-mug
@@ -131,7 +131,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"hello \\ud83c\\u{df7a}"'''
 
         when:
-        String parsed = StringValueParsing.parseSingleQuotedString(input)
+        String parsed = StringValueParsing.parseSingleQuotedString(input, null)
 
         then:
         parsed == '''hello 🍺''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
@@ -142,7 +142,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"hello \\u{d83c}\\u{df7a}"'''
 
         when:
-        String parsed = StringValueParsing.parseSingleQuotedString(input)
+        String parsed = StringValueParsing.parseSingleQuotedString(input, null)
 
         then:
         parsed == '''hello 🍺''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
@@ -153,7 +153,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"hello \\u{d83c}\\"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input)
+        StringValueParsing.parseSingleQuotedString(input, null)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -165,7 +165,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"hello \\u{d83c}\\u"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input)
+        StringValueParsing.parseSingleQuotedString(input, null)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -177,7 +177,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"hello \\u{d83c}\\u{df7a"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input)
+        StringValueParsing.parseSingleQuotedString(input, null)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -189,7 +189,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"hello \\u{d83c}{df7a}"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input)
+        StringValueParsing.parseSingleQuotedString(input, null)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -201,7 +201,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"hello \\u{d83c}df7a"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input)
+        StringValueParsing.parseSingleQuotedString(input, null)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -213,7 +213,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"hello d83c\\u{df7a}"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input)
+        StringValueParsing.parseSingleQuotedString(input, null)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -225,7 +225,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"\\u{5B57}\\uDC00"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input)
+        StringValueParsing.parseSingleQuotedString(input, null)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -237,7 +237,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"\\uD83D\\u{DBFF}"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input)
+        StringValueParsing.parseSingleQuotedString(input, null)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -249,7 +249,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"\\u{fffffff}"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input)
+        StringValueParsing.parseSingleQuotedString(input, null)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)

From 66c0f975f5820cc6f76151c5036a720ce4a89a2e Mon Sep 17 00:00:00 2001
From: dondonz <13839920+dondonz@users.noreply.github.com>
Date: Wed, 14 Jul 2021 15:31:30 +1000
Subject: [PATCH 13/15] Add String parser overload when SourceLocation is not
 available

---
 .../graphql/parser/StringValueParsing.java    |  4 ++
 .../StringValueParsingUnicodeTest.groovy      | 38 +++++++++----------
 2 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/src/main/java/graphql/parser/StringValueParsing.java b/src/main/java/graphql/parser/StringValueParsing.java
index 4fe1219061..d9da00dc83 100644
--- a/src/main/java/graphql/parser/StringValueParsing.java
+++ b/src/main/java/graphql/parser/StringValueParsing.java
@@ -148,4 +148,8 @@ public static String parseSingleQuotedString(String string, SourceLocation sourc
         }
         return writer.toString();
     }
+
+    public static String parseSingleQuotedString(String string) {
+        return parseSingleQuotedString(string, null);
+    }
 }
diff --git a/src/test/groovy/graphql/parser/StringValueParsingUnicodeTest.groovy b/src/test/groovy/graphql/parser/StringValueParsingUnicodeTest.groovy
index ca8f2035fb..63c59d8011 100644
--- a/src/test/groovy/graphql/parser/StringValueParsingUnicodeTest.groovy
+++ b/src/test/groovy/graphql/parser/StringValueParsingUnicodeTest.groovy
@@ -25,7 +25,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"\\u{1F37A} hello"'''
 
         when:
-        String parsed = StringValueParsing.parseSingleQuotedString(input, null)
+        String parsed = StringValueParsing.parseSingleQuotedString(input)
 
         then:
         parsed == '''🍺 hello''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
@@ -36,7 +36,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"🍺 hello"'''
 
         when:
-        String parsed = StringValueParsing.parseSingleQuotedString(input, null)
+        String parsed = StringValueParsing.parseSingleQuotedString(input)
 
         then:
         parsed == '''🍺 hello''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
@@ -60,7 +60,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"\\uD83D hello"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input, null)
+        StringValueParsing.parseSingleQuotedString(input)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -72,7 +72,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"\\uD83D"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input, null)
+        StringValueParsing.parseSingleQuotedString(input)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -84,7 +84,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"\\uD83D\\uDBFF"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input, null)
+        StringValueParsing.parseSingleQuotedString(input)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -96,7 +96,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"\\uDC00"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input, null)
+        StringValueParsing.parseSingleQuotedString(input)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -108,7 +108,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"\\uD700\\uDC00"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input, null)
+        StringValueParsing.parseSingleQuotedString(input)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -120,7 +120,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"hello \\u{d83c}\\udf7a"'''
 
         when:
-        String parsed = StringValueParsing.parseSingleQuotedString(input, null)
+        String parsed = StringValueParsing.parseSingleQuotedString(input)
 
         then:
         parsed == '''hello 🍺''' // contains the beer icon U+1F37 A : http://www.charbase.com/1f37a-unicode-beer-mug
@@ -131,7 +131,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"hello \\ud83c\\u{df7a}"'''
 
         when:
-        String parsed = StringValueParsing.parseSingleQuotedString(input, null)
+        String parsed = StringValueParsing.parseSingleQuotedString(input)
 
         then:
         parsed == '''hello 🍺''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
@@ -142,7 +142,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"hello \\u{d83c}\\u{df7a}"'''
 
         when:
-        String parsed = StringValueParsing.parseSingleQuotedString(input, null)
+        String parsed = StringValueParsing.parseSingleQuotedString(input)
 
         then:
         parsed == '''hello 🍺''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
@@ -153,7 +153,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"hello \\u{d83c}\\"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input, null)
+        StringValueParsing.parseSingleQuotedString(input)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -165,7 +165,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"hello \\u{d83c}\\u"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input, null)
+        StringValueParsing.parseSingleQuotedString(input)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -177,7 +177,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"hello \\u{d83c}\\u{df7a"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input, null)
+        StringValueParsing.parseSingleQuotedString(input)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -189,7 +189,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"hello \\u{d83c}{df7a}"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input, null)
+        StringValueParsing.parseSingleQuotedString(input)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -201,7 +201,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"hello \\u{d83c}df7a"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input, null)
+        StringValueParsing.parseSingleQuotedString(input)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -213,7 +213,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"hello d83c\\u{df7a}"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input, null)
+        StringValueParsing.parseSingleQuotedString(input)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -225,7 +225,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"\\u{5B57}\\uDC00"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input, null)
+        StringValueParsing.parseSingleQuotedString(input)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -237,7 +237,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"\\uD83D\\u{DBFF}"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input, null)
+        StringValueParsing.parseSingleQuotedString(input)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)
@@ -249,7 +249,7 @@ class StringValueParsingUnicodeTest extends Specification {
         def input = '''"\\u{fffffff}"'''
 
         when:
-        StringValueParsing.parseSingleQuotedString(input, null)
+        StringValueParsing.parseSingleQuotedString(input)
 
         then:
         InvalidSyntaxException e = thrown(InvalidSyntaxException)

From 85a5234698469ead601cda52e0344b4d2d2fc99d Mon Sep 17 00:00:00 2001
From: dondonz <13839920+dondonz@users.noreply.github.com>
Date: Wed, 14 Jul 2021 15:41:57 +1000
Subject: [PATCH 14/15] Add Parser tests with SourceLocation in exception
 message

---
 .../groovy/graphql/parser/ParserTest.groovy   | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/src/test/groovy/graphql/parser/ParserTest.groovy b/src/test/groovy/graphql/parser/ParserTest.groovy
index c39da47874..dc92da158f 100644
--- a/src/test/groovy/graphql/parser/ParserTest.groovy
+++ b/src/test/groovy/graphql/parser/ParserTest.groovy
@@ -1012,4 +1012,36 @@ triple3 : """edge cases \\""" "" " \\"" \\" edge cases"""
         then:
         argValue.getValue() == "🍺" // contains the beer icon U+1F37 A : http://www.charbase.com/1f37a-unicode-beer-mug
     }
+
+    def "invalid surrogate pair - no trailing value"() {
+        given:
+        def input = '''
+              {
+              foo(arg: "\\ud83c")
+               }
+        '''
+
+        when:
+        Parser.parse(input)
+
+        then:
+        InvalidSyntaxException e = thrown(InvalidSyntaxException)
+        e.message == "Invalid Syntax : Invalid unicode - leading surrogate must be followed by a trailing surrogate - offending token '\\ud83c' at line 3 column 24"
+    }
+
+    def "invalid surrogate pair - no leading value"() {
+        given:
+        def input = '''
+              {
+              foo(arg: "\\uDC00")
+               }
+        '''
+
+        when:
+        Parser.parse(input)
+
+        then:
+        InvalidSyntaxException e = thrown(InvalidSyntaxException)
+        e.message == "Invalid Syntax : Invalid unicode - trailing surrogate must be preceded with a leading surrogate - offending token '\\uDC00' at line 3 column 24"
+    }
 }

From b60d28ab8963286758547c3fedacb9874814841f Mon Sep 17 00:00:00 2001
From: dondonz <13839920+dondonz@users.noreply.github.com>
Date: Wed, 14 Jul 2021 15:54:30 +1000
Subject: [PATCH 15/15] Add full invalid Unicode surrogate test

---
 src/test/groovy/graphql/GraphQLTest.groovy | 25 ++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/src/test/groovy/graphql/GraphQLTest.groovy b/src/test/groovy/graphql/GraphQLTest.groovy
index 2e4f7b9a7c..dcf75a764f 100644
--- a/src/test/groovy/graphql/GraphQLTest.groovy
+++ b/src/test/groovy/graphql/GraphQLTest.groovy
@@ -179,6 +179,31 @@ class GraphQLTest extends Specification {
         errors[0].locations == [new SourceLocation(1, 8)]
     }
 
+    def "query with invalid Unicode surrogate in argument - no trailing value"() {
+        given:
+        GraphQLSchema schema = newSchema().query(
+                newObject()
+                        .name("RootQueryType")
+                        .field(newFieldDefinition()
+                                .name("field")
+                                .type(GraphQLString)
+                                .argument(newArgument()
+                                        .name("arg")
+                                        .type(GraphQLNonNull.nonNull(GraphQLString))))
+                        .build()
+        ).build()
+
+        when:
+        // Invalid Unicode character - leading surrogate value without trailing surrogate value
+        def errors = GraphQL.newGraphQL(schema).build().execute('{ hello(arg:"\\ud83c") }').errors
+
+        then:
+        errors.size() == 1
+        errors[0].errorType == ErrorType.InvalidSyntax
+        errors[0].message == "Invalid Syntax : Invalid unicode - leading surrogate must be followed by a trailing surrogate - offending token '\\ud83c' at line 1 column 13"
+        errors[0].locations == [new SourceLocation(1, 13)]
+    }
+
     def "non null argument is missing"() {
         given:
         GraphQLSchema schema = newSchema().query(