diff --git a/src/main/antlr/GraphqlCommon.g4 b/src/main/antlr/GraphqlCommon.g4 index e7d500cc71..5ee4110a77 100644 --- a/src/main/antlr/GraphqlCommon.g4 +++ b/src/main/antlr/GraphqlCommon.g4 @@ -117,31 +117,29 @@ StringValue: fragment BlockStringCharacter: '\\"""'| -ExtendedSourceCharacter; +SourceCharacter; +// this is SourceCharacter without +// \u000a New line +// \u000d Carriage return +// \u0022 '"' +// \u005c '\' fragment StringCharacter: -([\u0009\u0020\u0021] | [\u0023-\u005b] | [\u005d-\u{10FFFF}]) | // this is SoureCharacter without '"' and '\' +([\u0000-\u0009] | [\u000b\u000c\u000e-\u0021] | [\u0023-\u005b] | [\u005d-\ud7ff] | [\ue000-\u{10ffff}]) | '\\u' EscapedUnicode | '\\' EscapedCharacter; fragment EscapedCharacter : ["\\/bfnrt]; -fragment EscapedUnicode : Hex Hex Hex Hex; +fragment EscapedUnicode : Hex Hex Hex Hex | '{' Hex+ '}'; fragment Hex : [0-9a-fA-F]; +// this is the spec definition. Excludes surrogate leading and trailing values. +fragment SourceCharacter : [\u0000-\ud7ff] | [\ue000-\u{10ffff}]; -// this is currently not covered by the spec because we allow all unicode chars -// u0009 = \t Horizontal tab -// u000a = \n line feed -// u000d = \r carriage return -// u0020 = space -fragment ExtendedSourceCharacter :[\u0009\u000A\u000D\u0020-\u{10FFFF}]; -fragment ExtendedSourceCharacterWithoutLineFeed :[\u0009\u0020-\u{10FFFF}]; +// CommentChar +fragment SourceCharacterWithoutLineFeed : [\u0000-\u0009] | [\u000b\u000c\u000e-\ud7ff] | [\ue000-\u{10ffff}]; -// this is the spec definition -// fragment SourceCharacter :[\u0009\u000A\u000D\u0020-\uFFFF]; - - -Comment: '#' ExtendedSourceCharacterWithoutLineFeed* -> channel(2); +Comment: '#' SourceCharacterWithoutLineFeed* -> channel(2); LF: [\n] -> channel(3); CR: [\r] -> channel(3); diff --git a/src/main/java/graphql/parser/AntlrHelper.java b/src/main/java/graphql/parser/AntlrHelper.java index a3fc74d272..4eb52d87ba 100644 --- a/src/main/java/graphql/parser/AntlrHelper.java +++ 
b/src/main/java/graphql/parser/AntlrHelper.java @@ -3,6 +3,7 @@ import graphql.Internal; import graphql.language.SourceLocation; import org.antlr.v4.runtime.Token; +import org.antlr.v4.runtime.tree.TerminalNode; import java.util.List; @@ -28,6 +29,9 @@ public static SourceLocation createSourceLocation(MultiSourceReader multiSourceR return AntlrHelper.createSourceLocation(multiSourceReader, token.getLine(), token.getCharPositionInLine()); } + public static SourceLocation createSourceLocation(MultiSourceReader multiSourceReader, TerminalNode terminalNode) { + return AntlrHelper.createSourceLocation(multiSourceReader, terminalNode.getSymbol().getLine(), terminalNode.getSymbol().getCharPositionInLine()); + } /* grabs 3 lines before and after the syntax error */ public static String createPreview(MultiSourceReader multiSourceReader, int antrlLine) { diff --git a/src/main/java/graphql/parser/GraphqlAntlrToLanguage.java b/src/main/java/graphql/parser/GraphqlAntlrToLanguage.java index 770a590c27..6e9ecfe6c1 100644 --- a/src/main/java/graphql/parser/GraphqlAntlrToLanguage.java +++ b/src/main/java/graphql/parser/GraphqlAntlrToLanguage.java @@ -760,13 +760,14 @@ protected Value createValue(GraphqlParser.ValueContext ctx) { return assertShouldNeverHappen(); } - static String quotedString(TerminalNode terminalNode) { + protected String quotedString(TerminalNode terminalNode) { boolean multiLine = terminalNode.getText().startsWith("\"\"\""); String strText = terminalNode.getText(); + SourceLocation sourceLocation = AntlrHelper.createSourceLocation(multiSourceReader, terminalNode); if (multiLine) { return parseTripleQuotedString(strText); } else { - return parseSingleQuotedString(strText); + return parseSingleQuotedString(strText, sourceLocation); } } @@ -839,12 +840,12 @@ protected Description newDescription(GraphqlParser.DescriptionContext descriptio } String content = terminalNode.getText(); boolean multiLine = content.startsWith("\"\"\""); + SourceLocation sourceLocation = 
getSourceLocation(descriptionCtx); if (multiLine) { content = parseTripleQuotedString(content); } else { - content = parseSingleQuotedString(content); + content = parseSingleQuotedString(content, sourceLocation); } - SourceLocation sourceLocation = getSourceLocation(descriptionCtx); return new Description(content, sourceLocation, multiLine); } diff --git a/src/main/java/graphql/parser/StringValueParsing.java b/src/main/java/graphql/parser/StringValueParsing.java index a3c598c2d6..d9da00dc83 100644 --- a/src/main/java/graphql/parser/StringValueParsing.java +++ b/src/main/java/graphql/parser/StringValueParsing.java @@ -2,6 +2,7 @@ import graphql.Assert; import graphql.Internal; +import graphql.language.SourceLocation; import java.io.StringWriter; import java.util.ArrayList; @@ -30,7 +31,9 @@ public static String removeIndentation(String rawValue) { String[] lines = rawValue.split("\\n"); Integer commonIndent = null; for (int i = 0; i < lines.length; i++) { - if (i == 0) continue; + if (i == 0) { + continue; + } String line = lines[i]; int length = line.length(); int indent = leadingWhitespace(line); @@ -44,7 +47,9 @@ public static String removeIndentation(String rawValue) { if (commonIndent != null) { for (int i = 0; i < lineList.size(); i++) { String line = lineList.get(i); - if (i == 0) continue; + if (i == 0) { + continue; + } if (line.length() > commonIndent) { line = line.substring(commonIndent); lineList.set(i, line); @@ -98,7 +103,7 @@ private static boolean containsOnlyWhiteSpace(String str) { return leadingWhitespace(str) == str.length(); } - public static String parseSingleQuotedString(String string) { + public static String parseSingleQuotedString(String string, SourceLocation sourceLocation) { StringWriter writer = new StringWriter(string.length() - 2); int end = string.length() - 1; for (int i = 1; i < end; i++) { @@ -135,10 +140,7 @@ public static String parseSingleQuotedString(String string) { writer.write('\t'); continue; case 'u': - String hexStr = 
string.substring(i + 1, i + 5); - int codepoint = Integer.parseInt(hexStr, 16); - i += 4; - writer.write(codepoint); + i = UnicodeUtil.parseAndWriteUnicode(writer, string, i, sourceLocation); continue; default: Assert.assertShouldNeverHappen(); @@ -146,4 +148,8 @@ public static String parseSingleQuotedString(String string) { } return writer.toString(); } + + public static String parseSingleQuotedString(String string) { + return parseSingleQuotedString(string, null); + } } diff --git a/src/main/java/graphql/parser/UnicodeUtil.java b/src/main/java/graphql/parser/UnicodeUtil.java new file mode 100644 index 0000000000..53d1cefe05 --- /dev/null +++ b/src/main/java/graphql/parser/UnicodeUtil.java @@ -0,0 +1,114 @@ +package graphql.parser; + +import graphql.Internal; +import graphql.language.SourceLocation; + +import java.io.IOException; +import java.io.StringWriter; + +import static graphql.Assert.assertShouldNeverHappen; + +/** + * Contains Unicode helpers for parsing StringValue types in the grammar + */ +@Internal +public class UnicodeUtil { + public static int MAX_UNICODE_CODE_POINT = 0x10FFFF; + public static int LEADING_SURROGATE_LOWER_BOUND = 0xD800; + public static int LEADING_SURROGATE_UPPER_BOUND = 0xDBFF; + public static int TRAILING_SURROGATE_LOWER_BOUND = 0xDC00; + public static int TRAILING_SURROGATE_UPPER_BOUND = 0xDFFF; + + public static int parseAndWriteUnicode(StringWriter writer, String string, int i, SourceLocation sourceLocation) { + // Unicode code points can either be: + // 1. Unbraced: four hex characters in the form \\u597D, or + // 2. Braced: any number of hex characters surrounded by braces in the form \\u{1F37A} + + // Extract the code point hex digits. Index i points to 'u' + int startIndex = isBracedEscape(string, i) ? i + 2 : i + 1; + int endIndexExclusive = getEndIndexExclusive(string, i, sourceLocation); + // Index for parser to continue at, the last character of the escaped unicode character. 
Either } or hex digit + int continueIndex = isBracedEscape(string, i) ? endIndexExclusive : endIndexExclusive - 1; + + String hexStr = string.substring(startIndex, endIndexExclusive); + Integer codePoint = Integer.parseInt(hexStr, 16); + + if (isTrailingSurrogateValue(codePoint)) { + throw new InvalidSyntaxException(sourceLocation, "Invalid unicode - trailing surrogate must be preceded with a leading surrogate -", null, string.substring(i - 1, continueIndex + 1), null); + } else if (isLeadingSurrogateValue(codePoint)) { + if (!isEscapedUnicode(string, continueIndex + 1)) { + throw new InvalidSyntaxException(sourceLocation, "Invalid unicode - leading surrogate must be followed by a trailing surrogate -", null, string.substring(i - 1, continueIndex + 1), null); + } + + // Shift parser ahead to 'u' in second escaped Unicode character + i = continueIndex + 2; + int trailingStartIndex = isBracedEscape(string, i) ? i + 2 : i + 1; + int trailingEndIndexExclusive = getEndIndexExclusive(string, i, sourceLocation); + String trailingHexStr = string.substring(trailingStartIndex, trailingEndIndexExclusive); + Integer trailingCodePoint = Integer.parseInt(trailingHexStr, 16); + continueIndex = isBracedEscape(string, i) ? 
trailingEndIndexExclusive : trailingEndIndexExclusive - 1; + + if (isTrailingSurrogateValue(trailingCodePoint)) { + writeCodePoint(writer, codePoint); + writeCodePoint(writer, trailingCodePoint); + return continueIndex; + } + + throw new InvalidSyntaxException(sourceLocation, "Invalid unicode - leading surrogate must be followed by a trailing surrogate -", null, string.substring(i - 1, continueIndex + 1), null); + } else if (isValidUnicodeCodePoint(codePoint)) { + writeCodePoint(writer, codePoint); + return continueIndex; + } + + throw new InvalidSyntaxException(sourceLocation, "Invalid unicode - not a valid code point -", null, string.substring(i - 1, continueIndex + 1), null); + } + + private static int getEndIndexExclusive(String string, int i, SourceLocation sourceLocation) { + // Unbraced case, with exactly 4 hex digits + if (string.length() > i + 5 && !isBracedEscape(string, i)) { + return i + 5; + } + + // Braced case, with any number of hex digits + int endIndexExclusive = i + 2; + do { + if (endIndexExclusive + 1 >= string.length()) { + throw new InvalidSyntaxException(sourceLocation, "Invalid unicode - incorrectly formatted escape -", null, string.substring(i - 1, endIndexExclusive), null); + } + } while (string.charAt(++endIndexExclusive) != '}'); + + return endIndexExclusive; + } + + private static boolean isValidUnicodeCodePoint(int value) { + return value <= MAX_UNICODE_CODE_POINT; + } + + private static boolean isEscapedUnicode(String string, int index) { + if (index + 1 >= string.length()) { + return false; + } + return string.charAt(index) == '\\' && string.charAt(index + 1) == 'u'; + } + + private static boolean isLeadingSurrogateValue(int value) { + return LEADING_SURROGATE_LOWER_BOUND <= value && value <= LEADING_SURROGATE_UPPER_BOUND; + } + + private static boolean isTrailingSurrogateValue(int value) { + return TRAILING_SURROGATE_LOWER_BOUND <= value && value <= TRAILING_SURROGATE_UPPER_BOUND; + } + + private static void 
writeCodePoint(StringWriter writer, int codepoint) { + char[] chars = Character.toChars(codepoint); + try { + writer.write(chars); + } catch (IOException e) { + assertShouldNeverHappen(); + } + } + + private static boolean isBracedEscape(String string, int i) { + return string.charAt(i + 1) == '{'; + } +} diff --git a/src/test/groovy/graphql/GraphQLTest.groovy b/src/test/groovy/graphql/GraphQLTest.groovy index 2e4f7b9a7c..dcf75a764f 100644 --- a/src/test/groovy/graphql/GraphQLTest.groovy +++ b/src/test/groovy/graphql/GraphQLTest.groovy @@ -179,6 +179,31 @@ class GraphQLTest extends Specification { errors[0].locations == [new SourceLocation(1, 8)] } + def "query with invalid Unicode surrogate in argument - no trailing value"() { + given: + GraphQLSchema schema = newSchema().query( + newObject() + .name("RootQueryType") + .field(newFieldDefinition() + .name("field") + .type(GraphQLString) + .argument(newArgument() + .name("arg") + .type(GraphQLNonNull.nonNull(GraphQLString)))) + .build() + ).build() + + when: + // Invalid Unicode character - leading surrogate value without trailing surrogate value + def errors = GraphQL.newGraphQL(schema).build().execute('{ hello(arg:"\\ud83c") }').errors + + then: + errors.size() == 1 + errors[0].errorType == ErrorType.InvalidSyntax + errors[0].message == "Invalid Syntax : Invalid unicode - leading surrogate must be followed by a trailing surrogate - offending token '\\ud83c' at line 1 column 13" + errors[0].locations == [new SourceLocation(1, 13)] + } + def "non null argument is missing"() { given: GraphQLSchema schema = newSchema().query( diff --git a/src/test/groovy/graphql/parser/ParserTest.groovy b/src/test/groovy/graphql/parser/ParserTest.groovy index 790491e80b..dc92da158f 100644 --- a/src/test/groovy/graphql/parser/ParserTest.groovy +++ b/src/test/groovy/graphql/parser/ParserTest.groovy @@ -976,4 +976,72 @@ triple3 : """edge cases \\""" "" " \\"" \\" edge cases""" !type.getIgnoredChars().getLeft().isEmpty() 
!type.getIgnoredChars().getRight().isEmpty() } + + def "allow braced escaped unicode"() { + given: + def input = ''' + { + foo(arg: "\\u{1F37A}") + } + ''' + + when: + Document document = Parser.parse(input) + OperationDefinition operationDefinition = (document.definitions[0] as OperationDefinition) + def field = operationDefinition.getSelectionSet().getSelections()[0] as Field + def argValue = field.arguments[0].value as StringValue + + then: + argValue.getValue() == "🍺" // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug + } + + def "allow surrogate pairs escaped unicode"() { + given: + def input = ''' + { + foo(arg: "\\ud83c\\udf7a") + } + ''' + + when: + Document document = Parser.parse(input) + OperationDefinition operationDefinition = (document.definitions[0] as OperationDefinition) + def field = operationDefinition.getSelectionSet().getSelections()[0] as Field + def argValue = field.arguments[0].value as StringValue + + then: + argValue.getValue() == "🍺" // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug + } + + def "invalid surrogate pair - no trailing value"() { + given: + def input = ''' + { + foo(arg: "\\ud83c") + } + ''' + + when: + Parser.parse(input) + + then: + InvalidSyntaxException e = thrown(InvalidSyntaxException) + e.message == "Invalid Syntax : Invalid unicode - leading surrogate must be followed by a trailing surrogate - offending token '\\ud83c' at line 3 column 24" + } + + def "invalid surrogate pair - no leading value"() { + given: + def input = ''' + { + foo(arg: "\\uDC00") + } + ''' + + when: + Parser.parse(input) + + then: + InvalidSyntaxException e = thrown(InvalidSyntaxException) + e.message == "Invalid Syntax : Invalid unicode - trailing surrogate must be preceded with a leading surrogate - offending token '\\uDC00' at line 3 column 24" + } } diff --git a/src/test/groovy/graphql/parser/StringValueParsingTest.groovy 
b/src/test/groovy/graphql/parser/StringValueParsingTest.groovy index 8044d1cec7..5543dbc339 100644 --- a/src/test/groovy/graphql/parser/StringValueParsingTest.groovy +++ b/src/test/groovy/graphql/parser/StringValueParsingTest.groovy @@ -40,8 +40,7 @@ class StringValueParsingTest extends Specification { parsed == '''"''' } - def "parsing emoji should work"() { - // needs surrogate pairs for this emoji + def "parsing beer stein as surrogate pair should work"() { given: def input = '''"\\ud83c\\udf7a"''' @@ -52,18 +51,17 @@ class StringValueParsingTest extends Specification { parsed == '''🍺''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug } - def "parsing simple unicode should work"() { + def "parsing simple unicode should work - Basic Multilingual Plane (BMP)"() { given: - def input = '''"\\u56fe"''' + def input = '''"\\u5564\\u9152"''' when: String parsed = StringValueParsing.parseSingleQuotedString(input) then: - parsed == '''图''' + parsed == '''啤酒''' } - def "parsing triple quoted string should work"() { given: def input = '''"""triple quoted"""''' diff --git a/src/test/groovy/graphql/parser/StringValueParsingUnicodeTest.groovy b/src/test/groovy/graphql/parser/StringValueParsingUnicodeTest.groovy new file mode 100644 index 0000000000..63c59d8011 --- /dev/null +++ b/src/test/groovy/graphql/parser/StringValueParsingUnicodeTest.groovy @@ -0,0 +1,258 @@ +package graphql.parser + +import graphql.language.Document +import graphql.language.Field +import graphql.language.OperationDefinition +import graphql.language.StringValue +import spock.lang.Specification + +class StringValueParsingUnicodeTest extends Specification { + /** + * Implements RFC to support full Unicode https://github.com/graphql/graphql-spec/pull/849 + * + * Key changes + * + SourceCharacters now include all Unicode scalar values. Previously only included up to U+FFFF (Basic Multilingual Plane). + * + SourceCharacters now include control characters. 
Previously certain control characters were excluded. + * + Surrogate pair validation added. + * + * Note that "unescaped" Unicode characters such as 🍺 are handled by ANTLR grammar. + * "Escaped" Unicode characters such as \\u{1F37A} are handled by StringValueParsing. + */ + + // With this RFC, escaped code points outside the Basic Multilingual Plane (e.g. emojis) can be parsed. + def "parsing beer stein as escaped unicode"() { + given: + def input = '''"\\u{1F37A} hello"''' + + when: + String parsed = StringValueParsing.parseSingleQuotedString(input) + + then: + parsed == '''🍺 hello''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug + } + + def "parsing beer stein without escaping"() { + given: + def input = '''"🍺 hello"''' + + when: + String parsed = StringValueParsing.parseSingleQuotedString(input) + + then: + parsed == '''🍺 hello''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug + } + + /** + * From the RFC: + * For legacy reasons, a *supplementary character* may be escaped by two + * fixed-width unicode escape sequences forming a *surrogate pair*. For example + * the input `"\\uD83D\\uDCA9"` is a valid {StringValue} which represents the same + * Unicode text as `"\\u{1F4A9}"`. While this legacy form is allowed, it should be + * avoided as a variable-width unicode escape sequence is a clearer way to encode + * such code points. + * + * Valid surrogate pair combinations: + * + If {leadingValue} is >= 0xD800 and <= 0xDBFF (a *Leading Surrogate*): + * + Assert {trailingValue} is >= 0xDC00 and <= 0xDFFF (a *Trailing Surrogate*). 
+ */ + def "invalid surrogate pair - no trailing value"() { + given: + def input = '''"\\uD83D hello"''' + + when: + StringValueParsing.parseSingleQuotedString(input) + + then: + InvalidSyntaxException e = thrown(InvalidSyntaxException) + e.message == "Invalid Syntax : Invalid unicode - leading surrogate must be followed by a trailing surrogate - offending token '\\uD83D'" + } + + def "invalid surrogate pair - end of string"() { + given: + def input = '''"\\uD83D"''' + + when: + StringValueParsing.parseSingleQuotedString(input) + + then: + InvalidSyntaxException e = thrown(InvalidSyntaxException) + e.message == "Invalid Syntax : Invalid unicode - leading surrogate must be followed by a trailing surrogate - offending token '\\uD83D'" + } + + def "invalid surrogate pair - invalid trailing value"() { + given: + def input = '''"\\uD83D\\uDBFF"''' + + when: + StringValueParsing.parseSingleQuotedString(input) + + then: + InvalidSyntaxException e = thrown(InvalidSyntaxException) + e.message == "Invalid Syntax : Invalid unicode - leading surrogate must be followed by a trailing surrogate - offending token '\\uDBFF'" + } + + def "invalid surrogate pair - no leading value"() { + given: + def input = '''"\\uDC00"''' + + when: + StringValueParsing.parseSingleQuotedString(input) + + then: + InvalidSyntaxException e = thrown(InvalidSyntaxException) + e.message == "Invalid Syntax : Invalid unicode - trailing surrogate must be preceded with a leading surrogate - offending token '\\uDC00'" + } + + def "invalid surrogate pair - invalid leading value"() { + given: + def input = '''"\\uD700\\uDC00"''' + + when: + StringValueParsing.parseSingleQuotedString(input) + + then: + InvalidSyntaxException e = thrown(InvalidSyntaxException) + e.message == "Invalid Syntax : Invalid unicode - trailing surrogate must be preceded with a leading surrogate - offending token '\\uDC00'" + } + + def "valid surrogate pair - leading code with braces"() { + given: + def input = '''"hello 
\\u{d83c}\\udf7a"''' + + when: + String parsed = StringValueParsing.parseSingleQuotedString(input) + + then: + parsed == '''hello 🍺''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug + } + + def "valid surrogate pair - trailing code with braces"() { + given: + def input = '''"hello \\ud83c\\u{df7a}"''' + + when: + String parsed = StringValueParsing.parseSingleQuotedString(input) + + then: + parsed == '''hello 🍺''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug + } + + def "valid surrogate pair - leading and trailing code with braces"() { + given: + def input = '''"hello \\u{d83c}\\u{df7a}"''' + + when: + String parsed = StringValueParsing.parseSingleQuotedString(input) + + then: + parsed == '''hello 🍺''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug + } + + def "invalid surrogate pair - leading code with only \\ at end of string"() { + given: + def input = '''"hello \\u{d83c}\\"''' + + when: + StringValueParsing.parseSingleQuotedString(input) + + then: + InvalidSyntaxException e = thrown(InvalidSyntaxException) + e.message == "Invalid Syntax : Invalid unicode - leading surrogate must be followed by a trailing surrogate - offending token '\\u{d83c}'" + } + + def "invalid surrogate pair - leading code with only \\u at end of string"() { + given: + def input = '''"hello \\u{d83c}\\u"''' + + when: + StringValueParsing.parseSingleQuotedString(input) + + then: + InvalidSyntaxException e = thrown(InvalidSyntaxException) + e.message == "Invalid Syntax : Invalid unicode - incorrectly formatted escape - offending token '\\u\"'" + } + + def "invalid surrogate pair - trailing code without closing brace"() { + given: + def input = '''"hello \\u{d83c}\\u{df7a"''' + + when: + StringValueParsing.parseSingleQuotedString(input) + + then: + InvalidSyntaxException e = thrown(InvalidSyntaxException) + e.message == "Invalid Syntax : Invalid unicode - incorrectly formatted escape 
- offending token '\\u{df7a'" + } + + def "invalid surrogate pair - invalid trailing code without unicode escape 1"() { + given: + def input = '''"hello \\u{d83c}{df7a}"''' + + when: + StringValueParsing.parseSingleQuotedString(input) + + then: + InvalidSyntaxException e = thrown(InvalidSyntaxException) + e.message == "Invalid Syntax : Invalid unicode - leading surrogate must be followed by a trailing surrogate - offending token '\\u{d83c}'" + } + + def "invalid surrogate pair - invalid trailing code without unicode escape 2"() { + given: + def input = '''"hello \\u{d83c}df7a"''' + + when: + StringValueParsing.parseSingleQuotedString(input) + + then: + InvalidSyntaxException e = thrown(InvalidSyntaxException) + e.message == "Invalid Syntax : Invalid unicode - leading surrogate must be followed by a trailing surrogate - offending token '\\u{d83c}'" + } + + def "invalid surrogate pair - invalid leading code"() { + given: + def input = '''"hello d83c\\u{df7a}"''' + + when: + StringValueParsing.parseSingleQuotedString(input) + + then: + InvalidSyntaxException e = thrown(InvalidSyntaxException) + e.message == "Invalid Syntax : Invalid unicode - trailing surrogate must be preceded with a leading surrogate - offending token '\\u{df7a}'" + } + + def "invalid surrogate pair - invalid leading value with braces"() { + given: + def input = '''"\\u{5B57}\\uDC00"''' + + when: + StringValueParsing.parseSingleQuotedString(input) + + then: + InvalidSyntaxException e = thrown(InvalidSyntaxException) + e.message == "Invalid Syntax : Invalid unicode - trailing surrogate must be preceded with a leading surrogate - offending token '\\uDC00'" + } + + def "invalid surrogate pair - invalid trailing value with braces"() { + given: + def input = '''"\\uD83D\\u{DBFF}"''' + + when: + StringValueParsing.parseSingleQuotedString(input) + + then: + InvalidSyntaxException e = thrown(InvalidSyntaxException) + e.message == "Invalid Syntax : Invalid unicode - leading surrogate must be followed by a 
trailing surrogate - offending token '\\u{DBFF}'" + } + + def "invalid unicode code point - value is too high"() { + given: + def input = '''"\\u{fffffff}"''' + + when: + StringValueParsing.parseSingleQuotedString(input) + + then: + InvalidSyntaxException e = thrown(InvalidSyntaxException) + e.message == "Invalid Syntax : Invalid unicode - not a valid code point - offending token '\\u{fffffff}'" + } +}