Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adds unicode braced escaping and tests #2335

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/main/antlr/GraphqlCommon.g4
Expand Up @@ -125,7 +125,7 @@ fragment StringCharacter:
'\\' EscapedCharacter;

fragment EscapedCharacter : ["\\/bfnrt];
fragment EscapedUnicode : Hex Hex Hex Hex;
fragment EscapedUnicode : Hex Hex Hex Hex | '{' Hex+ '}';
fragment Hex : [0-9a-fA-F];


Expand Down
39 changes: 33 additions & 6 deletions src/main/java/graphql/parser/StringValueParsing.java
Expand Up @@ -3,6 +3,7 @@
import graphql.Assert;
import graphql.Internal;

import java.io.IOException;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
Expand Down Expand Up @@ -30,7 +31,9 @@ public static String removeIndentation(String rawValue) {
String[] lines = rawValue.split("\\n");
Integer commonIndent = null;
for (int i = 0; i < lines.length; i++) {
if (i == 0) continue;
if (i == 0) {
continue;
}
String line = lines[i];
int length = line.length();
int indent = leadingWhitespace(line);
Expand All @@ -44,7 +47,9 @@ public static String removeIndentation(String rawValue) {
if (commonIndent != null) {
for (int i = 0; i < lineList.size(); i++) {
String line = lineList.get(i);
if (i == 0) continue;
if (i == 0) {
continue;
}
if (line.length() > commonIndent) {
line = line.substring(commonIndent);
lineList.set(i, line);
Expand Down Expand Up @@ -135,15 +140,37 @@ public static String parseSingleQuotedString(String string) {
writer.write('\t');
continue;
case 'u':
String hexStr = string.substring(i + 1, i + 5);
int codepoint = Integer.parseInt(hexStr, 16);
i += 4;
writer.write(codepoint);
i = parseEscapedUnicode(writer, string, i);
continue;
default:
Assert.assertShouldNeverHappen();
}
}
return writer.toString();
}

private static int parseEscapedUnicode(StringWriter writer, String string, int i) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel strongly that this should go into a Util class (UnicodeUtil say) and be unit tested on its own

Better units of code

if (string.charAt(i + 1) != '{') {
String hexStr = string.substring(i + 1, i + 5);
int codepoint = Integer.parseInt(hexStr, 16);
writer.write(codepoint);
return i + 4;
}
// this means we have a braced escape which allows code points outside of the BMP: e.g. '\\u{1F37A}'
int startIx = i + 2;
int endIndexExclusive = startIx;
do {
if (endIndexExclusive + 1 >= string.length()) {
throw new RuntimeException("invalid unicode encoding");
}
} while (string.charAt(++endIndexExclusive) != '}');
String hexStr = string.substring(startIx, endIndexExclusive);
char[] chars = Character.toChars(Integer.parseInt(hexStr, 16));
try {
writer.write(chars);
} catch (IOException e) {
return Assert.assertShouldNeverHappen();
}
return endIndexExclusive;
}
}
72 changes: 72 additions & 0 deletions src/test/groovy/graphql/parser/ParserTest.groovy
Expand Up @@ -39,6 +39,7 @@ import graphql.language.VariableDefinition
import graphql.language.VariableReference
import org.antlr.v4.runtime.CommonTokenStream
import org.antlr.v4.runtime.ParserRuleContext
import org.junit.Ignore
import spock.lang.Specification
import spock.lang.Unroll

Expand Down Expand Up @@ -814,6 +815,76 @@ triple3 : """edge cases \\""" "" " \\"" \\" edge cases"""
operationDefinition.getComments()[0].content == " Represents the 😕 emoji."
}

// A braced escape naming U+1F37A must come out of the parser as the beer mug character.
def "allow braced escaped unicode"() {
    given:
    def query = '''
{
foo(arg: "\\u{1F37A}")
}
'''

    when:
    def parsedDocument = Parser.parse(query)
    def operation = parsedDocument.definitions[0] as OperationDefinition
    def firstField = operation.selectionSet.selections[0] as Field
    def stringArg = firstField.arguments[0].value as StringValue

    then:
    stringArg.value == "🍺"
}

// Two consecutive four digit escapes (high then low surrogate) must combine into the beer mug character.
def "allow surrogate pairs escaped unicode"() {
    given:
    def query = '''
{
foo(arg: "\\ud83c\\udf7a")
}
'''

    when:
    def parsedDocument = Parser.parse(query)
    def operation = parsedDocument.definitions[0] as OperationDefinition
    def firstField = operation.selectionSet.selections[0] as Field
    def stringArg = firstField.arguments[0].value as StringValue

    then:
    stringArg.value == "🍺"
}

// Feeds the parser two four digit escapes that are both high surrogates (D83D followed by DBFF),
// which is not a well formed surrogate pair. The test is annotated with @Ignore.
@Ignore
def "invalid surrogate pair"() {
def input = '''
{
foo(arg: "\\uD83D\\uDBFF")
}
'''
when:
Document document = Parser.parse(input)
OperationDefinition operationDefinition = (document.definitions[0] as OperationDefinition)
def field = operationDefinition.getSelectionSet().getSelections()[0] as Field
def argValue = field.arguments[0].value as StringValue

then:
// NOTE(review): this expectation looks copied from the valid surrogate pair test; two high surrogates
// cannot decode to the beer mug, so the intended outcome (presumably a parse error) needs confirming.
argValue.getValue() == "🍺"

}

// The braced escape names 0xFFFFFFF, which lies far above the largest unicode code point (0x10FFFF),
// so the string cannot be decoded and parsing has to fail instead of producing a value.
def "invalid unicode code point"() {
    given:
    def input = '''
{
foo(arg: "\\u{fffffff}")
}
'''

    when:
    Parser.parse(input)

    then:
    // NOTE(review): the concrete exception type depends on how the parser surfaces the decoding
    // failure; tighten this to the specific type once that is confirmed.
    thrown(RuntimeException)
}


def "can override antlr to ast"() {

def query = '''
Expand Down Expand Up @@ -897,4 +968,5 @@ triple3 : """edge cases \\""" "" " \\"" \\" edge cases"""
'1.23.4' | _
'1.2e3e' | _
}

}
30 changes: 27 additions & 3 deletions src/test/groovy/graphql/parser/StringValueParsingTest.groovy
Expand Up @@ -40,7 +40,7 @@ class StringValueParsingTest extends Specification {
parsed == '''"'''
}

def "parsing emoji should work"() {
def "parsing beer mug as surrogate pair"() {
// needs surrogate pairs for this emoji
given:
def input = '''"\\ud83c\\udf7a"'''
Expand All @@ -52,15 +52,39 @@ class StringValueParsingTest extends Specification {
parsed == '''🍺''' // contains the beer icon U+1F37A : http://www.charbase.com/1f37a-unicode-beer-mug
}

def "parsing beer mug as escaped unicode"() {
    // the braced escape names code point U+1F37A directly; the decoded Java string holds a surrogate pair
    given:
    def quoted = '''"\\u{1F37A} hello"'''

    expect:
    // beer mug U+1F37A followed by the untouched remainder of the string
    StringValueParsing.parseSingleQuotedString(quoted) == '''🍺 hello'''
}

def "parsing beer mug non escaped"() {
    // the emoji appears literally in the input and has to pass through unchanged
    given:
    def quoted = '''"🍺 hello"'''

    expect:
    // beer mug U+1F37A followed by the untouched remainder of the string
    StringValueParsing.parseSingleQuotedString(quoted) == '''🍺 hello'''
}

def "parsing simple unicode should work"() {
given:
def input = '''"\\u56fe"'''
def input = '''"\\u56fe hello"'''

when:
String parsed = StringValueParsing.parseSingleQuotedString(input)

then:
parsed == '''图'''
parsed == '''图 hello'''
}


Expand Down