Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SI-12290: support JDK15 text blocks in Java parser #9548

Merged
merged 1 commit into from Apr 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
185 changes: 161 additions & 24 deletions src/compiler/scala/tools/nsc/javac/JavaScanners.scala
Expand Up @@ -239,6 +239,9 @@ trait JavaScanners extends ast.parser.ScannersCommon {
*/
protected def putChar(c: Char): Unit = {
  cbuf.append(c)
  ()
}

/** Drop the trailing `n` characters from the literal buffer; does nothing when `n` is not positive */
private def popNChars(n: Int): Unit = {
  if (n > 0) {
    val keptLength = cbuf.length - n
    cbuf.setLength(keptLength)
  }
}

/** Clear buffer and set name */
private def setName(): Unit = {
name = newTermName(cbuf.toString())
Expand Down Expand Up @@ -322,15 +325,26 @@ trait JavaScanners extends ast.parser.ScannersCommon {

case '\"' =>
in.next()
while (in.ch != '\"' && (in.isUnicode || in.ch != CR && in.ch != LF && in.ch != SU)) {
getlitch()
}
if (in.ch == '\"') {
token = STRINGLIT
setName()
in.next()
if (in.ch != '\"') { // "..." non-empty string literal
while (in.ch != '\"' && (in.isUnicode || in.ch != CR && in.ch != LF && in.ch != SU)) {
getlitch()
}
if (in.ch == '\"') {
token = STRINGLIT
setName()
in.next()
} else {
syntaxError("unclosed string literal")
}
} else {
syntaxError("unclosed string literal")
in.next()
if (in.ch != '\"') { // "" empty string literal
token = STRINGLIT
setName()
} else {
in.next()
getTextBlock()
}
}
return

Expand Down Expand Up @@ -664,9 +678,12 @@ trait JavaScanners extends ast.parser.ScannersCommon {
// Literals -----------------------------------------------------------------

/** read next character in character or string literal:
*/
protected def getlitch() =
if (in.ch == '\\') {
*
* @param scanOnly skip emitting errors or adding to the literal buffer
* @param inTextBlock is this for a text block?
*/
protected def getlitch(scanOnly: Boolean = false, inTextBlock: Boolean = false): Unit = {
val c: Char = if (in.ch == '\\') {
in.next()
if ('0' <= in.ch && in.ch <= '7') {
val leadch: Char = in.ch
Expand All @@ -680,27 +697,147 @@ trait JavaScanners extends ast.parser.ScannersCommon {
in.next()
}
}
putChar(oct.asInstanceOf[Char])
oct.asInstanceOf[Char]
} else {
in.ch match {
case 'b' => putChar('\b')
case 't' => putChar('\t')
case 'n' => putChar('\n')
case 'f' => putChar('\f')
case 'r' => putChar('\r')
case '\"' => putChar('\"')
case '\'' => putChar('\'')
case '\\' => putChar('\\')
val c: Char = in.ch match {
case 'b' => '\b'
case 's' => ' '
case 't' => '\t'
case 'n' => '\n'
case 'f' => '\f'
case 'r' => '\r'
case '\"' => '\"'
case '\'' => '\''
case '\\' => '\\'
case CR | LF if inTextBlock =>
in.next()
return
case _ =>
syntaxError(in.cpos - 1, "invalid escape character")
putChar(in.ch)
if (!scanOnly) syntaxError(in.cpos - 1, "invalid escape character")
in.ch
}
in.next()
c
}
} else {
putChar(in.ch)
val c = in.ch
in.next()
c
}
if (!scanOnly) putChar(c)
}

/** Read a triple-quote delimited text block, starting after the first three
* double quotes.
*
* Works in two passes over the input:
*  1. a scan-only lookahead that searches for the closing delimiter and tracks
*     the smallest leading-whitespace count among the lines seen;
*  2. a second pass, restarted from a saved copy of the reader, that appends
*     the literal's characters to the buffer while dropping that common
*     prefix and each line's trailing whitespace.
*
* On success `token` is set to STRINGLIT and `setName()` is called. If the
* opening delimiter is not followed by a line terminator, or no closing
* delimiter is found, a syntax error is reported and the method returns
* without setting `token`.
*/
private def getTextBlock(): Unit = {
// Open delimiter is followed by optional space, then a newline
while (in.ch == ' ' || in.ch == '\t' || in.ch == FF) {
in.next()
}
if (in.ch != LF && in.ch != CR) { // CR-LF is already normalized into LF by `JavaCharArrayReader`
syntaxError("illegal text block open delimiter sequence, missing line terminator")
return
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of / before returning, maybe we try to advance reader to the end of the text block? Otherwise there are a lot of errors issued when continuing to parse.

Although javac is not any better :-) So I'm fine if you leave it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe just not returning would do the trick? I'll try this out after work.

}
// Step past the line terminator that ends the opening delimiter line
in.next()

/* Do a lookahead scan over the full text block to:
* - compute common white space prefix
* - find the offset where the text block ends
*/
lrytz marked this conversation as resolved.
Show resolved Hide resolved
// Smallest leading-whitespace count seen so far (MaxValue until a line contributes)
var commonWhiteSpacePrefix = Int.MaxValue
// Reader offset at which the second pass must stop; set once the block is closed
var blockEndOffset = 0
// Copy of the reader taken before the lookahead, used to restart for the second pass
val backtrackTo = in.copy
var blockClosed = false
// Leading whitespace characters counted on the current line
var lineWhiteSpacePrefix = 0
// True until a non-whitespace character is seen on the current line
var lineIsOnlyWhitespace = true
while (!blockClosed && (in.isUnicode || in.ch != SU)) {
if (in.ch == '\"') { // Potential end of the block
in.next()
if (in.ch == '\"') {
in.next()
if (in.ch == '\"') {
blockClosed = true
// The closing delimiter's line contributes its indentation to the common prefix
commonWhiteSpacePrefix = commonWhiteSpacePrefix min lineWhiteSpacePrefix
// NOTE(review): presumably the offset of the first quote of the closing
// delimiter; this depends on how `cpos` relates to `ch` in the reader - confirm
blockEndOffset = in.cpos - 2
}
}

// Not the end of the block - just a single or double " character
if (!blockClosed) {
lineIsOnlyWhitespace = false
}
} else if (in.ch == CR || in.ch == LF) { // new line in the block
in.next()
// Lines made up only of whitespace do not contribute to the common prefix
if (!lineIsOnlyWhitespace) {
commonWhiteSpacePrefix = commonWhiteSpacePrefix min lineWhiteSpacePrefix
}
lineWhiteSpacePrefix = 0
lineIsOnlyWhitespace = true
} else if (lineIsOnlyWhitespace && Character.isWhitespace(in.ch)) { // extend white space prefix
in.next()
lineWhiteSpacePrefix += 1
} else {
lineIsOnlyWhitespace = false
// Scan-only: consumes one (possibly escaped) character without reporting
// escape errors or adding to the literal buffer
getlitch(scanOnly = true, inTextBlock = true)
}
}

// Bail out if the block never did have an end
if (!blockClosed) {
syntaxError("unclosed text block")
return
}

// Second pass: construct the literal string value this time
in = backtrackTo
// Each iteration of this loop handles one line of the block
while (in.cpos < blockEndOffset) {
// Drop the line's leading whitespace
var remainingPrefix = commonWhiteSpacePrefix
while (remainingPrefix > 0 && in.ch != CR && in.ch != LF && in.cpos < blockEndOffset) {
in.next()
remainingPrefix -= 1
}

// Length of the run of whitespace source characters most recently read on this line
var trailingWhitespaceLength = 0
var escapedNewline = false // Does the line end with `\`?
while (in.ch != CR && in.ch != LF && in.cpos < blockEndOffset && !escapedNewline) {
// Checked on the raw source character, so a backslash starting an escape resets the run
if (Character.isWhitespace(in.ch)) {
trailingWhitespaceLength += 1
} else {
trailingWhitespaceLength = 0
}

// Detect if the line is about to end with `\`
if (in.ch == '\\' && {
// Peek at the following character on a copy so the real reader does not advance
val lookahead = in.copy
lookahead.next()
lookahead.ch == CR || lookahead.ch == LF
}) {
escapedNewline = true
}

// Appends the character to the buffer; for `\` followed by a line terminator
// it consumes both and appends nothing
getlitch(scanOnly = false, inTextBlock = true)
}

// Drop the line's trailing whitespace
popNChars(trailingWhitespaceLength)

// Normalize line terminators
if ((in.ch == CR || in.ch == LF) && !escapedNewline) {
in.next()
putChar('\n')
}
}

token = STRINGLIT
setName()

// Trailing """
in.next()
in.next()
in.next()
}

/** read fractional part and exponent of floating point number
* if one is present.
Expand Down
13 changes: 13 additions & 0 deletions test/files/neg/text-blocks.check
@@ -0,0 +1,13 @@
text-blocks/Invalid1.java:4: error: illegal text block open delimiter sequence, missing line terminator
public static final String badOpeningDelimiter = """non-whitespace
^
text-blocks/Invalid1.java:4: error: <identifier> expected
public static final String badOpeningDelimiter = """non-whitespace
^
text-blocks/Invalid1.java:6: error: illegal text block open delimiter sequence, missing line terminator
""";
^
text-blocks/Invalid2.java:6: error: unclosed string literal
foo"""";
^
4 errors
7 changes: 7 additions & 0 deletions test/files/neg/text-blocks/Invalid1.java
@@ -0,0 +1,7 @@
// javaVersion: 15+
class Invalid1 {

public static final String badOpeningDelimiter = """non-whitespace
foo
""";
}
7 changes: 7 additions & 0 deletions test/files/neg/text-blocks/Invalid2.java
@@ -0,0 +1,7 @@
// javaVersion: 15+
class Invalid2 {

// Closing delimiter is first three eligible `"""`, not last
public static final String closingDelimiterIsNotScalas = """
foo"""";
}
61 changes: 61 additions & 0 deletions test/files/run/t12290.check
@@ -0,0 +1,61 @@
====
A text

====
<html>
<body>
<p>Hello, world</p>
</body>
</html>

====
SELECT "EMP_ID", "LAST_NAME" FROM "EMPLOYEE_TB"
WHERE "CITY" = 'INDIANAPOLIS'
ORDER BY "EMP_ID", "LAST_NAME";

====
<html>
<body>
<p>Hello, world</p>
</body>
</html>

====
<html>
<body>
<p>Hello, world</p>
</body>
</html>

====
<html>
<body>
<p>Hello, world</p>
</body>

</html>

====
<html>

<body> <p>Hello , world</p>
</body>
</html>

====
this line has 4 tabs before it
this line has 5 spaces before it and space after it
this line has 2 tabs and 3 spaces before it
 this line has 6 spaces before it

====
String text = """
A text block inside a text block
""";

====
foo bar
baz
====

====
30 changes: 30 additions & 0 deletions test/files/run/t12290/Test.scala
@@ -0,0 +1,30 @@
// javaVersion: 15+
/* Using `valueOf` is a way to check that the Java string literals were properly
* parsed, since the parsed value is what the Scala compiler will use when
* resolving the singleton types
*/
object Test extends App {
  /** Print the `====` divider followed by one parsed literal value. */
  private def show(value: String): Unit = {
    println("====")
    println(value)
  }

  // One entry per constant declared in the Java `TextBlocks` class, in the
  // order the expected-output file lists them.
  show(valueOf[TextBlocks.aText.type])
  show(valueOf[TextBlocks.html1.type])
  show(valueOf[TextBlocks.query.type])
  show(valueOf[TextBlocks.html2.type])
  show(valueOf[TextBlocks.html3.type])
  show(valueOf[TextBlocks.html4.type])
  show(valueOf[TextBlocks.html5.type])
  show(valueOf[TextBlocks.mixedIndents.type])
  show(valueOf[TextBlocks.code.type])
  show(valueOf[TextBlocks.simpleString.type])
  show(valueOf[TextBlocks.emptyString.type])

  // Closing divider after the last value
  println("====")
}