Skip to content

Commit

Permalink
Merge pull request #9548 from harpocrates/alec/t12290
Browse files Browse the repository at this point in the history
SI-12290: support JDK15 text blocks in Java parser
  • Loading branch information
lrytz committed Apr 23, 2021
2 parents d112b8c + a8225a0 commit ff9cea1
Show file tree
Hide file tree
Showing 7 changed files with 357 additions and 24 deletions.
185 changes: 161 additions & 24 deletions src/compiler/scala/tools/nsc/javac/JavaScanners.scala
Expand Up @@ -239,6 +239,9 @@ trait JavaScanners extends ast.parser.ScannersCommon {
*/
protected def putChar(c: Char): Unit = { cbuf.append(c) }

/** Drop the trailing `n` characters of the buffer; a non-positive `n` leaves it untouched. */
private def popNChars(n: Int): Unit = {
  if (n > 0) {
    val shortenedLength = cbuf.length - n
    cbuf.setLength(shortenedLength)
  }
}

/** Clear buffer and set name */
private def setName(): Unit = {
name = newTermName(cbuf.toString())
Expand Down Expand Up @@ -322,15 +325,26 @@ trait JavaScanners extends ast.parser.ScannersCommon {

case '\"' =>
in.next()
while (in.ch != '\"' && (in.isUnicode || in.ch != CR && in.ch != LF && in.ch != SU)) {
getlitch()
}
if (in.ch == '\"') {
token = STRINGLIT
setName()
in.next()
if (in.ch != '\"') { // "..." non-empty string literal
while (in.ch != '\"' && (in.isUnicode || in.ch != CR && in.ch != LF && in.ch != SU)) {
getlitch()
}
if (in.ch == '\"') {
token = STRINGLIT
setName()
in.next()
} else {
syntaxError("unclosed string literal")
}
} else {
syntaxError("unclosed string literal")
in.next()
if (in.ch != '\"') { // "" empty string literal
token = STRINGLIT
setName()
} else {
in.next()
getTextBlock()
}
}
return

Expand Down Expand Up @@ -664,9 +678,12 @@ trait JavaScanners extends ast.parser.ScannersCommon {
// Literals -----------------------------------------------------------------

/** read next character in character or string literal:
*/
protected def getlitch() =
if (in.ch == '\\') {
*
* @param scanOnly skip emitting errors or adding to the literal buffer
* @param inTextBlock is this for a text block?
*/
protected def getlitch(scanOnly: Boolean = false, inTextBlock: Boolean = false): Unit = {
val c: Char = if (in.ch == '\\') {
in.next()
if ('0' <= in.ch && in.ch <= '7') {
val leadch: Char = in.ch
Expand All @@ -680,27 +697,147 @@ trait JavaScanners extends ast.parser.ScannersCommon {
in.next()
}
}
putChar(oct.asInstanceOf[Char])
oct.asInstanceOf[Char]
} else {
in.ch match {
case 'b' => putChar('\b')
case 't' => putChar('\t')
case 'n' => putChar('\n')
case 'f' => putChar('\f')
case 'r' => putChar('\r')
case '\"' => putChar('\"')
case '\'' => putChar('\'')
case '\\' => putChar('\\')
val c: Char = in.ch match {
case 'b' => '\b'
case 's' => ' '
case 't' => '\t'
case 'n' => '\n'
case 'f' => '\f'
case 'r' => '\r'
case '\"' => '\"'
case '\'' => '\''
case '\\' => '\\'
case CR | LF if inTextBlock =>
in.next()
return
case _ =>
syntaxError(in.cpos - 1, "invalid escape character")
putChar(in.ch)
if (!scanOnly) syntaxError(in.cpos - 1, "invalid escape character")
in.ch
}
in.next()
c
}
} else {
putChar(in.ch)
val c = in.ch
in.next()
c
}
if (!scanOnly) putChar(c)
}

/** Read a triple-quote delimited text block, starting after the first three
 *  double quotes.
 *
 *  The block is scanned twice:
 *   - a lookahead pass (`getlitch` in `scanOnly` mode, so nothing is added to
 *     the literal buffer and no escape errors are reported) computes the common
 *     white space prefix of the lines and the offset of the closing delimiter;
 *   - the reader is then rewound and a second pass builds the literal value,
 *     dropping the common prefix and the trailing white space of every line,
 *     emitting `\n` for each line terminator and joining lines that end with `\`.
 *
 *  On success `token` is set to `STRINGLIT`, the name is set from the buffer and
 *  the closing delimiter is consumed. If the opening delimiter is not followed
 *  by a line terminator, or the block is never closed, a syntax error is
 *  reported and the method returns early.
 */
private def getTextBlock(): Unit = {
  // Is the reader on a `\` that is immediately followed by a line terminator?
  // Works on a copy of the reader, so the current position is left untouched.
  def atEscapedNewline: Boolean = in.ch == '\\' && {
    val lookahead = in.copy
    lookahead.next()
    lookahead.ch == CR || lookahead.ch == LF
  }

  // Open delimiter is followed by optional space, then a newline
  while (in.ch == ' ' || in.ch == '\t' || in.ch == FF) {
    in.next()
  }
  if (in.ch != LF && in.ch != CR) { // CR-LF is already normalized into LF by `JavaCharArrayReader`
    syntaxError("illegal text block open delimiter sequence, missing line terminator")
    return
  }
  in.next()

  /* Do a lookahead scan over the full text block to:
   *   - compute common white space prefix
   *   - find the offset where the text block ends
   */
  var commonWhiteSpacePrefix = Int.MaxValue
  var blockEndOffset = 0
  val backtrackTo = in.copy
  var blockClosed = false
  var lineWhiteSpacePrefix = 0
  var lineIsOnlyWhitespace = true
  while (!blockClosed && (in.isUnicode || in.ch != SU)) {
    if (in.ch == '\"') { // Potential end of the block
      in.next()
      if (in.ch == '\"') {
        in.next()
        if (in.ch == '\"') {
          blockClosed = true
          // The closing delimiter's line always takes part in the prefix computation
          commonWhiteSpacePrefix = commonWhiteSpacePrefix min lineWhiteSpacePrefix
          // The reader is on the third quote; the delimiter starts two characters earlier
          blockEndOffset = in.cpos - 2
        }
      }

      // Not the end of the block - just a single or double " character
      if (!blockClosed) {
        lineIsOnlyWhitespace = false
      }
    } else if (in.ch == CR || in.ch == LF) { // new line in the block
      in.next()
      // Lines made only of white space do not constrain the common prefix
      if (!lineIsOnlyWhitespace) {
        commonWhiteSpacePrefix = commonWhiteSpacePrefix min lineWhiteSpacePrefix
      }
      lineWhiteSpacePrefix = 0
      lineIsOnlyWhitespace = true
    } else if (lineIsOnlyWhitespace && Character.isWhitespace(in.ch)) { // extend white space prefix
      in.next()
      lineWhiteSpacePrefix += 1
    } else {
      lineIsOnlyWhitespace = false
      val continuesOnNextLine = atEscapedNewline
      getlitch(scanOnly = true, inTextBlock = true)
      if (continuesOnNextLine) {
        // `getlitch` has consumed the `\` together with the line terminator, so the
        // `CR`/`LF` branch above never sees the end of this line. The second pass
        // still strips the prefix from the line that follows, so that line must be
        // measured as a line of its own; otherwise a continuation line indented
        // less than the common prefix would lose non-blank characters.
        commonWhiteSpacePrefix = commonWhiteSpacePrefix min lineWhiteSpacePrefix
        lineWhiteSpacePrefix = 0
        lineIsOnlyWhitespace = true
      }
    }
  }

  // Bail out if the block never did have an end
  if (!blockClosed) {
    syntaxError("unclosed text block")
    return
  }

  // Second pass: construct the literal string value this time
  in = backtrackTo
  while (in.cpos < blockEndOffset) {
    // Drop the line's leading whitespace
    var remainingPrefix = commonWhiteSpacePrefix
    while (remainingPrefix > 0 && in.ch != CR && in.ch != LF && in.cpos < blockEndOffset) {
      in.next()
      remainingPrefix -= 1
    }

    var trailingWhitespaceLength = 0
    var escapedNewline = false // Does the line end with `\`?
    while (in.ch != CR && in.ch != LF && in.cpos < blockEndOffset && !escapedNewline) {
      // Count the current run of raw white space; any other character (including
      // the `\` that starts an escape) resets it, so escaped white space is kept
      if (Character.isWhitespace(in.ch)) {
        trailingWhitespaceLength += 1
      } else {
        trailingWhitespaceLength = 0
      }

      // Detect if the line is about to end with `\`
      if (atEscapedNewline) {
        escapedNewline = true
      }

      getlitch(scanOnly = false, inTextBlock = true)
    }

    // Drop the line's trailing whitespace
    popNChars(trailingWhitespaceLength)

    // Normalize line terminators
    if ((in.ch == CR || in.ch == LF) && !escapedNewline) {
      in.next()
      putChar('\n')
    }
  }

  token = STRINGLIT
  setName()

  // Trailing """
  in.next()
  in.next()
  in.next()
}

/** read fractional part and exponent of floating point number
* if one is present.
Expand Down
13 changes: 13 additions & 0 deletions test/files/neg/text-blocks.check
@@ -0,0 +1,13 @@
text-blocks/Invalid1.java:4: error: illegal text block open delimiter sequence, missing line terminator
public static final String badOpeningDelimiter = """non-whitespace
^
text-blocks/Invalid1.java:4: error: <identifier> expected
public static final String badOpeningDelimiter = """non-whitespace
^
text-blocks/Invalid1.java:6: error: illegal text block open delimiter sequence, missing line terminator
""";
^
text-blocks/Invalid2.java:6: error: unclosed string literal
foo"""";
^
4 errors
7 changes: 7 additions & 0 deletions test/files/neg/text-blocks/Invalid1.java
@@ -0,0 +1,7 @@
// javaVersion: 15+
class Invalid1 {

public static final String badOpeningDelimiter = """non-whitespace
foo
""";
}
7 changes: 7 additions & 0 deletions test/files/neg/text-blocks/Invalid2.java
@@ -0,0 +1,7 @@
// javaVersion: 15+
class Invalid2 {

// Closing delimiter is first three eligible `"""`, not last
public static final String closingDelimiterIsNotScalas = """
foo"""";
}
61 changes: 61 additions & 0 deletions test/files/run/t12290.check
@@ -0,0 +1,61 @@
====
A text

====
<html>
<body>
<p>Hello, world</p>
</body>
</html>

====
SELECT "EMP_ID", "LAST_NAME" FROM "EMPLOYEE_TB"
WHERE "CITY" = 'INDIANAPOLIS'
ORDER BY "EMP_ID", "LAST_NAME";

====
<html>
<body>
<p>Hello, world</p>
</body>
</html>

====
<html>
<body>
<p>Hello, world</p>
</body>
</html>

====
<html>
<body>
<p>Hello, world</p>
</body>

</html>

====
<html>

<body> <p>Hello , world</p>
</body>
</html>

====
this line has 4 tabs before it
this line has 5 spaces before it and space after it
this line has 2 tabs and 3 spaces before it
 this line has 6 spaces before it

====
String text = """
A text block inside a text block
""";

====
foo bar
baz
====

====
30 changes: 30 additions & 0 deletions test/files/run/t12290/Test.scala
@@ -0,0 +1,30 @@
// javaVersion: 15+
/* Using `valueOf` is a way to check that the Java string literals were properly
* parsed, since the parsed value is what the Scala compiler will use when
* resolving the singleton types
*/
object Test extends App {
  // Marker line printed before every literal and once more at the very end
  private val separator = "===="

  // Values of the Java constants as seen by the Scala compiler, in output order
  private val parsedLiterals: List[String] = List(
    valueOf[TextBlocks.aText.type],
    valueOf[TextBlocks.html1.type],
    valueOf[TextBlocks.query.type],
    valueOf[TextBlocks.html2.type],
    valueOf[TextBlocks.html3.type],
    valueOf[TextBlocks.html4.type],
    valueOf[TextBlocks.html5.type],
    valueOf[TextBlocks.mixedIndents.type],
    valueOf[TextBlocks.code.type],
    valueOf[TextBlocks.simpleString.type],
    valueOf[TextBlocks.emptyString.type]
  )

  parsedLiterals.foreach { literal =>
    println(separator)
    println(literal)
  }
  println(separator)
}

0 comments on commit ff9cea1

Please sign in to comment.