scala · lrytz · Apr 23, 2021 · Mar 23, 2021 · lrytz · Mar 26, 2021
diff --git a/src/compiler/scala/tools/nsc/javac/JavaScanners.scala b/src/compiler/scala/tools/nsc/javac/JavaScanners.scala
@@ -239,6 +239,9 @@ trait JavaScanners extends ast.parser.ScannersCommon {
     */
     protected def putChar(c: Char): Unit = { cbuf.append(c) }
 
+    /** Remove the last N characters from the buffer */
+    private def popNChars(n: Int): Unit = if (n > 0) cbuf.setLength(cbuf.length - n)
+
     /** Clear buffer and set name */
     private def setName(): Unit = {
       name = newTermName(cbuf.toString())
@@ -322,15 +325,26 @@ trait JavaScanners extends ast.parser.ScannersCommon {
 
               case '\"' =>
                 in.next()
-                while (in.ch != '\"' && (in.isUnicode || in.ch != CR && in.ch != LF && in.ch != SU)) {
-                  getlitch()
-                }
-                if (in.ch == '\"') {
-                  token = STRINGLIT
-                  setName()
-                  in.next()
+                if (in.ch != '\"') { // "..." non-empty string literal
+                  while (in.ch != '\"' && (in.isUnicode || in.ch != CR && in.ch != LF && in.ch != SU)) {
+                    getlitch()
+                  }
+                  if (in.ch == '\"') {
+                    token = STRINGLIT
+                    setName()
+                    in.next()
+                  } else {
+                    syntaxError("unclosed string literal")
+                  }
                 } else {
-                  syntaxError("unclosed string literal")
+                  in.next()
+                  if (in.ch != '\"') { // "" empty string literal
+                    token = STRINGLIT
+                    setName()
+                  } else {
+                    in.next()
+                    getTextBlock()
+                  }
                 }
                 return
 
@@ -664,9 +678,12 @@ trait JavaScanners extends ast.parser.ScannersCommon {
 // Literals -----------------------------------------------------------------
 
     /** read next character in character or string literal:
-    */
-    protected def getlitch() =
-      if (in.ch == '\\') {
+      *
+      * @param scanOnly skip emitting errors or adding to the literal buffer
+      * @param inTextBlock is this for a text block?
+      */
+    protected def getlitch(scanOnly: Boolean = false, inTextBlock: Boolean = false): Unit = {
+      val c: Char = if (in.ch == '\\') {
         in.next()
         if ('0' <= in.ch && in.ch <= '7') {
           val leadch: Char = in.ch
@@ -680,27 +697,147 @@ trait JavaScanners extends ast.parser.ScannersCommon {
               in.next()
             }
           }
-          putChar(oct.asInstanceOf[Char])
+          oct.asInstanceOf[Char]
         } else {
-          in.ch match {
-            case 'b'  => putChar('\b')
-            case 't'  => putChar('\t')
-            case 'n'  => putChar('\n')
-            case 'f'  => putChar('\f')
-            case 'r'  => putChar('\r')
-            case '\"' => putChar('\"')
-            case '\'' => putChar('\'')
-            case '\\' => putChar('\\')
+          val c: Char = in.ch match {
+            case 'b'  => '\b'
+            case 's'  => ' '
+            case 't'  => '\t'
+            case 'n'  => '\n'
+            case 'f'  => '\f'
+            case 'r'  => '\r'
+            case '\"' => '\"'
+            case '\'' => '\''
+            case '\\' => '\\'
+            case CR | LF if inTextBlock =>
+              in.next()
+              return
             case _    =>
-              syntaxError(in.cpos - 1, "invalid escape character")
-              putChar(in.ch)
+              if (!scanOnly) syntaxError(in.cpos - 1, "invalid escape character")
+              in.ch
           }
           in.next()
+          c
         }
       } else  {
-        putChar(in.ch)
+        val c = in.ch
         in.next()
+        c
       }
+      if (!scanOnly) putChar(c)
+    }
+
+    /** read a triple-quote delimited text block, starting after the first three
+      * double quotes
+      */
+    private def getTextBlock(): Unit = {
+      // Open delimiter is followed by optional space, then a newline
+      while (in.ch == ' ' || in.ch == '\t' || in.ch == FF) {
+        in.next()
+      }
+      if (in.ch != LF && in.ch != CR) { // CR-LF is already normalized into LF by `JavaCharArrayReader`
+        syntaxError("illegal text block open delimiter sequence, missing line terminator")
+        return
+      }
+      in.next()
+
+      /* Do a lookahead scan over the full text block to:
+       *   - compute common white space prefix
+       *   - find the offset where the text block ends
+       */
+      var commonWhiteSpacePrefix = Int.MaxValue
+      var blockEndOffset = 0
+      val backtrackTo = in.copy
+      var blockClosed = false
+      var lineWhiteSpacePrefix = 0
+      var lineIsOnlyWhitespace = true
+      while (!blockClosed && (in.isUnicode || in.ch != SU)) {
+        if (in.ch == '\"') { // Potential end of the block
+          in.next()
+          if (in.ch == '\"') {
+            in.next()
+            if (in.ch == '\"') {
+              blockClosed = true
+              commonWhiteSpacePrefix = commonWhiteSpacePrefix min lineWhiteSpacePrefix
+              blockEndOffset = in.cpos - 2
+            }
+          }
+
+          // Not the end of the block - just a single or double " character
+          if (!blockClosed) {
+            lineIsOnlyWhitespace = false
+          }
+        } else if (in.ch == CR || in.ch == LF) { // new line in the block
+          in.next()
+          if (!lineIsOnlyWhitespace) {
+            commonWhiteSpacePrefix = commonWhiteSpacePrefix min lineWhiteSpacePrefix
+          }
+          lineWhiteSpacePrefix = 0
+          lineIsOnlyWhitespace = true
+        } else if (lineIsOnlyWhitespace && Character.isWhitespace(in.ch)) { // extend white space prefix
+          in.next()
+          lineWhiteSpacePrefix += 1
+        } else {
+          lineIsOnlyWhitespace = false
+          getlitch(scanOnly = true, inTextBlock = true)
+        }
+      }
+
+      // Bail out if the block never did have an end
+      if (!blockClosed) {
+        syntaxError("unclosed text block")
+        return
+      }
+
+      // Second pass: construct the literal string value this time
+      in = backtrackTo
+      while (in.cpos < blockEndOffset) {
+        // Drop the line's leading whitespace
+        var remainingPrefix = commonWhiteSpacePrefix
+        while (remainingPrefix > 0 && in.ch != CR && in.ch != LF && in.cpos < blockEndOffset) {
+          in.next()
+          remainingPrefix -= 1
+        }
+
+        var trailingWhitespaceLength = 0
+        var escapedNewline = false         // Does the line end with `\`?
+        while (in.ch != CR && in.ch != LF && in.cpos < blockEndOffset && !escapedNewline) {
+          if (Character.isWhitespace(in.ch)) {
+            trailingWhitespaceLength += 1
+          } else {
+            trailingWhitespaceLength = 0
+          }
+
+          // Detect if the line is about to end with `\`
+          if (in.ch == '\\' && {
+            val lookahead = in.copy
+            lookahead.next()
+            lookahead.ch == CR || lookahead.ch == LF
+          }) {
+            escapedNewline = true
+          }
+
+          getlitch(scanOnly = false, inTextBlock = true)
+        }
+
+        // Drop the line's trailing whitespace
+        popNChars(trailingWhitespaceLength)
+
+        // Normalize line terminators
+        if ((in.ch == CR || in.ch == LF) && !escapedNewline) {
+          in.next()
+          putChar('\n')
+        }
+      }
+
+      token = STRINGLIT
+      setName()
+
+      // Trailing """
+      in.next()
+      in.next()
+      in.next()
+    }
 
     /** read fractional part and exponent of floating point number
      *  if one is present.

diff --git a/test/files/neg/text-blocks.check b/test/files/neg/text-blocks.check
@@ -0,0 +1,13 @@
+text-blocks/Invalid1.java:4: error: illegal text block open delimiter sequence, missing line terminator
+    public static final String badOpeningDelimiter = """non-whitespace
+                                                        ^
+text-blocks/Invalid1.java:4: error: <identifier> expected
+    public static final String badOpeningDelimiter = """non-whitespace
+                                                           ^
+text-blocks/Invalid1.java:6: error: illegal text block open delimiter sequence, missing line terminator
+    """;
+       ^
+text-blocks/Invalid2.java:6: error: unclosed string literal
+      foo"""";
+            ^
+4 errors
diff --git a/test/files/neg/text-blocks/Invalid1.java b/test/files/neg/text-blocks/Invalid1.java
@@ -0,0 +1,7 @@
+// javaVersion: 15+
+class Invalid1 {
+
+    public static final String badOpeningDelimiter = """non-whitespace
+      foo
+    """;
+}
diff --git a/test/files/neg/text-blocks/Invalid2.java b/test/files/neg/text-blocks/Invalid2.java
@@ -0,0 +1,7 @@
+// javaVersion: 15+
+class Invalid2 {
+
+    // Closing delimiter is first three eligible `"""`, not last
+    public static final String closingDelimiterIsNotScalas = """
+      foo"""";
+}
diff --git a/test/files/run/t12290.check b/test/files/run/t12290.check
@@ -0,0 +1,61 @@
+====
+A text
+
+====
+<html>
+    <body>
+        <p>Hello, world</p>
+    </body>
+</html>
+
+====
+SELECT "EMP_ID", "LAST_NAME" FROM "EMPLOYEE_TB"
+WHERE "CITY" = 'INDIANAPOLIS'
+ORDER BY "EMP_ID", "LAST_NAME";
+
+====
+<html>
+    <body>
+        <p>Hello, world</p>
+    </body>
+</html>
+
+====
+                            <html>
+                                <body>
+                                    <p>Hello, world</p>
+                                </body>
+                            </html>
+
+====
+<html>
+    <body>
+        <p>Hello, world</p>
+    </body>
+
+</html>
+
+====
+<html>
+
+    <body>        <p>Hello ,	world</p>
+    </body>
+</html>
+
+====
+   this line has 4 tabs before it
+ this line has 5 spaces before it and space after it
+ this line has 2 tabs and 3 spaces before it
+  this line has 6 spaces before it
+
+====
+String text = """
+    A text block inside a text block
+""";
+
+====
+foo	bar
+baz
+====
+
+====
diff --git a/test/files/run/t12290/Test.scala b/test/files/run/t12290/Test.scala
@@ -0,0 +1,30 @@
+// javaVersion: 15+
+/* Using `valueOf` is a way to check that the Java string literals were properly
+ * parsed, since the parsed value is what the Scala compiler will use when
+ * resolving the singleton types
+ */
+object Test extends App {
+  println("====")
+  println(valueOf[TextBlocks.aText.type])
+  println("====")
+  println(valueOf[TextBlocks.html1.type])
+  println("====")
+  println(valueOf[TextBlocks.query.type])
+  println("====")
+  println(valueOf[TextBlocks.html2.type])
+  println("====")
+  println(valueOf[TextBlocks.html3.type])
+  println("====")
+  println(valueOf[TextBlocks.html4.type])
+  println("====")
+  println(valueOf[TextBlocks.html5.type])
+  println("====")
+  println(valueOf[TextBlocks.mixedIndents.type])
+  println("====")
+  println(valueOf[TextBlocks.code.type])
+  println("====")
+  println(valueOf[TextBlocks.simpleString.type])
+  println("====")
+  println(valueOf[TextBlocks.emptyString.type])
+  println("====")
+}