Skip to content

Commit

Permalink
Remove The Unicode Escape \u000E In Scaladoc Code Comment Parsing
Browse files Browse the repository at this point in the history
The regular expressions for `CodeBlockStartRegex` and `CodeBlockEndRegex` both contain two instances of the Unicode escape `\u000E`. This is the "Shift In" character. I expect that it was inserted as part of a copy/paste error.

Unicode escapes in triple-quoted strings are deprecated as of 2.13.2 (scala/scala#8282). Further, this character actually makes the regular expression invalid if it is interpreted. This isn't a big deal right now, as it appears to be ignored on Scala 2.12.x, but on Scala 2.13.x it will cause the regular expressions to fail to match Scaladoc that uses the `<pre>` tag. For example:

```scala
import scala.util.matching._

/**
 * Minimal reproduction comparing two variants of the code-block start
 * pattern against the same sample Scaladoc comment: one variant carries a
 * U+000E unicode escape on both sides of the `<pre>` alternative, the other
 * does not.
 */
object Main {

  /** Sample Scaladoc text containing one brace-style and one `<pre>`-style code block. */
  val doc0: String =
    """
    | /** A foo is a bar, for example.
    |   *
    |   * {{{
    |   * val foo: String = "bar"
    |   * }}}
    |   *
    |   * <pre>
    |   * val bar: String = "baz
    |   * </pre>
    |   */""".stripMargin

  /** Start-of-code-block pattern with the U+000E escape around the `<pre>` alternative. */
  val CodeBlockStartRegex =
    """(.*?)((?:\{\{\{)|(?:\u000E<pre(?: [^>]*)?>\u000E))(.*)""".r

  /** The same pattern with the U+000E escapes removed. */
  val CodeBlockStartRegex0 =
    """(.*?)((?:\{\{\{)|(?:<pre(?: [^>]*)?>))(.*)""".r

  /**
   * Prints the target text, the pattern, and every match of the pattern in
   * the target, preceded by the number of matches.
   *
   * @param regex the pattern to search with
   * @param value the text to search in
   */
  def matchInfo(regex: Regex, value: CharSequence): Unit = {
    val found: List[Regex.Match] = regex.findAllMatchIn(value).toList
    val report = Seq(
      s"\nTarget: $value",
      s"Regex: $regex",
      s"Match Count: ${found.size}",
      s"Matches: $found"
    )
    report.foreach(line => println(line))
  }

  /** Reports on the escaped pattern first, then on the escape-free one. */
  def main(args: Array[String]): Unit =
    List(CodeBlockStartRegex, CodeBlockStartRegex0).foreach { pattern =>
      matchInfo(pattern, doc0)
    }
}
```

When run with Scala 2.13.4, this yields the following result:

```shell
warning: 1 deprecation (since 2.13.2); re-run with -deprecation for details
1 warning
Picked up JAVA_TOOL_OPTIONS: -Dsbt.supershell=false

Target:
 /** A foo is a bar, for example.
   *
   * {{{
   * val foo: String = "bar"
   * }}}
   *
   * <pre>
   * val bar: String = "baz
   * </pre>
   */
Regex: (.*?)((?:\{\{\{)|(?:�<pre(?: [^>]*)?>�))(.*)
Match Count: 1
Matches: List(   * {{{)

Target:
 /** A foo is a bar, for example.
   *
   * {{{
   * val foo: String = "bar"
   * }}}
   *
   * <pre>
   * val bar: String = "baz
   * </pre>
   */
Regex: (.*?)((?:\{\{\{)|(?:<pre(?: [^>]*)?>))(.*)
Match Count: 2
Matches: List(   * {{{,    * <pre>)
```

Note how the first regular expression found only one match, the `{{{`-based one, while the second found both.

Finally, a small test was added to ensure that the change does not break comment parsing.
  • Loading branch information
isomarcte committed Dec 20, 2020
1 parent 18bfa0c commit 4c45595
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 8 deletions.
Expand Up @@ -234,13 +234,13 @@ object ScaladocParser {
* The start of a Scaladoc code block
*/
private val CodeBlockStartRegex =
new Regex("""(.*?)((?:\{\{\{)|(?:\u000E<pre(?: [^>]*)?>\u000E))(.*)""")
new Regex("""(.*?)((?:\{\{\{)|(?:<pre(?: [^>]*)?>))(.*)""")

/**
* The end of a Scaladoc code block
*/
private val CodeBlockEndRegex =
new Regex("""(.*?)((?:\}\}\})|(?:\u000E</pre>\u000E))(.*)""")
new Regex("""(.*?)((?:\}\}\})|(?:</pre>))(.*)""")

/**
* A key used for a tag map. The key is built from the name of the tag and
Expand Down
20 changes: 14 additions & 6 deletions tests/unit/src/test/scala/tests/ScaladocSuite.scala
Expand Up @@ -6,19 +6,27 @@ import munit.Location

final class ScaladocSuite extends BaseSuite {

def checkCommentBody(name: String, original: String, expected: Body)(implicit loc: Location): Unit =
/**
* Comment does not directly declare a meaningful equality definition, thus
* this check compares [[Comment.body]] instead.
*/
def checkCommentBody(name: String, original: String, expected: Body)(implicit
loc: Location
): Unit =
test(name) {
val obtained: Comment = ScaladocParser.parseComment(original)
assertEquals(obtained.body, expected)
}

checkCommentBody(
"Brace ({{{) style code comment",
"""/**
| * {{{
| * val foo: Int = 1
| * }}}
| */""".stripMargin,
"""/**{{{val foo: Int = 1}}} */""",
Body(List(Code("val foo: Int = 1")))
)

checkCommentBody(
"HTML <pre> style code comment",
"""/**<pre>val foo: Int = 1</pre> */""",
Body(List(Code("val foo: Int = 1")))
)
}

0 comments on commit 4c45595

Please sign in to comment.