Skip to content

Commit

Permalink
Merge pull request #9556 from harpocrates/alec/indy-string-concat
Browse files Browse the repository at this point in the history
Use `StringConcatFactory` for string concatenation on JDK 9+
  • Loading branch information
lrytz committed Jun 24, 2021
2 parents 37cf33a + b7dc31f commit 7d79bbd
Show file tree
Hide file tree
Showing 4 changed files with 209 additions and 30 deletions.
114 changes: 90 additions & 24 deletions src/compiler/scala/tools/nsc/backend/jvm/BCodeBodyBuilder.scala
Expand Up @@ -33,7 +33,7 @@ abstract class BCodeBodyBuilder extends BCodeSkelBuilder {
import bTypes._
import coreBTypes._
import definitions._
import genBCode.postProcessor.backendUtils.addIndyLambdaImplMethod
import genBCode.postProcessor.backendUtils.{addIndyLambdaImplMethod, classfileVersion}
import genBCode.postProcessor.callGraph.{inlineAnnotatedCallsites, noInlineAnnotatedCallsites}

/*
Expand Down Expand Up @@ -990,44 +990,110 @@ abstract class BCodeBodyBuilder extends BCodeSkelBuilder {
}
}

/* Generate string concatenation
*
* On JDK 8: create and append using `StringBuilder`
* On JDK 9+: use `invokedynamic` with `StringConcatFactory`
*/
def genStringConcat(tree: Tree): BType = {
lineNumber(tree)
liftStringConcat(tree) match {
// Optimization for expressions of the form "" + x. We can avoid the StringBuilder.
// Optimization for expressions of the form "" + x
case List(Literal(Constant("")), arg) =>
genLoad(arg, ObjectRef)
genCallMethod(String_valueOf, InvokeStyle.Static, arg.pos)

case concatenations =>
val approxBuilderSize = concatenations.map {
case Literal(Constant(s: String)) => s.length
case Literal(c @ Constant(value)) if c.isNonUnitAnyVal => String.valueOf(c).length
case _ =>
// could add some guess based on types of primitive args.
// or, we could stringify all the args onto the stack, compute the exact size of
// the StringBuilder.
// or, just let https://openjdk.java.net/jeps/280 (or a re-implementation thereof in our 2.13.x stdlib) do all the hard work at link time
0
}.sum
bc.genStartConcat(tree.pos, approxBuilderSize)
def isEmptyString(t: Tree) = t match {
case Literal(Constant("")) => true
case _ => false
}
for (elem <- concatenations if !isEmptyString(elem)) {
val loadedElem = elem match {

val concatArguments = concatenations.view
.filter {
case Literal(Constant("")) => false // empty strings are no-ops in concatenation
case _ => true
}
.map {
case Apply(boxOp, value :: Nil) if currentRun.runDefinitions.isBox(boxOp.symbol) =>
// Eliminate boxing of primitive values. Boxing is introduced by erasure because
// there's only a single synthetic `+` method "added" to the string class.
value
case other => other
}
.toList

// `StringConcatFactory` only got added in JDK 9, so use `StringBuilder` for lower
if (classfileVersion.get < asm.Opcodes.V9) {

// Estimate capacity needed for the string builder
val approxBuilderSize = concatArguments.view.map {
case Literal(Constant(s: String)) => s.length
case Literal(c @ Constant(_)) if c.isNonUnitAnyVal => String.valueOf(c).length
case _ => 0
}.sum
bc.genNewStringBuilder(tree.pos, approxBuilderSize)

for (elem <- concatArguments) {
val elemType = tpeTK(elem)
genLoad(elem, elemType)
bc.genStringBuilderAppend(elemType, elem.pos)
}
bc.genStringBuilderEnd(tree.pos)
} else {

/* `StringConcatFactory#makeConcatWithConstants` accepts max 200 argument slots. If
* the string concatenation is longer (unlikely), we spill into multiple calls
*/
val MaxIndySlots = 200
val TagArg = '\u0001' // indicates a hole (in the recipe string) for an argument
val TagConst = '\u0002' // indicates a hole (in the recipe string) for a constant

val recipe = new StringBuilder()
val argTypes = Seq.newBuilder[asm.Type]
val constVals = Seq.newBuilder[String]
var totalArgSlots = 0
var countConcats = 1 // ie. 1 + how many times we spilled

for (elem <- concatArguments) {
val tpe = tpeTK(elem)
val elemSlots = tpe.size

// Unlikely spill case
if (totalArgSlots + elemSlots >= MaxIndySlots) {
bc.genIndyStringConcat(recipe.toString, argTypes.result(), constVals.result())
countConcats += 1
totalArgSlots = 0
recipe.setLength(0)
argTypes.clear()
constVals.clear()
}

case _ => elem
elem match {
case Literal(Constant(s: String)) =>
if (s.contains(TagArg) || s.contains(TagConst)) {
totalArgSlots += elemSlots
recipe.append(TagConst)
constVals += s
} else {
recipe.append(s)
}

case other =>
totalArgSlots += elemSlots
recipe.append(TagArg)
val tpe = tpeTK(elem)
argTypes += tpe.toASMType
genLoad(elem, tpe)
}
}
bc.genIndyStringConcat(recipe.toString, argTypes.result(), constVals.result())

// If we spilled, generate one final concat
if (countConcats > 1) {
bc.genIndyStringConcat(
TagArg.toString * countConcats,
Seq.fill(countConcats)(StringRef.toASMType),
Seq.empty
)
}
val elemType = tpeTK(loadedElem)
genLoad(loadedElem, elemType)
bc.genConcat(elemType, loadedElem.pos)
}
bc.genEndConcat(tree.pos)
}
StringRef
}
Expand Down
39 changes: 33 additions & 6 deletions src/compiler/scala/tools/nsc/backend/jvm/BCodeIdiomatic.scala
Expand Up @@ -175,10 +175,11 @@ abstract class BCodeIdiomatic {

} // end of method genPrimitiveShift()

/*
/* Creates a new `StringBuilder` instance with the requested capacity
*
* can-multi-thread
*/
final def genStartConcat(pos: Position, size: Int): Unit = {
final def genNewStringBuilder(pos: Position, size: Int): Unit = {
jmethod.visitTypeInsn(Opcodes.NEW, JavaStringBuilderClassName)
jmethod.visitInsn(Opcodes.DUP)
jmethod.visitLdcInsn(Integer.valueOf(size))
Expand All @@ -191,10 +192,11 @@ abstract class BCodeIdiomatic {
)
}

/*
/* Issue a call to `StringBuilder#append` for the right element type
*
* can-multi-thread
*/
def genConcat(elemType: BType, pos: Position): Unit = {
final def genStringBuilderAppend(elemType: BType, pos: Position): Unit = {
val paramType: BType = elemType match {
case ct: ClassBType if ct.isSubtypeOf(StringRef).get => StringRef
case ct: ClassBType if ct.isSubtypeOf(jlStringBufferRef).get => jlStringBufferRef
Expand All @@ -211,13 +213,38 @@ abstract class BCodeIdiomatic {
invokevirtual(JavaStringBuilderClassName, "append", bt.descriptor, pos)
}

/*
/* Extract the built `String` from the `StringBuilder`
*:
* can-multi-thread
*/
final def genEndConcat(pos: Position): Unit = {
final def genStringBuilderEnd(pos: Position): Unit = {
invokevirtual(JavaStringBuilderClassName, "toString", "()Ljava/lang/String;", pos)
}

/* Concatenate top N arguments on the stack with `StringConcatFactory#makeConcatWithConstants`
* (only works for JDK 9+)
*
* can-multi-thread
*/
final def genIndyStringConcat(
recipe: String,
argTypes: Seq[asm.Type],
constants: Seq[String]
): Unit = {
jmethod.visitInvokeDynamicInsn(
"makeConcatWithConstants",
asm.Type.getMethodDescriptor(StringRef.toASMType, argTypes:_*),
new asm.Handle(
asm.Opcodes.H_INVOKESTATIC,
"java/lang/invoke/StringConcatFactory",
"makeConcatWithConstants",
"(Ljava/lang/invoke/MethodHandles$Lookup;Ljava/lang/String;Ljava/lang/invoke/MethodType;Ljava/lang/String;[Ljava/lang/Object;)Ljava/lang/invoke/CallSite;",
false
),
(recipe +: constants):_*
)
}

/*
* Emits one or more conversion instructions based on the types given as arguments.
*
Expand Down
Binary file added test/files/run/StringConcat.check
Binary file not shown.
86 changes: 86 additions & 0 deletions test/files/run/StringConcat.scala
@@ -0,0 +1,86 @@
// java: -Xss128M

import scala.tools.partest.ReplTest

// ReplTest so that the long concatenation is compiled at test-run-time with the larger `Xss`.
// Tests are always compiled in the partest VM.
object Test extends ReplTest {
def code =
"""// This should generally obey 15.18.1. of the JLS (String Concatenation Operator +)
|def concatenatingVariousTypes(): String = {
| val str: String = "some string"
| val sb: StringBuffer = new StringBuffer("some stringbuffer")
| val cs: CharSequence = java.nio.CharBuffer.allocate(50).append("charsequence")
| val i: Int = 123456789
| val s: Short = 345
| val b: Byte = 12
| val z: Boolean = true
| val f: Float = 3.14f
| val j: Long = 98762147483647L
| val d: Double = 3.1415d
|
| "String " + str + "\n" +
| "StringBuffer " + sb + "\n" +
| "CharSequence " + cs + "\n" +
| "Int " + i + "\n" +
| "Short " + s + "\n" +
| "Byte " + b + "\n" +
| "Boolean " + z + "\n" +
| "Float " + f + "\n" +
| "Long " + j + "\n" +
| "Double " + d + "\n"
|}
|// The characters `\u0001` and `\u0002` play a special role in `StringConcatFactory`
|def concatenationInvolvingSpecialCharacters(): String = {
| val s1 = "Qux"
| val s2 = "Quux"
|
| s"Foo \u0001 $s1 Bar \u0002 $s2 Baz"
|}
|// Concatenation involving more than 200 elements
|def largeConcatenation(): String = {
| val s00 = "s00"
| val s01 = "s01"
| val s02 = "s02"
| val s03 = "s03"
| val s04 = "s04"
| val s05 = "s05"
| val s06 = "s06"
| val s07 = "s07"
| val s08 = "s08"
|
| // 24 rows follow
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n"
|}
|println("----------")
|println(concatenatingVariousTypes())
|println("----------")
|println(concatenationInvolvingSpecialCharacters())
|println("----------")
|println(largeConcatenation())
|println("----------")
|""".stripMargin
}

0 comments on commit 7d79bbd

Please sign in to comment.