Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use StringConcatFactory for string concatenation on JDK 9+ #9556

Merged
merged 1 commit into from Jun 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
114 changes: 90 additions & 24 deletions src/compiler/scala/tools/nsc/backend/jvm/BCodeBodyBuilder.scala
Expand Up @@ -33,7 +33,7 @@ abstract class BCodeBodyBuilder extends BCodeSkelBuilder {
import bTypes._
import coreBTypes._
import definitions._
import genBCode.postProcessor.backendUtils.addIndyLambdaImplMethod
import genBCode.postProcessor.backendUtils.{addIndyLambdaImplMethod, classfileVersion}
import genBCode.postProcessor.callGraph.{inlineAnnotatedCallsites, noInlineAnnotatedCallsites}

/*
Expand Down Expand Up @@ -990,44 +990,110 @@ abstract class BCodeBodyBuilder extends BCodeSkelBuilder {
}
}

/* Generate string concatenation
*
* On JDK 8: create and append using `StringBuilder`
* On JDK 9+: use `invokedynamic` with `StringConcatFactory`
*/
def genStringConcat(tree: Tree): BType = {
lineNumber(tree)
liftStringConcat(tree) match {
// Optimization for expressions of the form "" + x. We can avoid the StringBuilder.
// Optimization for expressions of the form "" + x
case List(Literal(Constant("")), arg) =>
genLoad(arg, ObjectRef)
genCallMethod(String_valueOf, InvokeStyle.Static, arg.pos)

case concatenations =>
val approxBuilderSize = concatenations.map {
case Literal(Constant(s: String)) => s.length
case Literal(c @ Constant(value)) if c.isNonUnitAnyVal => String.valueOf(c).length
case _ =>
// could add some guess based on types of primitive args.
// or, we could stringify all the args onto the stack, compute the exact size of
// the StringBuilder.
// or, just let https://openjdk.java.net/jeps/280 (or a re-implementation thereof in our 2.13.x stdlib) do all the hard work at link time
0
}.sum
bc.genStartConcat(tree.pos, approxBuilderSize)
def isEmptyString(t: Tree) = t match {
case Literal(Constant("")) => true
case _ => false
}
for (elem <- concatenations if !isEmptyString(elem)) {
val loadedElem = elem match {

val concatArguments = concatenations.view
.filter {
case Literal(Constant("")) => false // empty strings are no-ops in concatenation
case _ => true
}
.map {
case Apply(boxOp, value :: Nil) if currentRun.runDefinitions.isBox(boxOp.symbol) =>
// Eliminate boxing of primitive values. Boxing is introduced by erasure because
// there's only a single synthetic `+` method "added" to the string class.
value
case other => other
}
.toList

// `StringConcatFactory` only got added in JDK 9, so use `StringBuilder` for lower
if (classfileVersion.get < asm.Opcodes.V9) {

// Estimate capacity needed for the string builder
val approxBuilderSize = concatArguments.view.map {
case Literal(Constant(s: String)) => s.length
case Literal(c @ Constant(_)) if c.isNonUnitAnyVal => String.valueOf(c).length
case _ => 0
}.sum
bc.genNewStringBuilder(tree.pos, approxBuilderSize)

for (elem <- concatArguments) {
val elemType = tpeTK(elem)
genLoad(elem, elemType)
bc.genStringBuilderAppend(elemType, elem.pos)
}
bc.genStringBuilderEnd(tree.pos)
} else {

/* `StringConcatFactory#makeConcatWithConstants` accepts max 200 argument slots. If
* the string concatenation is longer (unlikely), we spill into multiple calls
*/
val MaxIndySlots = 200
val TagArg = '\u0001' // indicates a hole (in the recipe string) for an argument
val TagConst = '\u0002' // indicates a hole (in the recipe string) for a constant

val recipe = new StringBuilder()
val argTypes = Seq.newBuilder[asm.Type]
val constVals = Seq.newBuilder[String]
var totalArgSlots = 0
var countConcats = 1 // ie. 1 + how many times we spilled

for (elem <- concatArguments) {
val tpe = tpeTK(elem)
val elemSlots = tpe.size

// Unlikely spill case
if (totalArgSlots + elemSlots >= MaxIndySlots) {
bc.genIndyStringConcat(recipe.toString, argTypes.result(), constVals.result())
countConcats += 1
totalArgSlots = 0
recipe.setLength(0)
argTypes.clear()
constVals.clear()
}

case _ => elem
elem match {
case Literal(Constant(s: String)) =>
if (s.contains(TagArg) || s.contains(TagConst)) {
totalArgSlots += elemSlots
recipe.append(TagConst)
constVals += s
} else {
recipe.append(s)
}

case other =>
totalArgSlots += elemSlots
recipe.append(TagArg)
val tpe = tpeTK(elem)
argTypes += tpe.toASMType
genLoad(elem, tpe)
}
}
bc.genIndyStringConcat(recipe.toString, argTypes.result(), constVals.result())

// If we spilled, generate one final concat
if (countConcats > 1) {
bc.genIndyStringConcat(
TagArg.toString * countConcats,
Seq.fill(countConcats)(StringRef.toASMType),
Seq.empty
)
}
val elemType = tpeTK(loadedElem)
genLoad(loadedElem, elemType)
bc.genConcat(elemType, loadedElem.pos)
}
bc.genEndConcat(tree.pos)
}
StringRef
}
Expand Down
39 changes: 33 additions & 6 deletions src/compiler/scala/tools/nsc/backend/jvm/BCodeIdiomatic.scala
Expand Up @@ -175,10 +175,11 @@ abstract class BCodeIdiomatic {

} // end of method genPrimitiveShift()

/*
/* Creates a new `StringBuilder` instance with the requested capacity
*
* can-multi-thread
*/
final def genStartConcat(pos: Position, size: Int): Unit = {
final def genNewStringBuilder(pos: Position, size: Int): Unit = {
jmethod.visitTypeInsn(Opcodes.NEW, JavaStringBuilderClassName)
jmethod.visitInsn(Opcodes.DUP)
jmethod.visitLdcInsn(Integer.valueOf(size))
Expand All @@ -191,10 +192,11 @@ abstract class BCodeIdiomatic {
)
}

/*
/* Issue a call to `StringBuilder#append` for the right element type
*
* can-multi-thread
*/
def genConcat(elemType: BType, pos: Position): Unit = {
final def genStringBuilderAppend(elemType: BType, pos: Position): Unit = {
val paramType: BType = elemType match {
case ct: ClassBType if ct.isSubtypeOf(StringRef).get => StringRef
case ct: ClassBType if ct.isSubtypeOf(jlStringBufferRef).get => jlStringBufferRef
Expand All @@ -211,13 +213,38 @@ abstract class BCodeIdiomatic {
invokevirtual(JavaStringBuilderClassName, "append", bt.descriptor, pos)
}

/*
/* Extract the built `String` from the `StringBuilder`
*:
* can-multi-thread
*/
final def genEndConcat(pos: Position): Unit = {
final def genStringBuilderEnd(pos: Position): Unit = {
invokevirtual(JavaStringBuilderClassName, "toString", "()Ljava/lang/String;", pos)
}

/* Concatenate top N arguments on the stack with `StringConcatFactory#makeConcatWithConstants`
* (only works for JDK 9+)
*
* can-multi-thread
*/
final def genIndyStringConcat(
recipe: String,
argTypes: Seq[asm.Type],
constants: Seq[String]
): Unit = {
jmethod.visitInvokeDynamicInsn(
"makeConcatWithConstants",
asm.Type.getMethodDescriptor(StringRef.toASMType, argTypes:_*),
new asm.Handle(
asm.Opcodes.H_INVOKESTATIC,
"java/lang/invoke/StringConcatFactory",
dwijnand marked this conversation as resolved.
Show resolved Hide resolved
"makeConcatWithConstants",
"(Ljava/lang/invoke/MethodHandles$Lookup;Ljava/lang/String;Ljava/lang/invoke/MethodType;Ljava/lang/String;[Ljava/lang/Object;)Ljava/lang/invoke/CallSite;",
false
),
(recipe +: constants):_*
)
}

/*
* Emits one or more conversion instructions based on the types given as arguments.
*
Expand Down
Binary file added test/files/run/StringConcat.check
Binary file not shown.
86 changes: 86 additions & 0 deletions test/files/run/StringConcat.scala
@@ -0,0 +1,86 @@
// java: -Xss128M

import scala.tools.partest.ReplTest

// ReplTest so that the long concatenation is compiled at test-run-time with the larger `Xss`.
// Tests are always compiled in the partest VM.
object Test extends ReplTest {
def code =
"""// This should generally obey 15.18.1. of the JLS (String Concatenation Operator +)
|def concatenatingVariousTypes(): String = {
| val str: String = "some string"
| val sb: StringBuffer = new StringBuffer("some stringbuffer")
| val cs: CharSequence = java.nio.CharBuffer.allocate(50).append("charsequence")
| val i: Int = 123456789
| val s: Short = 345
| val b: Byte = 12
| val z: Boolean = true
| val f: Float = 3.14f
| val j: Long = 98762147483647L
| val d: Double = 3.1415d
|
| "String " + str + "\n" +
| "StringBuffer " + sb + "\n" +
| "CharSequence " + cs + "\n" +
| "Int " + i + "\n" +
| "Short " + s + "\n" +
| "Byte " + b + "\n" +
| "Boolean " + z + "\n" +
| "Float " + f + "\n" +
| "Long " + j + "\n" +
| "Double " + d + "\n"
|}
|// The characters `\u0001` and `\u0002` play a special role in `StringConcatFactory`
|def concatenationInvolvingSpecialCharacters(): String = {
| val s1 = "Qux"
| val s2 = "Quux"
|
| s"Foo \u0001 $s1 Bar \u0002 $s2 Baz"
|}
|// Concatenation involving more than 200 elements
|def largeConcatenation(): String = {
| val s00 = "s00"
| val s01 = "s01"
| val s02 = "s02"
| val s03 = "s03"
| val s04 = "s04"
| val s05 = "s05"
| val s06 = "s06"
| val s07 = "s07"
| val s08 = "s08"
|
| // 24 rows follow
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
| s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n"
|}
|println("----------")
|println(concatenatingVariousTypes())
|println("----------")
|println(concatenationInvolvingSpecialCharacters())
|println("----------")
|println(largeConcatenation())
|println("----------")
|""".stripMargin
}