Skip to content

Commit

Permalink
make VectorBuilder reusable again
Browse files Browse the repository at this point in the history
* `leftAlignPrefix()` now keeps the length of the outermost array at
  (LAST)WIDTH.
  This is necessary because after a call to `result()`, the VB may be
  used further.
* Allow arbitrarily misleading/wrong arguments in `alignTo()`
  Should not fail now, even if the hint given is completely nonsensical,
  `leftAlignPrefix()` now compensates for that.
  Negative alignments are allowed, e.g. if you plan to drop elements of
  the vector to be added.
* Add a bunch of tests to cover previously uncovered branches.
  • Loading branch information
ansvonwa committed Oct 26, 2022
1 parent e4b872a commit fbd5a55
Show file tree
Hide file tree
Showing 2 changed files with 176 additions and 36 deletions.
89 changes: 56 additions & 33 deletions src/library/scala/collection/immutable/Vector.scala
Expand Up @@ -1593,76 +1593,102 @@ final class VectorBuilder[A] extends ReusableBuilder[A, Vector[A]] {
* Removes `offset` leading `null`s in the prefix.
* This is needed after calling `alignTo` and subsequent additions,
* directly before the result is used for creating a new Vector.
* Note that the outermost array keeps its length to keep the
* Builder re-usable.
*
* example:
* a2 = Array(null, ..., null, Array(null, .., null, 0, 1, .., x), Array(x+1, .., x+32), ...)
* becomes
* a2 = Array(Array(0, 1, .., x), Array(x+1, .., x+32), ...)
* a2 = Array(Array(0, 1, .., x), Array(x+1, .., x+32), ..., ?, ..., ?)
*/
private[this] def leftAlignPrefix(): Unit = {
@inline def shrinkOffsetIfToLarge(width: Int): Unit = {
val newOffset = offset % width
lenRest -= offset - newOffset
offset = newOffset
}
var a: Array[AnyRef] = null // the array we modify
var aParent: Array[AnyRef] = null // a's parent, so aParent(0) == a
if (depth >= 6) {
a = a6.asInstanceOf[Array[AnyRef]]
val i = offset >>> BITS5
if (i > 0) {
a = copyOfRange(a, i, LASTWIDTH)
a6 = a.asInstanceOf[Arr6]
}
if (i > 0) System.arraycopy(a, i, a, 0, LASTWIDTH - i)
shrinkOffsetIfToLarge(WIDTH5)
if ((lenRest >>> BITS5) == 0) depth = 5
aParent = a
a = a(0).asInstanceOf[Array[AnyRef]]
}
if (depth >= 5) {
if (a == null) a = a5.asInstanceOf[Array[AnyRef]]
val i = (offset >>> BITS4) & MASK
if (i > 0) {
a = copyOfRange(a, i, WIDTH)
if (depth == 5) a5 = a.asInstanceOf[Arr5]
else aParent(0) = a
if (depth == 5) {
if (i > 0) System.arraycopy(a, i, a, 0, WIDTH - i)
a5 = a.asInstanceOf[Arr5]
shrinkOffsetIfToLarge(WIDTH4)
if ((lenRest >>> BITS4) == 0) depth = 4
} else {
if (i > 0) a = copyOfRange(a, i, WIDTH)
aParent(0) = a
}
aParent = a
a = a(0).asInstanceOf[Array[AnyRef]]
}
if (depth >= 4) {
if (a == null) a = a4.asInstanceOf[Array[AnyRef]]
val i = (offset >>> BITS3) & MASK
if (i > 0) {
a = copyOfRange(a, i, WIDTH)
if (depth == 4) a4 = a.asInstanceOf[Arr4]
else aParent(0) = a
if (depth == 4) {
if (i > 0) System.arraycopy(a, i, a, 0, WIDTH - i)
a4 = a.asInstanceOf[Arr4]
shrinkOffsetIfToLarge(WIDTH3)
if ((lenRest >>> BITS3) == 0) depth = 3
} else {
if (i > 0) a = copyOfRange(a, i, WIDTH)
aParent(0) = a
}
aParent = a
a = a(0).asInstanceOf[Array[AnyRef]]
}
if (depth >= 3) {
if (a == null) a = a3.asInstanceOf[Array[AnyRef]]
val i = (offset >>> BITS2) & MASK
if (i > 0) {
a = copyOfRange(a, i, WIDTH)
if (depth == 3) a3 = a.asInstanceOf[Arr3]
else aParent(0) = a
if (depth == 3) {
if (i > 0) System.arraycopy(a, i, a, 0, WIDTH - i)
a3 = a.asInstanceOf[Arr3]
shrinkOffsetIfToLarge(WIDTH2)
if ((lenRest >>> BITS2) == 0) depth = 2
} else {
if (i > 0) a = copyOfRange(a, i, WIDTH)
aParent(0) = a
}
aParent = a
a = a(0).asInstanceOf[Array[AnyRef]]
}
if (depth >= 2) {
if (a == null) a = a2.asInstanceOf[Array[AnyRef]]
val i = (offset >>> BITS) & MASK
if (i > 0) {
a = copyOfRange(a, i, WIDTH)
if (depth == 2) a2 = a.asInstanceOf[Arr2]
else aParent(0) = a
if (depth == 2) {
if (i > 0) System.arraycopy(a, i, a, 0, WIDTH - i)
a2 = a.asInstanceOf[Arr2]
shrinkOffsetIfToLarge(WIDTH)
if ((lenRest >>> BITS) == 0) depth = 1
} else {
if (i > 0) a = copyOfRange(a, i, WIDTH)
aParent(0) = a
}
aParent = a
a = a(0).asInstanceOf[Array[AnyRef]]
}
if (depth >= 1) {
if (a == null) a = a1.asInstanceOf[Array[AnyRef]]
val i = offset & MASK
if (i > 0) {
a = copyOfRange(a, i, WIDTH)
if (depth == 1) a1 = a.asInstanceOf[Arr1]
else aParent(0) = a
if (depth == 1) {
if (i > 0) System.arraycopy(a, i, a, 0, WIDTH - i)
a1 = a.asInstanceOf[Arr1]
len1 -= offset
offset = 0
} else {
if (i > 0) a = copyOfRange(a, i, WIDTH)
aParent(0) = a
}
}
prefixIsRightAligned = false
Expand Down Expand Up @@ -1761,15 +1787,13 @@ final class VectorBuilder[A] extends ReusableBuilder[A, Vector[A]] {
slice.foreach(e => addArrN(e.asInstanceOf[Array[AnyRef]], 5))
return
}
val copy1 = mmin((BITS * 6 + 1 - lenRest) >>> BITS5, sl)
val copy2 = sl - copy1
val copy1 = sl
// there is no copy2 because there can't be another a6 to copy to
val destPos = lenRest >>> BITS5
if (destPos + copy1 > LASTWIDTH)
throw new IllegalArgumentException("exceeding 2^31 elements")
System.arraycopy(slice, 0, a6, destPos, copy1)
advanceN(WIDTH5 * copy1)
if (copy2 > 0) {
System.arraycopy(slice, copy1, a6, 0, copy2)
advanceN(WIDTH5 * copy2)
}
}
}

Expand Down Expand Up @@ -1867,8 +1891,7 @@ final class VectorBuilder[A] extends ReusableBuilder[A, Vector[A]] {
if(realLen == 0) Vector.empty
else if(len < 0) throw new IndexOutOfBoundsException(s"Vector cannot have negative size $len")
else if(len <= WIDTH) {
if(realLen == WIDTH) new Vector1(a1)
else new Vector1(copyOf(a1, realLen))
new Vector1(copyIfDifferentSize(a1, realLen))
} else if(len <= WIDTH2) {
val i1 = (len-1) & MASK
val i2 = (len-1) >>> BITS
Expand Down
123 changes: 120 additions & 3 deletions test/junit/scala/collection/immutable/VectorTest.scala
Expand Up @@ -6,6 +6,7 @@ import org.junit.runners.JUnit4
import org.junit.Test

import scala.annotation.unused
import scala.collection.immutable.VectorInline.{WIDTH, WIDTH2, WIDTH3, WIDTH4, WIDTH5}
import scala.collection.mutable.{ListBuffer, StringBuilder}
import scala.tools.testkit.AssertUtil.intercept

Expand Down Expand Up @@ -110,21 +111,43 @@ class VectorTest {
def testBuilderAlignTo2(): Unit = {
val Large = 1 << 20
for (
size <- Seq(0, 1, 31, 1 << 5, 1 << 10, 1 << 15, 1 << 20, 1 << 25, 1 << 30, (1 << 31) - (1 << 26) - 1000);
size <- Seq(0, 1, 31, 1 << 5, 1 << 10, 1 << 15, 1 << 20, 9 << 20, 1 << 25, 9 << 25, 50 << 25, 1 << 30, (1 << 31) - (1 << 26) - 1000);
i <- Seq(0, 1, 5, 123)
) {
// println((i, size))
val v = if (size < Large) Vector.tabulate(size)(_.toString) else Vector.fillSparse(size)("v")
val prefix = Vector.fill(i)("prefix")
val res = new VectorBuilder[AnyRef]
val vb = new VectorBuilder[AnyRef]
.alignTo(i, v)
.addAll(prefix)
.addAll(v)
.result()
val res = vb.result()
val vDesc = if (v.headOption.contains("v")) s"Vector(\"v\")*$size" else s"Vector(0,..,${size-1})"
assertEquals(s"(Vector(\"prefix\")*$i ++ $vDesc).size", size + i, res.size)
assertEquals(s"(Vector(\"prefix\")*$i ++ $vDesc).take($i)", prefix, res.take(i))
assertEquals(s"(Vector(\"prefix\")*$i ++ $vDesc).drop($i)", v.take(Large), res.drop(i).take(Large))

if (size == 9 << 20) {
val v4 = Vector.fillSparse(WIDTH3 * 2)("v4")
val suffix = (v4 ++ v).take(size)
val res2 = vb.addAll(suffix).result()
assertEquals(s"(Vector(\"prefix\")*$i ++ $vDesc ++ v4 ++ $vDesc).size", 2 * size + i, res2.size)
assertEquals(s"(Vector(\"prefix\")*$i ++ $vDesc ++ v4 ++ $vDesc).take($i)", prefix, res2.take(i))
assertEquals(s"(Vector(\"prefix\")*$i ++ $vDesc ++ v4 ++ $vDesc).drop($i).take($size)", v.take(Large), res2.drop(i).take(Large))
assertEquals(s"(Vector(\"prefix\")*$i ++ $vDesc ++ v4 ++ $vDesc).drop(${i + size}).take($size)", suffix.take(Large), res2.drop(i + size).take(Large))
assertEquals(s"(Vector(\"prefix\")*$i ++ $vDesc ++ v4 ++ $vDesc).drop(${i + 2 * size})", suffix.drop(size).take(Large), res2.drop(i + 2 * size).take(Large))
} else if (size == 9 << 25) {
val v4 = Vector.fillSparse(WIDTH4 * 2)("v4")
val suffix = (v4 ++ v).take(size)
val res2 = vb.addAll(suffix).result()
assertEquals(s"(Vector(\"prefix\")*$i ++ $vDesc ++ v4 ++ $vDesc).size", 2 * size + i, res2.size)
assertEquals(s"(Vector(\"prefix\")*$i ++ $vDesc ++ v4 ++ $vDesc).take($i)", prefix, res2.take(i))
assertEquals(s"(Vector(\"prefix\")*$i ++ $vDesc ++ v4 ++ $vDesc).drop($i).take($size)", v.take(Large), res2.drop(i).take(Large))
assertEquals(s"(Vector(\"prefix\")*$i ++ $vDesc ++ v4 ++ $vDesc).drop(${i + size}).take($size)", suffix.take(Large), res2.drop(i + size).take(Large))
assertEquals(s"(Vector(\"prefix\")*$i ++ $vDesc ++ v4 ++ $vDesc).drop(${i + 2 * size})", suffix.drop(size).take(Large), res2.drop(i + 2 * size).take(Large))
} else if (size == 50 << 25) {
assertThrows(classOf[IllegalArgumentException], () => vb.addAll(v))
}
}
}

Expand All @@ -139,6 +162,100 @@ class VectorTest {
assertEquals(copy.take(500), v.take(500))
}

@Test
def testWierdAlignments1(): Unit = {
val v3 = Vector.tabulate(2042)(i => (i - 42).toString).drop(42).asInstanceOf[Vector3[AnyRef]]
for (i <- Seq(0, 1, 5, 41, 42, 43, 123, 949, 950, 982, 1024, 1999, 2000)) {
val res = new VectorBuilder[AnyRef]
.alignTo(i, v3) // pretend to add i elements before v3, but then decide not to... this is slow, but should not fail
.addAll(v3)
.result()
.asInstanceOf[Vector3[AnyRef]]
assertEquals(s"vectors equal", v3, res)
}
}

@Test
def testWierdAlignments2(): Unit = {
val v3 = Vector.tabulate(2042)(i => (i - 42).toString).drop(42).asInstanceOf[Vector3[AnyRef]]
val v2 = Vector.tabulate(765)(i => (i - 123).toString).drop(123).asInstanceOf[Vector2[AnyRef]]
for (i <- Seq(-1234, -42, -1, 0, 1, 5, 41, 42, 43, 123, 949, 950, 982, 1024, 1999, 2000)) {
val res = new VectorBuilder[AnyRef]
.alignTo(i, v3) // pretend to add v3 ...
.addOne("a")
.addAll(v2) // ... but then add completely unrelated v2 instead!
.result()
assertEquals(s"vectors equal", "a" +: v2, res)
}
}

@Test
def testWierdAlignments3(): Unit = for (n <- allSizes; m <- verySmallSizes) {
val vPretend =
if (smallSizes.contains(n))
Vector.tabulate(n + 1337)(i => (i - 1337).toString).drop(1337)
else
Vector.fillSparse(n + 1337)("v").drop(1337)
val vReal = Vector.tabulate(m + 42)(i => (i - 42).toString).drop(42)
for (i <- Seq(-1234, -42, -1, 0, 1, 5, 41, 42, 43, 123, 949, 950, 982, 1024, 1999, 2000, 1234567)) {
val vb = new VectorBuilder[AnyRef]
.alignTo(i, vPretend) // pretend to add vPretend ...
.addAll(vReal) // ... but then add completely unrelated vReal instead!
val res1 = vb.result()
assertEquals(s"vectors not equal, n=$n, m=$m, i=$i", vReal, res1)
val res2 = vb
.addOne("a")
.addAll(vReal) // reuse the builder
.result()
assertEquals(s"vectors not equal, n=$n, m=$m, i=$i", (vReal :+ "a") ++ vReal, res2)
}
}

@Test
def testWierdAlignments4(): Unit = {
var lengths = Set[Int]()
for (
n <- allSizes.init :+ (allSizes.last - WIDTH5 - WIDTH3 + 41); // we need WIDTH5 for prefix, add 1+WIDTH3 and get 42 in suffix free
m <- List(WIDTH4, WIDTH5, Int.MaxValue - WIDTH5)
) {
val vPretend = Vector.fillSparse(42)("v0") ++ Vector.fillSparse(m - 42)("v1")
val vReal = vPretend.take(n)
// if (n==1073741824 && m==2046820352)
// println(s"n=$n, m=$m")
val vb = new VectorBuilder[AnyRef]
.alignTo(0, vPretend) // pretend to add vPretend ...
.addAll(vReal) // ... but then add only a subsequence!
val res1 = vb.result()
assertEquals(s"vectors not equal, n=$n, m=$m", vReal.length, res1.length)
assertEquals(s"vectors not equal, n=$n, m=$m", vReal.take(WIDTH3), res1.take(WIDTH3))
assertEquals(s"vectors not equal, n=$n, m=$m", vReal.takeRight(WIDTH3), res1.takeRight(WIDTH3))
val res2 = vb
.addOne("a")
.addAll(vReal.take(WIDTH3)) // whole vector may take too long
.result()
val expected = (vReal :+ "a") ++ (vReal.take(WIDTH3))
assertEquals(s"vectors not equal, n=$n, m=$m", expected.length, res2.length)
assertEquals(s"vectors not equal, n=$n, m=$m", expected.take(WIDTH3), res2.take(WIDTH3))
assertEquals(s"vectors not equal, n=$n, m=$m", expected.takeRight(WIDTH3), res2.takeRight(WIDTH3))
lengths += res2.length
}
assertEquals(15, lengths.size)
assertEquals(Int.MaxValue - WIDTH5 + 42, lengths.max)
}

@Test
def testNegativeAlignment(): Unit = for (size <- allSizes; i <- allSizes) {
val v = Vector.fillSparse(63 * WIDTH5)("v")
val expected = v.drop(i)
val vb = new VectorBuilder[AnyRef]
.alignTo(-i, v)
.addAll(expected)
val res = vb.result()
assertEquals("lengths not equal", expected.length, res.length)
assertEquals(s"vectors not equal", expected.take(WIDTH3), res.take(WIDTH3))
assertEquals(s"vectors not equal", expected.takeRight(WIDTH3), res.takeRight(WIDTH3))
}

@Test
def factoryReuse(): Unit = {
assertSame(Vector.empty, Vector.empty)
Expand Down

0 comments on commit fbd5a55

Please sign in to comment.