Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change ByteString to use memory and support unsafe create without copy #7645

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 4 additions & 1 deletion Makefile.am
Expand Up @@ -89,6 +89,7 @@ csharp_EXTRA_DIST= \
csharp/src/Google.Protobuf.Benchmarks/BenchmarkDatasetConfig.cs \
csharp/src/Google.Protobuf.Benchmarks/BenchmarkMessage1Proto3.cs \
csharp/src/Google.Protobuf.Benchmarks/Benchmarks.cs \
csharp/src/Google.Protobuf.Benchmarks/ByteStringBenchmark.cs \
csharp/src/Google.Protobuf.Benchmarks/Google.Protobuf.Benchmarks.csproj \
csharp/src/Google.Protobuf.Benchmarks/GoogleMessageBenchmark.cs \
csharp/src/Google.Protobuf.Benchmarks/ParseMessagesBenchmark.cs \
Expand Down Expand Up @@ -171,6 +172,7 @@ csharp_EXTRA_DIST= \
csharp/src/Google.Protobuf.sln \
csharp/src/Google.Protobuf/ByteArray.cs \
csharp/src/Google.Protobuf/ByteString.cs \
csharp/src/Google.Protobuf/ByteStringAsync.cs \
csharp/src/Google.Protobuf/CodedInputStream.cs \
csharp/src/Google.Protobuf/CodedOutputStream.ComputeSize.cs \
csharp/src/Google.Protobuf/CodedOutputStream.cs \
Expand Down Expand Up @@ -268,7 +270,8 @@ csharp_EXTRA_DIST= \
csharp/src/Google.Protobuf/WriteContext.cs \
csharp/src/Google.Protobuf/WriteBufferHelper.cs \
csharp/src/Google.Protobuf/UnknownField.cs \
csharp/src/Google.Protobuf/UnknownFieldSet.cs
csharp/src/Google.Protobuf/UnknownFieldSet.cs \
csharp/src/Google.Protobuf/UnsafeByteOperations.cs

java_EXTRA_DIST= \
java/README.md \
Expand Down
72 changes: 72 additions & 0 deletions csharp/src/Google.Protobuf.Benchmarks/ByteStringBenchmark.cs
@@ -0,0 +1,72 @@
#region Copyright notice and license
// Protocol Buffers - Google's data interchange format
// Copyright 2019 Google Inc. All rights reserved.
// https://github.com/protocolbuffers/protobuf
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endregion

using BenchmarkDotNet.Attributes;

namespace Google.Protobuf.Benchmarks
{
/// <summary>
/// Benchmarks using ByteString.
/// </summary>
[MemoryDiagnoser]
public class ByteStringBenchmark
{
private const int Zero = 0;
private const int Kilobyte = 1024;
private const int _128Kilobytes = 1024 * 128;
private const int Megabyte = 1024 * 1024;
private const int _10Megabytes = 1024 * 1024 * 10;

byte[] byteBuffer;

[GlobalSetup]
public void GlobalSetup()
{
byteBuffer = new byte[PayloadSize];
}

[Params(Zero, Kilobyte, _128Kilobytes, Megabyte, _10Megabytes)]
public int PayloadSize { get; set; }

[Benchmark]
public ByteString CopyFrom()
{
return ByteString.CopyFrom(byteBuffer);
}

[Benchmark]
public ByteString UnsafeWrap()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TBH I don't think this benchmark class brings much value the way it is. What is does is it basically measures how much time it takes to copy a byte array of given size (which takes long time and does allocate memory but everyone knows that) and the time it takes to create a ReadOnlyMemory from bytes. Comparing these two numbers isn't very useful beyond the obvious "first one is very slow and the latter is very fast".
What would be more interesting is comparison of the original ByteString.Unsafe.FromBytes vs new ByteString(ReadOnlyMemory<>) and the original CreateFrom vs the new CreateFrom (which has now a different implementation) - at least we would have a comparison of whether this PR causes any regressions (if so, they would probably be minor, but we should check anyways).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comparing these two numbers isn't very useful beyond the obvious "first one is very slow and the latter is very fast".

Well the change is basically from doing lots of work to doing nothing. Before creating a byte string involved creating new array, copying all data, garbage collecting array. Now a byte string can be created by using data that you already have available: just reference the existing data.

All this change impacts is creating a ByteString that you then want to serialize. This is an alternative to ByteString.CopyFrom. Comparing ByteString.CopyFrom and UnsafeByteOperation.UnsafeWrap is the most accurate comparison. Slow to fast is why people want this.

What would be more interesting is comparison of the original ByteString.Unsafe.FromBytes vs new ByteString(ReadOnlyMemory<>) and the original CreateFrom vs the new CreateFrom (which has now a different implementation)

Nothing used ByteString.Unsafe.FromBytes. I don't know what you mean by CreateFrom. There isn't an API called that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could update the benchmark to create a ByteString and then serialize it if you want, but the time to serialize it will the the same. It will just add noise onto the original number that compares creating ByteString.

{
return UnsafeByteOperations.UnsafeWrap(byteBuffer);
}
}
}
158 changes: 158 additions & 0 deletions csharp/src/Google.Protobuf.Test/ByteStringTest.cs
Expand Up @@ -34,6 +34,13 @@
using System.Text;
using NUnit.Framework;
using System.IO;
using System.Collections.Generic;
using System.Collections;
using System.Linq;
using System.Buffers;
using System.Runtime.InteropServices;
using System.Threading;
using System.Runtime.CompilerServices;
#if !NET35
using System.Threading.Tasks;
#endif
Expand All @@ -54,6 +61,7 @@ public void Equality()
EqualityTester.AssertInequality(b1, b3);
EqualityTester.AssertInequality(b1, b4);
EqualityTester.AssertInequality(b1, null);
EqualityTester.AssertEquality(ByteString.Empty, ByteString.Empty);
#pragma warning disable 1718 // Deliberately calling ==(b1, b1) and !=(b1, b1)
Assert.IsTrue(b1 == b1);
Assert.IsTrue(b1 == b2);
Expand All @@ -63,6 +71,7 @@ public void Equality()
Assert.IsTrue((ByteString) null == null);
Assert.IsFalse(b1 != b1);
Assert.IsFalse(b1 != b2);
Assert.IsTrue(ByteString.Empty == ByteString.Empty);
#pragma warning disable 1718
Assert.IsTrue(b1 != b3);
Assert.IsTrue(b1 != b4);
Expand Down Expand Up @@ -154,6 +163,84 @@ public void CopyFromPortion()
Assert.AreEqual(3, bs[1]);
}

[Test]
public void CopyTo()
{
byte[] data = new byte[] { 0, 1, 2, 3, 4, 5, 6 };
ByteString bs = ByteString.CopyFrom(data);

byte[] dest = new byte[data.Length];
bs.CopyTo(dest, 0);

CollectionAssert.AreEqual(data, dest);
}

[Test]
public void GetEnumerator()
{
byte[] data = new byte[] { 0, 1, 2, 3, 4, 5, 6 };
ByteString bs = ByteString.CopyFrom(data);

IEnumerator<byte> genericEnumerator = bs.GetEnumerator();
Assert.IsTrue(genericEnumerator.MoveNext());
Assert.AreEqual(0, genericEnumerator.Current);

IEnumerator enumerator = ((IEnumerable)bs).GetEnumerator();
Assert.IsTrue(enumerator.MoveNext());
Assert.AreEqual(0, enumerator.Current);

// Call via LINQ
CollectionAssert.AreEqual(bs.Span.ToArray(), bs.ToArray());
}

[Test]
public void UnsafeWrap()
{
byte[] data = new byte[] { 0, 1, 2, 3, 4, 5, 6 };
ByteString bs = UnsafeByteOperations.UnsafeWrap(data.AsMemory(2, 3));
ReadOnlySpan<byte> s = bs.Span;

Assert.AreEqual(3, s.Length);
Assert.AreEqual(2, s[0]);
Assert.AreEqual(3, s[1]);
Assert.AreEqual(4, s[2]);

// Check that the value is not a copy
data[2] = byte.MaxValue;
Assert.AreEqual(byte.MaxValue, s[0]);
}

[Test]
public void WriteToStream()
{
byte[] data = new byte[] { 0, 1, 2, 3, 4, 5, 6 };
ByteString bs = ByteString.CopyFrom(data);

MemoryStream ms = new MemoryStream();
bs.WriteTo(ms);

CollectionAssert.AreEqual(data, ms.ToArray());
}

JamesNK marked this conversation as resolved.
Show resolved Hide resolved
[Test]
public void WriteToStream_Stackalloc()
{
byte[] data = Encoding.UTF8.GetBytes("Hello world");
Span<byte> s = stackalloc byte[data.Length];
data.CopyTo(s);

MemoryStream ms = new MemoryStream();

using (UnmanagedMemoryManager<byte> manager = new UnmanagedMemoryManager<byte>(s))
{
ByteString bs = ByteString.AttachBytes(manager.Memory);

bs.WriteTo(ms);
}

CollectionAssert.AreEqual(data, ms.ToArray());
}

[Test]
public void ToStringUtf8()
{
Expand All @@ -168,6 +255,21 @@ public void ToStringWithExplicitEncoding()
Assert.AreEqual("\u20ac", bs.ToString(Encoding.Unicode));
}

[Test]
public void ToString_Stackalloc()
{
byte[] data = Encoding.UTF8.GetBytes("Hello world");
Span<byte> s = stackalloc byte[data.Length];
data.CopyTo(s);

using (UnmanagedMemoryManager<byte> manager = new UnmanagedMemoryManager<byte>(s))
{
ByteString bs = ByteString.AttachBytes(manager.Memory);

Assert.AreEqual("Hello world", bs.ToString(Encoding.UTF8));
}
}

[Test]
public void FromBase64_WithText()
{
Expand All @@ -184,6 +286,29 @@ public void FromBase64_Empty()
Assert.AreSame(ByteString.Empty, ByteString.FromBase64(""));
}

[Test]
public void ToBase64_Array()
{
ByteString bs = ByteString.CopyFrom(Encoding.UTF8.GetBytes("Hello world"));

Assert.AreEqual("SGVsbG8gd29ybGQ=", bs.ToBase64());
}

[Test]
public void ToBase64_Stackalloc()
{
byte[] data = Encoding.UTF8.GetBytes("Hello world");
Span<byte> s = stackalloc byte[data.Length];
data.CopyTo(s);

using (UnmanagedMemoryManager<byte> manager = new UnmanagedMemoryManager<byte>(s))
{
ByteString bs = ByteString.AttachBytes(manager.Memory);

Assert.AreEqual("SGVsbG8gd29ybGQ=", bs.ToBase64());
}
}

[Test]
public void FromStream_Seekable()
{
Expand Down Expand Up @@ -261,5 +386,38 @@ public void GetContentsAsReadOnlyMemory()
var copied = byteString.Memory.ToArray();
CollectionAssert.AreEqual(byteString, copied);
}

// Create Memory<byte> from non-array source.
// Use by ByteString tests that have optimized path for array backed Memory<byte>.
private sealed unsafe class UnmanagedMemoryManager<T> : MemoryManager<T> where T : unmanaged
{
private readonly T* _pointer;
private readonly int _length;

public UnmanagedMemoryManager(Span<T> span)
{
fixed (T* ptr = &MemoryMarshal.GetReference(span))
{
_pointer = ptr;
_length = span.Length;
}
}

public override Span<T> GetSpan() => new Span<T>(_pointer, _length);

public override MemoryHandle Pin(int elementIndex = 0)
{
if (elementIndex < 0 || elementIndex >= _length)
{
throw new ArgumentOutOfRangeException(nameof(elementIndex));
}

return new MemoryHandle(_pointer + elementIndex);
}

public override void Unpin() { }

protected override void Dispose(bool disposing) { }
}
}
}
Expand Up @@ -6,6 +6,7 @@
<SignAssembly>true</SignAssembly>
<PublicSign Condition=" '$(OS)' != 'Windows_NT' ">true</PublicSign>
<IsPackable>False</IsPackable>
<AllowUnsafeBlocks>True</AllowUnsafeBlocks>
</PropertyGroup>

<ItemGroup>
Expand Down