Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize writing small strings #8149

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
24 changes: 24 additions & 0 deletions csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs
Expand Up @@ -35,6 +35,7 @@
using Google.Protobuf.TestProtos;
using Google.Protobuf.Buffers;
using NUnit.Framework;
using System.Text;

namespace Google.Protobuf
{
Expand Down Expand Up @@ -516,5 +517,28 @@ public void Dispose_FromByteArray()
var stream = new CodedOutputStream(new byte[10]);
stream.Dispose();
}

/// <summary>
/// Round-trips strings of every length from 1 to 1024 through
/// <see cref="CodedOutputStream.WriteString"/> and <see cref="CodedInputStream.ReadString"/>,
/// using characters that encode to one, two and three UTF-8 bytes each.
/// </summary>
[Test]
public void WriteStringsOfDifferentSizes()
{
    // First character of a run of ten consecutive "digit" code points, one run per UTF-8 width:
    //   '0'      (U+0030..U+0039) -> 1 byte per char (ASCII)
    //   '\u0660' (U+0660..U+0669) -> 2 bytes per char
    //   '\u2780' (U+2780..U+2789) -> 3 bytes per char
    // ASCII-only input cannot detect a mix-up between char count and byte count,
    // so the multi-byte runs are needed to cover the length prefix properly.
    char[] firstChars = { '0', '\u0660', '\u2780' };

    foreach (char firstChar in firstChars)
    {
        for (int length = 1; length <= 1024; length++)
        {
            // Worst case is 1024 chars * 3 bytes + a 2-byte length prefix, which fits in 4096.
            var buffer = new byte[4096];
            var output = new CodedOutputStream(buffer);

            var sb = new StringBuilder(length);
            for (int j = 0; j < length; j++)
            {
                sb.Append((char)(firstChar + (j % 10))); // incrementing digits, repeating
            }
            var expected = sb.ToString();

            output.WriteString(expected);
            output.Flush();

            // Verify written content by reading it back from the same buffer.
            var input = new CodedInputStream(buffer);
            Assert.AreEqual(
                expected,
                input.ReadString(),
                "Round-trip failed for length {0}, first char U+{1:X4}",
                length,
                (int)firstChar);
        }
    }
}
}
}
65 changes: 46 additions & 19 deletions csharp/src/Google.Protobuf/WritingPrimitives.cs
Expand Up @@ -163,10 +163,26 @@ public static void WriteBool(ref Span<byte> buffer, ref WriterInternalState stat
/// </summary>
public static void WriteString(ref Span<byte> buffer, ref WriterInternalState state, string value)
{
// Optimise the case where we have enough space to write
// the string directly to the buffer, which should be common.
const int MaxBytesPerChar = 3;
thomasvl marked this conversation as resolved.
Show resolved Hide resolved
const int MaxSmallStringLength = 128 / MaxBytesPerChar;

// The string is small enough that the length will always be a 1 byte varint.
// Also there is enough space to write length + bytes to buffer.
// Write string directly to the buffer, and then write length.
// This saves calling GetByteCount on the string. We get the string length from GetBytes.
if (value.Length <= MaxSmallStringLength && buffer.Length - state.position - 1 >= value.Length * MaxBytesPerChar)
{
// Get the original position, then increment it on state by 1, then write string to buffer.
// Method will return byte length, which is then set to the original position.
buffer[state.position++] = (byte)WriteStringToBuffer(buffer, ref state, value);
JamesNK marked this conversation as resolved.
Show resolved Hide resolved
return;
}

int length = Utf8Encoding.GetByteCount(value);
JamesNK marked this conversation as resolved.
Show resolved Hide resolved
WriteLength(ref buffer, ref state, length);

// Optimise the case where we have enough space to write
// the string directly to the buffer, which should be common.
if (buffer.Length - state.position >= length)
{
if (length == value.Length) // Must be all ASCII...
Expand All @@ -179,23 +195,7 @@ public static void WriteString(ref Span<byte> buffer, ref WriterInternalState st
}
else
{
#if NETSTANDARD1_1
// slowpath when Encoding.GetBytes(Char*, Int32, Byte*, Int32) is not available
byte[] bytes = Utf8Encoding.GetBytes(value);
WriteRawBytes(ref buffer, ref state, bytes);
#else
ReadOnlySpan<char> source = value.AsSpan();
int bytesUsed;
unsafe
{
fixed (char* sourceChars = &MemoryMarshal.GetReference(source))
fixed (byte* destinationBytes = &MemoryMarshal.GetReference(buffer.Slice(state.position)))
{
bytesUsed = Utf8Encoding.GetBytes(sourceChars, source.Length, destinationBytes, buffer.Length);
}
}
state.position += bytesUsed;
#endif
WriteStringToBuffer(buffer, ref state, value);
}
}
else
Expand All @@ -209,6 +209,33 @@ public static void WriteString(ref Span<byte> buffer, ref WriterInternalState st
}
}

/// <summary>
/// Encodes <paramref name="value"/> as UTF-8 into <paramref name="buffer"/>, starting at
/// <c>state.position</c>, and advances <c>state.position</c> past the encoded bytes.
/// No length prefix is written here.
/// </summary>
/// <param name="buffer">Destination buffer.</param>
/// <param name="state">Writer state; its <c>position</c> is read as the start offset and updated.</param>
/// <param name="value">The string to encode.</param>
/// <returns>The number of UTF-8 bytes produced for <paramref name="value"/>.</returns>
private static int WriteStringToBuffer(Span<byte> buffer, ref WriterInternalState state, string value)
{
#if NETSTANDARD1_1
    // slowpath when Encoding.GetBytes(Char*, Int32, Byte*, Int32) is not available:
    // encode into a temporary array and hand it to WriteRawBytes.
    byte[] encoded = Utf8Encoding.GetBytes(value);
    WriteRawBytes(ref buffer, ref state, encoded);
    return encoded.Length;
#else
    ReadOnlySpan<char> chars = value.AsSpan();
    int start = state.position;
    int written;
    unsafe
    {
        fixed (char* charPtr = &MemoryMarshal.GetReference(chars))
        fixed (byte* bufferPtr = &MemoryMarshal.GetReference(buffer))
        {
            // Encode directly into the buffer at the current position; the capacity
            // passed to the encoder is the space remaining after that position.
            written = Utf8Encoding.GetBytes(charPtr, chars.Length, bufferPtr + start, buffer.Length - start);
        }
    }
    state.position = start + written;
    return written;
#endif
}

/// <summary>
/// Write a byte string, without a tag, to the stream.
/// The data is length-prefixed.
Expand Down