Skip to content

Commit

Permalink
Merge pull request #9148 from belugabehr/nio-charsets
Browse files Browse the repository at this point in the history
Transition to NIO StandardCharsets
  • Loading branch information
elharo committed Oct 25, 2021
2 parents d630f96 + 3370cc4 commit b3b3162
Show file tree
Hide file tree
Showing 24 changed files with 84 additions and 62 deletions.
11 changes: 6 additions & 5 deletions java/core/src/main/java/com/google/protobuf/ArrayDecoders.java
Expand Up @@ -34,6 +34,7 @@

import com.google.protobuf.Internal.ProtobufList;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

/**
* Helper functions to decode protobuf wire format from a byte array.
Expand Down Expand Up @@ -191,7 +192,7 @@ static int decodeString(byte[] data, int position, Registers registers)
registers.object1 = "";
return position;
} else {
registers.object1 = new String(data, position, length, Internal.UTF_8);
registers.object1 = new String(data, position, length, StandardCharsets.UTF_8);
return position + length;
}
}
Expand Down Expand Up @@ -577,7 +578,7 @@ static int decodeStringList(
} else if (length == 0) {
output.add("");
} else {
String value = new String(data, position, length, Internal.UTF_8);
String value = new String(data, position, length, StandardCharsets.UTF_8);
output.add(value);
position += length;
}
Expand All @@ -593,7 +594,7 @@ static int decodeStringList(
} else if (nextLength == 0) {
output.add("");
} else {
String value = new String(data, position, nextLength, Internal.UTF_8);
String value = new String(data, position, nextLength, StandardCharsets.UTF_8);
output.add(value);
position += nextLength;
}
Expand All @@ -619,7 +620,7 @@ static int decodeStringListRequireUtf8(
if (!Utf8.isValidUtf8(data, position, position + length)) {
throw InvalidProtocolBufferException.invalidUtf8();
}
String value = new String(data, position, length, Internal.UTF_8);
String value = new String(data, position, length, StandardCharsets.UTF_8);
output.add(value);
position += length;
}
Expand All @@ -638,7 +639,7 @@ static int decodeStringListRequireUtf8(
if (!Utf8.isValidUtf8(data, position, position + nextLength)) {
throw InvalidProtocolBufferException.invalidUtf8();
}
String value = new String(data, position, nextLength, Internal.UTF_8);
String value = new String(data, position, nextLength, StandardCharsets.UTF_8);
output.add(value);
position += nextLength;
}
Expand Down
Expand Up @@ -41,6 +41,7 @@

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;

Expand Down Expand Up @@ -226,7 +227,7 @@ public String readStringInternal(boolean requireUtf8) throws IOException {
if (requireUtf8 && !Utf8.isValidUtf8(buffer, pos, pos + size)) {
throw InvalidProtocolBufferException.invalidUtf8();
}
String result = new String(buffer, pos, size, Internal.UTF_8);
String result = new String(buffer, pos, size, StandardCharsets.UTF_8);
pos += size;
return result;
}
Expand Down
5 changes: 3 additions & 2 deletions java/core/src/main/java/com/google/protobuf/ByteString.java
Expand Up @@ -45,6 +45,7 @@
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.Arrays;
Expand Down Expand Up @@ -460,7 +461,7 @@ public static ByteString copyFrom(String text, Charset charset) {
* @return new {@code ByteString}
*/
public static ByteString copyFromUtf8(String text) {
return new LiteralByteString(text.getBytes(Internal.UTF_8));
return new LiteralByteString(text.getBytes(StandardCharsets.UTF_8));
}

// =================================================================
Expand Down Expand Up @@ -833,7 +834,7 @@ public final String toString(Charset charset) {
* @return new string using UTF-8 encoding
*/
public final String toStringUtf8() {
return toString(Internal.UTF_8);
return toString(StandardCharsets.UTF_8);
}

/**
Expand Down
Expand Up @@ -32,12 +32,13 @@

import static com.google.protobuf.Internal.EMPTY_BYTE_ARRAY;
import static com.google.protobuf.Internal.EMPTY_BYTE_BUFFER;
import static com.google.protobuf.Internal.UTF_8;
import static com.google.protobuf.Internal.checkNotNull;
import static com.google.protobuf.WireFormat.FIXED32_SIZE;
import static com.google.protobuf.WireFormat.FIXED64_SIZE;
import static com.google.protobuf.WireFormat.MAX_VARINT_SIZE;

import static java.nio.charset.StandardCharsets.UTF_8;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
Expand Down
Expand Up @@ -42,6 +42,7 @@
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.logging.Level;
import java.util.logging.Logger;

Expand Down Expand Up @@ -842,7 +843,7 @@ public static int computeStringSizeNoTag(final String value) {
length = Utf8.encodedLength(value);
} catch (UnpairedSurrogateException e) {
// TODO(dweis): Consider using nio Charset methods instead.
final byte[] bytes = value.getBytes(Internal.UTF_8);
final byte[] bytes = value.getBytes(StandardCharsets.UTF_8);
length = bytes.length;
}

Expand Down Expand Up @@ -989,8 +990,7 @@ final void inefficientWriteStringNoTag(String value, UnpairedSurrogateException
// Unfortunately there does not appear to be any way to tell Java to encode
// UTF-8 directly into our buffer, so we have to let it create its own byte
// array and then copy.
// TODO(dweis): Consider using nio Charset methods instead.
final byte[] bytes = value.getBytes(Internal.UTF_8);
final byte[] bytes = value.getBytes(StandardCharsets.UTF_8);
try {
writeUInt32NoTag(bytes.length);
writeLazy(bytes, 0, bytes.length);
Expand Down
5 changes: 3 additions & 2 deletions java/core/src/main/java/com/google/protobuf/Descriptors.java
Expand Up @@ -51,6 +51,7 @@
import com.google.protobuf.Descriptors.FileDescriptor.Syntax;
import java.lang.ref.ReferenceQueue;
import java.lang.ref.WeakReference;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
Expand Down Expand Up @@ -333,13 +334,13 @@ private static byte[] latin1Cat(final String[] strings) {
// should get the original bytes that we want.
// Literal strings are limited to 64k, so it may be split into multiple strings.
if (strings.length == 1) {
return strings[0].getBytes(Internal.ISO_8859_1);
return strings[0].getBytes(StandardCharsets.ISO_8859_1);
}
StringBuilder descriptorData = new StringBuilder();
for (String part : strings) {
descriptorData.append(part);
}
return descriptorData.toString().getBytes(Internal.ISO_8859_1);
return descriptorData.toString().getBytes(StandardCharsets.ISO_8859_1);
}

private static FileDescriptor[] findDescriptors(
Expand Down
16 changes: 6 additions & 10 deletions java/core/src/main/java/com/google/protobuf/Internal.java
Expand Up @@ -32,7 +32,7 @@

import java.lang.reflect.Method;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.AbstractList;
import java.util.AbstractMap;
import java.util.AbstractSet;
Expand All @@ -54,10 +54,6 @@ public final class Internal {

private Internal() {}

static final Charset US_ASCII = Charset.forName("US-ASCII");
static final Charset UTF_8 = Charset.forName("UTF-8");
static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");

/** Throws an appropriate {@link NullPointerException} if the given object is {@code null}. */
static <T> T checkNotNull(T obj) {
if (obj == null) {
Expand Down Expand Up @@ -97,7 +93,7 @@ static <T> T checkNotNull(T obj, String message) {
* actually want. The generated code calls this automatically.
*/
public static String stringDefaultValue(String bytes) {
return new String(bytes.getBytes(ISO_8859_1), UTF_8);
return new String(bytes.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
}

/**
Expand All @@ -108,15 +104,15 @@ public static String stringDefaultValue(String bytes) {
* ISO-8859-1 encoding.
*/
public static ByteString bytesDefaultValue(String bytes) {
return ByteString.copyFrom(bytes.getBytes(ISO_8859_1));
return ByteString.copyFrom(bytes.getBytes(StandardCharsets.ISO_8859_1));
}
/**
* Helper called by generated code to construct default values for bytes fields.
*
* <p>This is like {@link #bytesDefaultValue}, but returns a byte array.
*/
public static byte[] byteArrayDefaultValue(String bytes) {
return bytes.getBytes(ISO_8859_1);
return bytes.getBytes(StandardCharsets.ISO_8859_1);
}

/**
Expand Down Expand Up @@ -183,12 +179,12 @@ public static boolean isValidUtf8(byte[] byteArray) {

/** Helper method to get the UTF-8 bytes of a string. */
public static byte[] toByteArray(String value) {
return value.getBytes(UTF_8);
return value.getBytes(StandardCharsets.UTF_8);
}

/** Helper method to convert a byte array to a string using UTF-8 encoding. */
public static String toStringUtf8(byte[] bytes) {
return new String(bytes, UTF_8);
return new String(bytes, StandardCharsets.UTF_8);
}

/**
Expand Down
Expand Up @@ -76,6 +76,7 @@
import com.google.protobuf.MapEntryLite.Metadata;
import java.io.IOException;
import java.lang.reflect.Field;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
Expand Down Expand Up @@ -4765,7 +4766,7 @@ private int parseOneofField(
&& !Utf8.isValidUtf8(data, position, position + length)) {
throw InvalidProtocolBufferException.invalidUtf8();
}
final String value = new String(data, position, length, Internal.UTF_8);
final String value = new String(data, position, length, StandardCharsets.UTF_8);
unsafe.putObject(message, fieldOffset, value);
position += length;
}
Expand Down
3 changes: 2 additions & 1 deletion java/core/src/main/java/com/google/protobuf/Utf8.java
Expand Up @@ -42,6 +42,7 @@
import static java.lang.Character.toCodePoint;

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

/**
* A set of low-level, high-performance static utility methods related to the UTF-8 character
Expand Down Expand Up @@ -1386,7 +1387,7 @@ String decodeUtf8(byte[] bytes, int index, int size) throws InvalidProtocolBuffe
if (offset == limit) {
// The entire byte sequence is ASCII. Don't bother copying to a char[], JVMs using
// compact strings will just turn it back into the same byte[].
return new String(bytes, index, size, Internal.US_ASCII);
return new String(bytes, index, size, StandardCharsets.US_ASCII);
}

// It's not all ASCII, at this point. This may over-allocate, but we will truncate in the
Expand Down
Expand Up @@ -38,6 +38,8 @@
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;

import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
Expand Down Expand Up @@ -68,7 +70,7 @@ public void setUp() throws Exception {
@Test
public void testToString() throws UnsupportedEncodingException {
String testString = "I love unicode \u1234\u5678 characters";
ByteString unicode = ByteString.wrap(testString.getBytes(Internal.UTF_8));
ByteString unicode = ByteString.wrap(testString.getBytes(StandardCharsets.UTF_8));
ByteString chopped = unicode.substring(2, unicode.size() - 6);
assertWithMessage("%s.substring() must have the expected type", classUnderTest)
.that(classUnderTest)
Expand All @@ -84,13 +86,13 @@ public void testToString() throws UnsupportedEncodingException {
@Test
public void testCharsetToString() {
String testString = "I love unicode \u1234\u5678 characters";
ByteString unicode = ByteString.wrap(testString.getBytes(Internal.UTF_8));
ByteString unicode = ByteString.wrap(testString.getBytes(StandardCharsets.UTF_8));
ByteString chopped = unicode.substring(2, unicode.size() - 6);
assertWithMessage("%s.substring() must have the expected type", classUnderTest)
.that(classUnderTest)
.isEqualTo(getActualClassName(chopped));

String roundTripString = chopped.toString(Internal.UTF_8);
String roundTripString = chopped.toString(StandardCharsets.UTF_8);
assertWithMessage("%s unicode bytes must match", classUnderTest)
.that(testString.substring(2, testString.length() - 6))
.isEqualTo(roundTripString);
Expand Down
Expand Up @@ -42,6 +42,7 @@
import java.lang.reflect.Field;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
Expand Down Expand Up @@ -198,7 +199,7 @@ public void testCopyFrom_StringEncoding() {
public void testCopyFrom_Utf8() {
String testString = "I love unicode \u1234\u5678 characters";
ByteString byteString = ByteString.copyFromUtf8(testString);
byte[] testBytes = testString.getBytes(Internal.UTF_8);
byte[] testBytes = testString.getBytes(StandardCharsets.UTF_8);
assertWithMessage("copyFromUtf8 string must respect the charset")
.that(isArrayRange(byteString.toByteArray(), testBytes, 0, testBytes.length))
.isTrue();
Expand Down Expand Up @@ -516,7 +517,7 @@ public void write(int ignored) {
@Test
public void testToStringUtf8() {
String testString = "I love unicode \u1234\u5678 characters";
byte[] testBytes = testString.getBytes(Internal.UTF_8);
byte[] testBytes = testString.getBytes(StandardCharsets.UTF_8);
ByteString byteString = ByteString.copyFrom(testBytes);
assertWithMessage("copyToStringUtf8 must respect the charset")
.that(testString)
Expand All @@ -526,7 +527,7 @@ public void testToStringUtf8() {
@Test
public void testToString() {
String toString =
ByteString.copyFrom("Here are some bytes: \t\u00a1".getBytes(Internal.UTF_8)).toString();
ByteString.copyFrom("Here are some bytes: \t\u00a1".getBytes(StandardCharsets.UTF_8)).toString();
assertWithMessage(toString).that(toString.contains("size=24")).isTrue();
assertWithMessage(toString)
.that(toString.contains("contents=\"Here are some bytes: \\t\\302\\241\""))
Expand All @@ -538,7 +539,7 @@ public void testToString_long() {
String toString =
ByteString.copyFrom(
"123456789012345678901234567890123456789012345678901234567890"
.getBytes(Internal.UTF_8))
.getBytes(StandardCharsets.UTF_8))
.toString();
assertWithMessage(toString).that(toString.contains("size=60")).isTrue();
assertWithMessage(toString)
Expand Down
Expand Up @@ -41,6 +41,7 @@
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
Expand Down Expand Up @@ -417,7 +418,7 @@ public void testGetTotalBytesWritten() throws Exception {

// Write some bytes (more than the buffer can hold) and verify that totalWritten
// is correct.
byte[] value = "abcde".getBytes(Internal.UTF_8);
byte[] value = "abcde".getBytes(StandardCharsets.UTF_8);
for (int i = 0; i < 1024; ++i) {
coder.stream().writeRawBytes(value, 0, value.length);
}
Expand Down Expand Up @@ -500,7 +501,7 @@ public void testWriteToByteBuffer() throws Exception {

@Test
public void testWriteByteBuffer() throws Exception {
byte[] value = "abcde".getBytes(Internal.UTF_8);
byte[] value = "abcde".getBytes(StandardCharsets.UTF_8);
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
CodedOutputStream codedStream = CodedOutputStream.newInstance(outputStream);
ByteBuffer byteBuffer = ByteBuffer.wrap(value, 0, 1);
Expand Down Expand Up @@ -543,7 +544,7 @@ public void testSerializeUtf8_MultipleSmallWrites() throws Exception {
for (int pos = 0; pos < source.length(); pos += 2) {
String substr = source.substring(pos, pos + 2);
expectedBytesStream.write(2);
expectedBytesStream.write(substr.getBytes(Internal.UTF_8));
expectedBytesStream.write(substr.getBytes(StandardCharsets.UTF_8));
}
final byte[] expectedBytes = expectedBytesStream.toByteArray();

Expand Down

0 comments on commit b3b3162

Please sign in to comment.