Skip to content

Commit

Permalink
Revert "Transition to NIO StandardCharsets" (#9382)
Browse files Browse the repository at this point in the history
  • Loading branch information
elharo committed Jan 7, 2022
1 parent ee648b7 commit d8ccfbf
Show file tree
Hide file tree
Showing 24 changed files with 62 additions and 84 deletions.
11 changes: 5 additions & 6 deletions java/core/src/main/java/com/google/protobuf/ArrayDecoders.java
Expand Up @@ -34,7 +34,6 @@

import com.google.protobuf.Internal.ProtobufList;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

/**
* Helper functions to decode protobuf wire format from a byte array.
Expand Down Expand Up @@ -192,7 +191,7 @@ static int decodeString(byte[] data, int position, Registers registers)
registers.object1 = "";
return position;
} else {
registers.object1 = new String(data, position, length, StandardCharsets.UTF_8);
registers.object1 = new String(data, position, length, Internal.UTF_8);
return position + length;
}
}
Expand Down Expand Up @@ -578,7 +577,7 @@ static int decodeStringList(
} else if (length == 0) {
output.add("");
} else {
String value = new String(data, position, length, StandardCharsets.UTF_8);
String value = new String(data, position, length, Internal.UTF_8);
output.add(value);
position += length;
}
Expand All @@ -594,7 +593,7 @@ static int decodeStringList(
} else if (nextLength == 0) {
output.add("");
} else {
String value = new String(data, position, nextLength, StandardCharsets.UTF_8);
String value = new String(data, position, nextLength, Internal.UTF_8);
output.add(value);
position += nextLength;
}
Expand All @@ -620,7 +619,7 @@ static int decodeStringListRequireUtf8(
if (!Utf8.isValidUtf8(data, position, position + length)) {
throw InvalidProtocolBufferException.invalidUtf8();
}
String value = new String(data, position, length, StandardCharsets.UTF_8);
String value = new String(data, position, length, Internal.UTF_8);
output.add(value);
position += length;
}
Expand All @@ -639,7 +638,7 @@ static int decodeStringListRequireUtf8(
if (!Utf8.isValidUtf8(data, position, position + nextLength)) {
throw InvalidProtocolBufferException.invalidUtf8();
}
String value = new String(data, position, nextLength, StandardCharsets.UTF_8);
String value = new String(data, position, nextLength, Internal.UTF_8);
output.add(value);
position += nextLength;
}
Expand Down
Expand Up @@ -41,7 +41,6 @@

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;

Expand Down Expand Up @@ -227,7 +226,7 @@ public String readStringInternal(boolean requireUtf8) throws IOException {
if (requireUtf8 && !Utf8.isValidUtf8(buffer, pos, pos + size)) {
throw InvalidProtocolBufferException.invalidUtf8();
}
String result = new String(buffer, pos, size, StandardCharsets.UTF_8);
String result = new String(buffer, pos, size, Internal.UTF_8);
pos += size;
return result;
}
Expand Down
5 changes: 2 additions & 3 deletions java/core/src/main/java/com/google/protobuf/ByteString.java
Expand Up @@ -45,7 +45,6 @@
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.Arrays;
Expand Down Expand Up @@ -461,7 +460,7 @@ public static ByteString copyFrom(String text, Charset charset) {
* @return new {@code ByteString}
*/
public static ByteString copyFromUtf8(String text) {
// This is a diff hunk whose +/- markers were stripped: the StandardCharsets line is the side
// removed by this revert, and the Internal.UTF_8 line after it is its replacement.
// Both versions encode text as UTF-8 and wrap the resulting byte array in a LiteralByteString.
return new LiteralByteString(text.getBytes(StandardCharsets.UTF_8));
return new LiteralByteString(text.getBytes(Internal.UTF_8));
}

// =================================================================
Expand Down Expand Up @@ -834,7 +833,7 @@ public final String toString(Charset charset) {
* @return new string using UTF-8 encoding
*/
public final String toStringUtf8() {
// This is a diff hunk whose +/- markers were stripped: the StandardCharsets line is the side
// removed by this revert, and the Internal.UTF_8 line after it is its replacement.
// Both versions delegate to toString(Charset) with a UTF-8 charset.
return toString(StandardCharsets.UTF_8);
return toString(Internal.UTF_8);
}

/**
Expand Down
Expand Up @@ -32,13 +32,12 @@

import static com.google.protobuf.Internal.EMPTY_BYTE_ARRAY;
import static com.google.protobuf.Internal.EMPTY_BYTE_BUFFER;
import static com.google.protobuf.Internal.UTF_8;
import static com.google.protobuf.Internal.checkNotNull;
import static com.google.protobuf.WireFormat.FIXED32_SIZE;
import static com.google.protobuf.WireFormat.FIXED64_SIZE;
import static com.google.protobuf.WireFormat.MAX_VARINT_SIZE;

import static java.nio.charset.StandardCharsets.UTF_8;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
Expand Down
Expand Up @@ -42,7 +42,6 @@
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.logging.Level;
import java.util.logging.Logger;

Expand Down Expand Up @@ -843,7 +842,7 @@ public static int computeStringSizeNoTag(final String value) {
length = Utf8.encodedLength(value);
} catch (UnpairedSurrogateException e) {
// TODO(dweis): Consider using nio Charset methods instead.
final byte[] bytes = value.getBytes(StandardCharsets.UTF_8);
final byte[] bytes = value.getBytes(Internal.UTF_8);
length = bytes.length;
}

Expand Down Expand Up @@ -990,7 +989,8 @@ final void inefficientWriteStringNoTag(String value, UnpairedSurrogateException
// Unfortunately there does not appear to be any way to tell Java to encode
// UTF-8 directly into our buffer, so we have to let it create its own byte
// array and then copy.
final byte[] bytes = value.getBytes(StandardCharsets.UTF_8);
// TODO(dweis): Consider using nio Charset methods instead.
final byte[] bytes = value.getBytes(Internal.UTF_8);
try {
writeUInt32NoTag(bytes.length);
writeLazy(bytes, 0, bytes.length);
Expand Down
5 changes: 2 additions & 3 deletions java/core/src/main/java/com/google/protobuf/Descriptors.java
Expand Up @@ -51,7 +51,6 @@
import com.google.protobuf.Descriptors.FileDescriptor.Syntax;
import java.lang.ref.ReferenceQueue;
import java.lang.ref.WeakReference;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
Expand Down Expand Up @@ -334,13 +333,13 @@ private static byte[] latin1Cat(final String[] strings) {
// should get the original bytes that we want.
// Literal strings are limited to 64k, so it may be split into multiple strings.
if (strings.length == 1) {
return strings[0].getBytes(StandardCharsets.ISO_8859_1);
return strings[0].getBytes(Internal.ISO_8859_1);
}
StringBuilder descriptorData = new StringBuilder();
for (String part : strings) {
descriptorData.append(part);
}
return descriptorData.toString().getBytes(StandardCharsets.ISO_8859_1);
return descriptorData.toString().getBytes(Internal.ISO_8859_1);
}

private static FileDescriptor[] findDescriptors(
Expand Down
16 changes: 10 additions & 6 deletions java/core/src/main/java/com/google/protobuf/Internal.java
Expand Up @@ -32,7 +32,7 @@

import java.lang.reflect.Method;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.nio.charset.Charset;
import java.util.AbstractList;
import java.util.AbstractMap;
import java.util.AbstractSet;
Expand All @@ -54,6 +54,10 @@ public final class Internal {

private Internal() {}

// Charset instances looked up by name through Charset.forName. They carry no access modifier,
// so they are package-private; the other hunks in this commit reference them as
// Internal.US_ASCII, Internal.UTF_8 and Internal.ISO_8859_1 in place of the
// java.nio.charset.StandardCharsets constants.
static final Charset US_ASCII = Charset.forName("US-ASCII");
static final Charset UTF_8 = Charset.forName("UTF-8");
static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");

/** Throws an appropriate {@link NullPointerException} if the given object is {@code null}. */
static <T> T checkNotNull(T obj) {
if (obj == null) {
Expand Down Expand Up @@ -93,7 +97,7 @@ static <T> T checkNotNull(T obj, String message) {
* actually want. The generated code calls this automatically.
*/
public static String stringDefaultValue(String bytes) {
// This is a diff hunk whose +/- markers were stripped: the StandardCharsets line is the side
// removed by this revert, and the line after it (using this class's own constants) replaces it.
// Both versions encode the argument as ISO-8859-1 bytes and then decode those bytes as UTF-8.
return new String(bytes.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
return new String(bytes.getBytes(ISO_8859_1), UTF_8);
}

/**
Expand All @@ -104,15 +108,15 @@ public static String stringDefaultValue(String bytes) {
* ISO-8859-1 encoding.
*/
public static ByteString bytesDefaultValue(String bytes) {
// This is a diff hunk whose +/- markers were stripped: the StandardCharsets line is the side
// removed by this revert, and the line after it (using this class's own constant) replaces it.
// Both versions encode the argument as ISO-8859-1 and pass the array to ByteString.copyFrom.
return ByteString.copyFrom(bytes.getBytes(StandardCharsets.ISO_8859_1));
return ByteString.copyFrom(bytes.getBytes(ISO_8859_1));
}
/**
* Helper called by generated code to construct default values for bytes fields.
*
* <p>This is like {@link #bytesDefaultValue}, but returns a byte array.
*
* @param bytes the string to encode as ISO-8859-1
* @return the ISO-8859-1 encoding of {@code bytes}
*/
public static byte[] byteArrayDefaultValue(String bytes) {
// This is a diff hunk whose +/- markers were stripped: the StandardCharsets line is the side
// removed by this revert, and the line after it (using this class's own constant) replaces it.
return bytes.getBytes(StandardCharsets.ISO_8859_1);
return bytes.getBytes(ISO_8859_1);
}

/**
Expand Down Expand Up @@ -179,12 +183,12 @@ public static boolean isValidUtf8(byte[] byteArray) {

/**
* Helper method to get the UTF-8 bytes of a string.
*
* @param value the string to encode
* @return the UTF-8 encoding of {@code value}
*/
public static byte[] toByteArray(String value) {
// This is a diff hunk whose +/- markers were stripped: the StandardCharsets line is the side
// removed by this revert, and the line after it (using this class's own constant) replaces it.
return value.getBytes(StandardCharsets.UTF_8);
return value.getBytes(UTF_8);
}

/**
* Helper method to convert a byte array to a string using UTF-8 encoding.
*
* @param bytes the bytes to decode
* @return a new string decoded from {@code bytes} as UTF-8
*/
public static String toStringUtf8(byte[] bytes) {
// This is a diff hunk whose +/- markers were stripped: the StandardCharsets line is the side
// removed by this revert, and the line after it (using this class's own constant) replaces it.
return new String(bytes, StandardCharsets.UTF_8);
return new String(bytes, UTF_8);
}

/**
Expand Down
Expand Up @@ -76,7 +76,6 @@
import com.google.protobuf.MapEntryLite.Metadata;
import java.io.IOException;
import java.lang.reflect.Field;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
Expand Down Expand Up @@ -4766,7 +4765,7 @@ private int parseOneofField(
&& !Utf8.isValidUtf8(data, position, position + length)) {
throw InvalidProtocolBufferException.invalidUtf8();
}
final String value = new String(data, position, length, StandardCharsets.UTF_8);
final String value = new String(data, position, length, Internal.UTF_8);
unsafe.putObject(message, fieldOffset, value);
position += length;
}
Expand Down
3 changes: 1 addition & 2 deletions java/core/src/main/java/com/google/protobuf/Utf8.java
Expand Up @@ -42,7 +42,6 @@
import static java.lang.Character.toCodePoint;

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

/**
* A set of low-level, high-performance static utility methods related to the UTF-8 character
Expand Down Expand Up @@ -1387,7 +1386,7 @@ String decodeUtf8(byte[] bytes, int index, int size) throws InvalidProtocolBuffe
if (offset == limit) {
// The entire byte sequence is ASCII. Don't bother copying to a char[], JVMs using
// compact strings will just turn it back into the same byte[].
return new String(bytes, index, size, StandardCharsets.US_ASCII);
return new String(bytes, index, size, Internal.US_ASCII);
}

// It's not all ASCII, at this point. This may over-allocate, but we will truncate in the
Expand Down
Expand Up @@ -38,8 +38,6 @@
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;

import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
Expand Down Expand Up @@ -70,7 +68,7 @@ public void setUp() throws Exception {
@Test
public void testToString() throws UnsupportedEncodingException {
String testString = "I love unicode \u1234\u5678 characters";
ByteString unicode = ByteString.wrap(testString.getBytes(StandardCharsets.UTF_8));
ByteString unicode = ByteString.wrap(testString.getBytes(Internal.UTF_8));
ByteString chopped = unicode.substring(2, unicode.size() - 6);
assertWithMessage("%s.substring() must have the expected type", classUnderTest)
.that(classUnderTest)
Expand All @@ -86,13 +84,13 @@ public void testToString() throws UnsupportedEncodingException {
@Test
public void testCharsetToString() {
String testString = "I love unicode \u1234\u5678 characters";
ByteString unicode = ByteString.wrap(testString.getBytes(StandardCharsets.UTF_8));
ByteString unicode = ByteString.wrap(testString.getBytes(Internal.UTF_8));
ByteString chopped = unicode.substring(2, unicode.size() - 6);
assertWithMessage("%s.substring() must have the expected type", classUnderTest)
.that(classUnderTest)
.isEqualTo(getActualClassName(chopped));

String roundTripString = chopped.toString(StandardCharsets.UTF_8);
String roundTripString = chopped.toString(Internal.UTF_8);
assertWithMessage("%s unicode bytes must match", classUnderTest)
.that(testString.substring(2, testString.length() - 6))
.isEqualTo(roundTripString);
Expand Down
Expand Up @@ -42,7 +42,6 @@
import java.lang.reflect.Field;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
Expand Down Expand Up @@ -199,7 +198,7 @@ public void testCopyFrom_StringEncoding() {
public void testCopyFrom_Utf8() {
String testString = "I love unicode \u1234\u5678 characters";
ByteString byteString = ByteString.copyFromUtf8(testString);
byte[] testBytes = testString.getBytes(StandardCharsets.UTF_8);
byte[] testBytes = testString.getBytes(Internal.UTF_8);
assertWithMessage("copyFromUtf8 string must respect the charset")
.that(isArrayRange(byteString.toByteArray(), testBytes, 0, testBytes.length))
.isTrue();
Expand Down Expand Up @@ -517,7 +516,7 @@ public void write(int ignored) {
@Test
public void testToStringUtf8() {
String testString = "I love unicode \u1234\u5678 characters";
byte[] testBytes = testString.getBytes(StandardCharsets.UTF_8);
byte[] testBytes = testString.getBytes(Internal.UTF_8);
ByteString byteString = ByteString.copyFrom(testBytes);
assertWithMessage("copyToStringUtf8 must respect the charset")
.that(testString)
Expand All @@ -527,7 +526,7 @@ public void testToStringUtf8() {
@Test
public void testToString() {
String toString =
ByteString.copyFrom("Here are some bytes: \t\u00a1".getBytes(StandardCharsets.UTF_8)).toString();
ByteString.copyFrom("Here are some bytes: \t\u00a1".getBytes(Internal.UTF_8)).toString();
assertWithMessage(toString).that(toString.contains("size=24")).isTrue();
assertWithMessage(toString)
.that(toString.contains("contents=\"Here are some bytes: \\t\\302\\241\""))
Expand All @@ -539,7 +538,7 @@ public void testToString_long() {
String toString =
ByteString.copyFrom(
"123456789012345678901234567890123456789012345678901234567890"
.getBytes(StandardCharsets.UTF_8))
.getBytes(Internal.UTF_8))
.toString();
assertWithMessage(toString).that(toString.contains("size=60")).isTrue();
assertWithMessage(toString)
Expand Down
Expand Up @@ -41,7 +41,6 @@
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
Expand Down Expand Up @@ -418,7 +417,7 @@ public void testGetTotalBytesWritten() throws Exception {

// Write some bytes (more than the buffer can hold) and verify that totalWritten
// is correct.
byte[] value = "abcde".getBytes(StandardCharsets.UTF_8);
byte[] value = "abcde".getBytes(Internal.UTF_8);
for (int i = 0; i < 1024; ++i) {
coder.stream().writeRawBytes(value, 0, value.length);
}
Expand Down Expand Up @@ -501,7 +500,7 @@ public void testWriteToByteBuffer() throws Exception {

@Test
public void testWriteByteBuffer() throws Exception {
byte[] value = "abcde".getBytes(StandardCharsets.UTF_8);
byte[] value = "abcde".getBytes(Internal.UTF_8);
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
CodedOutputStream codedStream = CodedOutputStream.newInstance(outputStream);
ByteBuffer byteBuffer = ByteBuffer.wrap(value, 0, 1);
Expand Down Expand Up @@ -544,7 +543,7 @@ public void testSerializeUtf8_MultipleSmallWrites() throws Exception {
for (int pos = 0; pos < source.length(); pos += 2) {
String substr = source.substring(pos, pos + 2);
expectedBytesStream.write(2);
expectedBytesStream.write(substr.getBytes(StandardCharsets.UTF_8));
expectedBytesStream.write(substr.getBytes(Internal.UTF_8));
}
final byte[] expectedBytes = expectedBytesStream.toByteArray();

Expand Down

0 comments on commit d8ccfbf

Please sign in to comment.