Merge pull request #9148 from belugabehr/nio-charsets

Transition to NIO StandardCharsets
protocolbuffers · Oct 25, 2021 · b3b3162
2 parents d630f96 + 3370cc4
commit b3b3162
Show file tree

Hide file tree

Showing 24 changed files with 84 additions and 62 deletions.
diff --git a/java/core/src/main/java/com/google/protobuf/ArrayDecoders.java b/java/core/src/main/java/com/google/protobuf/ArrayDecoders.java
@@ -34,6 +34,7 @@
 
 import com.google.protobuf.Internal.ProtobufList;
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 
 /**
  * Helper functions to decode protobuf wire format from a byte array.
@@ -191,7 +192,7 @@ static int decodeString(byte[] data, int position, Registers registers)
       registers.object1 = "";
       return position;
     } else {
-      registers.object1 = new String(data, position, length, Internal.UTF_8);
+      registers.object1 = new String(data, position, length, StandardCharsets.UTF_8);
       return position + length;
     }
   }
@@ -577,7 +578,7 @@ static int decodeStringList(
     } else if (length == 0) {
       output.add("");
     } else {
-      String value = new String(data, position, length, Internal.UTF_8);
+      String value = new String(data, position, length, StandardCharsets.UTF_8);
       output.add(value);
       position += length;
     }
@@ -593,7 +594,7 @@ static int decodeStringList(
       } else if (nextLength == 0) {
         output.add("");
       } else {
-        String value = new String(data, position, nextLength, Internal.UTF_8);
+        String value = new String(data, position, nextLength, StandardCharsets.UTF_8);
         output.add(value);
         position += nextLength;
       }
@@ -619,7 +620,7 @@ static int decodeStringListRequireUtf8(
       if (!Utf8.isValidUtf8(data, position, position + length)) {
         throw InvalidProtocolBufferException.invalidUtf8();
       }
-      String value = new String(data, position, length, Internal.UTF_8);
+      String value = new String(data, position, length, StandardCharsets.UTF_8);
       output.add(value);
       position += length;
     }
@@ -638,7 +639,7 @@ static int decodeStringListRequireUtf8(
         if (!Utf8.isValidUtf8(data, position, position + nextLength)) {
           throw InvalidProtocolBufferException.invalidUtf8();
         }
-        String value = new String(data, position, nextLength, Internal.UTF_8);
+        String value = new String(data, position, nextLength, StandardCharsets.UTF_8);
         output.add(value);
         position += nextLength;
       }

diff --git a/java/core/src/main/java/com/google/protobuf/BinaryReader.java b/java/core/src/main/java/com/google/protobuf/BinaryReader.java
@@ -41,6 +41,7 @@
 
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import java.util.List;
 import java.util.Map;
 
@@ -226,7 +227,7 @@ public String readStringInternal(boolean requireUtf8) throws IOException {
       if (requireUtf8 && !Utf8.isValidUtf8(buffer, pos, pos + size)) {
         throw InvalidProtocolBufferException.invalidUtf8();
       }
-      String result = new String(buffer, pos, size, Internal.UTF_8);
+      String result = new String(buffer, pos, size, StandardCharsets.UTF_8);
       pos += size;
       return result;
     }

diff --git a/java/core/src/main/java/com/google/protobuf/ByteString.java b/java/core/src/main/java/com/google/protobuf/ByteString.java
@@ -45,6 +45,7 @@
 import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
 import java.nio.charset.UnsupportedCharsetException;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -460,7 +461,7 @@ public static ByteString copyFrom(String text, Charset charset) {
    * @return new {@code ByteString}
    */
   public static ByteString copyFromUtf8(String text) {
-    return new LiteralByteString(text.getBytes(Internal.UTF_8));
+    return new LiteralByteString(text.getBytes(StandardCharsets.UTF_8));
   }
 
   // =================================================================
@@ -833,7 +834,7 @@ public final String toString(Charset charset) {
    * @return new string using UTF-8 encoding
    */
   public final String toStringUtf8() {
-    return toString(Internal.UTF_8);
+    return toString(StandardCharsets.UTF_8);
   }
 
   /**

diff --git a/java/core/src/main/java/com/google/protobuf/CodedInputStream.java b/java/core/src/main/java/com/google/protobuf/CodedInputStream.java
@@ -32,12 +32,13 @@
 
 import static com.google.protobuf.Internal.EMPTY_BYTE_ARRAY;
 import static com.google.protobuf.Internal.EMPTY_BYTE_BUFFER;
-import static com.google.protobuf.Internal.UTF_8;
 import static com.google.protobuf.Internal.checkNotNull;
 import static com.google.protobuf.WireFormat.FIXED32_SIZE;
 import static com.google.protobuf.WireFormat.FIXED64_SIZE;
 import static com.google.protobuf.WireFormat.MAX_VARINT_SIZE;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
+
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;

diff --git a/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java b/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java
@@ -42,6 +42,7 @@
 import java.nio.BufferOverflowException;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
@@ -842,7 +843,7 @@ public static int computeStringSizeNoTag(final String value) {
       length = Utf8.encodedLength(value);
     } catch (UnpairedSurrogateException e) {
       // TODO(dweis): Consider using nio Charset methods instead.
-      final byte[] bytes = value.getBytes(Internal.UTF_8);
+      final byte[] bytes = value.getBytes(StandardCharsets.UTF_8);
       length = bytes.length;
     }
 
@@ -989,8 +990,7 @@ final void inefficientWriteStringNoTag(String value, UnpairedSurrogateException
     // Unfortunately there does not appear to be any way to tell Java to encode
     // UTF-8 directly into our buffer, so we have to let it create its own byte
     // array and then copy.
-    // TODO(dweis): Consider using nio Charset methods instead.
-    final byte[] bytes = value.getBytes(Internal.UTF_8);
+    final byte[] bytes = value.getBytes(StandardCharsets.UTF_8);
     try {
       writeUInt32NoTag(bytes.length);
       writeLazy(bytes, 0, bytes.length);

diff --git a/java/core/src/main/java/com/google/protobuf/Descriptors.java b/java/core/src/main/java/com/google/protobuf/Descriptors.java
@@ -51,6 +51,7 @@
 import com.google.protobuf.Descriptors.FileDescriptor.Syntax;
 import java.lang.ref.ReferenceQueue;
 import java.lang.ref.WeakReference;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -333,13 +334,13 @@ private static byte[] latin1Cat(final String[] strings) {
       //   should get the original bytes that we want.
       // Literal strings are limited to 64k, so it may be split into multiple strings.
       if (strings.length == 1) {
-        return strings[0].getBytes(Internal.ISO_8859_1);
+        return strings[0].getBytes(StandardCharsets.ISO_8859_1);
       }
       StringBuilder descriptorData = new StringBuilder();
       for (String part : strings) {
         descriptorData.append(part);
       }
-      return descriptorData.toString().getBytes(Internal.ISO_8859_1);
+      return descriptorData.toString().getBytes(StandardCharsets.ISO_8859_1);
     }
 
     private static FileDescriptor[] findDescriptors(

diff --git a/java/core/src/main/java/com/google/protobuf/Internal.java b/java/core/src/main/java/com/google/protobuf/Internal.java
@@ -32,7 +32,7 @@
 
 import java.lang.reflect.Method;
 import java.nio.ByteBuffer;
-import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
 import java.util.AbstractList;
 import java.util.AbstractMap;
 import java.util.AbstractSet;
@@ -54,10 +54,6 @@ public final class Internal {
 
   private Internal() {}
 
-  static final Charset US_ASCII = Charset.forName("US-ASCII");
-  static final Charset UTF_8 = Charset.forName("UTF-8");
-  static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
-
   /** Throws an appropriate {@link NullPointerException} if the given objects is {@code null}. */
   static <T> T checkNotNull(T obj) {
     if (obj == null) {
@@ -97,7 +93,7 @@ static <T> T checkNotNull(T obj, String message) {
    * actually want. The generated code calls this automatically.
    */
   public static String stringDefaultValue(String bytes) {
-    return new String(bytes.getBytes(ISO_8859_1), UTF_8);
+    return new String(bytes.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
   }
 
   /**
@@ -108,15 +104,15 @@ public static String stringDefaultValue(String bytes) {
    * ISO-8859-1 encoding.
    */
   public static ByteString bytesDefaultValue(String bytes) {
-    return ByteString.copyFrom(bytes.getBytes(ISO_8859_1));
+    return ByteString.copyFrom(bytes.getBytes(StandardCharsets.ISO_8859_1));
   }
   /**
    * Helper called by generated code to construct default values for bytes fields.
    *
    * <p>This is like {@link #bytesDefaultValue}, but returns a byte array.
    */
   public static byte[] byteArrayDefaultValue(String bytes) {
-    return bytes.getBytes(ISO_8859_1);
+    return bytes.getBytes(StandardCharsets.ISO_8859_1);
   }
 
   /**
@@ -183,12 +179,12 @@ public static boolean isValidUtf8(byte[] byteArray) {
 
   /** Helper method to get the UTF-8 bytes of a string. */
   public static byte[] toByteArray(String value) {
-    return value.getBytes(UTF_8);
+    return value.getBytes(StandardCharsets.UTF_8);
   }
 
   /** Helper method to convert a byte array to a string using UTF-8 encoding. */
   public static String toStringUtf8(byte[] bytes) {
-    return new String(bytes, UTF_8);
+    return new String(bytes, StandardCharsets.UTF_8);
   }
 
   /**

diff --git a/java/core/src/main/java/com/google/protobuf/MessageSchema.java b/java/core/src/main/java/com/google/protobuf/MessageSchema.java
@@ -76,6 +76,7 @@
 import com.google.protobuf.MapEntryLite.Metadata;
 import java.io.IOException;
 import java.lang.reflect.Field;
+import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
@@ -4765,7 +4766,7 @@ private int parseOneofField(
                 && !Utf8.isValidUtf8(data, position, position + length)) {
               throw InvalidProtocolBufferException.invalidUtf8();
             }
-            final String value = new String(data, position, length, Internal.UTF_8);
+            final String value = new String(data, position, length, StandardCharsets.UTF_8);
             unsafe.putObject(message, fieldOffset, value);
             position += length;
           }

diff --git a/java/core/src/main/java/com/google/protobuf/Utf8.java b/java/core/src/main/java/com/google/protobuf/Utf8.java
@@ -42,6 +42,7 @@
 import static java.lang.Character.toCodePoint;
 
 import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 
 /**
  * A set of low-level, high-performance static utility methods related to the UTF-8 character
@@ -1386,7 +1387,7 @@ String decodeUtf8(byte[] bytes, int index, int size) throws InvalidProtocolBuffe
       if (offset == limit) {
         // The entire byte sequence is ASCII.  Don't bother copying to a char[], JVMs using
         // compact strings will just turn it back into the same byte[].
-        return new String(bytes, index, size, Internal.US_ASCII);
+        return new String(bytes, index, size, StandardCharsets.US_ASCII);
       }
 
       // It's not all ASCII, at this point.  This may over-allocate, but we will truncate in the

diff --git a/java/core/src/test/java/com/google/protobuf/BoundedByteStringTest.java b/java/core/src/test/java/com/google/protobuf/BoundedByteStringTest.java
@@ -38,6 +38,8 @@
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
 import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
+
 import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -68,7 +70,7 @@ public void setUp() throws Exception {
   @Test
   public void testToString() throws UnsupportedEncodingException {
     String testString = "I love unicode \u1234\u5678 characters";
-    ByteString unicode = ByteString.wrap(testString.getBytes(Internal.UTF_8));
+    ByteString unicode = ByteString.wrap(testString.getBytes(StandardCharsets.UTF_8));
     ByteString chopped = unicode.substring(2, unicode.size() - 6);
     assertWithMessage("%s.substring() must have the expected type", classUnderTest)
         .that(classUnderTest)
@@ -84,13 +86,13 @@ public void testToString() throws UnsupportedEncodingException {
   @Test
   public void testCharsetToString() {
     String testString = "I love unicode \u1234\u5678 characters";
-    ByteString unicode = ByteString.wrap(testString.getBytes(Internal.UTF_8));
+    ByteString unicode = ByteString.wrap(testString.getBytes(StandardCharsets.UTF_8));
     ByteString chopped = unicode.substring(2, unicode.size() - 6);
     assertWithMessage("%s.substring() must have the expected type", classUnderTest)
         .that(classUnderTest)
         .isEqualTo(getActualClassName(chopped));
 
-    String roundTripString = chopped.toString(Internal.UTF_8);
+    String roundTripString = chopped.toString(StandardCharsets.UTF_8);
     assertWithMessage("%s unicode bytes must match", classUnderTest)
         .that(testString.substring(2, testString.length() - 6))
         .isEqualTo(roundTripString);

diff --git a/java/core/src/test/java/com/google/protobuf/ByteStringTest.java b/java/core/src/test/java/com/google/protobuf/ByteStringTest.java
@@ -42,6 +42,7 @@
 import java.lang.reflect.Field;
 import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
@@ -198,7 +199,7 @@ public void testCopyFrom_StringEncoding() {
   public void testCopyFrom_Utf8() {
     String testString = "I love unicode \u1234\u5678 characters";
     ByteString byteString = ByteString.copyFromUtf8(testString);
-    byte[] testBytes = testString.getBytes(Internal.UTF_8);
+    byte[] testBytes = testString.getBytes(StandardCharsets.UTF_8);
     assertWithMessage("copyFromUtf8 string must respect the charset")
         .that(isArrayRange(byteString.toByteArray(), testBytes, 0, testBytes.length))
         .isTrue();
@@ -516,7 +517,7 @@ public void write(int ignored) {
   @Test
   public void testToStringUtf8() {
     String testString = "I love unicode \u1234\u5678 characters";
-    byte[] testBytes = testString.getBytes(Internal.UTF_8);
+    byte[] testBytes = testString.getBytes(StandardCharsets.UTF_8);
     ByteString byteString = ByteString.copyFrom(testBytes);
     assertWithMessage("copyToStringUtf8 must respect the charset")
         .that(testString)
@@ -526,7 +527,7 @@ public void testToStringUtf8() {
   @Test
   public void testToString() {
     String toString =
-        ByteString.copyFrom("Here are some bytes: \t\u00a1".getBytes(Internal.UTF_8)).toString();
+        ByteString.copyFrom("Here are some bytes: \t\u00a1".getBytes(StandardCharsets.UTF_8)).toString();
     assertWithMessage(toString).that(toString.contains("size=24")).isTrue();
     assertWithMessage(toString)
         .that(toString.contains("contents=\"Here are some bytes: \\t\\302\\241\""))
@@ -538,7 +539,7 @@ public void testToString_long() {
     String toString =
         ByteString.copyFrom(
                 "123456789012345678901234567890123456789012345678901234567890"
-                    .getBytes(Internal.UTF_8))
+                    .getBytes(StandardCharsets.UTF_8))
             .toString();
     assertWithMessage(toString).that(toString.contains("size=60")).isTrue();
     assertWithMessage(toString)

diff --git a/java/core/src/test/java/com/google/protobuf/CodedOutputStreamTest.java b/java/core/src/test/java/com/google/protobuf/CodedOutputStreamTest.java
@@ -41,6 +41,7 @@
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
@@ -417,7 +418,7 @@ public void testGetTotalBytesWritten() throws Exception {
 
     // Write some bytes (more than the buffer can hold) and verify that totalWritten
     // is correct.
-    byte[] value = "abcde".getBytes(Internal.UTF_8);
+    byte[] value = "abcde".getBytes(StandardCharsets.UTF_8);
     for (int i = 0; i < 1024; ++i) {
       coder.stream().writeRawBytes(value, 0, value.length);
     }
@@ -500,7 +501,7 @@ public void testWriteToByteBuffer() throws Exception {
 
   @Test
   public void testWriteByteBuffer() throws Exception {
-    byte[] value = "abcde".getBytes(Internal.UTF_8);
+    byte[] value = "abcde".getBytes(StandardCharsets.UTF_8);
     ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
     CodedOutputStream codedStream = CodedOutputStream.newInstance(outputStream);
     ByteBuffer byteBuffer = ByteBuffer.wrap(value, 0, 1);
@@ -543,7 +544,7 @@ public void testSerializeUtf8_MultipleSmallWrites() throws Exception {
     for (int pos = 0; pos < source.length(); pos += 2) {
       String substr = source.substring(pos, pos + 2);
       expectedBytesStream.write(2);
-      expectedBytesStream.write(substr.getBytes(Internal.UTF_8));
+      expectedBytesStream.write(substr.getBytes(StandardCharsets.UTF_8));
     }
     final byte[] expectedBytes = expectedBytesStream.toByteArray();