protocolbuffers · zhangskz · Feb 3, 2022 · Jan 10, 2021 · Jan 10, 2021 · Feb 19, 2021
diff --git a/java/core/src/main/java/com/google/protobuf/DescriptorMessageInfoFactory.java b/java/core/src/main/java/com/google/protobuf/DescriptorMessageInfoFactory.java
@@ -61,8 +61,31 @@
 final class DescriptorMessageInfoFactory implements MessageInfoFactory {
   private static final String GET_DEFAULT_INSTANCE_METHOD_NAME = "getDefaultInstance";
   private static final DescriptorMessageInfoFactory instance = new DescriptorMessageInfoFactory();
+
+  /**
+   * Names that should be avoided (in UpperCamelCase format).
+   * Using them causes the compiler to generate accessors whose names are
+   * colliding with methods defined in base classes.
+   *
+   * Keep this list in sync with kForbiddenWordList in
+   * src/google/protobuf/compiler/java/java_helpers.cc
+   */
   private static final Set<String> specialFieldNames =
-      new HashSet<>(Arrays.asList("cached_size", "serialized_size", "class"));
+      new HashSet<>(Arrays.asList(
+        // java.lang.Object:
+        "Class",
+        // com.google.protobuf.MessageLiteOrBuilder:
+        "DefaultInstanceForType",
+        // com.google.protobuf.MessageLite:
+        "ParserForType",
+        "SerializedSize",
+        // com.google.protobuf.MessageOrBuilder:
+        "AllFields",
+        "DescriptorForType",
+        "InitializationErrorString",
+        "UnknownFields",
+        // obsolete. kept for backwards compatibility of generated code
+        "CachedSize"));
 
   // Disallow construction - it's a singleton.
   private DescriptorMessageInfoFactory() {}
@@ -593,21 +616,88 @@ static String getFieldName(FieldDescriptor fd) {
     String name = (fd.getType() == FieldDescriptor.Type.GROUP)
                   ? fd.getMessageType().getName()
                   : fd.getName();
-    String suffix = specialFieldNames.contains(name) ? "__" : "_";
-    return snakeCaseToCamelCase(name) + suffix;
+
+    // convert to UpperCamelCase for comparison to the specialFieldNames
+    // (which are in UpperCamelCase)
+    String upperCamelCaseName = snakeCaseToUpperCamelCase(name);
+
+    String suffix = specialFieldNames.contains(upperCamelCaseName)
+                  // For field names that match the specialFieldNames,
+                  // append "__" to prevent field accessor method names from
+                  // clashing with other methods.
+                  ? "__"
+                  // For other field names, append "_" to prevent field names
+                  // from clashing with java keywords.
+                  : "_";
 static String getFieldName(FieldDescriptor fd) { 
   String name = (fd.getType() == FieldDescriptor.Type.GROUP) 
                 ? fd.getMessageType().getName() 
                 : fd.getName(); 
   String suffix = specialFieldNames.contains(name) ? "__" : "_"; 
   return snakeCaseToCamelCase(name) + suffix; 
 } 
 static String getFieldName(FieldDescriptor fd) { 
   String name = (fd.getType() == FieldDescriptor.Type.GROUP) 
                 ? fd.getMessageType().getName() 
                 : fd.getName(); 
   String suffix = specialFieldNames.contains(name) ? "__" : "_"; 
   return snakeCaseToCamelCase(name) + suffix; 
 } 
+    return snakeCaseToLowerCamelCase(name) + suffix;
   }
 
   private static String getCachedSizeFieldName(FieldDescriptor fd) {
-    return snakeCaseToCamelCase(fd.getName()) + "MemoizedSerializedSize";
+    return snakeCaseToLowerCamelCase(fd.getName()) + "MemoizedSerializedSize";
+  }
+
+  /**
+   * Converts a snake case string into lower camel case.
+   *
+   * <p>Some examples:</p>
+   * <pre>
+   *     snakeCaseToLowerCamelCase("foo_bar") => "fooBar"
+   *     snakeCaseToLowerCamelCase("foo") => "foo"
+   * </pre>
+   *
+   * @param snakeCase the string in snake case to convert
+   * @return the string converted to camel case, with a lowercase first character
+   */
+  private static String snakeCaseToLowerCamelCase(String snakeCase) {
+    return snakeCaseToCamelCase(snakeCase, false);
   }
 
   /**
-   * This method must match exactly with the corresponding function in protocol compiler. See:
-   * https://github.com/google/protobuf/blob/v3.0.0/src/google/protobuf/compiler/java/java_helpers.cc#L153
+   * Converts a snake case string into upper camel case.
+   *
+   * <p>Some examples:</p>
+   * <pre>
+   *     snakeCaseToUpperCamelCase("foo_bar") => "FooBar"
+   *     snakeCaseToUpperCamelCase("foo") => "Foo"
+   * </pre>
+   *
+   * @param snakeCase the string in snake case to convert
+   * @return the string converted to camel case, with an uppercase first character
+   */
+  private static String snakeCaseToUpperCamelCase(String snakeCase) {
+    return snakeCaseToCamelCase(snakeCase, true);
+  }
+
+  /**
+   * Converts a snake case string into camel case.
+   *
+   * <p>For better readability, prefer calling either
+   * {@link #snakeCaseToLowerCamelCase(String)} or {@link #snakeCaseToUpperCamelCase(String)}.</p>
+   *
+   * <p>Some examples:</p>
+   * <pre>
+   *     snakeCaseToCamelCase("foo_bar", false) => "fooBar"
+   *     snakeCaseToCamelCase("foo_bar", true) => "FooBar"
+   *     snakeCaseToCamelCase("foo", false) => "foo"
+   *     snakeCaseToCamelCase("foo", true) => "Foo"
+   *     snakeCaseToCamelCase("Foo", false) => "foo"
+   *     snakeCaseToCamelCase("fooBar", false) => "fooBar"
+   * </pre>
+   *
+   * <p>This implementation of this method must exactly match the corresponding
+   * function in the protocol compiler.  Specifically, the
+   * {@code UnderscoresToCamelCase} function in
+   * {@code src/google/protobuf/compiler/java/java_helpers.cc}.</p>
+   *
+   * @param snakeCase the string in snake case to convert
+   * @param capFirst true if the first letter of the returned string should be uppercase.
+   *                 false if the first letter of the returned string should be lowercase.
+   * @return the string converted to camel case, with an uppercase or lowercase first
+   *         character depending on if {@code capFirst} is true or false, respectively
    */
-  private static String snakeCaseToCamelCase(String snakeCase) {
+  private static String snakeCaseToCamelCase(String snakeCase, boolean capFirst) {
     StringBuilder sb = new StringBuilder(snakeCase.length() + 1);
-    boolean capNext = false;
+    boolean capNext = capFirst;
     for (int ctr = 0; ctr < snakeCase.length(); ctr++) {
       char next = snakeCase.charAt(ctr);
       if (next == '_') {
@@ -653,7 +743,7 @@ private static Class<?> getTypeForRepeatedMessageField(Class<?> messageType, Fie
 
   /** Constructs the name of the get method for the given field in the proto. */
   private static String getterForField(String snakeCase) {
-    String camelCase = snakeCaseToCamelCase(snakeCase);
+    String camelCase = snakeCaseToLowerCamelCase(snakeCase);
     StringBuilder builder = new StringBuilder("get");
     // Capitalize the first character in the field name.
     builder.append(Character.toUpperCase(camelCase.charAt(0)));
@@ -679,7 +769,7 @@ OneofInfo getOneof(Class<?> messageType, OneofDescriptor desc) {
     }
 
     private static OneofInfo newInfo(Class<?> messageType, OneofDescriptor desc) {
-      String camelCase = snakeCaseToCamelCase(desc.getName());
+      String camelCase = snakeCaseToLowerCamelCase(desc.getName());
       String valueFieldName = camelCase + "_";
       String caseFieldName = camelCase + "Case_";
 

diff --git a/java/core/src/test/proto/com/google/protobuf/test_bad_identifiers.proto b/java/core/src/test/proto/com/google/protobuf/test_bad_identifiers.proto
@@ -43,10 +43,61 @@ option java_generic_services = true;  // auto-added
 option java_package = "com.google.protobuf";
 option java_outer_classname = "TestBadIdentifiersProto";
 
-message TestMessage {
-  optional string cached_size = 1;
-  optional string serialized_size = 2;
-  optional string class = 3;
+// Message with field names using underscores that conflict with accessors in the base message class in java.
+// See kForbiddenWordList in src/google/protobuf/compiler/java/java_helpers.cc
+message ForbiddenWordsUnderscoreMessage {
+  // java.lang.Object
+  optional bool class = 1;
+  // com.google.protobuf.MessageLiteOrBuilder
+  optional bool default_instance_for_type = 2;
+  // com.google.protobuf.MessageLite
+  optional bool parser_for_type = 3;
+  optional bool serialized_size = 4;
+  // com.google.protobuf.MessageOrBuilder
+  optional bool all_fields = 5;
+  optional bool descriptor_for_type = 6;
+  optional bool initialization_error_string = 7;
+  optional bool unknown_fields = 8;
+  // obsolete. kept for backwards compatibility of generated code
+  optional bool cached_size = 9;
+}
+
+// Message with field names using leading underscores that conflict with accessors in the base message class in java.
+// See kForbiddenWordList in src/google/protobuf/compiler/java/java_helpers.cc
+message ForbiddenWordsLeadingUnderscoreMessage {
+  // java.lang.Object
+  optional bool _class = 1;
+  // com.google.protobuf.MessageLiteOrBuilder
+  optional bool _default_instance_for_type = 2;
+  // com.google.protobuf.MessageLite
+  optional bool _parser_for_type = 3;
+  optional bool _serialized_size = 4;
+  // com.google.protobuf.MessageOrBuilder
+  optional bool _all_fields = 5;
+  optional bool _descriptor_for_type = 6;
+  optional bool _initialization_error_string = 7;
+  optional bool _unknown_fields = 8;
+  // obsolete. kept for backwards compatibility of generated code
+  optional bool _cached_size = 9;
+}
+
+// Message with field names in camel case that conflict with accessors in the base message class in java.
+// See kForbiddenWordList in src/google/protobuf/compiler/java/java_helpers.cc
+message ForbiddenWordsCamelMessage {
+  // java.lang.Object
+  optional bool class = 1;
+  // com.google.protobuf.MessageLiteOrBuilder
+  optional bool defaultInstanceForType = 2;
+  // com.google.protobuf.MessageLite
+  optional bool serializedSize = 3;
+  optional bool parserForType = 4;
+  // com.google.protobuf.MessageOrBuilder:
+  optional bool initializationErrorString = 5;
+  optional bool descriptorForType = 6;
+  optional bool allFields = 7;
+  optional bool unknownFields = 8;
+  // obsolete. kept for backwards compatibility of generated code
+  optional bool cachedSize = 9;
 }
 
 message Descriptor {
@@ -84,7 +135,7 @@ message Deprecated {
 
   optional int32 field1 = 1 [deprecated = true];
   optional TestEnum field2 = 2 [deprecated = true];
-  optional TestMessage field3 = 3 [deprecated = true];
+  optional ForbiddenWordsUnderscoreMessage field3 = 3 [deprecated = true];
 }
 
 message Override {
@@ -117,32 +168,32 @@ message Double {
 }
 
 service TestConflictingMethodNames {
-  rpc Override(TestMessage) returns (TestMessage);
+  rpc Override(ForbiddenWordsUnderscoreMessage) returns (ForbiddenWordsUnderscoreMessage);
 }
 
 message TestConflictingFieldNames {
   enum TestEnum {
     UNKNOWN = 0;
     FOO = 1;
   }
-  message TestMessage {}
+  message ForbiddenWordsUnderscoreMessage {}
   repeated int32 int32_field = 1;
   repeated TestEnum enum_field = 2;
   repeated string string_field = 3;
   repeated bytes bytes_field = 4;
-  repeated TestMessage message_field = 5;
+  repeated ForbiddenWordsUnderscoreMessage message_field = 5;
 
   optional int32 int32_field_count = 11;
   optional TestEnum enum_field_count = 12;
   optional string string_field_count = 13;
   optional bytes bytes_field_count = 14;
-  optional TestMessage message_field_count = 15;
+  optional ForbiddenWordsUnderscoreMessage message_field_count = 15;
 
   repeated int32 Int32Field = 21;          // NO_PROTO3
   repeated TestEnum EnumField = 22;        // NO_PROTO3
   repeated string StringField = 23;        // NO_PROTO3
   repeated bytes BytesField = 24;          // NO_PROTO3
-  repeated TestMessage MessageField = 25;  // NO_PROTO3
+  repeated ForbiddenWordsUnderscoreMessage MessageField = 25;  // NO_PROTO3
 
   // This field conflicts with "int32_field" as they both generate
   // the method getInt32FieldList().

diff --git a/src/google/protobuf/compiler/java/java_helpers.cc b/src/google/protobuf/compiler/java/java_helpers.cc
@@ -66,15 +66,26 @@ namespace {
 
 const char* kDefaultPackage = "";
 
-// Names that should be avoided as field names.
+// Names that should be avoided (in UpperCamelCase format).
 // Using them will cause the compiler to generate accessors whose names are
 // colliding with methods defined in base classes.
+// Keep this list in sync with specialFieldNames in
+// java/core/src/main/java/com/google/protobuf/DescriptorMessageInfoFactory.java
 const char* kForbiddenWordList[] = {
-    // message base class:
-    "cached_size",
-    "serialized_size",
     // java.lang.Object:
-    "class",
+    "Class",
+    // com.google.protobuf.MessageLiteOrBuilder:
+    "DefaultInstanceForType",
+    // com.google.protobuf.MessageLite:
+    "ParserForType",
+    "SerializedSize",
+    // com.google.protobuf.MessageOrBuilder:
+    "AllFields",
+    "DescriptorForType",
+    "InitializationErrorString",
+    "UnknownFields",
+    // obsolete. kept for backwards compatibility of generated code
+    "CachedSize",
 };
 
 const std::unordered_set<std::string>* kReservedNames =
@@ -93,7 +104,7 @@ const std::unordered_set<std::string>* kReservedNames =
 
 bool IsForbidden(const std::string& field_name) {
   for (int i = 0; i < GOOGLE_ARRAYSIZE(kForbiddenWordList); ++i) {
-    if (field_name == kForbiddenWordList[i]) {
+    if (UnderscoresToCamelCase(field_name, true) == kForbiddenWordList[i]) {
       return true;
     }
   }