diff --git a/Makefile.am b/Makefile.am index 707f7823d26e..99bd4c79f5ac 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1094,17 +1094,21 @@ ruby_EXTRA_DIST= \ ruby/compatibility_tests/v3.0.0/test.sh \ ruby/compatibility_tests/v3.0.0/Rakefile \ ruby/compatibility_tests/v3.0.0/README.md \ + ruby/ext/google/protobuf_c/convert.c \ + ruby/ext/google/protobuf_c/convert.h \ ruby/ext/google/protobuf_c/defs.c \ - ruby/ext/google/protobuf_c/encode_decode.c \ + ruby/ext/google/protobuf_c/defs.h \ ruby/ext/google/protobuf_c/extconf.rb \ ruby/ext/google/protobuf_c/map.c \ + ruby/ext/google/protobuf_c/map.h \ ruby/ext/google/protobuf_c/message.c \ + ruby/ext/google/protobuf_c/message.h \ ruby/ext/google/protobuf_c/protobuf.c \ ruby/ext/google/protobuf_c/protobuf.h \ ruby/ext/google/protobuf_c/repeated_field.c \ - ruby/ext/google/protobuf_c/storage.c \ - ruby/ext/google/protobuf_c/upb.c \ - ruby/ext/google/protobuf_c/upb.h \ + ruby/ext/google/protobuf_c/repeated_field.h \ + ruby/ext/google/protobuf_c/ruby-upb.c \ + ruby/ext/google/protobuf_c/ruby-upb.h \ ruby/ext/google/protobuf_c/wrap_memcpy.c \ ruby/google-protobuf.gemspec \ ruby/lib/google/protobuf/message_exts.rb \ diff --git a/conformance/Makefile.am b/conformance/Makefile.am index 0adfb8360015..b281f9cfaa08 100644 --- a/conformance/Makefile.am +++ b/conformance/Makefile.am @@ -5,7 +5,7 @@ conformance_protoc_inputs = \ $(top_srcdir)/src/google/protobuf/test_messages_proto3.proto # proto2 input files, should be separated with proto3, as we -# can't generate proto2 files for ruby, php and objc +# can't generate proto2 files for php. conformance_proto2_protoc_inputs = \ $(top_srcdir)/src/google/protobuf/test_messages_proto2.proto @@ -261,7 +261,7 @@ if USE_EXTERNAL_PROTOC # Some implementations include pre-generated versions of well-known types. protoc_middleman: $(conformance_protoc_inputs) $(conformance_proto2_protoc_inputs) $(well_known_type_protoc_inputs) google-protobuf $(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. --java_out=. --ruby_out=. --objc_out=. --python_out=. --php_out=. --js_out=import_style=commonjs,binary:. $(conformance_protoc_inputs) - $(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. --java_out=. --objc_out=. --python_out=. --js_out=import_style=commonjs,binary:. $(conformance_proto2_protoc_inputs) + $(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. --java_out=. --ruby_out=. --objc_out=. --python_out=. --js_out=import_style=commonjs,binary:. $(conformance_proto2_protoc_inputs) $(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. --java_out=. --ruby_out=. --python_out=. --js_out=import_style=commonjs,binary:google-protobuf $(well_known_type_protoc_inputs) ## $(PROTOC) -I$(srcdir) -I$(top_srcdir) --java_out=lite:lite $(conformance_protoc_inputs) $(well_known_type_protoc_inputs) touch protoc_middleman @@ -273,7 +273,7 @@ else # building out-of-tree. protoc_middleman: $(top_srcdir)/src/protoc$(EXEEXT) $(conformance_protoc_inputs) $(conformance_proto2_protoc_inputs) $(well_known_type_protoc_inputs) google-protobuf oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd --ruby_out=$$oldpwd --objc_out=$$oldpwd --python_out=$$oldpwd --php_out=$$oldpwd --js_out=import_style=commonjs,binary:$$oldpwd $(conformance_protoc_inputs) ) - oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd --objc_out=. --python_out=$$oldpwd --js_out=import_style=commonjs,binary:$$oldpwd $(conformance_proto2_protoc_inputs) ) + oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd --ruby_out=$$oldpwd --objc_out=$$oldpwd --python_out=$$oldpwd --js_out=import_style=commonjs,binary:$$oldpwd $(conformance_proto2_protoc_inputs) ) oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd --ruby_out=$$oldpwd --python_out=$$oldpwd --js_out=import_style=commonjs,binary:$$oldpwd/google-protobuf $(well_known_type_protoc_inputs) ) ## @mkdir -p lite ## oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --java_out=lite:$$oldpwd/lite $(conformance_protoc_inputs) $(well_known_type_protoc_inputs) ) diff --git a/conformance/conformance_ruby.rb b/conformance/conformance_ruby.rb index 79d8d3dd9e94..4af7659b09ea 100755 --- a/conformance/conformance_ruby.rb +++ b/conformance/conformance_ruby.rb @@ -32,6 +32,7 @@ require 'conformance_pb' require 'google/protobuf/test_messages_proto3_pb' +require 'google/protobuf/test_messages_proto2_pb' $test_count = 0 $verbose = false @@ -39,39 +40,39 @@ def do_test(request) test_message = ProtobufTestMessages::Proto3::TestAllTypesProto3.new response = Conformance::ConformanceResponse.new + descriptor = Google::Protobuf::DescriptorPool.generated_pool.lookup(request.message_type) + + unless descriptor + response.skipped = "Unknown message type: " + request.message_type + end begin case request.payload when :protobuf_payload - if request.message_type.eql?('protobuf_test_messages.proto3.TestAllTypesProto3') - begin - test_message = ProtobufTestMessages::Proto3::TestAllTypesProto3.decode( - request.protobuf_payload) - rescue Google::Protobuf::ParseError => err - response.parse_error = err.message.encode('utf-8') - return response - end - elsif request.message_type.eql?('protobuf_test_messages.proto2.TestAllTypesProto2') - response.skipped = "Ruby doesn't support proto2" + begin + test_message = descriptor.msgclass.decode(request.protobuf_payload) + rescue Google::Protobuf::ParseError => err + response.parse_error = err.message.encode('utf-8') return response - else - fail "Protobuf request doesn't have specific payload type" end when :json_payload begin - test_message = ProtobufTestMessages::Proto3::TestAllTypesProto3.decode_json( - request.json_payload) + options = {} + if request.test_category == :JSON_IGNORE_UNKNOWN_PARSING_TEST + options[:ignore_unknown_fields] = true + end + test_message = descriptor.msgclass.decode_json(request.json_payload, options) rescue Google::Protobuf::ParseError => err response.parse_error = err.message.encode('utf-8') return response end - - when :text_payload - begin - response.skipped = "Ruby doesn't support proto2" - return response - end + + when :text_payload + begin + response.skipped = "Ruby doesn't support text format" + return response + end when nil fail "Request didn't have payload" @@ -82,10 +83,18 @@ def do_test(request) fail 'Unspecified output format' when :PROTOBUF - response.protobuf_payload = test_message.to_proto + begin + response.protobuf_payload = test_message.to_proto + rescue Google::Protobuf::ParseError => err + response.serialize_error = err.message.encode('utf-8') + end when :JSON - response.json_payload = test_message.to_json + begin + response.json_payload = test_message.to_json + rescue Google::Protobuf::ParseError => err + response.serialize_error = err.message.encode('utf-8') + end when nil fail "Request didn't have requested output format" diff --git a/conformance/failure_list_ruby.txt b/conformance/failure_list_ruby.txt index 1aea14560c90..4938202ad70b 100644 --- a/conformance/failure_list_ruby.txt +++ b/conformance/failure_list_ruby.txt @@ -1,114 +1,58 @@ -Recommended.FieldMaskNumbersDontRoundTrip.JsonOutput -Recommended.FieldMaskPathsDontRoundTrip.JsonOutput -Recommended.FieldMaskTooManyUnderscore.JsonOutput Recommended.Proto2.JsonInput.FieldNameExtension.Validator -Recommended.Proto3.JsonInput.BytesFieldBase64Url.JsonOutput -Recommended.Proto3.JsonInput.BytesFieldBase64Url.ProtobufOutput -Recommended.Proto3.JsonInput.DurationHas3FractionalDigits.Validator -Recommended.Proto3.JsonInput.DurationHas6FractionalDigits.Validator -Recommended.Proto3.JsonInput.FieldMaskInvalidCharacter -Recommended.Proto3.JsonInput.MapFieldValueIsNull -Recommended.Proto3.JsonInput.NullValueInNormalMessage.Validator -Recommended.Proto3.JsonInput.NullValueInOtherOneofNewFormat.Validator -Recommended.Proto3.JsonInput.NullValueInOtherOneofOldFormat.Validator -Recommended.Proto3.JsonInput.RepeatedFieldMessageElementIsNull -Recommended.Proto3.JsonInput.RepeatedFieldPrimitiveElementIsNull -Recommended.Proto3.JsonInput.StringEndsWithEscapeChar -Recommended.Proto3.JsonInput.StringFieldSurrogateInWrongOrder -Recommended.Proto3.JsonInput.StringFieldUnpairedHighSurrogate -Recommended.Proto3.JsonInput.StringFieldUnpairedLowSurrogate -Recommended.Proto3.JsonInput.TimestampHas3FractionalDigits.Validator -Recommended.Proto3.JsonInput.TimestampHas6FractionalDigits.Validator -Recommended.Proto3.ProtobufInput.ValidDataRepeated.BOOL.PackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.BOOL.PackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.BOOL.UnpackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.BOOL.UnpackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.DOUBLE.PackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.DOUBLE.PackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.DOUBLE.UnpackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.DOUBLE.UnpackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.ENUM.PackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.ENUM.PackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.BOOL.PackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.BOOL.UnpackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.DOUBLE.PackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.DOUBLE.UnpackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.ENUM.PackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.ENUM.UnpackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.FIXED32.PackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.FIXED32.UnpackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.FIXED64.PackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.FIXED64.UnpackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.FLOAT.PackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.FLOAT.UnpackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.INT32.PackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.INT32.UnpackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.INT64.PackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.INT64.UnpackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.SFIXED32.PackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.SFIXED32.UnpackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.SFIXED64.PackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.SFIXED64.UnpackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.SINT32.PackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.SINT32.UnpackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.SINT64.PackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.SINT64.UnpackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.UINT32.PackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.UINT32.UnpackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.UINT64.PackedInput.PackedOutput.ProtobufOutput +Recommended.Proto2.ProtobufInput.ValidDataRepeated.UINT64.UnpackedInput.PackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataOneofBinary.MESSAGE.Merge.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.BOOL.PackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.BOOL.UnpackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.DOUBLE.PackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.DOUBLE.UnpackedInput.UnpackedOutput.ProtobufOutput Recommended.Proto3.ProtobufInput.ValidDataRepeated.ENUM.PackedInput.UnpackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.ENUM.UnpackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.ENUM.UnpackedInput.PackedOutput.ProtobufOutput Recommended.Proto3.ProtobufInput.ValidDataRepeated.ENUM.UnpackedInput.UnpackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.FIXED32.PackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.FIXED32.PackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.FIXED32.UnpackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.FIXED32.UnpackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.FIXED64.PackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.FIXED64.PackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.FIXED64.UnpackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.FIXED64.UnpackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.FLOAT.PackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.FLOAT.PackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.FLOAT.UnpackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.FLOAT.UnpackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.INT32.PackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.INT32.PackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.INT32.UnpackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.INT32.UnpackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.INT64.PackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.INT64.PackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.INT64.UnpackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.INT64.UnpackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.SFIXED32.PackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.SFIXED32.PackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.SFIXED32.UnpackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.SFIXED32.UnpackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.SFIXED64.PackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.SFIXED64.PackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.SFIXED64.UnpackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.SFIXED64.UnpackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.SINT32.PackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.SINT32.PackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.SINT32.UnpackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.SINT32.UnpackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.SINT64.PackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.SINT64.PackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.SINT64.UnpackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.SINT64.UnpackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.UINT32.PackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.UINT32.PackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.UINT32.UnpackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.UINT32.UnpackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.UINT64.PackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.UINT64.PackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.UINT64.UnpackedInput.DefaultOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataRepeated.UINT64.UnpackedInput.PackedOutput.ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataScalarBinary.ENUM[3].ProtobufOutput -Recommended.Proto3.ProtobufInput.ValidDataScalarBinary.ENUM[4].ProtobufOutput -Required.DurationProtoInputTooLarge.JsonOutput -Required.DurationProtoInputTooSmall.JsonOutput -Required.Proto2.JsonInput.StoresDefaultPrimitive.Validator -Required.Proto3.JsonInput.DoubleFieldMaxNegativeValue.JsonOutput -Required.Proto3.JsonInput.DoubleFieldMaxNegativeValue.ProtobufOutput -Required.Proto3.JsonInput.DoubleFieldMinPositiveValue.JsonOutput -Required.Proto3.JsonInput.DoubleFieldMinPositiveValue.ProtobufOutput -Required.Proto3.JsonInput.DoubleFieldNan.JsonOutput -Required.Proto3.JsonInput.DurationMinValue.JsonOutput -Required.Proto3.JsonInput.DurationRepeatedValue.JsonOutput -Required.Proto3.JsonInput.FloatFieldInfinity.JsonOutput -Required.Proto3.JsonInput.FloatFieldNan.JsonOutput -Required.Proto3.JsonInput.FloatFieldNegativeInfinity.JsonOutput -Required.Proto3.JsonInput.IgnoreUnknownJsonFalse.ProtobufOutput -Required.Proto3.JsonInput.IgnoreUnknownJsonNull.ProtobufOutput -Required.Proto3.JsonInput.IgnoreUnknownJsonNumber.ProtobufOutput -Required.Proto3.JsonInput.IgnoreUnknownJsonObject.ProtobufOutput -Required.Proto3.JsonInput.IgnoreUnknownJsonString.ProtobufOutput -Required.Proto3.JsonInput.IgnoreUnknownJsonTrue.ProtobufOutput -Required.Proto3.JsonInput.OneofFieldDuplicate -Required.Proto3.JsonInput.RejectTopLevelNull -Required.Proto3.JsonInput.StringFieldSurrogatePair.JsonOutput -Required.Proto3.JsonInput.StringFieldSurrogatePair.ProtobufOutput -Required.Proto3.ProtobufInput.DoubleFieldNormalizeQuietNan.JsonOutput -Required.Proto3.ProtobufInput.DoubleFieldNormalizeSignalingNan.JsonOutput -Required.Proto3.ProtobufInput.FloatFieldNormalizeQuietNan.JsonOutput -Required.Proto3.ProtobufInput.FloatFieldNormalizeSignalingNan.JsonOutput -Required.Proto3.ProtobufInput.ValidDataMap.STRING.MESSAGE.MissingDefault.JsonOutput -Required.Proto3.ProtobufInput.ValidDataRepeated.FLOAT.PackedInput.JsonOutput -Required.Proto3.ProtobufInput.ValidDataRepeated.FLOAT.UnpackedInput.JsonOutput -Required.Proto3.ProtobufInput.ValidDataScalar.FLOAT[2].JsonOutput -Required.TimestampProtoInputTooLarge.JsonOutput -Required.TimestampProtoInputTooSmall.JsonOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.FIXED32.PackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.FIXED32.UnpackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.FIXED64.PackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.FIXED64.UnpackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.FLOAT.PackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.FLOAT.UnpackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.INT32.PackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.INT32.UnpackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.INT64.PackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.INT64.UnpackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.SFIXED32.PackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.SFIXED32.UnpackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.SFIXED64.PackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.SFIXED64.UnpackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.SINT32.PackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.SINT32.UnpackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.SINT64.PackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.SINT64.UnpackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.UINT32.PackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.UINT32.UnpackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.UINT64.PackedInput.UnpackedOutput.ProtobufOutput +Recommended.Proto3.ProtobufInput.ValidDataRepeated.UINT64.UnpackedInput.UnpackedOutput.ProtobufOutput diff --git a/kokoro/linux/dockerfile/test/ruby/Dockerfile b/kokoro/linux/dockerfile/test/ruby/Dockerfile index 9037da715f09..b73bf84095a7 100644 --- a/kokoro/linux/dockerfile/test/ruby/Dockerfile +++ b/kokoro/linux/dockerfile/test/ruby/Dockerfile @@ -26,13 +26,14 @@ RUN apt-get update && apt-get install -y \ RUN gpg --keyserver hkp://keys.gnupg.net --recv-keys \ 409B6B1796C275462A1703113804BB82D39DC0E3 \ 7D2BAF1CF37B13E2069D6956105BD0E739499BDB -RUN \curl -sSL https://get.rvm.io | bash -s stable +RUN \curl -sSL https://get.rvm.io | bash -s master RUN /bin/bash -l -c "rvm install 2.3.8" RUN /bin/bash -l -c "rvm install 2.4.5" RUN /bin/bash -l -c "rvm install 2.5.1" RUN /bin/bash -l -c "rvm install 2.6.0" RUN /bin/bash -l -c "rvm install 2.7.0" +RUN /bin/bash -l -c "rvm install 3.0.0" RUN /bin/bash -l -c "echo 'gem: --no-ri --no-rdoc' > ~/.gemrc" RUN /bin/bash -l -c "echo 'export PATH=/usr/local/rvm/bin:$PATH' >> ~/.bashrc" diff --git a/kokoro/linux/ruby30/build.sh b/kokoro/linux/ruby30/build.sh new file mode 100755 index 000000000000..9e44575652bb --- /dev/null +++ b/kokoro/linux/ruby30/build.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# +# This is the top-level script we give to Kokoro as the entry point for +# running the "pull request" project: +# +# This script selects a specific Dockerfile (for building a Docker image) and +# a script to run inside that image. Then we delegate to the general +# build_and_run_docker.sh script. + +# Change to repo root +cd $(dirname $0)/../../.. + +export DOCKERHUB_ORGANIZATION=protobuftesting +export DOCKERFILE_DIR=kokoro/linux/dockerfile/test/ruby +export DOCKER_RUN_SCRIPT=kokoro/linux/pull_request_in_docker.sh +export OUTPUT_DIR=testoutput +export TEST_SET="ruby30" +./kokoro/linux/build_and_run_docker.sh diff --git a/kokoro/linux/ruby30/continuous.cfg b/kokoro/linux/ruby30/continuous.cfg new file mode 100644 index 000000000000..b03a3352f487 --- /dev/null +++ b/kokoro/linux/ruby30/continuous.cfg @@ -0,0 +1,11 @@ +# Config file for running tests in Kokoro + +# Location of the build script in repository +build_file: "protobuf/kokoro/linux/ruby30/build.sh" +timeout_mins: 120 + +action { + define_artifacts { + regex: "**/sponge_log.xml" + } +} diff --git a/kokoro/linux/ruby30/presubmit.cfg b/kokoro/linux/ruby30/presubmit.cfg new file mode 100644 index 000000000000..b03a3352f487 --- /dev/null +++ b/kokoro/linux/ruby30/presubmit.cfg @@ -0,0 +1,11 @@ +# Config file for running tests in Kokoro + +# Location of the build script in repository +build_file: "protobuf/kokoro/linux/ruby30/build.sh" +timeout_mins: 120 + +action { + define_artifacts { + regex: "**/sponge_log.xml" + } +} diff --git a/kokoro/macos/prepare_build_macos_rc b/kokoro/macos/prepare_build_macos_rc index d1154e3f5cf9..d94dd543fd5b 100755 --- a/kokoro/macos/prepare_build_macos_rc +++ b/kokoro/macos/prepare_build_macos_rc @@ -80,5 +80,5 @@ if [[ "${KOKORO_INSTALL_RVM:-}" == "yes" ]] ; then curl -sSL https://rvm.io/mpapis.asc | gpg --import - curl -sSL https://rvm.io/pkuczynski.asc | gpg --import - - curl -sSL https://get.rvm.io | bash -s stable --ruby + curl -sSL https://get.rvm.io | bash -s master --ruby fi diff --git a/kokoro/macos/ruby30/build.sh b/kokoro/macos/ruby30/build.sh new file mode 100755 index 000000000000..6b9bfb321573 --- /dev/null +++ b/kokoro/macos/ruby30/build.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# +# Build file to set up and run tests + +# Change to repo root +cd $(dirname $0)/../../.. + +# Prepare worker environment to run tests +KOKORO_INSTALL_RUBY=yes +KOKORO_INSTALL_RVM=yes +source kokoro/macos/prepare_build_macos_rc + +./tests.sh ruby30 diff --git a/kokoro/macos/ruby30/continuous.cfg b/kokoro/macos/ruby30/continuous.cfg new file mode 100644 index 000000000000..d5051170bef0 --- /dev/null +++ b/kokoro/macos/ruby30/continuous.cfg @@ -0,0 +1,5 @@ +# Config file for running tests in Kokoro + +# Location of the build script in repository +build_file: "protobuf/kokoro/macos/ruby30/build.sh" +timeout_mins: 1440 diff --git a/kokoro/macos/ruby30/presubmit.cfg b/kokoro/macos/ruby30/presubmit.cfg new file mode 100644 index 000000000000..d5051170bef0 --- /dev/null +++ b/kokoro/macos/ruby30/presubmit.cfg @@ -0,0 +1,5 @@ +# Config file for running tests in Kokoro + +# Location of the build script in repository +build_file: "protobuf/kokoro/macos/ruby30/build.sh" +timeout_mins: 1440 diff --git a/kokoro/release/ruby/macos/ruby/ruby_build_environment.sh b/kokoro/release/ruby/macos/ruby/ruby_build_environment.sh index 880c23390cfb..046b604b40e1 100755 --- a/kokoro/release/ruby/macos/ruby/ruby_build_environment.sh +++ b/kokoro/release/ruby/macos/ruby/ruby_build_environment.sh @@ -21,13 +21,13 @@ rm -rf ~/.rake-compiler CROSS_RUBY=$(mktemp tmpfile.XXXXXXXX) -curl https://raw.githubusercontent.com/rake-compiler/rake-compiler/v1.1.0/tasks/bin/cross-ruby.rake > "$CROSS_RUBY" +curl https://raw.githubusercontent.com/rake-compiler/rake-compiler/72184e51779b6a3b9b8580b036a052fdc3181ced/tasks/bin/cross-ruby.rake > "$CROSS_RUBY" # See https://github.com/grpc/grpc/issues/12161 for verconf.h patch details patch "$CROSS_RUBY" << EOF ---- cross-ruby.rake 2018-04-10 11:32:16.000000000 -0700 -+++ patched 2018-04-10 11:40:25.000000000 -0700 -@@ -141,8 +141,10 @@ +--- cross-ruby.rake 2020-12-11 11:17:53.000000000 +0900 ++++ patched 2020-12-11 11:18:52.000000000 +0900 +@@ -111,10 +111,12 @@ "--host=#{MINGW_HOST}", "--target=#{MINGW_TARGET}", "--build=#{RUBY_BUILD}", @@ -36,10 +36,13 @@ patch "$CROSS_RUBY" << EOF + '--disable-shared', '--disable-install-doc', + '--without-gmp', - '--with-ext=' + '--with-ext=', +- 'LDFLAGS=-pipe -s', ++ 'LDFLAGS=-pipe', ] -@@ -159,6 +161,7 @@ + # Force Winsock2 for Ruby 1.8, 1.9 defaults to it +@@ -130,6 +132,7 @@ # make file "#{build_dir}/ruby.exe" => ["#{build_dir}/Makefile"] do |t| chdir File.dirname(t.prerequisites.first) do @@ -55,7 +58,7 @@ set +x # rvm commands are very verbose rvm use 2.7.0 set -x ruby --version | grep 'ruby 2.7.0' -for v in 2.7.0 ; do +for v in 3.0.0 2.7.0 ; do ccache -c rake -f "$CROSS_RUBY" cross-ruby VERSION="$v" HOST=x86_64-darwin11 MAKE="$MAKE" done diff --git a/ruby/Rakefile b/ruby/Rakefile index 2aa7743e20d1..2d020e7173ba 100644 --- a/ruby/Rakefile +++ b/ruby/Rakefile @@ -51,6 +51,12 @@ if RUBY_PLATFORM == "java" system("mvn --batch-mode package") end else + unless ENV['IN_DOCKER'] == 'true' + # We need wyhash in-tree. + FileUtils.mkdir_p("ext/google/protobuf_c/third_party/wyhash") + FileUtils.cp("../third_party/wyhash/wyhash.h", "ext/google/protobuf_c/third_party/wyhash/wyhash.h") + end + Rake::ExtensionTask.new("protobuf_c", spec) do |ext| unless RUBY_PLATFORM =~ /darwin/ # TODO: also set "no_native to true" for mac if possible. As is, @@ -73,7 +79,7 @@ else ['x86-mingw32', 'x64-mingw32', 'x86_64-linux', 'x86-linux'].each do |plat| RakeCompilerDock.sh <<-"EOT", platform: plat bundle && \ - IN_DOCKER=true rake native:#{plat} pkg/#{spec.full_name}-#{plat}.gem RUBY_CC_VERSION=2.7.0:2.6.0:2.5.0:2.4.0:2.3.0 + IN_DOCKER=true rake native:#{plat} pkg/#{spec.full_name}-#{plat}.gem RUBY_CC_VERSION=3.0.0:2.7.0:2.6.0:2.5.0:2.4.0:2.3.0 EOT end end @@ -81,7 +87,7 @@ else if RUBY_PLATFORM =~ /darwin/ task 'gem:native' do system "rake genproto" - system "rake cross native gem RUBY_CC_VERSION=2.7.0:2.6.0:2.5.1:2.4.0:2.3.0" + system "rake cross native gem RUBY_CC_VERSION=3.0.0:2.7.0:2.6.0:2.5.1:2.4.0:2.3.0" end else task 'gem:native' => [:genproto, 'gem:windows'] diff --git a/ruby/compatibility_tests/v3.0.0/tests/basic.rb b/ruby/compatibility_tests/v3.0.0/tests/basic.rb index bfe68eff44a8..7228144cb165 100755 --- a/ruby/compatibility_tests/v3.0.0/tests/basic.rb +++ b/ruby/compatibility_tests/v3.0.0/tests/basic.rb @@ -1264,10 +1264,10 @@ def test_json_maps m = MapMessage.new(:map_string_int32 => {"a" => 1}) expected = '{"mapStringInt32":{"a":1},"mapStringMsg":{}}' expected_preserve = '{"map_string_int32":{"a":1},"map_string_msg":{}}' - assert MapMessage.encode_json(m) == expected + assert_equal expected, MapMessage.encode_json(m, :emit_defaults => true) - json = MapMessage.encode_json(m, :preserve_proto_fieldnames => true) - assert json == expected_preserve + json = MapMessage.encode_json(m, :preserve_proto_fieldnames => true, :emit_defaults => true) + assert_equal expected_preserve, json m2 = MapMessage.decode_json(MapMessage.encode_json(m)) assert m == m2 diff --git a/ruby/ext/google/protobuf_c/convert.c b/ruby/ext/google/protobuf_c/convert.c new file mode 100644 index 000000000000..bc3e35a5ed26 --- /dev/null +++ b/ruby/ext/google/protobuf_c/convert.c @@ -0,0 +1,349 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ----------------------------------------------------------------------------- +// Ruby <-> upb data conversion functions. +// +// This file Also contains a few other assorted algorithms on upb_msgval. +// +// None of the algorithms in this file require any access to the internal +// representation of Ruby or upb objects. +// ----------------------------------------------------------------------------- + +#include "convert.h" + +#include "message.h" +#include "protobuf.h" +#include "third_party/wyhash/wyhash.h" + +static upb_strview Convert_StringData(VALUE str, upb_arena *arena) { + upb_strview ret; + if (arena) { + char *ptr = upb_arena_malloc(arena, RSTRING_LEN(str)); + memcpy(ptr, RSTRING_PTR(str), RSTRING_LEN(str)); + ret.data = ptr; + } else { + // Data is only needed temporarily (within map lookup). + ret.data = RSTRING_PTR(str); + } + ret.size = RSTRING_LEN(str); + return ret; +} + +static bool is_ruby_num(VALUE value) { + return (TYPE(value) == T_FLOAT || + TYPE(value) == T_FIXNUM || + TYPE(value) == T_BIGNUM); +} + +static void Convert_CheckInt(const char* name, upb_fieldtype_t type, + VALUE val) { + if (!is_ruby_num(val)) { + rb_raise(cTypeError, + "Expected number type for integral field '%s' (given %s).", name, + rb_class2name(CLASS_OF(val))); + } + + // NUM2{INT,UINT,LL,ULL} macros do the appropriate range checks on upper + // bound; we just need to do precision checks (i.e., disallow rounding) and + // check for < 0 on unsigned types. + if (TYPE(val) == T_FLOAT) { + double dbl_val = NUM2DBL(val); + if (floor(dbl_val) != dbl_val) { + rb_raise(rb_eRangeError, + "Non-integral floating point value assigned to integer field " + "'%s' (given %s).", + name, rb_class2name(CLASS_OF(val))); + } + } + if (type == UPB_TYPE_UINT32 || type == UPB_TYPE_UINT64) { + if (NUM2DBL(val) < 0) { + rb_raise( + rb_eRangeError, + "Assigning negative value to unsigned integer field '%s' (given %s).", + name, rb_class2name(CLASS_OF(val))); + } + } +} + +static int32_t Convert_ToEnum(VALUE value, const char* name, + const upb_enumdef* e) { + int32_t val; + + switch (TYPE(value)) { + case T_FLOAT: + case T_FIXNUM: + case T_BIGNUM: + Convert_CheckInt(name, UPB_TYPE_INT32, value); + val = NUM2INT(value); + break; + case T_STRING: + if (!upb_enumdef_ntoi(e, RSTRING_PTR(value), RSTRING_LEN(value), &val)) { + goto unknownval; + } + break; + case T_SYMBOL: + if (!upb_enumdef_ntoiz(e, rb_id2name(SYM2ID(value)), &val)) { + goto unknownval; + } + break; + default: + rb_raise(cTypeError, + "Expected number or symbol type for enum field '%s'.", name); + } + + return val; + +unknownval: + rb_raise(rb_eRangeError, "Unknown symbol value for enum field '%s'.", name); +} + +upb_msgval Convert_RubyToUpb(VALUE value, const char* name, TypeInfo type_info, + upb_arena* arena) { + upb_msgval ret; + + switch (type_info.type) { + case UPB_TYPE_FLOAT: + if (!is_ruby_num(value)) { + rb_raise(cTypeError, "Expected number type for float field '%s' (given %s).", + name, rb_class2name(CLASS_OF(value))); + } + ret.float_val = NUM2DBL(value); + break; + case UPB_TYPE_DOUBLE: + if (!is_ruby_num(value)) { + rb_raise(cTypeError, "Expected number type for double field '%s' (given %s).", + name, rb_class2name(CLASS_OF(value))); + } + ret.double_val = NUM2DBL(value); + break; + case UPB_TYPE_BOOL: { + if (value == Qtrue) { + ret.bool_val = 1; + } else if (value == Qfalse) { + ret.bool_val = 0; + } else { + rb_raise(cTypeError, "Invalid argument for boolean field '%s' (given %s).", + name, rb_class2name(CLASS_OF(value))); + } + break; + } + case UPB_TYPE_STRING: { + VALUE utf8 = rb_enc_from_encoding(rb_utf8_encoding()); + if (CLASS_OF(value) == rb_cSymbol) { + value = rb_funcall(value, rb_intern("to_s"), 0); + } else if (CLASS_OF(value) != rb_cString) { + rb_raise(cTypeError, "Invalid argument for string field '%s' (given %s).", + name, rb_class2name(CLASS_OF(value))); + } + + if (rb_obj_encoding(value) != utf8) { + // Note: this will not duplicate underlying string data unless necessary. + value = rb_str_encode(value, utf8, 0, Qnil); + + if (rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) { + rb_raise(rb_eEncodingError, "String is invalid UTF-8"); + } + } + + ret.str_val = Convert_StringData(value, arena); + break; + } + case UPB_TYPE_BYTES: { + VALUE bytes = rb_enc_from_encoding(rb_ascii8bit_encoding()); + if (CLASS_OF(value) != rb_cString) { + rb_raise(cTypeError, "Invalid argument for bytes field '%s' (given %s).", + name, rb_class2name(CLASS_OF(value))); + } + + if (rb_obj_encoding(value) != bytes) { + // Note: this will not duplicate underlying string data unless necessary. + // TODO(haberman): is this really necessary to get raw bytes? + value = rb_str_encode(value, bytes, 0, Qnil); + } + + ret.str_val = Convert_StringData(value, arena); + break; + } + case UPB_TYPE_MESSAGE: + ret.msg_val = + Message_GetUpbMessage(value, type_info.def.msgdef, name, arena); + break; + case UPB_TYPE_ENUM: + ret.int32_val = Convert_ToEnum(value, name, type_info.def.enumdef); + break; + case UPB_TYPE_INT32: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT32: + case UPB_TYPE_UINT64: + Convert_CheckInt(name, type_info.type, value); + switch (type_info.type) { + case UPB_TYPE_INT32: + ret.int32_val = NUM2INT(value); + break; + case UPB_TYPE_INT64: + ret.int64_val = NUM2LL(value); + break; + case UPB_TYPE_UINT32: + ret.uint32_val = NUM2UINT(value); + break; + case UPB_TYPE_UINT64: + ret.uint64_val = NUM2ULL(value); + break; + default: + break; + } + break; + default: + break; + } + + return ret; +} + +VALUE Convert_UpbToRuby(upb_msgval upb_val, TypeInfo type_info, VALUE arena) { + switch (type_info.type) { + case UPB_TYPE_FLOAT: + return DBL2NUM(upb_val.float_val); + case UPB_TYPE_DOUBLE: + return DBL2NUM(upb_val.double_val); + case UPB_TYPE_BOOL: + return upb_val.bool_val ? Qtrue : Qfalse; + case UPB_TYPE_INT32: + return INT2NUM(upb_val.int32_val); + case UPB_TYPE_INT64: + return LL2NUM(upb_val.int64_val); + case UPB_TYPE_UINT32: + return UINT2NUM(upb_val.uint32_val); + case UPB_TYPE_UINT64: + return ULL2NUM(upb_val.int64_val); + case UPB_TYPE_ENUM: { + const char* name = + upb_enumdef_iton(type_info.def.enumdef, upb_val.int32_val); + if (name) { + return ID2SYM(rb_intern(name)); + } else { + return INT2NUM(upb_val.int32_val); + } + } + case UPB_TYPE_STRING: { + VALUE str_rb = rb_str_new(upb_val.str_val.data, upb_val.str_val.size); + rb_enc_associate(str_rb, rb_utf8_encoding()); + rb_obj_freeze(str_rb); + return str_rb; + } + case UPB_TYPE_BYTES: { + VALUE str_rb = rb_str_new(upb_val.str_val.data, upb_val.str_val.size); + rb_enc_associate(str_rb, rb_ascii8bit_encoding()); + rb_obj_freeze(str_rb); + return str_rb; + } + case UPB_TYPE_MESSAGE: + return Message_GetRubyWrapper((upb_msg*)upb_val.msg_val, + type_info.def.msgdef, arena); + default: + rb_raise(rb_eRuntimeError, "Convert_UpbToRuby(): Unexpected type %d", + (int)type_info.type); + } +} + +upb_msgval Msgval_DeepCopy(upb_msgval msgval, TypeInfo type_info, + upb_arena* arena) { + upb_msgval new_msgval; + + switch (type_info.type) { + default: + memcpy(&new_msgval, &msgval, sizeof(msgval)); + break; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: { + size_t n = msgval.str_val.size; + char *mem = upb_arena_malloc(arena, n); + new_msgval.str_val.data = mem; + new_msgval.str_val.size = n; + memcpy(mem, msgval.str_val.data, n); + break; + } + case UPB_TYPE_MESSAGE: + new_msgval.msg_val = + Message_deep_copy(msgval.msg_val, type_info.def.msgdef, arena); + break; + } + + return new_msgval; +} + +bool Msgval_IsEqual(upb_msgval val1, upb_msgval val2, TypeInfo type_info) { + switch (type_info.type) { + case UPB_TYPE_BOOL: + return memcmp(&val1, &val2, 1) == 0; + case UPB_TYPE_FLOAT: + case UPB_TYPE_INT32: + case UPB_TYPE_UINT32: + case UPB_TYPE_ENUM: + return memcmp(&val1, &val2, 4) == 0; + case UPB_TYPE_DOUBLE: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT64: + return memcmp(&val1, &val2, 8) == 0; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + return val1.str_val.size != val2.str_val.size || + memcmp(val1.str_val.data, val2.str_val.data, + val1.str_val.size) == 0; + case UPB_TYPE_MESSAGE: + return Message_Equal(val1.msg_val, val2.msg_val, type_info.def.msgdef); + default: + rb_raise(rb_eRuntimeError, "Internal error, unexpected type"); + } +} + +uint64_t Msgval_GetHash(upb_msgval val, TypeInfo type_info, uint64_t seed) { + switch (type_info.type) { + case UPB_TYPE_BOOL: + return wyhash(&val, 1, seed, _wyp); + case UPB_TYPE_FLOAT: + case UPB_TYPE_INT32: + case UPB_TYPE_UINT32: + case UPB_TYPE_ENUM: + return wyhash(&val, 4, seed, _wyp); + case UPB_TYPE_DOUBLE: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT64: + return wyhash(&val, 8, seed, _wyp); + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + return wyhash(val.str_val.data, val.str_val.size, seed, _wyp); + case UPB_TYPE_MESSAGE: + return Message_Hash(val.msg_val, type_info.def.msgdef, seed); + default: + rb_raise(rb_eRuntimeError, "Internal error, unexpected type"); + } +} diff --git a/ruby/ext/google/protobuf_c/convert.h b/ruby/ext/google/protobuf_c/convert.h new file mode 100644 index 000000000000..cda18a0547f4 --- /dev/null +++ b/ruby/ext/google/protobuf_c/convert.h @@ -0,0 +1,72 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef RUBY_PROTOBUF_CONVERT_H_ +#define RUBY_PROTOBUF_CONVERT_H_ + +#include + +#include "protobuf.h" +#include "ruby-upb.h" + +// Converts |ruby_val| to a upb_msgval according to |type_info|. +// +// The |arena| parameter indicates the lifetime of the container where this +// value will be assigned. It is used as follows: +// - If type is string or bytes, the string data will be copied into |arena|. +// - If type is message, and we need to auto-construct a message due to implicit +// conversions (eg. Time -> Google::Protobuf::Timestamp), the new message +// will be created in |arena|. +// - If type is message and the Ruby value is a message instance, we will fuse +// the message's arena into |arena|, to ensure that this message outlives the +// container. +upb_msgval Convert_RubyToUpb(VALUE ruby_val, const char *name, + TypeInfo type_info, upb_arena *arena); + +// Converts |upb_val| to a Ruby VALUE according to |type_info|. This may involve +// creating a Ruby wrapper object. +// +// The |arena| parameter indicates the arena that owns the lifetime of +// |upb_val|. Any Ruby wrapper object that is created will reference |arena| +// and ensure it outlives the wrapper. +VALUE Convert_UpbToRuby(upb_msgval upb_val, TypeInfo type_info, VALUE arena); + +// Creates a deep copy of |msgval| in |arena|. +upb_msgval Msgval_DeepCopy(upb_msgval msgval, TypeInfo type_info, + upb_arena *arena); + +// Returns true if |val1| and |val2| are equal. Their type is given by +// |type_info|. +bool Msgval_IsEqual(upb_msgval val1, upb_msgval val2, TypeInfo type_info); + +// Returns a hash value for the given upb_msgval. +uint64_t Msgval_GetHash(upb_msgval val, TypeInfo type_info, uint64_t seed); + +#endif // RUBY_PROTOBUF_CONVERT_H_ diff --git a/ruby/ext/google/protobuf_c/defs.c b/ruby/ext/google/protobuf_c/defs.c index 1a09cc5ffb77..6cf8174ccd56 100644 --- a/ruby/ext/google/protobuf_c/defs.c +++ b/ruby/ext/google/protobuf_c/defs.c @@ -30,8 +30,35 @@ #include #include +#include + +#include "convert.h" +#include "message.h" #include "protobuf.h" +static VALUE Builder_build(VALUE _self); + +static VALUE cMessageBuilderContext; +static VALUE cOneofBuilderContext; +static VALUE cEnumBuilderContext; +static VALUE cBuilder; + +// ----------------------------------------------------------------------------- +// Global map from upb {msg,enum}defs to wrapper Descriptor/EnumDescriptor +// instances. +// ----------------------------------------------------------------------------- + +static VALUE get_msgdef_obj(VALUE descriptor_pool, const upb_msgdef* def); +static VALUE get_enumdef_obj(VALUE descriptor_pool, const upb_enumdef* def); +static VALUE get_fielddef_obj(VALUE descriptor_pool, const upb_fielddef* def); +static VALUE get_filedef_obj(VALUE descriptor_pool, const upb_filedef* def); +static VALUE get_oneofdef_obj(VALUE descriptor_pool, const upb_oneofdef* def); + +// A distinct object that is not accessible from Ruby. We use this as a +// constructor argument to enforce that certain objects cannot be created from +// Ruby. +VALUE c_only_cookie = Qnil; + // ----------------------------------------------------------------------------- // Common utilities. // ----------------------------------------------------------------------------- @@ -48,6 +75,10 @@ static VALUE rb_str_maybe_null(const char* s) { return rb_str_new2(s); } +// ----------------------------------------------------------------------------- +// Backward compatibility code. +// ----------------------------------------------------------------------------- + static void rewrite_enum_default(const upb_symtab* symtab, google_protobuf_FileDescriptorProto* file, google_protobuf_FieldDescriptorProto* field) { @@ -205,221 +236,70 @@ static void rewrite_nesting(VALUE msg_ent, google_protobuf_DescriptorProto* msg, } } -/* We have to do some relatively complicated logic here for backward - * compatibility. - * - * In descriptor.proto, messages are nested inside other messages if that is - * what the original .proto file looks like. For example, suppose we have this - * foo.proto: - * - * package foo; - * message Bar { - * message Baz {} - * } - * - * The descriptor for this must look like this: - * - * file { - * name: "test.proto" - * package: "foo" - * message_type { - * name: "Bar" - * nested_type { - * name: "Baz" - * } - * } - * } - * - * However, the Ruby generated code has always generated messages in a flat, - * non-nested way: - * - * Google::Protobuf::DescriptorPool.generated_pool.build do - * add_message "foo.Bar" do - * end - * add_message "foo.Bar.Baz" do - * end - * end - * - * Here we need to do a translation where we turn this generated code into the - * above descriptor. We need to infer that "foo" is the package name, and not - * a message itself. - * - * We delegate to Ruby to compute the transformation, for more concice and - * readable code than we can do in C */ -static void rewrite_names(VALUE _file_builder, - google_protobuf_FileDescriptorProto* file_proto) { - FileBuilderContext* file_builder = ruby_to_FileBuilderContext(_file_builder); - upb_arena *arena = file_builder->arena; - // Build params (package, msg_names, enum_names). - VALUE package = Qnil; - VALUE msg_names = rb_ary_new(); - VALUE enum_names = rb_ary_new(); - size_t msg_count, enum_count, i; - VALUE new_package, nesting, msg_ents, enum_ents; - google_protobuf_DescriptorProto** msgs; - google_protobuf_EnumDescriptorProto** enums; - - if (google_protobuf_FileDescriptorProto_has_package(file_proto)) { - upb_strview package_str = - google_protobuf_FileDescriptorProto_package(file_proto); - package = rb_str_new(package_str.data, package_str.size); - } - - msgs = google_protobuf_FileDescriptorProto_mutable_message_type(file_proto, - &msg_count); - for (i = 0; i < msg_count; i++) { - upb_strview name = google_protobuf_DescriptorProto_name(msgs[i]); - rb_ary_push(msg_names, rb_str_new(name.data, name.size)); - } - - enums = google_protobuf_FileDescriptorProto_mutable_enum_type(file_proto, - &enum_count); - for (i = 0; i < enum_count; i++) { - upb_strview name = google_protobuf_EnumDescriptorProto_name(enums[i]); - rb_ary_push(enum_names, rb_str_new(name.data, name.size)); - } - - { - // Call Ruby code to calculate package name and nesting. - VALUE args[3] = { package, msg_names, enum_names }; - VALUE internal = rb_eval_string("Google::Protobuf::Internal"); - VALUE ret = rb_funcallv(internal, rb_intern("fixup_descriptor"), 3, args); - - new_package = rb_ary_entry(ret, 0); - nesting = rb_ary_entry(ret, 1); - } - - // Rewrite package and names. - if (new_package != Qnil) { - upb_strview new_package_str = - FileBuilderContext_strdup(_file_builder, new_package); - google_protobuf_FileDescriptorProto_set_package(file_proto, - new_package_str); - } - - for (i = 0; i < msg_count; i++) { - upb_strview name = google_protobuf_DescriptorProto_name(msgs[i]); - remove_path(&name); - google_protobuf_DescriptorProto_set_name(msgs[i], name); - } - - for (i = 0; i < enum_count; i++) { - upb_strview name = google_protobuf_EnumDescriptorProto_name(enums[i]); - remove_path(&name); - google_protobuf_EnumDescriptorProto_set_name(enums[i], name); - } - - // Rewrite nesting. - msg_ents = rb_hash_aref(nesting, ID2SYM(rb_intern("msgs"))); - enum_ents = rb_hash_aref(nesting, ID2SYM(rb_intern("enums"))); - - Check_Type(msg_ents, T_ARRAY); - Check_Type(enum_ents, T_ARRAY); - - for (i = 0; i < (size_t)RARRAY_LEN(msg_ents); i++) { - VALUE msg_ent = rb_ary_entry(msg_ents, i); - VALUE pos = rb_hash_aref(msg_ent, ID2SYM(rb_intern("pos"))); - msgs[i] = msgs[NUM2INT(pos)]; - rewrite_nesting(msg_ent, msgs[i], msgs, enums, arena); - } - - for (i = 0; i < (size_t)RARRAY_LEN(enum_ents); i++) { - VALUE enum_pos = rb_ary_entry(enum_ents, i); - enums[i] = enums[NUM2INT(enum_pos)]; - } - - google_protobuf_FileDescriptorProto_resize_message_type( - file_proto, RARRAY_LEN(msg_ents), arena); - google_protobuf_FileDescriptorProto_resize_enum_type( - file_proto, RARRAY_LEN(enum_ents), arena); -} - // ----------------------------------------------------------------------------- // DescriptorPool. // ----------------------------------------------------------------------------- -#define DEFINE_CLASS(name, string_name) \ - VALUE c ## name = Qnil; \ - const rb_data_type_t _ ## name ## _type = { \ - string_name, \ - { name ## _mark, name ## _free, NULL }, \ - }; \ - name* ruby_to_ ## name(VALUE val) { \ - name* ret; \ - TypedData_Get_Struct(val, name, &_ ## name ## _type, ret); \ - return ret; \ - } \ - -#define DEFINE_SELF(type, var, rb_var) \ - type* var = ruby_to_ ## type(rb_var) +typedef struct { + VALUE def_to_descriptor; // Hash table of def* -> Ruby descriptor. + upb_symtab* symtab; +} DescriptorPool; + +VALUE cDescriptorPool = Qnil; // Global singleton DescriptorPool. The user is free to create others, but this // is used by generated code. VALUE generated_pool = Qnil; -DEFINE_CLASS(DescriptorPool, "Google::Protobuf::DescriptorPool"); - -void DescriptorPool_mark(void* _self) { +static void DescriptorPool_mark(void* _self) { DescriptorPool* self = _self; rb_gc_mark(self->def_to_descriptor); } -void DescriptorPool_free(void* _self) { +static void DescriptorPool_free(void* _self) { DescriptorPool* self = _self; - upb_symtab_free(self->symtab); - upb_handlercache_free(self->fill_handler_cache); - upb_handlercache_free(self->pb_serialize_handler_cache); - upb_handlercache_free(self->json_serialize_handler_cache); - upb_handlercache_free(self->json_serialize_handler_preserve_cache); - upb_pbcodecache_free(self->fill_method_cache); - upb_json_codecache_free(self->json_fill_method_cache); - xfree(self); } +static const rb_data_type_t DescriptorPool_type = { + "Google::Protobuf::DescriptorPool", + {DescriptorPool_mark, DescriptorPool_free, NULL}, + .flags = RUBY_TYPED_FREE_IMMEDIATELY, +}; + +static DescriptorPool* ruby_to_DescriptorPool(VALUE val) { + DescriptorPool* ret; + TypedData_Get_Struct(val, DescriptorPool, &DescriptorPool_type, ret); + return ret; +} + +// Exposed to other modules in defs.h. +const upb_symtab *DescriptorPool_GetSymtab(VALUE desc_pool_rb) { + DescriptorPool *pool = ruby_to_DescriptorPool(desc_pool_rb); + return pool->symtab; +} + /* * call-seq: * DescriptorPool.new => pool * * Creates a new, empty, descriptor pool. */ -VALUE DescriptorPool_alloc(VALUE klass) { +static VALUE DescriptorPool_alloc(VALUE klass) { DescriptorPool* self = ALLOC(DescriptorPool); VALUE ret; self->def_to_descriptor = Qnil; - ret = TypedData_Wrap_Struct(klass, &_DescriptorPool_type, self); + ret = TypedData_Wrap_Struct(klass, &DescriptorPool_type, self); self->def_to_descriptor = rb_hash_new(); self->symtab = upb_symtab_new(); - self->fill_handler_cache = - upb_handlercache_new(add_handlers_for_message, (void*)ret); - self->pb_serialize_handler_cache = upb_pb_encoder_newcache(); - self->json_serialize_handler_cache = upb_json_printer_newcache(false); - self->json_serialize_handler_preserve_cache = - upb_json_printer_newcache(true); - self->fill_method_cache = upb_pbcodecache_new(self->fill_handler_cache); - self->json_fill_method_cache = upb_json_codecache_new(); + ObjectCache_Add(self->symtab, ret, _upb_symtab_arena(self->symtab)); return ret; } -void DescriptorPool_register(VALUE module) { - VALUE klass = rb_define_class_under( - module, "DescriptorPool", rb_cObject); - rb_define_alloc_func(klass, DescriptorPool_alloc); - rb_define_method(klass, "build", DescriptorPool_build, -1); - rb_define_method(klass, "lookup", DescriptorPool_lookup, 1); - rb_define_singleton_method(klass, "generated_pool", - DescriptorPool_generated_pool, 0); - rb_gc_register_address(&cDescriptorPool); - cDescriptorPool = klass; - - rb_gc_register_address(&generated_pool); - generated_pool = rb_class_new_instance(0, NULL, klass); -} - /* * call-seq: * DescriptorPool.build(&block) @@ -430,7 +310,7 @@ void DescriptorPool_register(VALUE module) { * Builder#add_enum within the block as appropriate. This is the recommended, * idiomatic way to define new message and enum types. */ -VALUE DescriptorPool_build(int argc, VALUE* argv, VALUE _self) { +static VALUE DescriptorPool_build(int argc, VALUE* argv, VALUE _self) { VALUE ctx = rb_class_new_instance(1, &_self, cBuilder); VALUE block = rb_block_proc(); rb_funcall_with_block(ctx, rb_intern("instance_eval"), 0, NULL, block); @@ -445,8 +325,8 @@ VALUE DescriptorPool_build(int argc, VALUE* argv, VALUE _self) { * Finds a Descriptor or EnumDescriptor by name and returns it, or nil if none * exists with the given name. */ -VALUE DescriptorPool_lookup(VALUE _self, VALUE name) { - DEFINE_SELF(DescriptorPool, self, _self); +static VALUE DescriptorPool_lookup(VALUE _self, VALUE name) { + DescriptorPool* self = ruby_to_DescriptorPool(_self); const char* name_str = get_str(name); const upb_msgdef* msgdef; const upb_enumdef* enumdef; @@ -473,31 +353,53 @@ VALUE DescriptorPool_lookup(VALUE _self, VALUE name) { * register types in this pool for convenience so that they do not have to hold * a reference to a private pool instance. */ -VALUE DescriptorPool_generated_pool(VALUE _self) { +static VALUE DescriptorPool_generated_pool(VALUE _self) { return generated_pool; } +static void DescriptorPool_register(VALUE module) { + VALUE klass = rb_define_class_under( + module, "DescriptorPool", rb_cObject); + rb_define_alloc_func(klass, DescriptorPool_alloc); + rb_define_method(klass, "build", DescriptorPool_build, -1); + rb_define_method(klass, "lookup", DescriptorPool_lookup, 1); + rb_define_singleton_method(klass, "generated_pool", + DescriptorPool_generated_pool, 0); + rb_gc_register_address(&cDescriptorPool); + cDescriptorPool = klass; + + rb_gc_register_address(&generated_pool); + generated_pool = rb_class_new_instance(0, NULL, klass); +} + // ----------------------------------------------------------------------------- // Descriptor. // ----------------------------------------------------------------------------- -DEFINE_CLASS(Descriptor, "Google::Protobuf::Descriptor"); +typedef struct { + const upb_msgdef* msgdef; + VALUE klass; + VALUE descriptor_pool; +} Descriptor; + +VALUE cDescriptor = Qnil; -void Descriptor_mark(void* _self) { +static void Descriptor_mark(void* _self) { Descriptor* self = _self; rb_gc_mark(self->klass); rb_gc_mark(self->descriptor_pool); - if (self->layout && self->layout->empty_template) { - layout_mark(self->layout, self->layout->empty_template); - } } -void Descriptor_free(void* _self) { - Descriptor* self = _self; - if (self->layout) { - free_layout(self->layout); - } - xfree(self); +static const rb_data_type_t Descriptor_type = { + "Google::Protobuf::Descriptor", + {Descriptor_mark, RUBY_DEFAULT_FREE, NULL}, + .flags = RUBY_TYPED_FREE_IMMEDIATELY, +}; + +static Descriptor* ruby_to_Descriptor(VALUE val) { + Descriptor* ret; + TypedData_Get_Struct(val, Descriptor, &Descriptor_type, ret); + return ret; } /* @@ -509,42 +411,24 @@ void Descriptor_free(void* _self) { * it is added to a pool, after which it becomes immutable (as part of a * finalization process). */ -VALUE Descriptor_alloc(VALUE klass) { +static VALUE Descriptor_alloc(VALUE klass) { Descriptor* self = ALLOC(Descriptor); - VALUE ret = TypedData_Wrap_Struct(klass, &_Descriptor_type, self); + VALUE ret = TypedData_Wrap_Struct(klass, &Descriptor_type, self); self->msgdef = NULL; self->klass = Qnil; self->descriptor_pool = Qnil; - self->layout = NULL; return ret; } -void Descriptor_register(VALUE module) { - VALUE klass = rb_define_class_under( - module, "Descriptor", rb_cObject); - rb_define_alloc_func(klass, Descriptor_alloc); - rb_define_method(klass, "initialize", Descriptor_initialize, 3); - rb_define_method(klass, "each", Descriptor_each, 0); - rb_define_method(klass, "lookup", Descriptor_lookup, 1); - rb_define_method(klass, "each_oneof", Descriptor_each_oneof, 0); - rb_define_method(klass, "lookup_oneof", Descriptor_lookup_oneof, 1); - rb_define_method(klass, "msgclass", Descriptor_msgclass, 0); - rb_define_method(klass, "name", Descriptor_name, 0); - rb_define_method(klass, "file_descriptor", Descriptor_file_descriptor, 0); - rb_include_module(klass, rb_mEnumerable); - rb_gc_register_address(&cDescriptor); - cDescriptor = klass; -} - /* * call-seq: * Descriptor.new(c_only_cookie, ptr) => Descriptor * * Creates a descriptor wrapper object. May only be called from C. */ -VALUE Descriptor_initialize(VALUE _self, VALUE cookie, - VALUE descriptor_pool, VALUE ptr) { - DEFINE_SELF(Descriptor, self, _self); +static VALUE Descriptor_initialize(VALUE _self, VALUE cookie, + VALUE descriptor_pool, VALUE ptr) { + Descriptor* self = ruby_to_Descriptor(_self); if (cookie != c_only_cookie) { rb_raise(rb_eRuntimeError, @@ -563,8 +447,8 @@ VALUE Descriptor_initialize(VALUE _self, VALUE cookie, * * Returns the FileDescriptor object this message belongs to. */ -VALUE Descriptor_file_descriptor(VALUE _self) { - DEFINE_SELF(Descriptor, self, _self); +static VALUE Descriptor_file_descriptor(VALUE _self) { + Descriptor* self = ruby_to_Descriptor(_self); return get_filedef_obj(self->descriptor_pool, upb_msgdef_file(self->msgdef)); } @@ -575,8 +459,8 @@ VALUE Descriptor_file_descriptor(VALUE _self) { * Returns the name of this message type as a fully-qualified string (e.g., * My.Package.MessageType). */ -VALUE Descriptor_name(VALUE _self) { - DEFINE_SELF(Descriptor, self, _self); +static VALUE Descriptor_name(VALUE _self) { + Descriptor* self = ruby_to_Descriptor(_self); return rb_str_maybe_null(upb_msgdef_fullname(self->msgdef)); } @@ -586,8 +470,8 @@ VALUE Descriptor_name(VALUE _self) { * * Iterates over fields in this message type, yielding to the block on each one. */ -VALUE Descriptor_each(VALUE _self) { - DEFINE_SELF(Descriptor, self, _self); +static VALUE Descriptor_each(VALUE _self) { + Descriptor* self = ruby_to_Descriptor(_self); upb_msg_field_iter it; for (upb_msg_field_begin(&it, self->msgdef); @@ -607,8 +491,8 @@ VALUE Descriptor_each(VALUE _self) { * Returns the field descriptor for the field with the given name, if present, * or nil if none. */ -VALUE Descriptor_lookup(VALUE _self, VALUE name) { - DEFINE_SELF(Descriptor, self, _self); +static VALUE Descriptor_lookup(VALUE _self, VALUE name) { + Descriptor* self = ruby_to_Descriptor(_self); const char* s = get_str(name); const upb_fielddef* field = upb_msgdef_ntofz(self->msgdef, s); if (field == NULL) { @@ -624,8 +508,8 @@ VALUE Descriptor_lookup(VALUE _self, VALUE name) { * Invokes the given block for each oneof in this message type, passing the * corresponding OneofDescriptor. */ -VALUE Descriptor_each_oneof(VALUE _self) { - DEFINE_SELF(Descriptor, self, _self); +static VALUE Descriptor_each_oneof(VALUE _self) { + Descriptor* self = ruby_to_Descriptor(_self); upb_msg_oneof_iter it; for (upb_msg_oneof_begin(&it, self->msgdef); @@ -645,8 +529,8 @@ VALUE Descriptor_each_oneof(VALUE _self) { * Returns the oneof descriptor for the oneof with the given name, if present, * or nil if none. */ -VALUE Descriptor_lookup_oneof(VALUE _self, VALUE name) { - DEFINE_SELF(Descriptor, self, _self); +static VALUE Descriptor_lookup_oneof(VALUE _self, VALUE name) { + Descriptor* self = ruby_to_Descriptor(_self); const char* s = get_str(name); const upb_oneofdef* oneof = upb_msgdef_ntooz(self->msgdef, s); if (oneof == NULL) { @@ -661,32 +545,62 @@ VALUE Descriptor_lookup_oneof(VALUE _self, VALUE name) { * * Returns the Ruby class created for this message type. */ -VALUE Descriptor_msgclass(VALUE _self) { - DEFINE_SELF(Descriptor, self, _self); +static VALUE Descriptor_msgclass(VALUE _self) { + Descriptor* self = ruby_to_Descriptor(_self); if (self->klass == Qnil) { self->klass = build_class_from_descriptor(_self); } return self->klass; } +static void Descriptor_register(VALUE module) { + VALUE klass = rb_define_class_under( + module, "Descriptor", rb_cObject); + rb_define_alloc_func(klass, Descriptor_alloc); + rb_define_method(klass, "initialize", Descriptor_initialize, 3); + rb_define_method(klass, "each", Descriptor_each, 0); + rb_define_method(klass, "lookup", Descriptor_lookup, 1); + rb_define_method(klass, "each_oneof", Descriptor_each_oneof, 0); + rb_define_method(klass, "lookup_oneof", Descriptor_lookup_oneof, 1); + rb_define_method(klass, "msgclass", Descriptor_msgclass, 0); + rb_define_method(klass, "name", Descriptor_name, 0); + rb_define_method(klass, "file_descriptor", Descriptor_file_descriptor, 0); + rb_include_module(klass, rb_mEnumerable); + rb_gc_register_address(&cDescriptor); + cDescriptor = klass; +} + // ----------------------------------------------------------------------------- // FileDescriptor. // ----------------------------------------------------------------------------- -DEFINE_CLASS(FileDescriptor, "Google::Protobuf::FileDescriptor"); +typedef struct { + const upb_filedef* filedef; + VALUE descriptor_pool; // Owns the upb_filedef. +} FileDescriptor; -void FileDescriptor_mark(void* _self) { +static VALUE cFileDescriptor = Qnil; + +static void FileDescriptor_mark(void* _self) { FileDescriptor* self = _self; rb_gc_mark(self->descriptor_pool); } -void FileDescriptor_free(void* _self) { - xfree(_self); +static const rb_data_type_t FileDescriptor_type = { + "Google::Protobuf::FileDescriptor", + {FileDescriptor_mark, RUBY_DEFAULT_FREE, NULL}, + .flags = RUBY_TYPED_FREE_IMMEDIATELY, +}; + +static FileDescriptor* ruby_to_FileDescriptor(VALUE val) { + FileDescriptor* ret; + TypedData_Get_Struct(val, FileDescriptor, &FileDescriptor_type, ret); + return ret; } -VALUE FileDescriptor_alloc(VALUE klass) { +static VALUE FileDescriptor_alloc(VALUE klass) { FileDescriptor* self = ALLOC(FileDescriptor); - VALUE ret = TypedData_Wrap_Struct(klass, &_FileDescriptor_type, self); + VALUE ret = TypedData_Wrap_Struct(klass, &FileDescriptor_type, self); self->descriptor_pool = Qnil; self->filedef = NULL; return ret; @@ -699,9 +613,9 @@ VALUE FileDescriptor_alloc(VALUE klass) { * Returns a new file descriptor. The syntax must be set before it's passed * to a builder. */ -VALUE FileDescriptor_initialize(VALUE _self, VALUE cookie, +static VALUE FileDescriptor_initialize(VALUE _self, VALUE cookie, VALUE descriptor_pool, VALUE ptr) { - DEFINE_SELF(FileDescriptor, self, _self); + FileDescriptor* self = ruby_to_FileDescriptor(_self); if (cookie != c_only_cookie) { rb_raise(rb_eRuntimeError, @@ -714,25 +628,14 @@ VALUE FileDescriptor_initialize(VALUE _self, VALUE cookie, return Qnil; } -void FileDescriptor_register(VALUE module) { - VALUE klass = rb_define_class_under( - module, "FileDescriptor", rb_cObject); - rb_define_alloc_func(klass, FileDescriptor_alloc); - rb_define_method(klass, "initialize", FileDescriptor_initialize, 3); - rb_define_method(klass, "name", FileDescriptor_name, 0); - rb_define_method(klass, "syntax", FileDescriptor_syntax, 0); - rb_gc_register_address(&cFileDescriptor); - cFileDescriptor = klass; -} - /* * call-seq: * FileDescriptor.name => name * * Returns the name of the file. */ -VALUE FileDescriptor_name(VALUE _self) { - DEFINE_SELF(FileDescriptor, self, _self); +static VALUE FileDescriptor_name(VALUE _self) { + FileDescriptor* self = ruby_to_FileDescriptor(_self); const char* name = upb_filedef_name(self->filedef); return name == NULL ? Qnil : rb_str_new2(name); } @@ -746,8 +649,8 @@ VALUE FileDescriptor_name(VALUE _self) { * Valid syntax versions are: * :proto2 or :proto3. */ -VALUE FileDescriptor_syntax(VALUE _self) { - DEFINE_SELF(FileDescriptor, self, _self); +static VALUE FileDescriptor_syntax(VALUE _self) { + FileDescriptor* self = ruby_to_FileDescriptor(_self); switch (upb_filedef_syntax(self->filedef)) { case UPB_SYNTAX_PROTO3: return ID2SYM(rb_intern("proto3")); @@ -756,19 +659,43 @@ VALUE FileDescriptor_syntax(VALUE _self) { } } +static void FileDescriptor_register(VALUE module) { + VALUE klass = rb_define_class_under( + module, "FileDescriptor", rb_cObject); + rb_define_alloc_func(klass, FileDescriptor_alloc); + rb_define_method(klass, "initialize", FileDescriptor_initialize, 3); + rb_define_method(klass, "name", FileDescriptor_name, 0); + rb_define_method(klass, "syntax", FileDescriptor_syntax, 0); + rb_gc_register_address(&cFileDescriptor); + cFileDescriptor = klass; +} + // ----------------------------------------------------------------------------- // FieldDescriptor. // ----------------------------------------------------------------------------- -DEFINE_CLASS(FieldDescriptor, "Google::Protobuf::FieldDescriptor"); +typedef struct { + const upb_fielddef* fielddef; + VALUE descriptor_pool; // Owns the upb_fielddef. +} FieldDescriptor; + +static VALUE cFieldDescriptor = Qnil; -void FieldDescriptor_mark(void* _self) { +static void FieldDescriptor_mark(void* _self) { FieldDescriptor* self = _self; rb_gc_mark(self->descriptor_pool); } -void FieldDescriptor_free(void* _self) { - xfree(_self); +static const rb_data_type_t FieldDescriptor_type = { + "Google::Protobuf::FieldDescriptor", + {FieldDescriptor_mark, RUBY_DEFAULT_FREE, NULL}, + .flags = RUBY_TYPED_FREE_IMMEDIATELY, +}; + +static FieldDescriptor* ruby_to_FieldDescriptor(VALUE val) { + FieldDescriptor* ret; + TypedData_Get_Struct(val, FieldDescriptor, &FieldDescriptor_type, ret); + return ret; } /* @@ -778,42 +705,22 @@ void FieldDescriptor_free(void* _self) { * Returns a new field descriptor. Its name, type, etc. must be set before it is * added to a message type. */ -VALUE FieldDescriptor_alloc(VALUE klass) { +static VALUE FieldDescriptor_alloc(VALUE klass) { FieldDescriptor* self = ALLOC(FieldDescriptor); - VALUE ret = TypedData_Wrap_Struct(klass, &_FieldDescriptor_type, self); + VALUE ret = TypedData_Wrap_Struct(klass, &FieldDescriptor_type, self); self->fielddef = NULL; return ret; } -void FieldDescriptor_register(VALUE module) { - VALUE klass = rb_define_class_under( - module, "FieldDescriptor", rb_cObject); - rb_define_alloc_func(klass, FieldDescriptor_alloc); - rb_define_method(klass, "initialize", FieldDescriptor_initialize, 3); - rb_define_method(klass, "name", FieldDescriptor_name, 0); - rb_define_method(klass, "type", FieldDescriptor_type, 0); - rb_define_method(klass, "default", FieldDescriptor_default, 0); - rb_define_method(klass, "label", FieldDescriptor_label, 0); - rb_define_method(klass, "number", FieldDescriptor_number, 0); - rb_define_method(klass, "submsg_name", FieldDescriptor_submsg_name, 0); - rb_define_method(klass, "subtype", FieldDescriptor_subtype, 0); - rb_define_method(klass, "has?", FieldDescriptor_has, 1); - rb_define_method(klass, "clear", FieldDescriptor_clear, 1); - rb_define_method(klass, "get", FieldDescriptor_get, 1); - rb_define_method(klass, "set", FieldDescriptor_set, 2); - rb_gc_register_address(&cFieldDescriptor); - cFieldDescriptor = klass; -} - /* * call-seq: * EnumDescriptor.new(c_only_cookie, pool, ptr) => EnumDescriptor * * Creates a descriptor wrapper object. May only be called from C. */ -VALUE FieldDescriptor_initialize(VALUE _self, VALUE cookie, - VALUE descriptor_pool, VALUE ptr) { - DEFINE_SELF(FieldDescriptor, self, _self); +static VALUE FieldDescriptor_initialize(VALUE _self, VALUE cookie, + VALUE descriptor_pool, VALUE ptr) { + FieldDescriptor* self = ruby_to_FieldDescriptor(_self); if (cookie != c_only_cookie) { rb_raise(rb_eRuntimeError, @@ -832,11 +739,12 @@ VALUE FieldDescriptor_initialize(VALUE _self, VALUE cookie, * * Returns the name of this field. */ -VALUE FieldDescriptor_name(VALUE _self) { - DEFINE_SELF(FieldDescriptor, self, _self); +static VALUE FieldDescriptor_name(VALUE _self) { + FieldDescriptor* self = ruby_to_FieldDescriptor(_self); return rb_str_maybe_null(upb_fielddef_name(self->fielddef)); } +// Non-static, exposed to other .c files. upb_fieldtype_t ruby_to_fieldtype(VALUE type) { if (TYPE(type) != T_SYMBOL) { rb_raise(rb_eArgError, "Expected symbol for field type."); @@ -865,27 +773,7 @@ upb_fieldtype_t ruby_to_fieldtype(VALUE type) { return 0; } -VALUE fieldtype_to_ruby(upb_fieldtype_t type) { - switch (type) { -#define CONVERT(upb, ruby) \ - case UPB_TYPE_ ## upb : return ID2SYM(rb_intern( # ruby )); - CONVERT(FLOAT, float); - CONVERT(DOUBLE, double); - CONVERT(BOOL, bool); - CONVERT(STRING, string); - CONVERT(BYTES, bytes); - CONVERT(MESSAGE, message); - CONVERT(ENUM, enum); - CONVERT(INT32, int32); - CONVERT(INT64, int64); - CONVERT(UINT32, uint32); - CONVERT(UINT64, uint64); -#undef CONVERT - } - return Qnil; -} - -upb_descriptortype_t ruby_to_descriptortype(VALUE type) { +static upb_descriptortype_t ruby_to_descriptortype(VALUE type) { if (TYPE(type) != T_SYMBOL) { rb_raise(rb_eArgError, "Expected symbol for field type."); } @@ -920,7 +808,7 @@ upb_descriptortype_t ruby_to_descriptortype(VALUE type) { return 0; } -VALUE descriptortype_to_ruby(upb_descriptortype_t type) { +static VALUE descriptortype_to_ruby(upb_descriptortype_t type) { switch (type) { #define CONVERT(upb, ruby) \ case UPB_DESCRIPTOR_TYPE_ ## upb : return ID2SYM(rb_intern( # ruby )); @@ -947,29 +835,6 @@ VALUE descriptortype_to_ruby(upb_descriptortype_t type) { return Qnil; } -VALUE ruby_to_label(VALUE label) { - upb_label_t upb_label; - bool converted = false; - -#define CONVERT(upb, ruby) \ - if (SYM2ID(label) == rb_intern( # ruby )) { \ - upb_label = UPB_LABEL_ ## upb; \ - converted = true; \ - } - - CONVERT(OPTIONAL, optional); - CONVERT(REQUIRED, required); - CONVERT(REPEATED, repeated); - -#undef CONVERT - - if (!converted) { - rb_raise(rb_eArgError, "Unknown field label."); - } - - return upb_label; -} - /* * call-seq: * FieldDescriptor.type => type @@ -980,8 +845,8 @@ VALUE ruby_to_label(VALUE label) { * :int32, :int64, :uint32, :uint64, :float, :double, :bool, :string, * :bytes, :message. */ -VALUE FieldDescriptor_type(VALUE _self) { - DEFINE_SELF(FieldDescriptor, self, _self); +static VALUE FieldDescriptor__type(VALUE _self) { + FieldDescriptor* self = ruby_to_FieldDescriptor(_self); return descriptortype_to_ruby(upb_fielddef_descriptortype(self->fielddef)); } @@ -991,9 +856,16 @@ VALUE FieldDescriptor_type(VALUE _self) { * * Returns this field's default, as a Ruby object, or nil if not yet set. */ -VALUE FieldDescriptor_default(VALUE _self) { - DEFINE_SELF(FieldDescriptor, self, _self); - return layout_get_default(self->fielddef); +static VALUE FieldDescriptor_default(VALUE _self) { + FieldDescriptor* self = ruby_to_FieldDescriptor(_self); + const upb_fielddef *f = self->fielddef; + upb_msgval default_val = {0}; + if (upb_fielddef_issubmsg(f)) { + return Qnil; + } else if (!upb_fielddef_isseq(f)) { + default_val = upb_fielddef_default(f); + } + return Convert_UpbToRuby(default_val, TypeInfo_get(self->fielddef), Qnil); } /* @@ -1005,8 +877,8 @@ VALUE FieldDescriptor_default(VALUE _self) { * Valid field labels are: * :optional, :repeated */ -VALUE FieldDescriptor_label(VALUE _self) { - DEFINE_SELF(FieldDescriptor, self, _self); +static VALUE FieldDescriptor_label(VALUE _self) { + FieldDescriptor* self = ruby_to_FieldDescriptor(_self); switch (upb_fielddef_label(self->fielddef)) { #define CONVERT(upb, ruby) \ case UPB_LABEL_ ## upb : return ID2SYM(rb_intern( # ruby )); @@ -1027,8 +899,8 @@ VALUE FieldDescriptor_label(VALUE _self) { * * Returns the tag number for this field. */ -VALUE FieldDescriptor_number(VALUE _self) { - DEFINE_SELF(FieldDescriptor, self, _self); +static VALUE FieldDescriptor_number(VALUE _self) { + FieldDescriptor* self = ruby_to_FieldDescriptor(_self); return INT2NUM(upb_fielddef_number(self->fielddef)); } @@ -1041,8 +913,8 @@ VALUE FieldDescriptor_number(VALUE _self) { * name will be resolved within the context of the pool to which the containing * message type is added. */ -VALUE FieldDescriptor_submsg_name(VALUE _self) { - DEFINE_SELF(FieldDescriptor, self, _self); +static VALUE FieldDescriptor_submsg_name(VALUE _self) { + FieldDescriptor* self = ruby_to_FieldDescriptor(_self); switch (upb_fielddef_type(self->fielddef)) { case UPB_TYPE_ENUM: return rb_str_new2( @@ -1064,8 +936,8 @@ VALUE FieldDescriptor_submsg_name(VALUE _self) { * called *until* the containing message type is added to a pool (and thus * resolved). */ -VALUE FieldDescriptor_subtype(VALUE _self) { - DEFINE_SELF(FieldDescriptor, self, _self); +static VALUE FieldDescriptor_subtype(VALUE _self) { + FieldDescriptor* self = ruby_to_FieldDescriptor(_self); switch (upb_fielddef_type(self->fielddef)) { case UPB_TYPE_ENUM: return get_enumdef_obj(self->descriptor_pool, @@ -1085,14 +957,19 @@ VALUE FieldDescriptor_subtype(VALUE _self) { * Returns the value set for this field on the given message. Raises an * exception if message is of the wrong type. */ -VALUE FieldDescriptor_get(VALUE _self, VALUE msg_rb) { - DEFINE_SELF(FieldDescriptor, self, _self); - MessageHeader* msg; - TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); - if (msg->descriptor->msgdef != upb_fielddef_containingtype(self->fielddef)) { +static VALUE FieldDescriptor_get(VALUE _self, VALUE msg_rb) { + FieldDescriptor* self = ruby_to_FieldDescriptor(_self); + const upb_msgdef *m; + const upb_msgdef *msg = Message_Get(msg_rb, &m); + VALUE arena = Message_GetArena(msg_rb); + upb_msgval msgval; + + if (m != upb_fielddef_containingtype(self->fielddef)) { rb_raise(cTypeError, "get method called on wrong message type"); } - return layout_get(msg->descriptor->layout, Message_data(msg), self->fielddef); + + msgval = upb_msg_get(msg, self->fielddef); + return Convert_UpbToRuby(msgval, TypeInfo_get(self->fielddef), arena); } /* @@ -1102,17 +979,18 @@ VALUE FieldDescriptor_get(VALUE _self, VALUE msg_rb) { * Returns whether the value is set on the given message. Raises an * exception when calling for fields that do not have presence. */ -VALUE FieldDescriptor_has(VALUE _self, VALUE msg_rb) { - DEFINE_SELF(FieldDescriptor, self, _self); - MessageHeader* msg; - TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); - if (msg->descriptor->msgdef != upb_fielddef_containingtype(self->fielddef)) { +static VALUE FieldDescriptor_has(VALUE _self, VALUE msg_rb) { + FieldDescriptor* self = ruby_to_FieldDescriptor(_self); + const upb_msgdef *m; + const upb_msgdef *msg = Message_Get(msg_rb, &m); + + if (m != upb_fielddef_containingtype(self->fielddef)) { rb_raise(cTypeError, "has method called on wrong message type"); } else if (!upb_fielddef_haspresence(self->fielddef)) { rb_raise(rb_eArgError, "does not track presence"); } - return layout_has(msg->descriptor->layout, Message_data(msg), self->fielddef); + return upb_msg_has(msg, self->fielddef) ? Qtrue : Qfalse; } /* @@ -1121,15 +999,16 @@ VALUE FieldDescriptor_has(VALUE _self, VALUE msg_rb) { * * Clears the field from the message if it's set. */ -VALUE FieldDescriptor_clear(VALUE _self, VALUE msg_rb) { - DEFINE_SELF(FieldDescriptor, self, _self); - MessageHeader* msg; - TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); - if (msg->descriptor->msgdef != upb_fielddef_containingtype(self->fielddef)) { +static VALUE FieldDescriptor_clear(VALUE _self, VALUE msg_rb) { + FieldDescriptor* self = ruby_to_FieldDescriptor(_self); + const upb_msgdef *m; + upb_msgdef *msg = Message_GetMutable(msg_rb, &m); + + if (m != upb_fielddef_containingtype(self->fielddef)) { rb_raise(cTypeError, "has method called on wrong message type"); } - layout_clear(msg->descriptor->layout, Message_data(msg), self->fielddef); + upb_msg_clearfield(msg, self->fielddef); return Qnil; } @@ -1141,30 +1020,69 @@ VALUE FieldDescriptor_clear(VALUE _self, VALUE msg_rb) { * message. Raises an exception if message is of the wrong type. Performs the * ordinary type-checks for field setting. */ -VALUE FieldDescriptor_set(VALUE _self, VALUE msg_rb, VALUE value) { - DEFINE_SELF(FieldDescriptor, self, _self); - MessageHeader* msg; - TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); - if (msg->descriptor->msgdef != upb_fielddef_containingtype(self->fielddef)) { +static VALUE FieldDescriptor_set(VALUE _self, VALUE msg_rb, VALUE value) { + FieldDescriptor* self = ruby_to_FieldDescriptor(_self); + const upb_msgdef *m; + upb_msgdef *msg = Message_GetMutable(msg_rb, &m); + upb_arena *arena = Arena_get(Message_GetArena(msg_rb)); + upb_msgval msgval; + + if (m != upb_fielddef_containingtype(self->fielddef)) { rb_raise(cTypeError, "set method called on wrong message type"); } - layout_set(msg->descriptor->layout, Message_data(msg), self->fielddef, value); + + msgval = Convert_RubyToUpb(value, upb_fielddef_name(self->fielddef), + TypeInfo_get(self->fielddef), arena); + upb_msg_set(msg, self->fielddef, msgval, arena); return Qnil; } +static void FieldDescriptor_register(VALUE module) { + VALUE klass = rb_define_class_under( + module, "FieldDescriptor", rb_cObject); + rb_define_alloc_func(klass, FieldDescriptor_alloc); + rb_define_method(klass, "initialize", FieldDescriptor_initialize, 3); + rb_define_method(klass, "name", FieldDescriptor_name, 0); + rb_define_method(klass, "type", FieldDescriptor__type, 0); + rb_define_method(klass, "default", FieldDescriptor_default, 0); + rb_define_method(klass, "label", FieldDescriptor_label, 0); + rb_define_method(klass, "number", FieldDescriptor_number, 0); + rb_define_method(klass, "submsg_name", FieldDescriptor_submsg_name, 0); + rb_define_method(klass, "subtype", FieldDescriptor_subtype, 0); + rb_define_method(klass, "has?", FieldDescriptor_has, 1); + rb_define_method(klass, "clear", FieldDescriptor_clear, 1); + rb_define_method(klass, "get", FieldDescriptor_get, 1); + rb_define_method(klass, "set", FieldDescriptor_set, 2); + rb_gc_register_address(&cFieldDescriptor); + cFieldDescriptor = klass; +} + // ----------------------------------------------------------------------------- // OneofDescriptor. // ----------------------------------------------------------------------------- -DEFINE_CLASS(OneofDescriptor, "Google::Protobuf::OneofDescriptor"); +typedef struct { + const upb_oneofdef* oneofdef; + VALUE descriptor_pool; // Owns the upb_oneofdef. +} OneofDescriptor; + +static VALUE cOneofDescriptor = Qnil; -void OneofDescriptor_mark(void* _self) { +static void OneofDescriptor_mark(void* _self) { OneofDescriptor* self = _self; rb_gc_mark(self->descriptor_pool); } -void OneofDescriptor_free(void* _self) { - xfree(_self); +static const rb_data_type_t OneofDescriptor_type = { + "Google::Protobuf::OneofDescriptor", + {OneofDescriptor_mark, RUBY_DEFAULT_FREE, NULL}, + .flags = RUBY_TYPED_FREE_IMMEDIATELY, +}; + +static OneofDescriptor* ruby_to_OneofDescriptor(VALUE val) { + OneofDescriptor* ret; + TypedData_Get_Struct(val, OneofDescriptor, &OneofDescriptor_type, ret); + return ret; } /* @@ -1174,35 +1092,23 @@ void OneofDescriptor_free(void* _self) { * Creates a new, empty, oneof descriptor. The oneof may only be modified prior * to being added to a message descriptor which is subsequently added to a pool. */ -VALUE OneofDescriptor_alloc(VALUE klass) { +static VALUE OneofDescriptor_alloc(VALUE klass) { OneofDescriptor* self = ALLOC(OneofDescriptor); - VALUE ret = TypedData_Wrap_Struct(klass, &_OneofDescriptor_type, self); + VALUE ret = TypedData_Wrap_Struct(klass, &OneofDescriptor_type, self); self->oneofdef = NULL; self->descriptor_pool = Qnil; return ret; } -void OneofDescriptor_register(VALUE module) { - VALUE klass = rb_define_class_under( - module, "OneofDescriptor", rb_cObject); - rb_define_alloc_func(klass, OneofDescriptor_alloc); - rb_define_method(klass, "initialize", OneofDescriptor_initialize, 3); - rb_define_method(klass, "name", OneofDescriptor_name, 0); - rb_define_method(klass, "each", OneofDescriptor_each, 0); - rb_include_module(klass, rb_mEnumerable); - rb_gc_register_address(&cOneofDescriptor); - cOneofDescriptor = klass; -} - /* * call-seq: * OneofDescriptor.new(c_only_cookie, pool, ptr) => OneofDescriptor * * Creates a descriptor wrapper object. May only be called from C. */ -VALUE OneofDescriptor_initialize(VALUE _self, VALUE cookie, +static VALUE OneofDescriptor_initialize(VALUE _self, VALUE cookie, VALUE descriptor_pool, VALUE ptr) { - DEFINE_SELF(OneofDescriptor, self, _self); + OneofDescriptor* self = ruby_to_OneofDescriptor(_self); if (cookie != c_only_cookie) { rb_raise(rb_eRuntimeError, @@ -1221,8 +1127,8 @@ VALUE OneofDescriptor_initialize(VALUE _self, VALUE cookie, * * Returns the name of this oneof. */ -VALUE OneofDescriptor_name(VALUE _self) { - DEFINE_SELF(OneofDescriptor, self, _self); +static VALUE OneofDescriptor_name(VALUE _self) { + OneofDescriptor* self = ruby_to_OneofDescriptor(_self); return rb_str_maybe_null(upb_oneofdef_name(self->oneofdef)); } @@ -1232,8 +1138,8 @@ VALUE OneofDescriptor_name(VALUE _self) { * * Iterates through fields in this oneof, yielding to the block on each one. */ -VALUE OneofDescriptor_each(VALUE _self) { - DEFINE_SELF(OneofDescriptor, self, _self); +static VALUE OneofDescriptor_each(VALUE _self) { + OneofDescriptor* self = ruby_to_OneofDescriptor(_self); upb_oneof_iter it; for (upb_oneof_begin(&it, self->oneofdef); !upb_oneof_done(&it); @@ -1245,40 +1151,72 @@ VALUE OneofDescriptor_each(VALUE _self) { return Qnil; } +static void OneofDescriptor_register(VALUE module) { + VALUE klass = rb_define_class_under( + module, "OneofDescriptor", rb_cObject); + rb_define_alloc_func(klass, OneofDescriptor_alloc); + rb_define_method(klass, "initialize", OneofDescriptor_initialize, 3); + rb_define_method(klass, "name", OneofDescriptor_name, 0); + rb_define_method(klass, "each", OneofDescriptor_each, 0); + rb_include_module(klass, rb_mEnumerable); + rb_gc_register_address(&cOneofDescriptor); + cOneofDescriptor = klass; +} + // ----------------------------------------------------------------------------- // EnumDescriptor. // ----------------------------------------------------------------------------- -DEFINE_CLASS(EnumDescriptor, "Google::Protobuf::EnumDescriptor"); +typedef struct { + const upb_enumdef* enumdef; + VALUE module; // begins as nil + VALUE descriptor_pool; // Owns the upb_enumdef. +} EnumDescriptor; + +static VALUE cEnumDescriptor = Qnil; -void EnumDescriptor_mark(void* _self) { +static void EnumDescriptor_mark(void* _self) { EnumDescriptor* self = _self; rb_gc_mark(self->module); rb_gc_mark(self->descriptor_pool); } -void EnumDescriptor_free(void* _self) { - xfree(_self); +static const rb_data_type_t EnumDescriptor_type = { + "Google::Protobuf::EnumDescriptor", + {EnumDescriptor_mark, RUBY_DEFAULT_FREE, NULL}, + .flags = RUBY_TYPED_FREE_IMMEDIATELY, +}; + +static EnumDescriptor* ruby_to_EnumDescriptor(VALUE val) { + EnumDescriptor* ret; + TypedData_Get_Struct(val, EnumDescriptor, &EnumDescriptor_type, ret); + return ret; } -VALUE EnumDescriptor_alloc(VALUE klass) { +static VALUE EnumDescriptor_alloc(VALUE klass) { EnumDescriptor* self = ALLOC(EnumDescriptor); - VALUE ret = TypedData_Wrap_Struct(klass, &_EnumDescriptor_type, self); + VALUE ret = TypedData_Wrap_Struct(klass, &EnumDescriptor_type, self); self->enumdef = NULL; self->module = Qnil; self->descriptor_pool = Qnil; return ret; } +// Exposed to other modules in defs.h. +const upb_enumdef *EnumDescriptor_GetEnumDef(VALUE enum_desc_rb) { + EnumDescriptor *desc = ruby_to_EnumDescriptor(enum_desc_rb); + return desc->enumdef; +} + /* * call-seq: * EnumDescriptor.new(c_only_cookie, ptr) => EnumDescriptor * * Creates a descriptor wrapper object. May only be called from C. */ -VALUE EnumDescriptor_initialize(VALUE _self, VALUE cookie, - VALUE descriptor_pool, VALUE ptr) { - DEFINE_SELF(EnumDescriptor, self, _self); +static VALUE EnumDescriptor_initialize(VALUE _self, VALUE cookie, + VALUE descriptor_pool, VALUE ptr) { + EnumDescriptor* self = ruby_to_EnumDescriptor(_self); if (cookie != c_only_cookie) { rb_raise(rb_eRuntimeError, @@ -1291,30 +1229,14 @@ VALUE EnumDescriptor_initialize(VALUE _self, VALUE cookie, return Qnil; } -void EnumDescriptor_register(VALUE module) { - VALUE klass = rb_define_class_under( - module, "EnumDescriptor", rb_cObject); - rb_define_alloc_func(klass, EnumDescriptor_alloc); - rb_define_method(klass, "initialize", EnumDescriptor_initialize, 3); - rb_define_method(klass, "name", EnumDescriptor_name, 0); - rb_define_method(klass, "lookup_name", EnumDescriptor_lookup_name, 1); - rb_define_method(klass, "lookup_value", EnumDescriptor_lookup_value, 1); - rb_define_method(klass, "each", EnumDescriptor_each, 0); - rb_define_method(klass, "enummodule", EnumDescriptor_enummodule, 0); - rb_define_method(klass, "file_descriptor", EnumDescriptor_file_descriptor, 0); - rb_include_module(klass, rb_mEnumerable); - rb_gc_register_address(&cEnumDescriptor); - cEnumDescriptor = klass; -} - /* * call-seq: * EnumDescriptor.file_descriptor * * Returns the FileDescriptor object this enum belongs to. */ -VALUE EnumDescriptor_file_descriptor(VALUE _self) { - DEFINE_SELF(EnumDescriptor, self, _self); +static VALUE EnumDescriptor_file_descriptor(VALUE _self) { + EnumDescriptor* self = ruby_to_EnumDescriptor(_self); return get_filedef_obj(self->descriptor_pool, upb_enumdef_file(self->enumdef)); } @@ -1325,8 +1247,8 @@ VALUE EnumDescriptor_file_descriptor(VALUE _self) { * * Returns the name of this enum type. */ -VALUE EnumDescriptor_name(VALUE _self) { - DEFINE_SELF(EnumDescriptor, self, _self); +static VALUE EnumDescriptor_name(VALUE _self) { + EnumDescriptor* self = ruby_to_EnumDescriptor(_self); return rb_str_maybe_null(upb_enumdef_fullname(self->enumdef)); } @@ -1337,8 +1259,8 @@ VALUE EnumDescriptor_name(VALUE _self) { * Returns the numeric value corresponding to the given key name (as a Ruby * symbol), or nil if none. */ -VALUE EnumDescriptor_lookup_name(VALUE _self, VALUE name) { - DEFINE_SELF(EnumDescriptor, self, _self); +static VALUE EnumDescriptor_lookup_name(VALUE _self, VALUE name) { + EnumDescriptor* self = ruby_to_EnumDescriptor(_self); const char* name_str= rb_id2name(SYM2ID(name)); int32_t val = 0; if (upb_enumdef_ntoiz(self->enumdef, name_str, &val)) { @@ -1355,8 +1277,8 @@ VALUE EnumDescriptor_lookup_name(VALUE _self, VALUE name) { * Returns the key name (as a Ruby symbol) corresponding to the integer value, * or nil if none. */ -VALUE EnumDescriptor_lookup_value(VALUE _self, VALUE number) { - DEFINE_SELF(EnumDescriptor, self, _self); +static VALUE EnumDescriptor_lookup_value(VALUE _self, VALUE number) { + EnumDescriptor* self = ruby_to_EnumDescriptor(_self); int32_t val = NUM2INT(number); const char* name = upb_enumdef_iton(self->enumdef, val); if (name != NULL) { @@ -1373,8 +1295,8 @@ VALUE EnumDescriptor_lookup_value(VALUE _self, VALUE number) { * Iterates over key => value mappings in this enum's definition, yielding to * the block with (key, value) arguments for each one. */ -VALUE EnumDescriptor_each(VALUE _self) { - DEFINE_SELF(EnumDescriptor, self, _self); +static VALUE EnumDescriptor_each(VALUE _self) { + EnumDescriptor* self = ruby_to_EnumDescriptor(_self); upb_enum_iter it; for (upb_enum_begin(&it, self->enumdef); @@ -1394,53 +1316,365 @@ VALUE EnumDescriptor_each(VALUE _self) { * * Returns the Ruby module corresponding to this enum type. */ -VALUE EnumDescriptor_enummodule(VALUE _self) { - DEFINE_SELF(EnumDescriptor, self, _self); +static VALUE EnumDescriptor_enummodule(VALUE _self) { + EnumDescriptor* self = ruby_to_EnumDescriptor(_self); if (self->module == Qnil) { self->module = build_module_from_enumdesc(_self); } return self->module; } +static void EnumDescriptor_register(VALUE module) { + VALUE klass = rb_define_class_under( + module, "EnumDescriptor", rb_cObject); + rb_define_alloc_func(klass, EnumDescriptor_alloc); + rb_define_method(klass, "initialize", EnumDescriptor_initialize, 3); + rb_define_method(klass, "name", EnumDescriptor_name, 0); + rb_define_method(klass, "lookup_name", EnumDescriptor_lookup_name, 1); + rb_define_method(klass, "lookup_value", EnumDescriptor_lookup_value, 1); + rb_define_method(klass, "each", EnumDescriptor_each, 0); + rb_define_method(klass, "enummodule", EnumDescriptor_enummodule, 0); + rb_define_method(klass, "file_descriptor", EnumDescriptor_file_descriptor, 0); + rb_include_module(klass, rb_mEnumerable); + rb_gc_register_address(&cEnumDescriptor); + cEnumDescriptor = klass; +} + +// ----------------------------------------------------------------------------- +// FileBuilderContext. +// ----------------------------------------------------------------------------- + +typedef struct { + upb_arena *arena; + google_protobuf_FileDescriptorProto* file_proto; + VALUE descriptor_pool; +} FileBuilderContext; + +static VALUE cFileBuilderContext = Qnil; + +static void FileBuilderContext_mark(void* _self) { + FileBuilderContext* self = _self; + rb_gc_mark(self->descriptor_pool); +} + +static void FileBuilderContext_free(void* _self) { + FileBuilderContext* self = _self; + upb_arena_free(self->arena); + xfree(self); +} + +static const rb_data_type_t FileBuilderContext_type = { + "Google::Protobuf::Internal::FileBuilderContext", + {FileBuilderContext_mark, FileBuilderContext_free, NULL}, + .flags = RUBY_TYPED_FREE_IMMEDIATELY, +}; + +static FileBuilderContext* ruby_to_FileBuilderContext(VALUE val) { + FileBuilderContext* ret; + TypedData_Get_Struct(val, FileBuilderContext, &FileBuilderContext_type, ret); + return ret; +} + +static upb_strview FileBuilderContext_strdup2(VALUE _self, const char *str) { + FileBuilderContext* self = ruby_to_FileBuilderContext(_self); + upb_strview ret; + char *data; + + ret.size = strlen(str); + data = upb_malloc(upb_arena_alloc(self->arena), ret.size + 1); + ret.data = data; + memcpy(data, str, ret.size); + /* Null-terminate required by rewrite_enum_defaults() above. */ + data[ret.size] = '\0'; + return ret; +} + +static upb_strview FileBuilderContext_strdup(VALUE _self, VALUE rb_str) { + return FileBuilderContext_strdup2(_self, get_str(rb_str)); +} + +static upb_strview FileBuilderContext_strdup_sym(VALUE _self, VALUE rb_sym) { + Check_Type(rb_sym, T_SYMBOL); + return FileBuilderContext_strdup(_self, rb_id2str(SYM2ID(rb_sym))); +} + +static VALUE FileBuilderContext_alloc(VALUE klass) { + FileBuilderContext* self = ALLOC(FileBuilderContext); + VALUE ret = TypedData_Wrap_Struct(klass, &FileBuilderContext_type, self); + self->arena = upb_arena_new(); + self->file_proto = google_protobuf_FileDescriptorProto_new(self->arena); + self->descriptor_pool = Qnil; + return ret; +} + +/* + * call-seq: + * FileBuilderContext.new(descriptor_pool) => context + * + * Create a new file builder context for the given file descriptor and + * builder context. This class is intended to serve as a DSL context to be used + * with #instance_eval. + */ +static VALUE FileBuilderContext_initialize(VALUE _self, VALUE descriptor_pool, + VALUE name, VALUE options) { + FileBuilderContext* self = ruby_to_FileBuilderContext(_self); + self->descriptor_pool = descriptor_pool; + + google_protobuf_FileDescriptorProto_set_name( + self->file_proto, FileBuilderContext_strdup(_self, name)); + + // Default syntax for Ruby is proto3. + google_protobuf_FileDescriptorProto_set_syntax( + self->file_proto, + FileBuilderContext_strdup(_self, rb_str_new2("proto3"))); + + if (options != Qnil) { + VALUE syntax; + + Check_Type(options, T_HASH); + syntax = rb_hash_lookup2(options, ID2SYM(rb_intern("syntax")), Qnil); + + if (syntax != Qnil) { + VALUE syntax_str; + + Check_Type(syntax, T_SYMBOL); + syntax_str = rb_id2str(SYM2ID(syntax)); + google_protobuf_FileDescriptorProto_set_syntax( + self->file_proto, FileBuilderContext_strdup(_self, syntax_str)); + } + } + + return Qnil; +} + +static void MessageBuilderContext_add_synthetic_oneofs(VALUE _self); + +/* + * call-seq: + * FileBuilderContext.add_message(name, &block) + * + * Creates a new, empty descriptor with the given name, and invokes the block in + * the context of a MessageBuilderContext on that descriptor. The block can then + * call, e.g., MessageBuilderContext#optional and MessageBuilderContext#repeated + * methods to define the message fields. + * + * This is the recommended, idiomatic way to build message definitions. + */ +static VALUE FileBuilderContext_add_message(VALUE _self, VALUE name) { + VALUE args[2] = { _self, name }; + VALUE ctx = rb_class_new_instance(2, args, cMessageBuilderContext); + VALUE block = rb_block_proc(); + rb_funcall_with_block(ctx, rb_intern("instance_eval"), 0, NULL, block); + MessageBuilderContext_add_synthetic_oneofs(ctx); + return Qnil; +} + +/* We have to do some relatively complicated logic here for backward + * compatibility. + * + * In descriptor.proto, messages are nested inside other messages if that is + * what the original .proto file looks like. For example, suppose we have this + * foo.proto: + * + * package foo; + * message Bar { + * message Baz {} + * } + * + * The descriptor for this must look like this: + * + * file { + * name: "test.proto" + * package: "foo" + * message_type { + * name: "Bar" + * nested_type { + * name: "Baz" + * } + * } + * } + * + * However, the Ruby generated code has always generated messages in a flat, + * non-nested way: + * + * Google::Protobuf::DescriptorPool.generated_pool.build do + * add_message "foo.Bar" do + * end + * add_message "foo.Bar.Baz" do + * end + * end + * + * Here we need to do a translation where we turn this generated code into the + * above descriptor. We need to infer that "foo" is the package name, and not + * a message itself. + * + * We delegate to Ruby to compute the transformation, for more concice and + * readable code than we can do in C */ +static void rewrite_names(VALUE _file_builder, + google_protobuf_FileDescriptorProto* file_proto) { + FileBuilderContext* file_builder = ruby_to_FileBuilderContext(_file_builder); + upb_arena *arena = file_builder->arena; + // Build params (package, msg_names, enum_names). + VALUE package = Qnil; + VALUE msg_names = rb_ary_new(); + VALUE enum_names = rb_ary_new(); + size_t msg_count, enum_count, i; + VALUE new_package, nesting, msg_ents, enum_ents; + google_protobuf_DescriptorProto** msgs; + google_protobuf_EnumDescriptorProto** enums; + + if (google_protobuf_FileDescriptorProto_has_package(file_proto)) { + upb_strview package_str = + google_protobuf_FileDescriptorProto_package(file_proto); + package = rb_str_new(package_str.data, package_str.size); + } + + msgs = google_protobuf_FileDescriptorProto_mutable_message_type(file_proto, + &msg_count); + for (i = 0; i < msg_count; i++) { + upb_strview name = google_protobuf_DescriptorProto_name(msgs[i]); + rb_ary_push(msg_names, rb_str_new(name.data, name.size)); + } + + enums = google_protobuf_FileDescriptorProto_mutable_enum_type(file_proto, + &enum_count); + for (i = 0; i < enum_count; i++) { + upb_strview name = google_protobuf_EnumDescriptorProto_name(enums[i]); + rb_ary_push(enum_names, rb_str_new(name.data, name.size)); + } + + { + // Call Ruby code to calculate package name and nesting. + VALUE args[3] = { package, msg_names, enum_names }; + VALUE internal = rb_eval_string("Google::Protobuf::Internal"); + VALUE ret = rb_funcallv(internal, rb_intern("fixup_descriptor"), 3, args); + + new_package = rb_ary_entry(ret, 0); + nesting = rb_ary_entry(ret, 1); + } + + // Rewrite package and names. + if (new_package != Qnil) { + upb_strview new_package_str = + FileBuilderContext_strdup(_file_builder, new_package); + google_protobuf_FileDescriptorProto_set_package(file_proto, + new_package_str); + } + + for (i = 0; i < msg_count; i++) { + upb_strview name = google_protobuf_DescriptorProto_name(msgs[i]); + remove_path(&name); + google_protobuf_DescriptorProto_set_name(msgs[i], name); + } + + for (i = 0; i < enum_count; i++) { + upb_strview name = google_protobuf_EnumDescriptorProto_name(enums[i]); + remove_path(&name); + google_protobuf_EnumDescriptorProto_set_name(enums[i], name); + } + + // Rewrite nesting. + msg_ents = rb_hash_aref(nesting, ID2SYM(rb_intern("msgs"))); + enum_ents = rb_hash_aref(nesting, ID2SYM(rb_intern("enums"))); + + Check_Type(msg_ents, T_ARRAY); + Check_Type(enum_ents, T_ARRAY); + + for (i = 0; i < (size_t)RARRAY_LEN(msg_ents); i++) { + VALUE msg_ent = rb_ary_entry(msg_ents, i); + VALUE pos = rb_hash_aref(msg_ent, ID2SYM(rb_intern("pos"))); + msgs[i] = msgs[NUM2INT(pos)]; + rewrite_nesting(msg_ent, msgs[i], msgs, enums, arena); + } + + for (i = 0; i < (size_t)RARRAY_LEN(enum_ents); i++) { + VALUE enum_pos = rb_ary_entry(enum_ents, i); + enums[i] = enums[NUM2INT(enum_pos)]; + } + + google_protobuf_FileDescriptorProto_resize_message_type( + file_proto, RARRAY_LEN(msg_ents), arena); + google_protobuf_FileDescriptorProto_resize_enum_type( + file_proto, RARRAY_LEN(enum_ents), arena); +} + +/* + * call-seq: + * FileBuilderContext.add_enum(name, &block) + * + * Creates a new, empty enum descriptor with the given name, and invokes the + * block in the context of an EnumBuilderContext on that descriptor. The block + * can then call EnumBuilderContext#add_value to define the enum values. + * + * This is the recommended, idiomatic way to build enum definitions. + */ +static VALUE FileBuilderContext_add_enum(VALUE _self, VALUE name) { + VALUE args[2] = { _self, name }; + VALUE ctx = rb_class_new_instance(2, args, cEnumBuilderContext); + VALUE block = rb_block_proc(); + rb_funcall_with_block(ctx, rb_intern("instance_eval"), 0, NULL, block); + return Qnil; +} + +static void FileBuilderContext_build(VALUE _self) { + FileBuilderContext* self = ruby_to_FileBuilderContext(_self); + DescriptorPool* pool = ruby_to_DescriptorPool(self->descriptor_pool); + upb_status status; + + rewrite_enum_defaults(pool->symtab, self->file_proto); + rewrite_names(_self, self->file_proto); + + upb_status_clear(&status); + if (!upb_symtab_addfile(pool->symtab, self->file_proto, &status)) { + rb_raise(cTypeError, "Unable to add defs to DescriptorPool: %s", + upb_status_errmsg(&status)); + } +} + +static void FileBuilderContext_register(VALUE module) { + VALUE klass = rb_define_class_under(module, "FileBuilderContext", rb_cObject); + rb_define_alloc_func(klass, FileBuilderContext_alloc); + rb_define_method(klass, "initialize", FileBuilderContext_initialize, 3); + rb_define_method(klass, "add_message", FileBuilderContext_add_message, 1); + rb_define_method(klass, "add_enum", FileBuilderContext_add_enum, 1); + rb_gc_register_address(&cFileBuilderContext); + cFileBuilderContext = klass; +} + // ----------------------------------------------------------------------------- // MessageBuilderContext. // ----------------------------------------------------------------------------- -DEFINE_CLASS(MessageBuilderContext, - "Google::Protobuf::Internal::MessageBuilderContext"); +typedef struct { + google_protobuf_DescriptorProto* msg_proto; + VALUE file_builder; +} MessageBuilderContext; + +static VALUE cMessageBuilderContext = Qnil; -void MessageBuilderContext_mark(void* _self) { +static void MessageBuilderContext_mark(void* _self) { MessageBuilderContext* self = _self; rb_gc_mark(self->file_builder); } -void MessageBuilderContext_free(void* _self) { - MessageBuilderContext* self = _self; - xfree(self); -} +static const rb_data_type_t MessageBuilderContext_type = { + "Google::Protobuf::Internal::MessageBuilderContext", + {MessageBuilderContext_mark, RUBY_DEFAULT_FREE, NULL}, + .flags = RUBY_TYPED_FREE_IMMEDIATELY, +}; -VALUE MessageBuilderContext_alloc(VALUE klass) { - MessageBuilderContext* self = ALLOC(MessageBuilderContext); - VALUE ret = TypedData_Wrap_Struct( - klass, &_MessageBuilderContext_type, self); - self->file_builder = Qnil; +static MessageBuilderContext* ruby_to_MessageBuilderContext(VALUE val) { + MessageBuilderContext* ret; + TypedData_Get_Struct(val, MessageBuilderContext, &MessageBuilderContext_type, + ret); return ret; } -void MessageBuilderContext_register(VALUE module) { - VALUE klass = rb_define_class_under( - module, "MessageBuilderContext", rb_cObject); - rb_define_alloc_func(klass, MessageBuilderContext_alloc); - rb_define_method(klass, "initialize", - MessageBuilderContext_initialize, 2); - rb_define_method(klass, "optional", MessageBuilderContext_optional, -1); - rb_define_method(klass, "proto3_optional", MessageBuilderContext_proto3_optional, -1); - rb_define_method(klass, "required", MessageBuilderContext_required, -1); - rb_define_method(klass, "repeated", MessageBuilderContext_repeated, -1); - rb_define_method(klass, "map", MessageBuilderContext_map, -1); - rb_define_method(klass, "oneof", MessageBuilderContext_oneof, 1); - rb_gc_register_address(&cMessageBuilderContext); - cMessageBuilderContext = klass; +static VALUE MessageBuilderContext_alloc(VALUE klass) { + MessageBuilderContext* self = ALLOC(MessageBuilderContext); + VALUE ret = TypedData_Wrap_Struct(klass, &MessageBuilderContext_type, self); + self->file_builder = Qnil; + return ret; } /* @@ -1451,10 +1685,9 @@ void MessageBuilderContext_register(VALUE module) { * builder context. This class is intended to serve as a DSL context to be used * with #instance_eval. */ -VALUE MessageBuilderContext_initialize(VALUE _self, - VALUE _file_builder, - VALUE name) { - DEFINE_SELF(MessageBuilderContext, self, _self); +static VALUE MessageBuilderContext_initialize(VALUE _self, VALUE _file_builder, + VALUE name) { + MessageBuilderContext* self = ruby_to_MessageBuilderContext(_self); FileBuilderContext* file_builder = ruby_to_FileBuilderContext(_file_builder); google_protobuf_FileDescriptorProto* file_proto = file_builder->file_proto; @@ -1472,7 +1705,7 @@ static void msgdef_add_field(VALUE msgbuilder_rb, upb_label_t label, VALUE name, VALUE type, VALUE number, VALUE type_class, VALUE options, int oneof_index, bool proto3_optional) { - DEFINE_SELF(MessageBuilderContext, self, msgbuilder_rb); + MessageBuilderContext* self = ruby_to_MessageBuilderContext(msgbuilder_rb); FileBuilderContext* file_context = ruby_to_FileBuilderContext(self->file_builder); google_protobuf_FieldDescriptorProto* field_proto; @@ -1527,9 +1760,16 @@ static void msgdef_add_field(VALUE msgbuilder_rb, upb_label_t label, VALUE name, } } +#if RUBY_API_VERSION_CODE >= 20700 +static VALUE make_mapentry(VALUE _message_builder, VALUE types, int argc, + const VALUE* argv, VALUE blockarg) { + (void)blockarg; +#else static VALUE make_mapentry(VALUE _message_builder, VALUE types, int argc, VALUE* argv) { - DEFINE_SELF(MessageBuilderContext, message_builder, _message_builder); +#endif + MessageBuilderContext* message_builder = + ruby_to_MessageBuilderContext(_message_builder); VALUE type_class = rb_ary_entry(types, 2); FileBuilderContext* file_context = ruby_to_FileBuilderContext(message_builder->file_builder); @@ -1596,8 +1836,8 @@ VALUE MessageBuilderContext_optional(int argc, VALUE* argv, VALUE _self) { * FieldDescriptor#type=) and the type_class must be a string, if present (as * accepted by FieldDescriptor#submsg_name=). */ -VALUE MessageBuilderContext_proto3_optional(int argc, VALUE* argv, - VALUE _self) { +static VALUE MessageBuilderContext_proto3_optional(int argc, VALUE* argv, + VALUE _self) { VALUE name, type, number; VALUE type_class, options = Qnil; @@ -1630,7 +1870,8 @@ VALUE MessageBuilderContext_proto3_optional(int argc, VALUE* argv, * completeness. Any attempt to add a message type with required fields to a * pool will currently result in an error. */ -VALUE MessageBuilderContext_required(int argc, VALUE* argv, VALUE _self) { +static VALUE MessageBuilderContext_required(int argc, VALUE* argv, + VALUE _self) { VALUE name, type, number; VALUE type_class, options = Qnil; @@ -1658,7 +1899,8 @@ VALUE MessageBuilderContext_required(int argc, VALUE* argv, VALUE _self) { * symbol (as accepted by FieldDescriptor#type=) and the type_class must be a * string, if present (as accepted by FieldDescriptor#submsg_name=). */ -VALUE MessageBuilderContext_repeated(int argc, VALUE* argv, VALUE _self) { +static VALUE MessageBuilderContext_repeated(int argc, VALUE* argv, + VALUE _self) { VALUE name, type, number, type_class; if (argc < 3) { @@ -1687,8 +1929,8 @@ VALUE MessageBuilderContext_repeated(int argc, VALUE* argv, VALUE _self) { * type_class must be a string, if present (as accepted by * FieldDescriptor#submsg_name=). */ -VALUE MessageBuilderContext_map(int argc, VALUE* argv, VALUE _self) { - DEFINE_SELF(MessageBuilderContext, self, _self); +static VALUE MessageBuilderContext_map(int argc, VALUE* argv, VALUE _self) { + MessageBuilderContext* self = ruby_to_MessageBuilderContext(_self); VALUE name, key_type, value_type, number, type_class; VALUE mapentry_desc_name; FileBuilderContext* file_builder; @@ -1717,14 +1959,6 @@ VALUE MessageBuilderContext_map(int argc, VALUE* argv, VALUE _self) { file_builder = ruby_to_FileBuilderContext(self->file_builder); - // TODO(haberman): remove this restriction, maps are supported in proto2. - if (upb_strview_eql( - google_protobuf_FileDescriptorProto_syntax(file_builder->file_proto), - upb_strview_makez("proto2"))) { - rb_raise(rb_eArgError, - "Cannot add a native map field using proto2 syntax."); - } - // Create a new message descriptor for the map entry message, and create a // repeated submessage field here with that type. msg_name = google_protobuf_DescriptorProto_name(self->msg_proto); @@ -1768,8 +2002,8 @@ VALUE MessageBuilderContext_map(int argc, VALUE* argv, VALUE _self) { * * This is the recommended, idiomatic way to build oneof definitions. */ -VALUE MessageBuilderContext_oneof(VALUE _self, VALUE name) { - DEFINE_SELF(MessageBuilderContext, self, _self); +static VALUE MessageBuilderContext_oneof(VALUE _self, VALUE name) { + MessageBuilderContext* self = ruby_to_MessageBuilderContext(_self); size_t oneof_count; FileBuilderContext* file_context = ruby_to_FileBuilderContext(self->file_builder); @@ -1795,8 +2029,8 @@ VALUE MessageBuilderContext_oneof(VALUE _self, VALUE name) { return Qnil; } -void MessageBuilderContext_add_synthetic_oneofs(VALUE _self) { - DEFINE_SELF(MessageBuilderContext, self, _self); +static void MessageBuilderContext_add_synthetic_oneofs(VALUE _self) { + MessageBuilderContext* self = ruby_to_MessageBuilderContext(_self); FileBuilderContext* file_context = ruby_to_FileBuilderContext(self->file_builder); size_t field_count, oneof_count; @@ -1845,42 +2079,59 @@ void MessageBuilderContext_add_synthetic_oneofs(VALUE _self) { } } +static void MessageBuilderContext_register(VALUE module) { + VALUE klass = rb_define_class_under( + module, "MessageBuilderContext", rb_cObject); + rb_define_alloc_func(klass, MessageBuilderContext_alloc); + rb_define_method(klass, "initialize", + MessageBuilderContext_initialize, 2); + rb_define_method(klass, "optional", MessageBuilderContext_optional, -1); + rb_define_method(klass, "proto3_optional", MessageBuilderContext_proto3_optional, -1); + rb_define_method(klass, "required", MessageBuilderContext_required, -1); + rb_define_method(klass, "repeated", MessageBuilderContext_repeated, -1); + rb_define_method(klass, "map", MessageBuilderContext_map, -1); + rb_define_method(klass, "oneof", MessageBuilderContext_oneof, 1); + rb_gc_register_address(&cMessageBuilderContext); + cMessageBuilderContext = klass; +} + // ----------------------------------------------------------------------------- // OneofBuilderContext. // ----------------------------------------------------------------------------- -DEFINE_CLASS(OneofBuilderContext, - "Google::Protobuf::Internal::OneofBuilderContext"); +typedef struct { + int oneof_index; + VALUE message_builder; +} OneofBuilderContext; + +static VALUE cOneofBuilderContext = Qnil; void OneofBuilderContext_mark(void* _self) { OneofBuilderContext* self = _self; rb_gc_mark(self->message_builder); } -void OneofBuilderContext_free(void* _self) { - xfree(_self); +static const rb_data_type_t OneofBuilderContext_type = { + "Google::Protobuf::Internal::OneofBuilderContext", + {OneofBuilderContext_mark, RUBY_DEFAULT_FREE, NULL}, + .flags = RUBY_TYPED_FREE_IMMEDIATELY, +}; + +static OneofBuilderContext* ruby_to_OneofBuilderContext(VALUE val) { + OneofBuilderContext* ret; + TypedData_Get_Struct(val, OneofBuilderContext, &OneofBuilderContext_type, + ret); + return ret; } -VALUE OneofBuilderContext_alloc(VALUE klass) { +static VALUE OneofBuilderContext_alloc(VALUE klass) { OneofBuilderContext* self = ALLOC(OneofBuilderContext); - VALUE ret = TypedData_Wrap_Struct( - klass, &_OneofBuilderContext_type, self); + VALUE ret = TypedData_Wrap_Struct(klass, &OneofBuilderContext_type, self); self->oneof_index = 0; self->message_builder = Qnil; return ret; } -void OneofBuilderContext_register(VALUE module) { - VALUE klass = rb_define_class_under( - module, "OneofBuilderContext", rb_cObject); - rb_define_alloc_func(klass, OneofBuilderContext_alloc); - rb_define_method(klass, "initialize", - OneofBuilderContext_initialize, 2); - rb_define_method(klass, "optional", OneofBuilderContext_optional, -1); - rb_gc_register_address(&cOneofBuilderContext); - cOneofBuilderContext = klass; -} - /* * call-seq: * OneofBuilderContext.new(oneof_index, message_builder) => context @@ -1889,10 +2140,9 @@ void OneofBuilderContext_register(VALUE module) { * builder context. This class is intended to serve as a DSL context to be used * with #instance_eval. */ -VALUE OneofBuilderContext_initialize(VALUE _self, - VALUE oneof_index, - VALUE message_builder) { - DEFINE_SELF(OneofBuilderContext, self, _self); +static VALUE OneofBuilderContext_initialize(VALUE _self, VALUE oneof_index, + VALUE message_builder) { + OneofBuilderContext* self = ruby_to_OneofBuilderContext(_self); self->oneof_index = NUM2INT(oneof_index); self->message_builder = message_builder; return Qnil; @@ -1908,8 +2158,8 @@ VALUE OneofBuilderContext_initialize(VALUE _self, * (as accepted by FieldDescriptor#type=) and the type_class must be a string, * if present (as accepted by FieldDescriptor#submsg_name=). */ -VALUE OneofBuilderContext_optional(int argc, VALUE* argv, VALUE _self) { - DEFINE_SELF(OneofBuilderContext, self, _self); +static VALUE OneofBuilderContext_optional(int argc, VALUE* argv, VALUE _self) { + OneofBuilderContext* self = ruby_to_OneofBuilderContext(_self); VALUE name, type, number; VALUE type_class, options = Qnil; @@ -1921,41 +2171,53 @@ VALUE OneofBuilderContext_optional(int argc, VALUE* argv, VALUE _self) { return Qnil; } +static void OneofBuilderContext_register(VALUE module) { + VALUE klass = rb_define_class_under( + module, "OneofBuilderContext", rb_cObject); + rb_define_alloc_func(klass, OneofBuilderContext_alloc); + rb_define_method(klass, "initialize", + OneofBuilderContext_initialize, 2); + rb_define_method(klass, "optional", OneofBuilderContext_optional, -1); + rb_gc_register_address(&cOneofBuilderContext); + cOneofBuilderContext = klass; +} + // ----------------------------------------------------------------------------- // EnumBuilderContext. // ----------------------------------------------------------------------------- -DEFINE_CLASS(EnumBuilderContext, - "Google::Protobuf::Internal::EnumBuilderContext"); +typedef struct { + google_protobuf_EnumDescriptorProto* enum_proto; + VALUE file_builder; +} EnumBuilderContext; + +static VALUE cEnumBuilderContext = Qnil; void EnumBuilderContext_mark(void* _self) { EnumBuilderContext* self = _self; rb_gc_mark(self->file_builder); } -void EnumBuilderContext_free(void* _self) { - xfree(_self); +static const rb_data_type_t EnumBuilderContext_type = { + "Google::Protobuf::Internal::EnumBuilderContext", + {EnumBuilderContext_mark, RUBY_DEFAULT_FREE, NULL}, + .flags = RUBY_TYPED_FREE_IMMEDIATELY, +}; + +static EnumBuilderContext* ruby_to_EnumBuilderContext(VALUE val) { + EnumBuilderContext* ret; + TypedData_Get_Struct(val, EnumBuilderContext, &EnumBuilderContext_type, ret); + return ret; } -VALUE EnumBuilderContext_alloc(VALUE klass) { +static VALUE EnumBuilderContext_alloc(VALUE klass) { EnumBuilderContext* self = ALLOC(EnumBuilderContext); - VALUE ret = TypedData_Wrap_Struct( - klass, &_EnumBuilderContext_type, self); + VALUE ret = TypedData_Wrap_Struct(klass, &EnumBuilderContext_type, self); self->enum_proto = NULL; self->file_builder = Qnil; return ret; } -void EnumBuilderContext_register(VALUE module) { - VALUE klass = rb_define_class_under( - module, "EnumBuilderContext", rb_cObject); - rb_define_alloc_func(klass, EnumBuilderContext_alloc); - rb_define_method(klass, "initialize", EnumBuilderContext_initialize, 2); - rb_define_method(klass, "value", EnumBuilderContext_value, 2); - rb_gc_register_address(&cEnumBuilderContext); - cEnumBuilderContext = klass; -} - /* * call-seq: * EnumBuilderContext.new(file_builder) => context @@ -1963,9 +2225,9 @@ void EnumBuilderContext_register(VALUE module) { * Create a new builder context around the given enum descriptor. This class is * intended to serve as a DSL context to be used with #instance_eval. */ -VALUE EnumBuilderContext_initialize(VALUE _self, VALUE _file_builder, - VALUE name) { - DEFINE_SELF(EnumBuilderContext, self, _self); +static VALUE EnumBuilderContext_initialize(VALUE _self, VALUE _file_builder, + VALUE name) { + EnumBuilderContext* self = ruby_to_EnumBuilderContext(_self); FileBuilderContext* file_builder = ruby_to_FileBuilderContext(_file_builder); google_protobuf_FileDescriptorProto* file_proto = file_builder->file_proto; @@ -1986,8 +2248,8 @@ VALUE EnumBuilderContext_initialize(VALUE _self, VALUE _file_builder, * Adds the given name => number mapping to the enum type. Name must be a Ruby * symbol. */ -VALUE EnumBuilderContext_value(VALUE _self, VALUE name, VALUE number) { - DEFINE_SELF(EnumBuilderContext, self, _self); +static VALUE EnumBuilderContext_value(VALUE _self, VALUE name, VALUE number) { + EnumBuilderContext* self = ruby_to_EnumBuilderContext(_self); FileBuilderContext* file_builder = ruby_to_FileBuilderContext(self->file_builder); google_protobuf_EnumValueDescriptorProto* enum_value; @@ -2003,196 +2265,53 @@ VALUE EnumBuilderContext_value(VALUE _self, VALUE name, VALUE number) { return Qnil; } - -// ----------------------------------------------------------------------------- -// FileBuilderContext. -// ----------------------------------------------------------------------------- - -DEFINE_CLASS(FileBuilderContext, - "Google::Protobuf::Internal::FileBuilderContext"); - -void FileBuilderContext_mark(void* _self) { - FileBuilderContext* self = _self; - rb_gc_mark(self->descriptor_pool); -} - -void FileBuilderContext_free(void* _self) { - FileBuilderContext* self = _self; - upb_arena_free(self->arena); - xfree(self); -} - -upb_strview FileBuilderContext_strdup2(VALUE _self, const char *str) { - DEFINE_SELF(FileBuilderContext, self, _self); - upb_strview ret; - char *data; - - ret.size = strlen(str); - data = upb_malloc(upb_arena_alloc(self->arena), ret.size + 1); - ret.data = data; - memcpy(data, str, ret.size); - /* Null-terminate required by rewrite_enum_defaults() above. */ - data[ret.size] = '\0'; - return ret; -} - -upb_strview FileBuilderContext_strdup(VALUE _self, VALUE rb_str) { - return FileBuilderContext_strdup2(_self, get_str(rb_str)); -} - -upb_strview FileBuilderContext_strdup_sym(VALUE _self, VALUE rb_sym) { - Check_Type(rb_sym, T_SYMBOL); - return FileBuilderContext_strdup(_self, rb_id2str(SYM2ID(rb_sym))); -} - -VALUE FileBuilderContext_alloc(VALUE klass) { - FileBuilderContext* self = ALLOC(FileBuilderContext); - VALUE ret = TypedData_Wrap_Struct(klass, &_FileBuilderContext_type, self); - self->arena = upb_arena_new(); - self->file_proto = google_protobuf_FileDescriptorProto_new(self->arena); - self->descriptor_pool = Qnil; - return ret; -} - -void FileBuilderContext_register(VALUE module) { - VALUE klass = rb_define_class_under(module, "FileBuilderContext", rb_cObject); - rb_define_alloc_func(klass, FileBuilderContext_alloc); - rb_define_method(klass, "initialize", FileBuilderContext_initialize, 3); - rb_define_method(klass, "add_message", FileBuilderContext_add_message, 1); - rb_define_method(klass, "add_enum", FileBuilderContext_add_enum, 1); - rb_gc_register_address(&cFileBuilderContext); - cFileBuilderContext = klass; -} - -/* - * call-seq: - * FileBuilderContext.new(descriptor_pool) => context - * - * Create a new file builder context for the given file descriptor and - * builder context. This class is intended to serve as a DSL context to be used - * with #instance_eval. - */ -VALUE FileBuilderContext_initialize(VALUE _self, VALUE descriptor_pool, - VALUE name, VALUE options) { - DEFINE_SELF(FileBuilderContext, self, _self); - self->descriptor_pool = descriptor_pool; - - google_protobuf_FileDescriptorProto_set_name( - self->file_proto, FileBuilderContext_strdup(_self, name)); - - // Default syntax for Ruby is proto3. - google_protobuf_FileDescriptorProto_set_syntax( - self->file_proto, - FileBuilderContext_strdup(_self, rb_str_new2("proto3"))); - - if (options != Qnil) { - VALUE syntax; - - Check_Type(options, T_HASH); - syntax = rb_hash_lookup2(options, ID2SYM(rb_intern("syntax")), Qnil); - - if (syntax != Qnil) { - VALUE syntax_str; - - Check_Type(syntax, T_SYMBOL); - syntax_str = rb_id2str(SYM2ID(syntax)); - google_protobuf_FileDescriptorProto_set_syntax( - self->file_proto, FileBuilderContext_strdup(_self, syntax_str)); - } - } - - return Qnil; -} - -/* - * call-seq: - * FileBuilderContext.add_message(name, &block) - * - * Creates a new, empty descriptor with the given name, and invokes the block in - * the context of a MessageBuilderContext on that descriptor. The block can then - * call, e.g., MessageBuilderContext#optional and MessageBuilderContext#repeated - * methods to define the message fields. - * - * This is the recommended, idiomatic way to build message definitions. - */ -VALUE FileBuilderContext_add_message(VALUE _self, VALUE name) { - VALUE args[2] = { _self, name }; - VALUE ctx = rb_class_new_instance(2, args, cMessageBuilderContext); - VALUE block = rb_block_proc(); - rb_funcall_with_block(ctx, rb_intern("instance_eval"), 0, NULL, block); - MessageBuilderContext_add_synthetic_oneofs(ctx); - return Qnil; -} - -/* - * call-seq: - * FileBuilderContext.add_enum(name, &block) - * - * Creates a new, empty enum descriptor with the given name, and invokes the - * block in the context of an EnumBuilderContext on that descriptor. The block - * can then call EnumBuilderContext#add_value to define the enum values. - * - * This is the recommended, idiomatic way to build enum definitions. - */ -VALUE FileBuilderContext_add_enum(VALUE _self, VALUE name) { - VALUE args[2] = { _self, name }; - VALUE ctx = rb_class_new_instance(2, args, cEnumBuilderContext); - VALUE block = rb_block_proc(); - rb_funcall_with_block(ctx, rb_intern("instance_eval"), 0, NULL, block); - return Qnil; -} - -void FileBuilderContext_build(VALUE _self) { - DEFINE_SELF(FileBuilderContext, self, _self); - DescriptorPool* pool = ruby_to_DescriptorPool(self->descriptor_pool); - upb_status status; - - rewrite_enum_defaults(pool->symtab, self->file_proto); - rewrite_names(_self, self->file_proto); - - upb_status_clear(&status); - if (!upb_symtab_addfile(pool->symtab, self->file_proto, &status)) { - rb_raise(cTypeError, "Unable to add defs to DescriptorPool: %s", - upb_status_errmsg(&status)); - } +static void EnumBuilderContext_register(VALUE module) { + VALUE klass = rb_define_class_under( + module, "EnumBuilderContext", rb_cObject); + rb_define_alloc_func(klass, EnumBuilderContext_alloc); + rb_define_method(klass, "initialize", EnumBuilderContext_initialize, 2); + rb_define_method(klass, "value", EnumBuilderContext_value, 2); + rb_gc_register_address(&cEnumBuilderContext); + cEnumBuilderContext = klass; } // ----------------------------------------------------------------------------- // Builder. // ----------------------------------------------------------------------------- -DEFINE_CLASS(Builder, "Google::Protobuf::Internal::Builder"); +typedef struct { + VALUE descriptor_pool; + VALUE default_file_builder; +} Builder; -void Builder_mark(void* _self) { +static VALUE cBuilder = Qnil; + +static void Builder_mark(void* _self) { Builder* self = _self; rb_gc_mark(self->descriptor_pool); rb_gc_mark(self->default_file_builder); } -void Builder_free(void* _self) { - xfree(_self); +static const rb_data_type_t Builder_type = { + "Google::Protobuf::Internal::Builder", + {Builder_mark, RUBY_DEFAULT_FREE, NULL}, + .flags = RUBY_TYPED_FREE_IMMEDIATELY, +}; + +static Builder* ruby_to_Builder(VALUE val) { + Builder* ret; + TypedData_Get_Struct(val, Builder, &Builder_type, ret); + return ret; } -VALUE Builder_alloc(VALUE klass) { +static VALUE Builder_alloc(VALUE klass) { Builder* self = ALLOC(Builder); - VALUE ret = TypedData_Wrap_Struct( - klass, &_Builder_type, self); + VALUE ret = TypedData_Wrap_Struct(klass, &Builder_type, self); self->descriptor_pool = Qnil; self->default_file_builder = Qnil; return ret; } -void Builder_register(VALUE module) { - VALUE klass = rb_define_class_under(module, "Builder", rb_cObject); - rb_define_alloc_func(klass, Builder_alloc); - rb_define_method(klass, "initialize", Builder_initialize, 1); - rb_define_method(klass, "add_file", Builder_add_file, -1); - rb_define_method(klass, "add_message", Builder_add_message, 1); - rb_define_method(klass, "add_enum", Builder_add_enum, 1); - rb_gc_register_address(&cBuilder); - cBuilder = klass; -} - /* * call-seq: * Builder.new(descriptor_pool) => builder @@ -2201,8 +2320,8 @@ void Builder_register(VALUE module) { * descriptors and atomically register them into a pool in a way that allows for * (co)recursive type references. */ -VALUE Builder_initialize(VALUE _self, VALUE pool) { - DEFINE_SELF(Builder, self, _self); +static VALUE Builder_initialize(VALUE _self, VALUE pool) { + Builder* self = ruby_to_Builder(_self); self->descriptor_pool = pool; self->default_file_builder = Qnil; // Created lazily if needed. return Qnil; @@ -2219,8 +2338,8 @@ VALUE Builder_initialize(VALUE _self, VALUE pool) { * * This is the recommended, idiomatic way to build file descriptors. */ -VALUE Builder_add_file(int argc, VALUE* argv, VALUE _self) { - DEFINE_SELF(Builder, self, _self); +static VALUE Builder_add_file(int argc, VALUE* argv, VALUE _self) { + Builder* self = ruby_to_Builder(_self); VALUE name, options; VALUE ctx; VALUE block; @@ -2240,7 +2359,7 @@ VALUE Builder_add_file(int argc, VALUE* argv, VALUE _self) { } static VALUE Builder_get_default_file(VALUE _self) { - DEFINE_SELF(Builder, self, _self); + Builder* self = ruby_to_Builder(_self); /* Lazily create only if legacy builder-level methods are called. */ if (self->default_file_builder == Qnil) { @@ -2264,7 +2383,7 @@ static VALUE Builder_get_default_file(VALUE _self) { * files generated by protoc which don't add messages within "add_file" block. * Descriptors created this way get assigned to a default empty FileDescriptor. */ -VALUE Builder_add_message(VALUE _self, VALUE name) { +static VALUE Builder_add_message(VALUE _self, VALUE name) { VALUE file_builder = Builder_get_default_file(_self); rb_funcall_with_block(file_builder, rb_intern("add_message"), 1, &name, rb_block_proc()); @@ -2283,7 +2402,7 @@ VALUE Builder_add_message(VALUE _self, VALUE name) { * Enum descriptors created this way get assigned to a default empty * FileDescriptor. */ -VALUE Builder_add_enum(VALUE _self, VALUE name) { +static VALUE Builder_add_enum(VALUE _self, VALUE name) { VALUE file_builder = Builder_get_default_file(_self); rb_funcall_with_block(file_builder, rb_intern("add_enum"), 1, &name, rb_block_proc()); @@ -2292,8 +2411,8 @@ VALUE Builder_add_enum(VALUE _self, VALUE name) { /* This method is hidden from Ruby, and only called directly from * DescriptorPool_build(). */ -VALUE Builder_build(VALUE _self) { - DEFINE_SELF(Builder, self, _self); +static VALUE Builder_build(VALUE _self) { + Builder* self = ruby_to_Builder(_self); if (self->default_file_builder != Qnil) { FileBuilderContext_build(self->default_file_builder); @@ -2303,8 +2422,19 @@ VALUE Builder_build(VALUE _self) { return Qnil; } +static void Builder_register(VALUE module) { + VALUE klass = rb_define_class_under(module, "Builder", rb_cObject); + rb_define_alloc_func(klass, Builder_alloc); + rb_define_method(klass, "initialize", Builder_initialize, 1); + rb_define_method(klass, "add_file", Builder_add_file, -1); + rb_define_method(klass, "add_message", Builder_add_message, 1); + rb_define_method(klass, "add_enum", Builder_add_enum, 1); + rb_gc_register_address(&cBuilder); + cBuilder = klass; +} + static VALUE get_def_obj(VALUE _descriptor_pool, const void* ptr, VALUE klass) { - DEFINE_SELF(DescriptorPool, descriptor_pool, _descriptor_pool); + DescriptorPool* descriptor_pool = ruby_to_DescriptorPool(_descriptor_pool); VALUE key = ULL2NUM((intptr_t)ptr); VALUE def; @@ -2319,48 +2449,111 @@ static VALUE get_def_obj(VALUE _descriptor_pool, const void* ptr, VALUE klass) { VALUE args[3] = { c_only_cookie, _descriptor_pool, key }; def = rb_class_new_instance(3, args, klass); rb_hash_aset(descriptor_pool->def_to_descriptor, key, def); - - // For message defs, we now eagerly get/create descriptors for all - // submessages. We will need these anyway to parse or serialize this - // message type. But more importantly, we must do this now so that - // add_handlers_for_message() (which calls get_msgdef_obj()) does *not* - // need to create a Ruby object or insert into a Ruby Hash. We need to - // avoid triggering GC, which can switch Ruby threads and re-enter our - // C extension from a different thread. This wreaks havoc on our state - // if we were in the middle of building handlers. - if (klass == cDescriptor) { - const upb_msgdef *m = ptr; - upb_msg_field_iter it; - for (upb_msg_field_begin(&it, m); - !upb_msg_field_done(&it); - upb_msg_field_next(&it)) { - const upb_fielddef* f = upb_msg_iter_field(&it); - if (upb_fielddef_issubmsg(f)) { - get_msgdef_obj(_descriptor_pool, upb_fielddef_msgsubdef(f)); - } - } - } } return def; } -VALUE get_msgdef_obj(VALUE descriptor_pool, const upb_msgdef* def) { +static VALUE get_msgdef_obj(VALUE descriptor_pool, const upb_msgdef* def) { return get_def_obj(descriptor_pool, def, cDescriptor); } -VALUE get_enumdef_obj(VALUE descriptor_pool, const upb_enumdef* def) { +static VALUE get_enumdef_obj(VALUE descriptor_pool, const upb_enumdef* def) { return get_def_obj(descriptor_pool, def, cEnumDescriptor); } -VALUE get_fielddef_obj(VALUE descriptor_pool, const upb_fielddef* def) { +static VALUE get_fielddef_obj(VALUE descriptor_pool, const upb_fielddef* def) { return get_def_obj(descriptor_pool, def, cFieldDescriptor); } -VALUE get_filedef_obj(VALUE descriptor_pool, const upb_filedef* def) { +static VALUE get_filedef_obj(VALUE descriptor_pool, const upb_filedef* def) { return get_def_obj(descriptor_pool, def, cFileDescriptor); } -VALUE get_oneofdef_obj(VALUE descriptor_pool, const upb_oneofdef* def) { +static VALUE get_oneofdef_obj(VALUE descriptor_pool, const upb_oneofdef* def) { return get_def_obj(descriptor_pool, def, cOneofDescriptor); } + +// ----------------------------------------------------------------------------- +// Shared functions +// ----------------------------------------------------------------------------- + +// Functions exposed to other modules in defs.h. + +VALUE Descriptor_DefToClass(const upb_msgdef *m) { + const upb_symtab *symtab = upb_filedef_symtab(upb_msgdef_file(m)); + VALUE pool = ObjectCache_Get(symtab); + PBRUBY_ASSERT(pool != Qnil); + VALUE desc_rb = get_msgdef_obj(pool, m); + const Descriptor* desc = ruby_to_Descriptor(desc_rb); + return desc->klass; +} + +const upb_msgdef *Descriptor_GetMsgDef(VALUE desc_rb) { + const Descriptor* desc = ruby_to_Descriptor(desc_rb); + return desc->msgdef; +} + +VALUE TypeInfo_InitArg(int argc, VALUE *argv, int skip_arg) { + if (argc > skip_arg) { + if (argc > 1 + skip_arg) { + rb_raise(rb_eArgError, "Expected a maximum of %d arguments.", skip_arg + 1); + } + return argv[skip_arg]; + } else { + return Qnil; + } +} + +TypeInfo TypeInfo_FromClass(int argc, VALUE* argv, int skip_arg, + VALUE* type_class, VALUE* init_arg) { + TypeInfo ret = {ruby_to_fieldtype(argv[skip_arg])}; + + if (ret.type == UPB_TYPE_MESSAGE || ret.type == UPB_TYPE_ENUM) { + *init_arg = TypeInfo_InitArg(argc, argv, skip_arg + 2); + + if (argc < 2 + skip_arg) { + rb_raise(rb_eArgError, "Expected at least %d arguments for message/enum.", + 2 + skip_arg); + } + + VALUE klass = argv[1 + skip_arg]; + VALUE desc = MessageOrEnum_GetDescriptor(klass); + *type_class = klass; + + if (desc == Qnil) { + rb_raise(rb_eArgError, + "Type class has no descriptor. Please pass a " + "class or enum as returned by the DescriptorPool."); + } + + if (ret.type == UPB_TYPE_MESSAGE) { + ret.def.msgdef = ruby_to_Descriptor(desc)->msgdef; + Message_CheckClass(klass); + } else { + PBRUBY_ASSERT(ret.type == UPB_TYPE_ENUM); + ret.def.enumdef = ruby_to_EnumDescriptor(desc)->enumdef; + } + } else { + *init_arg = TypeInfo_InitArg(argc, argv, skip_arg + 1); + } + + return ret; +} + +void Defs_register(VALUE module) { + DescriptorPool_register(module); + Descriptor_register(module); + FileDescriptor_register(module); + FieldDescriptor_register(module); + OneofDescriptor_register(module); + EnumDescriptor_register(module); + FileBuilderContext_register(module); + MessageBuilderContext_register(module); + OneofBuilderContext_register(module); + EnumBuilderContext_register(module); + Builder_register(module); + + rb_gc_register_address(&c_only_cookie); + c_only_cookie = rb_class_new_instance(0, NULL, rb_cObject); +} diff --git a/ruby/ext/google/protobuf_c/defs.h b/ruby/ext/google/protobuf_c/defs.h new file mode 100644 index 000000000000..97a94bb7b545 --- /dev/null +++ b/ruby/ext/google/protobuf_c/defs.h @@ -0,0 +1,107 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef RUBY_PROTOBUF_DEFS_H_ +#define RUBY_PROTOBUF_DEFS_H_ + +#include + +#include "protobuf.h" +#include "ruby-upb.h" + +// ----------------------------------------------------------------------------- +// TypeInfo +// ----------------------------------------------------------------------------- + +// This bundles a upb_fieldtype_t and msgdef/enumdef when appropriate. This is +// convenient for functions that need type information but cannot necessarily +// assume a upb_fielddef will be available. +// +// For example, Google::Protobuf::Map and Google::Protobuf::RepeatedField can +// be constructed with type information alone: +// +// # RepeatedField will internally store the type information in a TypeInfo. +// Google::Protobuf::RepeatedField.new(:message, FooMessage) + +typedef struct { + upb_fieldtype_t type; + union { + const upb_msgdef* msgdef; // When type == UPB_TYPE_MESSAGE + const upb_enumdef* enumdef; // When type == UPB_TYPE_ENUM + } def; +} TypeInfo; + +static inline TypeInfo TypeInfo_get(const upb_fielddef *f) { + TypeInfo ret = {upb_fielddef_type(f), {NULL}}; + switch (ret.type) { + case UPB_TYPE_MESSAGE: + ret.def.msgdef = upb_fielddef_msgsubdef(f); + break; + case UPB_TYPE_ENUM: + ret.def.enumdef = upb_fielddef_enumsubdef(f); + break; + default: + break; + } + return ret; +} + +TypeInfo TypeInfo_FromClass(int argc, VALUE* argv, int skip_arg, + VALUE* type_class, VALUE* init_arg); + +static inline TypeInfo TypeInfo_from_type(upb_fieldtype_t type) { + TypeInfo ret = {type}; + assert(type != UPB_TYPE_MESSAGE && type != UPB_TYPE_ENUM); + return ret; +} + +// ----------------------------------------------------------------------------- +// Other utilities +// ----------------------------------------------------------------------------- + +VALUE Descriptor_DefToClass(const upb_msgdef *m); + +// Returns the underlying msgdef, enumdef, or symtab (respectively) for the +// given Descriptor, EnumDescriptor, or DescriptorPool Ruby object. +const upb_enumdef *EnumDescriptor_GetEnumDef(VALUE enum_desc_rb); +const upb_symtab *DescriptorPool_GetSymtab(VALUE desc_pool_rb); +const upb_msgdef *Descriptor_GetMsgDef(VALUE desc_rb); + +// Returns a upb field type for the given Ruby symbol +// (eg. :float => UPB_TYPE_FLOAT). +upb_fieldtype_t ruby_to_fieldtype(VALUE type); + +// The singleton generated pool (a DescriptorPool object). +extern VALUE generated_pool; + +// Call at startup to register all types in this module. +void Defs_register(VALUE module); + +#endif // RUBY_PROTOBUF_DEFS_H_ diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c deleted file mode 100644 index a58a5d2f849d..000000000000 --- a/ruby/ext/google/protobuf_c/encode_decode.c +++ /dev/null @@ -1,1795 +0,0 @@ -// Protocol Buffers - Google's data interchange format -// Copyright 2014 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "protobuf.h" - -VALUE initialize_rb_class_with_no_args(VALUE klass) { - return rb_funcall(klass, rb_intern("new"), 0); -} - -// This function is equivalent to rb_str_cat(), but unlike the real -// rb_str_cat(), it doesn't leak memory in some versions of Ruby. -// For more information, see: -// https://bugs.ruby-lang.org/issues/11328 -VALUE noleak_rb_str_cat(VALUE rb_str, const char *str, long len) { - char *p; - size_t oldlen = RSTRING_LEN(rb_str); - rb_str_modify_expand(rb_str, len); - p = RSTRING_PTR(rb_str); - memcpy(p + oldlen, str, len); - rb_str_set_len(rb_str, oldlen + len); - return rb_str; -} - -bool is_wrapper(const upb_msgdef* m) { - switch (upb_msgdef_wellknowntype(m)) { - case UPB_WELLKNOWN_DOUBLEVALUE: - case UPB_WELLKNOWN_FLOATVALUE: - case UPB_WELLKNOWN_INT64VALUE: - case UPB_WELLKNOWN_UINT64VALUE: - case UPB_WELLKNOWN_INT32VALUE: - case UPB_WELLKNOWN_UINT32VALUE: - case UPB_WELLKNOWN_STRINGVALUE: - case UPB_WELLKNOWN_BYTESVALUE: - case UPB_WELLKNOWN_BOOLVALUE: - return true; - default: - return false; - } -} - -// The code below also comes from upb's prototype Ruby binding, developed by -// haberman@. - -/* stringsink *****************************************************************/ - -static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) { - stringsink *sink = _sink; - sink->len = 0; - return sink; -} - -static size_t stringsink_string(void *_sink, const void *hd, const char *ptr, - size_t len, const upb_bufhandle *handle) { - stringsink *sink = _sink; - size_t new_size = sink->size; - - UPB_UNUSED(hd); - UPB_UNUSED(handle); - - while (sink->len + len > new_size) { - new_size *= 2; - } - - if (new_size != sink->size) { - sink->ptr = realloc(sink->ptr, new_size); - sink->size = new_size; - } - - memcpy(sink->ptr + sink->len, ptr, len); - sink->len += len; - - return len; -} - -void stringsink_init(stringsink *sink) { - upb_byteshandler_init(&sink->handler); - upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL); - upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL); - - upb_bytessink_reset(&sink->sink, &sink->handler, sink); - - sink->size = 32; - sink->ptr = malloc(sink->size); - sink->len = 0; -} - -void stringsink_uninit(stringsink *sink) { - free(sink->ptr); -} - -// ----------------------------------------------------------------------------- -// Parsing. -// ----------------------------------------------------------------------------- - -#define DEREF(msg, ofs, type) *(type*)(((uint8_t *)msg) + ofs) - -typedef struct { - size_t ofs; - int32_t hasbit; -} field_handlerdata_t; - -// Creates a handlerdata that contains the offset and the hasbit for the field -static const void* newhandlerdata(upb_handlers* h, uint32_t ofs, int32_t hasbit) { - field_handlerdata_t *hd = ALLOC(field_handlerdata_t); - hd->ofs = ofs; - hd->hasbit = hasbit; - upb_handlers_addcleanup(h, hd, xfree); - return hd; -} - -typedef struct { - size_t ofs; - int32_t hasbit; - upb_fieldtype_t wrapped_type; // Only for wrappers. - VALUE subklass; -} submsg_handlerdata_t; - -// Creates a handlerdata that contains offset and submessage type information. -static const void *newsubmsghandlerdata(upb_handlers* h, - const upb_fielddef *f, - uint32_t ofs, - int32_t hasbit, - VALUE subklass) { - submsg_handlerdata_t *hd = ALLOC(submsg_handlerdata_t); - const upb_msgdef *subm = upb_fielddef_msgsubdef(f); - hd->ofs = ofs; - hd->hasbit = hasbit; - hd->subklass = subklass; - upb_handlers_addcleanup(h, hd, xfree); - if (is_wrapper(subm)) { - const upb_fielddef *value_f = upb_msgdef_itof(subm, 1); - hd->wrapped_type = upb_fielddef_type(value_f); - } - return hd; -} - -typedef struct { - size_t ofs; // union data slot - size_t case_ofs; // oneof_case field - uint32_t oneof_case_num; // oneof-case number to place in oneof_case field - VALUE subklass; -} oneof_handlerdata_t; - -static const void *newoneofhandlerdata(upb_handlers *h, - uint32_t ofs, - uint32_t case_ofs, - const upb_fielddef *f, - const Descriptor* desc) { - oneof_handlerdata_t *hd = ALLOC(oneof_handlerdata_t); - hd->ofs = ofs; - hd->case_ofs = case_ofs; - // We reuse the field tag number as a oneof union discriminant tag. Note that - // we don't expose these numbers to the user, so the only requirement is that - // we have some unique ID for each union case/possibility. The field tag - // numbers are already present and are easy to use so there's no reason to - // create a separate ID space. In addition, using the field tag number here - // lets us easily look up the field in the oneof accessor. - hd->oneof_case_num = upb_fielddef_number(f); - if (is_value_field(f)) { - hd->oneof_case_num |= ONEOF_CASE_MASK; - } - hd->subklass = field_type_class(desc->layout, f); - upb_handlers_addcleanup(h, hd, xfree); - return hd; -} - -// A handler that starts a repeated field. Gets the Repeated*Field instance for -// this field (such an instance always exists even in an empty message). -static void *startseq_handler(void* closure, const void* hd) { - MessageHeader* msg = closure; - const size_t *ofs = hd; - return (void*)DEREF(msg, *ofs, VALUE); -} - -// Handlers that append primitive values to a repeated field. -#define DEFINE_APPEND_HANDLER(type, ctype) \ - static bool append##type##_handler(void *closure, const void *hd, \ - ctype val) { \ - VALUE ary = (VALUE)closure; \ - RepeatedField_push_native(ary, &val); \ - return true; \ - } - -DEFINE_APPEND_HANDLER(bool, bool) -DEFINE_APPEND_HANDLER(int32, int32_t) -DEFINE_APPEND_HANDLER(uint32, uint32_t) -DEFINE_APPEND_HANDLER(float, float) -DEFINE_APPEND_HANDLER(int64, int64_t) -DEFINE_APPEND_HANDLER(uint64, uint64_t) -DEFINE_APPEND_HANDLER(double, double) - -// Appends a string to a repeated field. -static void* appendstr_handler(void *closure, - const void *hd, - size_t size_hint) { - VALUE ary = (VALUE)closure; - VALUE str = rb_str_new2(""); - rb_enc_associate(str, kRubyStringUtf8Encoding); - RepeatedField_push_native(ary, &str); - return (void*)str; -} - -static void set_hasbit(void *closure, int32_t hasbit) { - if (hasbit > 0) { - uint8_t* storage = closure; - storage[hasbit/8] |= 1 << (hasbit % 8); - } -} - -// Appends a 'bytes' string to a repeated field. -static void* appendbytes_handler(void *closure, - const void *hd, - size_t size_hint) { - VALUE ary = (VALUE)closure; - VALUE str = rb_str_new2(""); - rb_enc_associate(str, kRubyString8bitEncoding); - RepeatedField_push_native(ary, &str); - return (void*)str; -} - -// Sets a non-repeated string field in a message. -static void* str_handler(void *closure, - const void *hd, - size_t size_hint) { - MessageHeader* msg = closure; - const field_handlerdata_t *fieldhandler = hd; - - VALUE str = rb_str_new2(""); - rb_enc_associate(str, kRubyStringUtf8Encoding); - DEREF(msg, fieldhandler->ofs, VALUE) = str; - set_hasbit(closure, fieldhandler->hasbit); - return (void*)str; -} - -// Sets a non-repeated 'bytes' field in a message. -static void* bytes_handler(void *closure, - const void *hd, - size_t size_hint) { - MessageHeader* msg = closure; - const field_handlerdata_t *fieldhandler = hd; - - VALUE str = rb_str_new2(""); - rb_enc_associate(str, kRubyString8bitEncoding); - DEREF(msg, fieldhandler->ofs, VALUE) = str; - set_hasbit(closure, fieldhandler->hasbit); - return (void*)str; -} - -static size_t stringdata_handler(void* closure, const void* hd, - const char* str, size_t len, - const upb_bufhandle* handle) { - VALUE rb_str = (VALUE)closure; - noleak_rb_str_cat(rb_str, str, len); - return len; -} - -static bool stringdata_end_handler(void* closure, const void* hd) { - VALUE rb_str = (VALUE)closure; - rb_obj_freeze(rb_str); - return true; -} - -static bool appendstring_end_handler(void* closure, const void* hd) { - VALUE rb_str = (VALUE)closure; - rb_obj_freeze(rb_str); - return true; -} - -// Appends a submessage to a repeated field (a regular Ruby array for now). -static void *appendsubmsg_handler(void *closure, const void *hd) { - VALUE ary = (VALUE)closure; - const submsg_handlerdata_t *submsgdata = hd; - MessageHeader* submsg; - - VALUE submsg_rb = initialize_rb_class_with_no_args(submsgdata->subklass); - RepeatedField_push(ary, submsg_rb); - - TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg); - return submsg; -} - -// Appends a wrapper to a repeated field (a regular Ruby array for now). -static void *appendwrapper_handler(void *closure, const void *hd) { - VALUE ary = (VALUE)closure; - int size = RepeatedField_size(ary); - (void)hd; - - RepeatedField_push(ary, Qnil); - - return RepeatedField_index_native(ary, size); -} - -// Sets a non-repeated submessage field in a message. -static void *submsg_handler(void *closure, const void *hd) { - MessageHeader* msg = closure; - const submsg_handlerdata_t* submsgdata = hd; - VALUE submsg_rb; - MessageHeader* submsg; - - if (DEREF(msg, submsgdata->ofs, VALUE) == Qnil) { - DEREF(msg, submsgdata->ofs, VALUE) = - initialize_rb_class_with_no_args(submsgdata->subklass); - } - - set_hasbit(closure, submsgdata->hasbit); - - submsg_rb = DEREF(msg, submsgdata->ofs, VALUE); - TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg); - - return submsg; -} - -static void* startwrapper(void* closure, const void* hd) { - const submsg_handlerdata_t* submsgdata = hd; - char* msg = closure; - VALUE* field = (VALUE*)(msg + submsgdata->ofs); - - set_hasbit(closure, submsgdata->hasbit); - - switch (submsgdata->wrapped_type) { - case UPB_TYPE_FLOAT: - case UPB_TYPE_DOUBLE: - *field = DBL2NUM(0); - break; - case UPB_TYPE_BOOL: - *field = Qfalse; - break; - case UPB_TYPE_STRING: - *field = get_frozen_string(NULL, 0, false); - break; - case UPB_TYPE_BYTES: - *field = get_frozen_string(NULL, 0, true); - break; - case UPB_TYPE_ENUM: - case UPB_TYPE_INT32: - case UPB_TYPE_INT64: - case UPB_TYPE_UINT32: - case UPB_TYPE_UINT64: - *field = INT2NUM(0); - break; - case UPB_TYPE_MESSAGE: - rb_raise(rb_eRuntimeError, - "Internal logic error with well-known types."); - } - - return field; -} - -// Handler data for startmap/endmap handlers. -typedef struct { - size_t ofs; - upb_fieldtype_t key_field_type; - upb_fieldtype_t value_field_type; - VALUE subklass; -} map_handlerdata_t; - -// Temporary frame for map parsing: at the beginning of a map entry message, a -// submsg handler allocates a frame to hold (i) a reference to the Map object -// into which this message will be inserted and (ii) storage slots to -// temporarily hold the key and value for this map entry until the end of the -// submessage. When the submessage ends, another handler is called to insert the -// value into the map. -typedef struct { - VALUE map; - const map_handlerdata_t* handlerdata; - char key_storage[NATIVE_SLOT_MAX_SIZE]; - char value_storage[NATIVE_SLOT_MAX_SIZE]; -} map_parse_frame_t; - -static void MapParseFrame_mark(void* _self) { - map_parse_frame_t* frame = _self; - - // This shouldn't strictly be necessary since this should be rooted by the - // message itself, but it can't hurt. - rb_gc_mark(frame->map); - - native_slot_mark(frame->handlerdata->key_field_type, &frame->key_storage); - native_slot_mark(frame->handlerdata->value_field_type, &frame->value_storage); -} - -void MapParseFrame_free(void* self) { - xfree(self); -} - -rb_data_type_t MapParseFrame_type = { - "MapParseFrame", - { MapParseFrame_mark, MapParseFrame_free, NULL }, -}; - -// Handler to begin a map entry: allocates a temporary frame. This is the -// 'startsubmsg' handler on the msgdef that contains the map field. -static void *startmap_handler(void *closure, const void *hd) { - MessageHeader* msg = closure; - const map_handlerdata_t* mapdata = hd; - map_parse_frame_t* frame = ALLOC(map_parse_frame_t); - VALUE map_rb = DEREF(msg, mapdata->ofs, VALUE); - - frame->handlerdata = mapdata; - frame->map = map_rb; - native_slot_init(mapdata->key_field_type, &frame->key_storage); - native_slot_init(mapdata->value_field_type, &frame->value_storage); - - Map_set_frame(map_rb, - TypedData_Wrap_Struct(rb_cObject, &MapParseFrame_type, frame)); - - return frame; -} - -static bool endmap_handler(void *closure, const void *hd) { - map_parse_frame_t* frame = closure; - Map_set_frame(frame->map, Qnil); - return true; -} - -// Handler to end a map entry: inserts the value defined during the message into -// the map. This is the 'endmsg' handler on the map entry msgdef. -static bool endmapentry_handler(void* closure, const void* hd, upb_status* s) { - map_parse_frame_t* frame = closure; - const map_handlerdata_t* mapdata = hd; - - VALUE key = native_slot_get( - mapdata->key_field_type, Qnil, - &frame->key_storage); - - VALUE value = native_slot_get( - mapdata->value_field_type, mapdata->subklass, - &frame->value_storage); - - Map_index_set(frame->map, key, value); - - return true; -} - -// Allocates a new map_handlerdata_t given the map entry message definition. If -// the offset of the field within the parent message is also given, that is -// added to the handler data as well. Note that this is called *twice* per map -// field: once in the parent message handler setup when setting the startsubmsg -// handler and once in the map entry message handler setup when setting the -// key/value and endmsg handlers. The reason is that there is no easy way to -// pass the handlerdata down to the sub-message handler setup. -static map_handlerdata_t* new_map_handlerdata( - size_t ofs, - const upb_msgdef* mapentry_def, - const Descriptor* desc) { - const upb_fielddef* key_field; - const upb_fielddef* value_field; - map_handlerdata_t* hd = ALLOC(map_handlerdata_t); - hd->ofs = ofs; - key_field = upb_msgdef_itof(mapentry_def, MAP_KEY_FIELD); - assert(key_field != NULL); - hd->key_field_type = upb_fielddef_type(key_field); - value_field = upb_msgdef_itof(mapentry_def, MAP_VALUE_FIELD); - assert(value_field != NULL); - hd->value_field_type = upb_fielddef_type(value_field); - hd->subklass = field_type_class(desc->layout, value_field); - - return hd; -} - -// Handlers that set primitive values in oneofs. -#define DEFINE_ONEOF_HANDLER(type, ctype) \ - static bool oneof##type##_handler(void *closure, const void *hd, \ - ctype val) { \ - const oneof_handlerdata_t *oneofdata = hd; \ - DEREF(closure, oneofdata->case_ofs, uint32_t) = \ - oneofdata->oneof_case_num; \ - DEREF(closure, oneofdata->ofs, ctype) = val; \ - return true; \ - } - -DEFINE_ONEOF_HANDLER(bool, bool) -DEFINE_ONEOF_HANDLER(int32, int32_t) -DEFINE_ONEOF_HANDLER(uint32, uint32_t) -DEFINE_ONEOF_HANDLER(float, float) -DEFINE_ONEOF_HANDLER(int64, int64_t) -DEFINE_ONEOF_HANDLER(uint64, uint64_t) -DEFINE_ONEOF_HANDLER(double, double) - -#undef DEFINE_ONEOF_HANDLER - -// Handlers for strings in a oneof. -static void *oneofstr_handler(void *closure, - const void *hd, - size_t size_hint) { - MessageHeader* msg = closure; - const oneof_handlerdata_t *oneofdata = hd; - VALUE str = rb_str_new2(""); - rb_enc_associate(str, kRubyStringUtf8Encoding); - DEREF(msg, oneofdata->case_ofs, uint32_t) = - oneofdata->oneof_case_num; - DEREF(msg, oneofdata->ofs, VALUE) = str; - return (void*)str; -} - -static void *oneofbytes_handler(void *closure, - const void *hd, - size_t size_hint) { - MessageHeader* msg = closure; - const oneof_handlerdata_t *oneofdata = hd; - VALUE str = rb_str_new2(""); - rb_enc_associate(str, kRubyString8bitEncoding); - DEREF(msg, oneofdata->case_ofs, uint32_t) = - oneofdata->oneof_case_num; - DEREF(msg, oneofdata->ofs, VALUE) = str; - return (void*)str; -} - -static bool oneofstring_end_handler(void* closure, const void* hd) { - VALUE rb_str = rb_str_new2(""); - rb_obj_freeze(rb_str); - return true; -} - -// Handler for a submessage field in a oneof. -static void *oneofsubmsg_handler(void *closure, - const void *hd) { - MessageHeader* msg = closure; - const oneof_handlerdata_t *oneofdata = hd; - uint32_t oldcase = DEREF(msg, oneofdata->case_ofs, uint32_t); - - VALUE submsg_rb; - MessageHeader* submsg; - - if (oldcase != oneofdata->oneof_case_num || - DEREF(msg, oneofdata->ofs, VALUE) == Qnil) { - DEREF(msg, oneofdata->ofs, VALUE) = - initialize_rb_class_with_no_args(oneofdata->subklass); - } - // Set the oneof case *after* allocating the new class instance -- otherwise, - // if the Ruby GC is invoked as part of a call into the VM, it might invoke - // our mark routines, and our mark routines might see the case value - // indicating a VALUE is present and expect a valid VALUE. See comment in - // layout_set() for more detail: basically, the change to the value and the - // case must be atomic w.r.t. the Ruby VM. - DEREF(msg, oneofdata->case_ofs, uint32_t) = oneofdata->oneof_case_num; - - submsg_rb = DEREF(msg, oneofdata->ofs, VALUE); - TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg); - return submsg; -} - -static void* oneof_startwrapper(void* closure, const void* hd) { - char* msg = closure; - const oneof_handlerdata_t *oneofdata = hd; - - DEREF(msg, oneofdata->case_ofs, uint32_t) = oneofdata->oneof_case_num; - - return msg + oneofdata->ofs; -} - -// Set up handlers for a repeated field. -static void add_handlers_for_repeated_field(upb_handlers *h, - const Descriptor* desc, - const upb_fielddef *f, - size_t offset) { - upb_handlerattr attr = UPB_HANDLERATTR_INIT; - attr.handler_data = newhandlerdata(h, offset, -1); - upb_handlers_setstartseq(h, f, startseq_handler, &attr); - - switch (upb_fielddef_type(f)) { - -#define SET_HANDLER(utype, ltype) \ - case utype: \ - upb_handlers_set##ltype(h, f, append##ltype##_handler, NULL); \ - break; - - SET_HANDLER(UPB_TYPE_BOOL, bool); - SET_HANDLER(UPB_TYPE_INT32, int32); - SET_HANDLER(UPB_TYPE_UINT32, uint32); - SET_HANDLER(UPB_TYPE_ENUM, int32); - SET_HANDLER(UPB_TYPE_FLOAT, float); - SET_HANDLER(UPB_TYPE_INT64, int64); - SET_HANDLER(UPB_TYPE_UINT64, uint64); - SET_HANDLER(UPB_TYPE_DOUBLE, double); - -#undef SET_HANDLER - - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: { - bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES; - upb_handlers_setstartstr(h, f, is_bytes ? - appendbytes_handler : appendstr_handler, - NULL); - upb_handlers_setstring(h, f, stringdata_handler, NULL); - upb_handlers_setendstr(h, f, appendstring_end_handler, NULL); - break; - } - case UPB_TYPE_MESSAGE: { - VALUE subklass = field_type_class(desc->layout, f); - upb_handlerattr attr = UPB_HANDLERATTR_INIT; - attr.handler_data = newsubmsghandlerdata(h, f, 0, -1, subklass); - if (is_wrapper(upb_fielddef_msgsubdef(f))) { - upb_handlers_setstartsubmsg(h, f, appendwrapper_handler, &attr); - } else { - upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &attr); - } - break; - } - } -} - -static bool doublewrapper_handler(void* closure, const void* hd, double val) { - VALUE* rbval = closure; - *rbval = DBL2NUM(val); - return true; -} - -static bool floatwrapper_handler(void* closure, const void* hd, float val) { - VALUE* rbval = closure; - *rbval = DBL2NUM(val); - return true; -} - -static bool int64wrapper_handler(void* closure, const void* hd, int64_t val) { - VALUE* rbval = closure; - *rbval = LL2NUM(val); - return true; -} - -static bool uint64wrapper_handler(void* closure, const void* hd, uint64_t val) { - VALUE* rbval = closure; - *rbval = ULL2NUM(val); - return true; -} - -static bool int32wrapper_handler(void* closure, const void* hd, int32_t val) { - VALUE* rbval = closure; - *rbval = INT2NUM(val); - return true; -} - -static bool uint32wrapper_handler(void* closure, const void* hd, uint32_t val) { - VALUE* rbval = closure; - *rbval = UINT2NUM(val); - return true; -} - -static void* startstringwrapper_handler(void* closure, const void* hd, - size_t size_hint) { - VALUE* rbval = closure; - (void)size_hint; - *rbval = rb_str_new(NULL, 0); - rb_enc_associate(*rbval, kRubyStringUtf8Encoding); - return closure; -} - -static size_t stringwrapper_handler(void* closure, const void* hd, - const char* ptr, size_t len, - const upb_bufhandle* handle) { - VALUE* rbval = closure; - *rbval = noleak_rb_str_cat(*rbval, ptr, len); - return len; -} - -static void* startbyteswrapper_handler(void* closure, const void* hd, - size_t size_hint) { - VALUE* rbval = closure; - (void)size_hint; - *rbval = rb_str_new(NULL, 0); - rb_enc_associate(*rbval, kRubyString8bitEncoding); - return closure; -} - -static size_t byteswrapper_handler(void* closure, const void* hd, - const char* ptr, size_t len, - const upb_bufhandle* handle) { - VALUE* rbval = closure; - *rbval = noleak_rb_str_cat(*rbval, ptr, len); - return len; -} - -static bool boolwrapper_handler(void* closure, const void* hd, bool val) { - VALUE* rbval = closure; - if (val) { - *rbval = Qtrue; - } else { - *rbval = Qfalse; - } - return true; -} - -// Set up handlers for a singular field. -static void add_handlers_for_singular_field(const Descriptor* desc, - upb_handlers* h, - const upb_fielddef* f, - size_t offset, size_t hasbit_off) { - // The offset we pass to UPB points to the start of the Message, - // rather than the start of where our data is stored. - int32_t hasbit = -1; - if (hasbit_off != MESSAGE_FIELD_NO_HASBIT) { - hasbit = hasbit_off + sizeof(MessageHeader) * 8; - } - - switch (upb_fielddef_type(f)) { - case UPB_TYPE_BOOL: - case UPB_TYPE_INT32: - case UPB_TYPE_UINT32: - case UPB_TYPE_ENUM: - case UPB_TYPE_FLOAT: - case UPB_TYPE_INT64: - case UPB_TYPE_UINT64: - case UPB_TYPE_DOUBLE: - upb_msg_setscalarhandler(h, f, offset, hasbit); - break; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: { - bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES; - upb_handlerattr attr = UPB_HANDLERATTR_INIT; - attr.handler_data = newhandlerdata(h, offset, hasbit); - upb_handlers_setstartstr(h, f, - is_bytes ? bytes_handler : str_handler, - &attr); - upb_handlers_setstring(h, f, stringdata_handler, &attr); - upb_handlers_setendstr(h, f, stringdata_end_handler, &attr); - break; - } - case UPB_TYPE_MESSAGE: { - upb_handlerattr attr = UPB_HANDLERATTR_INIT; - attr.handler_data = newsubmsghandlerdata( - h, f, offset, hasbit, field_type_class(desc->layout, f)); - if (is_wrapper(upb_fielddef_msgsubdef(f))) { - upb_handlers_setstartsubmsg(h, f, startwrapper, &attr); - } else { - upb_handlers_setstartsubmsg(h, f, submsg_handler, &attr); - } - } - } -} - -// Adds handlers to a map field. -static void add_handlers_for_mapfield(upb_handlers* h, - const upb_fielddef* fielddef, - size_t offset, - const Descriptor* desc) { - const upb_msgdef* map_msgdef = upb_fielddef_msgsubdef(fielddef); - map_handlerdata_t* hd = new_map_handlerdata(offset, map_msgdef, desc); - upb_handlerattr attr = UPB_HANDLERATTR_INIT; - - upb_handlers_addcleanup(h, hd, xfree); - attr.handler_data = hd; - upb_handlers_setstartsubmsg(h, fielddef, startmap_handler, &attr); - upb_handlers_setendsubmsg(h, fielddef, endmap_handler, &attr); -} - -// Adds handlers to a map-entry msgdef. -static void add_handlers_for_mapentry(const upb_msgdef* msgdef, upb_handlers* h, - const Descriptor* desc) { - const upb_fielddef* key_field = map_entry_key(msgdef); - const upb_fielddef* value_field = map_entry_value(msgdef); - map_handlerdata_t* hd = new_map_handlerdata(0, msgdef, desc); - upb_handlerattr attr = UPB_HANDLERATTR_INIT; - - upb_handlers_addcleanup(h, hd, xfree); - attr.handler_data = hd; - upb_handlers_setendmsg(h, endmapentry_handler, &attr); - - add_handlers_for_singular_field( - desc, h, key_field, - offsetof(map_parse_frame_t, key_storage), - MESSAGE_FIELD_NO_HASBIT); - add_handlers_for_singular_field( - desc, h, value_field, - offsetof(map_parse_frame_t, value_storage), - MESSAGE_FIELD_NO_HASBIT); -} - -static void add_handlers_for_wrapper(const upb_msgdef* msgdef, - upb_handlers* h) { - const upb_fielddef* f = upb_msgdef_itof(msgdef, 1); - switch (upb_msgdef_wellknowntype(msgdef)) { - case UPB_WELLKNOWN_DOUBLEVALUE: - upb_handlers_setdouble(h, f, doublewrapper_handler, NULL); - break; - case UPB_WELLKNOWN_FLOATVALUE: - upb_handlers_setfloat(h, f, floatwrapper_handler, NULL); - break; - case UPB_WELLKNOWN_INT64VALUE: - upb_handlers_setint64(h, f, int64wrapper_handler, NULL); - break; - case UPB_WELLKNOWN_UINT64VALUE: - upb_handlers_setuint64(h, f, uint64wrapper_handler, NULL); - break; - case UPB_WELLKNOWN_INT32VALUE: - upb_handlers_setint32(h, f, int32wrapper_handler, NULL); - break; - case UPB_WELLKNOWN_UINT32VALUE: - upb_handlers_setuint32(h, f, uint32wrapper_handler, NULL); - break; - case UPB_WELLKNOWN_STRINGVALUE: - upb_handlers_setstartstr(h, f, startstringwrapper_handler, NULL); - upb_handlers_setstring(h, f, stringwrapper_handler, NULL); - break; - case UPB_WELLKNOWN_BYTESVALUE: - upb_handlers_setstartstr(h, f, startbyteswrapper_handler, NULL); - upb_handlers_setstring(h, f, byteswrapper_handler, NULL); - break; - case UPB_WELLKNOWN_BOOLVALUE: - upb_handlers_setbool(h, f, boolwrapper_handler, NULL); - return; - default: - rb_raise(rb_eRuntimeError, - "Internal logic error with well-known types."); - } -} - -// Set up handlers for a oneof field. -static void add_handlers_for_oneof_field(upb_handlers *h, - const upb_fielddef *f, - size_t offset, - size_t oneof_case_offset, - const Descriptor* desc) { - upb_handlerattr attr = UPB_HANDLERATTR_INIT; - attr.handler_data = - newoneofhandlerdata(h, offset, oneof_case_offset, f, desc); - - switch (upb_fielddef_type(f)) { - -#define SET_HANDLER(utype, ltype) \ - case utype: \ - upb_handlers_set##ltype(h, f, oneof##ltype##_handler, &attr); \ - break; - - SET_HANDLER(UPB_TYPE_BOOL, bool); - SET_HANDLER(UPB_TYPE_INT32, int32); - SET_HANDLER(UPB_TYPE_UINT32, uint32); - SET_HANDLER(UPB_TYPE_ENUM, int32); - SET_HANDLER(UPB_TYPE_FLOAT, float); - SET_HANDLER(UPB_TYPE_INT64, int64); - SET_HANDLER(UPB_TYPE_UINT64, uint64); - SET_HANDLER(UPB_TYPE_DOUBLE, double); - -#undef SET_HANDLER - - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: { - bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES; - upb_handlers_setstartstr(h, f, is_bytes ? - oneofbytes_handler : oneofstr_handler, - &attr); - upb_handlers_setstring(h, f, stringdata_handler, NULL); - upb_handlers_setendstr(h, f, oneofstring_end_handler, &attr); - break; - } - case UPB_TYPE_MESSAGE: { - if (is_wrapper(upb_fielddef_msgsubdef(f))) { - upb_handlers_setstartsubmsg(h, f, oneof_startwrapper, &attr); - } else { - upb_handlers_setstartsubmsg(h, f, oneofsubmsg_handler, &attr); - } - break; - } - } -} - -static bool unknown_field_handler(void* closure, const void* hd, - const char* buf, size_t size) { - MessageHeader* msg = (MessageHeader*)closure; - UPB_UNUSED(hd); - - if (msg->unknown_fields == NULL) { - msg->unknown_fields = malloc(sizeof(stringsink)); - stringsink_init(msg->unknown_fields); - } - - stringsink_string(msg->unknown_fields, NULL, buf, size, NULL); - - return true; -} - -size_t get_field_offset(MessageLayout* layout, const upb_fielddef* f) { - return layout->fields[upb_fielddef_index(f)].offset + sizeof(MessageHeader); -} - -void add_handlers_for_message(const void *closure, upb_handlers *h) { - const VALUE descriptor_pool = (VALUE)closure; - const upb_msgdef* msgdef = upb_handlers_msgdef(h); - Descriptor* desc = - ruby_to_Descriptor(get_msgdef_obj(descriptor_pool, msgdef)); - upb_msg_field_iter i; - upb_handlerattr attr = UPB_HANDLERATTR_INIT; - - // Ensure layout exists. We may be invoked to create handlers for a given - // message if we are included as a submsg of another message type before our - // class is actually built, so to work around this, we just create the layout - // (and handlers, in the class-building function) on-demand. - if (desc->layout == NULL) { - create_layout(desc); - } - - // If this is a mapentry message type, set up a special set of handlers and - // bail out of the normal (user-defined) message type handling. - if (upb_msgdef_mapentry(msgdef)) { - add_handlers_for_mapentry(msgdef, h, desc); - return; - } - - // If this is a wrapper type, use special handlers and bail. - if (is_wrapper(msgdef)) { - add_handlers_for_wrapper(msgdef, h); - return; - } - - upb_handlers_setunknown(h, unknown_field_handler, &attr); - - for (upb_msg_field_begin(&i, desc->msgdef); - !upb_msg_field_done(&i); - upb_msg_field_next(&i)) { - const upb_fielddef *f = upb_msg_iter_field(&i); - const upb_oneofdef* oneof = upb_fielddef_realcontainingoneof(f); - size_t offset = get_field_offset(desc->layout, f); - - if (oneof) { - size_t oneof_case_offset = - desc->layout->oneofs[upb_oneofdef_index(oneof)].case_offset + - sizeof(MessageHeader); - add_handlers_for_oneof_field(h, f, offset, oneof_case_offset, desc); - } else if (is_map_field(f)) { - add_handlers_for_mapfield(h, f, offset, desc); - } else if (upb_fielddef_isseq(f)) { - add_handlers_for_repeated_field(h, desc, f, offset); - } else { - add_handlers_for_singular_field( - desc, h, f, offset, - desc->layout->fields[upb_fielddef_index(f)].hasbit); - } - } -} - -// Constructs the handlers for filling a message's data into an in-memory -// object. -const upb_handlers* get_fill_handlers(Descriptor* desc) { - DescriptorPool* pool = ruby_to_DescriptorPool(desc->descriptor_pool); - return upb_handlercache_get(pool->fill_handler_cache, desc->msgdef); -} - -static const upb_pbdecodermethod *msgdef_decodermethod(Descriptor* desc) { - DescriptorPool* pool = ruby_to_DescriptorPool(desc->descriptor_pool); - return upb_pbcodecache_get(pool->fill_method_cache, desc->msgdef); -} - -static const upb_json_parsermethod *msgdef_jsonparsermethod(Descriptor* desc) { - DescriptorPool* pool = ruby_to_DescriptorPool(desc->descriptor_pool); - return upb_json_codecache_get(pool->json_fill_method_cache, desc->msgdef); -} - -static const upb_handlers* msgdef_pb_serialize_handlers(Descriptor* desc) { - DescriptorPool* pool = ruby_to_DescriptorPool(desc->descriptor_pool); - return upb_handlercache_get(pool->pb_serialize_handler_cache, desc->msgdef); -} - -static const upb_handlers* msgdef_json_serialize_handlers( - Descriptor* desc, bool preserve_proto_fieldnames) { - DescriptorPool* pool = ruby_to_DescriptorPool(desc->descriptor_pool); - if (preserve_proto_fieldnames) { - return upb_handlercache_get(pool->json_serialize_handler_preserve_cache, - desc->msgdef); - } else { - return upb_handlercache_get(pool->json_serialize_handler_cache, - desc->msgdef); - } -} - - -// Stack-allocated context during an encode/decode operation. Contains the upb -// environment and its stack-based allocator, an initial buffer for allocations -// to avoid malloc() when possible, and a template for Ruby exception messages -// if any error occurs. -#define STACK_ENV_STACKBYTES 4096 -typedef struct { - upb_arena *arena; - upb_status status; - const char* ruby_error_template; - char allocbuf[STACK_ENV_STACKBYTES]; -} stackenv; - -static void stackenv_init(stackenv* se, const char* errmsg); -static void stackenv_uninit(stackenv* se); - -static void stackenv_init(stackenv* se, const char* errmsg) { - se->ruby_error_template = errmsg; - se->arena = - upb_arena_init(se->allocbuf, sizeof(se->allocbuf), &upb_alloc_global); - upb_status_clear(&se->status); -} - -static void stackenv_uninit(stackenv* se) { - upb_arena_free(se->arena); - - if (!upb_ok(&se->status)) { - // TODO(haberman): have a way to verify that this is actually a parse error, - // instead of just throwing "parse error" unconditionally. - VALUE errmsg = rb_str_new2(upb_status_errmsg(&se->status)); - rb_raise(cParseError, se->ruby_error_template, errmsg); - } -} - -/* - * call-seq: - * MessageClass.decode(data) => message - * - * Decodes the given data (as a string containing bytes in protocol buffers wire - * format) under the interpretration given by this message class's definition - * and returns a message object with the corresponding field values. - */ -VALUE Message_decode(VALUE klass, VALUE data) { - VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned); - Descriptor* desc = ruby_to_Descriptor(descriptor); - VALUE msgklass = Descriptor_msgclass(descriptor); - VALUE msg_rb; - MessageHeader* msg; - - if (TYPE(data) != T_STRING) { - rb_raise(rb_eArgError, "Expected string for binary protobuf data."); - } - - msg_rb = initialize_rb_class_with_no_args(msgklass); - TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); - - { - const upb_pbdecodermethod* method = msgdef_decodermethod(desc); - const upb_handlers* h = upb_pbdecodermethod_desthandlers(method); - const upb_msgdef* m = upb_handlers_msgdef(h); - VALUE wrapper = Qnil; - void* ptr = msg; - stackenv se; - upb_sink sink; - upb_pbdecoder* decoder; - stackenv_init(&se, "Error occurred during parsing: %" PRIsVALUE); - - if (is_wrapper(m)) { - ptr = &wrapper; - } - - upb_sink_reset(&sink, h, ptr); - decoder = upb_pbdecoder_create(se.arena, method, sink, &se.status); - upb_bufsrc_putbuf(RSTRING_PTR(data), RSTRING_LEN(data), - upb_pbdecoder_input(decoder)); - - stackenv_uninit(&se); - - if (is_wrapper(m)) { - msg_rb = ruby_wrapper_type(msgklass, wrapper); - } - } - - return msg_rb; -} - -/* - * call-seq: - * MessageClass.decode_json(data, options = {}) => message - * - * Decodes the given data (as a string containing bytes in protocol buffers wire - * format) under the interpretration given by this message class's definition - * and returns a message object with the corresponding field values. - * - * @param options [Hash] options for the decoder - * ignore_unknown_fields: set true to ignore unknown fields (default is to - * raise an error) - */ -VALUE Message_decode_json(int argc, VALUE* argv, VALUE klass) { - VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned); - Descriptor* desc = ruby_to_Descriptor(descriptor); - VALUE msgklass = Descriptor_msgclass(descriptor); - VALUE msg_rb; - VALUE data = argv[0]; - VALUE ignore_unknown_fields = Qfalse; - MessageHeader* msg; - - if (argc < 1 || argc > 2) { - rb_raise(rb_eArgError, "Expected 1 or 2 arguments."); - } - - if (argc == 2) { - VALUE hash_args = argv[1]; - if (TYPE(hash_args) != T_HASH) { - rb_raise(rb_eArgError, "Expected hash arguments."); - } - - ignore_unknown_fields = rb_hash_lookup2( - hash_args, ID2SYM(rb_intern("ignore_unknown_fields")), Qfalse); - } - - if (TYPE(data) != T_STRING) { - rb_raise(rb_eArgError, "Expected string for JSON data."); - } - - // TODO(cfallin): Check and respect string encoding. If not UTF-8, we need to - // convert, because string handlers pass data directly to message string - // fields. - - msg_rb = initialize_rb_class_with_no_args(msgklass); - TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); - - { - const upb_json_parsermethod* method = msgdef_jsonparsermethod(desc); - const upb_handlers* h = get_fill_handlers(desc); - const upb_msgdef* m = upb_handlers_msgdef(h); - stackenv se; - upb_sink sink; - upb_json_parser* parser; - DescriptorPool* pool = ruby_to_DescriptorPool(generated_pool); - stackenv_init(&se, "Error occurred during parsing: %" PRIsVALUE); - - if (is_wrapper(m)) { - rb_raise( - rb_eRuntimeError, - "Parsing a wrapper type from JSON at the top level does not work."); - } - - upb_sink_reset(&sink, h, msg); - parser = upb_json_parser_create(se.arena, method, pool->symtab, sink, - &se.status, RTEST(ignore_unknown_fields)); - upb_bufsrc_putbuf(RSTRING_PTR(data), RSTRING_LEN(data), - upb_json_parser_input(parser)); - - stackenv_uninit(&se); - } - - return msg_rb; -} - -// ----------------------------------------------------------------------------- -// Serializing. -// ----------------------------------------------------------------------------- - -/* msgvisitor *****************************************************************/ - -static void putmsg(VALUE msg, const Descriptor* desc, upb_sink sink, int depth, - bool emit_defaults, bool is_json, bool open_msg); - -static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) { - upb_selector_t ret; - bool ok = upb_handlers_getselector(f, type, &ret); - UPB_ASSERT(ok); - return ret; -} - -static void putstr(VALUE str, const upb_fielddef *f, upb_sink sink) { - upb_sink subsink; - - if (str == Qnil) return; - - assert(BUILTIN_TYPE(str) == RUBY_T_STRING); - - // We should be guaranteed that the string has the correct encoding because - // we ensured this at assignment time and then froze the string. - if (upb_fielddef_type(f) == UPB_TYPE_STRING) { - assert(rb_enc_from_index(ENCODING_GET(str)) == kRubyStringUtf8Encoding); - } else { - assert(rb_enc_from_index(ENCODING_GET(str)) == kRubyString8bitEncoding); - } - - upb_sink_startstr(sink, getsel(f, UPB_HANDLER_STARTSTR), RSTRING_LEN(str), - &subsink); - upb_sink_putstring(subsink, getsel(f, UPB_HANDLER_STRING), RSTRING_PTR(str), - RSTRING_LEN(str), NULL); - upb_sink_endstr(sink, getsel(f, UPB_HANDLER_ENDSTR)); -} - -static void putsubmsg(VALUE submsg, const upb_fielddef *f, upb_sink sink, - int depth, bool emit_defaults, bool is_json) { - upb_sink subsink; - VALUE descriptor; - Descriptor* subdesc; - - if (submsg == Qnil) return; - - descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned); - subdesc = ruby_to_Descriptor(descriptor); - - upb_sink_startsubmsg(sink, getsel(f, UPB_HANDLER_STARTSUBMSG), &subsink); - putmsg(submsg, subdesc, subsink, depth + 1, emit_defaults, is_json, true); - upb_sink_endsubmsg(sink, subsink, getsel(f, UPB_HANDLER_ENDSUBMSG)); -} - -static void putary(VALUE ary, const upb_fielddef* f, upb_sink sink, int depth, - bool emit_defaults, bool is_json) { - upb_sink subsink; - upb_fieldtype_t type = upb_fielddef_type(f); - upb_selector_t sel = 0; - int size; - int i; - VALUE type_class = ruby_to_RepeatedField(ary)->field_type_class; - - if (ary == Qnil) return; - if (!emit_defaults && NUM2INT(RepeatedField_length(ary)) == 0) return; - - size = NUM2INT(RepeatedField_length(ary)); - if (size == 0 && !emit_defaults) return; - - upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink); - - if (upb_fielddef_isprimitive(f)) { - sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); - } - - for (i = 0; i < size; i++) { - void* memory = RepeatedField_index_native(ary, i); - switch (type) { -#define T(upbtypeconst, upbtype, ctype) \ - case upbtypeconst: \ - upb_sink_put##upbtype(subsink, sel, *((ctype*)memory)); \ - break; - - T(UPB_TYPE_FLOAT, float, float) - T(UPB_TYPE_DOUBLE, double, double) - T(UPB_TYPE_BOOL, bool, int8_t) - case UPB_TYPE_ENUM: - T(UPB_TYPE_INT32, int32, int32_t) - T(UPB_TYPE_UINT32, uint32, uint32_t) - T(UPB_TYPE_INT64, int64, int64_t) - T(UPB_TYPE_UINT64, uint64, uint64_t) - - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - putstr(*((VALUE *)memory), f, subsink); - break; - case UPB_TYPE_MESSAGE: { - VALUE val = native_slot_get(UPB_TYPE_MESSAGE, type_class, memory); - putsubmsg(val, f, subsink, depth, emit_defaults, is_json); - break; - } - -#undef T - - } - } - upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ)); -} - -static void put_ruby_value(VALUE value, const upb_fielddef* f, VALUE type_class, - int depth, upb_sink sink, bool emit_defaults, - bool is_json) { - upb_selector_t sel = 0; - - if (depth > ENCODE_MAX_NESTING) { - rb_raise(rb_eRuntimeError, - "Maximum recursion depth exceeded during encoding."); - } - - if (upb_fielddef_isprimitive(f)) { - sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); - } - - switch (upb_fielddef_type(f)) { - case UPB_TYPE_INT32: - upb_sink_putint32(sink, sel, NUM2INT(value)); - break; - case UPB_TYPE_INT64: - upb_sink_putint64(sink, sel, NUM2LL(value)); - break; - case UPB_TYPE_UINT32: - upb_sink_putuint32(sink, sel, NUM2UINT(value)); - break; - case UPB_TYPE_UINT64: - upb_sink_putuint64(sink, sel, NUM2ULL(value)); - break; - case UPB_TYPE_FLOAT: - upb_sink_putfloat(sink, sel, NUM2DBL(value)); - break; - case UPB_TYPE_DOUBLE: - upb_sink_putdouble(sink, sel, NUM2DBL(value)); - break; - case UPB_TYPE_ENUM: { - if (TYPE(value) == T_SYMBOL) { - value = rb_funcall(type_class, rb_intern("resolve"), 1, value); - } - upb_sink_putint32(sink, sel, NUM2INT(value)); - break; - } - case UPB_TYPE_BOOL: - upb_sink_putbool(sink, sel, value == Qtrue); - break; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - putstr(value, f, sink); - break; - case UPB_TYPE_MESSAGE: - putsubmsg(value, f, sink, depth, emit_defaults, is_json); - } -} - -static void putmap(VALUE map, const upb_fielddef* f, upb_sink sink, int depth, - bool emit_defaults, bool is_json) { - Map* self; - upb_sink subsink; - const upb_fielddef* key_field; - const upb_fielddef* value_field; - Map_iter it; - - if (map == Qnil) return; - if (!emit_defaults && Map_length(map) == 0) return; - - self = ruby_to_Map(map); - - upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink); - - assert(upb_fielddef_type(f) == UPB_TYPE_MESSAGE); - key_field = map_field_key(f); - value_field = map_field_value(f); - - for (Map_begin(map, &it); !Map_done(&it); Map_next(&it)) { - VALUE key = Map_iter_key(&it); - VALUE value = Map_iter_value(&it); - upb_status status; - - upb_sink entry_sink; - upb_sink_startsubmsg(subsink, getsel(f, UPB_HANDLER_STARTSUBMSG), - &entry_sink); - upb_sink_startmsg(entry_sink); - - put_ruby_value(key, key_field, Qnil, depth + 1, entry_sink, emit_defaults, - is_json); - put_ruby_value(value, value_field, self->value_type_class, depth + 1, - entry_sink, emit_defaults, is_json); - - upb_sink_endmsg(entry_sink, &status); - upb_sink_endsubmsg(subsink, entry_sink, getsel(f, UPB_HANDLER_ENDSUBMSG)); - } - - upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ)); -} - -static const upb_handlers* msgdef_json_serialize_handlers( - Descriptor* desc, bool preserve_proto_fieldnames); - -static void putjsonany(VALUE msg_rb, const Descriptor* desc, upb_sink sink, - int depth, bool emit_defaults) { - upb_status status; - MessageHeader* msg = NULL; - const upb_fielddef* type_field = upb_msgdef_itof(desc->msgdef, UPB_ANY_TYPE); - const upb_fielddef* value_field = upb_msgdef_itof(desc->msgdef, UPB_ANY_VALUE); - - size_t type_url_offset; - VALUE type_url_str_rb; - const upb_msgdef *payload_type = NULL; - - TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); - - upb_sink_startmsg(sink); - - /* Handle type url */ - type_url_offset = desc->layout->fields[upb_fielddef_index(type_field)].offset; - type_url_str_rb = DEREF(Message_data(msg), type_url_offset, VALUE); - if (RSTRING_LEN(type_url_str_rb) > 0) { - putstr(type_url_str_rb, type_field, sink); - } - - { - const char* type_url_str = RSTRING_PTR(type_url_str_rb); - size_t type_url_len = RSTRING_LEN(type_url_str_rb); - DescriptorPool* pool = ruby_to_DescriptorPool(generated_pool); - - if (type_url_len <= 20 || - strncmp(type_url_str, "type.googleapis.com/", 20) != 0) { - rb_raise(rb_eRuntimeError, "Invalid type url: %s", type_url_str); - return; - } - - /* Resolve type url */ - type_url_str += 20; - type_url_len -= 20; - - payload_type = upb_symtab_lookupmsg2( - pool->symtab, type_url_str, type_url_len); - if (payload_type == NULL) { - rb_raise(rb_eRuntimeError, "Unknown type: %s", type_url_str); - return; - } - } - - { - uint32_t value_offset; - VALUE value_str_rb; - size_t value_len; - - value_offset = desc->layout->fields[upb_fielddef_index(value_field)].offset; - value_str_rb = DEREF(Message_data(msg), value_offset, VALUE); - value_len = RSTRING_LEN(value_str_rb); - - if (value_len > 0) { - VALUE payload_desc_rb = get_msgdef_obj(generated_pool, payload_type); - Descriptor* payload_desc = ruby_to_Descriptor(payload_desc_rb); - VALUE payload_class = Descriptor_msgclass(payload_desc_rb); - upb_sink subsink; - bool is_wellknown; - - VALUE payload_msg_rb = Message_decode(payload_class, value_str_rb); - - is_wellknown = - upb_msgdef_wellknowntype(payload_desc->msgdef) != - UPB_WELLKNOWN_UNSPECIFIED; - if (is_wellknown) { - upb_sink_startstr(sink, getsel(value_field, UPB_HANDLER_STARTSTR), 0, - &subsink); - } - - subsink.handlers = - msgdef_json_serialize_handlers(payload_desc, true); - subsink.closure = sink.closure; - putmsg(payload_msg_rb, payload_desc, subsink, depth, emit_defaults, true, - is_wellknown); - } - } - - upb_sink_endmsg(sink, &status); -} - -static void putjsonlistvalue( - VALUE msg_rb, const Descriptor* desc, - upb_sink sink, int depth, bool emit_defaults) { - upb_status status; - upb_sink subsink; - MessageHeader* msg = NULL; - const upb_fielddef* f = upb_msgdef_itof(desc->msgdef, 1); - uint32_t offset = - desc->layout->fields[upb_fielddef_index(f)].offset + - sizeof(MessageHeader); - VALUE ary; - - TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); - - upb_sink_startmsg(sink); - - ary = DEREF(msg, offset, VALUE); - - if (ary == Qnil || RepeatedField_size(ary) == 0) { - upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink); - upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ)); - } else { - putary(ary, f, sink, depth, emit_defaults, true); - } - - upb_sink_endmsg(sink, &status); -} - -static void putmsg(VALUE msg_rb, const Descriptor* desc, - upb_sink sink, int depth, bool emit_defaults, - bool is_json, bool open_msg) { - MessageHeader* msg; - upb_msg_field_iter i; - upb_status status; - bool json_wrapper = is_wrapper(desc->msgdef) && is_json; - - if (is_json && - upb_msgdef_wellknowntype(desc->msgdef) == UPB_WELLKNOWN_ANY) { - putjsonany(msg_rb, desc, sink, depth, emit_defaults); - return; - } - - if (is_json && - upb_msgdef_wellknowntype(desc->msgdef) == UPB_WELLKNOWN_LISTVALUE) { - putjsonlistvalue(msg_rb, desc, sink, depth, emit_defaults); - return; - } - - if (open_msg) { - upb_sink_startmsg(sink); - } - - // Protect against cycles (possible because users may freely reassign message - // and repeated fields) by imposing a maximum recursion depth. - if (depth > ENCODE_MAX_NESTING) { - rb_raise(rb_eRuntimeError, - "Maximum recursion depth exceeded during encoding."); - } - - TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); - - if (desc != msg->descriptor) { - rb_raise(rb_eArgError, - "The type of given msg is '%s', expect '%s'.", - upb_msgdef_fullname(msg->descriptor->msgdef), - upb_msgdef_fullname(desc->msgdef)); - } - - for (upb_msg_field_begin(&i, desc->msgdef); - !upb_msg_field_done(&i); - upb_msg_field_next(&i)) { - upb_fielddef *f = upb_msg_iter_field(&i); - const upb_oneofdef* oneof = upb_fielddef_realcontainingoneof(f); - bool is_matching_oneof = false; - uint32_t offset = - desc->layout->fields[upb_fielddef_index(f)].offset + - sizeof(MessageHeader); - - if (oneof) { - uint32_t oneof_case = - slot_read_oneof_case(desc->layout, Message_data(msg), oneof); - // For a oneof, check that this field is actually present -- skip all the - // below if not. - if (oneof_case != upb_fielddef_number(f)) { - continue; - } - // Otherwise, fall through to the appropriate singular-field handler - // below. - is_matching_oneof = true; - } - - if (is_map_field(f)) { - VALUE map = DEREF(msg, offset, VALUE); - if (map != Qnil || emit_defaults) { - putmap(map, f, sink, depth, emit_defaults, is_json); - } - } else if (upb_fielddef_isseq(f)) { - VALUE ary = DEREF(msg, offset, VALUE); - if (ary != Qnil) { - putary(ary, f, sink, depth, emit_defaults, is_json); - } - } else if (upb_fielddef_isstring(f)) { - VALUE str = DEREF(msg, offset, VALUE); - bool is_default = false; - - if (upb_msgdef_syntax(desc->msgdef) == UPB_SYNTAX_PROTO2) { - is_default = layout_has(desc->layout, Message_data(msg), f) == Qfalse; - } else if (upb_msgdef_syntax(desc->msgdef) == UPB_SYNTAX_PROTO3) { - is_default = RSTRING_LEN(str) == 0; - } - - if (is_matching_oneof || emit_defaults || !is_default || json_wrapper) { - putstr(str, f, sink); - } - } else if (upb_fielddef_issubmsg(f)) { - // OPT: could try to avoid the layout_get() (which will expand lazy - // wrappers). - VALUE val = layout_get(desc->layout, Message_data(msg), f); - putsubmsg(val, f, sink, depth, emit_defaults, is_json); - } else { - upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); - -#define T(upbtypeconst, upbtype, ctype, default_value) \ - case upbtypeconst: { \ - ctype value = DEREF(msg, offset, ctype); \ - bool is_default = false; \ - if (upb_fielddef_haspresence(f)) { \ - is_default = layout_has(desc->layout, Message_data(msg), f) == Qfalse; \ - } else if (upb_msgdef_syntax(desc->msgdef) == UPB_SYNTAX_PROTO3) { \ - is_default = default_value == value; \ - } \ - if (is_matching_oneof || emit_defaults || !is_default || json_wrapper) { \ - upb_sink_put##upbtype(sink, sel, value); \ - } \ - } break; - - switch (upb_fielddef_type(f)) { - T(UPB_TYPE_FLOAT, float, float, 0.0) - T(UPB_TYPE_DOUBLE, double, double, 0.0) - T(UPB_TYPE_BOOL, bool, uint8_t, 0) - case UPB_TYPE_ENUM: - T(UPB_TYPE_INT32, int32, int32_t, 0) - T(UPB_TYPE_UINT32, uint32, uint32_t, 0) - T(UPB_TYPE_INT64, int64, int64_t, 0) - T(UPB_TYPE_UINT64, uint64, uint64_t, 0) - - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - case UPB_TYPE_MESSAGE: rb_raise(rb_eRuntimeError, "Internal error."); - } - -#undef T - } - } - - { - stringsink* unknown = msg->unknown_fields; - if (unknown != NULL) { - upb_sink_putunknown(sink, unknown->ptr, unknown->len); - } - } - - if (open_msg) { - upb_sink_endmsg(sink, &status); - } -} - -/* - * call-seq: - * MessageClass.encode(msg) => bytes - * - * Encodes the given message object to its serialized form in protocol buffers - * wire format. - */ -VALUE Message_encode(VALUE klass, VALUE msg_rb) { - VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned); - Descriptor* desc = ruby_to_Descriptor(descriptor); - - stringsink sink; - stringsink_init(&sink); - - { - const upb_handlers* serialize_handlers = - msgdef_pb_serialize_handlers(desc); - - stackenv se; - upb_pb_encoder* encoder; - VALUE ret; - - stackenv_init(&se, "Error occurred during encoding: %" PRIsVALUE); - encoder = upb_pb_encoder_create(se.arena, serialize_handlers, sink.sink); - - putmsg(msg_rb, desc, upb_pb_encoder_input(encoder), 0, false, false, true); - - ret = rb_str_new(sink.ptr, sink.len); - - stackenv_uninit(&se); - stringsink_uninit(&sink); - - return ret; - } -} - -/* - * call-seq: - * MessageClass.encode_json(msg, options = {}) => json_string - * - * Encodes the given message object into its serialized JSON representation. - * @param options [Hash] options for the decoder - * preserve_proto_fieldnames: set true to use original fieldnames (default is to camelCase) - * emit_defaults: set true to emit 0/false values (default is to omit them) - */ -VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) { - VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned); - Descriptor* desc = ruby_to_Descriptor(descriptor); - VALUE msg_rb; - VALUE preserve_proto_fieldnames = Qfalse; - VALUE emit_defaults = Qfalse; - stringsink sink; - - if (argc < 1 || argc > 2) { - rb_raise(rb_eArgError, "Expected 1 or 2 arguments."); - } - - msg_rb = argv[0]; - - if (argc == 2) { - VALUE hash_args = argv[1]; - if (TYPE(hash_args) != T_HASH) { - rb_raise(rb_eArgError, "Expected hash arguments."); - } - preserve_proto_fieldnames = rb_hash_lookup2( - hash_args, ID2SYM(rb_intern("preserve_proto_fieldnames")), Qfalse); - - emit_defaults = rb_hash_lookup2( - hash_args, ID2SYM(rb_intern("emit_defaults")), Qfalse); - } - - stringsink_init(&sink); - - { - const upb_handlers* serialize_handlers = - msgdef_json_serialize_handlers(desc, RTEST(preserve_proto_fieldnames)); - upb_json_printer* printer; - stackenv se; - VALUE ret; - - stackenv_init(&se, "Error occurred during encoding: %" PRIsVALUE); - printer = upb_json_printer_create(se.arena, serialize_handlers, sink.sink); - - putmsg(msg_rb, desc, upb_json_printer_input(printer), 0, - RTEST(emit_defaults), true, true); - - ret = rb_enc_str_new(sink.ptr, sink.len, rb_utf8_encoding()); - - stackenv_uninit(&se); - stringsink_uninit(&sink); - - return ret; - } -} - -static void discard_unknown(VALUE msg_rb, const Descriptor* desc) { - MessageHeader* msg; - upb_msg_field_iter it; - - TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); - - { - stringsink* unknown = msg->unknown_fields; - if (unknown != NULL) { - stringsink_uninit(unknown); - msg->unknown_fields = NULL; - } - } - - for (upb_msg_field_begin(&it, desc->msgdef); - !upb_msg_field_done(&it); - upb_msg_field_next(&it)) { - upb_fielddef *f = upb_msg_iter_field(&it); - const upb_oneofdef* oneof = upb_fielddef_realcontainingoneof(f); - uint32_t offset = - desc->layout->fields[upb_fielddef_index(f)].offset + - sizeof(MessageHeader); - - if (oneof) { - uint32_t oneof_case = - slot_read_oneof_case(desc->layout, Message_data(msg), oneof); - // For a oneof, check that this field is actually present -- skip all the - // below if not. - if (oneof_case != upb_fielddef_number(f)) { - continue; - } - // Otherwise, fall through to the appropriate singular-field handler - // below. - } - - if (!upb_fielddef_issubmsg(f)) { - continue; - } - - if (is_map_field(f)) { - VALUE map; - Map_iter map_it; - - if (!upb_fielddef_issubmsg(map_field_value(f))) continue; - map = DEREF(msg, offset, VALUE); - if (map == Qnil) continue; - for (Map_begin(map, &map_it); !Map_done(&map_it); Map_next(&map_it)) { - VALUE submsg = Map_iter_value(&map_it); - VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned); - const Descriptor* subdesc = ruby_to_Descriptor(descriptor); - discard_unknown(submsg, subdesc); - } - } else if (upb_fielddef_isseq(f)) { - VALUE ary = DEREF(msg, offset, VALUE); - int size; - int i; - - if (ary == Qnil) continue; - size = NUM2INT(RepeatedField_length(ary)); - for (i = 0; i < size; i++) { - void* memory = RepeatedField_index_native(ary, i); - VALUE submsg = *((VALUE *)memory); - VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned); - const Descriptor* subdesc = ruby_to_Descriptor(descriptor); - discard_unknown(submsg, subdesc); - } - } else { - VALUE submsg = DEREF(msg, offset, VALUE); - VALUE descriptor; - const Descriptor* subdesc; - - if (submsg == Qnil) continue; - descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned); - subdesc = ruby_to_Descriptor(descriptor); - discard_unknown(submsg, subdesc); - } - } -} - -/* - * call-seq: - * Google::Protobuf.discard_unknown(msg) - * - * Discard unknown fields in the given message object and recursively discard - * unknown fields in submessages. - */ -VALUE Google_Protobuf_discard_unknown(VALUE self, VALUE msg_rb) { - VALUE klass = CLASS_OF(msg_rb); - VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned); - Descriptor* desc = ruby_to_Descriptor(descriptor); - if (klass == cRepeatedField || klass == cMap) { - rb_raise(rb_eArgError, "Expected proto msg for discard unknown."); - } else { - discard_unknown(msg_rb, desc); - } - return Qnil; -} diff --git a/ruby/ext/google/protobuf_c/extconf.rb b/ruby/ext/google/protobuf_c/extconf.rb index 80b7985000b7..ec17787f79fd 100755 --- a/ruby/ext/google/protobuf_c/extconf.rb +++ b/ruby/ext/google/protobuf_c/extconf.rb @@ -3,9 +3,9 @@ require 'mkmf' if RUBY_PLATFORM =~ /darwin/ || RUBY_PLATFORM =~ /linux/ - $CFLAGS += " -std=gnu90 -O3 -DNDEBUG -Wall -Wdeclaration-after-statement -Wsign-compare" + $CFLAGS += " -std=gnu99 -O3 -DNDEBUG -fvisibility=hidden -Wall -Wsign-compare -Wno-declaration-after-statement" else - $CFLAGS += " -std=gnu90 -O3 -DNDEBUG" + $CFLAGS += " -std=gnu99 -O3 -DNDEBUG" end @@ -14,8 +14,7 @@ $LDFLAGS += " -Wl,-wrap,memcpy" end -$objs = ["protobuf.o", "defs.o", "storage.o", "message.o", - "repeated_field.o", "map.o", "encode_decode.o", "upb.o", - "wrap_memcpy.o"] +$objs = ["protobuf.o", "convert.o", "defs.o", "message.o", + "repeated_field.o", "map.o", "ruby-upb.o", "wrap_memcpy.o"] create_makefile("google/protobuf_c") diff --git a/ruby/ext/google/protobuf_c/map.c b/ruby/ext/google/protobuf_c/map.c index 00d23a76fa30..9d7d16b52923 100644 --- a/ruby/ext/google/protobuf_c/map.c +++ b/ruby/ext/google/protobuf_c/map.c @@ -28,170 +28,231 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#include "convert.h" +#include "defs.h" +#include "message.h" #include "protobuf.h" // ----------------------------------------------------------------------------- -// Basic map operations on top of upb's strtable. +// Basic map operations on top of upb_map. // // Note that we roll our own `Map` container here because, as for // `RepeatedField`, we want a strongly-typed container. This is so that any user // errors due to incorrect map key or value types are raised as close as // possible to the error site, rather than at some deferred point (e.g., // serialization). -// -// We build our `Map` on top of upb_strtable so that we're able to take -// advantage of the native_slot storage abstraction, as RepeatedField does. -// (This is not quite a perfect mapping -- see the key conversions below -- but -// gives us full support and error-checking for all value types for free.) // ----------------------------------------------------------------------------- -// Map values are stored using the native_slot abstraction (as with repeated -// field values), but keys are a bit special. Since we use a strtable, we need -// to store keys as sequences of bytes such that equality of those bytes maps -// one-to-one to equality of keys. We store strings directly (i.e., they map to -// their own bytes) and integers as native integers (using the native_slot -// abstraction). - -// Note that there is another tradeoff here in keeping string keys as native -// strings rather than Ruby strings: traversing the Map requires conversion to -// Ruby string values on every traversal, potentially creating more garbage. We -// should consider ways to cache a Ruby version of the key if this becomes an -// issue later. - -// Forms a key to use with the underlying strtable from a Ruby key value. |buf| -// must point to TABLE_KEY_BUF_LENGTH bytes of temporary space, used to -// construct a key byte sequence if needed. |out_key| and |out_length| provide -// the resulting key data/length. -#define TABLE_KEY_BUF_LENGTH 8 // sizeof(uint64_t) -static VALUE table_key(Map* self, VALUE key, - char* buf, - const char** out_key, - size_t* out_length) { - switch (self->key_type) { - case UPB_TYPE_BYTES: - case UPB_TYPE_STRING: - // Strings: use string content directly. - if (TYPE(key) == T_SYMBOL) { - key = rb_id2str(SYM2ID(key)); - } - Check_Type(key, T_STRING); - key = native_slot_encode_and_freeze_string(self->key_type, key); - *out_key = RSTRING_PTR(key); - *out_length = RSTRING_LEN(key); - break; - - case UPB_TYPE_BOOL: - case UPB_TYPE_INT32: - case UPB_TYPE_INT64: - case UPB_TYPE_UINT32: - case UPB_TYPE_UINT64: - native_slot_set("", self->key_type, Qnil, buf, key); - *out_key = buf; - *out_length = native_slot_size(self->key_type); - break; - - default: - // Map constructor should not allow a Map with another key type to be - // constructed. - assert(false); - break; - } - - return key; -} - -static VALUE table_key_to_ruby(Map* self, upb_strview key) { - switch (self->key_type) { - case UPB_TYPE_BYTES: - case UPB_TYPE_STRING: { - VALUE ret = rb_str_new(key.data, key.size); - rb_enc_associate(ret, - (self->key_type == UPB_TYPE_BYTES) ? - kRubyString8bitEncoding : kRubyStringUtf8Encoding); - return ret; - } - - case UPB_TYPE_BOOL: - case UPB_TYPE_INT32: - case UPB_TYPE_INT64: - case UPB_TYPE_UINT32: - case UPB_TYPE_UINT64: - return native_slot_get(self->key_type, Qnil, key.data); - - default: - assert(false); - return Qnil; - } -} - -static void* value_memory(upb_value* v) { - return (void*)(&v->val); -} - // ----------------------------------------------------------------------------- // Map container type. // ----------------------------------------------------------------------------- +typedef struct { + const upb_map *map; // Can convert to mutable when non-frozen. + upb_fieldtype_t key_type; + TypeInfo value_type_info; + VALUE value_type_class; + VALUE arena; +} Map; + +static void Map_mark(void* _self) { + Map* self = _self; + rb_gc_mark(self->value_type_class); + rb_gc_mark(self->arena); +} + const rb_data_type_t Map_type = { "Google::Protobuf::Map", - { Map_mark, Map_free, NULL }, + { Map_mark, RUBY_DEFAULT_FREE, NULL }, + .flags = RUBY_TYPED_FREE_IMMEDIATELY, }; VALUE cMap; -Map* ruby_to_Map(VALUE _self) { +static Map* ruby_to_Map(VALUE _self) { Map* self; TypedData_Get_Struct(_self, Map, &Map_type, self); return self; } -void Map_mark(void* _self) { - Map* self = _self; +static VALUE Map_alloc(VALUE klass) { + Map* self = ALLOC(Map); + self->map = NULL; + self->value_type_class = Qnil; + self->value_type_info.def.msgdef = NULL; + self->arena = Qnil; + return TypedData_Wrap_Struct(klass, &Map_type, self); +} - rb_gc_mark(self->value_type_class); - rb_gc_mark(self->parse_frame); - - if (self->value_type == UPB_TYPE_STRING || - self->value_type == UPB_TYPE_BYTES || - self->value_type == UPB_TYPE_MESSAGE) { - upb_strtable_iter it; - for (upb_strtable_begin(&it, &self->table); - !upb_strtable_done(&it); - upb_strtable_next(&it)) { - upb_value v = upb_strtable_iter_value(&it); - void* mem = value_memory(&v); - native_slot_mark(self->value_type, mem); +VALUE Map_GetRubyWrapper(upb_map* map, upb_fieldtype_t key_type, + TypeInfo value_type, VALUE arena) { + PBRUBY_ASSERT(map); + + VALUE val = ObjectCache_Get(map); + + if (val == Qnil) { + val = Map_alloc(cMap); + Map* self; + ObjectCache_Add(map, val, Arena_get(arena)); + TypedData_Get_Struct(val, Map, &Map_type, self); + self->map = map; + self->arena = arena; + self->key_type = key_type; + self->value_type_info = value_type; + if (self->value_type_info.type == UPB_TYPE_MESSAGE) { + const upb_msgdef *val_m = self->value_type_info.def.msgdef; + self->value_type_class = Descriptor_DefToClass(val_m); } } + + return val; } -void Map_free(void* _self) { - Map* self = _self; - upb_strtable_uninit(&self->table); - xfree(self); +static VALUE Map_new_this_type(Map *from) { + VALUE arena_rb = Arena_new(); + upb_map* map = upb_map_new(Arena_get(arena_rb), from->key_type, + from->value_type_info.type); + VALUE ret = + Map_GetRubyWrapper(map, from->key_type, from->value_type_info, arena_rb); + PBRUBY_ASSERT(ruby_to_Map(ret)->value_type_class == from->value_type_class); + return ret; } -VALUE Map_alloc(VALUE klass) { - Map* self = ALLOC(Map); - memset(self, 0, sizeof(Map)); - self->value_type_class = Qnil; - return TypedData_Wrap_Struct(klass, &Map_type, self); +static TypeInfo Map_keyinfo(Map* self) { + TypeInfo ret; + ret.type = self->key_type; + ret.def.msgdef = NULL; + return ret; } -VALUE Map_set_frame(VALUE map, VALUE val) { - Map* self = ruby_to_Map(map); - self->parse_frame = val; - return val; +static upb_map *Map_GetMutable(VALUE _self) { + rb_check_frozen(_self); + return (upb_map*)ruby_to_Map(_self)->map; } -static bool needs_typeclass(upb_fieldtype_t type) { - switch (type) { - case UPB_TYPE_MESSAGE: - case UPB_TYPE_ENUM: - return true; - default: - return false; +VALUE Map_CreateHash(const upb_map* map, upb_fieldtype_t key_type, + TypeInfo val_info) { + VALUE hash = rb_hash_new(); + size_t iter = UPB_MAP_BEGIN; + TypeInfo key_info = TypeInfo_from_type(key_type); + + if (!map) return hash; + + while (upb_mapiter_next(map, &iter)) { + upb_msgval key = upb_mapiter_key(map, iter); + upb_msgval val = upb_mapiter_value(map, iter); + VALUE key_val = Convert_UpbToRuby(key, key_info, Qnil); + VALUE val_val = Scalar_CreateHash(val, val_info); + rb_hash_aset(hash, key_val, val_val); + } + + return hash; +} + +VALUE Map_deep_copy(VALUE obj) { + Map* self = ruby_to_Map(obj); + VALUE new_arena_rb = Arena_new(); + upb_arena *arena = Arena_get(new_arena_rb); + upb_map* new_map = + upb_map_new(arena, self->key_type, self->value_type_info.type); + size_t iter = UPB_MAP_BEGIN; + while (upb_mapiter_next(self->map, &iter)) { + upb_msgval key = upb_mapiter_key(self->map, iter); + upb_msgval val = upb_mapiter_value(self->map, iter); + upb_msgval val_copy = Msgval_DeepCopy(val, self->value_type_info, arena); + upb_map_set(new_map, key, val_copy, arena); + } + + return Map_GetRubyWrapper(new_map, self->key_type, self->value_type_info, + new_arena_rb); +} + +const upb_map* Map_GetUpbMap(VALUE val, const upb_fielddef *field) { + const upb_fielddef* key_field = map_field_key(field); + const upb_fielddef* value_field = map_field_value(field); + TypeInfo value_type_info = TypeInfo_get(value_field); + Map* self; + + if (!RB_TYPE_P(val, T_DATA) || !RTYPEDDATA_P(val) || + RTYPEDDATA_TYPE(val) != &Map_type) { + rb_raise(cTypeError, "Expected Map instance"); + } + + self = ruby_to_Map(val); + if (self->key_type != upb_fielddef_type(key_field)) { + rb_raise(cTypeError, "Map key type does not match field's key type"); + } + if (self->value_type_info.type != value_type_info.type) { + rb_raise(cTypeError, "Map value type does not match field's value type"); + } + if (self->value_type_info.def.msgdef != value_type_info.def.msgdef) { + rb_raise(cTypeError, "Map value type has wrong message/enum class"); + } + + return self->map; +} + +void Map_Inspect(StringBuilder* b, const upb_map* map, upb_fieldtype_t key_type, + TypeInfo val_type) { + bool first = true; + TypeInfo key_type_info = {key_type}; + StringBuilder_Printf(b, "{"); + if (map) { + size_t iter = UPB_MAP_BEGIN; + while (upb_mapiter_next(map, &iter)) { + upb_msgval key = upb_mapiter_key(map, iter); + upb_msgval val = upb_mapiter_value(map, iter); + if (first) { + first = false; + } else { + StringBuilder_Printf(b, ", "); + } + StringBuilder_PrintMsgval(b, key, key_type_info); + StringBuilder_Printf(b, "=>"); + StringBuilder_PrintMsgval(b, val, val_type); + } + } + StringBuilder_Printf(b, "}"); +} + +static int merge_into_self_callback(VALUE key, VALUE val, VALUE _self) { + Map* self = ruby_to_Map(_self); + upb_arena *arena = Arena_get(self->arena); + upb_msgval key_val = Convert_RubyToUpb(key, "", Map_keyinfo(self), arena); + upb_msgval val_val = Convert_RubyToUpb(val, "", self->value_type_info, arena); + upb_map_set(Map_GetMutable(_self), key_val, val_val, arena); + return ST_CONTINUE; +} + +// Used only internally -- shared by #merge and #initialize. +static VALUE Map_merge_into_self(VALUE _self, VALUE hashmap) { + if (TYPE(hashmap) == T_HASH) { + rb_hash_foreach(hashmap, merge_into_self_callback, _self); + } else if (RB_TYPE_P(hashmap, T_DATA) && RTYPEDDATA_P(hashmap) && + RTYPEDDATA_TYPE(hashmap) == &Map_type) { + Map* self = ruby_to_Map(_self); + Map* other = ruby_to_Map(hashmap); + upb_arena *arena = Arena_get(self->arena); + upb_msg *self_msg = Map_GetMutable(_self); + size_t iter = UPB_MAP_BEGIN; + + upb_arena_fuse(arena, Arena_get(other->arena)); + + if (self->key_type != other->key_type || + self->value_type_info.type != other->value_type_info.type || + self->value_type_class != other->value_type_class) { + rb_raise(rb_eArgError, "Attempt to merge Map with mismatching types"); + } + + while (upb_mapiter_next(other->map, &iter)) { + upb_msgval key = upb_mapiter_key(other->map, iter); + upb_msgval val = upb_mapiter_value(other->map, iter); + upb_map_set(self_msg, key, val, arena); + } + } else { + rb_raise(rb_eArgError, "Unknown type merging into Map"); } + return _self; } /* @@ -224,9 +285,9 @@ static bool needs_typeclass(upb_fieldtype_t type) { * references to underlying objects will be shared if the value type is a * message type. */ -VALUE Map_init(int argc, VALUE* argv, VALUE _self) { +static VALUE Map_init(int argc, VALUE* argv, VALUE _self) { Map* self = ruby_to_Map(_self); - int init_value_arg; + VALUE init_arg; // We take either two args (:key_type, :value_type), three args (:key_type, // :value_type, "ValueMessageType"), or four args (the above plus an initial @@ -236,8 +297,9 @@ VALUE Map_init(int argc, VALUE* argv, VALUE _self) { } self->key_type = ruby_to_fieldtype(argv[0]); - self->value_type = ruby_to_fieldtype(argv[1]); - self->parse_frame = Qnil; + self->value_type_info = + TypeInfo_FromClass(argc, argv, 1, &self->value_type_class, &init_arg); + self->arena = Arena_new(); // Check that the key type is an allowed type. switch (self->key_type) { @@ -254,21 +316,12 @@ VALUE Map_init(int argc, VALUE* argv, VALUE _self) { rb_raise(rb_eArgError, "Invalid key type for map."); } - init_value_arg = 2; - if (needs_typeclass(self->value_type) && argc > 2) { - self->value_type_class = argv[2]; - validate_type_class(self->value_type, self->value_type_class); - init_value_arg = 3; - } - - // Table value type is always UINT64: this ensures enough space to store the - // native_slot value. - if (!upb_strtable_init(&self->table, UPB_CTYPE_UINT64)) { - rb_raise(rb_eRuntimeError, "Could not allocate table."); - } + self->map = upb_map_new(Arena_get(self->arena), self->key_type, + self->value_type_info.type); + ObjectCache_Add(self->map, _self, Arena_get(self->arena)); - if (argc > init_value_arg) { - Map_merge_into_self(_self, argv[init_value_arg]); + if (init_arg != Qnil) { + Map_merge_into_self(_self, init_arg); } return Qnil; @@ -282,22 +335,16 @@ VALUE Map_init(int argc, VALUE* argv, VALUE _self) { * Note that Map also includes Enumerable; map thus acts like a normal Ruby * sequence. */ -VALUE Map_each(VALUE _self) { +static VALUE Map_each(VALUE _self) { Map* self = ruby_to_Map(_self); - - upb_strtable_iter it; - for (upb_strtable_begin(&it, &self->table); - !upb_strtable_done(&it); - upb_strtable_next(&it)) { - VALUE key = table_key_to_ruby(self, upb_strtable_iter_key(&it)); - - upb_value v = upb_strtable_iter_value(&it); - void* mem = value_memory(&v); - VALUE value = native_slot_get(self->value_type, - self->value_type_class, - mem); - - rb_yield_values(2, key, value); + size_t iter = UPB_MAP_BEGIN; + + while (upb_mapiter_next(self->map, &iter)) { + upb_msgval key = upb_mapiter_key(self->map, iter); + upb_msgval val = upb_mapiter_value(self->map, iter); + VALUE key_val = Convert_UpbToRuby(key, Map_keyinfo(self), self->arena); + VALUE val_val = Convert_UpbToRuby(val, self->value_type_info, self->arena); + rb_yield_values(2, key_val, val_val); } return Qnil; @@ -309,17 +356,15 @@ VALUE Map_each(VALUE _self) { * * Returns the list of keys contained in the map, in unspecified order. */ -VALUE Map_keys(VALUE _self) { +static VALUE Map_keys(VALUE _self) { Map* self = ruby_to_Map(_self); - + size_t iter = UPB_MAP_BEGIN; VALUE ret = rb_ary_new(); - upb_strtable_iter it; - for (upb_strtable_begin(&it, &self->table); - !upb_strtable_done(&it); - upb_strtable_next(&it)) { - VALUE key = table_key_to_ruby(self, upb_strtable_iter_key(&it)); - rb_ary_push(ret, key); + while (upb_mapiter_next(self->map, &iter)) { + upb_msgval key = upb_mapiter_key(self->map, iter); + VALUE key_val = Convert_UpbToRuby(key, Map_keyinfo(self), self->arena); + rb_ary_push(ret, key_val); } return ret; @@ -331,22 +376,15 @@ VALUE Map_keys(VALUE _self) { * * Returns the list of values contained in the map, in unspecified order. */ -VALUE Map_values(VALUE _self) { +static VALUE Map_values(VALUE _self) { Map* self = ruby_to_Map(_self); - + size_t iter = UPB_MAP_BEGIN; VALUE ret = rb_ary_new(); - upb_strtable_iter it; - for (upb_strtable_begin(&it, &self->table); - !upb_strtable_done(&it); - upb_strtable_next(&it)) { - - upb_value v = upb_strtable_iter_value(&it); - void* mem = value_memory(&v); - VALUE value = native_slot_get(self->value_type, - self->value_type_class, - mem); - - rb_ary_push(ret, value); + + while (upb_mapiter_next(self->map, &iter)) { + upb_msgval val = upb_mapiter_value(self->map, iter); + VALUE val_val = Convert_UpbToRuby(val, self->value_type_info, self->arena); + rb_ary_push(ret, val_val); } return ret; @@ -359,18 +397,13 @@ VALUE Map_values(VALUE _self) { * Accesses the element at the given key. Throws an exception if the key type is * incorrect. Returns nil when the key is not present in the map. */ -VALUE Map_index(VALUE _self, VALUE key) { +static VALUE Map_index(VALUE _self, VALUE key) { Map* self = ruby_to_Map(_self); + upb_msgval key_upb = Convert_RubyToUpb(key, "", Map_keyinfo(self), NULL); + upb_msgval val; - char keybuf[TABLE_KEY_BUF_LENGTH]; - const char* keyval = NULL; - size_t length = 0; - upb_value v; - key = table_key(self, key, keybuf, &keyval, &length); - - if (upb_strtable_lookup2(&self->table, keyval, length, &v)) { - void* mem = value_memory(&v); - return native_slot_get(self->value_type, self->value_type_class, mem); + if (upb_map_get(self->map, key_upb, &val)) { + return Convert_UpbToRuby(val, self->value_type_info, self->arena); } else { return Qnil; } @@ -384,33 +417,15 @@ VALUE Map_index(VALUE _self, VALUE key) { * Throws an exception if the key type is incorrect. Returns the new value that * was just inserted. */ -VALUE Map_index_set(VALUE _self, VALUE key, VALUE value) { +static VALUE Map_index_set(VALUE _self, VALUE key, VALUE val) { Map* self = ruby_to_Map(_self); - char keybuf[TABLE_KEY_BUF_LENGTH]; - const char* keyval = NULL; - size_t length = 0; - upb_value v; - void* mem; - key = table_key(self, key, keybuf, &keyval, &length); - - rb_check_frozen(_self); + upb_arena *arena = Arena_get(self->arena); + upb_msgval key_upb = Convert_RubyToUpb(key, "", Map_keyinfo(self), NULL); + upb_msgval val_upb = Convert_RubyToUpb(val, "", self->value_type_info, arena); - if (TYPE(value) == T_HASH) { - VALUE args[1] = { value }; - value = rb_class_new_instance(1, args, self->value_type_class); - } - - mem = value_memory(&v); - native_slot_set("", self->value_type, self->value_type_class, mem, value); - - // Replace any existing value by issuing a 'remove' operation first. - upb_strtable_remove2(&self->table, keyval, length, NULL); - if (!upb_strtable_insert2(&self->table, keyval, length, v)) { - rb_raise(rb_eRuntimeError, "Could not insert into table"); - } + upb_map_set(Map_GetMutable(_self), key_upb, val_upb, arena); - // Ruby hashmap's :[]= method also returns the inserted value. - return value; + return val; } /* @@ -420,15 +435,11 @@ VALUE Map_index_set(VALUE _self, VALUE key, VALUE value) { * Returns true if the given key is present in the map. Throws an exception if * the key has the wrong type. */ -VALUE Map_has_key(VALUE _self, VALUE key) { +static VALUE Map_has_key(VALUE _self, VALUE key) { Map* self = ruby_to_Map(_self); + upb_msgval key_upb = Convert_RubyToUpb(key, "", Map_keyinfo(self), NULL); - char keybuf[TABLE_KEY_BUF_LENGTH]; - const char* keyval = NULL; - size_t length = 0; - key = table_key(self, key, keybuf, &keyval, &length); - - if (upb_strtable_lookup2(&self->table, keyval, length, NULL)) { + if (upb_map_get(self->map, key_upb, NULL)) { return Qtrue; } else { return Qfalse; @@ -442,22 +453,25 @@ VALUE Map_has_key(VALUE _self, VALUE key) { * Deletes the value at the given key, if any, returning either the old value or * nil if none was present. Throws an exception if the key is of the wrong type. */ -VALUE Map_delete(VALUE _self, VALUE key) { +static VALUE Map_delete(VALUE _self, VALUE key) { Map* self = ruby_to_Map(_self); - char keybuf[TABLE_KEY_BUF_LENGTH]; - const char* keyval = NULL; - size_t length = 0; - upb_value v; - key = table_key(self, key, keybuf, &keyval, &length); + upb_msgval key_upb = Convert_RubyToUpb(key, "", Map_keyinfo(self), NULL); + upb_msgval val_upb; + VALUE ret; rb_check_frozen(_self); - if (upb_strtable_remove2(&self->table, keyval, length, &v)) { - void* mem = value_memory(&v); - return native_slot_get(self->value_type, self->value_type_class, mem); + // TODO(haberman): make upb_map_delete() also capable of returning the deleted + // value. + if (upb_map_get(self->map, key_upb, &val_upb)) { + ret = Convert_UpbToRuby(val_upb, self->value_type_info, self->arena); } else { - return Qnil; + ret = Qnil; } + + upb_map_delete(Map_GetMutable(_self), key_upb); + + return ret; } /* @@ -466,17 +480,8 @@ VALUE Map_delete(VALUE _self, VALUE key) { * * Removes all entries from the map. */ -VALUE Map_clear(VALUE _self) { - Map* self = ruby_to_Map(_self); - - rb_check_frozen(_self); - - // Uninit and reinit the table -- this is faster than iterating and doing a - // delete-lookup on each key. - upb_strtable_uninit(&self->table); - if (!upb_strtable_init(&self->table, UPB_CTYPE_INT64)) { - rb_raise(rb_eRuntimeError, "Unable to re-initialize table"); - } +static VALUE Map_clear(VALUE _self) { + upb_map_clear(Map_GetMutable(_self)); return Qnil; } @@ -486,24 +491,9 @@ VALUE Map_clear(VALUE _self) { * * Returns the number of entries (key-value pairs) in the map. */ -VALUE Map_length(VALUE _self) { +static VALUE Map_length(VALUE _self) { Map* self = ruby_to_Map(_self); - return ULL2NUM(upb_strtable_count(&self->table)); -} - -VALUE Map_new_this_type(VALUE _self) { - Map* self = ruby_to_Map(_self); - VALUE new_map = Qnil; - VALUE key_type = fieldtype_to_ruby(self->key_type); - VALUE value_type = fieldtype_to_ruby(self->value_type); - if (self->value_type_class != Qnil) { - new_map = rb_funcall(CLASS_OF(_self), rb_intern("new"), 3, - key_type, value_type, self->value_type_class); - } else { - new_map = rb_funcall(CLASS_OF(_self), rb_intern("new"), 2, - key_type, value_type); - } - return new_map; + return ULL2NUM(upb_map_size(self->map)); } /* @@ -513,54 +503,23 @@ VALUE Map_new_this_type(VALUE _self) { * Duplicates this map with a shallow copy. References to all non-primitive * element objects (e.g., submessages) are shared. */ -VALUE Map_dup(VALUE _self) { - Map* self = ruby_to_Map(_self); - VALUE new_map = Map_new_this_type(_self); - Map* new_self = ruby_to_Map(new_map); - - upb_strtable_iter it; - for (upb_strtable_begin(&it, &self->table); - !upb_strtable_done(&it); - upb_strtable_next(&it)) { - upb_strview k = upb_strtable_iter_key(&it); - upb_value v = upb_strtable_iter_value(&it); - void* mem = value_memory(&v); - upb_value dup; - void* dup_mem = value_memory(&dup); - native_slot_dup(self->value_type, dup_mem, mem); - - if (!upb_strtable_insert2(&new_self->table, k.data, k.size, dup)) { - rb_raise(rb_eRuntimeError, "Error inserting value into new table"); - } - } - - return new_map; -} - -// Used by Google::Protobuf.deep_copy but not exposed directly. -VALUE Map_deep_copy(VALUE _self) { +static VALUE Map_dup(VALUE _self) { Map* self = ruby_to_Map(_self); - VALUE new_map = Map_new_this_type(_self); - Map* new_self = ruby_to_Map(new_map); - - upb_strtable_iter it; - for (upb_strtable_begin(&it, &self->table); - !upb_strtable_done(&it); - upb_strtable_next(&it)) { - upb_strview k = upb_strtable_iter_key(&it); - upb_value v = upb_strtable_iter_value(&it); - void* mem = value_memory(&v); - upb_value dup; - void* dup_mem = value_memory(&dup); - native_slot_deep_copy(self->value_type, self->value_type_class, dup_mem, - mem); - - if (!upb_strtable_insert2(&new_self->table, k.data, k.size, dup)) { - rb_raise(rb_eRuntimeError, "Error inserting value into new table"); - } + VALUE new_map_rb = Map_new_this_type(self); + Map* new_self = ruby_to_Map(new_map_rb); + size_t iter = UPB_MAP_BEGIN; + upb_arena *arena = Arena_get(new_self->arena); + upb_map *new_map = Map_GetMutable(new_map_rb); + + upb_arena_fuse(arena, Arena_get(self->arena)); + + while (upb_mapiter_next(self->map, &iter)) { + upb_msgval key = upb_mapiter_key(self->map, iter); + upb_msgval val = upb_mapiter_value(self->map, iter); + upb_map_set(new_map, key, val, arena); } - return new_map; + return new_map_rb; } /* @@ -579,12 +538,11 @@ VALUE Map_deep_copy(VALUE _self) { VALUE Map_eq(VALUE _self, VALUE _other) { Map* self = ruby_to_Map(_self); Map* other; - upb_strtable_iter it; // Allow comparisons to Ruby hashmaps by converting to a temporary Map // instance. Slow, but workable. if (TYPE(_other) == T_HASH) { - VALUE other_map = Map_new_this_type(_self); + VALUE other_map = Map_new_this_type(self); Map_merge_into_self(other_map, _other); _other = other_map; } @@ -595,33 +553,27 @@ VALUE Map_eq(VALUE _self, VALUE _other) { return Qtrue; } if (self->key_type != other->key_type || - self->value_type != other->value_type || + self->value_type_info.type != other->value_type_info.type || self->value_type_class != other->value_type_class) { return Qfalse; } - if (upb_strtable_count(&self->table) != upb_strtable_count(&other->table)) { + if (upb_map_size(self->map) != upb_map_size(other->map)) { return Qfalse; } // For each member of self, check that an equal member exists at the same key // in other. - for (upb_strtable_begin(&it, &self->table); - !upb_strtable_done(&it); - upb_strtable_next(&it)) { - upb_strview k = upb_strtable_iter_key(&it); - upb_value v = upb_strtable_iter_value(&it); - void* mem = value_memory(&v); - upb_value other_v; - void* other_mem = value_memory(&other_v); - - if (!upb_strtable_lookup2(&other->table, k.data, k.size, &other_v)) { + size_t iter = UPB_MAP_BEGIN; + while (upb_mapiter_next(self->map, &iter)) { + upb_msgval key = upb_mapiter_key(self->map, iter); + upb_msgval val = upb_mapiter_value(self->map, iter); + upb_msgval other_val; + if (!upb_map_get(other->map, key, &other_val)) { // Not present in other map. return Qfalse; } - - if (!native_slot_eq(self->value_type, self->value_type_class, mem, - other_mem)) { - // Present, but value not equal. + if (!Msgval_IsEqual(val, other_val, self->value_type_info)) { + // Present but different value. return Qfalse; } } @@ -629,6 +581,21 @@ VALUE Map_eq(VALUE _self, VALUE _other) { return Qtrue; } +/* + * call-seq: + * Message.freeze => self + * + * Freezes the message object. We have to intercept this so we can pin the + * Ruby object into memory so we don't forget it's frozen. + */ +static VALUE Map_freeze(VALUE _self) { + Map* self = ruby_to_Map(_self); + + ObjectCache_Pin(self->map, _self, Arena_get(self->arena)); + RB_OBJ_FREEZE(_self); + return _self; +} + /* * call-seq: * Map.hash => hash_value @@ -637,26 +604,18 @@ VALUE Map_eq(VALUE _self, VALUE _other) { */ VALUE Map_hash(VALUE _self) { Map* self = ruby_to_Map(_self); - - st_index_t h = rb_hash_start(0); - VALUE hash_sym = rb_intern("hash"); - - upb_strtable_iter it; - for (upb_strtable_begin(&it, &self->table); !upb_strtable_done(&it); - upb_strtable_next(&it)) { - VALUE key = table_key_to_ruby(self, upb_strtable_iter_key(&it)); - - upb_value v = upb_strtable_iter_value(&it); - void* mem = value_memory(&v); - VALUE value = native_slot_get(self->value_type, - self->value_type_class, - mem); - - h = rb_hash_uint(h, NUM2LONG(rb_funcall(key, hash_sym, 0))); - h = rb_hash_uint(h, NUM2LONG(rb_funcall(value, hash_sym, 0))); + uint64_t hash = 0; + + size_t iter = UPB_MAP_BEGIN; + TypeInfo key_info = {self->key_type}; + while (upb_mapiter_next(self->map, &iter)) { + upb_msgval key = upb_mapiter_key(self->map, iter); + upb_msgval val = upb_mapiter_value(self->map, iter); + hash = Msgval_GetHash(key, key_info, hash); + hash = Msgval_GetHash(val, self->value_type_info, hash); } - return INT2FIX(h); + return LL2NUM(hash); } /* @@ -667,24 +626,7 @@ VALUE Map_hash(VALUE _self) { */ VALUE Map_to_h(VALUE _self) { Map* self = ruby_to_Map(_self); - VALUE hash = rb_hash_new(); - upb_strtable_iter it; - for (upb_strtable_begin(&it, &self->table); - !upb_strtable_done(&it); - upb_strtable_next(&it)) { - VALUE key = table_key_to_ruby(self, upb_strtable_iter_key(&it)); - upb_value v = upb_strtable_iter_value(&it); - void* mem = value_memory(&v); - VALUE value = native_slot_get(self->value_type, - self->value_type_class, - mem); - - if (self->value_type == UPB_TYPE_MESSAGE) { - value = Message_to_h(value); - } - rb_hash_aset(hash, key, value); - } - return hash; + return Map_CreateHash(self->map, self->key_type, self->value_type_info); } /* @@ -698,34 +640,11 @@ VALUE Map_to_h(VALUE _self) { VALUE Map_inspect(VALUE _self) { Map* self = ruby_to_Map(_self); - VALUE str = rb_str_new2("{"); - - bool first = true; - VALUE inspect_sym = rb_intern("inspect"); - - upb_strtable_iter it; - for (upb_strtable_begin(&it, &self->table); !upb_strtable_done(&it); - upb_strtable_next(&it)) { - VALUE key = table_key_to_ruby(self, upb_strtable_iter_key(&it)); - - upb_value v = upb_strtable_iter_value(&it); - void* mem = value_memory(&v); - VALUE value = native_slot_get(self->value_type, - self->value_type_class, - mem); - - if (!first) { - str = rb_str_cat2(str, ", "); - } else { - first = false; - } - str = rb_str_append(str, rb_funcall(key, inspect_sym, 0)); - str = rb_str_cat2(str, "=>"); - str = rb_str_append(str, rb_funcall(value, inspect_sym, 0)); - } - - str = rb_str_cat2(str, "}"); - return str; + StringBuilder* builder = StringBuilder_New(); + Map_Inspect(builder, self->map, self->key_type, self->value_type_info); + VALUE ret = StringBuilder_ToRubyString(builder); + StringBuilder_Free(builder); + return ret; } /* @@ -737,79 +656,11 @@ VALUE Map_inspect(VALUE _self) { * in the new copy of this map. Returns the new copy of this map with merged * contents. */ -VALUE Map_merge(VALUE _self, VALUE hashmap) { +static VALUE Map_merge(VALUE _self, VALUE hashmap) { VALUE dupped = Map_dup(_self); return Map_merge_into_self(dupped, hashmap); } -static int merge_into_self_callback(VALUE key, VALUE value, VALUE self) { - Map_index_set(self, key, value); - return ST_CONTINUE; -} - -// Used only internally -- shared by #merge and #initialize. -VALUE Map_merge_into_self(VALUE _self, VALUE hashmap) { - if (TYPE(hashmap) == T_HASH) { - rb_hash_foreach(hashmap, merge_into_self_callback, _self); - } else if (RB_TYPE_P(hashmap, T_DATA) && RTYPEDDATA_P(hashmap) && - RTYPEDDATA_TYPE(hashmap) == &Map_type) { - - Map* self = ruby_to_Map(_self); - Map* other = ruby_to_Map(hashmap); - upb_strtable_iter it; - - if (self->key_type != other->key_type || - self->value_type != other->value_type || - self->value_type_class != other->value_type_class) { - rb_raise(rb_eArgError, "Attempt to merge Map with mismatching types"); - } - - for (upb_strtable_begin(&it, &other->table); - !upb_strtable_done(&it); - upb_strtable_next(&it)) { - upb_strview k = upb_strtable_iter_key(&it); - - // Replace any existing value by issuing a 'remove' operation first. - upb_value v; - upb_value oldv; - upb_strtable_remove2(&self->table, k.data, k.size, &oldv); - - v = upb_strtable_iter_value(&it); - upb_strtable_insert2(&self->table, k.data, k.size, v); - } - } else { - rb_raise(rb_eArgError, "Unknown type merging into Map"); - } - return _self; -} - -// Internal method: map iterator initialization (used for serialization). -void Map_begin(VALUE _self, Map_iter* iter) { - Map* self = ruby_to_Map(_self); - iter->self = self; - upb_strtable_begin(&iter->it, &self->table); -} - -void Map_next(Map_iter* iter) { - upb_strtable_next(&iter->it); -} - -bool Map_done(Map_iter* iter) { - return upb_strtable_done(&iter->it); -} - -VALUE Map_iter_key(Map_iter* iter) { - return table_key_to_ruby(iter->self, upb_strtable_iter_key(&iter->it)); -} - -VALUE Map_iter_value(Map_iter* iter) { - upb_value v = upb_strtable_iter_value(&iter->it); - void* mem = value_memory(&v); - return native_slot_get(iter->self->value_type, - iter->self->value_type_class, - mem); -} - void Map_register(VALUE module) { VALUE klass = rb_define_class_under(module, "Map", rb_cObject); rb_define_alloc_func(klass, Map_alloc); @@ -828,6 +679,7 @@ void Map_register(VALUE module) { rb_define_method(klass, "length", Map_length, 0); rb_define_method(klass, "dup", Map_dup, 0); rb_define_method(klass, "==", Map_eq, 1); + rb_define_method(klass, "freeze", Map_freeze, 0); rb_define_method(klass, "hash", Map_hash, 0); rb_define_method(klass, "to_h", Map_to_h, 0); rb_define_method(klass, "inspect", Map_inspect, 0); diff --git a/ruby/ext/google/protobuf_c/map.h b/ruby/ext/google/protobuf_c/map.h new file mode 100644 index 000000000000..1b840c3da77e --- /dev/null +++ b/ruby/ext/google/protobuf_c/map.h @@ -0,0 +1,66 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef RUBY_PROTOBUF_MAP_H_ +#define RUBY_PROTOBUF_MAP_H_ + +#include + +#include "protobuf.h" +#include "ruby-upb.h" + +// Returns a Ruby wrapper object for the given map, which will be created if +// one does not exist already. +VALUE Map_GetRubyWrapper(upb_map *map, upb_fieldtype_t key_type, + TypeInfo value_type, VALUE arena); + +// Gets the underlying upb_map for this Ruby map object, which must have +// key/value type that match |field|. If this is not a map or the type doesn't +// match, raises an exception. +const upb_map *Map_GetUpbMap(VALUE val, const upb_fielddef *field); + +// Implements #inspect for this map by appending its contents to |b|. +void Map_Inspect(StringBuilder *b, const upb_map *map, upb_fieldtype_t key_type, + TypeInfo val_type); + +// Returns a new Hash object containing the contents of this Map. +VALUE Map_CreateHash(const upb_map* map, upb_fieldtype_t key_type, + TypeInfo val_info); + +// Returns a deep copy of this Map object. +VALUE Map_deep_copy(VALUE obj); + +// Ruby class of Google::Protobuf::Map. +extern VALUE cMap; + +// Call at startup to register all types in this module. +void Map_register(VALUE module); + +#endif // RUBY_PROTOBUF_MAP_H_ diff --git a/ruby/ext/google/protobuf_c/message.c b/ruby/ext/google/protobuf_c/message.c index 0050506d3bca..22a21c129e6e 100644 --- a/ruby/ext/google/protobuf_c/message.c +++ b/ruby/ext/google/protobuf_c/message.c @@ -28,49 +28,61 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#include "message.h" + +#include "convert.h" +#include "defs.h" +#include "map.h" #include "protobuf.h" +#include "repeated_field.h" +#include "third_party/wyhash/wyhash.h" -// ----------------------------------------------------------------------------- -// Class/module creation from msgdefs and enumdefs, respectively. -// ----------------------------------------------------------------------------- +static VALUE cParseError = Qnil; +static ID descriptor_instancevar_interned; -void* Message_data(void* msg) { - return ((uint8_t *)msg) + sizeof(MessageHeader); +static VALUE initialize_rb_class_with_no_args(VALUE klass) { + return rb_funcall(klass, rb_intern("new"), 0); } -void Message_mark(void* _self) { - MessageHeader* self = (MessageHeader *)_self; - layout_mark(self->descriptor->layout, Message_data(self)); +VALUE MessageOrEnum_GetDescriptor(VALUE klass) { + return rb_ivar_get(klass, descriptor_instancevar_interned); } -void Message_free(void* self) { - stringsink* unknown = ((MessageHeader *)self)->unknown_fields; - if (unknown != NULL) { - stringsink_uninit(unknown); - free(unknown); - } - xfree(self); +// ----------------------------------------------------------------------------- +// Class/module creation from msgdefs and enumdefs, respectively. +// ----------------------------------------------------------------------------- + +typedef struct { + VALUE arena; + const upb_msg* msg; // Can get as mutable when non-frozen. + const upb_msgdef* msgdef; // kept alive by self.class.descriptor reference. +} Message; + +static void Message_mark(void* _self) { + Message* self = (Message *)_self; + rb_gc_mark(self->arena); } -rb_data_type_t Message_type = { +static rb_data_type_t Message_type = { "Message", - { Message_mark, Message_free, NULL }, + { Message_mark, RUBY_DEFAULT_FREE, NULL }, + .flags = RUBY_TYPED_FREE_IMMEDIATELY, }; -VALUE Message_alloc(VALUE klass) { +static Message* ruby_to_Message(VALUE msg_rb) { + Message* msg; + TypedData_Get_Struct(msg_rb, Message, &Message_type, msg); + return msg; +} + +static VALUE Message_alloc(VALUE klass) { VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned); - Descriptor* desc = ruby_to_Descriptor(descriptor); - MessageHeader* msg; + Message* msg = ALLOC(Message); VALUE ret; - if (desc->layout == NULL) { - create_layout(desc); - } - - msg = (void*)ALLOC_N(uint8_t, sizeof(MessageHeader) + desc->layout->size); - msg->descriptor = desc; - msg->unknown_fields = NULL; - memcpy(Message_data(msg), desc->layout->empty_template, desc->layout->size); + msg->msgdef = Descriptor_GetMsgDef(descriptor); + msg->arena = Qnil; + msg->msg = NULL; ret = TypedData_Wrap_Struct(klass, &Message_type, msg); rb_ivar_set(ret, descriptor_instancevar_interned, descriptor); @@ -78,24 +90,92 @@ VALUE Message_alloc(VALUE klass) { return ret; } -static const upb_fielddef* which_oneof_field(MessageHeader* self, const upb_oneofdef* o) { - uint32_t oneof_case; - const upb_fielddef* f; +const upb_msg *Message_Get(VALUE msg_rb, const upb_msgdef **m) { + Message* msg = ruby_to_Message(msg_rb); + if (m) *m = msg->msgdef; + return msg->msg; +} + +upb_msg *Message_GetMutable(VALUE msg_rb, const upb_msgdef **m) { + rb_check_frozen(msg_rb); + return (upb_msg*)Message_Get(msg_rb, m); +} + +void Message_InitPtr(VALUE self_, upb_msg *msg, VALUE arena) { + Message* self = ruby_to_Message(self_); + self->msg = msg; + self->arena = arena; + ObjectCache_Add(msg, self_, Arena_get(arena)); +} + +VALUE Message_GetArena(VALUE msg_rb) { + Message* msg = ruby_to_Message(msg_rb); + return msg->arena; +} + +void Message_CheckClass(VALUE klass) { + if (rb_get_alloc_func(klass) != &Message_alloc) { + rb_raise(rb_eArgError, + "Message class was not returned by the DescriptorPool."); + } +} + +VALUE Message_GetRubyWrapper(upb_msg* msg, const upb_msgdef* m, VALUE arena) { + if (msg == NULL) return Qnil; - oneof_case = - slot_read_oneof_case(self->descriptor->layout, Message_data(self), o); + VALUE val = ObjectCache_Get(msg); - if (oneof_case == ONEOF_CASE_NONE) { - return NULL; + if (val == Qnil) { + VALUE klass = Descriptor_DefToClass(m); + val = Message_alloc(klass); + Message_InitPtr(val, msg, arena); } - // oneof_case is a field index, so find that field. - f = upb_oneofdef_itof(o, oneof_case); - assert(f != NULL); + return val; +} + +void Message_PrintMessage(StringBuilder* b, const upb_msg* msg, + const upb_msgdef* m) { + bool first = true; + int n = upb_msgdef_fieldcount(m); + VALUE klass = Descriptor_DefToClass(m); + StringBuilder_Printf(b, "<%s: ", rb_class2name(klass)); + + for (int i = 0; i < n; i++) { + const upb_fielddef* field = upb_msgdef_field(m, i); + + if (upb_fielddef_haspresence(field) && !upb_msg_has(msg, field)) { + continue; + } + + if (!first) { + StringBuilder_Printf(b, ", "); + } else { + first = false; + } + + upb_msgval msgval = upb_msg_get(msg, field); + + StringBuilder_Printf(b, "%s: ", upb_fielddef_name(field)); + + if (upb_fielddef_ismap(field)) { + const upb_msgdef* entry_m = upb_fielddef_msgsubdef(field); + const upb_fielddef* key_f = upb_msgdef_itof(entry_m, 1); + const upb_fielddef* val_f = upb_msgdef_itof(entry_m, 2); + TypeInfo val_info = TypeInfo_get(val_f); + Map_Inspect(b, msgval.map_val, upb_fielddef_type(key_f), val_info); + } else if (upb_fielddef_isseq(field)) { + RepeatedField_Inspect(b, msgval.array_val, TypeInfo_get(field)); + } else { + StringBuilder_PrintMsgval(b, msgval, TypeInfo_get(field)); + } + } - return f; + StringBuilder_Printf(b, ">"); } +// Helper functions for #method_missing //////////////////////////////////////// + enum { METHOD_UNKNOWN = 0, METHOD_GETTER = 1, @@ -108,153 +188,199 @@ enum { }; // Check if the field is a well known wrapper type -bool is_wrapper_type_field(const upb_fielddef* field) { - const upb_msgdef *m; - if (upb_fielddef_type(field) != UPB_TYPE_MESSAGE) { +static bool IsWrapper(const upb_fielddef* f) { + return upb_fielddef_issubmsg(f) && + upb_msgdef_iswrapper(upb_fielddef_msgsubdef(f)); +} + +static bool Match(const upb_msgdef* m, const char* name, const upb_fielddef** f, + const upb_oneofdef** o, const char* prefix, + const char* suffix) { + size_t sp = strlen(prefix); + size_t ss = strlen(suffix); + size_t sn = strlen(name); + + if (sn <= sp + ss) return false; + + if (memcmp(name, prefix, sp) != 0 || + memcmp(name + sn - ss, suffix, ss) != 0) { return false; } - m = upb_fielddef_msgsubdef(field); - switch (upb_msgdef_wellknowntype(m)) { - case UPB_WELLKNOWN_DOUBLEVALUE: - case UPB_WELLKNOWN_FLOATVALUE: - case UPB_WELLKNOWN_INT64VALUE: - case UPB_WELLKNOWN_UINT64VALUE: - case UPB_WELLKNOWN_INT32VALUE: - case UPB_WELLKNOWN_UINT32VALUE: - case UPB_WELLKNOWN_STRINGVALUE: - case UPB_WELLKNOWN_BYTESVALUE: - case UPB_WELLKNOWN_BOOLVALUE: - return true; - default: - return false; - } + + return upb_msgdef_lookupname(m, name + sp, sn - sp - ss, f, o); } -// Get a new Ruby wrapper type and set the initial value -VALUE ruby_wrapper_type(VALUE type_class, VALUE value) { - if (value != Qnil) { - VALUE hash = rb_hash_new(); - rb_hash_aset(hash, rb_str_new2("value"), value); - { - VALUE args[1] = {hash}; - return rb_class_new_instance(1, args, type_class); +static int extract_method_call(VALUE method_name, Message* self, + const upb_fielddef** f, const upb_oneofdef** o) { + const upb_msgdef* m = self->msgdef; + const char* name; + + Check_Type(method_name, T_SYMBOL); + name = rb_id2name(SYM2ID(method_name)); + + if (Match(m, name, f, o, "", "")) return METHOD_GETTER; + if (Match(m, name, f, o, "", "=")) return METHOD_SETTER; + if (Match(m, name, f, o, "clear_", "")) return METHOD_CLEAR; + if (Match(m, name, f, o, "has_", "?") && + (*o || (*f && upb_fielddef_haspresence(*f)))) { + // Disallow oneof hazzers for proto3. + // TODO(haberman): remove this test when we are enabling oneof hazzers for + // proto3. + if (*f && !upb_fielddef_issubmsg(*f) && + upb_fielddef_realcontainingoneof(*f) && + upb_msgdef_syntax(upb_fielddef_containingtype(*f)) != + UPB_SYNTAX_PROTO2) { + return METHOD_UNKNOWN; } + return METHOD_PRESENCE; + } + if (Match(m, name, f, o, "", "_as_value") && *f && !upb_fielddef_isseq(*f) && + IsWrapper(*f)) { + return METHOD_WRAPPER_GETTER; + } + if (Match(m, name, f, o, "", "_as_value=") && *f && !upb_fielddef_isseq(*f) && + IsWrapper(*f)) { + return METHOD_WRAPPER_SETTER; + } + if (Match(m, name, f, o, "", "_const") && *f && + upb_fielddef_type(*f) == UPB_TYPE_ENUM) { + return METHOD_ENUM_GETTER; } - return Qnil; -} -static int extract_method_call(VALUE method_name, MessageHeader* self, - const upb_fielddef **f, const upb_oneofdef **o) { - VALUE method_str; - char* name; - size_t name_len; - int accessor_type; - const upb_oneofdef* test_o; - const upb_fielddef* test_f; - bool has_field; + return METHOD_UNKNOWN; +} - Check_Type(method_name, T_SYMBOL); +static VALUE Message_oneof_accessor(VALUE _self, const upb_oneofdef* o, + int accessor_type) { + Message* self = ruby_to_Message(_self); + const upb_fielddef* oneof_field = upb_msg_whichoneof(self->msg, o); - method_str = rb_id2str(SYM2ID(method_name)); - name = RSTRING_PTR(method_str); - name_len = RSTRING_LEN(method_str); - - if (name[name_len - 1] == '=') { - accessor_type = METHOD_SETTER; - name_len--; - // We want to ensure if the proto has something named clear_foo or has_foo?, - // we don't strip the prefix. - } else if (strncmp("clear_", name, 6) == 0 && - !upb_msgdef_lookupname(self->descriptor->msgdef, name, name_len, - &test_f, &test_o)) { - accessor_type = METHOD_CLEAR; - name = name + 6; - name_len = name_len - 6; - } else if (strncmp("has_", name, 4) == 0 && name[name_len - 1] == '?' && - !upb_msgdef_lookupname(self->descriptor->msgdef, name, name_len, - &test_f, &test_o)) { - accessor_type = METHOD_PRESENCE; - name = name + 4; - name_len = name_len - 5; - } else { - accessor_type = METHOD_GETTER; - } - - has_field = upb_msgdef_lookupname(self->descriptor->msgdef, name, name_len, - &test_f, &test_o); - - // Look for wrapper type accessor of the form _as_value - if (!has_field && - (accessor_type == METHOD_GETTER || accessor_type == METHOD_SETTER) && - name_len > 9 && strncmp(name + name_len - 9, "_as_value", 9) == 0) { - const upb_oneofdef* test_o_wrapper; - const upb_fielddef* test_f_wrapper; - char wrapper_field_name[name_len - 8]; - - // Find the field name - strncpy(wrapper_field_name, name, name_len - 9); - wrapper_field_name[name_len - 9] = '\0'; - - // Check if field exists and is a wrapper type - if (upb_msgdef_lookupname(self->descriptor->msgdef, wrapper_field_name, - name_len - 9, &test_f_wrapper, &test_o_wrapper) && - is_wrapper_type_field(test_f_wrapper)) { - // It does exist! - has_field = true; - if (accessor_type == METHOD_SETTER) { - accessor_type = METHOD_WRAPPER_SETTER; - } else { - accessor_type = METHOD_WRAPPER_GETTER; + switch (accessor_type) { + case METHOD_PRESENCE: + return oneof_field == NULL ? Qfalse : Qtrue; + case METHOD_CLEAR: + if (oneof_field != NULL) { + upb_msg_clearfield(Message_GetMutable(_self, NULL), oneof_field); } - test_o = test_o_wrapper; - test_f = test_f_wrapper; - } + return Qnil; + case METHOD_GETTER: + return oneof_field == NULL + ? Qnil + : ID2SYM(rb_intern(upb_fielddef_name(oneof_field))); + case METHOD_SETTER: + rb_raise(rb_eRuntimeError, "Oneof accessors are read-only."); } + rb_raise(rb_eRuntimeError, "Invalid access of oneof field."); +} - // Look for enum accessor of the form _const - if (!has_field && accessor_type == METHOD_GETTER && - name_len > 6 && strncmp(name + name_len - 6, "_const", 6) == 0) { - const upb_oneofdef* test_o_enum; - const upb_fielddef* test_f_enum; - char enum_name[name_len - 5]; - - // Find enum field name - strncpy(enum_name, name, name_len - 6); - enum_name[name_len - 6] = '\0'; - - // Check if enum field exists - if (upb_msgdef_lookupname(self->descriptor->msgdef, enum_name, name_len - 6, - &test_f_enum, &test_o_enum) && - upb_fielddef_type(test_f_enum) == UPB_TYPE_ENUM) { - // It does exist! - has_field = true; - accessor_type = METHOD_ENUM_GETTER; - test_o = test_o_enum; - test_f = test_f_enum; +static void Message_setfield(upb_msg* msg, const upb_fielddef* f, VALUE val, + upb_arena* arena) { + upb_msgval msgval; + if (upb_fielddef_ismap(f)) { + msgval.map_val = Map_GetUpbMap(val, f); + } else if (upb_fielddef_isseq(f)) { + msgval.array_val = RepeatedField_GetUpbArray(val, f); + } else { + if (val == Qnil && + (upb_fielddef_issubmsg(f) || upb_fielddef_realcontainingoneof(f))) { + upb_msg_clearfield(msg, f); + return; } + msgval = + Convert_RubyToUpb(val, upb_fielddef_name(f), TypeInfo_get(f), arena); } + upb_msg_set(msg, f, msgval, arena); +} - // Verify the name corresponds to a oneof or field in this message. - if (!has_field) { - return METHOD_UNKNOWN; - } - - // Method calls like 'has_foo?' are not allowed if field "foo" does not have - // a hasbit (e.g. repeated fields or non-message type fields for proto3 - // syntax). - if (accessor_type == METHOD_PRESENCE && test_f != NULL) { - if (!upb_fielddef_haspresence(test_f)) return METHOD_UNKNOWN; +static VALUE Message_field_accessor(VALUE _self, const upb_fielddef* f, + int accessor_type, int argc, VALUE* argv) { + upb_arena *arena = Arena_get(Message_GetArena(_self)); - // TODO(haberman): remove this case, allow for proto3 oneofs. - if (upb_fielddef_realcontainingoneof(test_f) && - upb_filedef_syntax(upb_fielddef_file(test_f)) == UPB_SYNTAX_PROTO3) { - return METHOD_UNKNOWN; + switch (accessor_type) { + case METHOD_SETTER: + Message_setfield(Message_GetMutable(_self, NULL), f, argv[1], arena); + return Qnil; + case METHOD_CLEAR: + upb_msg_clearfield(Message_GetMutable(_self, NULL), f); + return Qnil; + case METHOD_PRESENCE: + if (!upb_fielddef_haspresence(f)) { + rb_raise(rb_eRuntimeError, "Field does not have presence."); + } + return upb_msg_has(Message_Get(_self, NULL), f); + case METHOD_WRAPPER_GETTER: { + Message* self = ruby_to_Message(_self); + if (upb_msg_has(self->msg, f)) { + PBRUBY_ASSERT(upb_fielddef_issubmsg(f) && !upb_fielddef_isseq(f)); + upb_msgval wrapper = upb_msg_get(self->msg, f); + const upb_msgdef *wrapper_m = upb_fielddef_msgsubdef(f); + const upb_fielddef *value_f = upb_msgdef_itof(wrapper_m, 1); + upb_msgval value = upb_msg_get(wrapper.msg_val, value_f); + return Convert_UpbToRuby(value, TypeInfo_get(value_f), self->arena); + } else { + return Qnil; + } + } + case METHOD_WRAPPER_SETTER: { + upb_msg *msg = Message_GetMutable(_self, NULL); + if (argv[1] == Qnil) { + upb_msg_clearfield(msg, f); + } else { + const upb_fielddef *val_f = upb_msgdef_itof(upb_fielddef_msgsubdef(f), 1); + upb_msgval msgval = Convert_RubyToUpb(argv[1], upb_fielddef_name(f), + TypeInfo_get(val_f), arena); + upb_msg *wrapper = upb_msg_mutable(msg, f, arena).msg; + upb_msg_set(wrapper, val_f, msgval, arena); + } + return Qnil; + } + case METHOD_ENUM_GETTER: { + upb_msgval msgval = upb_msg_get(Message_Get(_self, NULL), f); + + if (upb_fielddef_label(f) == UPB_LABEL_REPEATED) { + // Map repeated fields to a new type with ints + VALUE arr = rb_ary_new(); + size_t i, n = upb_array_size(msgval.array_val); + for (i = 0; i < n; i++) { + upb_msgval elem = upb_array_get(msgval.array_val, i); + rb_ary_push(arr, INT2NUM(elem.int32_val)); + } + return arr; + } else { + return INT2NUM(msgval.int32_val); + } + } + case METHOD_GETTER: { + Message* self = ruby_to_Message(_self); + // This is a special-case: upb_msg_mutable() for map & array are logically + // const (they will not change what is serialized) but physically + // non-const, as they do allocate a repeated field or map. The logical + // constness means it's ok to do even if the message is frozen. + upb_msg *msg = (upb_msg*)self->msg; + if (upb_fielddef_ismap(f)) { + upb_map *map = upb_msg_mutable(msg, f, arena).map; + const upb_fielddef *key_f = map_field_key(f); + const upb_fielddef *val_f = map_field_value(f); + upb_fieldtype_t key_type = upb_fielddef_type(key_f); + TypeInfo value_type_info = TypeInfo_get(val_f); + return Map_GetRubyWrapper(map, key_type, value_type_info, self->arena); + } else if (upb_fielddef_isseq(f)) { + upb_array *arr = upb_msg_mutable(msg, f, arena).array; + return RepeatedField_GetRubyWrapper(arr, TypeInfo_get(f), self->arena); + } else if (upb_fielddef_issubmsg(f)) { + if (!upb_msg_has(self->msg, f)) return Qnil; + upb_msg *submsg = upb_msg_mutable(msg, f, arena).msg; + const upb_msgdef *m = upb_fielddef_msgsubdef(f); + return Message_GetRubyWrapper(submsg, m, self->arena); + } else { + upb_msgval msgval = upb_msg_get(self->msg, f); + return Convert_UpbToRuby(msgval, TypeInfo_get(f), self->arena); + } + default: + rb_raise(rb_eRuntimeError, "Internal error, no such accessor: %d", + accessor_type); } } - - *o = test_o; - *f = test_f; - return accessor_type; } /* @@ -284,111 +410,56 @@ static int extract_method_call(VALUE method_name, MessageHeader* self, * true if the field 'fieldname' is set in the message object, else false. For * 'proto3' syntax, calling this for a basic type field will result in an error. */ -VALUE Message_method_missing(int argc, VALUE* argv, VALUE _self) { - MessageHeader* self; +static VALUE Message_method_missing(int argc, VALUE* argv, VALUE _self) { + Message* self = ruby_to_Message(_self); const upb_oneofdef* o; const upb_fielddef* f; int accessor_type; - TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); if (argc < 1) { rb_raise(rb_eArgError, "Expected method name as first argument."); } accessor_type = extract_method_call(argv[0], self, &f, &o); - if (accessor_type == METHOD_UNKNOWN || (o == NULL && f == NULL) ) { - return rb_call_super(argc, argv); - } else if (accessor_type == METHOD_SETTER || accessor_type == METHOD_WRAPPER_SETTER) { - if (argc != 2) { - rb_raise(rb_eArgError, "Expected 2 arguments, received %d", argc); - } - rb_check_frozen(_self); - } else if (argc != 1) { - rb_raise(rb_eArgError, "Expected 1 argument, received %d", argc); - } - // Return which of the oneof fields are set - if (o != NULL) { - const upb_fielddef* oneof_field = which_oneof_field(self, o); - - if (accessor_type == METHOD_SETTER) { - rb_raise(rb_eRuntimeError, "Oneof accessors are read-only."); - } + if (accessor_type == METHOD_UNKNOWN) return rb_call_super(argc, argv); - if (accessor_type == METHOD_PRESENCE) { - return oneof_field == NULL ? Qfalse : Qtrue; - } else if (accessor_type == METHOD_CLEAR) { - if (oneof_field != NULL) { - layout_clear(self->descriptor->layout, Message_data(self), oneof_field); + // Validate argument count. + switch (accessor_type) { + case METHOD_SETTER: + case METHOD_WRAPPER_SETTER: + if (argc != 2) { + rb_raise(rb_eArgError, "Expected 2 arguments, received %d", argc); } - return Qnil; - } else { - // METHOD_ACCESSOR - return oneof_field == NULL ? Qnil : - ID2SYM(rb_intern(upb_fielddef_name(oneof_field))); - } - // Otherwise we're operating on a single proto field - } else if (accessor_type == METHOD_SETTER) { - layout_set(self->descriptor->layout, Message_data(self), f, argv[1]); - return Qnil; - } else if (accessor_type == METHOD_CLEAR) { - layout_clear(self->descriptor->layout, Message_data(self), f); - return Qnil; - } else if (accessor_type == METHOD_PRESENCE) { - return layout_has(self->descriptor->layout, Message_data(self), f); - } else if (accessor_type == METHOD_WRAPPER_GETTER) { - VALUE value = layout_get(self->descriptor->layout, Message_data(self), f); - switch (TYPE(value)) { - case T_DATA: - return rb_funcall(value, rb_intern("value"), 0); - case T_NIL: - return Qnil; - default: - return value; - } - } else if (accessor_type == METHOD_WRAPPER_SETTER) { - VALUE wrapper = ruby_wrapper_type( - field_type_class(self->descriptor->layout, f), argv[1]); - layout_set(self->descriptor->layout, Message_data(self), f, wrapper); - return Qnil; - } else if (accessor_type == METHOD_ENUM_GETTER) { - VALUE enum_type = field_type_class(self->descriptor->layout, f); - VALUE method = rb_intern("const_get"); - VALUE raw_value = layout_get(self->descriptor->layout, Message_data(self), f); - - // Map repeated fields to a new type with ints - if (upb_fielddef_label(f) == UPB_LABEL_REPEATED) { - int array_size = FIX2INT(rb_funcall(raw_value, rb_intern("length"), 0)); - int i; - VALUE array_args[1] = { ID2SYM(rb_intern("int64")) }; - VALUE array = rb_class_new_instance(1, array_args, CLASS_OF(raw_value)); - for (i = 0; i < array_size; i++) { - VALUE entry = rb_funcall(enum_type, method, 1, rb_funcall(raw_value, - rb_intern("at"), 1, INT2NUM(i))); - rb_funcall(array, rb_intern("push"), 1, entry); + rb_check_frozen(_self); + break; + default: + if (argc != 1) { + rb_raise(rb_eArgError, "Expected 1 argument, received %d", argc); } - return array; - } - // Convert the value for singular fields - return rb_funcall(enum_type, method, 1, raw_value); + break; + } + + // Dispatch accessor. + if (o != NULL) { + return Message_oneof_accessor(_self, o, accessor_type); } else { - return layout_get(self->descriptor->layout, Message_data(self), f); + return Message_field_accessor(_self, f, accessor_type, argc, argv); } } - -VALUE Message_respond_to_missing(int argc, VALUE* argv, VALUE _self) { - MessageHeader* self; +static VALUE Message_respond_to_missing(int argc, VALUE* argv, VALUE _self) { + Message* self = ruby_to_Message(_self); const upb_oneofdef* o; const upb_fielddef* f; int accessor_type; - TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); if (argc < 1) { rb_raise(rb_eArgError, "Expected method name as first argument."); } accessor_type = extract_method_call(argv[0], self, &f, &o); + if (accessor_type == METHOD_UNKNOWN) { return rb_call_super(argc, argv); } else if (o != NULL) { @@ -398,17 +469,116 @@ VALUE Message_respond_to_missing(int argc, VALUE* argv, VALUE _self) { } } -VALUE create_submsg_from_hash(const MessageLayout* layout, - const upb_fielddef* f, VALUE hash) { - VALUE args[1] = { hash }; - return rb_class_new_instance(1, args, field_type_class(layout, f)); +void Message_InitFromValue(upb_msg* msg, const upb_msgdef* m, VALUE val, + upb_arena* arena); + +typedef struct { + upb_map *map; + TypeInfo key_type; + TypeInfo val_type; + upb_arena *arena; +} MapInit; + +static int Map_initialize_kwarg(VALUE key, VALUE val, VALUE _self) { + MapInit *map_init = (MapInit*)_self; + upb_msgval k, v; + k = Convert_RubyToUpb(key, "", map_init->key_type, NULL); + + if (map_init->val_type.type == UPB_TYPE_MESSAGE && TYPE(val) == T_HASH) { + upb_msg *msg = upb_msg_new(map_init->val_type.def.msgdef, map_init->arena); + Message_InitFromValue(msg, map_init->val_type.def.msgdef, val, + map_init->arena); + v.msg_val = msg; + } else { + v = Convert_RubyToUpb(val, "", map_init->val_type, map_init->arena); + } + upb_map_set(map_init->map, k, v, map_init->arena); + return ST_CONTINUE; } -int Message_initialize_kwarg(VALUE key, VALUE val, VALUE _self) { - MessageHeader* self; - char *name; - const upb_fielddef* f; - TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); +static void Map_InitFromValue(upb_map* map, const upb_fielddef* f, VALUE val, + upb_arena* arena) { + const upb_msgdef* entry_m = upb_fielddef_msgsubdef(f); + const upb_fielddef* key_f = upb_msgdef_itof(entry_m, 1); + const upb_fielddef* val_f = upb_msgdef_itof(entry_m, 2); + if (TYPE(val) != T_HASH) { + rb_raise(rb_eArgError, + "Expected Hash object as initializer value for map field '%s' " + "(given %s).", + upb_fielddef_name(f), rb_class2name(CLASS_OF(val))); + } + MapInit map_init = {map, TypeInfo_get(key_f), TypeInfo_get(val_f), arena}; + rb_hash_foreach(val, Map_initialize_kwarg, (VALUE)&map_init); +} + +static upb_msgval MessageValue_FromValue(VALUE val, TypeInfo info, + upb_arena* arena) { + if (info.type == UPB_TYPE_MESSAGE) { + upb_msgval msgval; + upb_msg* msg = upb_msg_new(info.def.msgdef, arena); + Message_InitFromValue(msg, info.def.msgdef, val, arena); + msgval.msg_val = msg; + return msgval; + } else { + return Convert_RubyToUpb(val, "", info, arena); + } +} + +static void RepeatedField_InitFromValue(upb_array* arr, const upb_fielddef* f, + VALUE val, upb_arena* arena) { + TypeInfo type_info = TypeInfo_get(f); + + if (TYPE(val) != T_ARRAY) { + rb_raise(rb_eArgError, + "Expected array as initializer value for repeated field '%s' (given %s).", + upb_fielddef_name(f), rb_class2name(CLASS_OF(val))); + } + + for (int i = 0; i < RARRAY_LEN(val); i++) { + VALUE entry = rb_ary_entry(val, i); + upb_msgval msgval; + if (upb_fielddef_issubmsg(f) && TYPE(entry) == T_HASH) { + msgval = MessageValue_FromValue(entry, type_info, arena); + } else { + msgval = Convert_RubyToUpb(entry, upb_fielddef_name(f), type_info, arena); + } + upb_array_append(arr, msgval, arena); + } +} + +static void Message_InitFieldFromValue(upb_msg* msg, const upb_fielddef* f, + VALUE val, upb_arena* arena) { + if (TYPE(val) == T_NIL) return; + + if (upb_fielddef_ismap(f)) { + upb_map *map = upb_msg_mutable(msg, f, arena).map; + Map_InitFromValue(map, f, val, arena); + } else if (upb_fielddef_label(f) == UPB_LABEL_REPEATED) { + upb_array *arr = upb_msg_mutable(msg, f, arena).array; + RepeatedField_InitFromValue(arr, f, val, arena); + } else if (upb_fielddef_issubmsg(f)) { + if (TYPE(val) == T_HASH) { + upb_msg *submsg = upb_msg_mutable(msg, f, arena).msg; + Message_InitFromValue(submsg, upb_fielddef_msgsubdef(f), val, arena); + } else { + Message_setfield(msg, f, val, arena); + } + } else { + upb_msgval msgval = + Convert_RubyToUpb(val, upb_fielddef_name(f), TypeInfo_get(f), arena); + upb_msg_set(msg, f, msgval, arena); + } +} + +typedef struct { + upb_msg *msg; + const upb_msgdef *msgdef; + upb_arena *arena; +} MsgInit; + +static int Message_initialize_kwarg(VALUE key, VALUE val, VALUE _self) { + MsgInit *msg_init = (MsgInit*)_self; + const char *name; if (TYPE(key) == T_STRING) { name = RSTRING_PTR(key); @@ -419,52 +589,26 @@ int Message_initialize_kwarg(VALUE key, VALUE val, VALUE _self) { "Expected string or symbols as hash keys when initializing proto from hash."); } - f = upb_msgdef_ntofz(self->descriptor->msgdef, name); + const upb_fielddef* f = upb_msgdef_ntofz(msg_init->msgdef, name); + if (f == NULL) { rb_raise(rb_eArgError, "Unknown field name '%s' in initialization map entry.", name); } - if (TYPE(val) == T_NIL) { - return 0; - } - - if (is_map_field(f)) { - VALUE map; - - if (TYPE(val) != T_HASH) { - rb_raise(rb_eArgError, - "Expected Hash object as initializer value for map field '%s' (given %s).", - name, rb_class2name(CLASS_OF(val))); - } - map = layout_get(self->descriptor->layout, Message_data(self), f); - Map_merge_into_self(map, val); - } else if (upb_fielddef_label(f) == UPB_LABEL_REPEATED) { - VALUE ary; - int i; - - if (TYPE(val) != T_ARRAY) { - rb_raise(rb_eArgError, - "Expected array as initializer value for repeated field '%s' (given %s).", - name, rb_class2name(CLASS_OF(val))); - } - ary = layout_get(self->descriptor->layout, Message_data(self), f); - for (i = 0; i < RARRAY_LEN(val); i++) { - VALUE entry = rb_ary_entry(val, i); - if (TYPE(entry) == T_HASH && upb_fielddef_issubmsg(f)) { - entry = create_submsg_from_hash(self->descriptor->layout, f, entry); - } + Message_InitFieldFromValue(msg_init->msg, f, val, msg_init->arena); + return ST_CONTINUE; +} - RepeatedField_push(ary, entry); - } +void Message_InitFromValue(upb_msg* msg, const upb_msgdef* m, VALUE val, + upb_arena* arena) { + MsgInit msg_init = {msg, m, arena}; + if (TYPE(val) == T_HASH) { + rb_hash_foreach(val, Message_initialize_kwarg, (VALUE)&msg_init); } else { - if (TYPE(val) == T_HASH && upb_fielddef_issubmsg(f)) { - val = create_submsg_from_hash(self->descriptor->layout, f, val); - } - - layout_set(self->descriptor->layout, Message_data(self), f, val); + rb_raise(rb_eArgError, "Expected hash arguments or message, not %s", + rb_class2name(CLASS_OF(val))); } - return 0; } /* @@ -479,12 +623,13 @@ int Message_initialize_kwarg(VALUE key, VALUE val, VALUE _self) { * have been added to a pool. The method definitions described here on the * Message class are provided on each concrete message class. */ -VALUE Message_initialize(int argc, VALUE* argv, VALUE _self) { - MessageHeader* self; - VALUE hash_args; - TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); +static VALUE Message_initialize(int argc, VALUE* argv, VALUE _self) { + Message* self = ruby_to_Message(_self); + VALUE arena_rb = Arena_new(); + upb_arena *arena = Arena_get(arena_rb); + upb_msg *msg = upb_msg_new(self->msgdef, arena); - layout_init(self->descriptor->layout, Message_data(self)); + Message_InitPtr(_self, msg, arena_rb); if (argc == 0) { return Qnil; @@ -492,12 +637,7 @@ VALUE Message_initialize(int argc, VALUE* argv, VALUE _self) { if (argc != 1) { rb_raise(rb_eArgError, "Expected 0 or 1 arguments."); } - hash_args = argv[0]; - if (TYPE(hash_args) != T_HASH) { - rb_raise(rb_eArgError, "Expected hash arguments."); - } - - rb_hash_foreach(hash_args, Message_initialize_kwarg, _self); + Message_InitFromValue((upb_msg*)self->msg, self->msgdef, argv[0], arena); return Qnil; } @@ -507,37 +647,40 @@ VALUE Message_initialize(int argc, VALUE* argv, VALUE _self) { * * Performs a shallow copy of this message and returns the new copy. */ -VALUE Message_dup(VALUE _self) { - MessageHeader* self; - VALUE new_msg; - MessageHeader* new_msg_self; - TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); - - new_msg = rb_class_new_instance(0, NULL, CLASS_OF(_self)); - TypedData_Get_Struct(new_msg, MessageHeader, &Message_type, new_msg_self); - - layout_dup(self->descriptor->layout, - Message_data(new_msg_self), - Message_data(self)); - +static VALUE Message_dup(VALUE _self) { + Message* self = ruby_to_Message(_self); + VALUE new_msg = rb_class_new_instance(0, NULL, CLASS_OF(_self)); + Message* new_msg_self = ruby_to_Message(new_msg); + size_t size = upb_msgdef_layout(self->msgdef)->size; + + // TODO(copy unknown fields?) + // TODO(use official upb msg copy function) + memcpy((upb_msg*)new_msg_self->msg, self->msg, size); + upb_arena_fuse(Arena_get(new_msg_self->arena), Arena_get(self->arena)); return new_msg; } -// Internal only; used by Google::Protobuf.deep_copy. -VALUE Message_deep_copy(VALUE _self) { - MessageHeader* self; - MessageHeader* new_msg_self; - VALUE new_msg; - TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); +// Support function for Message_eq, and also used by other #eq functions. +bool Message_Equal(const upb_msg *m1, const upb_msg *m2, const upb_msgdef *m) { + if (m1 == m2) return true; - new_msg = rb_class_new_instance(0, NULL, CLASS_OF(_self)); - TypedData_Get_Struct(new_msg, MessageHeader, &Message_type, new_msg_self); + size_t size1, size2; + int encode_opts = UPB_ENCODE_SKIPUNKNOWN | UPB_ENCODE_DETERMINISTIC; + upb_arena *arena_tmp = upb_arena_new(); + const upb_msglayout *layout = upb_msgdef_layout(m); - layout_deep_copy(self->descriptor->layout, - Message_data(new_msg_self), - Message_data(self)); + // Compare deterministically serialized payloads with no unknown fields. + char *data1 = upb_encode_ex(m1, layout, encode_opts, arena_tmp, &size1); + char *data2 = upb_encode_ex(m2, layout, encode_opts, arena_tmp, &size2); - return new_msg; + if (data1 && data2) { + bool ret = (size1 == size2) && (memcmp(data1, data2, size1) == 0); + upb_arena_free(arena_tmp); + return ret; + } else { + upb_arena_free(arena_tmp); + rb_raise(cParseError, "Error comparing messages"); + } } /* @@ -549,22 +692,37 @@ VALUE Message_deep_copy(VALUE _self) { * method's semantics (a more efficient comparison may actually be done if the * field is of a primitive type). */ -VALUE Message_eq(VALUE _self, VALUE _other) { - MessageHeader* self; - MessageHeader* other; +static VALUE Message_eq(VALUE _self, VALUE _other) { if (TYPE(_self) != TYPE(_other)) { return Qfalse; } - TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); - TypedData_Get_Struct(_other, MessageHeader, &Message_type, other); - if (self->descriptor != other->descriptor) { - return Qfalse; - } + Message* self = ruby_to_Message(_self); + Message* other = ruby_to_Message(_other); + + return Message_Equal(self->msg, other->msg, self->msgdef) + ? Qtrue + : Qfalse; +} + +uint64_t Message_Hash(const upb_msg* msg, const upb_msgdef* m, uint64_t seed) { + upb_arena *arena = upb_arena_new(); + const char *data; + size_t size; + + // Hash a deterministically serialized payloads with no unknown fields. + data = upb_encode_ex(msg, upb_msgdef_layout(m), + UPB_ENCODE_SKIPUNKNOWN | UPB_ENCODE_DETERMINISTIC, arena, + &size); - return layout_eq(self->descriptor->layout, - Message_data(self), - Message_data(other)); + if (data) { + uint64_t ret = wyhash(data, size, seed, _wyp); + upb_arena_free(arena); + return ret; + } else { + upb_arena_free(arena); + rb_raise(cParseError, "Error calculating hash"); + } } /* @@ -573,11 +731,9 @@ VALUE Message_eq(VALUE _self, VALUE _other) { * * Returns a hash value that represents this message's field values. */ -VALUE Message_hash(VALUE _self) { - MessageHeader* self; - TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); - - return layout_hash(self->descriptor->layout, Message_data(self)); +static VALUE Message_hash(VALUE _self) { + Message* self = ruby_to_Message(_self); + return INT2FIX(Message_Hash(self->msg, self->msgdef, 0)); } /* @@ -588,81 +744,117 @@ VALUE Message_hash(VALUE _self) { * formatted as "". Each * field's value is represented according to its own #inspect method. */ -VALUE Message_inspect(VALUE _self) { - MessageHeader* self; - VALUE str; - TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); +static VALUE Message_inspect(VALUE _self) { + Message* self = ruby_to_Message(_self); - str = rb_str_new2("<"); - str = rb_str_append(str, rb_str_new2(rb_class2name(CLASS_OF(_self)))); - str = rb_str_cat2(str, ": "); - str = rb_str_append(str, layout_inspect( - self->descriptor->layout, Message_data(self))); - str = rb_str_cat2(str, ">"); - return str; + StringBuilder* builder = StringBuilder_New(); + Message_PrintMessage(builder, self->msg, self->msgdef); + VALUE ret = StringBuilder_ToRubyString(builder); + StringBuilder_Free(builder); + return ret; } -/* - * call-seq: - * Message.to_h => {} - * - * Returns the message as a Ruby Hash object, with keys as symbols. - */ -VALUE Message_to_h(VALUE _self) { - MessageHeader* self; +// Support functions for Message_to_h ////////////////////////////////////////// + +static VALUE RepeatedField_CreateArray(const upb_array* arr, + TypeInfo type_info) { + int size = arr ? upb_array_size(arr) : 0; + VALUE ary = rb_ary_new2(size); + + for (int i = 0; i < size; i++) { + upb_msgval msgval = upb_array_get(arr, i); + VALUE val = Scalar_CreateHash(msgval, type_info); + rb_ary_push(ary, val); + } + + return ary; +} + +static VALUE Message_CreateHash(const upb_msg *msg, const upb_msgdef *m) { + if (!msg) return Qnil; + VALUE hash = rb_hash_new(); - upb_msg_field_iter it; + int n = upb_msgdef_fieldcount(m); bool is_proto2; - TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); // We currently have a few behaviors that are specific to proto2. // This is unfortunate, we should key behaviors off field attributes (like // whether a field has presence), not proto2 vs. proto3. We should see if we // can change this without breaking users. - is_proto2 = - upb_msgdef_syntax(self->descriptor->msgdef) == UPB_SYNTAX_PROTO2; + is_proto2 = upb_msgdef_syntax(m) == UPB_SYNTAX_PROTO2; - for (upb_msg_field_begin(&it, self->descriptor->msgdef); - !upb_msg_field_done(&it); - upb_msg_field_next(&it)) { - const upb_fielddef* field = upb_msg_iter_field(&it); + for (int i = 0; i < n; i++) { + const upb_fielddef* field = upb_msgdef_field(m, i); + TypeInfo type_info = TypeInfo_get(field); + upb_msgval msgval; VALUE msg_value; VALUE msg_key; // Do not include fields that are not present (oneof or optional fields). if (is_proto2 && upb_fielddef_haspresence(field) && - !layout_has(self->descriptor->layout, Message_data(self), field)) { + !upb_msg_has(msg, field)) { continue; } - msg_value = layout_get(self->descriptor->layout, Message_data(self), field); msg_key = ID2SYM(rb_intern(upb_fielddef_name(field))); - if (is_map_field(field)) { - msg_value = Map_to_h(msg_value); - } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { - msg_value = RepeatedField_to_ary(msg_value); - if (is_proto2 && RARRAY_LEN(msg_value) == 0) { + msgval = upb_msg_get(msg, field); + + // Proto2 omits empty map/repeated filds also. + + if (upb_fielddef_ismap(field)) { + const upb_msgdef *entry_m = upb_fielddef_msgsubdef(field); + const upb_fielddef *key_f = upb_msgdef_itof(entry_m, 1); + const upb_fielddef *val_f = upb_msgdef_itof(entry_m, 2); + upb_fieldtype_t key_type = upb_fielddef_type(key_f); + msg_value = Map_CreateHash(msgval.map_val, key_type, TypeInfo_get(val_f)); + } else if (upb_fielddef_isseq(field)) { + if (is_proto2 && + (!msgval.array_val || upb_array_size(msgval.array_val) == 0)) { continue; } - - if (upb_fielddef_type(field) == UPB_TYPE_MESSAGE) { - int i; - for (i = 0; i < RARRAY_LEN(msg_value); i++) { - VALUE elem = rb_ary_entry(msg_value, i); - rb_ary_store(msg_value, i, Message_to_h(elem)); - } - } - - } else if (msg_value != Qnil && - upb_fielddef_type(field) == UPB_TYPE_MESSAGE) { - msg_value = Message_to_h(msg_value); + msg_value = RepeatedField_CreateArray(msgval.array_val, type_info); + } else { + msg_value = Scalar_CreateHash(msgval, type_info); } + rb_hash_aset(hash, msg_key, msg_value); } + return hash; } +VALUE Scalar_CreateHash(upb_msgval msgval, TypeInfo type_info) { + if (type_info.type == UPB_TYPE_MESSAGE) { + return Message_CreateHash(msgval.msg_val, type_info.def.msgdef); + } else { + return Convert_UpbToRuby(msgval, type_info, Qnil); + } +} + +/* + * call-seq: + * Message.to_h => {} + * + * Returns the message as a Ruby Hash object, with keys as symbols. + */ +static VALUE Message_to_h(VALUE _self) { + Message* self = ruby_to_Message(_self); + return Message_CreateHash(self->msg, self->msgdef); +} +/* + * call-seq: + * Message.freeze => self + * + * Freezes the message object. We have to intercept this so we can pin the + * Ruby object into memory so we don't forget it's frozen. + */ +static VALUE Message_freeze(VALUE _self) { + Message* self = ruby_to_Message(_self); + ObjectCache_Pin(self->msg, _self, Arena_get(self->arena)); + RB_OBJ_FREEZE(_self); + return _self; +} /* * call-seq: @@ -671,16 +863,20 @@ VALUE Message_to_h(VALUE _self) { * Accesses a field's value by field name. The provided field name should be a * string. */ -VALUE Message_index(VALUE _self, VALUE field_name) { - MessageHeader* self; +static VALUE Message_index(VALUE _self, VALUE field_name) { + Message* self = ruby_to_Message(_self); const upb_fielddef* field; - TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); + upb_msgval val; + Check_Type(field_name, T_STRING); - field = upb_msgdef_ntofz(self->descriptor->msgdef, RSTRING_PTR(field_name)); + field = upb_msgdef_ntofz(self->msgdef, RSTRING_PTR(field_name)); + if (field == NULL) { return Qnil; } - return layout_get(self->descriptor->layout, Message_data(self), field); + + val = upb_msg_get(self->msg, field); + return Convert_UpbToRuby(val, TypeInfo_get(field), self->arena); } /* @@ -690,19 +886,208 @@ VALUE Message_index(VALUE _self, VALUE field_name) { * Sets a field's value by field name. The provided field name should be a * string. */ -VALUE Message_index_set(VALUE _self, VALUE field_name, VALUE value) { - MessageHeader* self; - const upb_fielddef* field; - TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); +static VALUE Message_index_set(VALUE _self, VALUE field_name, VALUE value) { + Message* self = ruby_to_Message(_self); + const upb_fielddef* f; + upb_msgval val; + upb_arena *arena = Arena_get(self->arena); + Check_Type(field_name, T_STRING); - field = upb_msgdef_ntofz(self->descriptor->msgdef, RSTRING_PTR(field_name)); - if (field == NULL) { + f = upb_msgdef_ntofz(self->msgdef, RSTRING_PTR(field_name)); + + if (f == NULL) { rb_raise(rb_eArgError, "Unknown field: %s", RSTRING_PTR(field_name)); } - layout_set(self->descriptor->layout, Message_data(self), field, value); + + val = Convert_RubyToUpb(value, upb_fielddef_name(f), TypeInfo_get(f), arena); + upb_msg_set(Message_GetMutable(_self, NULL), f, val, arena); + return Qnil; } +/* + * call-seq: + * MessageClass.decode(data) => message + * + * Decodes the given data (as a string containing bytes in protocol buffers wire + * format) under the interpretration given by this message class's definition + * and returns a message object with the corresponding field values. + */ +static VALUE Message_decode(VALUE klass, VALUE data) { + if (TYPE(data) != T_STRING) { + rb_raise(rb_eArgError, "Expected string for binary protobuf data."); + } + + VALUE msg_rb = initialize_rb_class_with_no_args(klass); + Message* msg = ruby_to_Message(msg_rb); + + if (!upb_decode(RSTRING_PTR(data), RSTRING_LEN(data), (upb_msg*)msg->msg, + upb_msgdef_layout(msg->msgdef), + Arena_get(msg->arena))) { + rb_raise(cParseError, "Error occurred during parsing"); + } + + return msg_rb; +} + +/* + * call-seq: + * MessageClass.decode_json(data, options = {}) => message + * + * Decodes the given data (as a string containing bytes in protocol buffers wire + * format) under the interpretration given by this message class's definition + * and returns a message object with the corresponding field values. + * + * @param options [Hash] options for the decoder + * ignore_unknown_fields: set true to ignore unknown fields (default is to + * raise an error) + */ +static VALUE Message_decode_json(int argc, VALUE* argv, VALUE klass) { + VALUE data = argv[0]; + int options = 0; + upb_status status; + + // TODO(haberman): use this message's pool instead. + const upb_symtab *symtab = DescriptorPool_GetSymtab(generated_pool); + + if (argc < 1 || argc > 2) { + rb_raise(rb_eArgError, "Expected 1 or 2 arguments."); + } + + if (argc == 2) { + VALUE hash_args = argv[1]; + if (TYPE(hash_args) != T_HASH) { + rb_raise(rb_eArgError, "Expected hash arguments."); + } + + if (RTEST(rb_hash_lookup2( hash_args, ID2SYM(rb_intern("ignore_unknown_fields")), Qfalse))) { + options |= UPB_JSONDEC_IGNOREUNKNOWN; + } + } + + if (TYPE(data) != T_STRING) { + rb_raise(rb_eArgError, "Expected string for JSON data."); + } + + // TODO(cfallin): Check and respect string encoding. If not UTF-8, we need to + // convert, because string handlers pass data directly to message string + // fields. + + VALUE msg_rb = initialize_rb_class_with_no_args(klass); + Message* msg = ruby_to_Message(msg_rb); + + // We don't allow users to decode a wrapper type directly. + if (upb_msgdef_iswrapper(msg->msgdef)) { + rb_raise(rb_eRuntimeError, "Cannot parse a wrapper directly."); + } + + upb_status_clear(&status); + if (!upb_json_decode(RSTRING_PTR(data), RSTRING_LEN(data), (upb_msg*)msg->msg, + msg->msgdef, symtab, options, + Arena_get(msg->arena), &status)) { + rb_raise(cParseError, "Error occurred during parsing: %s", + upb_status_errmsg(&status)); + } + + return msg_rb; +} + +/* + * call-seq: + * MessageClass.encode(msg) => bytes + * + * Encodes the given message object to its serialized form in protocol buffers + * wire format. + */ +static VALUE Message_encode(VALUE klass, VALUE msg_rb) { + Message* msg = ruby_to_Message(msg_rb); + upb_arena *arena = upb_arena_new(); + const char *data; + size_t size; + + if (CLASS_OF(msg_rb) != klass) { + rb_raise(rb_eArgError, "Message of wrong type."); + } + + data = upb_encode(msg->msg, upb_msgdef_layout(msg->msgdef), arena, + &size); + + if (data) { + VALUE ret = rb_str_new(data, size); + rb_enc_associate(ret, rb_ascii8bit_encoding()); + upb_arena_free(arena); + return ret; + } else { + upb_arena_free(arena); + rb_raise(rb_eRuntimeError, "Exceeded maximum depth (possibly cycle)"); + } +} + +/* + * call-seq: + * MessageClass.encode_json(msg, options = {}) => json_string + * + * Encodes the given message object into its serialized JSON representation. + * @param options [Hash] options for the decoder + * preserve_proto_fieldnames: set true to use original fieldnames (default is to camelCase) + * emit_defaults: set true to emit 0/false values (default is to omit them) + */ +static VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) { + Message* msg = ruby_to_Message(argv[0]); + int options = 0; + char buf[1024]; + size_t size; + upb_status status; + + // TODO(haberman): use this message's pool instead. + const upb_symtab *symtab = DescriptorPool_GetSymtab(generated_pool); + + if (argc < 1 || argc > 2) { + rb_raise(rb_eArgError, "Expected 1 or 2 arguments."); + } + + if (argc == 2) { + VALUE hash_args = argv[1]; + if (TYPE(hash_args) != T_HASH) { + rb_raise(rb_eArgError, "Expected hash arguments."); + } + + if (RTEST(rb_hash_lookup2(hash_args, + ID2SYM(rb_intern("preserve_proto_fieldnames")), + Qfalse))) { + options |= UPB_JSONENC_PROTONAMES; + } + + if (RTEST(rb_hash_lookup2(hash_args, ID2SYM(rb_intern("emit_defaults")), + Qfalse))) { + options |= UPB_JSONENC_EMITDEFAULTS; + } + } + + upb_status_clear(&status); + size = upb_json_encode(msg->msg, msg->msgdef, symtab, options, buf, + sizeof(buf), &status); + + if (!upb_ok(&status)) { + rb_raise(cParseError, "Error occurred during encoding: %s", + upb_status_errmsg(&status)); + } + + VALUE ret; + if (size >= sizeof(buf)) { + char* buf2 = malloc(size + 1); + upb_json_encode(msg->msg, msg->msgdef, symtab, options, buf2, size + 1, + &status); + ret = rb_str_new(buf2, size); + free(buf2); + } else { + ret = rb_str_new(buf, size); + } + + rb_enc_associate(ret, rb_utf8_encoding()); + return ret; +} + /* * call-seq: * Message.descriptor => descriptor @@ -710,16 +1095,15 @@ VALUE Message_index_set(VALUE _self, VALUE field_name, VALUE value) { * Class method that returns the Descriptor instance corresponding to this * message class's type. */ -VALUE Message_descriptor(VALUE klass) { +static VALUE Message_descriptor(VALUE klass) { return rb_ivar_get(klass, descriptor_instancevar_interned); } VALUE build_class_from_descriptor(VALUE descriptor) { - Descriptor* desc = ruby_to_Descriptor(descriptor); const char *name; VALUE klass; - name = upb_msgdef_fullname(desc->msgdef); + name = upb_msgdef_fullname(Descriptor_GetMsgDef(descriptor)); if (name == NULL) { rb_raise(rb_eRuntimeError, "Descriptor does not have assigned name."); } @@ -746,6 +1130,7 @@ VALUE build_class_from_descriptor(VALUE descriptor) { rb_define_method(klass, "clone", Message_dup, 0); rb_define_method(klass, "==", Message_eq, 1); rb_define_method(klass, "eql?", Message_eq, 1); + rb_define_method(klass, "freeze", Message_freeze, 0); rb_define_method(klass, "hash", Message_hash, 0); rb_define_method(klass, "to_h", Message_to_h, 0); rb_define_method(klass, "inspect", Message_inspect, 0); @@ -768,12 +1153,12 @@ VALUE build_class_from_descriptor(VALUE descriptor) { * This module method, provided on each generated enum module, looks up an enum * value by number and returns its name as a Ruby symbol, or nil if not found. */ -VALUE enum_lookup(VALUE self, VALUE number) { +static VALUE enum_lookup(VALUE self, VALUE number) { int32_t num = NUM2INT(number); VALUE desc = rb_ivar_get(self, descriptor_instancevar_interned); - EnumDescriptor* enumdesc = ruby_to_EnumDescriptor(desc); + const upb_enumdef *e = EnumDescriptor_GetEnumDef(desc); - const char* name = upb_enumdef_iton(enumdesc->enumdef, num); + const char* name = upb_enumdef_iton(e, num); if (name == NULL) { return Qnil; } else { @@ -788,13 +1173,13 @@ VALUE enum_lookup(VALUE self, VALUE number) { * This module method, provided on each generated enum module, looks up an enum * value by name (as a Ruby symbol) and returns its name, or nil if not found. */ -VALUE enum_resolve(VALUE self, VALUE sym) { +static VALUE enum_resolve(VALUE self, VALUE sym) { const char* name = rb_id2name(SYM2ID(sym)); VALUE desc = rb_ivar_get(self, descriptor_instancevar_interned); - EnumDescriptor* enumdesc = ruby_to_EnumDescriptor(desc); + const upb_enumdef *e = EnumDescriptor_GetEnumDef(desc); int32_t num = 0; - bool found = upb_enumdef_ntoiz(enumdesc->enumdef, name, &num); + bool found = upb_enumdef_ntoiz(e, name, &num); if (!found) { return Qnil; } else { @@ -809,17 +1194,16 @@ VALUE enum_resolve(VALUE self, VALUE sym) { * This module method, provided on each generated enum module, returns the * EnumDescriptor corresponding to this enum type. */ -VALUE enum_descriptor(VALUE self) { +static VALUE enum_descriptor(VALUE self) { return rb_ivar_get(self, descriptor_instancevar_interned); } VALUE build_module_from_enumdesc(VALUE _enumdesc) { - EnumDescriptor* enumdesc = ruby_to_EnumDescriptor(_enumdesc); - VALUE mod = rb_define_module_id( - rb_intern(upb_enumdef_fullname(enumdesc->enumdef))); + const upb_enumdef *e = EnumDescriptor_GetEnumDef(_enumdesc); + VALUE mod = rb_define_module_id(rb_intern(upb_enumdef_fullname(e))); upb_enum_iter it; - for (upb_enum_begin(&it, enumdesc->enumdef); + for (upb_enum_begin(&it, e); !upb_enum_done(&it); upb_enum_next(&it)) { const char* name = upb_enum_iter_name(&it); @@ -840,20 +1224,92 @@ VALUE build_module_from_enumdesc(VALUE _enumdesc) { return mod; } -/* - * call-seq: - * Google::Protobuf.deep_copy(obj) => copy_of_obj - * - * Performs a deep copy of a RepeatedField instance, a Map instance, or a - * message object, recursively copying its members. - */ -VALUE Google_Protobuf_deep_copy(VALUE self, VALUE obj) { - VALUE klass = CLASS_OF(obj); - if (klass == cRepeatedField) { - return RepeatedField_deep_copy(obj); - } else if (klass == cMap) { - return Map_deep_copy(obj); - } else { - return Message_deep_copy(obj); +// Internal only; used by Google::Protobuf.deep_copy. +upb_msg* Message_deep_copy(const upb_msg* msg, const upb_msgdef* m, + upb_arena *arena) { + // Serialize and parse. + upb_arena *tmp_arena = upb_arena_new(); + const upb_msglayout *layout = upb_msgdef_layout(m); + size_t size; + + char* data = upb_encode_ex(msg, layout, 0, tmp_arena, &size); + upb_msg* new_msg = upb_msg_new(m, arena); + + if (!data || !upb_decode(data, size, new_msg, layout, arena)) { + upb_arena_free(tmp_arena); + rb_raise(cParseError, "Error occurred copying proto"); } + + upb_arena_free(tmp_arena); + return new_msg; +} + +const upb_msg* Message_GetUpbMessage(VALUE value, const upb_msgdef* m, + const char* name, upb_arena* arena) { + if (value == Qnil) return NULL; + + VALUE klass = CLASS_OF(value); + VALUE desc_rb = rb_ivar_get(klass, descriptor_instancevar_interned); + const upb_msgdef* val_m = + desc_rb == Qnil ? NULL : Descriptor_GetMsgDef(desc_rb); + + if (val_m != m) { + // Check for possible implicit conversions + // TODO: hash conversion? + + switch (upb_msgdef_wellknowntype(m)) { + case UPB_WELLKNOWN_TIMESTAMP: { + // Time -> Google::Protobuf::Timestamp + upb_msg *msg = upb_msg_new(m, arena); + upb_msgval sec, nsec; + struct timespec time; + const upb_fielddef *sec_f = upb_msgdef_itof(m, 1); + const upb_fielddef *nsec_f = upb_msgdef_itof(m, 2); + + if (!rb_obj_is_kind_of(value, rb_cTime)) goto badtype; + + time = rb_time_timespec(value); + sec.int64_val = time.tv_sec; + nsec.int32_val = time.tv_nsec; + upb_msg_set(msg, sec_f, sec, arena); + upb_msg_set(msg, nsec_f, nsec, arena); + return msg; + } + case UPB_WELLKNOWN_DURATION: { + // Numeric -> Google::Protobuf::Duration + upb_msg *msg = upb_msg_new(m, arena); + upb_msgval sec, nsec; + const upb_fielddef *sec_f = upb_msgdef_itof(m, 1); + const upb_fielddef *nsec_f = upb_msgdef_itof(m, 2); + + if (!rb_obj_is_kind_of(value, rb_cNumeric)) goto badtype; + + sec.int64_val = NUM2LL(value); + nsec.int32_val = (NUM2DBL(value) - NUM2LL(value)) * 1000000000; + upb_msg_set(msg, sec_f, sec, arena); + upb_msg_set(msg, nsec_f, nsec, arena); + return msg; + } + default: + badtype: + rb_raise(cTypeError, + "Invalid type %s to assign to submessage field '%s'.", + rb_class2name(CLASS_OF(value)), name); + } + + } + + Message* self = ruby_to_Message(value); + upb_arena_fuse(arena, Arena_get(self->arena)); + + return self->msg; +} + +void Message_register(VALUE protobuf) { + cParseError = rb_const_get(protobuf, rb_intern("ParseError")); + + // Ruby-interned string: "descriptor". We use this identifier to store an + // instance variable on message classes we create in order to link them back + // to their descriptors. + descriptor_instancevar_interned = rb_intern("descriptor"); } diff --git a/ruby/ext/google/protobuf_c/message.h b/ruby/ext/google/protobuf_c/message.h new file mode 100644 index 000000000000..551f41f96d82 --- /dev/null +++ b/ruby/ext/google/protobuf_c/message.h @@ -0,0 +1,98 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef RUBY_PROTOBUF_MESSAGE_H_ +#define RUBY_PROTOBUF_MESSAGE_H_ + +#include + +#include "protobuf.h" +#include "ruby-upb.h" + +// Gets the underlying upb_msg* and upb_msgdef for the given Ruby message +// wrapper. Requires that |value| is indeed a message object. +const upb_msg *Message_Get(VALUE value, const upb_msgdef **m); + +// Like Message_Get(), but checks that the object is not frozen and returns a +// mutable pointer. +upb_msg *Message_GetMutable(VALUE value, const upb_msgdef **m); + +// Returns the Arena object for this message. +VALUE Message_GetArena(VALUE value); + +// Converts |value| into a upb_msg value of the expected upb_msgdef type, +// raising an error if this is not possible. Used when assigning |value| to a +// field of another message, which means the message must be of a particular +// type. +// +// This will perform automatic conversions in some cases (for example, Time -> +// Google::Protobuf::Timestamp). If any new message is created, it will be +// created on |arena|, and any existing message will have its arena fused with +// |arena|. +const upb_msg* Message_GetUpbMessage(VALUE value, const upb_msgdef* m, + const char* name, upb_arena* arena); + +// Gets or constructs a Ruby wrapper object for the given message. The wrapper +// object will reference |arena| and ensure that it outlives this object. +VALUE Message_GetRubyWrapper(upb_msg* msg, const upb_msgdef* m, VALUE arena); + +// Implements #inspect for this message, printing the text to |b|. +void Message_PrintMessage(StringBuilder* b, const upb_msg* msg, + const upb_msgdef* m); + +// Returns a hash value for the given message. +uint64_t Message_Hash(const upb_msg *msg, const upb_msgdef *m, uint64_t seed); + +// Returns a deep copy of the given message. +upb_msg* Message_deep_copy(const upb_msg* msg, const upb_msgdef* m, + upb_arena *arena); + +// Returns true if these two messages are equal. +bool Message_Equal(const upb_msg *m1, const upb_msg *m2, const upb_msgdef *m); + +// Checks that this Ruby object is a message, and raises an exception if not. +void Message_CheckClass(VALUE klass); + +// Returns a new Hash object containing the contents of this message. +VALUE Scalar_CreateHash(upb_msgval val, TypeInfo type_info); + +// Creates a message class or enum module for this descriptor, respectively. +VALUE build_class_from_descriptor(VALUE descriptor); +VALUE build_module_from_enumdesc(VALUE _enumdesc); + +// Returns the Descriptor/EnumDescriptor for the given message class or enum +// module, respectively. Returns nil if this is not a message class or enum +// module. +VALUE MessageOrEnum_GetDescriptor(VALUE klass); + +// Call at startup to register all types in this module. +void Message_register(VALUE protobuf); + +#endif // RUBY_PROTOBUF_MESSAGE_H_ diff --git a/ruby/ext/google/protobuf_c/protobuf.c b/ruby/ext/google/protobuf_c/protobuf.c index 60c1e2a28152..737cd284b868 100644 --- a/ruby/ext/google/protobuf_c/protobuf.c +++ b/ruby/ext/google/protobuf_c/protobuf.c @@ -30,62 +30,342 @@ #include "protobuf.h" +#include + +#include "defs.h" +#include "map.h" +#include "message.h" +#include "repeated_field.h" + VALUE cError; -VALUE cParseError; VALUE cTypeError; -VALUE c_only_cookie = Qnil; -static VALUE cached_empty_string = Qnil; -static VALUE cached_empty_bytes = Qnil; +const upb_fielddef* map_field_key(const upb_fielddef* field) { + const upb_msgdef *entry = upb_fielddef_msgsubdef(field); + return upb_msgdef_itof(entry, 1); +} + +const upb_fielddef* map_field_value(const upb_fielddef* field) { + const upb_msgdef *entry = upb_fielddef_msgsubdef(field); + return upb_msgdef_itof(entry, 2); +} -static VALUE create_frozen_string(const char* str, size_t size, bool binary) { - VALUE str_rb = rb_str_new(str, size); +// ----------------------------------------------------------------------------- +// StringBuilder, for inspect +// ----------------------------------------------------------------------------- + +struct StringBuilder { + size_t size; + size_t cap; + char *data; +}; + +typedef struct StringBuilder StringBuilder; + +static size_t StringBuilder_SizeOf(size_t cap) { + return sizeof(StringBuilder) + cap; +} + +StringBuilder* StringBuilder_New() { + const size_t cap = 128; + StringBuilder* builder = malloc(sizeof(*builder)); + builder->size = 0; + builder->cap = cap; + builder->data = malloc(builder->cap); + return builder; +} + +void StringBuilder_Free(StringBuilder* b) { + free(b->data); + free(b); +} + +void StringBuilder_Printf(StringBuilder* b, const char *fmt, ...) { + size_t have = b->cap - b->size; + size_t n; + va_list args; + + va_start(args, fmt); + n = vsnprintf(&b->data[b->size], have, fmt, args); + va_end(args); + + if (have <= n) { + while (have <= n) { + b->cap *= 2; + have = b->cap - b->size; + } + b->data = realloc(b->data, StringBuilder_SizeOf(b->cap)); + va_start(args, fmt); + n = vsnprintf(&b->data[b->size], have, fmt, args); + va_end(args); + PBRUBY_ASSERT(n < have); + } + + b->size += n; +} - rb_enc_associate(str_rb, - binary ? kRubyString8bitEncoding : kRubyStringUtf8Encoding); - rb_obj_freeze(str_rb); - return str_rb; +VALUE StringBuilder_ToRubyString(StringBuilder* b) { + VALUE ret = rb_str_new(b->data, b->size); + rb_enc_associate(ret, rb_utf8_encoding()); + return ret; } -VALUE get_frozen_string(const char* str, size_t size, bool binary) { - if (size == 0) { - return binary ? cached_empty_bytes : cached_empty_string; +static void StringBuilder_PrintEnum(StringBuilder* b, int32_t val, + const upb_enumdef* e) { + const char *name = upb_enumdef_iton(e, val); + if (name) { + StringBuilder_Printf(b, ":%s", name); } else { - // It is harder to memoize non-empty strings. The obvious approach would be - // to use a Ruby hash keyed by string as memo table, but looking up in such a table - // requires constructing a string (the very thing we're trying to avoid). - // - // Since few fields have defaults, we will just optimize the empty string - // case for now. - return create_frozen_string(str, size, binary); + StringBuilder_Printf(b, "%" PRId32, val); + } +} + +void StringBuilder_PrintMsgval(StringBuilder* b, upb_msgval val, + TypeInfo info) { + switch (info.type) { + case UPB_TYPE_BOOL: + StringBuilder_Printf(b, "%s", val.bool_val ? "true" : "false"); + break; + case UPB_TYPE_FLOAT: { + VALUE str = rb_inspect(DBL2NUM(val.float_val)); + StringBuilder_Printf(b, "%s", RSTRING_PTR(str)); + break; + } + case UPB_TYPE_DOUBLE: { + VALUE str = rb_inspect(DBL2NUM(val.double_val)); + StringBuilder_Printf(b, "%s", RSTRING_PTR(str)); + break; + } + case UPB_TYPE_INT32: + StringBuilder_Printf(b, "%" PRId32, val.int32_val); + break; + case UPB_TYPE_UINT32: + StringBuilder_Printf(b, "%" PRIu32, val.uint32_val); + break; + case UPB_TYPE_INT64: + StringBuilder_Printf(b, "%" PRId64, val.int64_val); + break; + case UPB_TYPE_UINT64: + StringBuilder_Printf(b, "%" PRIu64, val.uint64_val); + break; + case UPB_TYPE_STRING: + StringBuilder_Printf(b, "\"%.*s\"", (int)val.str_val.size, val.str_val.data); + break; + case UPB_TYPE_BYTES: + StringBuilder_Printf(b, "\"%.*s\"", (int)val.str_val.size, val.str_val.data); + break; + case UPB_TYPE_ENUM: + StringBuilder_PrintEnum(b, val.int32_val, info.def.enumdef); + break; + case UPB_TYPE_MESSAGE: + Message_PrintMessage(b, val.msg_val, info.def.msgdef); + break; } } // ----------------------------------------------------------------------------- -// Utilities. +// Arena // ----------------------------------------------------------------------------- -// Raises a Ruby error if |status| is not OK, using its error message. -void check_upb_status(const upb_status* status, const char* msg) { - if (!upb_ok(status)) { - rb_raise(rb_eRuntimeError, "%s: %s\n", msg, upb_status_errmsg(status)); - } +void Arena_free(void* data) { upb_arena_free(data); } + +static VALUE cArena; + +const rb_data_type_t Arena_type = { + "Google::Protobuf::Internal::Arena", + { NULL, Arena_free, NULL }, +}; + +static VALUE Arena_alloc(VALUE klass) { + upb_arena *arena = upb_arena_new(); + return TypedData_Wrap_Struct(klass, &Arena_type, arena); +} + +upb_arena *Arena_get(VALUE _arena) { + upb_arena *arena; + TypedData_Get_Struct(_arena, upb_arena, &Arena_type, arena); + return arena; +} + +VALUE Arena_new() { + return Arena_alloc(cArena); +} + +void Arena_register(VALUE module) { + VALUE internal = rb_define_module_under(module, "Internal"); + VALUE klass = rb_define_class_under(internal, "Arena", rb_cObject); + rb_define_alloc_func(klass, Arena_alloc); + rb_gc_register_address(&cArena); + cArena = klass; } -// String encodings: we look these up once, at load time, and then cache them -// here. -rb_encoding* kRubyStringUtf8Encoding; -rb_encoding* kRubyStringASCIIEncoding; -rb_encoding* kRubyString8bitEncoding; +// ----------------------------------------------------------------------------- +// Object Cache +// ----------------------------------------------------------------------------- -// Ruby-interned string: "descriptor". We use this identifier to store an -// instance variable on message classes we create in order to link them back to -// their descriptors. +// A pointer -> Ruby Object cache that keeps references to Ruby wrapper +// objects. This allows us to look up any Ruby wrapper object by the address +// of the object it is wrapping. That way we can avoid ever creating two +// different wrapper objects for the same C object, which saves memory and +// preserves object identity. // -// We intern this once at module load time then use the interned identifier at -// runtime in order to avoid the cost of repeatedly interning in hot paths. -const char* kDescriptorInstanceVar = "descriptor"; -ID descriptor_instancevar_interned; +// We use Hash and/or WeakMap for the cache. WeakMap is faster overall +// (probably due to removal being integrated with GC) but doesn't work for Ruby +// <2.7 (see note below). We need Hash for Ruby <2.7 and for cases where we +// need to GC-root the object (notably when the object has been frozen). + +#if RUBY_API_VERSION_CODE >= 20700 +#define USE_WEAK_MAP 1 +#else +#define USE_WEAK_MAP 0 +#endif + +static VALUE ObjectCache_GetKey(const void* key) { + char buf[sizeof(key)]; + memcpy(&buf, &key, sizeof(key)); + intptr_t key_int = (intptr_t)key; + PBRUBY_ASSERT((key_int & 3) == 0); + return LL2NUM(key_int >> 2); +} + +// Strong object cache, uses regular Hash and GC-roots objects. +// - For Ruby <2.7, used for all objects. +// - For Ruby >=2.7, used only for frozen objects, so we preserve the "frozen" +// bit (since this information is not preserved at the upb level). + +VALUE strong_obj_cache = Qnil; + +static void StrongObjectCache_Init() { + rb_gc_register_address(&strong_obj_cache); + strong_obj_cache = rb_hash_new(); +} + +static void StrongObjectCache_Remove(void* key) { + VALUE key_rb = ObjectCache_GetKey(key); + PBRUBY_ASSERT(rb_hash_lookup(strong_obj_cache, key_rb) != Qnil); + rb_hash_delete(strong_obj_cache, key_rb); +} + +static VALUE StrongObjectCache_Get(const void* key) { + VALUE key_rb = ObjectCache_GetKey(key); + return rb_hash_lookup(strong_obj_cache, key_rb); +} + +static void StrongObjectCache_Add(const void* key, VALUE val, + upb_arena* arena) { + PBRUBY_ASSERT(StrongObjectCache_Get(key) == Qnil); + VALUE key_rb = ObjectCache_GetKey(key); + rb_hash_aset(strong_obj_cache, key_rb, val); + upb_arena_addcleanup(arena, (void*)key, StrongObjectCache_Remove); +} + +// Weak object cache. This speeds up the test suite significantly, so we +// presume it speeds up real code also. However we can only use it in Ruby +// >=2.7 due to: +// https://bugs.ruby-lang.org/issues/16035 + +#if USE_WEAK_MAP + +VALUE weak_obj_cache = Qnil; + +static void WeakObjectCache_Init() { + rb_gc_register_address(&weak_obj_cache); + VALUE klass = rb_eval_string("ObjectSpace::WeakMap"); + weak_obj_cache = rb_class_new_instance(0, NULL, klass); +} + +static VALUE WeakObjectCache_Get(const void* key) { + VALUE key_rb = ObjectCache_GetKey(key); + VALUE ret = rb_funcall(weak_obj_cache, rb_intern("[]"), 1, key_rb); + return ret; +} + +static void WeakObjectCache_Add(const void* key, VALUE val) { + PBRUBY_ASSERT(WeakObjectCache_Get(key) == Qnil); + VALUE key_rb = ObjectCache_GetKey(key); + rb_funcall(weak_obj_cache, rb_intern("[]="), 2, key_rb, val); + PBRUBY_ASSERT(WeakObjectCache_Get(key) == val); +} + +#endif + +// Public ObjectCache API. + +static void ObjectCache_Init() { + StrongObjectCache_Init(); +#if USE_WEAK_MAP + WeakObjectCache_Init(); +#endif +} + +void ObjectCache_Add(const void* key, VALUE val, upb_arena *arena) { +#if USE_WEAK_MAP + (void)arena; + WeakObjectCache_Add(key, val); +#else + StrongObjectCache_Add(key, val, arena); +#endif +} + +// Returns the cached object for this key, if any. Otherwise returns Qnil. +VALUE ObjectCache_Get(const void* key) { +#if USE_WEAK_MAP + return WeakObjectCache_Get(key); +#else + return StrongObjectCache_Get(key); +#endif +} + +void ObjectCache_Pin(const void* key, VALUE val, upb_arena *arena) { +#if USE_WEAK_MAP + PBRUBY_ASSERT(WeakObjectCache_Get(key) == val); + // This will GC-root the object, but we'll still use the weak map for + // actual lookup. + StrongObjectCache_Add(key, val, arena); +#else + // Value is already pinned, nothing to do. +#endif +} + +/* + * call-seq: + * Google::Protobuf.discard_unknown(msg) + * + * Discard unknown fields in the given message object and recursively discard + * unknown fields in submessages. + */ +static VALUE Google_Protobuf_discard_unknown(VALUE self, VALUE msg_rb) { + const upb_msgdef *m; + upb_msg *msg = Message_GetMutable(msg_rb, &m); + if (!upb_msg_discardunknown(msg, m, 128)) { + rb_raise(rb_eRuntimeError, "Messages nested too deeply."); + } + + return Qnil; +} + +/* + * call-seq: + * Google::Protobuf.deep_copy(obj) => copy_of_obj + * + * Performs a deep copy of a RepeatedField instance, a Map instance, or a + * message object, recursively copying its members. + */ +VALUE Google_Protobuf_deep_copy(VALUE self, VALUE obj) { + VALUE klass = CLASS_OF(obj); + if (klass == cRepeatedField) { + return RepeatedField_deep_copy(obj); + } else if (klass == cMap) { + return Map_deep_copy(obj); + } else { + VALUE new_arena_rb = Arena_new(); + upb_arena *new_arena = Arena_get(new_arena_rb); + const upb_msgdef *m; + const upb_msg *msg = Message_Get(obj, &m); + upb_msg* new_msg = Message_deep_copy(msg, m, new_arena); + return Message_GetRubyWrapper(new_msg, m, new_arena_rb); + } +} // ----------------------------------------------------------------------------- // Initialization/entry point. @@ -93,44 +373,24 @@ ID descriptor_instancevar_interned; // This must be named "Init_protobuf_c" because the Ruby module is named // "protobuf_c" -- the VM looks for this symbol in our .so. +__attribute__ ((visibility ("default"))) void Init_protobuf_c() { + ObjectCache_Init(); + VALUE google = rb_define_module("Google"); VALUE protobuf = rb_define_module_under(google, "Protobuf"); - VALUE internal = rb_define_module_under(protobuf, "Internal"); - - descriptor_instancevar_interned = rb_intern(kDescriptorInstanceVar); - DescriptorPool_register(protobuf); - Descriptor_register(protobuf); - FileDescriptor_register(protobuf); - FieldDescriptor_register(protobuf); - OneofDescriptor_register(protobuf); - EnumDescriptor_register(protobuf); - MessageBuilderContext_register(internal); - OneofBuilderContext_register(internal); - EnumBuilderContext_register(internal); - FileBuilderContext_register(internal); - Builder_register(internal); + + Arena_register(protobuf); + Defs_register(protobuf); RepeatedField_register(protobuf); Map_register(protobuf); + Message_register(protobuf); cError = rb_const_get(protobuf, rb_intern("Error")); - cParseError = rb_const_get(protobuf, rb_intern("ParseError")); cTypeError = rb_const_get(protobuf, rb_intern("TypeError")); rb_define_singleton_method(protobuf, "discard_unknown", Google_Protobuf_discard_unknown, 1); rb_define_singleton_method(protobuf, "deep_copy", Google_Protobuf_deep_copy, 1); - - kRubyStringUtf8Encoding = rb_utf8_encoding(); - kRubyStringASCIIEncoding = rb_usascii_encoding(); - kRubyString8bitEncoding = rb_ascii8bit_encoding(); - - rb_gc_register_address(&c_only_cookie); - c_only_cookie = rb_class_new_instance(0, NULL, rb_cObject); - - rb_gc_register_address(&cached_empty_string); - rb_gc_register_address(&cached_empty_bytes); - cached_empty_string = create_frozen_string("", 0, false); - cached_empty_bytes = create_frozen_string("", 0, true); } diff --git a/ruby/ext/google/protobuf_c/protobuf.h b/ruby/ext/google/protobuf_c/protobuf.h index 0ec78fc4fbd6..90fb0a209335 100644 --- a/ruby/ext/google/protobuf_c/protobuf.h +++ b/ruby/ext/google/protobuf_c/protobuf.h @@ -35,631 +35,76 @@ #include #include -#include "upb.h" - -// Forward decls. -struct DescriptorPool; -struct Descriptor; -struct FileDescriptor; -struct FieldDescriptor; -struct EnumDescriptor; -struct MessageLayout; -struct MessageField; -struct MessageHeader; -struct MessageBuilderContext; -struct EnumBuilderContext; -struct FileBuilderContext; -struct Builder; - -typedef struct DescriptorPool DescriptorPool; -typedef struct Descriptor Descriptor; -typedef struct FileDescriptor FileDescriptor; -typedef struct FieldDescriptor FieldDescriptor; -typedef struct OneofDescriptor OneofDescriptor; -typedef struct EnumDescriptor EnumDescriptor; -typedef struct MessageLayout MessageLayout; -typedef struct MessageField MessageField; -typedef struct MessageOneof MessageOneof; -typedef struct MessageHeader MessageHeader; -typedef struct MessageBuilderContext MessageBuilderContext; -typedef struct OneofBuilderContext OneofBuilderContext; -typedef struct EnumBuilderContext EnumBuilderContext; -typedef struct FileBuilderContext FileBuilderContext; -typedef struct Builder Builder; - -/* - It can be a bit confusing how the C structs defined below and the Ruby - objects interact and hold references to each other. First, a few principles: - - - Ruby's "TypedData" abstraction lets a Ruby VALUE hold a pointer to a C - struct (or arbitrary memory chunk), own it, and free it when collected. - Thus, each struct below will have a corresponding Ruby object - wrapping/owning it. - - - To get back from an underlying upb {msg,enum}def to the Ruby object, we - keep a global hashmap, accessed by get_def_obj/add_def_obj below. - - The in-memory structure is then something like: - - Ruby | upb - | - DescriptorPool ------------|-----------> upb_symtab____________________ - | | (message types) \ - | v \ - Descriptor ---------------|-----------> upb_msgdef (enum types)| - |--> msgclass | | ^ | - | (dynamically built) | | | (submsg fields) | - |--> MessageLayout | | | / - |--------------------------|> decoder method| | / - \--------------------------|> serialize | | / - | handlers v | / - FieldDescriptor -----------|-----------> upb_fielddef / - | | / - | v (enum fields) / - EnumDescriptor ------------|-----------> upb_enumdef <----------' - | - | - ^ | \___/ - `---------------|-----------------' (get_def_obj map) - */ - -// ----------------------------------------------------------------------------- -// Ruby class structure definitions. -// ----------------------------------------------------------------------------- - -struct DescriptorPool { - VALUE def_to_descriptor; // Hash table of def* -> Ruby descriptor. - upb_symtab* symtab; - upb_handlercache* fill_handler_cache; - upb_handlercache* pb_serialize_handler_cache; - upb_handlercache* json_serialize_handler_cache; - upb_handlercache* json_serialize_handler_preserve_cache; - upb_pbcodecache* fill_method_cache; - upb_json_codecache* json_fill_method_cache; -}; - -struct Descriptor { - const upb_msgdef* msgdef; - MessageLayout* layout; - VALUE klass; - VALUE descriptor_pool; -}; - -struct FileDescriptor { - const upb_filedef* filedef; - VALUE descriptor_pool; // Owns the upb_filedef. -}; - -struct FieldDescriptor { - const upb_fielddef* fielddef; - VALUE descriptor_pool; // Owns the upb_fielddef. -}; - -struct OneofDescriptor { - const upb_oneofdef* oneofdef; - VALUE descriptor_pool; // Owns the upb_oneofdef. -}; - -struct EnumDescriptor { - const upb_enumdef* enumdef; - VALUE module; // begins as nil - VALUE descriptor_pool; // Owns the upb_enumdef. -}; - -struct MessageBuilderContext { - google_protobuf_DescriptorProto* msg_proto; - VALUE file_builder; -}; - -struct OneofBuilderContext { - int oneof_index; - VALUE message_builder; -}; - -struct EnumBuilderContext { - google_protobuf_EnumDescriptorProto* enum_proto; - VALUE file_builder; -}; - -struct FileBuilderContext { - upb_arena *arena; - google_protobuf_FileDescriptorProto* file_proto; - VALUE descriptor_pool; -}; - -struct Builder { - VALUE descriptor_pool; - VALUE default_file_builder; -}; - -extern VALUE cDescriptorPool; -extern VALUE cDescriptor; -extern VALUE cFileDescriptor; -extern VALUE cFieldDescriptor; -extern VALUE cEnumDescriptor; -extern VALUE cMessageBuilderContext; -extern VALUE cOneofBuilderContext; -extern VALUE cEnumBuilderContext; -extern VALUE cFileBuilderContext; -extern VALUE cBuilder; - -extern VALUE cError; -extern VALUE cParseError; -extern VALUE cTypeError; - -// We forward-declare all of the Ruby method implementations here because we -// sometimes call the methods directly across .c files, rather than going -// through Ruby's method dispatching (e.g. during message parse). It's cleaner -// to keep the list of object methods together than to split them between -// static-in-file definitions and header declarations. - -void DescriptorPool_mark(void* _self); -void DescriptorPool_free(void* _self); -VALUE DescriptorPool_alloc(VALUE klass); -void DescriptorPool_register(VALUE module); -DescriptorPool* ruby_to_DescriptorPool(VALUE value); -VALUE DescriptorPool_build(int argc, VALUE* argv, VALUE _self); -VALUE DescriptorPool_lookup(VALUE _self, VALUE name); -VALUE DescriptorPool_generated_pool(VALUE _self); - -extern VALUE generated_pool; - -void Descriptor_mark(void* _self); -void Descriptor_free(void* _self); -VALUE Descriptor_alloc(VALUE klass); -void Descriptor_register(VALUE module); -Descriptor* ruby_to_Descriptor(VALUE value); -VALUE Descriptor_initialize(VALUE _self, VALUE cookie, VALUE descriptor_pool, - VALUE ptr); -VALUE Descriptor_name(VALUE _self); -VALUE Descriptor_each(VALUE _self); -VALUE Descriptor_lookup(VALUE _self, VALUE name); -VALUE Descriptor_each_oneof(VALUE _self); -VALUE Descriptor_lookup_oneof(VALUE _self, VALUE name); -VALUE Descriptor_msgclass(VALUE _self); -VALUE Descriptor_file_descriptor(VALUE _self); -extern const rb_data_type_t _Descriptor_type; - -void FileDescriptor_mark(void* _self); -void FileDescriptor_free(void* _self); -VALUE FileDescriptor_alloc(VALUE klass); -void FileDescriptor_register(VALUE module); -FileDescriptor* ruby_to_FileDescriptor(VALUE value); -VALUE FileDescriptor_initialize(VALUE _self, VALUE cookie, - VALUE descriptor_pool, VALUE ptr); -VALUE FileDescriptor_name(VALUE _self); -VALUE FileDescriptor_syntax(VALUE _self); - -void FieldDescriptor_mark(void* _self); -void FieldDescriptor_free(void* _self); -VALUE FieldDescriptor_alloc(VALUE klass); -void FieldDescriptor_register(VALUE module); -FieldDescriptor* ruby_to_FieldDescriptor(VALUE value); -VALUE FieldDescriptor_initialize(VALUE _self, VALUE cookie, - VALUE descriptor_pool, VALUE ptr); -VALUE FieldDescriptor_name(VALUE _self); -VALUE FieldDescriptor_type(VALUE _self); -VALUE FieldDescriptor_default(VALUE _self); -VALUE FieldDescriptor_label(VALUE _self); -VALUE FieldDescriptor_number(VALUE _self); -VALUE FieldDescriptor_submsg_name(VALUE _self); -VALUE FieldDescriptor_subtype(VALUE _self); -VALUE FieldDescriptor_has(VALUE _self, VALUE msg_rb); -VALUE FieldDescriptor_clear(VALUE _self, VALUE msg_rb); -VALUE FieldDescriptor_get(VALUE _self, VALUE msg_rb); -VALUE FieldDescriptor_set(VALUE _self, VALUE msg_rb, VALUE value); -upb_fieldtype_t ruby_to_fieldtype(VALUE type); -VALUE fieldtype_to_ruby(upb_fieldtype_t type); - -void OneofDescriptor_mark(void* _self); -void OneofDescriptor_free(void* _self); -VALUE OneofDescriptor_alloc(VALUE klass); -void OneofDescriptor_register(VALUE module); -OneofDescriptor* ruby_to_OneofDescriptor(VALUE value); -VALUE OneofDescriptor_initialize(VALUE _self, VALUE cookie, - VALUE descriptor_pool, VALUE ptr); -VALUE OneofDescriptor_name(VALUE _self); -VALUE OneofDescriptor_each(VALUE _self); - -void EnumDescriptor_mark(void* _self); -void EnumDescriptor_free(void* _self); -VALUE EnumDescriptor_alloc(VALUE klass); -VALUE EnumDescriptor_initialize(VALUE _self, VALUE cookie, - VALUE descriptor_pool, VALUE ptr); -void EnumDescriptor_register(VALUE module); -EnumDescriptor* ruby_to_EnumDescriptor(VALUE value); -VALUE EnumDescriptor_file_descriptor(VALUE _self); -VALUE EnumDescriptor_name(VALUE _self); -VALUE EnumDescriptor_lookup_name(VALUE _self, VALUE name); -VALUE EnumDescriptor_lookup_value(VALUE _self, VALUE number); -VALUE EnumDescriptor_each(VALUE _self); -VALUE EnumDescriptor_enummodule(VALUE _self); -extern const rb_data_type_t _EnumDescriptor_type; - -void MessageBuilderContext_mark(void* _self); -void MessageBuilderContext_free(void* _self); -VALUE MessageBuilderContext_alloc(VALUE klass); -void MessageBuilderContext_register(VALUE module); -MessageBuilderContext* ruby_to_MessageBuilderContext(VALUE value); -VALUE MessageBuilderContext_initialize(VALUE _self, - VALUE _file_builder, - VALUE name); -VALUE MessageBuilderContext_optional(int argc, VALUE* argv, VALUE _self); -VALUE MessageBuilderContext_proto3_optional(int argc, VALUE* argv, VALUE _self); -VALUE MessageBuilderContext_required(int argc, VALUE* argv, VALUE _self); -VALUE MessageBuilderContext_repeated(int argc, VALUE* argv, VALUE _self); -VALUE MessageBuilderContext_map(int argc, VALUE* argv, VALUE _self); -VALUE MessageBuilderContext_oneof(VALUE _self, VALUE name); - -void OneofBuilderContext_mark(void* _self); -void OneofBuilderContext_free(void* _self); -VALUE OneofBuilderContext_alloc(VALUE klass); -void OneofBuilderContext_register(VALUE module); -OneofBuilderContext* ruby_to_OneofBuilderContext(VALUE value); -VALUE OneofBuilderContext_initialize(VALUE _self, - VALUE descriptor, - VALUE builder); -VALUE OneofBuilderContext_optional(int argc, VALUE* argv, VALUE _self); - -void EnumBuilderContext_mark(void* _self); -void EnumBuilderContext_free(void* _self); -VALUE EnumBuilderContext_alloc(VALUE klass); -void EnumBuilderContext_register(VALUE module); -EnumBuilderContext* ruby_to_EnumBuilderContext(VALUE value); -VALUE EnumBuilderContext_initialize(VALUE _self, VALUE _file_builder, - VALUE name); -VALUE EnumBuilderContext_value(VALUE _self, VALUE name, VALUE number); - -void FileBuilderContext_mark(void* _self); -void FileBuilderContext_free(void* _self); -VALUE FileBuilderContext_alloc(VALUE klass); -void FileBuilderContext_register(VALUE module); -FileBuilderContext* ruby_to_FileBuilderContext(VALUE _self); -upb_strview FileBuilderContext_strdup(VALUE _self, VALUE rb_str); -upb_strview FileBuilderContext_strdup_name(VALUE _self, VALUE rb_str); -upb_strview FileBuilderContext_strdup_sym(VALUE _self, VALUE rb_sym); -VALUE FileBuilderContext_initialize(VALUE _self, VALUE descriptor_pool, - VALUE name, VALUE options); -VALUE FileBuilderContext_add_message(VALUE _self, VALUE name); -VALUE FileBuilderContext_add_enum(VALUE _self, VALUE name); -VALUE FileBuilderContext_pending_descriptors(VALUE _self); - -void Builder_mark(void* _self); -void Builder_free(void* _self); -VALUE Builder_alloc(VALUE klass); -void Builder_register(VALUE module); -Builder* ruby_to_Builder(VALUE value); -VALUE Builder_build(VALUE _self); -VALUE Builder_initialize(VALUE _self, VALUE descriptor_pool); -VALUE Builder_add_file(int argc, VALUE *argv, VALUE _self); -VALUE Builder_add_message(VALUE _self, VALUE name); -VALUE Builder_add_enum(VALUE _self, VALUE name); -VALUE Builder_finalize_to_pool(VALUE _self, VALUE pool_rb); - -// ----------------------------------------------------------------------------- -// Native slot storage abstraction. -// ----------------------------------------------------------------------------- - -#define NATIVE_SLOT_MAX_SIZE sizeof(uint64_t) - -size_t native_slot_size(upb_fieldtype_t type); -void native_slot_set(const char* name, - upb_fieldtype_t type, - VALUE type_class, - void* memory, - VALUE value); -// Atomically (with respect to Ruby VM calls) either update the value and set a -// oneof case, or do neither. If |case_memory| is null, then no case value is -// set. -void native_slot_set_value_and_case(const char* name, - upb_fieldtype_t type, - VALUE type_class, - void* memory, - VALUE value, - uint32_t* case_memory, - uint32_t case_number); -VALUE native_slot_get(upb_fieldtype_t type, - VALUE type_class, - const void* memory); -void native_slot_init(upb_fieldtype_t type, void* memory); -void native_slot_mark(upb_fieldtype_t type, void* memory); -void native_slot_dup(upb_fieldtype_t type, void* to, void* from); -void native_slot_deep_copy(upb_fieldtype_t type, VALUE type_class, void* to, - void* from); -bool native_slot_eq(upb_fieldtype_t type, VALUE type_class, void* mem1, - void* mem2); - -VALUE native_slot_encode_and_freeze_string(upb_fieldtype_t type, VALUE value); -void native_slot_check_int_range_precision(const char* name, upb_fieldtype_t type, VALUE value); -uint32_t slot_read_oneof_case(MessageLayout* layout, const void* storage, - const upb_oneofdef* oneof); -bool is_value_field(const upb_fielddef* f); - -extern rb_encoding* kRubyStringUtf8Encoding; -extern rb_encoding* kRubyStringASCIIEncoding; -extern rb_encoding* kRubyString8bitEncoding; - -VALUE field_type_class(const MessageLayout* layout, const upb_fielddef* field); - -#define MAP_KEY_FIELD 1 -#define MAP_VALUE_FIELD 2 - -// Oneof case slot value to indicate that no oneof case is set. The value `0` is -// safe because field numbers are used as case identifiers, and no field can -// have a number of 0. -#define ONEOF_CASE_NONE 0 +#include "ruby-upb.h" +#include "defs.h" // These operate on a map field (i.e., a repeated field of submessages whose // submessage type is a map-entry msgdef). -bool is_map_field(const upb_fielddef* field); const upb_fielddef* map_field_key(const upb_fielddef* field); const upb_fielddef* map_field_value(const upb_fielddef* field); -// These operate on a map-entry msgdef. -const upb_fielddef* map_entry_key(const upb_msgdef* msgdef); -const upb_fielddef* map_entry_value(const upb_msgdef* msgdef); - -// ----------------------------------------------------------------------------- -// Repeated field container type. -// ----------------------------------------------------------------------------- - -typedef struct { - upb_fieldtype_t field_type; - VALUE field_type_class; - void* elements; - int size; - int capacity; -} RepeatedField; - -void RepeatedField_mark(void* self); -void RepeatedField_free(void* self); -VALUE RepeatedField_alloc(VALUE klass); -VALUE RepeatedField_init(int argc, VALUE* argv, VALUE self); -void RepeatedField_register(VALUE module); - -extern const rb_data_type_t RepeatedField_type; -extern VALUE cRepeatedField; - -RepeatedField* ruby_to_RepeatedField(VALUE value); - -VALUE RepeatedField_new_this_type(VALUE _self); -VALUE RepeatedField_each(VALUE _self); -VALUE RepeatedField_index(int argc, VALUE* argv, VALUE _self); -void* RepeatedField_index_native(VALUE _self, int index); -int RepeatedField_size(VALUE _self); -VALUE RepeatedField_index_set(VALUE _self, VALUE _index, VALUE val); -void RepeatedField_reserve(RepeatedField* self, int new_size); -VALUE RepeatedField_push(VALUE _self, VALUE val); -void RepeatedField_push_native(VALUE _self, void* data); -VALUE RepeatedField_pop_one(VALUE _self); -VALUE RepeatedField_insert(int argc, VALUE* argv, VALUE _self); -VALUE RepeatedField_replace(VALUE _self, VALUE list); -VALUE RepeatedField_clear(VALUE _self); -VALUE RepeatedField_length(VALUE _self); -VALUE RepeatedField_dup(VALUE _self); -VALUE RepeatedField_deep_copy(VALUE _self); -VALUE RepeatedField_to_ary(VALUE _self); -VALUE RepeatedField_eq(VALUE _self, VALUE _other); -VALUE RepeatedField_hash(VALUE _self); -VALUE RepeatedField_inspect(VALUE _self); -VALUE RepeatedField_plus(VALUE _self, VALUE list); - -// Defined in repeated_field.c; also used by Map. -void validate_type_class(upb_fieldtype_t type, VALUE klass); - // ----------------------------------------------------------------------------- -// Map container type. +// Arena // ----------------------------------------------------------------------------- -typedef struct { - upb_fieldtype_t key_type; - upb_fieldtype_t value_type; - VALUE value_type_class; - VALUE parse_frame; - upb_strtable table; -} Map; - -void Map_mark(void* self); -void Map_free(void* self); -VALUE Map_alloc(VALUE klass); -VALUE Map_init(int argc, VALUE* argv, VALUE self); -void Map_register(VALUE module); -VALUE Map_set_frame(VALUE self, VALUE val); - -extern const rb_data_type_t Map_type; -extern VALUE cMap; - -Map* ruby_to_Map(VALUE value); - -VALUE Map_new_this_type(VALUE _self); -VALUE Map_each(VALUE _self); -VALUE Map_keys(VALUE _self); -VALUE Map_values(VALUE _self); -VALUE Map_index(VALUE _self, VALUE key); -VALUE Map_index_set(VALUE _self, VALUE key, VALUE value); -VALUE Map_has_key(VALUE _self, VALUE key); -VALUE Map_delete(VALUE _self, VALUE key); -VALUE Map_clear(VALUE _self); -VALUE Map_length(VALUE _self); -VALUE Map_dup(VALUE _self); -VALUE Map_deep_copy(VALUE _self); -VALUE Map_eq(VALUE _self, VALUE _other); -VALUE Map_hash(VALUE _self); -VALUE Map_to_h(VALUE _self); -VALUE Map_inspect(VALUE _self); -VALUE Map_merge(VALUE _self, VALUE hashmap); -VALUE Map_merge_into_self(VALUE _self, VALUE hashmap); +// A Ruby object that wraps an underlying upb_arena. Any objects that are +// allocated from this arena should reference the Arena in rb_gc_mark(), to +// ensure that the object's underlying memory outlives any Ruby object that can +// reach it. -typedef struct { - Map* self; - upb_strtable_iter it; -} Map_iter; - -void Map_begin(VALUE _self, Map_iter* iter); -void Map_next(Map_iter* iter); -bool Map_done(Map_iter* iter); -VALUE Map_iter_key(Map_iter* iter); -VALUE Map_iter_value(Map_iter* iter); +VALUE Arena_new(); +upb_arena *Arena_get(VALUE arena); // ----------------------------------------------------------------------------- -// Message layout / storage. +// ObjectCache // ----------------------------------------------------------------------------- -#define MESSAGE_FIELD_NO_HASBIT ((uint32_t)-1) - -struct MessageField { - uint32_t offset; - uint32_t hasbit; -}; - -struct MessageOneof { - uint32_t offset; - uint32_t case_offset; -}; - -// MessageLayout is owned by the enclosing Descriptor, which must outlive us. -struct MessageLayout { - const Descriptor* desc; - const upb_msgdef* msgdef; - void* empty_template; // Can memcpy() onto a layout to clear it. - MessageField* fields; - MessageOneof* oneofs; - uint32_t size; - uint32_t value_offset; - int value_count; - int repeated_count; - int map_count; -}; - -#define ONEOF_CASE_MASK 0x80000000 - -void create_layout(Descriptor* desc); -void free_layout(MessageLayout* layout); -bool field_contains_hasbit(MessageLayout* layout, - const upb_fielddef* field); -VALUE layout_get_default(const upb_fielddef* field); -VALUE layout_get(MessageLayout* layout, - const void* storage, - const upb_fielddef* field); -void layout_set(MessageLayout* layout, - void* storage, - const upb_fielddef* field, - VALUE val); -VALUE layout_has(MessageLayout* layout, - const void* storage, - const upb_fielddef* field); -void layout_clear(MessageLayout* layout, - const void* storage, - const upb_fielddef* field); -void layout_init(MessageLayout* layout, void* storage); -void layout_mark(MessageLayout* layout, void* storage); -void layout_dup(MessageLayout* layout, void* to, void* from); -void layout_deep_copy(MessageLayout* layout, void* to, void* from); -VALUE layout_eq(MessageLayout* layout, void* msg1, void* msg2); -VALUE layout_hash(MessageLayout* layout, void* storage); -VALUE layout_inspect(MessageLayout* layout, void* storage); - -bool is_wrapper_type_field(const upb_fielddef* field); -VALUE ruby_wrapper_type(VALUE type_class, VALUE value); +// Global object cache from upb array/map/message/symtab to wrapper object. +// +// This is a conceptually "weak" cache, in that it does not prevent "val" from +// being collected (though in Ruby <2.7 is it effectively strong, due to +// implementation limitations). + +// Adds an entry to the cache. The "arena" parameter must give the arena that +// "key" was allocated from. In Ruby <2.7.0, it will be used to remove the key +// from the cache when the arena is destroyed. +void ObjectCache_Add(const void* key, VALUE val, upb_arena *arena); + +// Returns the cached object for this key, if any. Otherwise returns Qnil. +VALUE ObjectCache_Get(const void* key); + +// Pins the previously added object so it is GC-rooted. This turns the +// reference to "val" from weak to strong. We use this to guarantee that the +// "frozen" bit on the object will be remembered, even if the user drops their +// reference to this precise object. +// +// The "arena" parameter must give the arena that "key" was allocated from. +void ObjectCache_Pin(const void* key, VALUE val, upb_arena *arena); // ----------------------------------------------------------------------------- -// Message class creation. +// StringBuilder, for inspect // ----------------------------------------------------------------------------- -// This should probably be factored into a common upb component. - -typedef struct { - upb_byteshandler handler; - upb_bytessink sink; - char *ptr; - size_t len, size; -} stringsink; - -void stringsink_uninit(stringsink *sink); - -struct MessageHeader { - Descriptor* descriptor; // kept alive by self.class.descriptor reference. - stringsink* unknown_fields; // store unknown fields in decoding. - // Data comes after this. -}; - -extern rb_data_type_t Message_type; - -VALUE build_class_from_descriptor(VALUE descriptor); -void* Message_data(void* msg); -void Message_mark(void* self); -void Message_free(void* self); -VALUE Message_alloc(VALUE klass); -VALUE Message_method_missing(int argc, VALUE* argv, VALUE _self); -VALUE Message_initialize(int argc, VALUE* argv, VALUE _self); -VALUE Message_dup(VALUE _self); -VALUE Message_deep_copy(VALUE _self); -VALUE Message_eq(VALUE _self, VALUE _other); -VALUE Message_hash(VALUE _self); -VALUE Message_inspect(VALUE _self); -VALUE Message_to_h(VALUE _self); -VALUE Message_index(VALUE _self, VALUE field_name); -VALUE Message_index_set(VALUE _self, VALUE field_name, VALUE value); -VALUE Message_descriptor(VALUE klass); -VALUE Message_decode(VALUE klass, VALUE data); -VALUE Message_encode(VALUE klass, VALUE msg_rb); -VALUE Message_decode_json(int argc, VALUE* argv, VALUE klass); -VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass); - -VALUE Google_Protobuf_discard_unknown(VALUE self, VALUE msg_rb); -VALUE Google_Protobuf_deep_copy(VALUE self, VALUE obj); - -VALUE build_module_from_enumdesc(VALUE _enumdesc); -VALUE enum_lookup(VALUE self, VALUE number); -VALUE enum_resolve(VALUE self, VALUE sym); -VALUE enum_descriptor(VALUE self); +struct StringBuilder; +typedef struct StringBuilder StringBuilder; -const upb_pbdecodermethod *new_fillmsg_decodermethod( - Descriptor* descriptor, const void *owner); -void add_handlers_for_message(const void *closure, upb_handlers *h); +StringBuilder* StringBuilder_New(); +void StringBuilder_Free(StringBuilder* b); +void StringBuilder_Printf(StringBuilder* b, const char *fmt, ...); +VALUE StringBuilder_ToRubyString(StringBuilder* b); -// Maximum depth allowed during encoding, to avoid stack overflows due to -// cycles. -#define ENCODE_MAX_NESTING 63 - -// ----------------------------------------------------------------------------- -// A cache of frozen string objects to use as field defaults. -// ----------------------------------------------------------------------------- -VALUE get_frozen_string(const char* data, size_t size, bool binary); - -// ----------------------------------------------------------------------------- -// Global map from upb {msg,enum}defs to wrapper Descriptor/EnumDescriptor -// instances. -// ----------------------------------------------------------------------------- -VALUE get_msgdef_obj(VALUE descriptor_pool, const upb_msgdef* def); -VALUE get_enumdef_obj(VALUE descriptor_pool, const upb_enumdef* def); -VALUE get_fielddef_obj(VALUE descriptor_pool, const upb_fielddef* def); -VALUE get_filedef_obj(VALUE descriptor_pool, const upb_filedef* def); -VALUE get_oneofdef_obj(VALUE descriptor_pool, const upb_oneofdef* def); +void StringBuilder_PrintMsgval(StringBuilder* b, upb_msgval val, TypeInfo info); // ----------------------------------------------------------------------------- // Utilities. // ----------------------------------------------------------------------------- -void check_upb_status(const upb_status* status, const char* msg); - -#define CHECK_UPB(code, msg) do { \ - upb_status status = UPB_STATUS_INIT; \ - code; \ - check_upb_status(&status, msg); \ -} while (0) - -extern ID descriptor_instancevar_interned; - -// A distinct object that is not accessible from Ruby. We use this as a -// constructor argument to enforce that certain objects cannot be created from -// Ruby. -extern VALUE c_only_cookie; +extern VALUE cTypeError; #ifdef NDEBUG -#define UPB_ASSERT(expr) do {} while (false && (expr)) +#define PBRUBY_ASSERT(expr) do {} while (false && (expr)) #else -#define UPB_ASSERT(expr) assert(expr) +#define PBRUBY_ASSERT(expr) assert(expr) #endif #define UPB_UNUSED(var) (void)var diff --git a/ruby/ext/google/protobuf_c/repeated_field.c b/ruby/ext/google/protobuf_c/repeated_field.c index e3afb282b8ca..65ca3c664791 100644 --- a/ruby/ext/google/protobuf_c/repeated_field.c +++ b/ruby/ext/google/protobuf_c/repeated_field.c @@ -28,49 +28,162 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#include "repeated_field.h" + +#include "convert.h" +#include "defs.h" +#include "message.h" #include "protobuf.h" +#include "third_party/wyhash/wyhash.h" // ----------------------------------------------------------------------------- // Repeated field container type. // ----------------------------------------------------------------------------- +typedef struct { + const upb_array *array; // Can get as mutable when non-frozen. + TypeInfo type_info; + VALUE type_class; // To GC-root the msgdef/enumdef in type_info. + VALUE arena; // To GC-root the upb_array. +} RepeatedField; + +VALUE cRepeatedField; + +static void RepeatedField_mark(void* _self) { + RepeatedField* self = (RepeatedField*)_self; + rb_gc_mark(self->type_class); + rb_gc_mark(self->arena); +} + const rb_data_type_t RepeatedField_type = { "Google::Protobuf::RepeatedField", - { RepeatedField_mark, RepeatedField_free, NULL }, + { RepeatedField_mark, RUBY_DEFAULT_FREE, NULL }, + .flags = RUBY_TYPED_FREE_IMMEDIATELY, }; -VALUE cRepeatedField; - -RepeatedField* ruby_to_RepeatedField(VALUE _self) { +static RepeatedField* ruby_to_RepeatedField(VALUE _self) { RepeatedField* self; TypedData_Get_Struct(_self, RepeatedField, &RepeatedField_type, self); return self; } -void* RepeatedField_memoryat(RepeatedField* self, int index, int element_size) { - return ((uint8_t *)self->elements) + index * element_size; +static upb_array *RepeatedField_GetMutable(VALUE _self) { + rb_check_frozen(_self); + return (upb_array*)ruby_to_RepeatedField(_self)->array; +} + +VALUE RepeatedField_alloc(VALUE klass) { + RepeatedField* self = ALLOC(RepeatedField); + self->arena = Qnil; + self->type_class = Qnil; + self->array = NULL; + return TypedData_Wrap_Struct(klass, &RepeatedField_type, self); +} + +VALUE RepeatedField_GetRubyWrapper(upb_array* array, TypeInfo type_info, + VALUE arena) { + PBRUBY_ASSERT(array); + VALUE val = ObjectCache_Get(array); + + if (val == Qnil) { + val = RepeatedField_alloc(cRepeatedField); + RepeatedField* self; + ObjectCache_Add(array, val, Arena_get(arena)); + TypedData_Get_Struct(val, RepeatedField, &RepeatedField_type, self); + self->array = array; + self->arena = arena; + self->type_info = type_info; + if (self->type_info.type == UPB_TYPE_MESSAGE) { + self->type_class = Descriptor_DefToClass(type_info.def.msgdef); + } + } + + PBRUBY_ASSERT(ruby_to_RepeatedField(val)->type_info.type == type_info.type); + PBRUBY_ASSERT(ruby_to_RepeatedField(val)->type_info.def.msgdef == + type_info.def.msgdef); + return val; +} + +static VALUE RepeatedField_new_this_type(RepeatedField* from) { + VALUE arena_rb = Arena_new(); + upb_array *array = upb_array_new(Arena_get(arena_rb), from->type_info.type); + VALUE ret = RepeatedField_GetRubyWrapper(array, from->type_info, arena_rb); + PBRUBY_ASSERT(ruby_to_RepeatedField(ret)->type_class == from->type_class); + return ret; +} + +void RepeatedField_Inspect(StringBuilder* b, const upb_array* array, + TypeInfo info) { + bool first = true; + StringBuilder_Printf(b, "["); + size_t n = array ? upb_array_size(array) : 0; + for (size_t i = 0; i < n; i++) { + if (first) { + first = false; + } else { + StringBuilder_Printf(b, ", "); + } + StringBuilder_PrintMsgval(b, upb_array_get(array, i), info); + } + StringBuilder_Printf(b, "]"); +} + +VALUE RepeatedField_deep_copy(VALUE _self) { + RepeatedField* self = ruby_to_RepeatedField(_self); + VALUE new_rptfield = RepeatedField_new_this_type(self); + RepeatedField* new_self = ruby_to_RepeatedField(new_rptfield); + VALUE arena_rb = new_self->arena; + upb_array *new_array = RepeatedField_GetMutable(new_rptfield); + upb_arena *arena = Arena_get(arena_rb); + size_t elements = upb_array_size(self->array); + + upb_array_resize(new_array, elements, arena); + + size_t size = upb_array_size(self->array); + for (size_t i = 0; i < size; i++) { + upb_msgval msgval = upb_array_get(self->array, i); + upb_msgval copy = Msgval_DeepCopy(msgval, self->type_info, arena); + upb_array_set(new_array, i, copy); + } + + return new_rptfield; +} + +const upb_array* RepeatedField_GetUpbArray(VALUE val, const upb_fielddef *field) { + RepeatedField* self; + TypeInfo type_info = TypeInfo_get(field); + + if (!RB_TYPE_P(val, T_DATA) || !RTYPEDDATA_P(val) || + RTYPEDDATA_TYPE(val) != &RepeatedField_type) { + rb_raise(cTypeError, "Expected repeated field array"); + } + + self = ruby_to_RepeatedField(val); + if (self->type_info.type != type_info.type) { + rb_raise(cTypeError, "Repeated field array has wrong element type"); + } + + if (self->type_info.def.msgdef != type_info.def.msgdef) { + rb_raise(cTypeError, "Repeated field array has wrong message/enum class"); + } + + return self->array; } static int index_position(VALUE _index, RepeatedField* repeated_field) { int index = NUM2INT(_index); - if (index < 0 && repeated_field->size > 0) { - index = repeated_field->size + index; - } + if (index < 0) index += upb_array_size(repeated_field->array); return index; } -VALUE RepeatedField_subarray(VALUE _self, long beg, long len) { - RepeatedField* self = ruby_to_RepeatedField(_self); - int element_size = native_slot_size(self->field_type); - upb_fieldtype_t field_type = self->field_type; - VALUE field_type_class = self->field_type_class; - size_t off = beg * element_size; - VALUE ary = rb_ary_new2(len); - int i; +static VALUE RepeatedField_subarray(RepeatedField* self, long beg, long len) { + size_t size = upb_array_size(self->array); + VALUE ary = rb_ary_new2(size); + long i; - for (i = beg; i < beg + len; i++, off += element_size) { - void* mem = ((uint8_t *)self->elements) + off; - VALUE elem = native_slot_get(field_type, field_type_class, mem); + for (i = beg; i < beg + len; i++) { + upb_msgval msgval = upb_array_get(self->array, i); + VALUE elem = Convert_UpbToRuby(msgval, self->type_info, self->arena); rb_ary_push(ary, elem); } return ary; @@ -84,17 +197,14 @@ VALUE RepeatedField_subarray(VALUE _self, long beg, long len) { * also includes Enumerable; combined with this method, the repeated field thus * acts like an ordinary Ruby sequence. */ -VALUE RepeatedField_each(VALUE _self) { +static VALUE RepeatedField_each(VALUE _self) { RepeatedField* self = ruby_to_RepeatedField(_self); - upb_fieldtype_t field_type = self->field_type; - VALUE field_type_class = self->field_type_class; - int element_size = native_slot_size(field_type); - size_t off = 0; + int size = upb_array_size(self->array); int i; - for (i = 0; i < self->size; i++, off += element_size) { - void* memory = (void *) (((uint8_t *)self->elements) + off); - VALUE val = native_slot_get(field_type, field_type_class, memory); + for (i = 0; i < size; i++) { + upb_msgval msgval = upb_array_get(self->array, i); + VALUE val = Convert_UpbToRuby(msgval, self->type_info, self->arena); rb_yield(val); } return _self; @@ -107,11 +217,9 @@ VALUE RepeatedField_each(VALUE _self) { * * Accesses the element at the given index. Returns nil on out-of-bounds */ -VALUE RepeatedField_index(int argc, VALUE* argv, VALUE _self) { +static VALUE RepeatedField_index(int argc, VALUE* argv, VALUE _self) { RepeatedField* self = ruby_to_RepeatedField(_self); - int element_size = native_slot_size(self->field_type); - upb_fieldtype_t field_type = self->field_type; - VALUE field_type_class = self->field_type_class; + long size = upb_array_size(self->array); VALUE arg = argv[0]; long beg, len; @@ -119,35 +227,36 @@ VALUE RepeatedField_index(int argc, VALUE* argv, VALUE _self) { if (argc == 1){ if (FIXNUM_P(arg)) { /* standard case */ - void* memory; + upb_msgval msgval; int index = index_position(argv[0], self); - if (index < 0 || index >= self->size) { + if (index < 0 || (size_t)index >= upb_array_size(self->array)) { return Qnil; } - memory = RepeatedField_memoryat(self, index, element_size); - return native_slot_get(field_type, field_type_class, memory); - }else{ + msgval = upb_array_get(self->array, index); + return Convert_UpbToRuby(msgval, self->type_info, self->arena); + } else { /* check if idx is Range */ - switch (rb_range_beg_len(arg, &beg, &len, self->size, 0)) { + switch (rb_range_beg_len(arg, &beg, &len, size, 0)) { case Qfalse: break; case Qnil: return Qnil; default: - return RepeatedField_subarray(_self, beg, len); + return RepeatedField_subarray(self, beg, len); } } } + /* assume 2 arguments */ beg = NUM2LONG(argv[0]); len = NUM2LONG(argv[1]); if (beg < 0) { - beg += self->size; + beg += size; } - if (beg >= self->size) { + if (beg >= size) { return Qnil; } - return RepeatedField_subarray(_self, beg, len); + return RepeatedField_subarray(self, beg, len); } /* @@ -157,128 +266,88 @@ VALUE RepeatedField_index(int argc, VALUE* argv, VALUE _self) { * Sets the element at the given index. On out-of-bounds assignments, extends * the array and fills the hole (if any) with default values. */ -VALUE RepeatedField_index_set(VALUE _self, VALUE _index, VALUE val) { +static VALUE RepeatedField_index_set(VALUE _self, VALUE _index, VALUE val) { RepeatedField* self = ruby_to_RepeatedField(_self); - upb_fieldtype_t field_type = self->field_type; - VALUE field_type_class = self->field_type_class; - int element_size = native_slot_size(field_type); - void* memory; + int size = upb_array_size(self->array); + upb_array *array = RepeatedField_GetMutable(_self); + upb_arena *arena = Arena_get(self->arena); + upb_msgval msgval = Convert_RubyToUpb(val, "", self->type_info, arena); int index = index_position(_index, self); if (index < 0 || index >= (INT_MAX - 1)) { return Qnil; } - if (index >= self->size) { - upb_fieldtype_t field_type = self->field_type; - int element_size = native_slot_size(field_type); - int i; - RepeatedField_reserve(self, index + 1); - for (i = self->size; i <= index; i++) { - void* elem = RepeatedField_memoryat(self, i, element_size); - native_slot_init(field_type, elem); + if (index >= size) { + upb_array_resize(array, index + 1, arena); + upb_msgval fill; + memset(&fill, 0, sizeof(fill)); + for (int i = size; i < index; i++) { + // Fill default values. + // TODO(haberman): should this happen at the upb level? + upb_array_set(array, i, fill); } - self->size = index + 1; } - memory = RepeatedField_memoryat(self, index, element_size); - native_slot_set("", field_type, field_type_class, memory, val); + upb_array_set(array, index, msgval); return Qnil; } -static int kInitialSize = 8; - -void RepeatedField_reserve(RepeatedField* self, int new_size) { - void* old_elems = self->elements; - int elem_size = native_slot_size(self->field_type); - if (new_size <= self->capacity) { - return; - } - if (self->capacity == 0) { - self->capacity = kInitialSize; - } - while (self->capacity < new_size) { - self->capacity *= 2; - } - self->elements = ALLOC_N(uint8_t, elem_size * self->capacity); - if (old_elems != NULL) { - memcpy(self->elements, old_elems, self->size * elem_size); - xfree(old_elems); - } -} - /* * call-seq: - * RepeatedField.push(value) + * RepeatedField.push(value, ...) * * Adds a new element to the repeated field. */ -VALUE RepeatedField_push(VALUE _self, VALUE val) { +static VALUE RepeatedField_push_vararg(int argc, VALUE* argv, VALUE _self) { RepeatedField* self = ruby_to_RepeatedField(_self); - upb_fieldtype_t field_type = self->field_type; - int element_size = native_slot_size(field_type); - void* memory; - - RepeatedField_reserve(self, self->size + 1); - memory = (void *) (((uint8_t *)self->elements) + self->size * element_size); - native_slot_set("", field_type, self->field_type_class, memory, val); - // native_slot_set may raise an error; bump size only after set. - self->size++; - return _self; -} - -VALUE RepeatedField_push_vararg(VALUE _self, VALUE args) { + upb_arena *arena = Arena_get(self->arena); + upb_array *array = RepeatedField_GetMutable(_self); int i; - for (i = 0; i < RARRAY_LEN(args); i++) { - RepeatedField_push(_self, rb_ary_entry(args, i)); + + for (i = 0; i < argc; i++) { + upb_msgval msgval = Convert_RubyToUpb(argv[i], "", self->type_info, arena); + upb_array_append(array, msgval, arena); } + return _self; } -// Used by parsing handlers. -void RepeatedField_push_native(VALUE _self, void* data) { +/* + * call-seq: + * RepeatedField.<<(value) + * + * Adds a new element to the repeated field. + */ +static VALUE RepeatedField_push(VALUE _self, VALUE val) { RepeatedField* self = ruby_to_RepeatedField(_self); - upb_fieldtype_t field_type = self->field_type; - int element_size = native_slot_size(field_type); - void* memory; - - RepeatedField_reserve(self, self->size + 1); - memory = (void *) (((uint8_t *)self->elements) + self->size * element_size); - memcpy(memory, data, element_size); - self->size++; -} + upb_arena *arena = Arena_get(self->arena); + upb_array *array = RepeatedField_GetMutable(_self); -void* RepeatedField_index_native(VALUE _self, int index) { - RepeatedField* self = ruby_to_RepeatedField(_self); - upb_fieldtype_t field_type = self->field_type; - int element_size = native_slot_size(field_type); - return RepeatedField_memoryat(self, index, element_size); -} + upb_msgval msgval = Convert_RubyToUpb(val, "", self->type_info, arena); + upb_array_append(array, msgval, arena); -int RepeatedField_size(VALUE _self) { - RepeatedField* self = ruby_to_RepeatedField(_self); - return self->size; + return _self; } /* * Private ruby method, used by RepeatedField.pop */ -VALUE RepeatedField_pop_one(VALUE _self) { +static VALUE RepeatedField_pop_one(VALUE _self) { RepeatedField* self = ruby_to_RepeatedField(_self); - upb_fieldtype_t field_type = self->field_type; - VALUE field_type_class = self->field_type_class; - int element_size = native_slot_size(field_type); - int index; - void* memory; + size_t size = upb_array_size(self->array); + upb_array *array = RepeatedField_GetMutable(_self); + upb_msgval last; VALUE ret; - if (self->size == 0) { + if (size == 0) { return Qnil; } - index = self->size - 1; - memory = RepeatedField_memoryat(self, index, element_size); - ret = native_slot_get(field_type, field_type_class, memory); - self->size--; + + last = upb_array_get(self->array, size - 1); + ret = Convert_UpbToRuby(last, self->type_info, self->arena); + + upb_array_resize(array, size - 1, Arena_get(self->arena)); return ret; } @@ -288,15 +357,18 @@ VALUE RepeatedField_pop_one(VALUE _self) { * * Replaces the contents of the repeated field with the given list of elements. */ -VALUE RepeatedField_replace(VALUE _self, VALUE list) { +static VALUE RepeatedField_replace(VALUE _self, VALUE list) { RepeatedField* self = ruby_to_RepeatedField(_self); + upb_array *array = RepeatedField_GetMutable(_self); int i; Check_Type(list, T_ARRAY); - self->size = 0; + upb_array_resize(array, 0, Arena_get(self->arena)); + for (i = 0; i < RARRAY_LEN(list); i++) { RepeatedField_push(_self, rb_ary_entry(list, i)); } + return list; } @@ -306,9 +378,10 @@ VALUE RepeatedField_replace(VALUE _self, VALUE list) { * * Clears (removes all elements from) this repeated field. */ -VALUE RepeatedField_clear(VALUE _self) { +static VALUE RepeatedField_clear(VALUE _self) { RepeatedField* self = ruby_to_RepeatedField(_self); - self->size = 0; + upb_array *array = RepeatedField_GetMutable(_self); + upb_array_resize(array, 0, Arena_get(self->arena)); return _self; } @@ -318,23 +391,9 @@ VALUE RepeatedField_clear(VALUE _self) { * * Returns the length of this repeated field. */ -VALUE RepeatedField_length(VALUE _self) { - RepeatedField* self = ruby_to_RepeatedField(_self); - return INT2NUM(self->size); -} - -VALUE RepeatedField_new_this_type(VALUE _self) { +static VALUE RepeatedField_length(VALUE _self) { RepeatedField* self = ruby_to_RepeatedField(_self); - VALUE new_rptfield = Qnil; - VALUE element_type = fieldtype_to_ruby(self->field_type); - if (self->field_type_class != Qnil) { - new_rptfield = rb_funcall(CLASS_OF(_self), rb_intern("new"), 2, - element_type, self->field_type_class); - } else { - new_rptfield = rb_funcall(CLASS_OF(_self), rb_intern("new"), 1, - element_type); - } - return new_rptfield; + return INT2NUM(upb_array_size(self->array)); } /* @@ -344,42 +403,20 @@ VALUE RepeatedField_new_this_type(VALUE _self) { * Duplicates this repeated field with a shallow copy. References to all * non-primitive element objects (e.g., submessages) are shared. */ -VALUE RepeatedField_dup(VALUE _self) { +static VALUE RepeatedField_dup(VALUE _self) { RepeatedField* self = ruby_to_RepeatedField(_self); - VALUE new_rptfield = RepeatedField_new_this_type(_self); + VALUE new_rptfield = RepeatedField_new_this_type(self); RepeatedField* new_rptfield_self = ruby_to_RepeatedField(new_rptfield); - upb_fieldtype_t field_type = self->field_type; - size_t elem_size = native_slot_size(field_type); - size_t off = 0; + upb_array *new_array = RepeatedField_GetMutable(new_rptfield); + upb_arena* arena = Arena_get(new_rptfield_self->arena); + int size = upb_array_size(self->array); int i; - RepeatedField_reserve(new_rptfield_self, self->size); - for (i = 0; i < self->size; i++, off += elem_size) { - void* to_mem = (uint8_t *)new_rptfield_self->elements + off; - void* from_mem = (uint8_t *)self->elements + off; - native_slot_dup(field_type, to_mem, from_mem); - new_rptfield_self->size++; - } - - return new_rptfield; -} + upb_arena_fuse(arena, Arena_get(self->arena)); -// Internal only: used by Google::Protobuf.deep_copy. -VALUE RepeatedField_deep_copy(VALUE _self) { - RepeatedField* self = ruby_to_RepeatedField(_self); - VALUE new_rptfield = RepeatedField_new_this_type(_self); - RepeatedField* new_rptfield_self = ruby_to_RepeatedField(new_rptfield); - upb_fieldtype_t field_type = self->field_type; - size_t elem_size = native_slot_size(field_type); - size_t off = 0; - int i; - - RepeatedField_reserve(new_rptfield_self, self->size); - for (i = 0; i < self->size; i++, off += elem_size) { - void* to_mem = (uint8_t *)new_rptfield_self->elements + off; - void* from_mem = (uint8_t *)self->elements + off; - native_slot_deep_copy(field_type, self->field_type_class, to_mem, from_mem); - new_rptfield_self->size++; + for (i = 0; i < size; i++) { + upb_msgval msgval = upb_array_get(self->array, i); + upb_array_append(new_array, msgval, arena); } return new_rptfield; @@ -394,17 +431,16 @@ VALUE RepeatedField_deep_copy(VALUE _self) { */ VALUE RepeatedField_to_ary(VALUE _self) { RepeatedField* self = ruby_to_RepeatedField(_self); - upb_fieldtype_t field_type = self->field_type; - size_t elem_size = native_slot_size(field_type); - size_t off = 0; - VALUE ary = rb_ary_new2(self->size); + int size = upb_array_size(self->array); + VALUE ary = rb_ary_new2(size); int i; - for (i = 0; i < self->size; i++, off += elem_size) { - void* mem = ((uint8_t *)self->elements) + off; - VALUE elem = native_slot_get(field_type, self->field_type_class, mem); - rb_ary_push(ary, elem); + for (i = 0; i < size; i++) { + upb_msgval msgval = upb_array_get(self->array, i); + VALUE val = Convert_UpbToRuby(msgval, self->type_info, self->arena); + rb_ary_push(ary, val); } + return ary; } @@ -436,28 +472,38 @@ VALUE RepeatedField_eq(VALUE _self, VALUE _other) { self = ruby_to_RepeatedField(_self); other = ruby_to_RepeatedField(_other); - if (self->field_type != other->field_type || - self->field_type_class != other->field_type_class || - self->size != other->size) { + size_t n = upb_array_size(self->array); + + if (self->type_info.type != other->type_info.type || + self->type_class != other->type_class || + upb_array_size(other->array) != n) { return Qfalse; } - { - upb_fieldtype_t field_type = self->field_type; - size_t elem_size = native_slot_size(field_type); - size_t off = 0; - int i; - - for (i = 0; i < self->size; i++, off += elem_size) { - void* self_mem = ((uint8_t *)self->elements) + off; - void* other_mem = ((uint8_t *)other->elements) + off; - if (!native_slot_eq(field_type, self->field_type_class, self_mem, - other_mem)) { - return Qfalse; - } + for (size_t i = 0; i < n; i++) { + upb_msgval val1 = upb_array_get(self->array, i); + upb_msgval val2 = upb_array_get(other->array, i); + if (!Msgval_IsEqual(val1, val2, self->type_info)) { + return Qfalse; } - return Qtrue; } + + return Qtrue; +} + +/* + * call-seq: + * RepeatedField.freeze => self + * + * Freezes the repeated field. We have to intercept this so we can pin the Ruby + * object into memory so we don't forget it's frozen. + */ +static VALUE RepeatedField_freeze(VALUE _self) { + RepeatedField* self = ruby_to_RepeatedField(_self); + + ObjectCache_Pin(self->array, _self, Arena_get(self->arena)); + RB_OBJ_FREEZE(_self); + return _self; } /* @@ -468,22 +514,15 @@ VALUE RepeatedField_eq(VALUE _self, VALUE _other) { */ VALUE RepeatedField_hash(VALUE _self) { RepeatedField* self = ruby_to_RepeatedField(_self); - st_index_t h = rb_hash_start(0); - VALUE hash_sym = rb_intern("hash"); - upb_fieldtype_t field_type = self->field_type; - VALUE field_type_class = self->field_type_class; - size_t elem_size = native_slot_size(field_type); - size_t off = 0; - int i; + uint64_t hash = 0; + size_t n = upb_array_size(self->array); - for (i = 0; i < self->size; i++, off += elem_size) { - void* mem = ((uint8_t *)self->elements) + off; - VALUE elem = native_slot_get(field_type, field_type_class, mem); - h = rb_hash_uint(h, NUM2LONG(rb_funcall(elem, hash_sym, 0))); + for (size_t i = 0; i < n; i++) { + upb_msgval val = upb_array_get(self->array, i); + hash = Msgval_GetHash(val, self->type_info, hash); } - h = rb_hash_end(h); - return INT2FIX(h); + return LL2NUM(hash); } /* @@ -495,34 +534,39 @@ VALUE RepeatedField_hash(VALUE _self) { * be either another repeated field or a Ruby array. */ VALUE RepeatedField_plus(VALUE _self, VALUE list) { - VALUE dupped = RepeatedField_dup(_self); + VALUE dupped_ = RepeatedField_dup(_self); if (TYPE(list) == T_ARRAY) { int i; for (i = 0; i < RARRAY_LEN(list); i++) { VALUE elem = rb_ary_entry(list, i); - RepeatedField_push(dupped, elem); + RepeatedField_push(dupped_, elem); } } else if (RB_TYPE_P(list, T_DATA) && RTYPEDDATA_P(list) && RTYPEDDATA_TYPE(list) == &RepeatedField_type) { RepeatedField* self = ruby_to_RepeatedField(_self); RepeatedField* list_rptfield = ruby_to_RepeatedField(list); + RepeatedField* dupped = ruby_to_RepeatedField(dupped_); + upb_array *dupped_array = RepeatedField_GetMutable(dupped_); + upb_arena* arena = Arena_get(dupped->arena); + int size = upb_array_size(list_rptfield->array); int i; - if (self->field_type != list_rptfield->field_type || - self->field_type_class != list_rptfield->field_type_class) { + if (self->type_info.type != list_rptfield->type_info.type || + self->type_class != list_rptfield->type_class) { rb_raise(rb_eArgError, "Attempt to append RepeatedField with different element type."); } - for (i = 0; i < list_rptfield->size; i++) { - void* mem = RepeatedField_index_native(list, i); - RepeatedField_push_native(dupped, mem); + + for (i = 0; i < size; i++) { + upb_msgval msgval = upb_array_get(list_rptfield->array, i); + upb_array_append(dupped_array, msgval, arena); } } else { rb_raise(rb_eArgError, "Unknown type appending to RepeatedField"); } - return dupped; + return dupped_; } /* @@ -541,116 +585,41 @@ VALUE RepeatedField_concat(VALUE _self, VALUE list) { return _self; } - -void validate_type_class(upb_fieldtype_t type, VALUE klass) { - if (rb_ivar_get(klass, descriptor_instancevar_interned) == Qnil) { - rb_raise(rb_eArgError, - "Type class has no descriptor. Please pass a " - "class or enum as returned by the DescriptorPool."); - } - if (type == UPB_TYPE_MESSAGE) { - VALUE desc = rb_ivar_get(klass, descriptor_instancevar_interned); - if (!RB_TYPE_P(desc, T_DATA) || !RTYPEDDATA_P(desc) || - RTYPEDDATA_TYPE(desc) != &_Descriptor_type) { - rb_raise(rb_eArgError, "Descriptor has an incorrect type."); - } - if (rb_get_alloc_func(klass) != &Message_alloc) { - rb_raise(rb_eArgError, - "Message class was not returned by the DescriptorPool."); - } - } else if (type == UPB_TYPE_ENUM) { - VALUE enumdesc = rb_ivar_get(klass, descriptor_instancevar_interned); - if (!RB_TYPE_P(enumdesc, T_DATA) || !RTYPEDDATA_P(enumdesc) || - RTYPEDDATA_TYPE(enumdesc) != &_EnumDescriptor_type) { - rb_raise(rb_eArgError, "Descriptor has an incorrect type."); - } - } -} - -void RepeatedField_init_args(int argc, VALUE* argv, - VALUE _self) { +/* + * call-seq: + * RepeatedField.new(type, type_class = nil, initial_elems = []) + * + * Creates a new repeated field. The provided type must be a Ruby symbol, and + * can take on the same values as those accepted by FieldDescriptor#type=. If + * the type is :message or :enum, type_class must be non-nil, and must be the + * Ruby class or module returned by Descriptor#msgclass or + * EnumDescriptor#enummodule, respectively. An initial list of elements may also + * be provided. + */ +VALUE RepeatedField_init(int argc, VALUE* argv, VALUE _self) { RepeatedField* self = ruby_to_RepeatedField(_self); + upb_arena *arena; VALUE ary = Qnil; + + self->arena = Arena_new(); + arena = Arena_get(self->arena); + if (argc < 1) { rb_raise(rb_eArgError, "Expected at least 1 argument."); } - self->field_type = ruby_to_fieldtype(argv[0]); - if (self->field_type == UPB_TYPE_MESSAGE || - self->field_type == UPB_TYPE_ENUM) { - if (argc < 2) { - rb_raise(rb_eArgError, "Expected at least 2 arguments for message/enum."); - } - self->field_type_class = argv[1]; - if (argc > 2) { - ary = argv[2]; - } - validate_type_class(self->field_type, self->field_type_class); - } else { - if (argc > 2) { - rb_raise(rb_eArgError, "Too many arguments: expected 1 or 2."); - } - if (argc > 1) { - ary = argv[1]; - } - } + self->type_info = TypeInfo_FromClass(argc, argv, 0, &self->type_class, &ary); + self->array = upb_array_new(arena, self->type_info.type); + ObjectCache_Add(self->array, _self, arena); if (ary != Qnil) { - int i; - if (!RB_TYPE_P(ary, T_ARRAY)) { rb_raise(rb_eArgError, "Expected array as initialize argument"); } - for (i = 0; i < RARRAY_LEN(ary); i++) { + for (int i = 0; i < RARRAY_LEN(ary); i++) { RepeatedField_push(_self, rb_ary_entry(ary, i)); } } -} - -// Mark, free, alloc, init and class setup functions. - -void RepeatedField_mark(void* _self) { - RepeatedField* self = (RepeatedField*)_self; - upb_fieldtype_t field_type = self->field_type; - int element_size = native_slot_size(field_type); - int i; - - rb_gc_mark(self->field_type_class); - for (i = 0; i < self->size; i++) { - void* memory = (((uint8_t *)self->elements) + i * element_size); - native_slot_mark(self->field_type, memory); - } -} - -void RepeatedField_free(void* _self) { - RepeatedField* self = (RepeatedField*)_self; - xfree(self->elements); - xfree(self); -} - -/* - * call-seq: - * RepeatedField.new(type, type_class = nil, initial_elems = []) - * - * Creates a new repeated field. The provided type must be a Ruby symbol, and - * can take on the same values as those accepted by FieldDescriptor#type=. If - * the type is :message or :enum, type_class must be non-nil, and must be the - * Ruby class or module returned by Descriptor#msgclass or - * EnumDescriptor#enummodule, respectively. An initial list of elements may also - * be provided. - */ -VALUE RepeatedField_alloc(VALUE klass) { - RepeatedField* self = ALLOC(RepeatedField); - self->elements = NULL; - self->size = 0; - self->capacity = 0; - self->field_type = -1; - self->field_type_class = Qnil; - return TypedData_Wrap_Struct(klass, &RepeatedField_type, self); -} - -VALUE RepeatedField_init(int argc, VALUE* argv, VALUE self) { - RepeatedField_init_args(argc, argv, self); return Qnil; } @@ -667,7 +636,7 @@ void RepeatedField_register(VALUE module) { rb_define_method(klass, "[]", RepeatedField_index, -1); rb_define_method(klass, "at", RepeatedField_index, -1); rb_define_method(klass, "[]=", RepeatedField_index_set, 2); - rb_define_method(klass, "push", RepeatedField_push_vararg, -2); + rb_define_method(klass, "push", RepeatedField_push_vararg, -1); rb_define_method(klass, "<<", RepeatedField_push, 1); rb_define_private_method(klass, "pop_one", RepeatedField_pop_one, 0); rb_define_method(klass, "replace", RepeatedField_replace, 1); @@ -679,6 +648,7 @@ void RepeatedField_register(VALUE module) { rb_define_method(klass, "clone", RepeatedField_dup, 0); rb_define_method(klass, "==", RepeatedField_eq, 1); rb_define_method(klass, "to_ary", RepeatedField_to_ary, 0); + rb_define_method(klass, "freeze", RepeatedField_freeze, 0); rb_define_method(klass, "hash", RepeatedField_hash, 0); rb_define_method(klass, "+", RepeatedField_plus, 1); rb_define_method(klass, "concat", RepeatedField_concat, 1); diff --git a/ruby/ext/google/protobuf_c/repeated_field.h b/ruby/ext/google/protobuf_c/repeated_field.h new file mode 100644 index 000000000000..5a5959dd837b --- /dev/null +++ b/ruby/ext/google/protobuf_c/repeated_field.h @@ -0,0 +1,62 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef RUBY_PROTOBUF_REPEATED_FIELD_H_ +#define RUBY_PROTOBUF_REPEATED_FIELD_H_ + +#include + +#include "protobuf.h" +#include "ruby-upb.h" + +// Returns a Ruby wrapper object for the given upb_array, which will be created +// if one does not exist already. +VALUE RepeatedField_GetRubyWrapper(upb_array* msg, TypeInfo type_info, + VALUE arena); + +// Gets the underlying upb_array for this Ruby RepeatedField object, which must +// have a type that matches |f|. If this is not a repeated field or the type +// doesn't match, raises an exception. +const upb_array* RepeatedField_GetUpbArray(VALUE value, const upb_fielddef* f); + +// Implements #inspect for this repeated field by appending its contents to |b|. +void RepeatedField_Inspect(StringBuilder* b, const upb_array* array, + TypeInfo info); + +// Returns a deep copy of this RepeatedField object. +VALUE RepeatedField_deep_copy(VALUE obj); + +// Ruby class of Google::Protobuf::RepeatedField. +extern VALUE cRepeatedField; + +// Call at startup to register all types in this module. +void RepeatedField_register(VALUE module); + +#endif // RUBY_PROTOBUF_REPEATED_FIELD_H_ diff --git a/ruby/ext/google/protobuf_c/ruby-upb.c b/ruby/ext/google/protobuf_c/ruby-upb.c new file mode 100755 index 000000000000..9c048da98bac --- /dev/null +++ b/ruby/ext/google/protobuf_c/ruby-upb.c @@ -0,0 +1,8913 @@ +/* Amalgamated source file */ +#include "ruby-upb.h" +/* +* This is where we define macros used across upb. +* +* All of these macros are undef'd in port_undef.inc to avoid leaking them to +* users. +* +* The correct usage is: +* +* #include "upb/foobar.h" +* #include "upb/baz.h" +* +* // MUST be last included header. +* #include "upb/port_def.inc" +* +* // Code for this file. +* // <...> +* +* // Can be omitted for .c files, required for .h. +* #include "upb/port_undef.inc" +* +* This file is private and must not be included by users! +*/ + +#if !((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ + (defined(__cplusplus) && __cplusplus >= 201103L) || \ + (defined(_MSC_VER) && _MSC_VER >= 1900)) +#error upb requires C99 or C++11 or MSVC >= 2015. +#endif + +#include +#include + +#if UINTPTR_MAX == 0xffffffff +#define UPB_SIZE(size32, size64) size32 +#else +#define UPB_SIZE(size32, size64) size64 +#endif + +/* If we always read/write as a consistent type to each address, this shouldn't + * violate aliasing. + */ +#define UPB_PTR_AT(msg, ofs, type) ((type*)((char*)(msg) + (ofs))) + +#define UPB_READ_ONEOF(msg, fieldtype, offset, case_offset, case_val, default) \ + *UPB_PTR_AT(msg, case_offset, int) == case_val \ + ? *UPB_PTR_AT(msg, offset, fieldtype) \ + : default + +#define UPB_WRITE_ONEOF(msg, fieldtype, offset, value, case_offset, case_val) \ + *UPB_PTR_AT(msg, case_offset, int) = case_val; \ + *UPB_PTR_AT(msg, offset, fieldtype) = value; + +#define UPB_MAPTYPE_STRING 0 + +/* UPB_INLINE: inline if possible, emit standalone code if required. */ +#ifdef __cplusplus +#define UPB_INLINE inline +#elif defined (__GNUC__) || defined(__clang__) +#define UPB_INLINE static __inline__ +#else +#define UPB_INLINE static +#endif + +#define UPB_ALIGN_UP(size, align) (((size) + (align) - 1) / (align) * (align)) +#define UPB_ALIGN_DOWN(size, align) ((size) / (align) * (align)) +#define UPB_ALIGN_MALLOC(size) UPB_ALIGN_UP(size, 16) +#define UPB_ALIGN_OF(type) offsetof (struct { char c; type member; }, member) + +/* Hints to the compiler about likely/unlikely branches. */ +#if defined (__GNUC__) || defined(__clang__) +#define UPB_LIKELY(x) __builtin_expect((x),1) +#define UPB_UNLIKELY(x) __builtin_expect((x),0) +#else +#define UPB_LIKELY(x) (x) +#define UPB_UNLIKELY(x) (x) +#endif + +/* Macros for function attributes on compilers that support them. */ +#ifdef __GNUC__ +#define UPB_FORCEINLINE __inline__ __attribute__((always_inline)) +#define UPB_NOINLINE __attribute__((noinline)) +#define UPB_NORETURN __attribute__((__noreturn__)) +#define UPB_PRINTF(str, first_vararg) __attribute__((format (printf, str, first_vararg))) +#elif defined(_MSC_VER) +#define UPB_NOINLINE +#define UPB_FORCEINLINE +#define UPB_NORETURN __declspec(noreturn) +#define UPB_PRINTF(str, first_vararg) +#else /* !defined(__GNUC__) */ +#define UPB_FORCEINLINE +#define UPB_NOINLINE +#define UPB_NORETURN +#define UPB_PRINTF(str, first_vararg) +#endif + +#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) +#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) + +#define UPB_UNUSED(var) (void)var + +/* UPB_ASSUME(): in release mode, we tell the compiler to assume this is true. + */ +#ifdef NDEBUG +#ifdef __GNUC__ +#define UPB_ASSUME(expr) if (!(expr)) __builtin_unreachable() +#elif defined _MSC_VER +#define UPB_ASSUME(expr) if (!(expr)) __assume(0) +#else +#define UPB_ASSUME(expr) do {} while (false && (expr)) +#endif +#else +#define UPB_ASSUME(expr) assert(expr) +#endif + +/* UPB_ASSERT(): in release mode, we use the expression without letting it be + * evaluated. This prevents "unused variable" warnings. */ +#ifdef NDEBUG +#define UPB_ASSERT(expr) do {} while (false && (expr)) +#else +#define UPB_ASSERT(expr) assert(expr) +#endif + +#if defined(__GNUC__) || defined(__clang__) +#define UPB_UNREACHABLE() do { assert(0); __builtin_unreachable(); } while(0) +#else +#define UPB_UNREACHABLE() do { assert(0); } while(0) +#endif + +/* UPB_SETJMP() / UPB_LONGJMP(): avoid setting/restoring signal mask. */ +#ifdef __APPLE__ +#define UPB_SETJMP(buf) _setjmp(buf) +#define UPB_LONGJMP(buf, val) _longjmp(buf, val) +#else +#define UPB_SETJMP(buf) setjmp(buf) +#define UPB_LONGJMP(buf, val) longjmp(buf, val) +#endif + +/* Configure whether fasttable is switched on or not. *************************/ + +#if defined(__x86_64__) && defined(__GNUC__) +#define UPB_FASTTABLE_SUPPORTED 1 +#else +#define UPB_FASTTABLE_SUPPORTED 0 +#endif + +/* define UPB_ENABLE_FASTTABLE to force fast table support. + * This is useful when we want to ensure we are really getting fasttable, + * for example for testing or benchmarking. */ +#if defined(UPB_ENABLE_FASTTABLE) +#if !UPB_FASTTABLE_SUPPORTED +#error fasttable is x86-64 + Clang/GCC only +#endif +#define UPB_FASTTABLE 1 +/* Define UPB_TRY_ENABLE_FASTTABLE to use fasttable if possible. + * This is useful for releasing code that might be used on multiple platforms, + * for example the PHP or Ruby C extensions. */ +#elif defined(UPB_TRY_ENABLE_FASTTABLE) +#define UPB_FASTTABLE UPB_FASTTABLE_SUPPORTED +#else +#define UPB_FASTTABLE 0 +#endif + +/* UPB_FASTTABLE_INIT() allows protos compiled for fasttable to gracefully + * degrade to non-fasttable if we are using UPB_TRY_ENABLE_FASTTABLE. */ +#if !UPB_FASTTABLE && defined(UPB_TRY_ENABLE_FASTTABLE) +#define UPB_FASTTABLE_INIT(...) +#else +#define UPB_FASTTABLE_INIT(...) __VA_ARGS__ +#endif + +#undef UPB_FASTTABLE_SUPPORTED + +/* ASAN poisoning (for arena) *************************************************/ + +#if defined(__SANITIZE_ADDRESS__) +#define UPB_ASAN 1 +#ifdef __cplusplus +extern "C" { +#endif +void __asan_poison_memory_region(void const volatile *addr, size_t size); +void __asan_unpoison_memory_region(void const volatile *addr, size_t size); +#ifdef __cplusplus +} /* extern "C" */ +#endif +#define UPB_POISON_MEMORY_REGION(addr, size) \ + __asan_poison_memory_region((addr), (size)) +#define UPB_UNPOISON_MEMORY_REGION(addr, size) \ + __asan_unpoison_memory_region((addr), (size)) +#else +#define UPB_ASAN 0 +#define UPB_POISON_MEMORY_REGION(addr, size) \ + ((void)(addr), (void)(size)) +#define UPB_UNPOISON_MEMORY_REGION(addr, size) \ + ((void)(addr), (void)(size)) +#endif + + +#include +#include + + +/* Must be last. */ + +/* Maps descriptor type -> elem_size_lg2. */ +static const uint8_t desctype_to_elem_size_lg2[] = { + -1, /* invalid descriptor type */ + 3, /* DOUBLE */ + 2, /* FLOAT */ + 3, /* INT64 */ + 3, /* UINT64 */ + 2, /* INT32 */ + 3, /* FIXED64 */ + 2, /* FIXED32 */ + 0, /* BOOL */ + UPB_SIZE(3, 4), /* STRING */ + UPB_SIZE(2, 3), /* GROUP */ + UPB_SIZE(2, 3), /* MESSAGE */ + UPB_SIZE(3, 4), /* BYTES */ + 2, /* UINT32 */ + 2, /* ENUM */ + 2, /* SFIXED32 */ + 3, /* SFIXED64 */ + 2, /* SINT32 */ + 3, /* SINT64 */ +}; + +/* Maps descriptor type -> upb map size. */ +static const uint8_t desctype_to_mapsize[] = { + -1, /* invalid descriptor type */ + 8, /* DOUBLE */ + 4, /* FLOAT */ + 8, /* INT64 */ + 8, /* UINT64 */ + 4, /* INT32 */ + 8, /* FIXED64 */ + 4, /* FIXED32 */ + 1, /* BOOL */ + UPB_MAPTYPE_STRING, /* STRING */ + sizeof(void *), /* GROUP */ + sizeof(void *), /* MESSAGE */ + UPB_MAPTYPE_STRING, /* BYTES */ + 4, /* UINT32 */ + 4, /* ENUM */ + 4, /* SFIXED32 */ + 8, /* SFIXED64 */ + 4, /* SINT32 */ + 8, /* SINT64 */ +}; + +static const unsigned fixed32_ok = (1 << UPB_DTYPE_FLOAT) | + (1 << UPB_DTYPE_FIXED32) | + (1 << UPB_DTYPE_SFIXED32); + +static const unsigned fixed64_ok = (1 << UPB_DTYPE_DOUBLE) | + (1 << UPB_DTYPE_FIXED64) | + (1 << UPB_DTYPE_SFIXED64); + +/* Op: an action to be performed for a wire-type/field-type combination. */ +#define OP_SCALAR_LG2(n) (n) /* n in [0, 2, 3] => op in [0, 2, 3] */ +#define OP_STRING 4 +#define OP_BYTES 5 +#define OP_SUBMSG 6 +/* Ops above are scalar-only. Repeated fields can use any op. */ +#define OP_FIXPCK_LG2(n) (n + 5) /* n in [2, 3] => op in [7, 8] */ +#define OP_VARPCK_LG2(n) (n + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */ + +static const int8_t varint_ops[19] = { + -1, /* field not found */ + -1, /* DOUBLE */ + -1, /* FLOAT */ + OP_SCALAR_LG2(3), /* INT64 */ + OP_SCALAR_LG2(3), /* UINT64 */ + OP_SCALAR_LG2(2), /* INT32 */ + -1, /* FIXED64 */ + -1, /* FIXED32 */ + OP_SCALAR_LG2(0), /* BOOL */ + -1, /* STRING */ + -1, /* GROUP */ + -1, /* MESSAGE */ + -1, /* BYTES */ + OP_SCALAR_LG2(2), /* UINT32 */ + OP_SCALAR_LG2(2), /* ENUM */ + -1, /* SFIXED32 */ + -1, /* SFIXED64 */ + OP_SCALAR_LG2(2), /* SINT32 */ + OP_SCALAR_LG2(3), /* SINT64 */ +}; + +static const int8_t delim_ops[37] = { + /* For non-repeated field type. */ + -1, /* field not found */ + -1, /* DOUBLE */ + -1, /* FLOAT */ + -1, /* INT64 */ + -1, /* UINT64 */ + -1, /* INT32 */ + -1, /* FIXED64 */ + -1, /* FIXED32 */ + -1, /* BOOL */ + OP_STRING, /* STRING */ + -1, /* GROUP */ + OP_SUBMSG, /* MESSAGE */ + OP_BYTES, /* BYTES */ + -1, /* UINT32 */ + -1, /* ENUM */ + -1, /* SFIXED32 */ + -1, /* SFIXED64 */ + -1, /* SINT32 */ + -1, /* SINT64 */ + /* For repeated field type. */ + OP_FIXPCK_LG2(3), /* REPEATED DOUBLE */ + OP_FIXPCK_LG2(2), /* REPEATED FLOAT */ + OP_VARPCK_LG2(3), /* REPEATED INT64 */ + OP_VARPCK_LG2(3), /* REPEATED UINT64 */ + OP_VARPCK_LG2(2), /* REPEATED INT32 */ + OP_FIXPCK_LG2(3), /* REPEATED FIXED64 */ + OP_FIXPCK_LG2(2), /* REPEATED FIXED32 */ + OP_VARPCK_LG2(0), /* REPEATED BOOL */ + OP_STRING, /* REPEATED STRING */ + OP_SUBMSG, /* REPEATED GROUP */ + OP_SUBMSG, /* REPEATED MESSAGE */ + OP_BYTES, /* REPEATED BYTES */ + OP_VARPCK_LG2(2), /* REPEATED UINT32 */ + OP_VARPCK_LG2(2), /* REPEATED ENUM */ + OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */ + OP_FIXPCK_LG2(3), /* REPEATED SFIXED64 */ + OP_VARPCK_LG2(2), /* REPEATED SINT32 */ + OP_VARPCK_LG2(3), /* REPEATED SINT64 */ +}; + +typedef union { + bool bool_val; + uint32_t uint32_val; + uint64_t uint64_val; + uint32_t size; +} wireval; + +static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg, + const upb_msglayout *layout); + +UPB_NORETURN static void decode_err(upb_decstate *d) { UPB_LONGJMP(d->err, 1); } + +// We don't want to mark this NORETURN, see comment in .h. +// Unfortunately this code to suppress the warning doesn't appear to be working. +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunknown-warning-option" +#pragma clang diagnostic ignored "-Wsuggest-attribute" +#endif + +const char *fastdecode_err(upb_decstate *d) { + longjmp(d->err, 1); + return NULL; +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +const uint8_t upb_utf8_offsets[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static void decode_verifyutf8(upb_decstate *d, const char *buf, int len) { + if (!decode_verifyutf8_inl(buf, len)) decode_err(d); +} + +static bool decode_reserve(upb_decstate *d, upb_array *arr, size_t elem) { + bool need_realloc = arr->size - arr->len < elem; + if (need_realloc && !_upb_array_realloc(arr, arr->len + elem, &d->arena)) { + decode_err(d); + } + return need_realloc; +} + +typedef struct { + const char *ptr; + uint64_t val; +} decode_vret; + +UPB_NOINLINE +static decode_vret decode_longvarint64(const char *ptr, uint64_t val) { + decode_vret ret = {NULL, 0}; + uint64_t byte; + int i; + for (i = 1; i < 10; i++) { + byte = (uint8_t)ptr[i]; + val += (byte - 1) << (i * 7); + if (!(byte & 0x80)) { + ret.ptr = ptr + i + 1; + ret.val = val; + return ret; + } + } + return ret; +} + +UPB_FORCEINLINE +static const char *decode_varint64(upb_decstate *d, const char *ptr, + uint64_t *val) { + uint64_t byte = (uint8_t)*ptr; + if (UPB_LIKELY((byte & 0x80) == 0)) { + *val = byte; + return ptr + 1; + } else { + decode_vret res = decode_longvarint64(ptr, byte); + if (!res.ptr) decode_err(d); + *val = res.val; + return res.ptr; + } +} + +UPB_FORCEINLINE +static const char *decode_tag(upb_decstate *d, const char *ptr, + uint32_t *val) { + uint64_t byte = (uint8_t)*ptr; + if (UPB_LIKELY((byte & 0x80) == 0)) { + *val = byte; + return ptr + 1; + } else { + const char *start = ptr; + decode_vret res = decode_longvarint64(ptr, byte); + ptr = res.ptr; + *val = res.val; + if (!ptr || *val > UINT32_MAX || ptr - start > 5) decode_err(d); + return ptr; + } +} + +static void decode_munge(int type, wireval *val) { + switch (type) { + case UPB_DESCRIPTOR_TYPE_BOOL: + val->bool_val = val->uint64_val != 0; + break; + case UPB_DESCRIPTOR_TYPE_SINT32: { + uint32_t n = val->uint32_val; + val->uint32_val = (n >> 1) ^ -(int32_t)(n & 1); + break; + } + case UPB_DESCRIPTOR_TYPE_SINT64: { + uint64_t n = val->uint64_val; + val->uint64_val = (n >> 1) ^ -(int64_t)(n & 1); + break; + } + case UPB_DESCRIPTOR_TYPE_INT32: + case UPB_DESCRIPTOR_TYPE_UINT32: + if (!_upb_isle()) { + /* The next stage will memcpy(dst, &val, 4) */ + val->uint32_val = val->uint64_val; + } + break; + } +} + +static const upb_msglayout_field *upb_find_field(const upb_msglayout *l, + uint32_t field_number) { + static upb_msglayout_field none = {0, 0, 0, 0, 0, 0}; + + /* Lots of optimization opportunities here. */ + int i; + if (l == NULL) return &none; + for (i = 0; i < l->field_count; i++) { + if (l->fields[i].number == field_number) { + return &l->fields[i]; + } + } + + return &none; /* Unknown field. */ +} + +static upb_msg *decode_newsubmsg(upb_decstate *d, const upb_msglayout *layout, + const upb_msglayout_field *field) { + const upb_msglayout *subl = layout->submsgs[field->submsg_index]; + return _upb_msg_new_inl(subl, &d->arena); +} + +UPB_NOINLINE +const char *decode_isdonefallback(upb_decstate *d, const char *ptr, + int overrun) { + ptr = decode_isdonefallback_inl(d, ptr, overrun); + if (ptr == NULL) { + decode_err(d); + } + return ptr; +} + +static const char *decode_readstr(upb_decstate *d, const char *ptr, int size, + upb_strview *str) { + if (d->alias) { + str->data = ptr; + } else { + char *data = upb_arena_malloc(&d->arena, size); + if (!data) decode_err(d); + memcpy(data, ptr, size); + str->data = data; + } + str->size = size; + return ptr + size; +} + +UPB_FORCEINLINE +static const char *decode_tosubmsg(upb_decstate *d, const char *ptr, + upb_msg *submsg, const upb_msglayout *layout, + const upb_msglayout_field *field, int size) { + const upb_msglayout *subl = layout->submsgs[field->submsg_index]; + int saved_delta = decode_pushlimit(d, ptr, size); + if (--d->depth < 0) decode_err(d); + if (!decode_isdone(d, &ptr)) { + ptr = decode_msg(d, ptr, submsg, subl); + } + if (d->end_group != DECODE_NOGROUP) decode_err(d); + decode_poplimit(d, ptr, saved_delta); + d->depth++; + return ptr; +} + +UPB_FORCEINLINE +static const char *decode_group(upb_decstate *d, const char *ptr, + upb_msg *submsg, const upb_msglayout *subl, + uint32_t number) { + if (--d->depth < 0) decode_err(d); + if (decode_isdone(d, &ptr)) { + decode_err(d); + } + ptr = decode_msg(d, ptr, submsg, subl); + if (d->end_group != number) decode_err(d); + d->end_group = DECODE_NOGROUP; + d->depth++; + return ptr; +} + +UPB_FORCEINLINE +static const char *decode_togroup(upb_decstate *d, const char *ptr, + upb_msg *submsg, const upb_msglayout *layout, + const upb_msglayout_field *field) { + const upb_msglayout *subl = layout->submsgs[field->submsg_index]; + return decode_group(d, ptr, submsg, subl, field->number); +} + +static const char *decode_toarray(upb_decstate *d, const char *ptr, + upb_msg *msg, const upb_msglayout *layout, + const upb_msglayout_field *field, wireval val, + int op) { + upb_array **arrp = UPB_PTR_AT(msg, field->offset, void); + upb_array *arr = *arrp; + void *mem; + + if (arr) { + decode_reserve(d, arr, 1); + } else { + size_t lg2 = desctype_to_elem_size_lg2[field->descriptortype]; + arr = _upb_array_new(&d->arena, 4, lg2); + if (!arr) decode_err(d); + *arrp = arr; + } + + switch (op) { + case OP_SCALAR_LG2(0): + case OP_SCALAR_LG2(2): + case OP_SCALAR_LG2(3): + /* Append scalar value. */ + mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << op, void); + arr->len++; + memcpy(mem, &val, 1 << op); + return ptr; + case OP_STRING: + decode_verifyutf8(d, ptr, val.size); + /* Fallthrough. */ + case OP_BYTES: { + /* Append bytes. */ + upb_strview *str = (upb_strview*)_upb_array_ptr(arr) + arr->len; + arr->len++; + return decode_readstr(d, ptr, val.size, str); + } + case OP_SUBMSG: { + /* Append submessage / group. */ + upb_msg *submsg = decode_newsubmsg(d, layout, field); + *UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(void *), upb_msg *) = + submsg; + arr->len++; + if (UPB_UNLIKELY(field->descriptortype == UPB_DTYPE_GROUP)) { + return decode_togroup(d, ptr, submsg, layout, field); + } else { + return decode_tosubmsg(d, ptr, submsg, layout, field, val.size); + } + } + case OP_FIXPCK_LG2(2): + case OP_FIXPCK_LG2(3): { + /* Fixed packed. */ + int lg2 = op - OP_FIXPCK_LG2(0); + int mask = (1 << lg2) - 1; + size_t count = val.size >> lg2; + if ((val.size & mask) != 0) { + decode_err(d); /* Length isn't a round multiple of elem size. */ + } + decode_reserve(d, arr, count); + mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void); + arr->len += count; + memcpy(mem, ptr, val.size); /* XXX: ptr boundary. */ + return ptr + val.size; + } + case OP_VARPCK_LG2(0): + case OP_VARPCK_LG2(2): + case OP_VARPCK_LG2(3): { + /* Varint packed. */ + int lg2 = op - OP_VARPCK_LG2(0); + int scale = 1 << lg2; + int saved_limit = decode_pushlimit(d, ptr, val.size); + char *out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void); + while (!decode_isdone(d, &ptr)) { + wireval elem; + ptr = decode_varint64(d, ptr, &elem.uint64_val); + decode_munge(field->descriptortype, &elem); + if (decode_reserve(d, arr, 1)) { + out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void); + } + arr->len++; + memcpy(out, &elem, scale); + out += scale; + } + decode_poplimit(d, ptr, saved_limit); + return ptr; + } + default: + UPB_UNREACHABLE(); + } +} + +static const char *decode_tomap(upb_decstate *d, const char *ptr, upb_msg *msg, + const upb_msglayout *layout, + const upb_msglayout_field *field, wireval val) { + upb_map **map_p = UPB_PTR_AT(msg, field->offset, upb_map *); + upb_map *map = *map_p; + upb_map_entry ent; + const upb_msglayout *entry = layout->submsgs[field->submsg_index]; + + if (!map) { + /* Lazily create map. */ + const upb_msglayout *entry = layout->submsgs[field->submsg_index]; + const upb_msglayout_field *key_field = &entry->fields[0]; + const upb_msglayout_field *val_field = &entry->fields[1]; + char key_size = desctype_to_mapsize[key_field->descriptortype]; + char val_size = desctype_to_mapsize[val_field->descriptortype]; + UPB_ASSERT(key_field->offset == 0); + UPB_ASSERT(val_field->offset == sizeof(upb_strview)); + map = _upb_map_new(&d->arena, key_size, val_size); + *map_p = map; + } + + /* Parse map entry. */ + memset(&ent, 0, sizeof(ent)); + + if (entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE || + entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_GROUP) { + /* Create proactively to handle the case where it doesn't appear. */ + ent.v.val = upb_value_ptr(_upb_msg_new(entry->submsgs[0], &d->arena)); + } + + ptr = decode_tosubmsg(d, ptr, &ent.k, layout, field, val.size); + _upb_map_set(map, &ent.k, map->key_size, &ent.v, map->val_size, &d->arena); + return ptr; +} + +static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg, + const upb_msglayout *layout, + const upb_msglayout_field *field, wireval val, + int op) { + void *mem = UPB_PTR_AT(msg, field->offset, void); + int type = field->descriptortype; + + /* Set presence if necessary. */ + if (field->presence < 0) { + /* Oneof case */ + uint32_t *oneof_case = _upb_oneofcase_field(msg, field); + if (op == OP_SUBMSG && *oneof_case != field->number) { + memset(mem, 0, sizeof(void*)); + } + *oneof_case = field->number; + } else if (field->presence > 0) { + _upb_sethas_field(msg, field); + } + + /* Store into message. */ + switch (op) { + case OP_SUBMSG: { + upb_msg **submsgp = mem; + upb_msg *submsg = *submsgp; + if (!submsg) { + submsg = decode_newsubmsg(d, layout, field); + *submsgp = submsg; + } + if (UPB_UNLIKELY(type == UPB_DTYPE_GROUP)) { + ptr = decode_togroup(d, ptr, submsg, layout, field); + } else { + ptr = decode_tosubmsg(d, ptr, submsg, layout, field, val.size); + } + break; + } + case OP_STRING: + decode_verifyutf8(d, ptr, val.size); + /* Fallthrough. */ + case OP_BYTES: + return decode_readstr(d, ptr, val.size, mem); + case OP_SCALAR_LG2(3): + memcpy(mem, &val, 8); + break; + case OP_SCALAR_LG2(2): + memcpy(mem, &val, 4); + break; + case OP_SCALAR_LG2(0): + memcpy(mem, &val, 1); + break; + default: + UPB_UNREACHABLE(); + } + + return ptr; +} + +UPB_FORCEINLINE +static bool decode_tryfastdispatch(upb_decstate *d, const char **ptr, + upb_msg *msg, const upb_msglayout *layout) { +#if UPB_FASTTABLE + if (layout && layout->table_mask != (unsigned char)-1) { + uint16_t tag = fastdecode_loadtag(*ptr); + intptr_t table = decode_totable(layout); + *ptr = fastdecode_tagdispatch(d, *ptr, msg, table, 0, tag); + return true; + } +#endif + return false; +} + +UPB_NOINLINE +static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg, + const upb_msglayout *layout) { + while (true) { + uint32_t tag; + const upb_msglayout_field *field; + int field_number; + int wire_type; + const char *field_start = ptr; + wireval val; + int op; + + UPB_ASSERT(ptr < d->limit_ptr); + ptr = decode_tag(d, ptr, &tag); + field_number = tag >> 3; + wire_type = tag & 7; + + field = upb_find_field(layout, field_number); + + switch (wire_type) { + case UPB_WIRE_TYPE_VARINT: + ptr = decode_varint64(d, ptr, &val.uint64_val); + op = varint_ops[field->descriptortype]; + decode_munge(field->descriptortype, &val); + break; + case UPB_WIRE_TYPE_32BIT: + memcpy(&val.uint32_val, ptr, 4); + val.uint32_val = _upb_be_swap32(val.uint32_val); + ptr += 4; + op = OP_SCALAR_LG2(2); + if (((1 << field->descriptortype) & fixed32_ok) == 0) goto unknown; + break; + case UPB_WIRE_TYPE_64BIT: + memcpy(&val.uint64_val, ptr, 8); + val.uint64_val = _upb_be_swap64(val.uint64_val); + ptr += 8; + op = OP_SCALAR_LG2(3); + if (((1 << field->descriptortype) & fixed64_ok) == 0) goto unknown; + break; + case UPB_WIRE_TYPE_DELIMITED: { + int ndx = field->descriptortype; + uint64_t size; + if (_upb_isrepeated(field)) ndx += 18; + ptr = decode_varint64(d, ptr, &size); + if (size >= INT32_MAX || + ptr - d->end + (int32_t)size > d->limit) { + decode_err(d); /* Length overflow. */ + } + op = delim_ops[ndx]; + val.size = size; + break; + } + case UPB_WIRE_TYPE_START_GROUP: + val.uint32_val = field_number; + op = OP_SUBMSG; + if (field->descriptortype != UPB_DTYPE_GROUP) goto unknown; + break; + case UPB_WIRE_TYPE_END_GROUP: + d->end_group = field_number; + return ptr; + default: + decode_err(d); + } + + if (op >= 0) { + /* Parse, using op for dispatch. */ + switch (field->label) { + case UPB_LABEL_REPEATED: + case _UPB_LABEL_PACKED: + ptr = decode_toarray(d, ptr, msg, layout, field, val, op); + break; + case _UPB_LABEL_MAP: + ptr = decode_tomap(d, ptr, msg, layout, field, val); + break; + default: + ptr = decode_tomsg(d, ptr, msg, layout, field, val, op); + break; + } + } else { + unknown: + /* Skip unknown field. */ + if (field_number == 0) decode_err(d); + if (wire_type == UPB_WIRE_TYPE_DELIMITED) ptr += val.size; + if (msg) { + if (wire_type == UPB_WIRE_TYPE_START_GROUP) { + d->unknown = field_start; + d->unknown_msg = msg; + ptr = decode_group(d, ptr, NULL, NULL, field_number); + d->unknown_msg = NULL; + field_start = d->unknown; + } + if (!_upb_msg_addunknown(msg, field_start, ptr - field_start, + &d->arena)) { + decode_err(d); + } + } else if (wire_type == UPB_WIRE_TYPE_START_GROUP) { + ptr = decode_group(d, ptr, NULL, NULL, field_number); + } + } + + if (decode_isdone(d, &ptr)) return ptr; + if (decode_tryfastdispatch(d, &ptr, msg, layout)) return ptr; + } +} + +const char *fastdecode_generic(struct upb_decstate *d, const char *ptr, + upb_msg *msg, intptr_t table, uint64_t hasbits, + uint64_t data) { + (void)data; + *(uint32_t*)msg |= hasbits; + return decode_msg(d, ptr, msg, decode_totablep(table)); +} + +static bool decode_top(struct upb_decstate *d, const char *buf, void *msg, + const upb_msglayout *l) { + if (!decode_tryfastdispatch(d, &buf, msg, l)) { + decode_msg(d, buf, msg, l); + } + return d->end_group == DECODE_NOGROUP; +} + +bool _upb_decode(const char *buf, size_t size, void *msg, + const upb_msglayout *l, upb_arena *arena, int options) { + bool ok; + upb_decstate state; + unsigned depth = (unsigned)options >> 16; + + if (size == 0) { + return true; + } else if (size <= 16) { + memset(&state.patch, 0, 32); + memcpy(&state.patch, buf, size); + buf = state.patch; + state.end = buf + size; + state.limit = 0; + state.alias = false; + } else { + state.end = buf + size - 16; + state.limit = 16; + state.alias = options & UPB_DECODE_ALIAS; + } + + state.limit_ptr = state.end; + state.unknown_msg = NULL; + state.depth = depth ? depth : 64; + state.end_group = DECODE_NOGROUP; + state.arena.head = arena->head; + state.arena.last_size = arena->last_size; + state.arena.cleanups = arena->cleanups; + state.arena.parent = arena; + + if (UPB_UNLIKELY(UPB_SETJMP(state.err))) { + ok = false; + } else { + ok = decode_top(&state, buf, msg, l); + } + + arena->head.ptr = state.arena.head.ptr; + arena->head.end = state.arena.head.end; + arena->cleanups = state.arena.cleanups; + return ok; +} + +#undef OP_SCALAR_LG2 +#undef OP_FIXPCK_LG2 +#undef OP_VARPCK_LG2 +#undef OP_STRING +#undef OP_SUBMSG +/* We encode backwards, to avoid pre-computing lengths (one-pass encode). */ + + +#include +#include + + +/* Must be last. */ + +#define UPB_PB_VARINT_MAX_LEN 10 + +UPB_NOINLINE +static size_t encode_varint64(uint64_t val, char *buf) { + size_t i = 0; + do { + uint8_t byte = val & 0x7fU; + val >>= 7; + if (val) byte |= 0x80U; + buf[i++] = byte; + } while (val); + return i; +} + +static uint32_t encode_zz32(int32_t n) { return ((uint32_t)n << 1) ^ (n >> 31); } +static uint64_t encode_zz64(int64_t n) { return ((uint64_t)n << 1) ^ (n >> 63); } + +typedef struct { + jmp_buf err; + upb_alloc *alloc; + char *buf, *ptr, *limit; + int options; + int depth; + _upb_mapsorter sorter; +} upb_encstate; + +static size_t upb_roundup_pow2(size_t bytes) { + size_t ret = 128; + while (ret < bytes) { + ret *= 2; + } + return ret; +} + +UPB_NORETURN static void encode_err(upb_encstate *e) { + UPB_LONGJMP(e->err, 1); +} + +UPB_NOINLINE +static void encode_growbuffer(upb_encstate *e, size_t bytes) { + size_t old_size = e->limit - e->buf; + size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr)); + char *new_buf = upb_realloc(e->alloc, e->buf, old_size, new_size); + + if (!new_buf) encode_err(e); + + /* We want previous data at the end, realloc() put it at the beginning. */ + if (old_size > 0) { + memmove(new_buf + new_size - old_size, e->buf, old_size); + } + + e->ptr = new_buf + new_size - (e->limit - e->ptr); + e->limit = new_buf + new_size; + e->buf = new_buf; + + e->ptr -= bytes; +} + +/* Call to ensure that at least "bytes" bytes are available for writing at + * e->ptr. Returns false if the bytes could not be allocated. */ +UPB_FORCEINLINE +static void encode_reserve(upb_encstate *e, size_t bytes) { + if ((size_t)(e->ptr - e->buf) < bytes) { + encode_growbuffer(e, bytes); + return; + } + + e->ptr -= bytes; +} + +/* Writes the given bytes to the buffer, handling reserve/advance. */ +static void encode_bytes(upb_encstate *e, const void *data, size_t len) { + if (len == 0) return; /* memcpy() with zero size is UB */ + encode_reserve(e, len); + memcpy(e->ptr, data, len); +} + +static void encode_fixed64(upb_encstate *e, uint64_t val) { + val = _upb_be_swap64(val); + encode_bytes(e, &val, sizeof(uint64_t)); +} + +static void encode_fixed32(upb_encstate *e, uint32_t val) { + val = _upb_be_swap32(val); + encode_bytes(e, &val, sizeof(uint32_t)); +} + +UPB_NOINLINE +static void encode_longvarint(upb_encstate *e, uint64_t val) { + size_t len; + char *start; + + encode_reserve(e, UPB_PB_VARINT_MAX_LEN); + len = encode_varint64(val, e->ptr); + start = e->ptr + UPB_PB_VARINT_MAX_LEN - len; + memmove(start, e->ptr, len); + e->ptr = start; +} + +UPB_FORCEINLINE +static void encode_varint(upb_encstate *e, uint64_t val) { + if (val < 128 && e->ptr != e->buf) { + --e->ptr; + *e->ptr = val; + } else { + encode_longvarint(e, val); + } +} + +static void encode_double(upb_encstate *e, double d) { + uint64_t u64; + UPB_ASSERT(sizeof(double) == sizeof(uint64_t)); + memcpy(&u64, &d, sizeof(uint64_t)); + encode_fixed64(e, u64); +} + +static void encode_float(upb_encstate *e, float d) { + uint32_t u32; + UPB_ASSERT(sizeof(float) == sizeof(uint32_t)); + memcpy(&u32, &d, sizeof(uint32_t)); + encode_fixed32(e, u32); +} + +static void encode_tag(upb_encstate *e, uint32_t field_number, + uint8_t wire_type) { + encode_varint(e, (field_number << 3) | wire_type); +} + +static void encode_fixedarray(upb_encstate *e, const upb_array *arr, + size_t elem_size, uint32_t tag) { + size_t bytes = arr->len * elem_size; + const char* data = _upb_array_constptr(arr); + const char* ptr = data + bytes - elem_size; + if (tag) { + while (true) { + encode_bytes(e, ptr, elem_size); + encode_varint(e, tag); + if (ptr == data) break; + ptr -= elem_size; + } + } else { + encode_bytes(e, data, bytes); + } +} + +static void encode_message(upb_encstate *e, const char *msg, + const upb_msglayout *m, size_t *size); + +static void encode_scalar(upb_encstate *e, const void *_field_mem, + const upb_msglayout *m, const upb_msglayout_field *f, + bool skip_zero_value) { + const char *field_mem = _field_mem; + int wire_type; + +#define CASE(ctype, type, wtype, encodeval) \ + { \ + ctype val = *(ctype *)field_mem; \ + if (skip_zero_value && val == 0) { \ + return; \ + } \ + encode_##type(e, encodeval); \ + wire_type = wtype; \ + break; \ + } + + switch (f->descriptortype) { + case UPB_DESCRIPTOR_TYPE_DOUBLE: + CASE(double, double, UPB_WIRE_TYPE_64BIT, val); + case UPB_DESCRIPTOR_TYPE_FLOAT: + CASE(float, float, UPB_WIRE_TYPE_32BIT, val); + case UPB_DESCRIPTOR_TYPE_INT64: + case UPB_DESCRIPTOR_TYPE_UINT64: + CASE(uint64_t, varint, UPB_WIRE_TYPE_VARINT, val); + case UPB_DESCRIPTOR_TYPE_UINT32: + CASE(uint32_t, varint, UPB_WIRE_TYPE_VARINT, val); + case UPB_DESCRIPTOR_TYPE_INT32: + case UPB_DESCRIPTOR_TYPE_ENUM: + CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, (int64_t)val); + case UPB_DESCRIPTOR_TYPE_SFIXED64: + case UPB_DESCRIPTOR_TYPE_FIXED64: + CASE(uint64_t, fixed64, UPB_WIRE_TYPE_64BIT, val); + case UPB_DESCRIPTOR_TYPE_FIXED32: + case UPB_DESCRIPTOR_TYPE_SFIXED32: + CASE(uint32_t, fixed32, UPB_WIRE_TYPE_32BIT, val); + case UPB_DESCRIPTOR_TYPE_BOOL: + CASE(bool, varint, UPB_WIRE_TYPE_VARINT, val); + case UPB_DESCRIPTOR_TYPE_SINT32: + CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, encode_zz32(val)); + case UPB_DESCRIPTOR_TYPE_SINT64: + CASE(int64_t, varint, UPB_WIRE_TYPE_VARINT, encode_zz64(val)); + case UPB_DESCRIPTOR_TYPE_STRING: + case UPB_DESCRIPTOR_TYPE_BYTES: { + upb_strview view = *(upb_strview*)field_mem; + if (skip_zero_value && view.size == 0) { + return; + } + encode_bytes(e, view.data, view.size); + encode_varint(e, view.size); + wire_type = UPB_WIRE_TYPE_DELIMITED; + break; + } + case UPB_DESCRIPTOR_TYPE_GROUP: { + size_t size; + void *submsg = *(void **)field_mem; + const upb_msglayout *subm = m->submsgs[f->submsg_index]; + if (submsg == NULL) { + return; + } + if (--e->depth == 0) encode_err(e); + encode_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP); + encode_message(e, submsg, subm, &size); + wire_type = UPB_WIRE_TYPE_START_GROUP; + e->depth++; + break; + } + case UPB_DESCRIPTOR_TYPE_MESSAGE: { + size_t size; + void *submsg = *(void **)field_mem; + const upb_msglayout *subm = m->submsgs[f->submsg_index]; + if (submsg == NULL) { + return; + } + if (--e->depth == 0) encode_err(e); + encode_message(e, submsg, subm, &size); + encode_varint(e, size); + wire_type = UPB_WIRE_TYPE_DELIMITED; + e->depth++; + break; + } + default: + UPB_UNREACHABLE(); + } +#undef CASE + + encode_tag(e, f->number, wire_type); +} + +static void encode_array(upb_encstate *e, const char *field_mem, + const upb_msglayout *m, const upb_msglayout_field *f) { + const upb_array *arr = *(const upb_array**)field_mem; + bool packed = f->label == _UPB_LABEL_PACKED; + size_t pre_len = e->limit - e->ptr; + + if (arr == NULL || arr->len == 0) { + return; + } + +#define VARINT_CASE(ctype, encode) \ + { \ + const ctype *start = _upb_array_constptr(arr); \ + const ctype *ptr = start + arr->len; \ + uint32_t tag = packed ? 0 : (f->number << 3) | UPB_WIRE_TYPE_VARINT; \ + do { \ + ptr--; \ + encode_varint(e, encode); \ + if (tag) encode_varint(e, tag); \ + } while (ptr != start); \ + } \ + break; + +#define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type)) + + switch (f->descriptortype) { + case UPB_DESCRIPTOR_TYPE_DOUBLE: + encode_fixedarray(e, arr, sizeof(double), TAG(UPB_WIRE_TYPE_64BIT)); + break; + case UPB_DESCRIPTOR_TYPE_FLOAT: + encode_fixedarray(e, arr, sizeof(float), TAG(UPB_WIRE_TYPE_32BIT)); + break; + case UPB_DESCRIPTOR_TYPE_SFIXED64: + case UPB_DESCRIPTOR_TYPE_FIXED64: + encode_fixedarray(e, arr, sizeof(uint64_t), TAG(UPB_WIRE_TYPE_64BIT)); + break; + case UPB_DESCRIPTOR_TYPE_FIXED32: + case UPB_DESCRIPTOR_TYPE_SFIXED32: + encode_fixedarray(e, arr, sizeof(uint32_t), TAG(UPB_WIRE_TYPE_32BIT)); + break; + case UPB_DESCRIPTOR_TYPE_INT64: + case UPB_DESCRIPTOR_TYPE_UINT64: + VARINT_CASE(uint64_t, *ptr); + case UPB_DESCRIPTOR_TYPE_UINT32: + VARINT_CASE(uint32_t, *ptr); + case UPB_DESCRIPTOR_TYPE_INT32: + case UPB_DESCRIPTOR_TYPE_ENUM: + VARINT_CASE(int32_t, (int64_t)*ptr); + case UPB_DESCRIPTOR_TYPE_BOOL: + VARINT_CASE(bool, *ptr); + case UPB_DESCRIPTOR_TYPE_SINT32: + VARINT_CASE(int32_t, encode_zz32(*ptr)); + case UPB_DESCRIPTOR_TYPE_SINT64: + VARINT_CASE(int64_t, encode_zz64(*ptr)); + case UPB_DESCRIPTOR_TYPE_STRING: + case UPB_DESCRIPTOR_TYPE_BYTES: { + const upb_strview *start = _upb_array_constptr(arr); + const upb_strview *ptr = start + arr->len; + do { + ptr--; + encode_bytes(e, ptr->data, ptr->size); + encode_varint(e, ptr->size); + encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED); + } while (ptr != start); + return; + } + case UPB_DESCRIPTOR_TYPE_GROUP: { + const void *const*start = _upb_array_constptr(arr); + const void *const*ptr = start + arr->len; + const upb_msglayout *subm = m->submsgs[f->submsg_index]; + if (--e->depth == 0) encode_err(e); + do { + size_t size; + ptr--; + encode_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP); + encode_message(e, *ptr, subm, &size); + encode_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP); + } while (ptr != start); + e->depth++; + return; + } + case UPB_DESCRIPTOR_TYPE_MESSAGE: { + const void *const*start = _upb_array_constptr(arr); + const void *const*ptr = start + arr->len; + const upb_msglayout *subm = m->submsgs[f->submsg_index]; + if (--e->depth == 0) encode_err(e); + do { + size_t size; + ptr--; + encode_message(e, *ptr, subm, &size); + encode_varint(e, size); + encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED); + } while (ptr != start); + e->depth++; + return; + } + } +#undef VARINT_CASE + + if (packed) { + encode_varint(e, e->limit - e->ptr - pre_len); + encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED); + } +} + +static void encode_mapentry(upb_encstate *e, uint32_t number, + const upb_msglayout *layout, + const upb_map_entry *ent) { + const upb_msglayout_field *key_field = &layout->fields[0]; + const upb_msglayout_field *val_field = &layout->fields[1]; + size_t pre_len = e->limit - e->ptr; + size_t size; + encode_scalar(e, &ent->v, layout, val_field, false); + encode_scalar(e, &ent->k, layout, key_field, false); + size = (e->limit - e->ptr) - pre_len; + encode_varint(e, size); + encode_tag(e, number, UPB_WIRE_TYPE_DELIMITED); +} + +static void encode_map(upb_encstate *e, const char *field_mem, + const upb_msglayout *m, const upb_msglayout_field *f) { + const upb_map *map = *(const upb_map**)field_mem; + const upb_msglayout *layout = m->submsgs[f->submsg_index]; + UPB_ASSERT(layout->field_count == 2); + + if (map == NULL) return; + + if (e->options & UPB_ENCODE_DETERMINISTIC) { + _upb_sortedmap sorted; + _upb_mapsorter_pushmap(&e->sorter, layout->fields[0].descriptortype, map, + &sorted); + upb_map_entry ent; + while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) { + encode_mapentry(e, f->number, layout, &ent); + } + _upb_mapsorter_popmap(&e->sorter, &sorted); + } else { + upb_strtable_iter i; + upb_strtable_begin(&i, &map->table); + for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { + upb_strview key = upb_strtable_iter_key(&i); + const upb_value val = upb_strtable_iter_value(&i); + upb_map_entry ent; + _upb_map_fromkey(key, &ent.k, map->key_size); + _upb_map_fromvalue(val, &ent.v, map->val_size); + encode_mapentry(e, f->number, layout, &ent); + } + } +} + +static void encode_scalarfield(upb_encstate *e, const char *msg, + const upb_msglayout *m, + const upb_msglayout_field *f) { + bool skip_empty = false; + if (f->presence == 0) { + /* Proto3 presence. */ + skip_empty = true; + } else if (f->presence > 0) { + /* Proto2 presence: hasbit. */ + if (!_upb_hasbit_field(msg, f)) return; + } else { + /* Field is in a oneof. */ + if (_upb_getoneofcase_field(msg, f) != f->number) return; + } + encode_scalar(e, msg + f->offset, m, f, skip_empty); +} + +static void encode_message(upb_encstate *e, const char *msg, + const upb_msglayout *m, size_t *size) { + size_t pre_len = e->limit - e->ptr; + const upb_msglayout_field *f = &m->fields[m->field_count]; + const upb_msglayout_field *first = &m->fields[0]; + + if ((e->options & UPB_ENCODE_SKIPUNKNOWN) == 0) { + size_t unknown_size; + const char *unknown = upb_msg_getunknown(msg, &unknown_size); + + if (unknown) { + encode_bytes(e, unknown, unknown_size); + } + } + + while (f != first) { + f--; + if (_upb_isrepeated(f)) { + encode_array(e, msg + f->offset, m, f); + } else if (f->label == _UPB_LABEL_MAP) { + encode_map(e, msg + f->offset, m, f); + } else { + encode_scalarfield(e, msg, m, f); + } + } + + *size = (e->limit - e->ptr) - pre_len; +} + +char *upb_encode_ex(const void *msg, const upb_msglayout *l, int options, + upb_arena *arena, size_t *size) { + upb_encstate e; + unsigned depth = (unsigned)options >> 16; + + e.alloc = upb_arena_alloc(arena); + e.buf = NULL; + e.limit = NULL; + e.ptr = NULL; + e.depth = depth ? depth : 64; + e.options = options; + _upb_mapsorter_init(&e.sorter); + char *ret = NULL; + + if (UPB_SETJMP(e.err)) { + *size = 0; + ret = NULL; + } else { + encode_message(&e, msg, l, size); + *size = e.limit - e.ptr; + if (*size == 0) { + static char ch; + ret = &ch; + } else { + UPB_ASSERT(e.ptr); + ret = e.ptr; + } + } + + _upb_mapsorter_destroy(&e.sorter); + return ret; +} + + + + +/** upb_msg *******************************************************************/ + +static const size_t overhead = sizeof(upb_msg_internal); + +static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) { + ptrdiff_t size = sizeof(upb_msg_internal); + return (upb_msg_internal*)((char*)msg - size); +} + +upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a) { + return _upb_msg_new_inl(l, a); +} + +void _upb_msg_clear(upb_msg *msg, const upb_msglayout *l) { + void *mem = UPB_PTR_AT(msg, -sizeof(upb_msg_internal), char); + memset(mem, 0, upb_msg_sizeof(l)); +} + +bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, + upb_arena *arena) { + + upb_msg_internal *in = upb_msg_getinternal(msg); + if (!in->unknown) { + size_t size = 128; + while (size < len) size *= 2; + in->unknown = upb_arena_malloc(arena, size + overhead); + if (!in->unknown) return false; + in->unknown->size = size; + in->unknown->len = 0; + } else if (in->unknown->size - in->unknown->len < len) { + size_t need = in->unknown->len + len; + size_t size = in->unknown->size; + while (size < need) size *= 2; + in->unknown = upb_arena_realloc( + arena, in->unknown, in->unknown->size + overhead, size + overhead); + if (!in->unknown) return false; + in->unknown->size = size; + } + memcpy(UPB_PTR_AT(in->unknown + 1, in->unknown->len, char), data, len); + in->unknown->len += len; + return true; +} + +void _upb_msg_discardunknown_shallow(upb_msg *msg) { + upb_msg_internal *in = upb_msg_getinternal(msg); + if (in->unknown) { + in->unknown->len = 0; + } +} + +const char *upb_msg_getunknown(const upb_msg *msg, size_t *len) { + const upb_msg_internal *in = upb_msg_getinternal_const(msg); + if (in->unknown) { + *len = in->unknown->len; + return (char*)(in->unknown + 1); + } else { + *len = 0; + return NULL; + } +} + +/** upb_array *****************************************************************/ + +bool _upb_array_realloc(upb_array *arr, size_t min_size, upb_arena *arena) { + size_t new_size = UPB_MAX(arr->size, 4); + int elem_size_lg2 = arr->data & 7; + size_t old_bytes = arr->size << elem_size_lg2; + size_t new_bytes; + void* ptr = _upb_array_ptr(arr); + + /* Log2 ceiling of size. */ + while (new_size < min_size) new_size *= 2; + + new_bytes = new_size << elem_size_lg2; + ptr = upb_arena_realloc(arena, ptr, old_bytes, new_bytes); + + if (!ptr) { + return false; + } + + arr->data = _upb_tag_arrptr(ptr, elem_size_lg2); + arr->size = new_size; + return true; +} + +static upb_array *getorcreate_array(upb_array **arr_ptr, int elem_size_lg2, + upb_arena *arena) { + upb_array *arr = *arr_ptr; + if (!arr) { + arr = _upb_array_new(arena, 4, elem_size_lg2); + if (!arr) return NULL; + *arr_ptr = arr; + } + return arr; +} + +void *_upb_array_resize_fallback(upb_array **arr_ptr, size_t size, + int elem_size_lg2, upb_arena *arena) { + upb_array *arr = getorcreate_array(arr_ptr, elem_size_lg2, arena); + return arr && _upb_array_resize(arr, size, arena) ? _upb_array_ptr(arr) + : NULL; +} + +bool _upb_array_append_fallback(upb_array **arr_ptr, const void *value, + int elem_size_lg2, upb_arena *arena) { + upb_array *arr = getorcreate_array(arr_ptr, elem_size_lg2, arena); + if (!arr) return false; + + size_t elems = arr->len; + + if (!_upb_array_resize(arr, elems + 1, arena)) { + return false; + } + + char *data = _upb_array_ptr(arr); + memcpy(data + (elems << elem_size_lg2), value, 1 << elem_size_lg2); + return true; +} + +/** upb_map *******************************************************************/ + +upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size) { + upb_map *map = upb_arena_malloc(a, sizeof(upb_map)); + + if (!map) { + return NULL; + } + + upb_strtable_init2(&map->table, UPB_CTYPE_INT32, 4, upb_arena_alloc(a)); + map->key_size = key_size; + map->val_size = value_size; + + return map; +} + +static void _upb_mapsorter_getkeys(const void *_a, const void *_b, void *a_key, + void *b_key, size_t size) { + const upb_tabent *const*a = _a; + const upb_tabent *const*b = _b; + upb_strview a_tabkey = upb_tabstrview((*a)->key); + upb_strview b_tabkey = upb_tabstrview((*b)->key); + _upb_map_fromkey(a_tabkey, a_key, size); + _upb_map_fromkey(b_tabkey, b_key, size); +} + +static int _upb_mapsorter_cmpi64(const void *_a, const void *_b) { + int64_t a, b; + _upb_mapsorter_getkeys(_a, _b, &a, &b, 8); + return a - b; +} + +static int _upb_mapsorter_cmpu64(const void *_a, const void *_b) { + uint64_t a, b; + _upb_mapsorter_getkeys(_a, _b, &a, &b, 8); + return a - b; +} + +static int _upb_mapsorter_cmpi32(const void *_a, const void *_b) { + int32_t a, b; + _upb_mapsorter_getkeys(_a, _b, &a, &b, 4); + return a - b; +} + +static int _upb_mapsorter_cmpu32(const void *_a, const void *_b) { + uint32_t a, b; + _upb_mapsorter_getkeys(_a, _b, &a, &b, 4); + return a - b; +} + +static int _upb_mapsorter_cmpbool(const void *_a, const void *_b) { + bool a, b; + _upb_mapsorter_getkeys(_a, _b, &a, &b, 1); + return a - b; +} + +static int _upb_mapsorter_cmpstr(const void *_a, const void *_b) { + upb_strview a, b; + _upb_mapsorter_getkeys(_a, _b, &a, &b, UPB_MAPTYPE_STRING); + size_t common_size = UPB_MIN(a.size, b.size); + int cmp = memcmp(a.data, b.data, common_size); + if (cmp) return cmp; + return a.size - b.size; +} + +bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type, + const upb_map *map, _upb_sortedmap *sorted) { + int map_size = _upb_map_size(map); + sorted->start = s->size; + sorted->pos = sorted->start; + sorted->end = sorted->start + map_size; + + /* Grow s->entries if necessary. */ + if (sorted->end > s->cap) { + s->cap = _upb_lg2ceilsize(sorted->end); + s->entries = realloc(s->entries, s->cap * sizeof(*s->entries)); + if (!s->entries) return false; + } + + s->size = sorted->end; + + /* Copy non-empty entries from the table to s->entries. */ + upb_tabent const**dst = &s->entries[sorted->start]; + const upb_tabent *src = map->table.t.entries; + const upb_tabent *end = src + upb_table_size(&map->table.t); + for (; src < end; src++) { + if (!upb_tabent_isempty(src)) { + *dst = src; + dst++; + } + } + UPB_ASSERT(dst == &s->entries[sorted->end]); + + /* Sort entries according to the key type. */ + + int (*compar)(const void *, const void *); + + switch (key_type) { + case UPB_DESCRIPTOR_TYPE_INT64: + case UPB_DESCRIPTOR_TYPE_SFIXED64: + case UPB_DESCRIPTOR_TYPE_SINT64: + compar = _upb_mapsorter_cmpi64; + break; + case UPB_DESCRIPTOR_TYPE_UINT64: + case UPB_DESCRIPTOR_TYPE_FIXED64: + compar = _upb_mapsorter_cmpu64; + break; + case UPB_DESCRIPTOR_TYPE_INT32: + case UPB_DESCRIPTOR_TYPE_SINT32: + case UPB_DESCRIPTOR_TYPE_SFIXED32: + case UPB_DESCRIPTOR_TYPE_ENUM: + compar = _upb_mapsorter_cmpi32; + break; + case UPB_DESCRIPTOR_TYPE_UINT32: + case UPB_DESCRIPTOR_TYPE_FIXED32: + compar = _upb_mapsorter_cmpu32; + break; + case UPB_DESCRIPTOR_TYPE_BOOL: + compar = _upb_mapsorter_cmpbool; + break; + case UPB_DESCRIPTOR_TYPE_STRING: + compar = _upb_mapsorter_cmpstr; + break; + default: + UPB_UNREACHABLE(); + } + + qsort(&s->entries[sorted->start], map_size, sizeof(*s->entries), compar); + return true; +} +/* +** upb_table Implementation +** +** Implementation is heavily inspired by Lua's ltable.c. +*/ + +#include + +#include "third_party/wyhash/wyhash.h" + +/* Must be last. */ + +#define UPB_MAXARRSIZE 16 /* 64k. */ + +/* From Chromium. */ +#define ARRAY_SIZE(x) \ + ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x]))))) + +static const double MAX_LOAD = 0.85; + +/* The minimum utilization of the array part of a mixed hash/array table. This + * is a speed/memory-usage tradeoff (though it's not straightforward because of + * cache effects). The lower this is, the more memory we'll use. */ +static const double MIN_DENSITY = 0.1; + +bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; } + +int log2ceil(uint64_t v) { + int ret = 0; + bool pow2 = is_pow2(v); + while (v >>= 1) ret++; + ret = pow2 ? ret : ret + 1; /* Ceiling. */ + return UPB_MIN(UPB_MAXARRSIZE, ret); +} + +char *upb_strdup(const char *s, upb_alloc *a) { + return upb_strdup2(s, strlen(s), a); +} + +char *upb_strdup2(const char *s, size_t len, upb_alloc *a) { + size_t n; + char *p; + + /* Prevent overflow errors. */ + if (len == SIZE_MAX) return NULL; + /* Always null-terminate, even if binary data; but don't rely on the input to + * have a null-terminating byte since it may be a raw binary buffer. */ + n = len + 1; + p = upb_malloc(a, n); + if (p) { + memcpy(p, s, len); + p[len] = 0; + } + return p; +} + +/* A type to represent the lookup key of either a strtable or an inttable. */ +typedef union { + uintptr_t num; + struct { + const char *str; + size_t len; + } str; +} lookupkey_t; + +static lookupkey_t strkey2(const char *str, size_t len) { + lookupkey_t k; + k.str.str = str; + k.str.len = len; + return k; +} + +static lookupkey_t intkey(uintptr_t key) { + lookupkey_t k; + k.num = key; + return k; +} + +typedef uint32_t hashfunc_t(upb_tabkey key); +typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2); + +/* Base table (shared code) ***************************************************/ + +/* For when we need to cast away const. */ +static upb_tabent *mutable_entries(upb_table *t) { + return (upb_tabent*)t->entries; +} + +static bool isfull(upb_table *t) { + return t->count == t->max_count; +} + +static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) { + size_t bytes; + + t->count = 0; + t->size_lg2 = size_lg2; + t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0; + t->max_count = upb_table_size(t) * MAX_LOAD; + bytes = upb_table_size(t) * sizeof(upb_tabent); + if (bytes > 0) { + t->entries = upb_malloc(a, bytes); + if (!t->entries) return false; + memset(mutable_entries(t), 0, bytes); + } else { + t->entries = NULL; + } + return true; +} + +static void uninit(upb_table *t, upb_alloc *a) { + upb_free(a, mutable_entries(t)); +} + +static upb_tabent *emptyent(upb_table *t, upb_tabent *e) { + upb_tabent *begin = mutable_entries(t); + upb_tabent *end = begin + upb_table_size(t); + for (e = e + 1; e < end; e++) { + if (upb_tabent_isempty(e)) return e; + } + for (e = begin; e < end; e++) { + if (upb_tabent_isempty(e)) return e; + } + UPB_ASSERT(false); + return NULL; +} + +static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) { + return (upb_tabent*)upb_getentry(t, hash); +} + +static const upb_tabent *findentry(const upb_table *t, lookupkey_t key, + uint32_t hash, eqlfunc_t *eql) { + const upb_tabent *e; + + if (t->size_lg2 == 0) return NULL; + e = upb_getentry(t, hash); + if (upb_tabent_isempty(e)) return NULL; + while (1) { + if (eql(e->key, key)) return e; + if ((e = e->next) == NULL) return NULL; + } +} + +static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key, + uint32_t hash, eqlfunc_t *eql) { + return (upb_tabent*)findentry(t, key, hash, eql); +} + +static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v, + uint32_t hash, eqlfunc_t *eql) { + const upb_tabent *e = findentry(t, key, hash, eql); + if (e) { + if (v) { + _upb_value_setval(v, e->val.val); + } + return true; + } else { + return false; + } +} + +/* The given key must not already exist in the table. */ +static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey, + upb_value val, uint32_t hash, + hashfunc_t *hashfunc, eqlfunc_t *eql) { + upb_tabent *mainpos_e; + upb_tabent *our_e; + + UPB_ASSERT(findentry(t, key, hash, eql) == NULL); + + t->count++; + mainpos_e = getentry_mutable(t, hash); + our_e = mainpos_e; + + if (upb_tabent_isempty(mainpos_e)) { + /* Our main position is empty; use it. */ + our_e->next = NULL; + } else { + /* Collision. */ + upb_tabent *new_e = emptyent(t, mainpos_e); + /* Head of collider's chain. */ + upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key)); + if (chain == mainpos_e) { + /* Existing ent is in its main position (it has the same hash as us, and + * is the head of our chain). Insert to new ent and append to this chain. */ + new_e->next = mainpos_e->next; + mainpos_e->next = new_e; + our_e = new_e; + } else { + /* Existing ent is not in its main position (it is a node in some other + * chain). This implies that no existing ent in the table has our hash. + * Evict it (updating its chain) and use its ent for head of our chain. */ + *new_e = *mainpos_e; /* copies next. */ + while (chain->next != mainpos_e) { + chain = (upb_tabent*)chain->next; + UPB_ASSERT(chain); + } + chain->next = new_e; + our_e = mainpos_e; + our_e->next = NULL; + } + } + our_e->key = tabkey; + our_e->val.val = val.val; + UPB_ASSERT(findentry(t, key, hash, eql) == our_e); +} + +static bool rm(upb_table *t, lookupkey_t key, upb_value *val, + upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) { + upb_tabent *chain = getentry_mutable(t, hash); + if (upb_tabent_isempty(chain)) return false; + if (eql(chain->key, key)) { + /* Element to remove is at the head of its chain. */ + t->count--; + if (val) _upb_value_setval(val, chain->val.val); + if (removed) *removed = chain->key; + if (chain->next) { + upb_tabent *move = (upb_tabent*)chain->next; + *chain = *move; + move->key = 0; /* Make the slot empty. */ + } else { + chain->key = 0; /* Make the slot empty. */ + } + return true; + } else { + /* Element to remove is either in a non-head position or not in the + * table. */ + while (chain->next && !eql(chain->next->key, key)) { + chain = (upb_tabent*)chain->next; + } + if (chain->next) { + /* Found element to remove. */ + upb_tabent *rm = (upb_tabent*)chain->next; + t->count--; + if (val) _upb_value_setval(val, chain->next->val.val); + if (removed) *removed = rm->key; + rm->key = 0; /* Make the slot empty. */ + chain->next = rm->next; + return true; + } else { + /* Element to remove is not in the table. */ + return false; + } + } +} + +static size_t next(const upb_table *t, size_t i) { + do { + if (++i >= upb_table_size(t)) + return SIZE_MAX - 1; /* Distinct from -1. */ + } while(upb_tabent_isempty(&t->entries[i])); + + return i; +} + +static size_t begin(const upb_table *t) { + return next(t, -1); +} + + +/* upb_strtable ***************************************************************/ + +/* A simple "subclass" of upb_table that only adds a hash function for strings. */ + +static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) { + uint32_t len = (uint32_t) k2.str.len; + char *str = upb_malloc(a, k2.str.len + sizeof(uint32_t) + 1); + if (str == NULL) return 0; + memcpy(str, &len, sizeof(uint32_t)); + if (k2.str.len) memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len); + str[sizeof(uint32_t) + k2.str.len] = '\0'; + return (uintptr_t)str; +} + +static uint32_t table_hash(const char *p, size_t n) { + return wyhash(p, n, 0, _wyp); +} + +static uint32_t strhash(upb_tabkey key) { + uint32_t len; + char *str = upb_tabstr(key, &len); + return table_hash(str, len); +} + +static bool streql(upb_tabkey k1, lookupkey_t k2) { + uint32_t len; + char *str = upb_tabstr(k1, &len); + return len == k2.str.len && (len == 0 || memcmp(str, k2.str.str, len) == 0); +} + +bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, + size_t expected_size, upb_alloc *a) { + UPB_UNUSED(ctype); /* TODO(haberman): rm */ + // Multiply by approximate reciprocal of MAX_LOAD (0.85), with pow2 denominator. + size_t need_entries = (expected_size + 1) * 1204 / 1024; + UPB_ASSERT(need_entries >= expected_size * 0.85); + int size_lg2 = _upb_lg2ceil(need_entries); + return init(&t->t, size_lg2, a); +} + +void upb_strtable_clear(upb_strtable *t) { + size_t bytes = upb_table_size(&t->t) * sizeof(upb_tabent); + t->t.count = 0; + memset((char*)t->t.entries, 0, bytes); +} + +void upb_strtable_uninit2(upb_strtable *t, upb_alloc *a) { + size_t i; + for (i = 0; i < upb_table_size(&t->t); i++) + upb_free(a, (void*)t->t.entries[i].key); + uninit(&t->t, a); +} + +bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) { + upb_strtable new_table; + upb_strtable_iter i; + + if (!init(&new_table.t, size_lg2, a)) + return false; + upb_strtable_begin(&i, t); + for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) { + upb_strview key = upb_strtable_iter_key(&i); + upb_strtable_insert3( + &new_table, key.data, key.size, + upb_strtable_iter_value(&i), a); + } + upb_strtable_uninit2(t, a); + *t = new_table; + return true; +} + +bool upb_strtable_insert3(upb_strtable *t, const char *k, size_t len, + upb_value v, upb_alloc *a) { + lookupkey_t key; + upb_tabkey tabkey; + uint32_t hash; + + if (isfull(&t->t)) { + /* Need to resize. New table of double the size, add old elements to it. */ + if (!upb_strtable_resize(t, t->t.size_lg2 + 1, a)) { + return false; + } + } + + key = strkey2(k, len); + tabkey = strcopy(key, a); + if (tabkey == 0) return false; + + hash = table_hash(key.str.str, key.str.len); + insert(&t->t, key, tabkey, v, hash, &strhash, &streql); + return true; +} + +bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len, + upb_value *v) { + uint32_t hash = table_hash(key, len); + return lookup(&t->t, strkey2(key, len), v, hash, &streql); +} + +bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len, + upb_value *val, upb_alloc *alloc) { + uint32_t hash = table_hash(key, len); + upb_tabkey tabkey; + if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) { + if (alloc) { + /* Arena-based allocs don't need to free and won't pass this. */ + upb_free(alloc, (void*)tabkey); + } + return true; + } else { + return false; + } +} + +/* Iteration */ + +void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) { + i->t = t; + i->index = begin(&t->t); +} + +void upb_strtable_next(upb_strtable_iter *i) { + i->index = next(&i->t->t, i->index); +} + +bool upb_strtable_done(const upb_strtable_iter *i) { + if (!i->t) return true; + return i->index >= upb_table_size(&i->t->t) || + upb_tabent_isempty(str_tabent(i)); +} + +upb_strview upb_strtable_iter_key(const upb_strtable_iter *i) { + upb_strview key; + uint32_t len; + UPB_ASSERT(!upb_strtable_done(i)); + key.data = upb_tabstr(str_tabent(i)->key, &len); + key.size = len; + return key; +} + +upb_value upb_strtable_iter_value(const upb_strtable_iter *i) { + UPB_ASSERT(!upb_strtable_done(i)); + return _upb_value_val(str_tabent(i)->val.val); +} + +void upb_strtable_iter_setdone(upb_strtable_iter *i) { + i->t = NULL; + i->index = SIZE_MAX; +} + +bool upb_strtable_iter_isequal(const upb_strtable_iter *i1, + const upb_strtable_iter *i2) { + if (upb_strtable_done(i1) && upb_strtable_done(i2)) + return true; + return i1->t == i2->t && i1->index == i2->index; +} + + +/* upb_inttable ***************************************************************/ + +/* For inttables we use a hybrid structure where small keys are kept in an + * array and large keys are put in the hash table. */ + +static uint32_t inthash(upb_tabkey key) { return upb_inthash(key); } + +static bool inteql(upb_tabkey k1, lookupkey_t k2) { + return k1 == k2.num; +} + +static upb_tabval *mutable_array(upb_inttable *t) { + return (upb_tabval*)t->array; +} + +static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) { + if (key < t->array_size) { + return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL; + } else { + upb_tabent *e = + findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql); + return e ? &e->val : NULL; + } +} + +static const upb_tabval *inttable_val_const(const upb_inttable *t, + uintptr_t key) { + return inttable_val((upb_inttable*)t, key); +} + +size_t upb_inttable_count(const upb_inttable *t) { + return t->t.count + t->array_count; +} + +static void check(upb_inttable *t) { + UPB_UNUSED(t); +#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG) + { + /* This check is very expensive (makes inserts/deletes O(N)). */ + size_t count = 0; + upb_inttable_iter i; + upb_inttable_begin(&i, t); + for(; !upb_inttable_done(&i); upb_inttable_next(&i), count++) { + UPB_ASSERT(upb_inttable_lookup(t, upb_inttable_iter_key(&i), NULL)); + } + UPB_ASSERT(count == upb_inttable_count(t)); + } +#endif +} + +bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2, + upb_alloc *a) { + size_t array_bytes; + + if (!init(&t->t, hsize_lg2, a)) return false; + /* Always make the array part at least 1 long, so that we know key 0 + * won't be in the hash part, which simplifies things. */ + t->array_size = UPB_MAX(1, asize); + t->array_count = 0; + array_bytes = t->array_size * sizeof(upb_value); + t->array = upb_malloc(a, array_bytes); + if (!t->array) { + uninit(&t->t, a); + return false; + } + memset(mutable_array(t), 0xff, array_bytes); + check(t); + return true; +} + +bool upb_inttable_init2(upb_inttable *t, upb_ctype_t ctype, upb_alloc *a) { + UPB_UNUSED(ctype); /* TODO(haberman): rm */ + return upb_inttable_sizedinit(t, 0, 4, a); +} + +void upb_inttable_uninit2(upb_inttable *t, upb_alloc *a) { + uninit(&t->t, a); + upb_free(a, mutable_array(t)); +} + +bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val, + upb_alloc *a) { + upb_tabval tabval; + tabval.val = val.val; + UPB_ASSERT(upb_arrhas(tabval)); /* This will reject (uint64_t)-1. Fix this. */ + + if (key < t->array_size) { + UPB_ASSERT(!upb_arrhas(t->array[key])); + t->array_count++; + mutable_array(t)[key].val = val.val; + } else { + if (isfull(&t->t)) { + /* Need to resize the hash part, but we re-use the array part. */ + size_t i; + upb_table new_table; + + if (!init(&new_table, t->t.size_lg2 + 1, a)) { + return false; + } + + for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) { + const upb_tabent *e = &t->t.entries[i]; + uint32_t hash; + upb_value v; + + _upb_value_setval(&v, e->val.val); + hash = upb_inthash(e->key); + insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql); + } + + UPB_ASSERT(t->t.count == new_table.count); + + uninit(&t->t, a); + t->t = new_table; + } + insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql); + } + check(t); + return true; +} + +bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) { + const upb_tabval *table_v = inttable_val_const(t, key); + if (!table_v) return false; + if (v) _upb_value_setval(v, table_v->val); + return true; +} + +bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) { + upb_tabval *table_v = inttable_val(t, key); + if (!table_v) return false; + table_v->val = val.val; + return true; +} + +bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) { + bool success; + if (key < t->array_size) { + if (upb_arrhas(t->array[key])) { + upb_tabval empty = UPB_TABVALUE_EMPTY_INIT; + t->array_count--; + if (val) { + _upb_value_setval(val, t->array[key].val); + } + mutable_array(t)[key] = empty; + success = true; + } else { + success = false; + } + } else { + success = rm(&t->t, intkey(key), val, NULL, upb_inthash(key), &inteql); + } + check(t); + return success; +} + +bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val, + upb_alloc *a) { + return upb_inttable_insert2(t, (uintptr_t)key, val, a); +} + +bool upb_inttable_lookupptr(const upb_inttable *t, const void *key, + upb_value *v) { + return upb_inttable_lookup(t, (uintptr_t)key, v); +} + +bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) { + return upb_inttable_remove(t, (uintptr_t)key, val); +} + +void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) { + /* A power-of-two histogram of the table keys. */ + size_t counts[UPB_MAXARRSIZE + 1] = {0}; + + /* The max key in each bucket. */ + uintptr_t max[UPB_MAXARRSIZE + 1] = {0}; + + upb_inttable_iter i; + size_t arr_count; + int size_lg2; + upb_inttable new_t; + + upb_inttable_begin(&i, t); + for (; !upb_inttable_done(&i); upb_inttable_next(&i)) { + uintptr_t key = upb_inttable_iter_key(&i); + int bucket = log2ceil(key); + max[bucket] = UPB_MAX(max[bucket], key); + counts[bucket]++; + } + + /* Find the largest power of two that satisfies the MIN_DENSITY + * definition (while actually having some keys). */ + arr_count = upb_inttable_count(t); + + for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) { + if (counts[size_lg2] == 0) { + /* We can halve again without losing any entries. */ + continue; + } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) { + break; + } + + arr_count -= counts[size_lg2]; + } + + UPB_ASSERT(arr_count <= upb_inttable_count(t)); + + { + /* Insert all elements into new, perfectly-sized table. */ + size_t arr_size = max[size_lg2] + 1; /* +1 so arr[max] will fit. */ + size_t hash_count = upb_inttable_count(t) - arr_count; + size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0; + int hashsize_lg2 = log2ceil(hash_size); + + upb_inttable_sizedinit(&new_t, arr_size, hashsize_lg2, a); + upb_inttable_begin(&i, t); + for (; !upb_inttable_done(&i); upb_inttable_next(&i)) { + uintptr_t k = upb_inttable_iter_key(&i); + upb_inttable_insert2(&new_t, k, upb_inttable_iter_value(&i), a); + } + UPB_ASSERT(new_t.array_size == arr_size); + UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2); + } + upb_inttable_uninit2(t, a); + *t = new_t; +} + +/* Iteration. */ + +static const upb_tabent *int_tabent(const upb_inttable_iter *i) { + UPB_ASSERT(!i->array_part); + return &i->t->t.entries[i->index]; +} + +static upb_tabval int_arrent(const upb_inttable_iter *i) { + UPB_ASSERT(i->array_part); + return i->t->array[i->index]; +} + +void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) { + i->t = t; + i->index = -1; + i->array_part = true; + upb_inttable_next(i); +} + +void upb_inttable_next(upb_inttable_iter *iter) { + const upb_inttable *t = iter->t; + if (iter->array_part) { + while (++iter->index < t->array_size) { + if (upb_arrhas(int_arrent(iter))) { + return; + } + } + iter->array_part = false; + iter->index = begin(&t->t); + } else { + iter->index = next(&t->t, iter->index); + } +} + +bool upb_inttable_done(const upb_inttable_iter *i) { + if (!i->t) return true; + if (i->array_part) { + return i->index >= i->t->array_size || + !upb_arrhas(int_arrent(i)); + } else { + return i->index >= upb_table_size(&i->t->t) || + upb_tabent_isempty(int_tabent(i)); + } +} + +uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) { + UPB_ASSERT(!upb_inttable_done(i)); + return i->array_part ? i->index : int_tabent(i)->key; +} + +upb_value upb_inttable_iter_value(const upb_inttable_iter *i) { + UPB_ASSERT(!upb_inttable_done(i)); + return _upb_value_val( + i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val); +} + +void upb_inttable_iter_setdone(upb_inttable_iter *i) { + i->t = NULL; + i->index = SIZE_MAX; + i->array_part = false; +} + +bool upb_inttable_iter_isequal(const upb_inttable_iter *i1, + const upb_inttable_iter *i2) { + if (upb_inttable_done(i1) && upb_inttable_done(i2)) + return true; + return i1->t == i2->t && i1->index == i2->index && + i1->array_part == i2->array_part; +} + + +#include +#include +#include +#include +#include +#include +#include + + +/* upb_status *****************************************************************/ + +void upb_status_clear(upb_status *status) { + if (!status) return; + status->ok = true; + status->msg[0] = '\0'; +} + +bool upb_ok(const upb_status *status) { return status->ok; } + +const char *upb_status_errmsg(const upb_status *status) { return status->msg; } + +void upb_status_seterrmsg(upb_status *status, const char *msg) { + if (!status) return; + status->ok = false; + strncpy(status->msg, msg, UPB_STATUS_MAX_MESSAGE - 1); + status->msg[UPB_STATUS_MAX_MESSAGE - 1] = '\0'; +} + +void upb_status_seterrf(upb_status *status, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + upb_status_vseterrf(status, fmt, args); + va_end(args); +} + +void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) { + if (!status) return; + status->ok = false; + vsnprintf(status->msg, sizeof(status->msg), fmt, args); + status->msg[UPB_STATUS_MAX_MESSAGE - 1] = '\0'; +} + +void upb_status_vappenderrf(upb_status *status, const char *fmt, va_list args) { + size_t len; + if (!status) return; + status->ok = false; + len = strlen(status->msg); + vsnprintf(status->msg + len, sizeof(status->msg) - len, fmt, args); + status->msg[UPB_STATUS_MAX_MESSAGE - 1] = '\0'; +} + +/* upb_alloc ******************************************************************/ + +static void *upb_global_allocfunc(upb_alloc *alloc, void *ptr, size_t oldsize, + size_t size) { + UPB_UNUSED(alloc); + UPB_UNUSED(oldsize); + if (size == 0) { + free(ptr); + return NULL; + } else { + return realloc(ptr, size); + } +} + +upb_alloc upb_alloc_global = {&upb_global_allocfunc}; + +/* upb_arena ******************************************************************/ + +/* Be conservative and choose 16 in case anyone is using SSE. */ + +struct mem_block { + struct mem_block *next; + uint32_t size; + uint32_t cleanups; + /* Data follows. */ +}; + +typedef struct cleanup_ent { + upb_cleanup_func *cleanup; + void *ud; +} cleanup_ent; + +static const size_t memblock_reserve = UPB_ALIGN_UP(sizeof(mem_block), 16); + +static upb_arena *arena_findroot(upb_arena *a) { + /* Path splitting keeps time complexity down, see: + * https://en.wikipedia.org/wiki/Disjoint-set_data_structure */ + while (a->parent != a) { + upb_arena *next = a->parent; + a->parent = next->parent; + a = next; + } + return a; +} + +static void upb_arena_addblock(upb_arena *a, upb_arena *root, void *ptr, + size_t size) { + mem_block *block = ptr; + + /* The block is for arena |a|, but should appear in the freelist of |root|. */ + block->next = root->freelist; + block->size = (uint32_t)size; + block->cleanups = 0; + root->freelist = block; + a->last_size = block->size; + if (!root->freelist_tail) root->freelist_tail = block; + + a->head.ptr = UPB_PTR_AT(block, memblock_reserve, char); + a->head.end = UPB_PTR_AT(block, size, char); + a->cleanups = &block->cleanups; + + UPB_POISON_MEMORY_REGION(a->head.ptr, a->head.end - a->head.ptr); +} + +static bool upb_arena_allocblock(upb_arena *a, size_t size) { + upb_arena *root = arena_findroot(a); + size_t block_size = UPB_MAX(size, a->last_size * 2) + memblock_reserve; + mem_block *block = upb_malloc(root->block_alloc, block_size); + + if (!block) return false; + upb_arena_addblock(a, root, block, block_size); + return true; +} + +void *_upb_arena_slowmalloc(upb_arena *a, size_t size) { + if (!upb_arena_allocblock(a, size)) return NULL; /* Out of memory. */ + UPB_ASSERT(_upb_arenahas(a) >= size); + return upb_arena_malloc(a, size); +} + +static void *upb_arena_doalloc(upb_alloc *alloc, void *ptr, size_t oldsize, + size_t size) { + upb_arena *a = (upb_arena*)alloc; /* upb_alloc is initial member. */ + return upb_arena_realloc(a, ptr, oldsize, size); +} + +/* Public Arena API ***********************************************************/ + +upb_arena *arena_initslow(void *mem, size_t n, upb_alloc *alloc) { + const size_t first_block_overhead = sizeof(upb_arena) + memblock_reserve; + upb_arena *a; + + /* We need to malloc the initial block. */ + n = first_block_overhead + 256; + if (!alloc || !(mem = upb_malloc(alloc, n))) { + return NULL; + } + + a = UPB_PTR_AT(mem, n - sizeof(*a), upb_arena); + n -= sizeof(*a); + + a->head.alloc.func = &upb_arena_doalloc; + a->block_alloc = alloc; + a->parent = a; + a->refcount = 1; + a->freelist = NULL; + a->freelist_tail = NULL; + + upb_arena_addblock(a, a, mem, n); + + return a; +} + +upb_arena *upb_arena_init(void *mem, size_t n, upb_alloc *alloc) { + upb_arena *a; + + /* Round block size down to alignof(*a) since we will allocate the arena + * itself at the end. */ + n = UPB_ALIGN_DOWN(n, UPB_ALIGN_OF(upb_arena)); + + if (UPB_UNLIKELY(n < sizeof(upb_arena))) { + return arena_initslow(mem, n, alloc); + } + + a = UPB_PTR_AT(mem, n - sizeof(*a), upb_arena); + + a->head.alloc.func = &upb_arena_doalloc; + a->block_alloc = alloc; + a->parent = a; + a->refcount = 1; + a->last_size = UPB_MAX(128, n); + a->head.ptr = mem; + a->head.end = UPB_PTR_AT(mem, n - sizeof(*a), char); + a->freelist = NULL; + a->cleanups = NULL; + + return a; +} + +static void arena_dofree(upb_arena *a) { + mem_block *block = a->freelist; + UPB_ASSERT(a->parent == a); + UPB_ASSERT(a->refcount == 0); + + while (block) { + /* Load first since we are deleting block. */ + mem_block *next = block->next; + + if (block->cleanups > 0) { + cleanup_ent *end = UPB_PTR_AT(block, block->size, void); + cleanup_ent *ptr = end - block->cleanups; + + for (; ptr < end; ptr++) { + ptr->cleanup(ptr->ud); + } + } + + upb_free(a->block_alloc, block); + block = next; + } +} + +void upb_arena_free(upb_arena *a) { + a = arena_findroot(a); + if (--a->refcount == 0) arena_dofree(a); +} + +bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func) { + cleanup_ent *ent; + + if (!a->cleanups || _upb_arenahas(a) < sizeof(cleanup_ent)) { + if (!upb_arena_allocblock(a, 128)) return false; /* Out of memory. */ + UPB_ASSERT(_upb_arenahas(a) >= sizeof(cleanup_ent)); + } + + a->head.end -= sizeof(cleanup_ent); + ent = (cleanup_ent*)a->head.end; + (*a->cleanups)++; + UPB_UNPOISON_MEMORY_REGION(ent, sizeof(cleanup_ent)); + + ent->cleanup = func; + ent->ud = ud; + + return true; +} + +void upb_arena_fuse(upb_arena *a1, upb_arena *a2) { + upb_arena *r1 = arena_findroot(a1); + upb_arena *r2 = arena_findroot(a2); + + if (r1 == r2) return; /* Already fused. */ + + /* We want to join the smaller tree to the larger tree. + * So swap first if they are backwards. */ + if (r1->refcount < r2->refcount) { + upb_arena *tmp = r1; + r1 = r2; + r2 = tmp; + } + + /* r1 takes over r2's freelist and refcount. */ + r1->refcount += r2->refcount; + if (r2->freelist_tail) { + UPB_ASSERT(r2->freelist_tail->next == NULL); + r2->freelist_tail->next = r1->freelist; + r1->freelist = r2->freelist; + } + r2->parent = r1; +} +// Fast decoder: ~3x the speed of decode.c, but x86-64 specific. +// Also the table size grows by 2x. +// +// Could potentially be ported to ARM64 or other 64-bit archs that pass at +// least six arguments in registers. +// +// The overall design is to create specialized functions for every possible +// field type (eg. oneof boolean field with a 1 byte tag) and then dispatch +// to the specialized function as quickly as possible. + + + +/* Must be last. */ + +#if UPB_FASTTABLE + +// The standard set of arguments passed to each parsing function. +// Thanks to x86-64 calling conventions, these will stay in registers. +#define UPB_PARSE_PARAMS \ + upb_decstate *d, const char *ptr, upb_msg *msg, intptr_t table, \ + uint64_t hasbits, uint64_t data + +#define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data + +#define RETURN_GENERIC(m) \ + /* fprintf(stderr, m); */ \ + return fastdecode_generic(d, ptr, msg, table, hasbits, 0); + +typedef enum { + CARD_s = 0, /* Singular (optional, non-repeated) */ + CARD_o = 1, /* Oneof */ + CARD_r = 2, /* Repeated */ + CARD_p = 3 /* Packed Repeated */ +} upb_card; + +UPB_NOINLINE +static const char *fastdecode_isdonefallback(upb_decstate *d, const char *ptr, + upb_msg *msg, intptr_t table, + uint64_t hasbits, int overrun) { + ptr = decode_isdonefallback_inl(d, ptr, overrun); + if (ptr == NULL) { + return fastdecode_err(d); + } + uint16_t tag = fastdecode_loadtag(ptr); + return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, tag); +} + +UPB_FORCEINLINE +static const char *fastdecode_dispatch(upb_decstate *d, const char *ptr, + upb_msg *msg, intptr_t table, + uint64_t hasbits) { + if (UPB_UNLIKELY(ptr >= d->limit_ptr)) { + int overrun = ptr - d->end; + if (UPB_LIKELY(overrun == d->limit)) { + // Parse is finished. + *(uint32_t*)msg |= hasbits; // Sync hasbits. + return ptr; + } else { + return fastdecode_isdonefallback(d, ptr, msg, table, hasbits, overrun); + } + } + + // Read two bytes of tag data (for a one-byte tag, the high byte is junk). + uint16_t tag = fastdecode_loadtag(ptr); + return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, tag); +} + +UPB_FORCEINLINE +static bool fastdecode_checktag(uint64_t data, int tagbytes) { + if (tagbytes == 1) { + return (data & 0xff) == 0; + } else { + return (data & 0xffff) == 0; + } +} + +UPB_FORCEINLINE +static const char *fastdecode_longsize(const char *ptr, int *size) { + int i; + UPB_ASSERT(*size & 0x80); + *size &= 0xff; + for (i = 0; i < 3; i++) { + ptr++; + size_t byte = (uint8_t)ptr[-1]; + *size += (byte - 1) << (7 + 7 * i); + if (UPB_LIKELY((byte & 0x80) == 0)) return ptr; + } + ptr++; + size_t byte = (uint8_t)ptr[-1]; + // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected + // for a 32 bit varint. + if (UPB_UNLIKELY(byte >= 8)) return NULL; + *size += (byte - 1) << 28; + return ptr; +} + +UPB_FORCEINLINE +static bool fastdecode_boundscheck(const char *ptr, size_t len, + const char *end) { + uintptr_t uptr = (uintptr_t)ptr; + uintptr_t uend = (uintptr_t)end + 16; + uintptr_t res = uptr + len; + return res < uptr || res > uend; +} + +UPB_FORCEINLINE +static bool fastdecode_boundscheck2(const char *ptr, size_t len, + const char *end) { + // This is one extra branch compared to the more normal: + // return (size_t)(end - ptr) < size; + // However it is one less computation if we are just about to use "ptr + len": + // https://godbolt.org/z/35YGPz + // In microbenchmarks this shows an overall 4% improvement. + uintptr_t uptr = (uintptr_t)ptr; + uintptr_t uend = (uintptr_t)end; + uintptr_t res = uptr + len; + return res < uptr || res > uend; +} + +typedef const char *fastdecode_delimfunc(upb_decstate *d, const char *ptr, + void *ctx); + +UPB_FORCEINLINE +static const char *fastdecode_delimited(upb_decstate *d, const char *ptr, + fastdecode_delimfunc *func, void *ctx) { + ptr++; + int len = (int8_t)ptr[-1]; + if (fastdecode_boundscheck2(ptr, len, d->limit_ptr)) { + // Slow case: Sub-message is >=128 bytes and/or exceeds the current buffer. + // If it exceeds the buffer limit, limit/limit_ptr will change during + // sub-message parsing, so we need to preserve delta, not limit. + if (UPB_UNLIKELY(len & 0x80)) { + // Size varint >1 byte (length >= 128). + ptr = fastdecode_longsize(ptr, &len); + if (!ptr) { + // Corrupt wire format: size exceeded INT_MAX. + return NULL; + } + } + if (ptr - d->end + (int)len > d->limit) { + // Corrupt wire format: invalid limit. + return NULL; + } + int delta = decode_pushlimit(d, ptr, len); + ptr = func(d, ptr, ctx); + decode_poplimit(d, ptr, delta); + } else { + // Fast case: Sub-message is <128 bytes and fits in the current buffer. + // This means we can preserve limit/limit_ptr verbatim. + const char *saved_limit_ptr = d->limit_ptr; + int saved_limit = d->limit; + d->limit_ptr = ptr + len; + d->limit = d->limit_ptr - d->end; + UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit)); + ptr = func(d, ptr, ctx); + d->limit_ptr = saved_limit_ptr; + d->limit = saved_limit; + UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit)); + } + return ptr; +} + +/* singular, oneof, repeated field handling ***********************************/ + +typedef struct { + upb_array *arr; + void *end; +} fastdecode_arr; + +typedef enum { + FD_NEXT_ATLIMIT, + FD_NEXT_SAMEFIELD, + FD_NEXT_OTHERFIELD +} fastdecode_next; + +typedef struct { + void *dst; + fastdecode_next next; + uint32_t tag; +} fastdecode_nextret; + +UPB_FORCEINLINE +static void *fastdecode_resizearr(upb_decstate *d, void *dst, + fastdecode_arr *farr, int valbytes) { + if (UPB_UNLIKELY(dst == farr->end)) { + size_t old_size = farr->arr->size; + size_t old_bytes = old_size * valbytes; + size_t new_size = old_size * 2; + size_t new_bytes = new_size * valbytes; + char *old_ptr = _upb_array_ptr(farr->arr); + char *new_ptr = upb_arena_realloc(&d->arena, old_ptr, old_bytes, new_bytes); + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); + farr->arr->size = new_size; + farr->arr->data = _upb_array_tagptr(new_ptr, elem_size_lg2); + dst = (void*)(new_ptr + (old_size * valbytes)); + farr->end = (void*)(new_ptr + (new_size * valbytes)); + } + return dst; +} + +UPB_FORCEINLINE +static bool fastdecode_tagmatch(uint32_t tag, uint64_t data, int tagbytes) { + if (tagbytes == 1) { + return (uint8_t)tag == (uint8_t)data; + } else { + return (uint16_t)tag == (uint16_t)data; + } +} + +UPB_FORCEINLINE +static void fastdecode_commitarr(void *dst, fastdecode_arr *farr, + int valbytes) { + farr->arr->len = + (size_t)((char *)dst - (char *)_upb_array_ptr(farr->arr)) / valbytes; +} + +UPB_FORCEINLINE +static fastdecode_nextret fastdecode_nextrepeated(upb_decstate *d, void *dst, + const char **ptr, + fastdecode_arr *farr, + uint64_t data, int tagbytes, + int valbytes) { + fastdecode_nextret ret; + dst = (char *)dst + valbytes; + + if (UPB_LIKELY(!decode_isdone(d, ptr))) { + ret.tag = fastdecode_loadtag(*ptr); + if (fastdecode_tagmatch(ret.tag, data, tagbytes)) { + ret.next = FD_NEXT_SAMEFIELD; + } else { + fastdecode_commitarr(dst, farr, valbytes); + ret.next = FD_NEXT_OTHERFIELD; + } + } else { + fastdecode_commitarr(dst, farr, valbytes); + ret.next = FD_NEXT_ATLIMIT; + } + + ret.dst = dst; + return ret; +} + +UPB_FORCEINLINE +static void *fastdecode_fieldmem(upb_msg *msg, uint64_t data) { + size_t ofs = data >> 48; + return (char *)msg + ofs; +} + +UPB_FORCEINLINE +static void *fastdecode_getfield(upb_decstate *d, const char *ptr, upb_msg *msg, + uint64_t *data, uint64_t *hasbits, + fastdecode_arr *farr, int valbytes, + upb_card card) { + switch (card) { + case CARD_s: { + uint8_t hasbit_index = *data >> 24; + // Set hasbit and return pointer to scalar field. + *hasbits |= 1ull << hasbit_index; + return fastdecode_fieldmem(msg, *data); + } + case CARD_o: { + uint16_t case_ofs = *data >> 32; + uint32_t *oneof_case = UPB_PTR_AT(msg, case_ofs, uint32_t); + uint8_t field_number = *data >> 24; + *oneof_case = field_number; + return fastdecode_fieldmem(msg, *data); + } + case CARD_r: { + // Get pointer to upb_array and allocate/expand if necessary. + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); + upb_array **arr_p = fastdecode_fieldmem(msg, *data); + char *begin; + *(uint32_t*)msg |= *hasbits; + *hasbits = 0; + if (UPB_LIKELY(!*arr_p)) { + farr->arr = _upb_array_new(&d->arena, 8, elem_size_lg2); + *arr_p = farr->arr; + } else { + farr->arr = *arr_p; + } + begin = _upb_array_ptr(farr->arr); + farr->end = begin + (farr->arr->size * valbytes); + *data = fastdecode_loadtag(ptr); + return begin + (farr->arr->len * valbytes); + } + default: + UPB_UNREACHABLE(); + } +} + +UPB_FORCEINLINE +static bool fastdecode_flippacked(uint64_t *data, int tagbytes) { + *data ^= (0x2 ^ 0x0); // Patch data to match packed wiretype. + return fastdecode_checktag(*data, tagbytes); +} + +/* varint fields **************************************************************/ + +UPB_FORCEINLINE +static uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) { + if (valbytes == 1) { + return val != 0; + } else if (zigzag) { + if (valbytes == 4) { + uint32_t n = val; + return (n >> 1) ^ -(int32_t)(n & 1); + } else if (valbytes == 8) { + return (val >> 1) ^ -(int64_t)(val & 1); + } + UPB_UNREACHABLE(); + } + return val; +} + +UPB_FORCEINLINE +static const char *fastdecode_varint64(const char *ptr, uint64_t *val) { + ptr++; + *val = (uint8_t)ptr[-1]; + if (UPB_UNLIKELY(*val & 0x80)) { + int i; + for (i = 0; i < 8; i++) { + ptr++; + uint64_t byte = (uint8_t)ptr[-1]; + *val += (byte - 1) << (7 + 7 * i); + if (UPB_LIKELY((byte & 0x80) == 0)) goto done; + } + ptr++; + uint64_t byte = (uint8_t)ptr[-1]; + if (byte > 1) { + return NULL; + } + *val += (byte - 1) << 63; + } +done: + UPB_ASSUME(ptr != NULL); + return ptr; +} + +UPB_FORCEINLINE +static const char *fastdecode_unpackedvarint(UPB_PARSE_PARAMS, int tagbytes, + int valbytes, upb_card card, + bool zigzag, + _upb_field_parser *packed) { + uint64_t val; + void *dst; + fastdecode_arr farr; + + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { + if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { + return packed(UPB_PARSE_ARGS); + } + RETURN_GENERIC("varint field tag mismatch\n"); + } + + dst = + fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, card); + if (card == CARD_r) { + if (UPB_UNLIKELY(!dst)) { + RETURN_GENERIC("need array resize\n"); + } + } + +again: + if (card == CARD_r) { + dst = fastdecode_resizearr(d, dst, &farr, valbytes); + } + + ptr += tagbytes; + ptr = fastdecode_varint64(ptr, &val); + if (ptr == NULL) return fastdecode_err(d); + val = fastdecode_munge(val, valbytes, zigzag); + memcpy(dst, &val, valbytes); + + if (card == CARD_r) { + fastdecode_nextret ret = + fastdecode_nextrepeated(d, dst, &ptr, &farr, data, tagbytes, valbytes); + switch (ret.next) { + case FD_NEXT_SAMEFIELD: + dst = ret.dst; + goto again; + case FD_NEXT_OTHERFIELD: + return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); + case FD_NEXT_ATLIMIT: + return ptr; + } + } + + return fastdecode_dispatch(d, ptr, msg, table, hasbits); +} + +typedef struct { + uint8_t valbytes; + bool zigzag; + void *dst; + fastdecode_arr farr; +} fastdecode_varintdata; + +UPB_FORCEINLINE +static const char *fastdecode_topackedvarint(upb_decstate *d, const char *ptr, + void *ctx) { + fastdecode_varintdata *data = ctx; + void *dst = data->dst; + uint64_t val; + + while (!decode_isdone(d, &ptr)) { + dst = fastdecode_resizearr(d, dst, &data->farr, data->valbytes); + ptr = fastdecode_varint64(ptr, &val); + if (ptr == NULL) return NULL; + val = fastdecode_munge(val, data->valbytes, data->zigzag); + memcpy(dst, &val, data->valbytes); + dst = (char *)dst + data->valbytes; + } + + fastdecode_commitarr(dst, &data->farr, data->valbytes); + return ptr; +} + +UPB_FORCEINLINE +static const char *fastdecode_packedvarint(UPB_PARSE_PARAMS, int tagbytes, + int valbytes, bool zigzag, + _upb_field_parser *unpacked) { + fastdecode_varintdata ctx = {valbytes, zigzag}; + + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { + if (fastdecode_flippacked(&data, tagbytes)) { + return unpacked(UPB_PARSE_ARGS); + } else { + RETURN_GENERIC("varint field tag mismatch\n"); + } + } + + ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, + valbytes, CARD_r); + if (UPB_UNLIKELY(!ctx.dst)) { + RETURN_GENERIC("need array resize\n"); + } + + ptr += tagbytes; + ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); + + if (UPB_UNLIKELY(ptr == NULL)) { + return fastdecode_err(d); + } + + return fastdecode_dispatch(d, ptr, msg, table, hasbits); +} + +UPB_FORCEINLINE +static const char *fastdecode_varint(UPB_PARSE_PARAMS, int tagbytes, + int valbytes, upb_card card, bool zigzag, + _upb_field_parser *unpacked, + _upb_field_parser *packed) { + if (card == CARD_p) { + return fastdecode_packedvarint(UPB_PARSE_ARGS, tagbytes, valbytes, zigzag, + unpacked); + } else { + return fastdecode_unpackedvarint(UPB_PARSE_ARGS, tagbytes, valbytes, card, + zigzag, packed); + } +} + +#define z_ZZ true +#define b_ZZ false +#define v_ZZ false + +/* Generate all combinations: + * {s,o,r,p} x {b1,v4,z4,v8,z8} x {1bt,2bt} */ + +#define F(card, type, valbytes, tagbytes) \ + UPB_NOINLINE \ + const char *upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + return fastdecode_varint(UPB_PARSE_ARGS, tagbytes, valbytes, CARD_##card, \ + type##_ZZ, \ + &upb_pr##type##valbytes##_##tagbytes##bt, \ + &upb_pp##type##valbytes##_##tagbytes##bt); \ + } + +#define TYPES(card, tagbytes) \ + F(card, b, 1, tagbytes) \ + F(card, v, 4, tagbytes) \ + F(card, v, 8, tagbytes) \ + F(card, z, 4, tagbytes) \ + F(card, z, 8, tagbytes) + +#define TAGBYTES(card) \ + TYPES(card, 1) \ + TYPES(card, 2) + +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) +TAGBYTES(p) + +#undef z_ZZ +#undef b_ZZ +#undef v_ZZ +#undef o_ONEOF +#undef s_ONEOF +#undef r_ONEOF +#undef F +#undef TYPES +#undef TAGBYTES + + +/* fixed fields ***************************************************************/ + +UPB_FORCEINLINE +static const char *fastdecode_unpackedfixed(UPB_PARSE_PARAMS, int tagbytes, + int valbytes, upb_card card, + _upb_field_parser *packed) { + void *dst; + fastdecode_arr farr; + + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { + if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { + return packed(UPB_PARSE_ARGS); + } + RETURN_GENERIC("fixed field tag mismatch\n"); + } + + dst = + fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, card); + if (card == CARD_r) { + if (UPB_UNLIKELY(!dst)) { + RETURN_GENERIC("couldn't allocate array in arena\n"); + } + } + + +again: + if (card == CARD_r) { + dst = fastdecode_resizearr(d, dst, &farr, valbytes); + } + + ptr += tagbytes; + memcpy(dst, ptr, valbytes); + ptr += valbytes; + + if (card == CARD_r) { + fastdecode_nextret ret = + fastdecode_nextrepeated(d, dst, &ptr, &farr, data, tagbytes, valbytes); + switch (ret.next) { + case FD_NEXT_SAMEFIELD: + dst = ret.dst; + goto again; + case FD_NEXT_OTHERFIELD: + return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); + case FD_NEXT_ATLIMIT: + return ptr; + } + } + + return fastdecode_dispatch(d, ptr, msg, table, hasbits); +} + +UPB_FORCEINLINE +static const char *fastdecode_packedfixed(UPB_PARSE_PARAMS, int tagbytes, + int valbytes, + _upb_field_parser *unpacked) { + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { + if (fastdecode_flippacked(&data, tagbytes)) { + return unpacked(UPB_PARSE_ARGS); + } else { + RETURN_GENERIC("varint field tag mismatch\n"); + } + } + + ptr += tagbytes; + int size = (uint8_t)ptr[0]; + ptr++; + if (size & 0x80) { + ptr = fastdecode_longsize(ptr, &size); + } + + if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr)) || + (size % valbytes) != 0) { + return fastdecode_err(d); + } + + upb_array **arr_p = fastdecode_fieldmem(msg, data); + upb_array *arr = *arr_p; + uint8_t elem_size_lg2 = __builtin_ctz(valbytes); + int elems = size / valbytes; + + if (UPB_LIKELY(!arr)) { + *arr_p = arr = _upb_array_new(&d->arena, elems, elem_size_lg2); + if (!arr) { + return fastdecode_err(d); + } + } else { + _upb_array_resize(arr, elems, &d->arena); + } + + char *dst = _upb_array_ptr(arr); + memcpy(dst, ptr, size); + arr->len = elems; + + return fastdecode_dispatch(d, ptr + size, msg, table, hasbits); +} + +UPB_FORCEINLINE +static const char *fastdecode_fixed(UPB_PARSE_PARAMS, int tagbytes, + int valbytes, upb_card card, + _upb_field_parser *unpacked, + _upb_field_parser *packed) { + if (card == CARD_p) { + return fastdecode_packedfixed(UPB_PARSE_ARGS, tagbytes, valbytes, unpacked); + } else { + return fastdecode_unpackedfixed(UPB_PARSE_ARGS, tagbytes, valbytes, card, + packed); + } +} + +/* Generate all combinations: + * {s,o,r,p} x {f4,f8} x {1bt,2bt} */ + +#define F(card, valbytes, tagbytes) \ + UPB_NOINLINE \ + const char *upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + return fastdecode_fixed(UPB_PARSE_ARGS, tagbytes, valbytes, CARD_##card, \ + &upb_ppf##valbytes##_##tagbytes##bt, \ + &upb_prf##valbytes##_##tagbytes##bt); \ + } + +#define TYPES(card, tagbytes) \ + F(card, 4, tagbytes) \ + F(card, 8, tagbytes) + +#define TAGBYTES(card) \ + TYPES(card, 1) \ + TYPES(card, 2) + +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) +TAGBYTES(p) + +#undef F +#undef TYPES +#undef TAGBYTES + +/* string fields **************************************************************/ + +typedef const char *fastdecode_copystr_func(struct upb_decstate *d, + const char *ptr, upb_msg *msg, + const upb_msglayout *table, + uint64_t hasbits, upb_strview *dst); + +UPB_NOINLINE +static const char *fastdecode_verifyutf8(upb_decstate *d, const char *ptr, + upb_msg *msg, intptr_t table, + uint64_t hasbits, upb_strview *dst) { + if (!decode_verifyutf8_inl(dst->data, dst->size)) { + return fastdecode_err(d); + } + return fastdecode_dispatch(d, ptr, msg, table, hasbits); +} + +UPB_FORCEINLINE +static const char *fastdecode_longstring(struct upb_decstate *d, + const char *ptr, upb_msg *msg, + intptr_t table, uint64_t hasbits, + upb_strview *dst, + bool validate_utf8) { + int size = (uint8_t)ptr[0]; // Could plumb through hasbits. + ptr++; + if (size & 0x80) { + ptr = fastdecode_longsize(ptr, &size); + } + + if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr))) { + dst->size = 0; + return fastdecode_err(d); + } + + if (d->alias) { + dst->data = ptr; + dst->size = size; + } else { + char *data = upb_arena_malloc(&d->arena, size); + if (!data) { + return fastdecode_err(d); + } + memcpy(data, ptr, size); + dst->data = data; + dst->size = size; + } + + if (validate_utf8) { + return fastdecode_verifyutf8(d, ptr + size, msg, table, hasbits, dst); + } else { + return fastdecode_dispatch(d, ptr + size, msg, table, hasbits); + } +} + +UPB_NOINLINE +static const char *fastdecode_longstring_utf8(struct upb_decstate *d, + const char *ptr, upb_msg *msg, + intptr_t table, uint64_t hasbits, + upb_strview *dst) { + return fastdecode_longstring(d, ptr, msg, table, hasbits, dst, true); +} + +UPB_NOINLINE +static const char *fastdecode_longstring_noutf8(struct upb_decstate *d, + const char *ptr, upb_msg *msg, + intptr_t table, + uint64_t hasbits, + upb_strview *dst) { + return fastdecode_longstring(d, ptr, msg, table, hasbits, dst, false); +} + +UPB_FORCEINLINE +static void fastdecode_docopy(upb_decstate *d, const char *ptr, uint32_t size, + int copy, char *data, upb_strview *dst) { + d->arena.head.ptr += copy; + dst->data = data; + UPB_UNPOISON_MEMORY_REGION(data, copy); + memcpy(data, ptr, copy); + UPB_POISON_MEMORY_REGION(data + size, copy - size); +} + +UPB_FORCEINLINE +static const char *fastdecode_copystring(UPB_PARSE_PARAMS, int tagbytes, + upb_card card, bool validate_utf8) { + upb_strview *dst; + fastdecode_arr farr; + int64_t size; + size_t arena_has; + size_t common_has; + char *buf; + + UPB_ASSERT(!d->alias); + UPB_ASSERT(fastdecode_checktag(data, tagbytes)); + + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, + sizeof(upb_strview), card); + +again: + if (card == CARD_r) { + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); + } + + size = (uint8_t)ptr[tagbytes]; + ptr += tagbytes + 1; + dst->size = size; + + buf = d->arena.head.ptr; + arena_has = _upb_arenahas(&d->arena); + common_has = UPB_MIN(arena_has, (d->end - ptr) + 16); + + if (UPB_LIKELY(size <= 15 - tagbytes)) { + if (arena_has < 16) goto longstr; + d->arena.head.ptr += 16; + memcpy(buf, ptr - tagbytes - 1, 16); + dst->data = buf + tagbytes + 1; + } else if (UPB_LIKELY(size <= 32)) { + if (UPB_UNLIKELY(common_has < 32)) goto longstr; + fastdecode_docopy(d, ptr, size, 32, buf, dst); + } else if (UPB_LIKELY(size <= 64)) { + if (UPB_UNLIKELY(common_has < 64)) goto longstr; + fastdecode_docopy(d, ptr, size, 64, buf, dst); + } else if (UPB_LIKELY(size < 128)) { + if (UPB_UNLIKELY(common_has < 128)) goto longstr; + fastdecode_docopy(d, ptr, size, 128, buf, dst); + } else { + goto longstr; + } + + ptr += size; + + if (card == CARD_r) { + if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { + return fastdecode_err(d); + } + fastdecode_nextret ret = fastdecode_nextrepeated( + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); + switch (ret.next) { + case FD_NEXT_SAMEFIELD: + dst = ret.dst; + goto again; + case FD_NEXT_OTHERFIELD: + return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); + case FD_NEXT_ATLIMIT: + return ptr; + } + } + + if (card != CARD_r && validate_utf8) { + return fastdecode_verifyutf8(d, ptr, msg, table, hasbits, dst); + } + + return fastdecode_dispatch(d, ptr, msg, table, hasbits); + +longstr: + ptr--; + if (validate_utf8) { + return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, dst); + } else { + return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, dst); + } +} + +UPB_FORCEINLINE +static const char *fastdecode_string(UPB_PARSE_PARAMS, int tagbytes, + upb_card card, _upb_field_parser *copyfunc, + bool validate_utf8) { + upb_strview *dst; + fastdecode_arr farr; + int64_t size; + + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { + RETURN_GENERIC("string field tag mismatch\n"); + } + + if (UPB_UNLIKELY(!d->alias)) { + return copyfunc(UPB_PARSE_ARGS); + } + + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, + sizeof(upb_strview), card); + +again: + if (card == CARD_r) { + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_strview)); + } + + size = (int8_t)ptr[tagbytes]; + ptr += tagbytes + 1; + dst->data = ptr; + dst->size = size; + + if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->end))) { + ptr--; + if (validate_utf8) { + return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, dst); + } else { + return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, dst); + } + } + + ptr += size; + + if (card == CARD_r) { + if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { + return fastdecode_err(d); + } + fastdecode_nextret ret = fastdecode_nextrepeated( + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_strview)); + switch (ret.next) { + case FD_NEXT_SAMEFIELD: + dst = ret.dst; + if (UPB_UNLIKELY(!d->alias)) { + // Buffer flipped and we can't alias any more. Bounce to copyfunc(), + // but via dispatch since we need to reload table data also. + fastdecode_commitarr(dst, &farr, sizeof(upb_strview)); + return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); + } + goto again; + case FD_NEXT_OTHERFIELD: + return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); + case FD_NEXT_ATLIMIT: + return ptr; + } + } + + if (card != CARD_r && validate_utf8) { + return fastdecode_verifyutf8(d, ptr, msg, table, hasbits, dst); + } + + return fastdecode_dispatch(d, ptr, msg, table, hasbits); +} + +/* Generate all combinations: + * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */ + +#define s_VALIDATE true +#define b_VALIDATE false + +#define F(card, tagbytes, type) \ + UPB_NOINLINE \ + const char *upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + return fastdecode_copystring(UPB_PARSE_ARGS, tagbytes, CARD_##card, \ + type##_VALIDATE); \ + } \ + const char *upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \ + return fastdecode_string(UPB_PARSE_ARGS, tagbytes, CARD_##card, \ + &upb_c##card##type##_##tagbytes##bt, \ + type##_VALIDATE); \ + } + +#define UTF8(card, tagbytes) \ + F(card, tagbytes, s) \ + F(card, tagbytes, b) + +#define TAGBYTES(card) \ + UTF8(card, 1) \ + UTF8(card, 2) + +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) + +#undef s_VALIDATE +#undef b_VALIDATE +#undef F +#undef TAGBYTES + +/* message fields *************************************************************/ + +UPB_INLINE +upb_msg *decode_newmsg_ceil(upb_decstate *d, const upb_msglayout *l, + int msg_ceil_bytes) { + size_t size = l->size + sizeof(upb_msg_internal); + char *msg_data; + if (UPB_LIKELY(msg_ceil_bytes > 0 && + _upb_arenahas(&d->arena) >= msg_ceil_bytes)) { + UPB_ASSERT(size <= (size_t)msg_ceil_bytes); + msg_data = d->arena.head.ptr; + d->arena.head.ptr += size; + UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes); + memset(msg_data, 0, msg_ceil_bytes); + UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size); + } else { + msg_data = (char*)upb_arena_malloc(&d->arena, size); + memset(msg_data, 0, size); + } + return msg_data + sizeof(upb_msg_internal); +} + +typedef struct { + intptr_t table; + upb_msg *msg; +} fastdecode_submsgdata; + +UPB_FORCEINLINE +static const char *fastdecode_tosubmsg(upb_decstate *d, const char *ptr, + void *ctx) { + fastdecode_submsgdata *submsg = ctx; + ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0); + UPB_ASSUME(ptr != NULL); + return ptr; +} + +UPB_FORCEINLINE +static const char *fastdecode_submsg(UPB_PARSE_PARAMS, int tagbytes, + int msg_ceil_bytes, upb_card card) { + + if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { + RETURN_GENERIC("submessage field tag mismatch\n"); + } + + if (--d->depth == 0) return fastdecode_err(d); + + upb_msg **dst; + uint32_t submsg_idx = (data >> 16) & 0xff; + const upb_msglayout *tablep = decode_totablep(table); + const upb_msglayout *subtablep = tablep->submsgs[submsg_idx]; + fastdecode_submsgdata submsg = {decode_totable(subtablep)}; + fastdecode_arr farr; + + if (subtablep->table_mask == (uint8_t)-1) { + RETURN_GENERIC("submessage doesn't have fast tables."); + } + + dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, + sizeof(upb_msg *), card); + + if (card == CARD_s) { + *(uint32_t*)msg |= hasbits; + hasbits = 0; + } + +again: + if (card == CARD_r) { + dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_msg*)); + } + + submsg.msg = *dst; + + if (card == CARD_r || UPB_LIKELY(!submsg.msg)) { + *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); + } + + ptr += tagbytes; + ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg); + + if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) { + return fastdecode_err(d); + } + + if (card == CARD_r) { + fastdecode_nextret ret = fastdecode_nextrepeated( + d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_msg *)); + switch (ret.next) { + case FD_NEXT_SAMEFIELD: + dst = ret.dst; + goto again; + case FD_NEXT_OTHERFIELD: + d->depth++; + return fastdecode_tagdispatch(d, ptr, msg, table, hasbits, ret.tag); + case FD_NEXT_ATLIMIT: + d->depth++; + return ptr; + } + } + + d->depth++; + return fastdecode_dispatch(d, ptr, msg, table, hasbits); +} + +#define F(card, tagbytes, size_ceil, ceil_arg) \ + const char *upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \ + UPB_PARSE_PARAMS) { \ + return fastdecode_submsg(UPB_PARSE_ARGS, tagbytes, ceil_arg, CARD_##card); \ + } + +#define SIZES(card, tagbytes) \ + F(card, tagbytes, 64, 64) \ + F(card, tagbytes, 128, 128) \ + F(card, tagbytes, 192, 192) \ + F(card, tagbytes, 256, 256) \ + F(card, tagbytes, max, -1) + +#define TAGBYTES(card) \ + SIZES(card, 1) \ + SIZES(card, 2) + +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) + +#undef TAGBYTES +#undef SIZES +#undef F + +#endif /* UPB_FASTTABLE */ +/* This file was generated by upbc (the upb compiler) from the input + * file: + * + * google/protobuf/descriptor.proto + * + * Do not edit -- your changes will be discarded when the file is + * regenerated. */ + +#include + + +static const upb_msglayout *const google_protobuf_FileDescriptorSet_submsgs[1] = { + &google_protobuf_FileDescriptorProto_msginit, +}; + +static const upb_msglayout_field google_protobuf_FileDescriptorSet__fields[1] = { + {1, UPB_SIZE(0, 0), 0, 0, 11, 3}, +}; + +const upb_msglayout google_protobuf_FileDescriptorSet_msginit = { + &google_protobuf_FileDescriptorSet_submsgs[0], + &google_protobuf_FileDescriptorSet__fields[0], + UPB_SIZE(8, 8), 1, false, 255, +}; + +static const upb_msglayout *const google_protobuf_FileDescriptorProto_submsgs[6] = { + &google_protobuf_DescriptorProto_msginit, + &google_protobuf_EnumDescriptorProto_msginit, + &google_protobuf_FieldDescriptorProto_msginit, + &google_protobuf_FileOptions_msginit, + &google_protobuf_ServiceDescriptorProto_msginit, + &google_protobuf_SourceCodeInfo_msginit, +}; + +static const upb_msglayout_field google_protobuf_FileDescriptorProto__fields[12] = { + {1, UPB_SIZE(4, 8), 1, 0, 12, 1}, + {2, UPB_SIZE(12, 24), 2, 0, 12, 1}, + {3, UPB_SIZE(36, 72), 0, 0, 12, 3}, + {4, UPB_SIZE(40, 80), 0, 0, 11, 3}, + {5, UPB_SIZE(44, 88), 0, 1, 11, 3}, + {6, UPB_SIZE(48, 96), 0, 4, 11, 3}, + {7, UPB_SIZE(52, 104), 0, 2, 11, 3}, + {8, UPB_SIZE(28, 56), 3, 3, 11, 1}, + {9, UPB_SIZE(32, 64), 4, 5, 11, 1}, + {10, UPB_SIZE(56, 112), 0, 0, 5, 3}, + {11, UPB_SIZE(60, 120), 0, 0, 5, 3}, + {12, UPB_SIZE(20, 40), 5, 0, 12, 1}, +}; + +const upb_msglayout google_protobuf_FileDescriptorProto_msginit = { + &google_protobuf_FileDescriptorProto_submsgs[0], + &google_protobuf_FileDescriptorProto__fields[0], + UPB_SIZE(64, 128), 12, false, 255, +}; + +static const upb_msglayout *const google_protobuf_DescriptorProto_submsgs[7] = { + &google_protobuf_DescriptorProto_msginit, + &google_protobuf_DescriptorProto_ExtensionRange_msginit, + &google_protobuf_DescriptorProto_ReservedRange_msginit, + &google_protobuf_EnumDescriptorProto_msginit, + &google_protobuf_FieldDescriptorProto_msginit, + &google_protobuf_MessageOptions_msginit, + &google_protobuf_OneofDescriptorProto_msginit, +}; + +static const upb_msglayout_field google_protobuf_DescriptorProto__fields[10] = { + {1, UPB_SIZE(4, 8), 1, 0, 12, 1}, + {2, UPB_SIZE(16, 32), 0, 4, 11, 3}, + {3, UPB_SIZE(20, 40), 0, 0, 11, 3}, + {4, UPB_SIZE(24, 48), 0, 3, 11, 3}, + {5, UPB_SIZE(28, 56), 0, 1, 11, 3}, + {6, UPB_SIZE(32, 64), 0, 4, 11, 3}, + {7, UPB_SIZE(12, 24), 2, 5, 11, 1}, + {8, UPB_SIZE(36, 72), 0, 6, 11, 3}, + {9, UPB_SIZE(40, 80), 0, 2, 11, 3}, + {10, UPB_SIZE(44, 88), 0, 0, 12, 3}, +}; + +const upb_msglayout google_protobuf_DescriptorProto_msginit = { + &google_protobuf_DescriptorProto_submsgs[0], + &google_protobuf_DescriptorProto__fields[0], + UPB_SIZE(48, 96), 10, false, 255, +}; + +static const upb_msglayout *const google_protobuf_DescriptorProto_ExtensionRange_submsgs[1] = { + &google_protobuf_ExtensionRangeOptions_msginit, +}; + +static const upb_msglayout_field google_protobuf_DescriptorProto_ExtensionRange__fields[3] = { + {1, UPB_SIZE(4, 4), 1, 0, 5, 1}, + {2, UPB_SIZE(8, 8), 2, 0, 5, 1}, + {3, UPB_SIZE(12, 16), 3, 0, 11, 1}, +}; + +const upb_msglayout google_protobuf_DescriptorProto_ExtensionRange_msginit = { + &google_protobuf_DescriptorProto_ExtensionRange_submsgs[0], + &google_protobuf_DescriptorProto_ExtensionRange__fields[0], + UPB_SIZE(16, 24), 3, false, 255, +}; + +static const upb_msglayout_field google_protobuf_DescriptorProto_ReservedRange__fields[2] = { + {1, UPB_SIZE(4, 4), 1, 0, 5, 1}, + {2, UPB_SIZE(8, 8), 2, 0, 5, 1}, +}; + +const upb_msglayout google_protobuf_DescriptorProto_ReservedRange_msginit = { + NULL, + &google_protobuf_DescriptorProto_ReservedRange__fields[0], + UPB_SIZE(16, 16), 2, false, 255, +}; + +static const upb_msglayout *const google_protobuf_ExtensionRangeOptions_submsgs[1] = { + &google_protobuf_UninterpretedOption_msginit, +}; + +static const upb_msglayout_field google_protobuf_ExtensionRangeOptions__fields[1] = { + {999, UPB_SIZE(0, 0), 0, 0, 11, 3}, +}; + +const upb_msglayout google_protobuf_ExtensionRangeOptions_msginit = { + &google_protobuf_ExtensionRangeOptions_submsgs[0], + &google_protobuf_ExtensionRangeOptions__fields[0], + UPB_SIZE(8, 8), 1, false, 255, +}; + +static const upb_msglayout *const google_protobuf_FieldDescriptorProto_submsgs[1] = { + &google_protobuf_FieldOptions_msginit, +}; + +static const upb_msglayout_field google_protobuf_FieldDescriptorProto__fields[11] = { + {1, UPB_SIZE(24, 24), 1, 0, 12, 1}, + {2, UPB_SIZE(32, 40), 2, 0, 12, 1}, + {3, UPB_SIZE(12, 12), 3, 0, 5, 1}, + {4, UPB_SIZE(4, 4), 4, 0, 14, 1}, + {5, UPB_SIZE(8, 8), 5, 0, 14, 1}, + {6, UPB_SIZE(40, 56), 6, 0, 12, 1}, + {7, UPB_SIZE(48, 72), 7, 0, 12, 1}, + {8, UPB_SIZE(64, 104), 8, 0, 11, 1}, + {9, UPB_SIZE(16, 16), 9, 0, 5, 1}, + {10, UPB_SIZE(56, 88), 10, 0, 12, 1}, + {17, UPB_SIZE(20, 20), 11, 0, 8, 1}, +}; + +const upb_msglayout google_protobuf_FieldDescriptorProto_msginit = { + &google_protobuf_FieldDescriptorProto_submsgs[0], + &google_protobuf_FieldDescriptorProto__fields[0], + UPB_SIZE(72, 112), 11, false, 255, +}; + +static const upb_msglayout *const google_protobuf_OneofDescriptorProto_submsgs[1] = { + &google_protobuf_OneofOptions_msginit, +}; + +static const upb_msglayout_field google_protobuf_OneofDescriptorProto__fields[2] = { + {1, UPB_SIZE(4, 8), 1, 0, 12, 1}, + {2, UPB_SIZE(12, 24), 2, 0, 11, 1}, +}; + +const upb_msglayout google_protobuf_OneofDescriptorProto_msginit = { + &google_protobuf_OneofDescriptorProto_submsgs[0], + &google_protobuf_OneofDescriptorProto__fields[0], + UPB_SIZE(16, 32), 2, false, 255, +}; + +static const upb_msglayout *const google_protobuf_EnumDescriptorProto_submsgs[3] = { + &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit, + &google_protobuf_EnumOptions_msginit, + &google_protobuf_EnumValueDescriptorProto_msginit, +}; + +static const upb_msglayout_field google_protobuf_EnumDescriptorProto__fields[5] = { + {1, UPB_SIZE(4, 8), 1, 0, 12, 1}, + {2, UPB_SIZE(16, 32), 0, 2, 11, 3}, + {3, UPB_SIZE(12, 24), 2, 1, 11, 1}, + {4, UPB_SIZE(20, 40), 0, 0, 11, 3}, + {5, UPB_SIZE(24, 48), 0, 0, 12, 3}, +}; + +const upb_msglayout google_protobuf_EnumDescriptorProto_msginit = { + &google_protobuf_EnumDescriptorProto_submsgs[0], + &google_protobuf_EnumDescriptorProto__fields[0], + UPB_SIZE(32, 64), 5, false, 255, +}; + +static const upb_msglayout_field google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[2] = { + {1, UPB_SIZE(4, 4), 1, 0, 5, 1}, + {2, UPB_SIZE(8, 8), 2, 0, 5, 1}, +}; + +const upb_msglayout google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit = { + NULL, + &google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[0], + UPB_SIZE(16, 16), 2, false, 255, +}; + +static const upb_msglayout *const google_protobuf_EnumValueDescriptorProto_submsgs[1] = { + &google_protobuf_EnumValueOptions_msginit, +}; + +static const upb_msglayout_field google_protobuf_EnumValueDescriptorProto__fields[3] = { + {1, UPB_SIZE(8, 8), 1, 0, 12, 1}, + {2, UPB_SIZE(4, 4), 2, 0, 5, 1}, + {3, UPB_SIZE(16, 24), 3, 0, 11, 1}, +}; + +const upb_msglayout google_protobuf_EnumValueDescriptorProto_msginit = { + &google_protobuf_EnumValueDescriptorProto_submsgs[0], + &google_protobuf_EnumValueDescriptorProto__fields[0], + UPB_SIZE(24, 32), 3, false, 255, +}; + +static const upb_msglayout *const google_protobuf_ServiceDescriptorProto_submsgs[2] = { + &google_protobuf_MethodDescriptorProto_msginit, + &google_protobuf_ServiceOptions_msginit, +}; + +static const upb_msglayout_field google_protobuf_ServiceDescriptorProto__fields[3] = { + {1, UPB_SIZE(4, 8), 1, 0, 12, 1}, + {2, UPB_SIZE(16, 32), 0, 0, 11, 3}, + {3, UPB_SIZE(12, 24), 2, 1, 11, 1}, +}; + +const upb_msglayout google_protobuf_ServiceDescriptorProto_msginit = { + &google_protobuf_ServiceDescriptorProto_submsgs[0], + &google_protobuf_ServiceDescriptorProto__fields[0], + UPB_SIZE(24, 48), 3, false, 255, +}; + +static const upb_msglayout *const google_protobuf_MethodDescriptorProto_submsgs[1] = { + &google_protobuf_MethodOptions_msginit, +}; + +static const upb_msglayout_field google_protobuf_MethodDescriptorProto__fields[6] = { + {1, UPB_SIZE(4, 8), 1, 0, 12, 1}, + {2, UPB_SIZE(12, 24), 2, 0, 12, 1}, + {3, UPB_SIZE(20, 40), 3, 0, 12, 1}, + {4, UPB_SIZE(28, 56), 4, 0, 11, 1}, + {5, UPB_SIZE(1, 1), 5, 0, 8, 1}, + {6, UPB_SIZE(2, 2), 6, 0, 8, 1}, +}; + +const upb_msglayout google_protobuf_MethodDescriptorProto_msginit = { + &google_protobuf_MethodDescriptorProto_submsgs[0], + &google_protobuf_MethodDescriptorProto__fields[0], + UPB_SIZE(32, 64), 6, false, 255, +}; + +static const upb_msglayout *const google_protobuf_FileOptions_submsgs[1] = { + &google_protobuf_UninterpretedOption_msginit, +}; + +static const upb_msglayout_field google_protobuf_FileOptions__fields[21] = { + {1, UPB_SIZE(20, 24), 1, 0, 12, 1}, + {8, UPB_SIZE(28, 40), 2, 0, 12, 1}, + {9, UPB_SIZE(4, 4), 3, 0, 14, 1}, + {10, UPB_SIZE(8, 8), 4, 0, 8, 1}, + {11, UPB_SIZE(36, 56), 5, 0, 12, 1}, + {16, UPB_SIZE(9, 9), 6, 0, 8, 1}, + {17, UPB_SIZE(10, 10), 7, 0, 8, 1}, + {18, UPB_SIZE(11, 11), 8, 0, 8, 1}, + {20, UPB_SIZE(12, 12), 9, 0, 8, 1}, + {23, UPB_SIZE(13, 13), 10, 0, 8, 1}, + {27, UPB_SIZE(14, 14), 11, 0, 8, 1}, + {31, UPB_SIZE(15, 15), 12, 0, 8, 1}, + {36, UPB_SIZE(44, 72), 13, 0, 12, 1}, + {37, UPB_SIZE(52, 88), 14, 0, 12, 1}, + {39, UPB_SIZE(60, 104), 15, 0, 12, 1}, + {40, UPB_SIZE(68, 120), 16, 0, 12, 1}, + {41, UPB_SIZE(76, 136), 17, 0, 12, 1}, + {42, UPB_SIZE(16, 16), 18, 0, 8, 1}, + {44, UPB_SIZE(84, 152), 19, 0, 12, 1}, + {45, UPB_SIZE(92, 168), 20, 0, 12, 1}, + {999, UPB_SIZE(100, 184), 0, 0, 11, 3}, +}; + +const upb_msglayout google_protobuf_FileOptions_msginit = { + &google_protobuf_FileOptions_submsgs[0], + &google_protobuf_FileOptions__fields[0], + UPB_SIZE(104, 192), 21, false, 255, +}; + +static const upb_msglayout *const google_protobuf_MessageOptions_submsgs[1] = { + &google_protobuf_UninterpretedOption_msginit, +}; + +static const upb_msglayout_field google_protobuf_MessageOptions__fields[5] = { + {1, UPB_SIZE(1, 1), 1, 0, 8, 1}, + {2, UPB_SIZE(2, 2), 2, 0, 8, 1}, + {3, UPB_SIZE(3, 3), 3, 0, 8, 1}, + {7, UPB_SIZE(4, 4), 4, 0, 8, 1}, + {999, UPB_SIZE(8, 8), 0, 0, 11, 3}, +}; + +const upb_msglayout google_protobuf_MessageOptions_msginit = { + &google_protobuf_MessageOptions_submsgs[0], + &google_protobuf_MessageOptions__fields[0], + UPB_SIZE(16, 16), 5, false, 255, +}; + +static const upb_msglayout *const google_protobuf_FieldOptions_submsgs[1] = { + &google_protobuf_UninterpretedOption_msginit, +}; + +static const upb_msglayout_field google_protobuf_FieldOptions__fields[7] = { + {1, UPB_SIZE(4, 4), 1, 0, 14, 1}, + {2, UPB_SIZE(12, 12), 2, 0, 8, 1}, + {3, UPB_SIZE(13, 13), 3, 0, 8, 1}, + {5, UPB_SIZE(14, 14), 4, 0, 8, 1}, + {6, UPB_SIZE(8, 8), 5, 0, 14, 1}, + {10, UPB_SIZE(15, 15), 6, 0, 8, 1}, + {999, UPB_SIZE(16, 16), 0, 0, 11, 3}, +}; + +const upb_msglayout google_protobuf_FieldOptions_msginit = { + &google_protobuf_FieldOptions_submsgs[0], + &google_protobuf_FieldOptions__fields[0], + UPB_SIZE(24, 24), 7, false, 255, +}; + +static const upb_msglayout *const google_protobuf_OneofOptions_submsgs[1] = { + &google_protobuf_UninterpretedOption_msginit, +}; + +static const upb_msglayout_field google_protobuf_OneofOptions__fields[1] = { + {999, UPB_SIZE(0, 0), 0, 0, 11, 3}, +}; + +const upb_msglayout google_protobuf_OneofOptions_msginit = { + &google_protobuf_OneofOptions_submsgs[0], + &google_protobuf_OneofOptions__fields[0], + UPB_SIZE(8, 8), 1, false, 255, +}; + +static const upb_msglayout *const google_protobuf_EnumOptions_submsgs[1] = { + &google_protobuf_UninterpretedOption_msginit, +}; + +static const upb_msglayout_field google_protobuf_EnumOptions__fields[3] = { + {2, UPB_SIZE(1, 1), 1, 0, 8, 1}, + {3, UPB_SIZE(2, 2), 2, 0, 8, 1}, + {999, UPB_SIZE(4, 8), 0, 0, 11, 3}, +}; + +const upb_msglayout google_protobuf_EnumOptions_msginit = { + &google_protobuf_EnumOptions_submsgs[0], + &google_protobuf_EnumOptions__fields[0], + UPB_SIZE(8, 16), 3, false, 255, +}; + +static const upb_msglayout *const google_protobuf_EnumValueOptions_submsgs[1] = { + &google_protobuf_UninterpretedOption_msginit, +}; + +static const upb_msglayout_field google_protobuf_EnumValueOptions__fields[2] = { + {1, UPB_SIZE(1, 1), 1, 0, 8, 1}, + {999, UPB_SIZE(4, 8), 0, 0, 11, 3}, +}; + +const upb_msglayout google_protobuf_EnumValueOptions_msginit = { + &google_protobuf_EnumValueOptions_submsgs[0], + &google_protobuf_EnumValueOptions__fields[0], + UPB_SIZE(8, 16), 2, false, 255, +}; + +static const upb_msglayout *const google_protobuf_ServiceOptions_submsgs[1] = { + &google_protobuf_UninterpretedOption_msginit, +}; + +static const upb_msglayout_field google_protobuf_ServiceOptions__fields[2] = { + {33, UPB_SIZE(1, 1), 1, 0, 8, 1}, + {999, UPB_SIZE(4, 8), 0, 0, 11, 3}, +}; + +const upb_msglayout google_protobuf_ServiceOptions_msginit = { + &google_protobuf_ServiceOptions_submsgs[0], + &google_protobuf_ServiceOptions__fields[0], + UPB_SIZE(8, 16), 2, false, 255, +}; + +static const upb_msglayout *const google_protobuf_MethodOptions_submsgs[1] = { + &google_protobuf_UninterpretedOption_msginit, +}; + +static const upb_msglayout_field google_protobuf_MethodOptions__fields[3] = { + {33, UPB_SIZE(8, 8), 1, 0, 8, 1}, + {34, UPB_SIZE(4, 4), 2, 0, 14, 1}, + {999, UPB_SIZE(12, 16), 0, 0, 11, 3}, +}; + +const upb_msglayout google_protobuf_MethodOptions_msginit = { + &google_protobuf_MethodOptions_submsgs[0], + &google_protobuf_MethodOptions__fields[0], + UPB_SIZE(16, 24), 3, false, 255, +}; + +static const upb_msglayout *const google_protobuf_UninterpretedOption_submsgs[1] = { + &google_protobuf_UninterpretedOption_NamePart_msginit, +}; + +static const upb_msglayout_field google_protobuf_UninterpretedOption__fields[7] = { + {2, UPB_SIZE(56, 80), 0, 0, 11, 3}, + {3, UPB_SIZE(32, 32), 1, 0, 12, 1}, + {4, UPB_SIZE(8, 8), 2, 0, 4, 1}, + {5, UPB_SIZE(16, 16), 3, 0, 3, 1}, + {6, UPB_SIZE(24, 24), 4, 0, 1, 1}, + {7, UPB_SIZE(40, 48), 5, 0, 12, 1}, + {8, UPB_SIZE(48, 64), 6, 0, 12, 1}, +}; + +const upb_msglayout google_protobuf_UninterpretedOption_msginit = { + &google_protobuf_UninterpretedOption_submsgs[0], + &google_protobuf_UninterpretedOption__fields[0], + UPB_SIZE(64, 96), 7, false, 255, +}; + +static const upb_msglayout_field google_protobuf_UninterpretedOption_NamePart__fields[2] = { + {1, UPB_SIZE(4, 8), 1, 0, 12, 2}, + {2, UPB_SIZE(1, 1), 2, 0, 8, 2}, +}; + +const upb_msglayout google_protobuf_UninterpretedOption_NamePart_msginit = { + NULL, + &google_protobuf_UninterpretedOption_NamePart__fields[0], + UPB_SIZE(16, 32), 2, false, 255, +}; + +static const upb_msglayout *const google_protobuf_SourceCodeInfo_submsgs[1] = { + &google_protobuf_SourceCodeInfo_Location_msginit, +}; + +static const upb_msglayout_field google_protobuf_SourceCodeInfo__fields[1] = { + {1, UPB_SIZE(0, 0), 0, 0, 11, 3}, +}; + +const upb_msglayout google_protobuf_SourceCodeInfo_msginit = { + &google_protobuf_SourceCodeInfo_submsgs[0], + &google_protobuf_SourceCodeInfo__fields[0], + UPB_SIZE(8, 8), 1, false, 255, +}; + +static const upb_msglayout_field google_protobuf_SourceCodeInfo_Location__fields[5] = { + {1, UPB_SIZE(20, 40), 0, 0, 5, _UPB_LABEL_PACKED}, + {2, UPB_SIZE(24, 48), 0, 0, 5, _UPB_LABEL_PACKED}, + {3, UPB_SIZE(4, 8), 1, 0, 12, 1}, + {4, UPB_SIZE(12, 24), 2, 0, 12, 1}, + {6, UPB_SIZE(28, 56), 0, 0, 12, 3}, +}; + +const upb_msglayout google_protobuf_SourceCodeInfo_Location_msginit = { + NULL, + &google_protobuf_SourceCodeInfo_Location__fields[0], + UPB_SIZE(32, 64), 5, false, 255, +}; + +static const upb_msglayout *const google_protobuf_GeneratedCodeInfo_submsgs[1] = { + &google_protobuf_GeneratedCodeInfo_Annotation_msginit, +}; + +static const upb_msglayout_field google_protobuf_GeneratedCodeInfo__fields[1] = { + {1, UPB_SIZE(0, 0), 0, 0, 11, 3}, +}; + +const upb_msglayout google_protobuf_GeneratedCodeInfo_msginit = { + &google_protobuf_GeneratedCodeInfo_submsgs[0], + &google_protobuf_GeneratedCodeInfo__fields[0], + UPB_SIZE(8, 8), 1, false, 255, +}; + +static const upb_msglayout_field google_protobuf_GeneratedCodeInfo_Annotation__fields[4] = { + {1, UPB_SIZE(20, 32), 0, 0, 5, _UPB_LABEL_PACKED}, + {2, UPB_SIZE(12, 16), 1, 0, 12, 1}, + {3, UPB_SIZE(4, 4), 2, 0, 5, 1}, + {4, UPB_SIZE(8, 8), 3, 0, 5, 1}, +}; + +const upb_msglayout google_protobuf_GeneratedCodeInfo_Annotation_msginit = { + NULL, + &google_protobuf_GeneratedCodeInfo_Annotation__fields[0], + UPB_SIZE(24, 48), 4, false, 255, +}; + + + + +#include +#include +#include +#include +#include + + +/* Must be last. */ + +typedef struct { + size_t len; + char str[1]; /* Null-terminated string data follows. */ +} str_t; + +struct upb_fielddef { + const upb_filedef *file; + const upb_msgdef *msgdef; + const char *full_name; + const char *json_name; + union { + int64_t sint; + uint64_t uint; + double dbl; + float flt; + bool boolean; + str_t *str; + } defaultval; + const upb_oneofdef *oneof; + union { + const upb_msgdef *msgdef; + const upb_enumdef *enumdef; + const google_protobuf_FieldDescriptorProto *unresolved; + } sub; + uint32_t number_; + uint16_t index_; + uint16_t layout_index; + uint32_t selector_base; /* Used to index into a upb::Handlers table. */ + bool is_extension_; + bool lazy_; + bool packed_; + bool proto3_optional_; + upb_descriptortype_t type_; + upb_label_t label_; +}; + +struct upb_msgdef { + const upb_msglayout *layout; + const upb_filedef *file; + const char *full_name; + uint32_t selector_count; + uint32_t submsg_field_count; + + /* Tables for looking up fields by number and name. */ + upb_inttable itof; + upb_strtable ntof; + + const upb_fielddef *fields; + const upb_oneofdef *oneofs; + int field_count; + int oneof_count; + int real_oneof_count; + + /* Is this a map-entry message? */ + bool map_entry; + upb_wellknowntype_t well_known_type; + + /* TODO(haberman): proper extension ranges (there can be multiple). */ +}; + +struct upb_enumdef { + const upb_filedef *file; + const char *full_name; + upb_strtable ntoi; + upb_inttable iton; + int32_t defaultval; +}; + +struct upb_oneofdef { + const upb_msgdef *parent; + const char *full_name; + int field_count; + bool synthetic; + const upb_fielddef **fields; + upb_strtable ntof; + upb_inttable itof; +}; + +struct upb_filedef { + const char *name; + const char *package; + const char *phpprefix; + const char *phpnamespace; + + const upb_filedef **deps; + const upb_msgdef *msgs; + const upb_enumdef *enums; + const upb_fielddef *exts; + const upb_symtab *symtab; + + int dep_count; + int msg_count; + int enum_count; + int ext_count; + upb_syntax_t syntax; +}; + +struct upb_symtab { + upb_arena *arena; + upb_strtable syms; /* full_name -> packed def ptr */ + upb_strtable files; /* file_name -> upb_filedef* */ + size_t bytes_loaded; +}; + +/* Inside a symtab we store tagged pointers to specific def types. */ +typedef enum { + UPB_DEFTYPE_FIELD = 0, + + /* Only inside symtab table. */ + UPB_DEFTYPE_MSG = 1, + UPB_DEFTYPE_ENUM = 2, + + /* Only inside message table. */ + UPB_DEFTYPE_ONEOF = 1, + UPB_DEFTYPE_FIELD_JSONNAME = 2 +} upb_deftype_t; + +static const void *unpack_def(upb_value v, upb_deftype_t type) { + uintptr_t num = (uintptr_t)upb_value_getconstptr(v); + return (num & 3) == type ? (const void*)(num & ~3) : NULL; +} + +static upb_value pack_def(const void *ptr, upb_deftype_t type) { + uintptr_t num = (uintptr_t)ptr | type; + return upb_value_constptr((const void*)num); +} + +/* isalpha() etc. from are locale-dependent, which we don't want. */ +static bool upb_isbetween(char c, char low, char high) { + return c >= low && c <= high; +} + +static bool upb_isletter(char c) { + return upb_isbetween(c, 'A', 'Z') || upb_isbetween(c, 'a', 'z') || c == '_'; +} + +static bool upb_isalphanum(char c) { + return upb_isletter(c) || upb_isbetween(c, '0', '9'); +} + +static const char *shortdefname(const char *fullname) { + const char *p; + + if (fullname == NULL) { + return NULL; + } else if ((p = strrchr(fullname, '.')) == NULL) { + /* No '.' in the name, return the full string. */ + return fullname; + } else { + /* Return one past the last '.'. */ + return p + 1; + } +} + +/* All submessage fields are lower than all other fields. + * Secondly, fields are increasing in order. */ +uint32_t field_rank(const upb_fielddef *f) { + uint32_t ret = upb_fielddef_number(f); + const uint32_t high_bit = 1 << 30; + UPB_ASSERT(ret < high_bit); + if (!upb_fielddef_issubmsg(f)) + ret |= high_bit; + return ret; +} + +int cmp_fields(const void *p1, const void *p2) { + const upb_fielddef *f1 = *(upb_fielddef*const*)p1; + const upb_fielddef *f2 = *(upb_fielddef*const*)p2; + return field_rank(f1) - field_rank(f2); +} + +/* A few implementation details of handlers. We put these here to avoid + * a def -> handlers dependency. */ + +#define UPB_STATIC_SELECTOR_COUNT 3 /* Warning: also in upb/handlers.h. */ + +static uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) { + return upb_fielddef_isseq(f) ? 2 : 0; +} + +static uint32_t upb_handlers_selectorcount(const upb_fielddef *f) { + uint32_t ret = 1; + if (upb_fielddef_isseq(f)) ret += 2; /* STARTSEQ/ENDSEQ */ + if (upb_fielddef_isstring(f)) ret += 2; /* [STRING]/STARTSTR/ENDSTR */ + if (upb_fielddef_issubmsg(f)) { + /* ENDSUBMSG (STARTSUBMSG is at table beginning) */ + ret += 0; + if (upb_fielddef_lazy(f)) { + /* STARTSTR/ENDSTR/STRING (for lazy) */ + ret += 3; + } + } + return ret; +} + +static void upb_status_setoom(upb_status *status) { + upb_status_seterrmsg(status, "out of memory"); +} + +static void assign_msg_wellknowntype(upb_msgdef *m) { + const char *name = upb_msgdef_fullname(m); + if (name == NULL) { + m->well_known_type = UPB_WELLKNOWN_UNSPECIFIED; + return; + } + if (!strcmp(name, "google.protobuf.Any")) { + m->well_known_type = UPB_WELLKNOWN_ANY; + } else if (!strcmp(name, "google.protobuf.FieldMask")) { + m->well_known_type = UPB_WELLKNOWN_FIELDMASK; + } else if (!strcmp(name, "google.protobuf.Duration")) { + m->well_known_type = UPB_WELLKNOWN_DURATION; + } else if (!strcmp(name, "google.protobuf.Timestamp")) { + m->well_known_type = UPB_WELLKNOWN_TIMESTAMP; + } else if (!strcmp(name, "google.protobuf.DoubleValue")) { + m->well_known_type = UPB_WELLKNOWN_DOUBLEVALUE; + } else if (!strcmp(name, "google.protobuf.FloatValue")) { + m->well_known_type = UPB_WELLKNOWN_FLOATVALUE; + } else if (!strcmp(name, "google.protobuf.Int64Value")) { + m->well_known_type = UPB_WELLKNOWN_INT64VALUE; + } else if (!strcmp(name, "google.protobuf.UInt64Value")) { + m->well_known_type = UPB_WELLKNOWN_UINT64VALUE; + } else if (!strcmp(name, "google.protobuf.Int32Value")) { + m->well_known_type = UPB_WELLKNOWN_INT32VALUE; + } else if (!strcmp(name, "google.protobuf.UInt32Value")) { + m->well_known_type = UPB_WELLKNOWN_UINT32VALUE; + } else if (!strcmp(name, "google.protobuf.BoolValue")) { + m->well_known_type = UPB_WELLKNOWN_BOOLVALUE; + } else if (!strcmp(name, "google.protobuf.StringValue")) { + m->well_known_type = UPB_WELLKNOWN_STRINGVALUE; + } else if (!strcmp(name, "google.protobuf.BytesValue")) { + m->well_known_type = UPB_WELLKNOWN_BYTESVALUE; + } else if (!strcmp(name, "google.protobuf.Value")) { + m->well_known_type = UPB_WELLKNOWN_VALUE; + } else if (!strcmp(name, "google.protobuf.ListValue")) { + m->well_known_type = UPB_WELLKNOWN_LISTVALUE; + } else if (!strcmp(name, "google.protobuf.Struct")) { + m->well_known_type = UPB_WELLKNOWN_STRUCT; + } else { + m->well_known_type = UPB_WELLKNOWN_UNSPECIFIED; + } +} + + +/* upb_enumdef ****************************************************************/ + +const char *upb_enumdef_fullname(const upb_enumdef *e) { + return e->full_name; +} + +const char *upb_enumdef_name(const upb_enumdef *e) { + return shortdefname(e->full_name); +} + +const upb_filedef *upb_enumdef_file(const upb_enumdef *e) { + return e->file; +} + +int32_t upb_enumdef_default(const upb_enumdef *e) { + UPB_ASSERT(upb_enumdef_iton(e, e->defaultval)); + return e->defaultval; +} + +int upb_enumdef_numvals(const upb_enumdef *e) { + return (int)upb_strtable_count(&e->ntoi); +} + +void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) { + /* We iterate over the ntoi table, to account for duplicate numbers. */ + upb_strtable_begin(i, &e->ntoi); +} + +void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); } +bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); } + +bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, + size_t len, int32_t *num) { + upb_value v; + if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) { + return false; + } + if (num) *num = upb_value_getint32(v); + return true; +} + +const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) { + upb_value v; + return upb_inttable_lookup32(&def->iton, num, &v) ? + upb_value_getcstr(v) : NULL; +} + +const char *upb_enum_iter_name(upb_enum_iter *iter) { + return upb_strtable_iter_key(iter).data; +} + +int32_t upb_enum_iter_number(upb_enum_iter *iter) { + return upb_value_getint32(upb_strtable_iter_value(iter)); +} + + +/* upb_fielddef ***************************************************************/ + +const char *upb_fielddef_fullname(const upb_fielddef *f) { + return f->full_name; +} + +upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) { + switch (f->type_) { + case UPB_DESCRIPTOR_TYPE_DOUBLE: + return UPB_TYPE_DOUBLE; + case UPB_DESCRIPTOR_TYPE_FLOAT: + return UPB_TYPE_FLOAT; + case UPB_DESCRIPTOR_TYPE_INT64: + case UPB_DESCRIPTOR_TYPE_SINT64: + case UPB_DESCRIPTOR_TYPE_SFIXED64: + return UPB_TYPE_INT64; + case UPB_DESCRIPTOR_TYPE_INT32: + case UPB_DESCRIPTOR_TYPE_SFIXED32: + case UPB_DESCRIPTOR_TYPE_SINT32: + return UPB_TYPE_INT32; + case UPB_DESCRIPTOR_TYPE_UINT64: + case UPB_DESCRIPTOR_TYPE_FIXED64: + return UPB_TYPE_UINT64; + case UPB_DESCRIPTOR_TYPE_UINT32: + case UPB_DESCRIPTOR_TYPE_FIXED32: + return UPB_TYPE_UINT32; + case UPB_DESCRIPTOR_TYPE_ENUM: + return UPB_TYPE_ENUM; + case UPB_DESCRIPTOR_TYPE_BOOL: + return UPB_TYPE_BOOL; + case UPB_DESCRIPTOR_TYPE_STRING: + return UPB_TYPE_STRING; + case UPB_DESCRIPTOR_TYPE_BYTES: + return UPB_TYPE_BYTES; + case UPB_DESCRIPTOR_TYPE_GROUP: + case UPB_DESCRIPTOR_TYPE_MESSAGE: + return UPB_TYPE_MESSAGE; + } + UPB_UNREACHABLE(); +} + +upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) { + return f->type_; +} + +uint32_t upb_fielddef_index(const upb_fielddef *f) { + return f->index_; +} + +upb_label_t upb_fielddef_label(const upb_fielddef *f) { + return f->label_; +} + +uint32_t upb_fielddef_number(const upb_fielddef *f) { + return f->number_; +} + +bool upb_fielddef_isextension(const upb_fielddef *f) { + return f->is_extension_; +} + +bool upb_fielddef_lazy(const upb_fielddef *f) { + return f->lazy_; +} + +bool upb_fielddef_packed(const upb_fielddef *f) { + return f->packed_; +} + +const char *upb_fielddef_name(const upb_fielddef *f) { + return shortdefname(f->full_name); +} + +const char *upb_fielddef_jsonname(const upb_fielddef *f) { + return f->json_name; +} + +uint32_t upb_fielddef_selectorbase(const upb_fielddef *f) { + return f->selector_base; +} + +const upb_filedef *upb_fielddef_file(const upb_fielddef *f) { + return f->file; +} + +const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) { + return f->msgdef; +} + +const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) { + return f->oneof; +} + +const upb_oneofdef *upb_fielddef_realcontainingoneof(const upb_fielddef *f) { + if (!f->oneof || upb_oneofdef_issynthetic(f->oneof)) return NULL; + return f->oneof; +} + +upb_msgval upb_fielddef_default(const upb_fielddef *f) { + UPB_ASSERT(!upb_fielddef_issubmsg(f)); + upb_msgval ret; + if (upb_fielddef_isstring(f)) { + str_t *str = f->defaultval.str; + if (str) { + ret.str_val.data = str->str; + ret.str_val.size = str->len; + } else { + ret.str_val.size = 0; + } + } else { + memcpy(&ret, &f->defaultval, 8); + } + return ret; +} + +static void chkdefaulttype(const upb_fielddef *f, int ctype) { + UPB_UNUSED(f); + UPB_UNUSED(ctype); +} + +int64_t upb_fielddef_defaultint64(const upb_fielddef *f) { + chkdefaulttype(f, UPB_TYPE_INT64); + return f->defaultval.sint; +} + +int32_t upb_fielddef_defaultint32(const upb_fielddef *f) { + chkdefaulttype(f, UPB_TYPE_INT32); + return (int32_t)f->defaultval.sint; +} + +uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) { + chkdefaulttype(f, UPB_TYPE_UINT64); + return f->defaultval.uint; +} + +uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) { + chkdefaulttype(f, UPB_TYPE_UINT32); + return (uint32_t)f->defaultval.uint; +} + +bool upb_fielddef_defaultbool(const upb_fielddef *f) { + chkdefaulttype(f, UPB_TYPE_BOOL); + return f->defaultval.boolean; +} + +float upb_fielddef_defaultfloat(const upb_fielddef *f) { + chkdefaulttype(f, UPB_TYPE_FLOAT); + return f->defaultval.flt; +} + +double upb_fielddef_defaultdouble(const upb_fielddef *f) { + chkdefaulttype(f, UPB_TYPE_DOUBLE); + return f->defaultval.dbl; +} + +const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) { + str_t *str = f->defaultval.str; + UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_STRING || + upb_fielddef_type(f) == UPB_TYPE_BYTES || + upb_fielddef_type(f) == UPB_TYPE_ENUM); + if (str) { + if (len) *len = str->len; + return str->str; + } else { + if (len) *len = 0; + return NULL; + } +} + +const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) { + return upb_fielddef_type(f) == UPB_TYPE_MESSAGE ? f->sub.msgdef : NULL; +} + +const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) { + return upb_fielddef_type(f) == UPB_TYPE_ENUM ? f->sub.enumdef : NULL; +} + +const upb_msglayout_field *upb_fielddef_layout(const upb_fielddef *f) { + return &f->msgdef->layout->fields[f->layout_index]; +} + +bool upb_fielddef_issubmsg(const upb_fielddef *f) { + return upb_fielddef_type(f) == UPB_TYPE_MESSAGE; +} + +bool upb_fielddef_isstring(const upb_fielddef *f) { + return upb_fielddef_type(f) == UPB_TYPE_STRING || + upb_fielddef_type(f) == UPB_TYPE_BYTES; +} + +bool upb_fielddef_isseq(const upb_fielddef *f) { + return upb_fielddef_label(f) == UPB_LABEL_REPEATED; +} + +bool upb_fielddef_isprimitive(const upb_fielddef *f) { + return !upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f); +} + +bool upb_fielddef_ismap(const upb_fielddef *f) { + return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) && + upb_msgdef_mapentry(upb_fielddef_msgsubdef(f)); +} + +bool upb_fielddef_hassubdef(const upb_fielddef *f) { + return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM; +} + +bool upb_fielddef_haspresence(const upb_fielddef *f) { + if (upb_fielddef_isseq(f)) return false; + return upb_fielddef_issubmsg(f) || upb_fielddef_containingoneof(f) || + f->file->syntax == UPB_SYNTAX_PROTO2; +} + +static bool between(int32_t x, int32_t low, int32_t high) { + return x >= low && x <= high; +} + +bool upb_fielddef_checklabel(int32_t label) { return between(label, 1, 3); } +bool upb_fielddef_checktype(int32_t type) { return between(type, 1, 11); } +bool upb_fielddef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); } + +bool upb_fielddef_checkdescriptortype(int32_t type) { + return between(type, 1, 18); +} + +/* upb_msgdef *****************************************************************/ + +const char *upb_msgdef_fullname(const upb_msgdef *m) { + return m->full_name; +} + +const upb_filedef *upb_msgdef_file(const upb_msgdef *m) { + return m->file; +} + +const char *upb_msgdef_name(const upb_msgdef *m) { + return shortdefname(m->full_name); +} + +upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) { + return m->file->syntax; +} + +size_t upb_msgdef_selectorcount(const upb_msgdef *m) { + return m->selector_count; +} + +uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m) { + return m->submsg_field_count; +} + +const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) { + upb_value val; + return upb_inttable_lookup32(&m->itof, i, &val) ? + upb_value_getconstptr(val) : NULL; +} + +const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name, + size_t len) { + upb_value val; + + if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { + return NULL; + } + + return unpack_def(val, UPB_DEFTYPE_FIELD); +} + +const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name, + size_t len) { + upb_value val; + + if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { + return NULL; + } + + return unpack_def(val, UPB_DEFTYPE_ONEOF); +} + +bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len, + const upb_fielddef **f, const upb_oneofdef **o) { + upb_value val; + + if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { + return false; + } + + *o = unpack_def(val, UPB_DEFTYPE_ONEOF); + *f = unpack_def(val, UPB_DEFTYPE_FIELD); + return *o || *f; /* False if this was a JSON name. */ +} + +const upb_fielddef *upb_msgdef_lookupjsonname(const upb_msgdef *m, + const char *name, size_t len) { + upb_value val; + const upb_fielddef* f; + + if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { + return NULL; + } + + f = unpack_def(val, UPB_DEFTYPE_FIELD); + if (!f) f = unpack_def(val, UPB_DEFTYPE_FIELD_JSONNAME); + + return f; +} + +int upb_msgdef_numfields(const upb_msgdef *m) { + return m->field_count; +} + +int upb_msgdef_numoneofs(const upb_msgdef *m) { + return m->oneof_count; +} + +int upb_msgdef_numrealoneofs(const upb_msgdef *m) { + return m->real_oneof_count; +} + +int upb_msgdef_fieldcount(const upb_msgdef *m) { + return m->field_count; +} + +int upb_msgdef_oneofcount(const upb_msgdef *m) { + return m->oneof_count; +} + +int upb_msgdef_realoneofcount(const upb_msgdef *m) { + return m->real_oneof_count; +} + +const upb_msglayout *upb_msgdef_layout(const upb_msgdef *m) { + return m->layout; +} + +const upb_fielddef *upb_msgdef_field(const upb_msgdef *m, int i) { + UPB_ASSERT(i >= 0 && i < m->field_count); + return &m->fields[i]; +} + +const upb_oneofdef *upb_msgdef_oneof(const upb_msgdef *m, int i) { + UPB_ASSERT(i >= 0 && i < m->oneof_count); + return &m->oneofs[i]; +} + +bool upb_msgdef_mapentry(const upb_msgdef *m) { + return m->map_entry; +} + +upb_wellknowntype_t upb_msgdef_wellknowntype(const upb_msgdef *m) { + return m->well_known_type; +} + +bool upb_msgdef_isnumberwrapper(const upb_msgdef *m) { + upb_wellknowntype_t type = upb_msgdef_wellknowntype(m); + return type >= UPB_WELLKNOWN_DOUBLEVALUE && + type <= UPB_WELLKNOWN_UINT32VALUE; +} + +bool upb_msgdef_iswrapper(const upb_msgdef *m) { + upb_wellknowntype_t type = upb_msgdef_wellknowntype(m); + return type >= UPB_WELLKNOWN_DOUBLEVALUE && + type <= UPB_WELLKNOWN_BOOLVALUE; +} + +void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) { + upb_inttable_begin(iter, &m->itof); +} + +void upb_msg_field_next(upb_msg_field_iter *iter) { upb_inttable_next(iter); } + +bool upb_msg_field_done(const upb_msg_field_iter *iter) { + return upb_inttable_done(iter); +} + +upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) { + return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter)); +} + +void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) { + upb_inttable_iter_setdone(iter); +} + +bool upb_msg_field_iter_isequal(const upb_msg_field_iter * iter1, + const upb_msg_field_iter * iter2) { + return upb_inttable_iter_isequal(iter1, iter2); +} + +void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) { + upb_strtable_begin(iter, &m->ntof); + /* We need to skip past any initial fields. */ + while (!upb_strtable_done(iter) && + !unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF)) { + upb_strtable_next(iter); + } +} + +void upb_msg_oneof_next(upb_msg_oneof_iter *iter) { + /* We need to skip past fields to return only oneofs. */ + do { + upb_strtable_next(iter); + } while (!upb_strtable_done(iter) && + !unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF)); +} + +bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) { + return upb_strtable_done(iter); +} + +const upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) { + return unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF); +} + +void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) { + upb_strtable_iter_setdone(iter); +} + +bool upb_msg_oneof_iter_isequal(const upb_msg_oneof_iter *iter1, + const upb_msg_oneof_iter *iter2) { + return upb_strtable_iter_isequal(iter1, iter2); +} + +/* upb_oneofdef ***************************************************************/ + +const char *upb_oneofdef_name(const upb_oneofdef *o) { + return shortdefname(o->full_name); +} + +const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) { + return o->parent; +} + +int upb_oneofdef_fieldcount(const upb_oneofdef *o) { + return o->field_count; +} + +const upb_fielddef *upb_oneofdef_field(const upb_oneofdef *o, int i) { + UPB_ASSERT(i < o->field_count); + return o->fields[i]; +} + +int upb_oneofdef_numfields(const upb_oneofdef *o) { + return o->field_count; +} + +uint32_t upb_oneofdef_index(const upb_oneofdef *o) { + return o - o->parent->oneofs; +} + +bool upb_oneofdef_issynthetic(const upb_oneofdef *o) { + return o->synthetic; +} + +const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o, + const char *name, size_t length) { + upb_value val; + return upb_strtable_lookup2(&o->ntof, name, length, &val) ? + upb_value_getptr(val) : NULL; +} + +const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) { + upb_value val; + return upb_inttable_lookup32(&o->itof, num, &val) ? + upb_value_getptr(val) : NULL; +} + +void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) { + upb_inttable_begin(iter, &o->itof); +} + +void upb_oneof_next(upb_oneof_iter *iter) { + upb_inttable_next(iter); +} + +bool upb_oneof_done(upb_oneof_iter *iter) { + return upb_inttable_done(iter); +} + +upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) { + return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter)); +} + +void upb_oneof_iter_setdone(upb_oneof_iter *iter) { + upb_inttable_iter_setdone(iter); +} + +/* upb_filedef ****************************************************************/ + +const char *upb_filedef_name(const upb_filedef *f) { + return f->name; +} + +const char *upb_filedef_package(const upb_filedef *f) { + return f->package; +} + +const char *upb_filedef_phpprefix(const upb_filedef *f) { + return f->phpprefix; +} + +const char *upb_filedef_phpnamespace(const upb_filedef *f) { + return f->phpnamespace; +} + +upb_syntax_t upb_filedef_syntax(const upb_filedef *f) { + return f->syntax; +} + +int upb_filedef_msgcount(const upb_filedef *f) { + return f->msg_count; +} + +int upb_filedef_depcount(const upb_filedef *f) { + return f->dep_count; +} + +int upb_filedef_enumcount(const upb_filedef *f) { + return f->enum_count; +} + +const upb_filedef *upb_filedef_dep(const upb_filedef *f, int i) { + return i < 0 || i >= f->dep_count ? NULL : f->deps[i]; +} + +const upb_msgdef *upb_filedef_msg(const upb_filedef *f, int i) { + return i < 0 || i >= f->msg_count ? NULL : &f->msgs[i]; +} + +const upb_enumdef *upb_filedef_enum(const upb_filedef *f, int i) { + return i < 0 || i >= f->enum_count ? NULL : &f->enums[i]; +} + +const upb_symtab *upb_filedef_symtab(const upb_filedef *f) { + return f->symtab; +} + +void upb_symtab_free(upb_symtab *s) { + upb_arena_free(s->arena); + upb_gfree(s); +} + +upb_symtab *upb_symtab_new(void) { + upb_symtab *s = upb_gmalloc(sizeof(*s)); + upb_alloc *alloc; + + if (!s) { + return NULL; + } + + s->arena = upb_arena_new(); + s->bytes_loaded = 0; + alloc = upb_arena_alloc(s->arena); + + if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, 32, alloc) || + !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, 4, alloc)) { + upb_arena_free(s->arena); + upb_gfree(s); + s = NULL; + } + return s; +} + +const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) { + upb_value v; + return upb_strtable_lookup(&s->syms, sym, &v) ? + unpack_def(v, UPB_DEFTYPE_MSG) : NULL; +} + +const upb_msgdef *upb_symtab_lookupmsg2(const upb_symtab *s, const char *sym, + size_t len) { + upb_value v; + return upb_strtable_lookup2(&s->syms, sym, len, &v) ? + unpack_def(v, UPB_DEFTYPE_MSG) : NULL; +} + +const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) { + upb_value v; + return upb_strtable_lookup(&s->syms, sym, &v) ? + unpack_def(v, UPB_DEFTYPE_ENUM) : NULL; +} + +const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, const char *name) { + upb_value v; + return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v) + : NULL; +} + +const upb_filedef *upb_symtab_lookupfile2( + const upb_symtab *s, const char *name, size_t len) { + upb_value v; + return upb_strtable_lookup2(&s->files, name, len, &v) ? + upb_value_getconstptr(v) : NULL; +} + +int upb_symtab_filecount(const upb_symtab *s) { + return (int)upb_strtable_count(&s->files); +} + +/* Code to build defs from descriptor protos. *********************************/ + +/* There is a question of how much validation to do here. It will be difficult + * to perfectly match the amount of validation performed by proto2. But since + * this code is used to directly build defs from Ruby (for example) we do need + * to validate important constraints like uniqueness of names and numbers. */ + +#define CHK_OOM(x) if (!(x)) { symtab_oomerr(ctx); } + +typedef struct { + upb_symtab *symtab; + upb_filedef *file; /* File we are building. */ + upb_arena *file_arena; /* Allocate defs here. */ + upb_alloc *alloc; /* Alloc of file_arena, for tables. */ + const upb_msglayout **layouts; /* NULL if we should build layouts. */ + upb_status *status; /* Record errors here. */ + jmp_buf err; /* longjmp() on error. */ +} symtab_addctx; + +UPB_NORETURN UPB_NOINLINE UPB_PRINTF(2, 3) +static void symtab_errf(symtab_addctx *ctx, const char *fmt, ...) { + va_list argp; + va_start(argp, fmt); + upb_status_vseterrf(ctx->status, fmt, argp); + va_end(argp); + UPB_LONGJMP(ctx->err, 1); +} + +UPB_NORETURN UPB_NOINLINE +static void symtab_oomerr(symtab_addctx *ctx) { + upb_status_setoom(ctx->status); + UPB_LONGJMP(ctx->err, 1); +} + +void *symtab_alloc(symtab_addctx *ctx, size_t bytes) { + void *ret = upb_arena_malloc(ctx->file_arena, bytes); + if (!ret) symtab_oomerr(ctx); + return ret; +} + +static void check_ident(symtab_addctx *ctx, upb_strview name, bool full) { + const char *str = name.data; + size_t len = name.size; + bool start = true; + size_t i; + for (i = 0; i < len; i++) { + char c = str[i]; + if (c == '.') { + if (start || !full) { + symtab_errf(ctx, "invalid name: unexpected '.' (%.*s)", (int)len, str); + } + start = true; + } else if (start) { + if (!upb_isletter(c)) { + symtab_errf( + ctx, + "invalid name: path components must start with a letter (%.*s)", + (int)len, str); + } + start = false; + } else { + if (!upb_isalphanum(c)) { + symtab_errf(ctx, "invalid name: non-alphanumeric character (%.*s)", + (int)len, str); + } + } + } + if (start) { + symtab_errf(ctx, "invalid name: empty part (%.*s)", (int)len, str); + } +} + +static size_t div_round_up(size_t n, size_t d) { + return (n + d - 1) / d; +} + +static size_t upb_msgval_sizeof(upb_fieldtype_t type) { + switch (type) { + case UPB_TYPE_DOUBLE: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT64: + return 8; + case UPB_TYPE_ENUM: + case UPB_TYPE_INT32: + case UPB_TYPE_UINT32: + case UPB_TYPE_FLOAT: + return 4; + case UPB_TYPE_BOOL: + return 1; + case UPB_TYPE_MESSAGE: + return sizeof(void*); + case UPB_TYPE_BYTES: + case UPB_TYPE_STRING: + return sizeof(upb_strview); + } + UPB_UNREACHABLE(); +} + +static uint8_t upb_msg_fielddefsize(const upb_fielddef *f) { + if (upb_msgdef_mapentry(upb_fielddef_containingtype(f))) { + upb_map_entry ent; + UPB_ASSERT(sizeof(ent.k) == sizeof(ent.v)); + return sizeof(ent.k); + } else if (upb_fielddef_isseq(f)) { + return sizeof(void*); + } else { + return upb_msgval_sizeof(upb_fielddef_type(f)); + } +} + +static uint32_t upb_msglayout_place(upb_msglayout *l, size_t size) { + uint32_t ret; + + l->size = UPB_ALIGN_UP(l->size, size); + ret = l->size; + l->size += size; + return ret; +} + +static int field_number_cmp(const void *p1, const void *p2) { + const upb_msglayout_field *f1 = p1; + const upb_msglayout_field *f2 = p2; + return f1->number - f2->number; +} + +static void assign_layout_indices(const upb_msgdef *m, upb_msglayout_field *fields) { + int i; + int n = upb_msgdef_numfields(m); + for (i = 0; i < n; i++) { + upb_fielddef *f = (upb_fielddef*)upb_msgdef_itof(m, fields[i].number); + UPB_ASSERT(f); + f->layout_index = i; + } +} + +/* This function is the dynamic equivalent of message_layout.{cc,h} in upbc. + * It computes a dynamic layout for all of the fields in |m|. */ +static void make_layout(symtab_addctx *ctx, const upb_msgdef *m) { + upb_msglayout *l = (upb_msglayout*)m->layout; + upb_msg_field_iter it; + upb_msg_oneof_iter oit; + size_t hasbit; + size_t submsg_count = m->submsg_field_count; + const upb_msglayout **submsgs; + upb_msglayout_field *fields; + + memset(l, 0, sizeof(*l) + sizeof(_upb_fasttable_entry)); + + fields = symtab_alloc(ctx, upb_msgdef_numfields(m) * sizeof(*fields)); + submsgs = symtab_alloc(ctx, submsg_count * sizeof(*submsgs)); + + l->field_count = upb_msgdef_numfields(m); + l->fields = fields; + l->submsgs = submsgs; + l->table_mask = 0; + + /* TODO(haberman): initialize fast tables so that reflection-based parsing + * can get the same speeds as linked-in types. */ + l->fasttable[0].field_parser = &fastdecode_generic; + l->fasttable[0].field_data = 0; + + if (upb_msgdef_mapentry(m)) { + /* TODO(haberman): refactor this method so this special case is more + * elegant. */ + const upb_fielddef *key = upb_msgdef_itof(m, 1); + const upb_fielddef *val = upb_msgdef_itof(m, 2); + fields[0].number = 1; + fields[1].number = 2; + fields[0].label = UPB_LABEL_OPTIONAL; + fields[1].label = UPB_LABEL_OPTIONAL; + fields[0].presence = 0; + fields[1].presence = 0; + fields[0].descriptortype = upb_fielddef_descriptortype(key); + fields[1].descriptortype = upb_fielddef_descriptortype(val); + fields[0].offset = 0; + fields[1].offset = sizeof(upb_strview); + fields[1].submsg_index = 0; + + if (upb_fielddef_type(val) == UPB_TYPE_MESSAGE) { + submsgs[0] = upb_fielddef_msgsubdef(val)->layout; + } + + l->field_count = 2; + l->size = 2 * sizeof(upb_strview); + l->size = UPB_ALIGN_UP(l->size, 8); + return; + } + + /* Allocate data offsets in three stages: + * + * 1. hasbits. + * 2. regular fields. + * 3. oneof fields. + * + * OPT: There is a lot of room for optimization here to minimize the size. + */ + + /* Allocate hasbits and set basic field attributes. */ + submsg_count = 0; + for (upb_msg_field_begin(&it, m), hasbit = 0; + !upb_msg_field_done(&it); + upb_msg_field_next(&it)) { + upb_fielddef* f = upb_msg_iter_field(&it); + upb_msglayout_field *field = &fields[upb_fielddef_index(f)]; + + field->number = upb_fielddef_number(f); + field->descriptortype = upb_fielddef_descriptortype(f); + field->label = upb_fielddef_label(f); + + if (field->descriptortype == UPB_DTYPE_STRING && + f->file->syntax == UPB_SYNTAX_PROTO2) { + /* See TableDescriptorType() in upbc/generator.cc for details and + * rationale. */ + field->descriptortype = UPB_DTYPE_BYTES; + } + + if (upb_fielddef_ismap(f)) { + field->label = _UPB_LABEL_MAP; + } else if (upb_fielddef_packed(f)) { + field->label = _UPB_LABEL_PACKED; + } + + if (upb_fielddef_issubmsg(f)) { + const upb_msgdef *subm = upb_fielddef_msgsubdef(f); + field->submsg_index = submsg_count++; + submsgs[field->submsg_index] = subm->layout; + } + + if (upb_fielddef_haspresence(f) && !upb_fielddef_realcontainingoneof(f)) { + /* We don't use hasbit 0, so that 0 can indicate "no presence" in the + * table. This wastes one hasbit, but we don't worry about it for now. */ + field->presence = ++hasbit; + } else { + field->presence = 0; + } + } + + /* Account for space used by hasbits. */ + l->size = div_round_up(hasbit, 8); + + /* Allocate non-oneof fields. */ + for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it); + upb_msg_field_next(&it)) { + const upb_fielddef* f = upb_msg_iter_field(&it); + size_t field_size = upb_msg_fielddefsize(f); + size_t index = upb_fielddef_index(f); + + if (upb_fielddef_realcontainingoneof(f)) { + /* Oneofs are handled separately below. */ + continue; + } + + fields[index].offset = upb_msglayout_place(l, field_size); + } + + /* Allocate oneof fields. Each oneof field consists of a uint32 for the case + * and space for the actual data. */ + for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit); + upb_msg_oneof_next(&oit)) { + const upb_oneofdef* o = upb_msg_iter_oneof(&oit); + upb_oneof_iter fit; + + size_t case_size = sizeof(uint32_t); /* Could potentially optimize this. */ + size_t field_size = 0; + uint32_t case_offset; + uint32_t data_offset; + + if (upb_oneofdef_issynthetic(o)) continue; + + /* Calculate field size: the max of all field sizes. */ + for (upb_oneof_begin(&fit, o); + !upb_oneof_done(&fit); + upb_oneof_next(&fit)) { + const upb_fielddef* f = upb_oneof_iter_field(&fit); + field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f)); + } + + /* Align and allocate case offset. */ + case_offset = upb_msglayout_place(l, case_size); + data_offset = upb_msglayout_place(l, field_size); + + for (upb_oneof_begin(&fit, o); + !upb_oneof_done(&fit); + upb_oneof_next(&fit)) { + const upb_fielddef* f = upb_oneof_iter_field(&fit); + fields[upb_fielddef_index(f)].offset = data_offset; + fields[upb_fielddef_index(f)].presence = ~case_offset; + } + } + + /* Size of the entire structure should be a multiple of its greatest + * alignment. TODO: track overall alignment for real? */ + l->size = UPB_ALIGN_UP(l->size, 8); + + /* Sort fields by number. */ + qsort(fields, upb_msgdef_numfields(m), sizeof(*fields), field_number_cmp); + assign_layout_indices(m, fields); +} + +static void assign_msg_indices(symtab_addctx *ctx, upb_msgdef *m) { + /* Sort fields. upb internally relies on UPB_TYPE_MESSAGE fields having the + * lowest indexes, but we do not publicly guarantee this. */ + upb_msg_field_iter j; + int i; + uint32_t selector; + int n = upb_msgdef_numfields(m); + upb_fielddef **fields; + + if (n == 0) { + m->selector_count = UPB_STATIC_SELECTOR_COUNT; + m->submsg_field_count = 0; + return; + } + + fields = upb_gmalloc(n * sizeof(*fields)); + + m->submsg_field_count = 0; + for(i = 0, upb_msg_field_begin(&j, m); + !upb_msg_field_done(&j); + upb_msg_field_next(&j), i++) { + upb_fielddef *f = upb_msg_iter_field(&j); + UPB_ASSERT(f->msgdef == m); + if (upb_fielddef_issubmsg(f)) { + m->submsg_field_count++; + } + fields[i] = f; + } + + qsort(fields, n, sizeof(*fields), cmp_fields); + + selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count; + for (i = 0; i < n; i++) { + upb_fielddef *f = fields[i]; + f->index_ = i; + f->selector_base = selector + upb_handlers_selectorbaseoffset(f); + selector += upb_handlers_selectorcount(f); + } + m->selector_count = selector; + + upb_gfree(fields); +} + +static char *strviewdup(symtab_addctx *ctx, upb_strview view) { + return upb_strdup2(view.data, view.size, ctx->alloc); +} + +static bool streql2(const char *a, size_t n, const char *b) { + return n == strlen(b) && memcmp(a, b, n) == 0; +} + +static bool streql_view(upb_strview view, const char *b) { + return streql2(view.data, view.size, b); +} + +static const char *makefullname(symtab_addctx *ctx, const char *prefix, + upb_strview name) { + if (prefix) { + /* ret = prefix + '.' + name; */ + size_t n = strlen(prefix); + char *ret = symtab_alloc(ctx, n + name.size + 2); + strcpy(ret, prefix); + ret[n] = '.'; + memcpy(&ret[n + 1], name.data, name.size); + ret[n + 1 + name.size] = '\0'; + return ret; + } else { + return strviewdup(ctx, name); + } +} + +static void finalize_oneofs(symtab_addctx *ctx, upb_msgdef *m) { + int i; + int synthetic_count = 0; + upb_oneofdef *mutable_oneofs = (upb_oneofdef*)m->oneofs; + + for (i = 0; i < m->oneof_count; i++) { + upb_oneofdef *o = &mutable_oneofs[i]; + + if (o->synthetic && o->field_count != 1) { + symtab_errf(ctx, "Synthetic oneofs must have one field, not %d: %s", + o->field_count, upb_oneofdef_name(o)); + } + + if (o->synthetic) { + synthetic_count++; + } else if (synthetic_count != 0) { + symtab_errf(ctx, "Synthetic oneofs must be after all other oneofs: %s", + upb_oneofdef_name(o)); + } + + o->fields = symtab_alloc(ctx, sizeof(upb_fielddef *) * o->field_count); + o->field_count = 0; + } + + for (i = 0; i < m->field_count; i++) { + const upb_fielddef *f = &m->fields[i]; + upb_oneofdef *o = (upb_oneofdef*)f->oneof; + if (o) { + o->fields[o->field_count++] = f; + } + } + + m->real_oneof_count = m->oneof_count - synthetic_count; +} + +size_t getjsonname(const char *name, char *buf, size_t len) { + size_t src, dst = 0; + bool ucase_next = false; + +#define WRITE(byte) \ + ++dst; \ + if (dst < len) buf[dst - 1] = byte; \ + else if (dst == len) buf[dst - 1] = '\0' + + if (!name) { + WRITE('\0'); + return 0; + } + + /* Implement the transformation as described in the spec: + * 1. upper case all letters after an underscore. + * 2. remove all underscores. + */ + for (src = 0; name[src]; src++) { + if (name[src] == '_') { + ucase_next = true; + continue; + } + + if (ucase_next) { + WRITE(toupper(name[src])); + ucase_next = false; + } else { + WRITE(name[src]); + } + } + + WRITE('\0'); + return dst; + +#undef WRITE +} + +static char* makejsonname(symtab_addctx *ctx, const char* name) { + size_t size = getjsonname(name, NULL, 0); + char* json_name = symtab_alloc(ctx, size); + getjsonname(name, json_name, size); + return json_name; +} + +static void symtab_add(symtab_addctx *ctx, const char *name, upb_value v) { + if (upb_strtable_lookup(&ctx->symtab->syms, name, NULL)) { + symtab_errf(ctx, "duplicate symbol '%s'", name); + } + upb_alloc *alloc = upb_arena_alloc(ctx->symtab->arena); + size_t len = strlen(name); + CHK_OOM(upb_strtable_insert3(&ctx->symtab->syms, name, len, v, alloc)); +} + +/* Given a symbol and the base symbol inside which it is defined, find the + * symbol's definition in t. */ +static const void *symtab_resolve(symtab_addctx *ctx, const upb_fielddef *f, + const char *base, upb_strview sym, + upb_deftype_t type) { + const upb_strtable *t = &ctx->symtab->syms; + if(sym.size == 0) goto notfound; + if(sym.data[0] == '.') { + /* Symbols starting with '.' are absolute, so we do a single lookup. + * Slice to omit the leading '.' */ + upb_value v; + if (!upb_strtable_lookup2(t, sym.data + 1, sym.size - 1, &v)) { + goto notfound; + } + + const void *ret = unpack_def(v, type); + if (!ret) { + symtab_errf(ctx, "type mismatch when resolving field %s, name %s", + f->full_name, sym.data); + } + return ret; + } else { + /* Remove components from base until we find an entry or run out. + * TODO: This branch is totally broken, but currently not used. */ + (void)base; + UPB_ASSERT(false); + goto notfound; + } + +notfound: + symtab_errf(ctx, "couldn't resolve name '%s'", sym.data); +} + +static void create_oneofdef( + symtab_addctx *ctx, upb_msgdef *m, + const google_protobuf_OneofDescriptorProto *oneof_proto) { + upb_oneofdef *o; + upb_strview name = google_protobuf_OneofDescriptorProto_name(oneof_proto); + upb_value v; + + o = (upb_oneofdef*)&m->oneofs[m->oneof_count++]; + o->parent = m; + o->full_name = makefullname(ctx, m->full_name, name); + o->field_count = 0; + o->synthetic = false; + + v = pack_def(o, UPB_DEFTYPE_ONEOF); + symtab_add(ctx, o->full_name, v); + CHK_OOM(upb_strtable_insert3(&m->ntof, name.data, name.size, v, ctx->alloc)); + + CHK_OOM(upb_inttable_init2(&o->itof, UPB_CTYPE_CONSTPTR, ctx->alloc)); + CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, 4, ctx->alloc)); +} + +static str_t *newstr(symtab_addctx *ctx, const char *data, size_t len) { + str_t *ret = symtab_alloc(ctx, sizeof(*ret) + len); + if (!ret) return NULL; + ret->len = len; + if (len) memcpy(ret->str, data, len); + ret->str[len] = '\0'; + return ret; +} + +static void parse_default(symtab_addctx *ctx, const char *str, size_t len, + upb_fielddef *f) { + char *end; + char nullz[64]; + errno = 0; + + switch (upb_fielddef_type(f)) { + case UPB_TYPE_INT32: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT32: + case UPB_TYPE_UINT64: + case UPB_TYPE_DOUBLE: + case UPB_TYPE_FLOAT: + /* Standard C number parsing functions expect null-terminated strings. */ + if (len >= sizeof(nullz) - 1) { + symtab_errf(ctx, "Default too long: %.*s", (int)len, str); + } + memcpy(nullz, str, len); + nullz[len] = '\0'; + str = nullz; + break; + default: + break; + } + + switch (upb_fielddef_type(f)) { + case UPB_TYPE_INT32: { + long val = strtol(str, &end, 0); + if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.sint = val; + break; + } + case UPB_TYPE_ENUM: { + const upb_enumdef *e = f->sub.enumdef; + int32_t val; + if (!upb_enumdef_ntoi(e, str, len, &val)) { + goto invalid; + } + f->defaultval.sint = val; + break; + } + case UPB_TYPE_INT64: { + /* XXX: Need to write our own strtoll, since it's not available in c89. */ + int64_t val = strtol(str, &end, 0); + if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.sint = val; + break; + } + case UPB_TYPE_UINT32: { + unsigned long val = strtoul(str, &end, 0); + if (val > UINT32_MAX || errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.uint = val; + break; + } + case UPB_TYPE_UINT64: { + /* XXX: Need to write our own strtoull, since it's not available in c89. */ + uint64_t val = strtoul(str, &end, 0); + if (val > UINT64_MAX || errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.uint = val; + break; + } + case UPB_TYPE_DOUBLE: { + double val = strtod(str, &end); + if (errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.dbl = val; + break; + } + case UPB_TYPE_FLOAT: { + /* XXX: Need to write our own strtof, since it's not available in c89. */ + float val = strtod(str, &end); + if (errno == ERANGE || *end) { + goto invalid; + } + f->defaultval.flt = val; + break; + } + case UPB_TYPE_BOOL: { + if (streql2(str, len, "false")) { + f->defaultval.boolean = false; + } else if (streql2(str, len, "true")) { + f->defaultval.boolean = true; + } else { + } + break; + } + case UPB_TYPE_STRING: + f->defaultval.str = newstr(ctx, str, len); + break; + case UPB_TYPE_BYTES: + /* XXX: need to interpret the C-escaped value. */ + f->defaultval.str = newstr(ctx, str, len); + break; + case UPB_TYPE_MESSAGE: + /* Should not have a default value. */ + symtab_errf(ctx, "Message should not have a default (%s)", + upb_fielddef_fullname(f)); + } + + return; + +invalid: + symtab_errf(ctx, "Invalid default '%.*s' for field %s", (int)len, str, + upb_fielddef_fullname(f)); +} + +static void set_default_default(symtab_addctx *ctx, upb_fielddef *f) { + switch (upb_fielddef_type(f)) { + case UPB_TYPE_INT32: + case UPB_TYPE_INT64: + case UPB_TYPE_ENUM: + f->defaultval.sint = 0; + break; + case UPB_TYPE_UINT64: + case UPB_TYPE_UINT32: + f->defaultval.uint = 0; + break; + case UPB_TYPE_DOUBLE: + case UPB_TYPE_FLOAT: + f->defaultval.dbl = 0; + break; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + f->defaultval.str = newstr(ctx, NULL, 0); + break; + case UPB_TYPE_BOOL: + f->defaultval.boolean = false; + break; + case UPB_TYPE_MESSAGE: + break; + } +} + +static void create_fielddef( + symtab_addctx *ctx, const char *prefix, upb_msgdef *m, + const google_protobuf_FieldDescriptorProto *field_proto) { + upb_alloc *alloc = ctx->alloc; + upb_fielddef *f; + const google_protobuf_FieldOptions *options; + upb_strview name; + const char *full_name; + const char *json_name; + const char *shortname; + uint32_t field_number; + + if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) { + symtab_errf(ctx, "field has no name (%s)", upb_msgdef_fullname(m)); + } + + name = google_protobuf_FieldDescriptorProto_name(field_proto); + check_ident(ctx, name, false); + full_name = makefullname(ctx, prefix, name); + shortname = shortdefname(full_name); + + if (google_protobuf_FieldDescriptorProto_has_json_name(field_proto)) { + json_name = strviewdup( + ctx, google_protobuf_FieldDescriptorProto_json_name(field_proto)); + } else { + json_name = makejsonname(ctx, shortname); + } + + field_number = google_protobuf_FieldDescriptorProto_number(field_proto); + + if (field_number == 0 || field_number > UPB_MAX_FIELDNUMBER) { + symtab_errf(ctx, "invalid field number (%u)", field_number); + } + + if (m) { + /* direct message field. */ + upb_value v, field_v, json_v; + size_t json_size; + + f = (upb_fielddef*)&m->fields[m->field_count++]; + f->msgdef = m; + f->is_extension_ = false; + + if (upb_strtable_lookup(&m->ntof, shortname, NULL)) { + symtab_errf(ctx, "duplicate field name (%s)", shortname); + } + + if (upb_strtable_lookup(&m->ntof, json_name, NULL)) { + symtab_errf(ctx, "duplicate json_name (%s)", json_name); + } + + if (upb_inttable_lookup(&m->itof, field_number, NULL)) { + symtab_errf(ctx, "duplicate field number (%u)", field_number); + } + + field_v = pack_def(f, UPB_DEFTYPE_FIELD); + json_v = pack_def(f, UPB_DEFTYPE_FIELD_JSONNAME); + v = upb_value_constptr(f); + json_size = strlen(json_name); + + CHK_OOM( + upb_strtable_insert3(&m->ntof, name.data, name.size, field_v, alloc)); + CHK_OOM(upb_inttable_insert2(&m->itof, field_number, v, alloc)); + + if (strcmp(shortname, json_name) != 0) { + upb_strtable_insert3(&m->ntof, json_name, json_size, json_v, alloc); + } + + if (ctx->layouts) { + const upb_msglayout_field *fields = m->layout->fields; + int count = m->layout->field_count; + bool found = false; + int i; + for (i = 0; i < count; i++) { + if (fields[i].number == field_number) { + f->layout_index = i; + found = true; + break; + } + } + UPB_ASSERT(found); + } + } else { + /* extension field. */ + f = (upb_fielddef*)&ctx->file->exts[ctx->file->ext_count++]; + f->is_extension_ = true; + symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_FIELD)); + } + + f->full_name = full_name; + f->json_name = json_name; + f->file = ctx->file; + f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto); + f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto); + f->number_ = field_number; + f->oneof = NULL; + f->proto3_optional_ = + google_protobuf_FieldDescriptorProto_proto3_optional(field_proto); + + /* We can't resolve the subdef or (in the case of extensions) the containing + * message yet, because it may not have been defined yet. We stash a pointer + * to the field_proto until later when we can properly resolve it. */ + f->sub.unresolved = field_proto; + + if (f->label_ == UPB_LABEL_REQUIRED && f->file->syntax == UPB_SYNTAX_PROTO3) { + symtab_errf(ctx, "proto3 fields cannot be required (%s)", f->full_name); + } + + if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) { + int oneof_index = + google_protobuf_FieldDescriptorProto_oneof_index(field_proto); + upb_oneofdef *oneof; + upb_value v = upb_value_constptr(f); + + if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) { + symtab_errf(ctx, "fields in oneof must have OPTIONAL label (%s)", + f->full_name); + } + + if (!m) { + symtab_errf(ctx, "oneof_index provided for extension field (%s)", + f->full_name); + } + + if (oneof_index >= m->oneof_count) { + symtab_errf(ctx, "oneof_index out of range (%s)", f->full_name); + } + + oneof = (upb_oneofdef*)&m->oneofs[oneof_index]; + f->oneof = oneof; + + oneof->field_count++; + if (f->proto3_optional_) { + oneof->synthetic = true; + } + CHK_OOM(upb_inttable_insert2(&oneof->itof, f->number_, v, alloc)); + CHK_OOM(upb_strtable_insert3(&oneof->ntof, name.data, name.size, v, alloc)); + } else { + f->oneof = NULL; + if (f->proto3_optional_) { + symtab_errf(ctx, "field with proto3_optional was not in a oneof (%s)", + f->full_name); + } + } + + options = google_protobuf_FieldDescriptorProto_has_options(field_proto) ? + google_protobuf_FieldDescriptorProto_options(field_proto) : NULL; + + if (options && google_protobuf_FieldOptions_has_packed(options)) { + f->packed_ = google_protobuf_FieldOptions_packed(options); + } else { + /* Repeated fields default to packed for proto3 only. */ + f->packed_ = upb_fielddef_isprimitive(f) && + f->label_ == UPB_LABEL_REPEATED && f->file->syntax == UPB_SYNTAX_PROTO3; + } + + if (options) { + f->lazy_ = google_protobuf_FieldOptions_lazy(options); + } else { + f->lazy_ = false; + } +} + +static void create_enumdef( + symtab_addctx *ctx, const char *prefix, + const google_protobuf_EnumDescriptorProto *enum_proto) { + upb_enumdef *e; + const google_protobuf_EnumValueDescriptorProto *const *values; + upb_strview name; + size_t i, n; + + name = google_protobuf_EnumDescriptorProto_name(enum_proto); + check_ident(ctx, name, false); + + e = (upb_enumdef*)&ctx->file->enums[ctx->file->enum_count++]; + e->full_name = makefullname(ctx, prefix, name); + symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM)); + + values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n); + CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, n, ctx->alloc)); + CHK_OOM(upb_inttable_init2(&e->iton, UPB_CTYPE_CSTR, ctx->alloc)); + + e->file = ctx->file; + e->defaultval = 0; + + if (n == 0) { + symtab_errf(ctx, "enums must contain at least one value (%s)", + e->full_name); + } + + for (i = 0; i < n; i++) { + const google_protobuf_EnumValueDescriptorProto *value = values[i]; + upb_strview name = google_protobuf_EnumValueDescriptorProto_name(value); + char *name2 = strviewdup(ctx, name); + int32_t num = google_protobuf_EnumValueDescriptorProto_number(value); + upb_value v = upb_value_int32(num); + + if (i == 0 && e->file->syntax == UPB_SYNTAX_PROTO3 && num != 0) { + symtab_errf(ctx, "for proto3, the first enum value must be zero (%s)", + e->full_name); + } + + if (upb_strtable_lookup(&e->ntoi, name2, NULL)) { + symtab_errf(ctx, "duplicate enum label '%s'", name2); + } + + CHK_OOM(name2) + CHK_OOM( + upb_strtable_insert3(&e->ntoi, name2, strlen(name2), v, ctx->alloc)); + + if (!upb_inttable_lookup(&e->iton, num, NULL)) { + upb_value v = upb_value_cstr(name2); + CHK_OOM(upb_inttable_insert2(&e->iton, num, v, ctx->alloc)); + } + } + + upb_inttable_compact2(&e->iton, ctx->alloc); +} + +static void create_msgdef(symtab_addctx *ctx, const char *prefix, + const google_protobuf_DescriptorProto *msg_proto) { + upb_msgdef *m; + const google_protobuf_MessageOptions *options; + const google_protobuf_OneofDescriptorProto *const *oneofs; + const google_protobuf_FieldDescriptorProto *const *fields; + const google_protobuf_EnumDescriptorProto *const *enums; + const google_protobuf_DescriptorProto *const *msgs; + size_t i, n_oneof, n_field, n; + upb_strview name; + + name = google_protobuf_DescriptorProto_name(msg_proto); + check_ident(ctx, name, false); + + m = (upb_msgdef*)&ctx->file->msgs[ctx->file->msg_count++]; + m->full_name = makefullname(ctx, prefix, name); + symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG)); + + oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n_oneof); + fields = google_protobuf_DescriptorProto_field(msg_proto, &n_field); + + CHK_OOM(upb_inttable_init2(&m->itof, UPB_CTYPE_CONSTPTR, ctx->alloc)); + CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, n_oneof + n_field, + ctx->alloc)); + + m->file = ctx->file; + m->map_entry = false; + + options = google_protobuf_DescriptorProto_options(msg_proto); + + if (options) { + m->map_entry = google_protobuf_MessageOptions_map_entry(options); + } + + if (ctx->layouts) { + m->layout = *ctx->layouts; + ctx->layouts++; + } else { + /* Allocate now (to allow cross-linking), populate later. */ + m->layout = symtab_alloc( + ctx, sizeof(*m->layout) + sizeof(_upb_fasttable_entry)); + } + + m->oneof_count = 0; + m->oneofs = symtab_alloc(ctx, sizeof(*m->oneofs) * n_oneof); + for (i = 0; i < n_oneof; i++) { + create_oneofdef(ctx, m, oneofs[i]); + } + + m->field_count = 0; + m->fields = symtab_alloc(ctx, sizeof(*m->fields) * n_field); + for (i = 0; i < n_field; i++) { + create_fielddef(ctx, m->full_name, m, fields[i]); + } + + assign_msg_indices(ctx, m); + finalize_oneofs(ctx, m); + assign_msg_wellknowntype(m); + upb_inttable_compact2(&m->itof, ctx->alloc); + + /* This message is built. Now build nested messages and enums. */ + + enums = google_protobuf_DescriptorProto_enum_type(msg_proto, &n); + for (i = 0; i < n; i++) { + create_enumdef(ctx, m->full_name, enums[i]); + } + + msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n); + for (i = 0; i < n; i++) { + create_msgdef(ctx, m->full_name, msgs[i]); + } +} + +static void count_types_in_msg(const google_protobuf_DescriptorProto *msg_proto, + upb_filedef *file) { + const google_protobuf_DescriptorProto *const *msgs; + size_t i, n; + + file->msg_count++; + + msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n); + for (i = 0; i < n; i++) { + count_types_in_msg(msgs[i], file); + } + + google_protobuf_DescriptorProto_enum_type(msg_proto, &n); + file->enum_count += n; + + google_protobuf_DescriptorProto_extension(msg_proto, &n); + file->ext_count += n; +} + +static void count_types_in_file( + const google_protobuf_FileDescriptorProto *file_proto, + upb_filedef *file) { + const google_protobuf_DescriptorProto *const *msgs; + size_t i, n; + + msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n); + for (i = 0; i < n; i++) { + count_types_in_msg(msgs[i], file); + } + + google_protobuf_FileDescriptorProto_enum_type(file_proto, &n); + file->enum_count += n; + + google_protobuf_FileDescriptorProto_extension(file_proto, &n); + file->ext_count += n; +} + +static void resolve_fielddef(symtab_addctx *ctx, const char *prefix, + upb_fielddef *f) { + upb_strview name; + const google_protobuf_FieldDescriptorProto *field_proto = f->sub.unresolved; + + if (f->is_extension_) { + if (!google_protobuf_FieldDescriptorProto_has_extendee(field_proto)) { + symtab_errf(ctx, "extension for field '%s' had no extendee", + f->full_name); + } + + name = google_protobuf_FieldDescriptorProto_extendee(field_proto); + f->msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG); + } + + if ((upb_fielddef_issubmsg(f) || f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) && + !google_protobuf_FieldDescriptorProto_has_type_name(field_proto)) { + symtab_errf(ctx, "field '%s' is missing type name", f->full_name); + } + + name = google_protobuf_FieldDescriptorProto_type_name(field_proto); + + if (upb_fielddef_issubmsg(f)) { + f->sub.msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG); + } else if (f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) { + f->sub.enumdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_ENUM); + } + + /* Have to delay resolving of the default value until now because of the enum + * case, since enum defaults are specified with a label. */ + if (google_protobuf_FieldDescriptorProto_has_default_value(field_proto)) { + upb_strview defaultval = + google_protobuf_FieldDescriptorProto_default_value(field_proto); + + if (f->file->syntax == UPB_SYNTAX_PROTO3) { + symtab_errf(ctx, "proto3 fields cannot have explicit defaults (%s)", + f->full_name); + } + + if (upb_fielddef_issubmsg(f)) { + symtab_errf(ctx, "message fields cannot have explicit defaults (%s)", + f->full_name); + } + + parse_default(ctx, defaultval.data, defaultval.size, f); + } else { + set_default_default(ctx, f); + } +} + +static void build_filedef( + symtab_addctx *ctx, upb_filedef *file, + const google_protobuf_FileDescriptorProto *file_proto) { + const google_protobuf_FileOptions *file_options_proto; + const google_protobuf_DescriptorProto *const *msgs; + const google_protobuf_EnumDescriptorProto *const *enums; + const google_protobuf_FieldDescriptorProto *const *exts; + const upb_strview* strs; + size_t i, n; + + count_types_in_file(file_proto, file); + + file->msgs = symtab_alloc(ctx, sizeof(*file->msgs) * file->msg_count); + file->enums = symtab_alloc(ctx, sizeof(*file->enums) * file->enum_count); + file->exts = symtab_alloc(ctx, sizeof(*file->exts) * file->ext_count); + + /* We increment these as defs are added. */ + file->msg_count = 0; + file->enum_count = 0; + file->ext_count = 0; + + if (!google_protobuf_FileDescriptorProto_has_name(file_proto)) { + symtab_errf(ctx, "File has no name"); + } + + file->name = + strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto)); + file->phpprefix = NULL; + file->phpnamespace = NULL; + + if (google_protobuf_FileDescriptorProto_has_package(file_proto)) { + upb_strview package = + google_protobuf_FileDescriptorProto_package(file_proto); + check_ident(ctx, package, true); + file->package = strviewdup(ctx, package); + } else { + file->package = NULL; + } + + if (google_protobuf_FileDescriptorProto_has_syntax(file_proto)) { + upb_strview syntax = + google_protobuf_FileDescriptorProto_syntax(file_proto); + + if (streql_view(syntax, "proto2")) { + file->syntax = UPB_SYNTAX_PROTO2; + } else if (streql_view(syntax, "proto3")) { + file->syntax = UPB_SYNTAX_PROTO3; + } else { + symtab_errf(ctx, "Invalid syntax '" UPB_STRVIEW_FORMAT "'", + UPB_STRVIEW_ARGS(syntax)); + } + } else { + file->syntax = UPB_SYNTAX_PROTO2; + } + + /* Read options. */ + file_options_proto = google_protobuf_FileDescriptorProto_options(file_proto); + if (file_options_proto) { + if (google_protobuf_FileOptions_has_php_class_prefix(file_options_proto)) { + file->phpprefix = strviewdup( + ctx, + google_protobuf_FileOptions_php_class_prefix(file_options_proto)); + } + if (google_protobuf_FileOptions_has_php_namespace(file_options_proto)) { + file->phpnamespace = strviewdup( + ctx, google_protobuf_FileOptions_php_namespace(file_options_proto)); + } + } + + /* Verify dependencies. */ + strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n); + file->deps = symtab_alloc(ctx, sizeof(*file->deps) * n); + + for (i = 0; i < n; i++) { + upb_strview dep_name = strs[i]; + upb_value v; + if (!upb_strtable_lookup2(&ctx->symtab->files, dep_name.data, + dep_name.size, &v)) { + symtab_errf(ctx, + "Depends on file '" UPB_STRVIEW_FORMAT + "', but it has not been loaded", + UPB_STRVIEW_ARGS(dep_name)); + } + file->deps[i] = upb_value_getconstptr(v); + } + + /* Create messages. */ + msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n); + for (i = 0; i < n; i++) { + create_msgdef(ctx, file->package, msgs[i]); + } + + /* Create enums. */ + enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n); + for (i = 0; i < n; i++) { + create_enumdef(ctx, file->package, enums[i]); + } + + /* Create extensions. */ + exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n); + file->exts = symtab_alloc(ctx, sizeof(*file->exts) * n); + for (i = 0; i < n; i++) { + create_fielddef(ctx, file->package, NULL, exts[i]); + } + + /* Now that all names are in the table, build layouts and resolve refs. */ + for (i = 0; i < (size_t)file->ext_count; i++) { + resolve_fielddef(ctx, file->package, (upb_fielddef*)&file->exts[i]); + } + + for (i = 0; i < (size_t)file->msg_count; i++) { + const upb_msgdef *m = &file->msgs[i]; + int j; + for (j = 0; j < m->field_count; j++) { + resolve_fielddef(ctx, m->full_name, (upb_fielddef*)&m->fields[j]); + } + } + + if (!ctx->layouts) { + for (i = 0; i < (size_t)file->msg_count; i++) { + const upb_msgdef *m = &file->msgs[i]; + make_layout(ctx, m); + } + } +} + +static void remove_filedef(upb_symtab *s, upb_filedef *file) { + upb_alloc *alloc = upb_arena_alloc(s->arena); + int i; + for (i = 0; i < file->msg_count; i++) { + const char *name = file->msgs[i].full_name; + upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc); + } + for (i = 0; i < file->enum_count; i++) { + const char *name = file->enums[i].full_name; + upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc); + } + for (i = 0; i < file->ext_count; i++) { + const char *name = file->exts[i].full_name; + upb_strtable_remove3(&s->syms, name, strlen(name), NULL, alloc); + } +} + +static const upb_filedef *_upb_symtab_addfile( + upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto, + const upb_msglayout **layouts, upb_status *status) { + upb_arena *file_arena = upb_arena_new(); + upb_filedef *file; + symtab_addctx ctx; + + if (!file_arena) return NULL; + + file = upb_arena_malloc(file_arena, sizeof(*file)); + if (!file) goto done; + + ctx.file = file; + ctx.symtab = s; + ctx.file_arena = file_arena; + ctx.alloc = upb_arena_alloc(file_arena); + ctx.layouts = layouts; + ctx.status = status; + + file->msg_count = 0; + file->enum_count = 0; + file->ext_count = 0; + file->symtab = s; + + if (UPB_UNLIKELY(UPB_SETJMP(ctx.err))) { + UPB_ASSERT(!upb_ok(status)); + remove_filedef(s, file); + file = NULL; + } else { + build_filedef(&ctx, file, file_proto); + upb_strtable_insert3(&s->files, file->name, strlen(file->name), + upb_value_constptr(file), ctx.alloc); + UPB_ASSERT(upb_ok(status)); + upb_arena_fuse(s->arena, file_arena); + } + +done: + upb_arena_free(file_arena); + return file; +} + +const upb_filedef *upb_symtab_addfile( + upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto, + upb_status *status) { + return _upb_symtab_addfile(s, file_proto, NULL, status); +} + +/* Include here since we want most of this file to be stdio-free. */ +#include + +bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init) { + /* Since this function should never fail (it would indicate a bug in upb) we + * print errors to stderr instead of returning error status to the user. */ + upb_def_init **deps = init->deps; + google_protobuf_FileDescriptorProto *file; + upb_arena *arena; + upb_status status; + + upb_status_clear(&status); + + if (upb_strtable_lookup(&s->files, init->filename, NULL)) { + return true; + } + + arena = upb_arena_new(); + + for (; *deps; deps++) { + if (!_upb_symtab_loaddefinit(s, *deps)) goto err; + } + + file = google_protobuf_FileDescriptorProto_parse_ex( + init->descriptor.data, init->descriptor.size, arena, UPB_DECODE_ALIAS); + s->bytes_loaded += init->descriptor.size; + + if (!file) { + upb_status_seterrf( + &status, + "Failed to parse compiled-in descriptor for file '%s'. This should " + "never happen.", + init->filename); + goto err; + } + + if (!_upb_symtab_addfile(s, file, init->layouts, &status)) goto err; + + upb_arena_free(arena); + return true; + +err: + fprintf(stderr, "Error loading compiled-in descriptor: %s\n", + upb_status_errmsg(&status)); + upb_arena_free(arena); + return false; +} + +size_t _upb_symtab_bytesloaded(const upb_symtab *s) { + return s->bytes_loaded; +} + +upb_arena *_upb_symtab_arena(const upb_symtab *s) { + return s->arena; +} + +#undef CHK_OOM + + +#include + + +static size_t get_field_size(const upb_msglayout_field *f) { + static unsigned char sizes[] = { + 0,/* 0 */ + 8, /* UPB_DESCRIPTOR_TYPE_DOUBLE */ + 4, /* UPB_DESCRIPTOR_TYPE_FLOAT */ + 8, /* UPB_DESCRIPTOR_TYPE_INT64 */ + 8, /* UPB_DESCRIPTOR_TYPE_UINT64 */ + 4, /* UPB_DESCRIPTOR_TYPE_INT32 */ + 8, /* UPB_DESCRIPTOR_TYPE_FIXED64 */ + 4, /* UPB_DESCRIPTOR_TYPE_FIXED32 */ + 1, /* UPB_DESCRIPTOR_TYPE_BOOL */ + sizeof(upb_strview), /* UPB_DESCRIPTOR_TYPE_STRING */ + sizeof(void*), /* UPB_DESCRIPTOR_TYPE_GROUP */ + sizeof(void*), /* UPB_DESCRIPTOR_TYPE_MESSAGE */ + sizeof(upb_strview), /* UPB_DESCRIPTOR_TYPE_BYTES */ + 4, /* UPB_DESCRIPTOR_TYPE_UINT32 */ + 4, /* UPB_DESCRIPTOR_TYPE_ENUM */ + 4, /* UPB_DESCRIPTOR_TYPE_SFIXED32 */ + 8, /* UPB_DESCRIPTOR_TYPE_SFIXED64 */ + 4, /* UPB_DESCRIPTOR_TYPE_SINT32 */ + 8, /* UPB_DESCRIPTOR_TYPE_SINT64 */ + }; + return _upb_repeated_or_map(f) ? sizeof(void *) : sizes[f->descriptortype]; +} + +/* Strings/bytes are special-cased in maps. */ +static char _upb_fieldtype_to_mapsize[12] = { + 0, + 1, /* UPB_TYPE_BOOL */ + 4, /* UPB_TYPE_FLOAT */ + 4, /* UPB_TYPE_INT32 */ + 4, /* UPB_TYPE_UINT32 */ + 4, /* UPB_TYPE_ENUM */ + sizeof(void*), /* UPB_TYPE_MESSAGE */ + 8, /* UPB_TYPE_DOUBLE */ + 8, /* UPB_TYPE_INT64 */ + 8, /* UPB_TYPE_UINT64 */ + 0, /* UPB_TYPE_STRING */ + 0, /* UPB_TYPE_BYTES */ +}; + +static const char _upb_fieldtype_to_sizelg2[12] = { + 0, + 0, /* UPB_TYPE_BOOL */ + 2, /* UPB_TYPE_FLOAT */ + 2, /* UPB_TYPE_INT32 */ + 2, /* UPB_TYPE_UINT32 */ + 2, /* UPB_TYPE_ENUM */ + UPB_SIZE(2, 3), /* UPB_TYPE_MESSAGE */ + 3, /* UPB_TYPE_DOUBLE */ + 3, /* UPB_TYPE_INT64 */ + 3, /* UPB_TYPE_UINT64 */ + UPB_SIZE(3, 4), /* UPB_TYPE_STRING */ + UPB_SIZE(3, 4), /* UPB_TYPE_BYTES */ +}; + +/** upb_msg *******************************************************************/ + +upb_msg *upb_msg_new(const upb_msgdef *m, upb_arena *a) { + return _upb_msg_new(upb_msgdef_layout(m), a); +} + +static bool in_oneof(const upb_msglayout_field *field) { + return field->presence < 0; +} + +static upb_msgval _upb_msg_getraw(const upb_msg *msg, const upb_fielddef *f) { + const upb_msglayout_field *field = upb_fielddef_layout(f); + const char *mem = UPB_PTR_AT(msg, field->offset, char); + upb_msgval val = {0}; + memcpy(&val, mem, get_field_size(field)); + return val; +} + +bool upb_msg_has(const upb_msg *msg, const upb_fielddef *f) { + const upb_msglayout_field *field = upb_fielddef_layout(f); + if (in_oneof(field)) { + return _upb_getoneofcase_field(msg, field) == field->number; + } else if (field->presence > 0) { + return _upb_hasbit_field(msg, field); + } else { + UPB_ASSERT(field->descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE || + field->descriptortype == UPB_DESCRIPTOR_TYPE_GROUP); + return _upb_msg_getraw(msg, f).msg_val != NULL; + } +} + +const upb_fielddef *upb_msg_whichoneof(const upb_msg *msg, + const upb_oneofdef *o) { + const upb_fielddef *f = upb_oneofdef_field(o, 0); + if (upb_oneofdef_issynthetic(o)) { + UPB_ASSERT(upb_oneofdef_fieldcount(o) == 1); + return upb_msg_has(msg, f) ? f : NULL; + } else { + const upb_msglayout_field *field = upb_fielddef_layout(f); + uint32_t oneof_case = _upb_getoneofcase_field(msg, field); + f = oneof_case ? upb_oneofdef_itof(o, oneof_case) : NULL; + UPB_ASSERT((f != NULL) == (oneof_case != 0)); + return f; + } +} + +upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f) { + if (!upb_fielddef_haspresence(f) || upb_msg_has(msg, f)) { + return _upb_msg_getraw(msg, f); + } else { + /* TODO(haberman): change upb_fielddef to not require this switch(). */ + upb_msgval val = {0}; + switch (upb_fielddef_type(f)) { + case UPB_TYPE_INT32: + case UPB_TYPE_ENUM: + val.int32_val = upb_fielddef_defaultint32(f); + break; + case UPB_TYPE_INT64: + val.int64_val = upb_fielddef_defaultint64(f); + break; + case UPB_TYPE_UINT32: + val.uint32_val = upb_fielddef_defaultuint32(f); + break; + case UPB_TYPE_UINT64: + val.uint64_val = upb_fielddef_defaultuint64(f); + break; + case UPB_TYPE_FLOAT: + val.float_val = upb_fielddef_defaultfloat(f); + break; + case UPB_TYPE_DOUBLE: + val.double_val = upb_fielddef_defaultdouble(f); + break; + case UPB_TYPE_BOOL: + val.bool_val = upb_fielddef_defaultbool(f); + break; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + val.str_val.data = upb_fielddef_defaultstr(f, &val.str_val.size); + break; + case UPB_TYPE_MESSAGE: + val.msg_val = NULL; + break; + } + return val; + } +} + +upb_mutmsgval upb_msg_mutable(upb_msg *msg, const upb_fielddef *f, + upb_arena *a) { + const upb_msglayout_field *field = upb_fielddef_layout(f); + upb_mutmsgval ret; + char *mem = UPB_PTR_AT(msg, field->offset, char); + bool wrong_oneof = + in_oneof(field) && _upb_getoneofcase_field(msg, field) != field->number; + + memcpy(&ret, mem, sizeof(void*)); + + if (a && (!ret.msg || wrong_oneof)) { + if (upb_fielddef_ismap(f)) { + const upb_msgdef *entry = upb_fielddef_msgsubdef(f); + const upb_fielddef *key = upb_msgdef_itof(entry, UPB_MAPENTRY_KEY); + const upb_fielddef *value = upb_msgdef_itof(entry, UPB_MAPENTRY_VALUE); + ret.map = upb_map_new(a, upb_fielddef_type(key), upb_fielddef_type(value)); + } else if (upb_fielddef_isseq(f)) { + ret.array = upb_array_new(a, upb_fielddef_type(f)); + } else { + UPB_ASSERT(upb_fielddef_issubmsg(f)); + ret.msg = upb_msg_new(upb_fielddef_msgsubdef(f), a); + } + + memcpy(mem, &ret, sizeof(void*)); + + if (wrong_oneof) { + *_upb_oneofcase_field(msg, field) = field->number; + } else if (field->presence > 0) { + _upb_sethas_field(msg, field); + } + } + return ret; +} + +void upb_msg_set(upb_msg *msg, const upb_fielddef *f, upb_msgval val, + upb_arena *a) { + const upb_msglayout_field *field = upb_fielddef_layout(f); + char *mem = UPB_PTR_AT(msg, field->offset, char); + UPB_UNUSED(a); /* We reserve the right to make set insert into a map. */ + memcpy(mem, &val, get_field_size(field)); + if (field->presence > 0) { + _upb_sethas_field(msg, field); + } else if (in_oneof(field)) { + *_upb_oneofcase_field(msg, field) = field->number; + } +} + +void upb_msg_clearfield(upb_msg *msg, const upb_fielddef *f) { + const upb_msglayout_field *field = upb_fielddef_layout(f); + char *mem = UPB_PTR_AT(msg, field->offset, char); + + if (field->presence > 0) { + _upb_clearhas_field(msg, field); + } else if (in_oneof(field)) { + uint32_t *oneof_case = _upb_oneofcase_field(msg, field); + if (*oneof_case != field->number) return; + *oneof_case = 0; + } + + memset(mem, 0, get_field_size(field)); +} + +void upb_msg_clear(upb_msg *msg, const upb_msgdef *m) { + _upb_msg_clear(msg, upb_msgdef_layout(m)); +} + +bool upb_msg_next(const upb_msg *msg, const upb_msgdef *m, + const upb_symtab *ext_pool, const upb_fielddef **out_f, + upb_msgval *out_val, size_t *iter) { + int i = *iter; + int n = upb_msgdef_fieldcount(m); + const upb_msgval zero = {0}; + UPB_UNUSED(ext_pool); + while (++i < n) { + const upb_fielddef *f = upb_msgdef_field(m, i); + upb_msgval val = _upb_msg_getraw(msg, f); + + /* Skip field if unset or empty. */ + if (upb_fielddef_haspresence(f)) { + if (!upb_msg_has(msg, f)) continue; + } else { + upb_msgval test = val; + if (upb_fielddef_isstring(f) && !upb_fielddef_isseq(f)) { + /* Clear string pointer, only size matters (ptr could be non-NULL). */ + test.str_val.data = NULL; + } + /* Continue if NULL or 0. */ + if (memcmp(&test, &zero, sizeof(test)) == 0) continue; + + /* Continue on empty array or map. */ + if (upb_fielddef_ismap(f)) { + if (upb_map_size(test.map_val) == 0) continue; + } else if (upb_fielddef_isseq(f)) { + if (upb_array_size(test.array_val) == 0) continue; + } + } + + *out_val = val; + *out_f = f; + *iter = i; + return true; + } + *iter = i; + return false; +} + +bool _upb_msg_discardunknown(upb_msg *msg, const upb_msgdef *m, int depth) { + size_t iter = UPB_MSG_BEGIN; + const upb_fielddef *f; + upb_msgval val; + bool ret = true; + + if (--depth == 0) return false; + + _upb_msg_discardunknown_shallow(msg); + + while (upb_msg_next(msg, m, NULL /*ext_pool*/, &f, &val, &iter)) { + const upb_msgdef *subm = upb_fielddef_msgsubdef(f); + if (!subm) continue; + if (upb_fielddef_ismap(f)) { + const upb_fielddef *val_f = upb_msgdef_itof(subm, 2); + const upb_msgdef *val_m = upb_fielddef_msgsubdef(val_f); + upb_map *map = (upb_map*)val.map_val; + size_t iter = UPB_MAP_BEGIN; + + if (!val_m) continue; + + while (upb_mapiter_next(map, &iter)) { + upb_msgval map_val = upb_mapiter_value(map, iter); + if (!_upb_msg_discardunknown((upb_msg*)map_val.msg_val, val_m, depth)) { + ret = false; + } + } + } else if (upb_fielddef_isseq(f)) { + const upb_array *arr = val.array_val; + size_t i, n = upb_array_size(arr); + for (i = 0; i < n; i++) { + upb_msgval elem = upb_array_get(arr, i); + if (!_upb_msg_discardunknown((upb_msg*)elem.msg_val, subm, depth)) { + ret = false; + } + } + } else { + if (!_upb_msg_discardunknown((upb_msg*)val.msg_val, subm, depth)) { + ret = false; + } + } + } + + return ret; +} + +bool upb_msg_discardunknown(upb_msg *msg, const upb_msgdef *m, int maxdepth) { + return _upb_msg_discardunknown(msg, m, maxdepth); +} + +/** upb_array *****************************************************************/ + +upb_array *upb_array_new(upb_arena *a, upb_fieldtype_t type) { + return _upb_array_new(a, 4, _upb_fieldtype_to_sizelg2[type]); +} + +size_t upb_array_size(const upb_array *arr) { + return arr->len; +} + +upb_msgval upb_array_get(const upb_array *arr, size_t i) { + upb_msgval ret; + const char* data = _upb_array_constptr(arr); + int lg2 = arr->data & 7; + UPB_ASSERT(i < arr->len); + memcpy(&ret, data + (i << lg2), 1 << lg2); + return ret; +} + +void upb_array_set(upb_array *arr, size_t i, upb_msgval val) { + char* data = _upb_array_ptr(arr); + int lg2 = arr->data & 7; + UPB_ASSERT(i < arr->len); + memcpy(data + (i << lg2), &val, 1 << lg2); +} + +bool upb_array_append(upb_array *arr, upb_msgval val, upb_arena *arena) { + if (!_upb_array_realloc(arr, arr->len + 1, arena)) { + return false; + } + arr->len++; + upb_array_set(arr, arr->len - 1, val); + return true; +} + +bool upb_array_resize(upb_array *arr, size_t size, upb_arena *arena) { + return _upb_array_resize(arr, size, arena); +} + +/** upb_map *******************************************************************/ + +upb_map *upb_map_new(upb_arena *a, upb_fieldtype_t key_type, + upb_fieldtype_t value_type) { + return _upb_map_new(a, _upb_fieldtype_to_mapsize[key_type], + _upb_fieldtype_to_mapsize[value_type]); +} + +size_t upb_map_size(const upb_map *map) { + return _upb_map_size(map); +} + +bool upb_map_get(const upb_map *map, upb_msgval key, upb_msgval *val) { + return _upb_map_get(map, &key, map->key_size, val, map->val_size); +} + +void upb_map_clear(upb_map *map) { + _upb_map_clear(map); +} + +bool upb_map_set(upb_map *map, upb_msgval key, upb_msgval val, + upb_arena *arena) { + return _upb_map_set(map, &key, map->key_size, &val, map->val_size, arena); +} + +bool upb_map_delete(upb_map *map, upb_msgval key) { + return _upb_map_delete(map, &key, map->key_size); +} + +bool upb_mapiter_next(const upb_map *map, size_t *iter) { + return _upb_map_next(map, iter); +} + +bool upb_mapiter_done(const upb_map *map, size_t iter) { + upb_strtable_iter i; + UPB_ASSERT(iter != UPB_MAP_BEGIN); + i.t = &map->table; + i.index = iter; + return upb_strtable_done(&i); +} + +/* Returns the key and value for this entry of the map. */ +upb_msgval upb_mapiter_key(const upb_map *map, size_t iter) { + upb_strtable_iter i; + upb_msgval ret; + i.t = &map->table; + i.index = iter; + _upb_map_fromkey(upb_strtable_iter_key(&i), &ret, map->key_size); + return ret; +} + +upb_msgval upb_mapiter_value(const upb_map *map, size_t iter) { + upb_strtable_iter i; + upb_msgval ret; + i.t = &map->table; + i.index = iter; + _upb_map_fromvalue(upb_strtable_iter_value(&i), &ret, map->val_size); + return ret; +} + +/* void upb_mapiter_setvalue(upb_map *map, size_t iter, upb_msgval value); */ + + +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Special header, must be included last. */ + +typedef struct { + const char *ptr, *end; + upb_arena *arena; /* TODO: should we have a tmp arena for tmp data? */ + const upb_symtab *any_pool; + int depth; + upb_status *status; + jmp_buf err; + int line; + const char *line_begin; + bool is_first; + int options; + const upb_fielddef *debug_field; +} jsondec; + +enum { JD_OBJECT, JD_ARRAY, JD_STRING, JD_NUMBER, JD_TRUE, JD_FALSE, JD_NULL }; + +/* Forward declarations of mutually-recursive functions. */ +static void jsondec_wellknown(jsondec *d, upb_msg *msg, const upb_msgdef *m); +static upb_msgval jsondec_value(jsondec *d, const upb_fielddef *f); +static void jsondec_wellknownvalue(jsondec *d, upb_msg *msg, + const upb_msgdef *m); +static void jsondec_object(jsondec *d, upb_msg *msg, const upb_msgdef *m); + +static bool jsondec_streql(upb_strview str, const char *lit) { + return str.size == strlen(lit) && memcmp(str.data, lit, str.size) == 0; +} + +static bool jsondec_isnullvalue(const upb_fielddef *f) { + return upb_fielddef_type(f) == UPB_TYPE_ENUM && + strcmp(upb_enumdef_fullname(upb_fielddef_enumsubdef(f)), + "google.protobuf.NullValue") == 0; +} + +static bool jsondec_isvalue(const upb_fielddef *f) { + return (upb_fielddef_type(f) == UPB_TYPE_MESSAGE && + upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(f)) == + UPB_WELLKNOWN_VALUE) || + jsondec_isnullvalue(f); +} + +UPB_NORETURN static void jsondec_err(jsondec *d, const char *msg) { + upb_status_seterrf(d->status, "Error parsing JSON @%d:%d: %s", d->line, + (int)(d->ptr - d->line_begin), msg); + UPB_LONGJMP(d->err, 1); +} + +UPB_PRINTF(2, 3) +UPB_NORETURN static void jsondec_errf(jsondec *d, const char *fmt, ...) { + va_list argp; + upb_status_seterrf(d->status, "Error parsing JSON @%d:%d: ", d->line, + (int)(d->ptr - d->line_begin)); + va_start(argp, fmt); + upb_status_vappenderrf(d->status, fmt, argp); + va_end(argp); + UPB_LONGJMP(d->err, 1); +} + +static void jsondec_skipws(jsondec *d) { + while (d->ptr != d->end) { + switch (*d->ptr) { + case '\n': + d->line++; + d->line_begin = d->ptr; + /* Fallthrough. */ + case '\r': + case '\t': + case ' ': + d->ptr++; + break; + default: + return; + } + } + jsondec_err(d, "Unexpected EOF"); +} + +static bool jsondec_tryparsech(jsondec *d, char ch) { + if (d->ptr == d->end || *d->ptr != ch) return false; + d->ptr++; + return true; +} + +static void jsondec_parselit(jsondec *d, const char *lit) { + size_t avail = d->end - d->ptr; + size_t len = strlen(lit); + if (avail < len || memcmp(d->ptr, lit, len) != 0) { + jsondec_errf(d, "Expected: '%s'", lit); + } + d->ptr += len; +} + +static void jsondec_wsch(jsondec *d, char ch) { + jsondec_skipws(d); + if (!jsondec_tryparsech(d, ch)) { + jsondec_errf(d, "Expected: '%c'", ch); + } +} + +static void jsondec_true(jsondec *d) { jsondec_parselit(d, "true"); } +static void jsondec_false(jsondec *d) { jsondec_parselit(d, "false"); } +static void jsondec_null(jsondec *d) { jsondec_parselit(d, "null"); } + +static void jsondec_entrysep(jsondec *d) { + jsondec_skipws(d); + jsondec_parselit(d, ":"); +} + +static int jsondec_rawpeek(jsondec *d) { + switch (*d->ptr) { + case '{': + return JD_OBJECT; + case '[': + return JD_ARRAY; + case '"': + return JD_STRING; + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return JD_NUMBER; + case 't': + return JD_TRUE; + case 'f': + return JD_FALSE; + case 'n': + return JD_NULL; + default: + jsondec_errf(d, "Unexpected character: '%c'", *d->ptr); + } +} + +/* JSON object/array **********************************************************/ + +/* These are used like so: + * + * jsondec_objstart(d); + * while (jsondec_objnext(d)) { + * ... + * } + * jsondec_objend(d) */ + +static int jsondec_peek(jsondec *d) { + jsondec_skipws(d); + return jsondec_rawpeek(d); +} + +static void jsondec_push(jsondec *d) { + if (--d->depth < 0) { + jsondec_err(d, "Recursion limit exceeded"); + } + d->is_first = true; +} + +static bool jsondec_seqnext(jsondec *d, char end_ch) { + bool is_first = d->is_first; + d->is_first = false; + jsondec_skipws(d); + if (*d->ptr == end_ch) return false; + if (!is_first) jsondec_parselit(d, ","); + return true; +} + +static void jsondec_arrstart(jsondec *d) { + jsondec_push(d); + jsondec_wsch(d, '['); +} + +static void jsondec_arrend(jsondec *d) { + d->depth++; + jsondec_wsch(d, ']'); +} + +static bool jsondec_arrnext(jsondec *d) { + return jsondec_seqnext(d, ']'); +} + +static void jsondec_objstart(jsondec *d) { + jsondec_push(d); + jsondec_wsch(d, '{'); +} + +static void jsondec_objend(jsondec *d) { + d->depth++; + jsondec_wsch(d, '}'); +} + +static bool jsondec_objnext(jsondec *d) { + if (!jsondec_seqnext(d, '}')) return false; + if (jsondec_peek(d) != JD_STRING) { + jsondec_err(d, "Object must start with string"); + } + return true; +} + +/* JSON number ****************************************************************/ + +static bool jsondec_tryskipdigits(jsondec *d) { + const char *start = d->ptr; + + while (d->ptr < d->end) { + if (*d->ptr < '0' || *d->ptr > '9') { + break; + } + d->ptr++; + } + + return d->ptr != start; +} + +static void jsondec_skipdigits(jsondec *d) { + if (!jsondec_tryskipdigits(d)) { + jsondec_err(d, "Expected one or more digits"); + } +} + +static double jsondec_number(jsondec *d) { + const char *start = d->ptr; + + assert(jsondec_rawpeek(d) == JD_NUMBER); + + /* Skip over the syntax of a number, as specified by JSON. */ + if (*d->ptr == '-') d->ptr++; + + if (jsondec_tryparsech(d, '0')) { + if (jsondec_tryskipdigits(d)) { + jsondec_err(d, "number cannot have leading zero"); + } + } else { + jsondec_skipdigits(d); + } + + if (d->ptr == d->end) goto parse; + if (jsondec_tryparsech(d, '.')) { + jsondec_skipdigits(d); + } + if (d->ptr == d->end) goto parse; + + if (*d->ptr == 'e' || *d->ptr == 'E') { + d->ptr++; + if (d->ptr == d->end) { + jsondec_err(d, "Unexpected EOF in number"); + } + if (*d->ptr == '+' || *d->ptr == '-') { + d->ptr++; + } + jsondec_skipdigits(d); + } + +parse: + /* Having verified the syntax of a JSON number, use strtod() to parse + * (strtod() accepts a superset of JSON syntax). */ + errno = 0; + { + char* end; + double val = strtod(start, &end); + assert(end == d->ptr); + + /* Currently the min/max-val conformance tests fail if we check this. Does + * this mean the conformance tests are wrong or strtod() is wrong, or + * something else? Investigate further. */ + /* + if (errno == ERANGE) { + jsondec_err(d, "Number out of range"); + } + */ + + if (val > DBL_MAX || val < -DBL_MAX) { + jsondec_err(d, "Number out of range"); + } + + return val; + } +} + +/* JSON string ****************************************************************/ + +static char jsondec_escape(jsondec *d) { + switch (*d->ptr++) { + case '"': + return '\"'; + case '\\': + return '\\'; + case '/': + return '/'; + case 'b': + return '\b'; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + default: + jsondec_err(d, "Invalid escape char"); + } +} + +static uint32_t jsondec_codepoint(jsondec *d) { + uint32_t cp = 0; + const char *end; + + if (d->end - d->ptr < 4) { + jsondec_err(d, "EOF inside string"); + } + + end = d->ptr + 4; + while (d->ptr < end) { + char ch = *d->ptr++; + if (ch >= '0' && ch <= '9') { + ch -= '0'; + } else if (ch >= 'a' && ch <= 'f') { + ch = ch - 'a' + 10; + } else if (ch >= 'A' && ch <= 'F') { + ch = ch - 'A' + 10; + } else { + jsondec_err(d, "Invalid hex digit"); + } + cp = (cp << 4) | ch; + } + + return cp; +} + +/* Parses a \uXXXX unicode escape (possibly a surrogate pair). */ +static size_t jsondec_unicode(jsondec *d, char* out) { + uint32_t cp = jsondec_codepoint(d); + if (cp >= 0xd800 && cp <= 0xdbff) { + /* Surrogate pair: two 16-bit codepoints become a 32-bit codepoint. */ + uint32_t high = cp; + uint32_t low; + jsondec_parselit(d, "\\u"); + low = jsondec_codepoint(d); + if (low < 0xdc00 || low > 0xdfff) { + jsondec_err(d, "Invalid low surrogate"); + } + cp = (high & 0x3ff) << 10; + cp |= (low & 0x3ff); + cp += 0x10000; + } else if (cp >= 0xdc00 && cp <= 0xdfff) { + jsondec_err(d, "Unpaired low surrogate"); + } + + /* Write to UTF-8 */ + if (cp <= 0x7f) { + out[0] = cp; + return 1; + } else if (cp <= 0x07FF) { + out[0] = ((cp >> 6) & 0x1F) | 0xC0; + out[1] = ((cp >> 0) & 0x3F) | 0x80; + return 2; + } else if (cp <= 0xFFFF) { + out[0] = ((cp >> 12) & 0x0F) | 0xE0; + out[1] = ((cp >> 6) & 0x3F) | 0x80; + out[2] = ((cp >> 0) & 0x3F) | 0x80; + return 3; + } else if (cp < 0x10FFFF) { + out[0] = ((cp >> 18) & 0x07) | 0xF0; + out[1] = ((cp >> 12) & 0x3f) | 0x80; + out[2] = ((cp >> 6) & 0x3f) | 0x80; + out[3] = ((cp >> 0) & 0x3f) | 0x80; + return 4; + } else { + jsondec_err(d, "Invalid codepoint"); + } +} + +static void jsondec_resize(jsondec *d, char **buf, char **end, char **buf_end) { + size_t oldsize = *buf_end - *buf; + size_t len = *end - *buf; + size_t size = UPB_MAX(8, 2 * oldsize); + + *buf = upb_arena_realloc(d->arena, *buf, len, size); + if (!*buf) jsondec_err(d, "Out of memory"); + + *end = *buf + len; + *buf_end = *buf + size; +} + +static upb_strview jsondec_string(jsondec *d) { + char *buf = NULL; + char *end = NULL; + char *buf_end = NULL; + + jsondec_skipws(d); + + if (*d->ptr++ != '"') { + jsondec_err(d, "Expected string"); + } + + while (d->ptr < d->end) { + char ch = *d->ptr++; + + if (end == buf_end) { + jsondec_resize(d, &buf, &end, &buf_end); + } + + switch (ch) { + case '"': { + upb_strview ret; + ret.data = buf; + ret.size = end - buf; + *end = '\0'; /* Needed for possible strtod(). */ + return ret; + } + case '\\': + if (d->ptr == d->end) goto eof; + if (*d->ptr == 'u') { + d->ptr++; + if (buf_end - end < 4) { + /* Allow space for maximum-sized code point (4 bytes). */ + jsondec_resize(d, &buf, &end, &buf_end); + } + end += jsondec_unicode(d, end); + } else { + *end++ = jsondec_escape(d); + } + break; + default: + if ((unsigned char)*d->ptr < 0x20) { + jsondec_err(d, "Invalid char in JSON string"); + } + *end++ = ch; + break; + } + } + +eof: + jsondec_err(d, "EOF inside string"); +} + +static void jsondec_skipval(jsondec *d) { + switch (jsondec_peek(d)) { + case JD_OBJECT: + jsondec_objstart(d); + while (jsondec_objnext(d)) { + jsondec_string(d); + jsondec_entrysep(d); + jsondec_skipval(d); + } + jsondec_objend(d); + break; + case JD_ARRAY: + jsondec_arrstart(d); + while (jsondec_arrnext(d)) { + jsondec_skipval(d); + } + jsondec_arrend(d); + break; + case JD_TRUE: + jsondec_true(d); + break; + case JD_FALSE: + jsondec_false(d); + break; + case JD_NULL: + jsondec_null(d); + break; + case JD_STRING: + jsondec_string(d); + break; + case JD_NUMBER: + jsondec_number(d); + break; + } +} + +/* Base64 decoding for bytes fields. ******************************************/ + +static unsigned int jsondec_base64_tablelookup(const char ch) { + /* Table includes the normal base64 chars plus the URL-safe variant. */ + const signed char table[256] = { + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, 62 /*+*/, -1, 62 /*-*/, -1, 63 /*/ */, 52 /*0*/, + 53 /*1*/, 54 /*2*/, 55 /*3*/, 56 /*4*/, 57 /*5*/, 58 /*6*/, 59 /*7*/, + 60 /*8*/, 61 /*9*/, -1, -1, -1, -1, -1, + -1, -1, 0 /*A*/, 1 /*B*/, 2 /*C*/, 3 /*D*/, 4 /*E*/, + 5 /*F*/, 6 /*G*/, 07 /*H*/, 8 /*I*/, 9 /*J*/, 10 /*K*/, 11 /*L*/, + 12 /*M*/, 13 /*N*/, 14 /*O*/, 15 /*P*/, 16 /*Q*/, 17 /*R*/, 18 /*S*/, + 19 /*T*/, 20 /*U*/, 21 /*V*/, 22 /*W*/, 23 /*X*/, 24 /*Y*/, 25 /*Z*/, + -1, -1, -1, -1, 63 /*_*/, -1, 26 /*a*/, + 27 /*b*/, 28 /*c*/, 29 /*d*/, 30 /*e*/, 31 /*f*/, 32 /*g*/, 33 /*h*/, + 34 /*i*/, 35 /*j*/, 36 /*k*/, 37 /*l*/, 38 /*m*/, 39 /*n*/, 40 /*o*/, + 41 /*p*/, 42 /*q*/, 43 /*r*/, 44 /*s*/, 45 /*t*/, 46 /*u*/, 47 /*v*/, + 48 /*w*/, 49 /*x*/, 50 /*y*/, 51 /*z*/, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1}; + + /* Sign-extend return value so high bit will be set on any unexpected char. */ + return table[(unsigned)ch]; +} + +static char *jsondec_partialbase64(jsondec *d, const char *ptr, const char *end, + char *out) { + int32_t val = -1; + + switch (end - ptr) { + case 2: + val = jsondec_base64_tablelookup(ptr[0]) << 18 | + jsondec_base64_tablelookup(ptr[1]) << 12; + out[0] = val >> 16; + out += 1; + break; + case 3: + val = jsondec_base64_tablelookup(ptr[0]) << 18 | + jsondec_base64_tablelookup(ptr[1]) << 12 | + jsondec_base64_tablelookup(ptr[2]) << 6; + out[0] = val >> 16; + out[1] = (val >> 8) & 0xff; + out += 2; + break; + } + + if (val < 0) { + jsondec_err(d, "Corrupt base64"); + } + + return out; +} + +static size_t jsondec_base64(jsondec *d, upb_strview str) { + /* We decode in place. This is safe because this is a new buffer (not + * aliasing the input) and because base64 decoding shrinks 4 bytes into 3. */ + char *out = (char*)str.data; + const char *ptr = str.data; + const char *end = ptr + str.size; + const char *end4 = ptr + (str.size & -4); /* Round down to multiple of 4. */ + + for (; ptr < end4; ptr += 4, out += 3) { + int val = jsondec_base64_tablelookup(ptr[0]) << 18 | + jsondec_base64_tablelookup(ptr[1]) << 12 | + jsondec_base64_tablelookup(ptr[2]) << 6 | + jsondec_base64_tablelookup(ptr[3]) << 0; + + if (val < 0) { + /* Junk chars or padding. Remove trailing padding, if any. */ + if (end - ptr == 4 && ptr[3] == '=') { + if (ptr[2] == '=') { + end -= 2; + } else { + end -= 1; + } + } + break; + } + + out[0] = val >> 16; + out[1] = (val >> 8) & 0xff; + out[2] = val & 0xff; + } + + if (ptr < end) { + /* Process remaining chars. We do not require padding. */ + out = jsondec_partialbase64(d, ptr, end, out); + } + + return out - str.data; +} + +/* Low-level integer parsing **************************************************/ + +/* We use these hand-written routines instead of strto[u]l() because the "long + * long" variants aren't in c89. Also our version allows setting a ptr limit. */ + +static const char *jsondec_buftouint64(jsondec *d, const char *ptr, + const char *end, uint64_t *val) { + uint64_t u64 = 0; + while (ptr < end) { + unsigned ch = *ptr - '0'; + if (ch >= 10) break; + if (u64 > UINT64_MAX / 10 || u64 * 10 > UINT64_MAX - ch) { + jsondec_err(d, "Integer overflow"); + } + u64 *= 10; + u64 += ch; + ptr++; + } + + *val = u64; + return ptr; +} + +static const char *jsondec_buftoint64(jsondec *d, const char *ptr, + const char *end, int64_t *val) { + bool neg = false; + uint64_t u64; + + if (ptr != end && *ptr == '-') { + ptr++; + neg = true; + } + + ptr = jsondec_buftouint64(d, ptr, end, &u64); + if (u64 > (uint64_t)INT64_MAX + neg) { + jsondec_err(d, "Integer overflow"); + } + + *val = neg ? -u64 : u64; + return ptr; +} + +static uint64_t jsondec_strtouint64(jsondec *d, upb_strview str) { + const char *end = str.data + str.size; + uint64_t ret; + if (jsondec_buftouint64(d, str.data, end, &ret) != end) { + jsondec_err(d, "Non-number characters in quoted integer"); + } + return ret; +} + +static int64_t jsondec_strtoint64(jsondec *d, upb_strview str) { + const char *end = str.data + str.size; + int64_t ret; + if (jsondec_buftoint64(d, str.data, end, &ret) != end) { + jsondec_err(d, "Non-number characters in quoted integer"); + } + return ret; +} + +/* Primitive value types ******************************************************/ + +/* Parse INT32 or INT64 value. */ +static upb_msgval jsondec_int(jsondec *d, const upb_fielddef *f) { + upb_msgval val; + + switch (jsondec_peek(d)) { + case JD_NUMBER: { + double dbl = jsondec_number(d); + if (dbl > 9223372036854774784.0 || dbl < -9223372036854775808.0) { + jsondec_err(d, "JSON number is out of range."); + } + val.int64_val = dbl; /* must be guarded, overflow here is UB */ + if (val.int64_val != dbl) { + jsondec_errf(d, "JSON number was not integral (%f != %" PRId64 ")", dbl, + val.int64_val); + } + break; + } + case JD_STRING: { + upb_strview str = jsondec_string(d); + val.int64_val = jsondec_strtoint64(d, str); + break; + } + default: + jsondec_err(d, "Expected number or string"); + } + + if (upb_fielddef_type(f) == UPB_TYPE_INT32) { + if (val.int64_val > INT32_MAX || val.int64_val < INT32_MIN) { + jsondec_err(d, "Integer out of range."); + } + val.int32_val = (int32_t)val.int64_val; + } + + return val; +} + +/* Parse UINT32 or UINT64 value. */ +static upb_msgval jsondec_uint(jsondec *d, const upb_fielddef *f) { + upb_msgval val = {0}; + + switch (jsondec_peek(d)) { + case JD_NUMBER: { + double dbl = jsondec_number(d); + if (dbl > 18446744073709549568.0 || dbl < 0) { + jsondec_err(d, "JSON number is out of range."); + } + val.uint64_val = dbl; /* must be guarded, overflow here is UB */ + if (val.uint64_val != dbl) { + jsondec_errf(d, "JSON number was not integral (%f != %" PRIu64 ")", dbl, + val.uint64_val); + } + break; + } + case JD_STRING: { + upb_strview str = jsondec_string(d); + val.uint64_val = jsondec_strtouint64(d, str); + break; + } + default: + jsondec_err(d, "Expected number or string"); + } + + if (upb_fielddef_type(f) == UPB_TYPE_UINT32) { + if (val.uint64_val > UINT32_MAX) { + jsondec_err(d, "Integer out of range."); + } + val.uint32_val = (uint32_t)val.uint64_val; + } + + return val; +} + +/* Parse DOUBLE or FLOAT value. */ +static upb_msgval jsondec_double(jsondec *d, const upb_fielddef *f) { + upb_strview str; + upb_msgval val = {0}; + + switch (jsondec_peek(d)) { + case JD_NUMBER: + val.double_val = jsondec_number(d); + break; + case JD_STRING: + str = jsondec_string(d); + if (jsondec_streql(str, "NaN")) { + val.double_val = NAN; + } else if (jsondec_streql(str, "Infinity")) { + val.double_val = INFINITY; + } else if (jsondec_streql(str, "-Infinity")) { + val.double_val = -INFINITY; + } else { + val.double_val = strtod(str.data, NULL); + } + break; + default: + jsondec_err(d, "Expected number or string"); + } + + if (upb_fielddef_type(f) == UPB_TYPE_FLOAT) { + if (val.double_val != INFINITY && val.double_val != -INFINITY && + (val.double_val > FLT_MAX || val.double_val < -FLT_MAX)) { + jsondec_err(d, "Float out of range"); + } + val.float_val = val.double_val; + } + + return val; +} + +/* Parse STRING or BYTES value. */ +static upb_msgval jsondec_strfield(jsondec *d, const upb_fielddef *f) { + upb_msgval val; + val.str_val = jsondec_string(d); + if (upb_fielddef_type(f) == UPB_TYPE_BYTES) { + val.str_val.size = jsondec_base64(d, val.str_val); + } + return val; +} + +static upb_msgval jsondec_enum(jsondec *d, const upb_fielddef *f) { + switch (jsondec_peek(d)) { + case JD_STRING: { + const upb_enumdef *e = upb_fielddef_enumsubdef(f); + upb_strview str = jsondec_string(d); + upb_msgval val; + if (!upb_enumdef_ntoi(e, str.data, str.size, &val.int32_val)) { + if (d->options & UPB_JSONDEC_IGNOREUNKNOWN) { + val.int32_val = 0; + } else { + jsondec_errf(d, "Unknown enumerator: '" UPB_STRVIEW_FORMAT "'", + UPB_STRVIEW_ARGS(str)); + } + } + return val; + } + case JD_NULL: { + if (jsondec_isnullvalue(f)) { + upb_msgval val; + jsondec_null(d); + val.int32_val = 0; + return val; + } + } + /* Fallthrough. */ + default: + return jsondec_int(d, f); + } +} + +static upb_msgval jsondec_bool(jsondec *d, const upb_fielddef *f) { + bool is_map_key = upb_fielddef_number(f) == 1 && + upb_msgdef_mapentry(upb_fielddef_containingtype(f)); + upb_msgval val; + + if (is_map_key) { + upb_strview str = jsondec_string(d); + if (jsondec_streql(str, "true")) { + val.bool_val = true; + } else if (jsondec_streql(str, "false")) { + val.bool_val = false; + } else { + jsondec_err(d, "Invalid boolean map key"); + } + } else { + switch (jsondec_peek(d)) { + case JD_TRUE: + val.bool_val = true; + jsondec_true(d); + break; + case JD_FALSE: + val.bool_val = false; + jsondec_false(d); + break; + default: + jsondec_err(d, "Expected true or false"); + } + } + + return val; +} + +/* Composite types (array/message/map) ****************************************/ + +static void jsondec_array(jsondec *d, upb_msg *msg, const upb_fielddef *f) { + upb_array *arr = upb_msg_mutable(msg, f, d->arena).array; + + jsondec_arrstart(d); + while (jsondec_arrnext(d)) { + upb_msgval elem = jsondec_value(d, f); + upb_array_append(arr, elem, d->arena); + } + jsondec_arrend(d); +} + +static void jsondec_map(jsondec *d, upb_msg *msg, const upb_fielddef *f) { + upb_map *map = upb_msg_mutable(msg, f, d->arena).map; + const upb_msgdef *entry = upb_fielddef_msgsubdef(f); + const upb_fielddef *key_f = upb_msgdef_itof(entry, 1); + const upb_fielddef *val_f = upb_msgdef_itof(entry, 2); + + jsondec_objstart(d); + while (jsondec_objnext(d)) { + upb_msgval key, val; + key = jsondec_value(d, key_f); + jsondec_entrysep(d); + val = jsondec_value(d, val_f); + upb_map_set(map, key, val, d->arena); + } + jsondec_objend(d); +} + +static void jsondec_tomsg(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + if (upb_msgdef_wellknowntype(m) == UPB_WELLKNOWN_UNSPECIFIED) { + jsondec_object(d, msg, m); + } else { + jsondec_wellknown(d, msg, m); + } +} + +static upb_msgval jsondec_msg(jsondec *d, const upb_fielddef *f) { + const upb_msgdef *m = upb_fielddef_msgsubdef(f); + upb_msg *msg = upb_msg_new(m, d->arena); + upb_msgval val; + + jsondec_tomsg(d, msg, m); + val.msg_val = msg; + return val; +} + +static void jsondec_field(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + upb_strview name; + const upb_fielddef *f; + const upb_fielddef *preserved; + + name = jsondec_string(d); + jsondec_entrysep(d); + f = upb_msgdef_lookupjsonname(m, name.data, name.size); + + if (!f) { + if ((d->options & UPB_JSONDEC_IGNOREUNKNOWN) == 0) { + jsondec_errf(d, "No such field: " UPB_STRVIEW_FORMAT, + UPB_STRVIEW_ARGS(name)); + } + jsondec_skipval(d); + return; + } + + if (upb_fielddef_realcontainingoneof(f) && + upb_msg_whichoneof(msg, upb_fielddef_containingoneof(f))) { + jsondec_err(d, "More than one field for this oneof."); + } + + if (jsondec_peek(d) == JD_NULL && !jsondec_isvalue(f)) { + /* JSON "null" indicates a default value, so no need to set anything. */ + jsondec_null(d); + return; + } + + preserved = d->debug_field; + d->debug_field = f; + + if (upb_fielddef_ismap(f)) { + jsondec_map(d, msg, f); + } else if (upb_fielddef_isseq(f)) { + jsondec_array(d, msg, f); + } else if (upb_fielddef_issubmsg(f)) { + upb_msg *submsg = upb_msg_mutable(msg, f, d->arena).msg; + const upb_msgdef *subm = upb_fielddef_msgsubdef(f); + jsondec_tomsg(d, submsg, subm); + } else { + upb_msgval val = jsondec_value(d, f); + upb_msg_set(msg, f, val, d->arena); + } + + d->debug_field = preserved; +} + +static void jsondec_object(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + jsondec_objstart(d); + while (jsondec_objnext(d)) { + jsondec_field(d, msg, m); + } + jsondec_objend(d); +} + +static upb_msgval jsondec_value(jsondec *d, const upb_fielddef *f) { + switch (upb_fielddef_type(f)) { + case UPB_TYPE_BOOL: + return jsondec_bool(d, f); + case UPB_TYPE_FLOAT: + case UPB_TYPE_DOUBLE: + return jsondec_double(d, f); + case UPB_TYPE_UINT32: + case UPB_TYPE_UINT64: + return jsondec_uint(d, f); + case UPB_TYPE_INT32: + case UPB_TYPE_INT64: + return jsondec_int(d, f); + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + return jsondec_strfield(d, f); + case UPB_TYPE_ENUM: + return jsondec_enum(d, f); + case UPB_TYPE_MESSAGE: + return jsondec_msg(d, f); + default: + UPB_UNREACHABLE(); + } +} + +/* Well-known types ***********************************************************/ + +static int jsondec_tsdigits(jsondec *d, const char **ptr, size_t digits, + const char *after) { + uint64_t val; + const char *p = *ptr; + const char *end = p + digits; + size_t after_len = after ? strlen(after) : 0; + + UPB_ASSERT(digits <= 9); /* int can't overflow. */ + + if (jsondec_buftouint64(d, p, end, &val) != end || + (after_len && memcmp(end, after, after_len) != 0)) { + jsondec_err(d, "Malformed timestamp"); + } + + UPB_ASSERT(val < INT_MAX); + + *ptr = end + after_len; + return (int)val; +} + +static int jsondec_nanos(jsondec *d, const char **ptr, const char *end) { + uint64_t nanos = 0; + const char *p = *ptr; + + if (p != end && *p == '.') { + const char *nano_end = jsondec_buftouint64(d, p + 1, end, &nanos); + int digits = (int)(nano_end - p - 1); + int exp_lg10 = 9 - digits; + if (digits > 9) { + jsondec_err(d, "Too many digits for partial seconds"); + } + while (exp_lg10--) nanos *= 10; + *ptr = nano_end; + } + + UPB_ASSERT(nanos < INT_MAX); + + return (int)nanos; +} + +/* jsondec_epochdays(1970, 1, 1) == 1970-01-01 == 0. */ +int jsondec_epochdays(int y, int m, int d) { + const uint32_t year_base = 4800; /* Before min year, multiple of 400. */ + const uint32_t m_adj = m - 3; /* March-based month. */ + const uint32_t carry = m_adj > (uint32_t)m ? 1 : 0; + const uint32_t adjust = carry ? 12 : 0; + const uint32_t y_adj = y + year_base - carry; + const uint32_t month_days = ((m_adj + adjust) * 62719 + 769) / 2048; + const uint32_t leap_days = y_adj / 4 - y_adj / 100 + y_adj / 400; + return y_adj * 365 + leap_days + month_days + (d - 1) - 2472632; +} + +static int64_t jsondec_unixtime(int y, int m, int d, int h, int min, int s) { + return (int64_t)jsondec_epochdays(y, m, d) * 86400 + h * 3600 + min * 60 + s; +} + +static void jsondec_timestamp(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + upb_msgval seconds; + upb_msgval nanos; + upb_strview str = jsondec_string(d); + const char *ptr = str.data; + const char *end = ptr + str.size; + + if (str.size < 20) goto malformed; + + { + /* 1972-01-01T01:00:00 */ + int year = jsondec_tsdigits(d, &ptr, 4, "-"); + int mon = jsondec_tsdigits(d, &ptr, 2, "-"); + int day = jsondec_tsdigits(d, &ptr, 2, "T"); + int hour = jsondec_tsdigits(d, &ptr, 2, ":"); + int min = jsondec_tsdigits(d, &ptr, 2, ":"); + int sec = jsondec_tsdigits(d, &ptr, 2, NULL); + + seconds.int64_val = jsondec_unixtime(year, mon, day, hour, min, sec); + } + + nanos.int32_val = jsondec_nanos(d, &ptr, end); + + { + /* [+-]08:00 or Z */ + int ofs = 0; + bool neg = false; + + if (ptr == end) goto malformed; + + switch (*ptr++) { + case '-': + neg = true; + /* fallthrough */ + case '+': + if ((end - ptr) != 5) goto malformed; + ofs = jsondec_tsdigits(d, &ptr, 2, ":00"); + ofs *= 60 * 60; + seconds.int64_val += (neg ? ofs : -ofs); + break; + case 'Z': + if (ptr != end) goto malformed; + break; + default: + goto malformed; + } + } + + if (seconds.int64_val < -62135596800) { + jsondec_err(d, "Timestamp out of range"); + } + + upb_msg_set(msg, upb_msgdef_itof(m, 1), seconds, d->arena); + upb_msg_set(msg, upb_msgdef_itof(m, 2), nanos, d->arena); + return; + +malformed: + jsondec_err(d, "Malformed timestamp"); +} + +static void jsondec_duration(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + upb_msgval seconds; + upb_msgval nanos; + upb_strview str = jsondec_string(d); + const char *ptr = str.data; + const char *end = ptr + str.size; + const int64_t max = (uint64_t)3652500 * 86400; + + /* "3.000000001s", "3s", etc. */ + ptr = jsondec_buftoint64(d, ptr, end, &seconds.int64_val); + nanos.int32_val = jsondec_nanos(d, &ptr, end); + + if (end - ptr != 1 || *ptr != 's') { + jsondec_err(d, "Malformed duration"); + } + + if (seconds.int64_val < -max || seconds.int64_val > max) { + jsondec_err(d, "Duration out of range"); + } + + if (seconds.int64_val < 0) { + nanos.int32_val = - nanos.int32_val; + } + + upb_msg_set(msg, upb_msgdef_itof(m, 1), seconds, d->arena); + upb_msg_set(msg, upb_msgdef_itof(m, 2), nanos, d->arena); +} + +static void jsondec_listvalue(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + const upb_fielddef *values_f = upb_msgdef_itof(m, 1); + const upb_msgdef *value_m = upb_fielddef_msgsubdef(values_f); + upb_array *values = upb_msg_mutable(msg, values_f, d->arena).array; + + jsondec_arrstart(d); + while (jsondec_arrnext(d)) { + upb_msg *value_msg = upb_msg_new(value_m, d->arena); + upb_msgval value; + value.msg_val = value_msg; + upb_array_append(values, value, d->arena); + jsondec_wellknownvalue(d, value_msg, value_m); + } + jsondec_arrend(d); +} + +static void jsondec_struct(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + const upb_fielddef *fields_f = upb_msgdef_itof(m, 1); + const upb_msgdef *entry_m = upb_fielddef_msgsubdef(fields_f); + const upb_fielddef *value_f = upb_msgdef_itof(entry_m, 2); + const upb_msgdef *value_m = upb_fielddef_msgsubdef(value_f); + upb_map *fields = upb_msg_mutable(msg, fields_f, d->arena).map; + + jsondec_objstart(d); + while (jsondec_objnext(d)) { + upb_msgval key, value; + upb_msg *value_msg = upb_msg_new(value_m, d->arena); + key.str_val = jsondec_string(d); + value.msg_val = value_msg; + upb_map_set(fields, key, value, d->arena); + jsondec_entrysep(d); + jsondec_wellknownvalue(d, value_msg, value_m); + } + jsondec_objend(d); +} + +static void jsondec_wellknownvalue(jsondec *d, upb_msg *msg, + const upb_msgdef *m) { + upb_msgval val; + const upb_fielddef *f; + upb_msg *submsg; + + switch (jsondec_peek(d)) { + case JD_NUMBER: + /* double number_value = 2; */ + f = upb_msgdef_itof(m, 2); + val.double_val = jsondec_number(d); + break; + case JD_STRING: + /* string string_value = 3; */ + f = upb_msgdef_itof(m, 3); + val.str_val = jsondec_string(d); + break; + case JD_FALSE: + /* bool bool_value = 4; */ + f = upb_msgdef_itof(m, 4); + val.bool_val = false; + jsondec_false(d); + break; + case JD_TRUE: + /* bool bool_value = 4; */ + f = upb_msgdef_itof(m, 4); + val.bool_val = true; + jsondec_true(d); + break; + case JD_NULL: + /* NullValue null_value = 1; */ + f = upb_msgdef_itof(m, 1); + val.int32_val = 0; + jsondec_null(d); + break; + /* Note: these cases return, because upb_msg_mutable() is enough. */ + case JD_OBJECT: + /* Struct struct_value = 5; */ + f = upb_msgdef_itof(m, 5); + submsg = upb_msg_mutable(msg, f, d->arena).msg; + jsondec_struct(d, submsg, upb_fielddef_msgsubdef(f)); + return; + case JD_ARRAY: + /* ListValue list_value = 6; */ + f = upb_msgdef_itof(m, 6); + submsg = upb_msg_mutable(msg, f, d->arena).msg; + jsondec_listvalue(d, submsg, upb_fielddef_msgsubdef(f)); + return; + default: + UPB_UNREACHABLE(); + } + + upb_msg_set(msg, f, val, d->arena); +} + +static upb_strview jsondec_mask(jsondec *d, const char *buf, const char *end) { + /* FieldMask fields grow due to inserted '_' characters, so we can't do the + * transform in place. */ + const char *ptr = buf; + upb_strview ret; + char *out; + + ret.size = end - ptr; + while (ptr < end) { + ret.size += (*ptr >= 'A' && *ptr <= 'Z'); + ptr++; + } + + out = upb_arena_malloc(d->arena, ret.size); + ptr = buf; + ret.data = out; + + while (ptr < end) { + char ch = *ptr++; + if (ch >= 'A' && ch <= 'Z') { + *out++ = '_'; + *out++ = ch + 32; + } else if (ch == '_') { + jsondec_err(d, "field mask may not contain '_'"); + } else { + *out++ = ch; + } + } + + return ret; +} + +static void jsondec_fieldmask(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + /* repeated string paths = 1; */ + const upb_fielddef *paths_f = upb_msgdef_itof(m, 1); + upb_array *arr = upb_msg_mutable(msg, paths_f, d->arena).array; + upb_strview str = jsondec_string(d); + const char *ptr = str.data; + const char *end = ptr + str.size; + upb_msgval val; + + while (ptr < end) { + const char *elem_end = memchr(ptr, ',', end - ptr); + if (elem_end) { + val.str_val = jsondec_mask(d, ptr, elem_end); + ptr = elem_end + 1; + } else { + val.str_val = jsondec_mask(d, ptr, end); + ptr = end; + } + upb_array_append(arr, val, d->arena); + } +} + +static void jsondec_anyfield(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + if (upb_msgdef_wellknowntype(m) == UPB_WELLKNOWN_UNSPECIFIED) { + /* For regular types: {"@type": "[user type]", "f1": , "f2": } + * where f1, f2, etc. are the normal fields of this type. */ + jsondec_field(d, msg, m); + } else { + /* For well-known types: {"@type": "[well-known type]", "value": } + * where is whatever encoding the WKT normally uses. */ + upb_strview str = jsondec_string(d); + jsondec_entrysep(d); + if (!jsondec_streql(str, "value")) { + jsondec_err(d, "Key for well-known type must be 'value'"); + } + jsondec_wellknown(d, msg, m); + } +} + +static const upb_msgdef *jsondec_typeurl(jsondec *d, upb_msg *msg, + const upb_msgdef *m) { + const upb_fielddef *type_url_f = upb_msgdef_itof(m, 1); + const upb_msgdef *type_m; + upb_strview type_url = jsondec_string(d); + const char *end = type_url.data + type_url.size; + const char *ptr = end; + upb_msgval val; + + val.str_val = type_url; + upb_msg_set(msg, type_url_f, val, d->arena); + + /* Find message name after the last '/' */ + while (ptr > type_url.data && *--ptr != '/') {} + + if (ptr == type_url.data || ptr == end) { + jsondec_err(d, "Type url must have at least one '/' and non-empty host"); + } + + ptr++; + type_m = upb_symtab_lookupmsg2(d->any_pool, ptr, end - ptr); + + if (!type_m) { + jsondec_err(d, "Type was not found"); + } + + return type_m; +} + +static void jsondec_any(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + /* string type_url = 1; + * bytes value = 2; */ + const upb_fielddef *value_f = upb_msgdef_itof(m, 2); + upb_msg *any_msg; + const upb_msgdef *any_m = NULL; + const char *pre_type_data = NULL; + const char *pre_type_end = NULL; + upb_msgval encoded; + + jsondec_objstart(d); + + /* Scan looking for "@type", which is not necessarily first. */ + while (!any_m && jsondec_objnext(d)) { + const char *start = d->ptr; + upb_strview name = jsondec_string(d); + jsondec_entrysep(d); + if (jsondec_streql(name, "@type")) { + any_m = jsondec_typeurl(d, msg, m); + if (pre_type_data) { + pre_type_end = start; + while (*pre_type_end != ',') pre_type_end--; + } + } else { + if (!pre_type_data) pre_type_data = start; + jsondec_skipval(d); + } + } + + if (!any_m) { + jsondec_err(d, "Any object didn't contain a '@type' field"); + } + + any_msg = upb_msg_new(any_m, d->arena); + + if (pre_type_data) { + size_t len = pre_type_end - pre_type_data + 1; + char *tmp = upb_arena_malloc(d->arena, len); + const char *saved_ptr = d->ptr; + const char *saved_end = d->end; + memcpy(tmp, pre_type_data, len - 1); + tmp[len - 1] = '}'; + d->ptr = tmp; + d->end = tmp + len; + d->is_first = true; + while (jsondec_objnext(d)) { + jsondec_anyfield(d, any_msg, any_m); + } + d->ptr = saved_ptr; + d->end = saved_end; + } + + while (jsondec_objnext(d)) { + jsondec_anyfield(d, any_msg, any_m); + } + + jsondec_objend(d); + + encoded.str_val.data = upb_encode(any_msg, upb_msgdef_layout(any_m), d->arena, + &encoded.str_val.size); + upb_msg_set(msg, value_f, encoded, d->arena); +} + +static void jsondec_wrapper(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + const upb_fielddef *value_f = upb_msgdef_itof(m, 1); + upb_msgval val = jsondec_value(d, value_f); + upb_msg_set(msg, value_f, val, d->arena); +} + +static void jsondec_wellknown(jsondec *d, upb_msg *msg, const upb_msgdef *m) { + switch (upb_msgdef_wellknowntype(m)) { + case UPB_WELLKNOWN_ANY: + jsondec_any(d, msg, m); + break; + case UPB_WELLKNOWN_FIELDMASK: + jsondec_fieldmask(d, msg, m); + break; + case UPB_WELLKNOWN_DURATION: + jsondec_duration(d, msg, m); + break; + case UPB_WELLKNOWN_TIMESTAMP: + jsondec_timestamp(d, msg, m); + break; + case UPB_WELLKNOWN_VALUE: + jsondec_wellknownvalue(d, msg, m); + break; + case UPB_WELLKNOWN_LISTVALUE: + jsondec_listvalue(d, msg, m); + break; + case UPB_WELLKNOWN_STRUCT: + jsondec_struct(d, msg, m); + break; + case UPB_WELLKNOWN_DOUBLEVALUE: + case UPB_WELLKNOWN_FLOATVALUE: + case UPB_WELLKNOWN_INT64VALUE: + case UPB_WELLKNOWN_UINT64VALUE: + case UPB_WELLKNOWN_INT32VALUE: + case UPB_WELLKNOWN_UINT32VALUE: + case UPB_WELLKNOWN_STRINGVALUE: + case UPB_WELLKNOWN_BYTESVALUE: + case UPB_WELLKNOWN_BOOLVALUE: + jsondec_wrapper(d, msg, m); + break; + default: + UPB_UNREACHABLE(); + } +} + +bool upb_json_decode(const char *buf, size_t size, upb_msg *msg, + const upb_msgdef *m, const upb_symtab *any_pool, + int options, upb_arena *arena, upb_status *status) { + jsondec d; + d.ptr = buf; + d.end = buf + size; + d.arena = arena; + d.any_pool = any_pool; + d.status = status; + d.options = options; + d.depth = 64; + d.line = 1; + d.line_begin = d.ptr; + d.debug_field = NULL; + d.is_first = false; + + if (UPB_SETJMP(d.err)) return false; + + jsondec_tomsg(&d, msg, m); + return true; +} + + +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Must be last. */ + +typedef struct { + char *buf, *ptr, *end; + size_t overflow; + int indent_depth; + int options; + const upb_symtab *ext_pool; + jmp_buf err; + upb_status *status; + upb_arena *arena; +} jsonenc; + +static void jsonenc_msg(jsonenc *e, const upb_msg *msg, const upb_msgdef *m); +static void jsonenc_scalar(jsonenc *e, upb_msgval val, const upb_fielddef *f); +static void jsonenc_msgfield(jsonenc *e, const upb_msg *msg, + const upb_msgdef *m); +static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg, + const upb_msgdef *m); +static void jsonenc_value(jsonenc *e, const upb_msg *msg, const upb_msgdef *m); + +UPB_NORETURN static void jsonenc_err(jsonenc *e, const char *msg) { + upb_status_seterrmsg(e->status, msg); + longjmp(e->err, 1); +} + +UPB_PRINTF(2, 3) +UPB_NORETURN static void jsonenc_errf(jsonenc *e, const char *fmt, ...) { + va_list argp; + va_start(argp, fmt); + upb_status_vseterrf(e->status, fmt, argp); + va_end(argp); + longjmp(e->err, 1); +} + +static upb_arena *jsonenc_arena(jsonenc *e) { + /* Create lazily, since it's only needed for Any */ + if (!e->arena) { + e->arena = upb_arena_new(); + } + return e->arena; +} + +static void jsonenc_putbytes(jsonenc *e, const void *data, size_t len) { + size_t have = e->end - e->ptr; + if (UPB_LIKELY(have >= len)) { + memcpy(e->ptr, data, len); + e->ptr += len; + } else { + if (have) memcpy(e->ptr, data, have); + e->ptr += have; + e->overflow += (len - have); + } +} + +static void jsonenc_putstr(jsonenc *e, const char *str) { + jsonenc_putbytes(e, str, strlen(str)); +} + +UPB_PRINTF(2, 3) +static void jsonenc_printf(jsonenc *e, const char *fmt, ...) { + size_t n; + size_t have = e->end - e->ptr; + va_list args; + + va_start(args, fmt); + n = vsnprintf(e->ptr, have, fmt, args); + va_end(args); + + if (UPB_LIKELY(have > n)) { + e->ptr += n; + } else { + e->ptr += have; + e->overflow += (n - have); + } +} + +static void jsonenc_nanos(jsonenc *e, int32_t nanos) { + int digits = 9; + + if (nanos == 0) return; + if (nanos < 0 || nanos >= 1000000000) { + jsonenc_err(e, "error formatting timestamp as JSON: invalid nanos"); + } + + while (nanos % 1000 == 0) { + nanos /= 1000; + digits -= 3; + } + + jsonenc_printf(e, ".%.*" PRId32, digits, nanos); +} + +static void jsonenc_timestamp(jsonenc *e, const upb_msg *msg, + const upb_msgdef *m) { + const upb_fielddef *seconds_f = upb_msgdef_itof(m, 1); + const upb_fielddef *nanos_f = upb_msgdef_itof(m, 2); + int64_t seconds = upb_msg_get(msg, seconds_f).int64_val; + int32_t nanos = upb_msg_get(msg, nanos_f).int32_val; + int L, N, I, J, K, hour, min, sec; + + if (seconds < -62135596800) { + jsonenc_err(e, + "error formatting timestamp as JSON: minimum acceptable value " + "is 0001-01-01T00:00:00Z"); + } else if (seconds > 253402300799) { + jsonenc_err(e, + "error formatting timestamp as JSON: maximum acceptable value " + "is 9999-12-31T23:59:59Z"); + } + + /* Julian Day -> Y/M/D, Algorithm from: + * Fliegel, H. F., and Van Flandern, T. C., "A Machine Algorithm for + * Processing Calendar Dates," Communications of the Association of + * Computing Machines, vol. 11 (1968), p. 657. */ + L = (int)(seconds / 86400) + 68569 + 2440588; + N = 4 * L / 146097; + L = L - (146097 * N + 3) / 4; + I = 4000 * (L + 1) / 1461001; + L = L - 1461 * I / 4 + 31; + J = 80 * L / 2447; + K = L - 2447 * J / 80; + L = J / 11; + J = J + 2 - 12 * L; + I = 100 * (N - 49) + I + L; + + sec = seconds % 60; + min = (seconds / 60) % 60; + hour = (seconds / 3600) % 24; + + jsonenc_printf(e, "\"%04d-%02d-%02dT%02d:%02d:%02d", I, J, K, hour, min, sec); + jsonenc_nanos(e, nanos); + jsonenc_putstr(e, "Z\""); +} + +static void jsonenc_duration(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { + const upb_fielddef *seconds_f = upb_msgdef_itof(m, 1); + const upb_fielddef *nanos_f = upb_msgdef_itof(m, 2); + int64_t seconds = upb_msg_get(msg, seconds_f).int64_val; + int32_t nanos = upb_msg_get(msg, nanos_f).int32_val; + + if (seconds > 315576000000 || seconds < -315576000000 || + (seconds < 0) != (nanos < 0)) { + jsonenc_err(e, "bad duration"); + } + + if (nanos < 0) { + nanos = -nanos; + } + + jsonenc_printf(e, "\"%" PRId64, seconds); + jsonenc_nanos(e, nanos); + jsonenc_putstr(e, "s\""); +} + +static void jsonenc_enum(int32_t val, const upb_fielddef *f, jsonenc *e) { + const upb_enumdef *e_def = upb_fielddef_enumsubdef(f); + + if (strcmp(upb_enumdef_fullname(e_def), "google.protobuf.NullValue") == 0) { + jsonenc_putstr(e, "null"); + } else { + const char *name = upb_enumdef_iton(e_def, val); + + if (name) { + jsonenc_printf(e, "\"%s\"", name); + } else { + jsonenc_printf(e, "%" PRId32, val); + } + } +} + +static void jsonenc_bytes(jsonenc *e, upb_strview str) { + /* This is the regular base64, not the "web-safe" version. */ + static const char base64[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + const unsigned char *ptr = (unsigned char*)str.data; + const unsigned char *end = ptr + str.size; + char buf[4]; + + jsonenc_putstr(e, "\""); + + while (end - ptr >= 3) { + buf[0] = base64[ptr[0] >> 2]; + buf[1] = base64[((ptr[0] & 0x3) << 4) | (ptr[1] >> 4)]; + buf[2] = base64[((ptr[1] & 0xf) << 2) | (ptr[2] >> 6)]; + buf[3] = base64[ptr[2] & 0x3f]; + jsonenc_putbytes(e, buf, 4); + ptr += 3; + } + + switch (end - ptr) { + case 2: + buf[0] = base64[ptr[0] >> 2]; + buf[1] = base64[((ptr[0] & 0x3) << 4) | (ptr[1] >> 4)]; + buf[2] = base64[(ptr[1] & 0xf) << 2]; + buf[3] = '='; + jsonenc_putbytes(e, buf, 4); + break; + case 1: + buf[0] = base64[ptr[0] >> 2]; + buf[1] = base64[((ptr[0] & 0x3) << 4)]; + buf[2] = '='; + buf[3] = '='; + jsonenc_putbytes(e, buf, 4); + break; + } + + jsonenc_putstr(e, "\""); +} + +static void jsonenc_stringbody(jsonenc *e, upb_strview str) { + const char *ptr = str.data; + const char *end = ptr + str.size; + + while (ptr < end) { + switch (*ptr) { + case '\n': + jsonenc_putstr(e, "\\n"); + break; + case '\r': + jsonenc_putstr(e, "\\r"); + break; + case '\t': + jsonenc_putstr(e, "\\t"); + break; + case '\"': + jsonenc_putstr(e, "\\\""); + break; + case '\f': + jsonenc_putstr(e, "\\f"); + break; + case '\b': + jsonenc_putstr(e, "\\b"); + break; + case '\\': + jsonenc_putstr(e, "\\\\"); + break; + default: + if ((uint8_t)*ptr < 0x20) { + jsonenc_printf(e, "\\u%04x", (int)(uint8_t)*ptr); + } else { + /* This could be a non-ASCII byte. We rely on the string being valid + * UTF-8. */ + jsonenc_putbytes(e, ptr, 1); + } + break; + } + ptr++; + } +} + +static void jsonenc_string(jsonenc *e, upb_strview str) { + jsonenc_putstr(e, "\""); + jsonenc_stringbody(e, str); + jsonenc_putstr(e, "\""); +} + +static void jsonenc_double(jsonenc *e, const char *fmt, double val) { + if (val == INFINITY) { + jsonenc_putstr(e, "\"Infinity\""); + } else if (val == -INFINITY) { + jsonenc_putstr(e, "\"-Infinity\""); + } else if (val != val) { + jsonenc_putstr(e, "\"NaN\""); + } else { + jsonenc_printf(e, fmt, val); + } +} + +static void jsonenc_wrapper(jsonenc *e, const upb_msg *msg, + const upb_msgdef *m) { + const upb_fielddef *val_f = upb_msgdef_itof(m, 1); + upb_msgval val = upb_msg_get(msg, val_f); + jsonenc_scalar(e, val, val_f); +} + +static const upb_msgdef *jsonenc_getanymsg(jsonenc *e, upb_strview type_url) { + /* Find last '/', if any. */ + const char *end = type_url.data + type_url.size; + const char *ptr = end; + const upb_msgdef *ret; + + if (!e->ext_pool) { + jsonenc_err(e, "Tried to encode Any, but no symtab was provided"); + } + + if (type_url.size == 0) goto badurl; + + while (true) { + if (--ptr == type_url.data) { + /* Type URL must contain at least one '/', with host before. */ + goto badurl; + } + if (*ptr == '/') { + ptr++; + break; + } + } + + ret = upb_symtab_lookupmsg2(e->ext_pool, ptr, end - ptr); + + if (!ret) { + jsonenc_errf(e, "Couldn't find Any type: %.*s", (int)(end - ptr), ptr); + } + + return ret; + +badurl: + jsonenc_errf( + e, "Bad type URL: " UPB_STRVIEW_FORMAT, UPB_STRVIEW_ARGS(type_url)); +} + +static void jsonenc_any(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { + const upb_fielddef *type_url_f = upb_msgdef_itof(m, 1); + const upb_fielddef *value_f = upb_msgdef_itof(m, 2); + upb_strview type_url = upb_msg_get(msg, type_url_f).str_val; + upb_strview value = upb_msg_get(msg, value_f).str_val; + const upb_msgdef *any_m = jsonenc_getanymsg(e, type_url); + const upb_msglayout *any_layout = upb_msgdef_layout(any_m); + upb_arena *arena = jsonenc_arena(e); + upb_msg *any = upb_msg_new(any_m, arena); + + if (!upb_decode(value.data, value.size, any, any_layout, arena)) { + jsonenc_err(e, "Error decoding message in Any"); + } + + jsonenc_putstr(e, "{\"@type\":"); + jsonenc_string(e, type_url); + jsonenc_putstr(e, ","); + + if (upb_msgdef_wellknowntype(any_m) == UPB_WELLKNOWN_UNSPECIFIED) { + /* Regular messages: {"@type": "...","foo": 1, "bar": 2} */ + jsonenc_msgfields(e, any, any_m); + } else { + /* Well-known type: {"@type": "...","value": } */ + jsonenc_putstr(e, "\"value\":"); + jsonenc_msgfield(e, any, any_m); + } + + jsonenc_putstr(e, "}"); +} + +static void jsonenc_putsep(jsonenc *e, const char *str, bool *first) { + if (*first) { + *first = false; + } else { + jsonenc_putstr(e, str); + } +} + +static void jsonenc_fieldpath(jsonenc *e, upb_strview path) { + const char *ptr = path.data; + const char *end = ptr + path.size; + + while (ptr < end) { + char ch = *ptr; + + if (ch >= 'A' && ch <= 'Z') { + jsonenc_err(e, "Field mask element may not have upper-case letter."); + } else if (ch == '_') { + if (ptr == end - 1 || *(ptr + 1) < 'a' || *(ptr + 1) > 'z') { + jsonenc_err(e, "Underscore must be followed by a lowercase letter."); + } + ch = *++ptr - 32; + } + + jsonenc_putbytes(e, &ch, 1); + ptr++; + } +} + +static void jsonenc_fieldmask(jsonenc *e, const upb_msg *msg, + const upb_msgdef *m) { + const upb_fielddef *paths_f = upb_msgdef_itof(m, 1); + const upb_array *paths = upb_msg_get(msg, paths_f).array_val; + bool first = true; + size_t i, n = 0; + + if (paths) n = upb_array_size(paths); + + jsonenc_putstr(e, "\""); + + for (i = 0; i < n; i++) { + jsonenc_putsep(e, ",", &first); + jsonenc_fieldpath(e, upb_array_get(paths, i).str_val); + } + + jsonenc_putstr(e, "\""); +} + +static void jsonenc_struct(jsonenc *e, const upb_msg *msg, + const upb_msgdef *m) { + const upb_fielddef *fields_f = upb_msgdef_itof(m, 1); + const upb_map *fields = upb_msg_get(msg, fields_f).map_val; + const upb_msgdef *entry_m = upb_fielddef_msgsubdef(fields_f); + const upb_fielddef *value_f = upb_msgdef_itof(entry_m, 2); + size_t iter = UPB_MAP_BEGIN; + bool first = true; + + jsonenc_putstr(e, "{"); + + if (fields) { + while (upb_mapiter_next(fields, &iter)) { + upb_msgval key = upb_mapiter_key(fields, iter); + upb_msgval val = upb_mapiter_value(fields, iter); + + jsonenc_putsep(e, ",", &first); + jsonenc_string(e, key.str_val); + jsonenc_putstr(e, ":"); + jsonenc_value(e, val.msg_val, upb_fielddef_msgsubdef(value_f)); + } + } + + jsonenc_putstr(e, "}"); +} + +static void jsonenc_listvalue(jsonenc *e, const upb_msg *msg, + const upb_msgdef *m) { + const upb_fielddef *values_f = upb_msgdef_itof(m, 1); + const upb_msgdef *values_m = upb_fielddef_msgsubdef(values_f); + const upb_array *values = upb_msg_get(msg, values_f).array_val; + size_t i; + bool first = true; + + jsonenc_putstr(e, "["); + + if (values) { + const size_t size = upb_array_size(values); + for (i = 0; i < size; i++) { + upb_msgval elem = upb_array_get(values, i); + + jsonenc_putsep(e, ",", &first); + jsonenc_value(e, elem.msg_val, values_m); + } + } + + jsonenc_putstr(e, "]"); +} + +static void jsonenc_value(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { + /* TODO(haberman): do we want a reflection method to get oneof case? */ + size_t iter = UPB_MSG_BEGIN; + const upb_fielddef *f; + upb_msgval val; + + if (!upb_msg_next(msg, m, NULL, &f, &val, &iter)) { + jsonenc_err(e, "No value set in Value proto"); + } + + switch (upb_fielddef_number(f)) { + case 1: + jsonenc_putstr(e, "null"); + break; + case 2: + jsonenc_double(e, "%.17g", val.double_val); + break; + case 3: + jsonenc_string(e, val.str_val); + break; + case 4: + jsonenc_putstr(e, val.bool_val ? "true" : "false"); + break; + case 5: + jsonenc_struct(e, val.msg_val, upb_fielddef_msgsubdef(f)); + break; + case 6: + jsonenc_listvalue(e, val.msg_val, upb_fielddef_msgsubdef(f)); + break; + } +} + +static void jsonenc_msgfield(jsonenc *e, const upb_msg *msg, + const upb_msgdef *m) { + switch (upb_msgdef_wellknowntype(m)) { + case UPB_WELLKNOWN_UNSPECIFIED: + jsonenc_msg(e, msg, m); + break; + case UPB_WELLKNOWN_ANY: + jsonenc_any(e, msg, m); + break; + case UPB_WELLKNOWN_FIELDMASK: + jsonenc_fieldmask(e, msg, m); + break; + case UPB_WELLKNOWN_DURATION: + jsonenc_duration(e, msg, m); + break; + case UPB_WELLKNOWN_TIMESTAMP: + jsonenc_timestamp(e, msg, m); + break; + case UPB_WELLKNOWN_DOUBLEVALUE: + case UPB_WELLKNOWN_FLOATVALUE: + case UPB_WELLKNOWN_INT64VALUE: + case UPB_WELLKNOWN_UINT64VALUE: + case UPB_WELLKNOWN_INT32VALUE: + case UPB_WELLKNOWN_UINT32VALUE: + case UPB_WELLKNOWN_STRINGVALUE: + case UPB_WELLKNOWN_BYTESVALUE: + case UPB_WELLKNOWN_BOOLVALUE: + jsonenc_wrapper(e, msg, m); + break; + case UPB_WELLKNOWN_VALUE: + jsonenc_value(e, msg, m); + break; + case UPB_WELLKNOWN_LISTVALUE: + jsonenc_listvalue(e, msg, m); + break; + case UPB_WELLKNOWN_STRUCT: + jsonenc_struct(e, msg, m); + break; + } +} + +static void jsonenc_scalar(jsonenc *e, upb_msgval val, const upb_fielddef *f) { + switch (upb_fielddef_type(f)) { + case UPB_TYPE_BOOL: + jsonenc_putstr(e, val.bool_val ? "true" : "false"); + break; + case UPB_TYPE_FLOAT: + jsonenc_double(e, "%.9g", val.float_val); + break; + case UPB_TYPE_DOUBLE: + jsonenc_double(e, "%.17g", val.double_val); + break; + case UPB_TYPE_INT32: + jsonenc_printf(e, "%" PRId32, val.int32_val); + break; + case UPB_TYPE_UINT32: + jsonenc_printf(e, "%" PRIu32, val.uint32_val); + break; + case UPB_TYPE_INT64: + jsonenc_printf(e, "\"%" PRId64 "\"", val.int64_val); + break; + case UPB_TYPE_UINT64: + jsonenc_printf(e, "\"%" PRIu64 "\"", val.uint64_val); + break; + case UPB_TYPE_STRING: + jsonenc_string(e, val.str_val); + break; + case UPB_TYPE_BYTES: + jsonenc_bytes(e, val.str_val); + break; + case UPB_TYPE_ENUM: + jsonenc_enum(val.int32_val, f, e); + break; + case UPB_TYPE_MESSAGE: + jsonenc_msgfield(e, val.msg_val, upb_fielddef_msgsubdef(f)); + break; + } +} + +static void jsonenc_mapkey(jsonenc *e, upb_msgval val, const upb_fielddef *f) { + jsonenc_putstr(e, "\""); + + switch (upb_fielddef_type(f)) { + case UPB_TYPE_BOOL: + jsonenc_putstr(e, val.bool_val ? "true" : "false"); + break; + case UPB_TYPE_INT32: + jsonenc_printf(e, "%" PRId32, val.int32_val); + break; + case UPB_TYPE_UINT32: + jsonenc_printf(e, "%" PRIu32, val.uint32_val); + break; + case UPB_TYPE_INT64: + jsonenc_printf(e, "%" PRId64, val.int64_val); + break; + case UPB_TYPE_UINT64: + jsonenc_printf(e, "%" PRIu64, val.uint64_val); + break; + case UPB_TYPE_STRING: + jsonenc_stringbody(e, val.str_val); + break; + default: + UPB_UNREACHABLE(); + } + + jsonenc_putstr(e, "\":"); +} + +static void jsonenc_array(jsonenc *e, const upb_array *arr, + const upb_fielddef *f) { + size_t i; + size_t size = arr ? upb_array_size(arr) : 0; + bool first = true; + + jsonenc_putstr(e, "["); + + for (i = 0; i < size; i++) { + jsonenc_putsep(e, ",", &first); + jsonenc_scalar(e, upb_array_get(arr, i), f); + } + + jsonenc_putstr(e, "]"); +} + +static void jsonenc_map(jsonenc *e, const upb_map *map, const upb_fielddef *f) { + const upb_msgdef *entry = upb_fielddef_msgsubdef(f); + const upb_fielddef *key_f = upb_msgdef_itof(entry, 1); + const upb_fielddef *val_f = upb_msgdef_itof(entry, 2); + size_t iter = UPB_MAP_BEGIN; + bool first = true; + + jsonenc_putstr(e, "{"); + + if (map) { + while (upb_mapiter_next(map, &iter)) { + jsonenc_putsep(e, ",", &first); + jsonenc_mapkey(e, upb_mapiter_key(map, iter), key_f); + jsonenc_scalar(e, upb_mapiter_value(map, iter), val_f); + } + } + + jsonenc_putstr(e, "}"); +} + +static void jsonenc_fieldval(jsonenc *e, const upb_fielddef *f, + upb_msgval val, bool *first) { + const char *name; + + if (e->options & UPB_JSONENC_PROTONAMES) { + name = upb_fielddef_name(f); + } else { + name = upb_fielddef_jsonname(f); + } + + jsonenc_putsep(e, ",", first); + jsonenc_printf(e, "\"%s\":", name); + + if (upb_fielddef_ismap(f)) { + jsonenc_map(e, val.map_val, f); + } else if (upb_fielddef_isseq(f)) { + jsonenc_array(e, val.array_val, f); + } else { + jsonenc_scalar(e, val, f); + } +} + +static void jsonenc_msgfields(jsonenc *e, const upb_msg *msg, + const upb_msgdef *m) { + upb_msgval val; + const upb_fielddef *f; + bool first = true; + + if (e->options & UPB_JSONENC_EMITDEFAULTS) { + /* Iterate over all fields. */ + int i = 0; + int n = upb_msgdef_fieldcount(m); + for (i = 0; i < n; i++) { + f = upb_msgdef_field(m, i); + if (!upb_fielddef_haspresence(f) || upb_msg_has(msg, f)) { + jsonenc_fieldval(e, f, upb_msg_get(msg, f), &first); + } + } + } else { + /* Iterate over non-empty fields. */ + size_t iter = UPB_MSG_BEGIN; + while (upb_msg_next(msg, m, e->ext_pool, &f, &val, &iter)) { + jsonenc_fieldval(e, f, val, &first); + } + } +} + +static void jsonenc_msg(jsonenc *e, const upb_msg *msg, const upb_msgdef *m) { + jsonenc_putstr(e, "{"); + jsonenc_msgfields(e, msg, m); + jsonenc_putstr(e, "}"); +} + +static size_t jsonenc_nullz(jsonenc *e, size_t size) { + size_t ret = e->ptr - e->buf + e->overflow; + + if (size > 0) { + if (e->ptr == e->end) e->ptr--; + *e->ptr = '\0'; + } + + return ret; +} + +size_t upb_json_encode(const upb_msg *msg, const upb_msgdef *m, + const upb_symtab *ext_pool, int options, char *buf, + size_t size, upb_status *status) { + jsonenc e; + + e.buf = buf; + e.ptr = buf; + e.end = buf + size; + e.overflow = 0; + e.options = options; + e.ext_pool = ext_pool; + e.status = status; + e.arena = NULL; + + if (setjmp(e.err)) return -1; + + jsonenc_msgfield(&e, msg, m); + if (e.arena) upb_arena_free(e.arena); + return jsonenc_nullz(&e, size); +} +/* See port_def.inc. This should #undef all macros #defined there. */ + +#undef UPB_MAPTYPE_STRING +#undef UPB_SIZE +#undef UPB_PTR_AT +#undef UPB_READ_ONEOF +#undef UPB_WRITE_ONEOF +#undef UPB_INLINE +#undef UPB_ALIGN_UP +#undef UPB_ALIGN_DOWN +#undef UPB_ALIGN_MALLOC +#undef UPB_ALIGN_OF +#undef UPB_FORCEINLINE +#undef UPB_NOINLINE +#undef UPB_NORETURN +#undef UPB_MAX +#undef UPB_MIN +#undef UPB_UNUSED +#undef UPB_ASSUME +#undef UPB_ASSERT +#undef UPB_UNREACHABLE +#undef UPB_POISON_MEMORY_REGION +#undef UPB_UNPOISON_MEMORY_REGION +#undef UPB_ASAN diff --git a/ruby/ext/google/protobuf_c/upb.h b/ruby/ext/google/protobuf_c/ruby-upb.h old mode 100644 new mode 100755 similarity index 58% rename from ruby/ext/google/protobuf_c/upb.h rename to ruby/ext/google/protobuf_c/ruby-upb.h index b7da76fa0018..106c73b6934b --- a/ruby/ext/google/protobuf_c/upb.h +++ b/ruby/ext/google/protobuf_c/ruby-upb.h @@ -21,7 +21,15 @@ * * This file is private and must not be included by users! */ + +#if !((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \ + (defined(__cplusplus) && __cplusplus >= 201103L) || \ + (defined(_MSC_VER) && _MSC_VER >= 1900)) +#error upb requires C99 or C++11 or MSVC >= 2015. +#endif + #include +#include #if UINTPTR_MAX == 0xffffffff #define UPB_SIZE(size32, size64) size32 @@ -54,6 +62,11 @@ #define UPB_INLINE static #endif +#define UPB_ALIGN_UP(size, align) (((size) + (align) - 1) / (align) * (align)) +#define UPB_ALIGN_DOWN(size, align) ((size) / (align) * (align)) +#define UPB_ALIGN_MALLOC(size) UPB_ALIGN_UP(size, 16) +#define UPB_ALIGN_OF(type) offsetof (struct { char c; type member; }, member) + /* Hints to the compiler about likely/unlikely branches. */ #if defined (__GNUC__) || defined(__clang__) #define UPB_LIKELY(x) __builtin_expect((x),1) @@ -63,64 +76,22 @@ #define UPB_UNLIKELY(x) (x) #endif -/* Define UPB_BIG_ENDIAN manually if you're on big endian and your compiler - * doesn't provide these preprocessor symbols. */ -#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -#define UPB_BIG_ENDIAN -#endif - /* Macros for function attributes on compilers that support them. */ #ifdef __GNUC__ #define UPB_FORCEINLINE __inline__ __attribute__((always_inline)) #define UPB_NOINLINE __attribute__((noinline)) #define UPB_NORETURN __attribute__((__noreturn__)) +#define UPB_PRINTF(str, first_vararg) __attribute__((format (printf, str, first_vararg))) +#elif defined(_MSC_VER) +#define UPB_NOINLINE +#define UPB_FORCEINLINE +#define UPB_NORETURN __declspec(noreturn) +#define UPB_PRINTF(str, first_vararg) #else /* !defined(__GNUC__) */ #define UPB_FORCEINLINE #define UPB_NOINLINE #define UPB_NORETURN -#endif - -#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L -/* C99/C++11 versions. */ -#include -#define _upb_snprintf snprintf -#define _upb_vsnprintf vsnprintf -#define _upb_va_copy(a, b) va_copy(a, b) -#elif defined(_MSC_VER) -/* Microsoft C/C++ versions. */ -#include -#include -#if _MSC_VER < 1900 -int msvc_snprintf(char* s, size_t n, const char* format, ...); -int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg); -#define UPB_MSVC_VSNPRINTF -#define _upb_snprintf msvc_snprintf -#define _upb_vsnprintf msvc_vsnprintf -#else -#define _upb_snprintf snprintf -#define _upb_vsnprintf vsnprintf -#endif -#define _upb_va_copy(a, b) va_copy(a, b) -#elif defined __GNUC__ -/* A few hacky workarounds for functions not in C89. - * For internal use only! - * TODO(haberman): fix these by including our own implementations, or finding - * another workaround. - */ -#define _upb_snprintf __builtin_snprintf -#define _upb_vsnprintf __builtin_vsnprintf -#define _upb_va_copy(a, b) __va_copy(a, b) -#else -#error Need implementations of [v]snprintf and va_copy -#endif - -#ifdef __cplusplus -#if __cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__) || \ - (defined(_MSC_VER) && _MSC_VER >= 1900) -/* C++11 is present */ -#else -#error upb requires C++11 for C++ support -#endif +#define UPB_PRINTF(str, first_vararg) #endif #define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) @@ -133,8 +104,10 @@ int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg); #ifdef NDEBUG #ifdef __GNUC__ #define UPB_ASSUME(expr) if (!(expr)) __builtin_unreachable() +#elif defined _MSC_VER +#define UPB_ASSUME(expr) if (!(expr)) __assume(0) #else -#define UPB_ASSUME(expr) do {} if (false && (expr)) +#define UPB_ASSUME(expr) do {} while (false && (expr)) #endif #else #define UPB_ASSUME(expr) assert(expr) @@ -148,23 +121,79 @@ int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg); #define UPB_ASSERT(expr) assert(expr) #endif -/* UPB_ASSERT_DEBUGVAR(): assert that uses functions or variables that only - * exist in debug mode. This turns into regular assert. */ -#define UPB_ASSERT_DEBUGVAR(expr) assert(expr) - #if defined(__GNUC__) || defined(__clang__) #define UPB_UNREACHABLE() do { assert(0); __builtin_unreachable(); } while(0) #else #define UPB_UNREACHABLE() do { assert(0); } while(0) #endif -/* UPB_INFINITY representing floating-point positive infinity. */ -#include -#ifdef INFINITY -#define UPB_INFINITY INFINITY +/* UPB_SETJMP() / UPB_LONGJMP(): avoid setting/restoring signal mask. */ +#ifdef __APPLE__ +#define UPB_SETJMP(buf) _setjmp(buf) +#define UPB_LONGJMP(buf, val) _longjmp(buf, val) +#else +#define UPB_SETJMP(buf) setjmp(buf) +#define UPB_LONGJMP(buf, val) longjmp(buf, val) +#endif + +/* Configure whether fasttable is switched on or not. *************************/ + +#if defined(__x86_64__) && defined(__GNUC__) +#define UPB_FASTTABLE_SUPPORTED 1 +#else +#define UPB_FASTTABLE_SUPPORTED 0 +#endif + +/* define UPB_ENABLE_FASTTABLE to force fast table support. + * This is useful when we want to ensure we are really getting fasttable, + * for example for testing or benchmarking. */ +#if defined(UPB_ENABLE_FASTTABLE) +#if !UPB_FASTTABLE_SUPPORTED +#error fasttable is x86-64 + Clang/GCC only +#endif +#define UPB_FASTTABLE 1 +/* Define UPB_TRY_ENABLE_FASTTABLE to use fasttable if possible. + * This is useful for releasing code that might be used on multiple platforms, + * for example the PHP or Ruby C extensions. */ +#elif defined(UPB_TRY_ENABLE_FASTTABLE) +#define UPB_FASTTABLE UPB_FASTTABLE_SUPPORTED +#else +#define UPB_FASTTABLE 0 +#endif + +/* UPB_FASTTABLE_INIT() allows protos compiled for fasttable to gracefully + * degrade to non-fasttable if we are using UPB_TRY_ENABLE_FASTTABLE. */ +#if !UPB_FASTTABLE && defined(UPB_TRY_ENABLE_FASTTABLE) +#define UPB_FASTTABLE_INIT(...) #else -#define UPB_INFINITY (1.0 / 0.0) +#define UPB_FASTTABLE_INIT(...) __VA_ARGS__ +#endif + +#undef UPB_FASTTABLE_SUPPORTED + +/* ASAN poisoning (for arena) *************************************************/ + +#if defined(__SANITIZE_ADDRESS__) +#define UPB_ASAN 1 +#ifdef __cplusplus +extern "C" { +#endif +void __asan_poison_memory_region(void const volatile *addr, size_t size); +void __asan_unpoison_memory_region(void const volatile *addr, size_t size); +#ifdef __cplusplus +} /* extern "C" */ #endif +#define UPB_POISON_MEMORY_REGION(addr, size) \ + __asan_poison_memory_region((addr), (size)) +#define UPB_UNPOISON_MEMORY_REGION(addr, size) \ + __asan_unpoison_memory_region((addr), (size)) +#else +#define UPB_ASAN 0 +#define UPB_POISON_MEMORY_REGION(addr, size) \ + ((void)(addr), (void)(size)) +#define UPB_UNPOISON_MEMORY_REGION(addr, size) \ + ((void)(addr), (void)(size)) +#endif /* ** upb_decode: parsing into a upb_msg using a upb_msglayout. */ @@ -183,6 +212,7 @@ int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg); #define UPB_MSG_H_ #include +#include #include /* @@ -243,8 +273,12 @@ bool upb_ok(const upb_status *status); /* These are no-op if |status| is NULL. */ void upb_status_clear(upb_status *status); void upb_status_seterrmsg(upb_status *status, const char *msg); -void upb_status_seterrf(upb_status *status, const char *fmt, ...); -void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args); +void upb_status_seterrf(upb_status *status, const char *fmt, ...) + UPB_PRINTF(2, 3); +void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) + UPB_PRINTF(2, 0); +void upb_status_vappenderrf(upb_status *status, const char *fmt, va_list args) + UPB_PRINTF(2, 0); /** upb_strview ************************************************************/ @@ -352,47 +386,65 @@ typedef struct upb_arena upb_arena; typedef struct { /* We implement the allocator interface. - * This must be the first member of upb_arena! */ + * This must be the first member of upb_arena! + * TODO(haberman): remove once handlers are gone. */ upb_alloc alloc; char *ptr, *end; } _upb_arena_head; -UPB_INLINE size_t _upb_arena_alignup(size_t size) { - const size_t maxalign = 16; - return ((size + maxalign - 1) / maxalign) * maxalign; -} - /* Creates an arena from the given initial block (if any -- n may be 0). * Additional blocks will be allocated from |alloc|. If |alloc| is NULL, this * is a fixed-size arena and cannot grow. */ upb_arena *upb_arena_init(void *mem, size_t n, upb_alloc *alloc); void upb_arena_free(upb_arena *a); bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func); -size_t upb_arena_bytesallocated(const upb_arena *a); +void upb_arena_fuse(upb_arena *a, upb_arena *b); void *_upb_arena_slowmalloc(upb_arena *a, size_t size); UPB_INLINE upb_alloc *upb_arena_alloc(upb_arena *a) { return (upb_alloc*)a; } +UPB_INLINE size_t _upb_arenahas(upb_arena *a) { + _upb_arena_head *h = (_upb_arena_head*)a; + return (size_t)(h->end - h->ptr); +} + UPB_INLINE void *upb_arena_malloc(upb_arena *a, size_t size) { _upb_arena_head *h = (_upb_arena_head*)a; - size = _upb_arena_alignup(size); - if (UPB_LIKELY((size_t)(h->end - h->ptr) >= size)) { - void* ret = h->ptr; - h->ptr += size; - return ret; - } else { + void* ret; + size = UPB_ALIGN_MALLOC(size); + + if (UPB_UNLIKELY(_upb_arenahas(a) < size)) { return _upb_arena_slowmalloc(a, size); } + + ret = h->ptr; + h->ptr += size; + UPB_UNPOISON_MEMORY_REGION(ret, size); + +#if UPB_ASAN + { + size_t guard_size = 32; + if (_upb_arenahas(a) >= guard_size) { + h->ptr += guard_size; + } else { + h->ptr = h->end; + } + } +#endif + + return ret; } UPB_INLINE void *upb_arena_realloc(upb_arena *a, void *ptr, size_t oldsize, size_t size) { - if (oldsize == 0) { - return upb_arena_malloc(a, size); - } else { - return upb_realloc(upb_arena_alloc(a), ptr, oldsize, size); + void *ret = upb_arena_malloc(a, size); + + if (ret && oldsize > 0) { + memcpy(ret, ptr, oldsize); } + + return ret; } UPB_INLINE upb_arena *upb_arena_new(void) { @@ -480,7 +532,44 @@ typedef enum { UPB_DTYPE_SINT64 = 18 } upb_descriptortype_t; -#define UPB_MAP_BEGIN -1 +#define UPB_MAP_BEGIN ((size_t)-1) + +UPB_INLINE bool _upb_isle(void) { + int x = 1; + return *(char*)&x == 1; +} + +UPB_INLINE uint32_t _upb_be_swap32(uint32_t val) { + if (_upb_isle()) { + return val; + } else { + return ((val & 0xff) << 24) | ((val & 0xff00) << 8) | + ((val & 0xff0000) >> 8) | ((val & 0xff000000) >> 24); + } +} + +UPB_INLINE uint64_t _upb_be_swap64(uint64_t val) { + if (_upb_isle()) { + return val; + } else { + return ((uint64_t)_upb_be_swap32(val) << 32) | _upb_be_swap32(val >> 32); + } +} + +UPB_INLINE int _upb_lg2ceil(int x) { + if (x <= 1) return 0; +#ifdef __GNUC__ + return 32 - __builtin_clz(x - 1); +#else + int lg2 = 0; + while (1 << lg2 < x) lg2++; + return lg2; +#endif +} + +UPB_INLINE int _upb_lg2ceilsize(int x) { + return 1 << _upb_lg2ceil(x); +} #ifdef __cplusplus @@ -611,10 +700,17 @@ UPB_INLINE char *upb_tabstr(upb_tabkey key, uint32_t *len) { return mem + sizeof(*len); } +UPB_INLINE upb_strview upb_tabstrview(upb_tabkey key) { + upb_strview ret; + uint32_t len; + ret.data = upb_tabstr(key, &len); + ret.size = len; + return ret; +} /* upb_tabval *****************************************************************/ -typedef struct { +typedef struct upb_tabval { uint64_t val; } upb_tabval; @@ -635,7 +731,8 @@ typedef struct _upb_tabent { typedef struct { size_t count; /* Number of entries in the hash part. */ - size_t mask; /* Mask to turn hash value -> bucket. */ + uint32_t mask; /* Mask to turn hash value -> bucket. */ + uint32_t max_count; /* Max count before we hit our load limit. */ uint8_t size_lg2; /* Size of the hashtable part is 2^size_lg2 entries. */ /* Hash table entries. @@ -694,7 +791,8 @@ UPB_INLINE bool upb_arrhas(upb_tabval key) { /* Initialize and uninitialize a table, respectively. If memory allocation * failed, false is returned that the table is uninitialized. */ bool upb_inttable_init2(upb_inttable *table, upb_ctype_t ctype, upb_alloc *a); -bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype, upb_alloc *a); +bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype, + size_t expected_size, upb_alloc *a); void upb_inttable_uninit2(upb_inttable *table, upb_alloc *a); void upb_strtable_uninit2(upb_strtable *table, upb_alloc *a); @@ -703,7 +801,7 @@ UPB_INLINE bool upb_inttable_init(upb_inttable *table, upb_ctype_t ctype) { } UPB_INLINE bool upb_strtable_init(upb_strtable *table, upb_ctype_t ctype) { - return upb_strtable_init2(table, ctype, &upb_alloc_global); + return upb_strtable_init2(table, ctype, 4, &upb_alloc_global); } UPB_INLINE void upb_inttable_uninit(upb_inttable *table) { @@ -790,15 +888,6 @@ UPB_INLINE bool upb_strtable_remove(upb_strtable *t, const char *key, * invalidate iterators. */ bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val); -/* Handy routines for treating an inttable like a stack. May not be mixed with - * other insert/remove calls. */ -bool upb_inttable_push2(upb_inttable *t, upb_value val, upb_alloc *a); -upb_value upb_inttable_pop(upb_inttable *t); - -UPB_INLINE bool upb_inttable_push(upb_inttable *t, upb_value val) { - return upb_inttable_push2(t, val, &upb_alloc_global); -} - /* Convenience routines for inttables with pointer keys. */ bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val, upb_alloc *a); @@ -937,6 +1026,7 @@ bool upb_inttable_iter_isequal(const upb_inttable_iter *i1, #endif /* UPB_TABLE_H_ */ +/* Must be last. */ #ifdef __cplusplus extern "C" { @@ -962,12 +1052,24 @@ enum { typedef struct { uint32_t number; uint16_t offset; - int16_t presence; /* If >0, hasbit_index. If <0, -oneof_index. */ + int16_t presence; /* If >0, hasbit_index. If <0, ~oneof_index. */ uint16_t submsg_index; /* undefined if descriptortype != MESSAGE or GROUP. */ uint8_t descriptortype; - uint8_t label; + uint8_t label; /* google.protobuf.Label or _UPB_LABEL_* above. */ } upb_msglayout_field; +struct upb_decstate; +struct upb_msglayout; + +typedef const char *_upb_field_parser(struct upb_decstate *d, const char *ptr, + upb_msg *msg, intptr_t table, + uint64_t hasbits, uint64_t data); + +typedef struct { + uint64_t field_data; + _upb_field_parser *field_parser; +} _upb_fasttable_entry; + typedef struct upb_msglayout { const struct upb_msglayout *const* submsgs; const upb_msglayout_field *fields; @@ -976,6 +1078,10 @@ typedef struct upb_msglayout { uint16_t size; uint16_t field_count; bool extendable; + uint8_t table_mask; + /* To constant-initialize the tables of variable length, we need a flexible + * array member, and we need to compile in C99 mode. */ + _upb_fasttable_entry fasttable[]; } upb_msglayout; /** upb_msg *******************************************************************/ @@ -984,25 +1090,48 @@ typedef struct upb_msglayout { * compatibility. We put these before the user's data. The user's upb_msg* * points after the upb_msg_internal. */ -/* Used when a message is not extendable. */ typedef struct { - char *unknown; - size_t unknown_len; - size_t unknown_size; -} upb_msg_internal; + uint32_t len; + uint32_t size; + /* Data follows. */ +} upb_msg_unknowndata; -/* Used when a message is extendable. */ +/* Used when a message is not extendable. */ typedef struct { - upb_inttable *extdict; - upb_msg_internal base; -} upb_msg_internal_withext; + upb_msg_unknowndata *unknown; +} upb_msg_internal; /* Maps upb_fieldtype_t -> memory size. */ extern char _upb_fieldtype_to_size[12]; +UPB_INLINE size_t upb_msg_sizeof(const upb_msglayout *l) { + return l->size + sizeof(upb_msg_internal); +} + +UPB_INLINE upb_msg *_upb_msg_new_inl(const upb_msglayout *l, upb_arena *a) { + size_t size = upb_msg_sizeof(l); + void *mem = upb_arena_malloc(a, size); + upb_msg *msg; + if (UPB_UNLIKELY(!mem)) return NULL; + msg = UPB_PTR_AT(mem, sizeof(upb_msg_internal), upb_msg); + memset(mem, 0, size); + return msg; +} + /* Creates a new messages with the given layout on the given arena. */ upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a); +UPB_INLINE upb_msg_internal *upb_msg_getinternal(upb_msg *msg) { + ptrdiff_t size = sizeof(upb_msg_internal); + return (upb_msg_internal*)((char*)msg - size); +} + +/* Clears the given message. */ +void _upb_msg_clear(upb_msg *msg, const upb_msglayout *l); + +/* Discards the unknown fields for this message only. */ +void _upb_msg_discardunknown_shallow(upb_msg *msg); + /* Adds unknown data (serialized protobuf data) to the given message. The data * is copied into the message instance. */ bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, @@ -1011,30 +1140,77 @@ bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, /* Returns a reference to the message's unknown data. */ const char *upb_msg_getunknown(const upb_msg *msg, size_t *len); -UPB_INLINE bool _upb_has_field(const void *msg, size_t idx) { +/** Hasbit access *************************************************************/ + +UPB_INLINE bool _upb_hasbit(const upb_msg *msg, size_t idx) { return (*PTR_AT(msg, idx / 8, const char) & (1 << (idx % 8))) != 0; } -UPB_INLINE bool _upb_sethas(const void *msg, size_t idx) { - return (*PTR_AT(msg, idx / 8, char)) |= (char)(1 << (idx % 8)); +UPB_INLINE void _upb_sethas(const upb_msg *msg, size_t idx) { + (*PTR_AT(msg, idx / 8, char)) |= (char)(1 << (idx % 8)); +} + +UPB_INLINE void _upb_clearhas(const upb_msg *msg, size_t idx) { + (*PTR_AT(msg, idx / 8, char)) &= (char)(~(1 << (idx % 8))); +} + +UPB_INLINE size_t _upb_msg_hasidx(const upb_msglayout_field *f) { + UPB_ASSERT(f->presence > 0); + return f->presence; +} + +UPB_INLINE bool _upb_hasbit_field(const upb_msg *msg, + const upb_msglayout_field *f) { + return _upb_hasbit(msg, _upb_msg_hasidx(f)); +} + +UPB_INLINE void _upb_sethas_field(const upb_msg *msg, + const upb_msglayout_field *f) { + _upb_sethas(msg, _upb_msg_hasidx(f)); +} + +UPB_INLINE void _upb_clearhas_field(const upb_msg *msg, + const upb_msglayout_field *f) { + _upb_clearhas(msg, _upb_msg_hasidx(f)); +} + +/** Oneof case access *********************************************************/ + +UPB_INLINE uint32_t *_upb_oneofcase(upb_msg *msg, size_t case_ofs) { + return PTR_AT(msg, case_ofs, uint32_t); +} + +UPB_INLINE uint32_t _upb_getoneofcase(const void *msg, size_t case_ofs) { + return *PTR_AT(msg, case_ofs, uint32_t); } -UPB_INLINE bool _upb_clearhas(const void *msg, size_t idx) { - return (*PTR_AT(msg, idx / 8, char)) &= (char)(~(1 << (idx % 8))); +UPB_INLINE size_t _upb_oneofcase_ofs(const upb_msglayout_field *f) { + UPB_ASSERT(f->presence < 0); + return ~(ptrdiff_t)f->presence; } -UPB_INLINE bool _upb_has_oneof_field(const void *msg, size_t case_ofs, int32_t num) { - return *PTR_AT(msg, case_ofs, int32_t) == num; +UPB_INLINE uint32_t *_upb_oneofcase_field(upb_msg *msg, + const upb_msglayout_field *f) { + return _upb_oneofcase(msg, _upb_oneofcase_ofs(f)); } -UPB_INLINE bool _upb_has_submsg_nohasbit(const void *msg, size_t ofs) { - return *PTR_AT(msg, ofs, const void*) != NULL; +UPB_INLINE uint32_t _upb_getoneofcase_field(const upb_msg *msg, + const upb_msglayout_field *f) { + return _upb_getoneofcase(msg, _upb_oneofcase_ofs(f)); +} + +UPB_INLINE bool _upb_has_submsg_nohasbit(const upb_msg *msg, size_t ofs) { + return *PTR_AT(msg, ofs, const upb_msg*) != NULL; } UPB_INLINE bool _upb_isrepeated(const upb_msglayout_field *field) { return (field->label & 3) == UPB_LABEL_REPEATED; } +UPB_INLINE bool _upb_repeated_or_map(const upb_msglayout_field *field) { + return field->label >= UPB_LABEL_REPEATED; +} + /** upb_array *****************************************************************/ /* Our internal representation for repeated fields. */ @@ -1042,27 +1218,62 @@ typedef struct { uintptr_t data; /* Tagged ptr: low 3 bits of ptr are lg2(elem size). */ size_t len; /* Measured in elements. */ size_t size; /* Measured in elements. */ + uint64_t junk; } upb_array; UPB_INLINE const void *_upb_array_constptr(const upb_array *arr) { + UPB_ASSERT((arr->data & 7) <= 4); return (void*)(arr->data & ~(uintptr_t)7); } +UPB_INLINE uintptr_t _upb_array_tagptr(void* ptr, int elem_size_lg2) { + UPB_ASSERT(elem_size_lg2 <= 4); + return (uintptr_t)ptr | elem_size_lg2; +} + UPB_INLINE void *_upb_array_ptr(upb_array *arr) { return (void*)_upb_array_constptr(arr); } -/* Creates a new array on the given arena. */ -upb_array *_upb_array_new(upb_arena *a, upb_fieldtype_t type); +UPB_INLINE uintptr_t _upb_tag_arrptr(void* ptr, int elem_size_lg2) { + UPB_ASSERT(elem_size_lg2 <= 4); + UPB_ASSERT(((uintptr_t)ptr & 7) == 0); + return (uintptr_t)ptr | (unsigned)elem_size_lg2; +} + +UPB_INLINE upb_array *_upb_array_new(upb_arena *a, size_t init_size, + int elem_size_lg2) { + const size_t arr_size = UPB_ALIGN_UP(sizeof(upb_array), 8); + const size_t bytes = sizeof(upb_array) + (init_size << elem_size_lg2); + upb_array *arr = (upb_array*)upb_arena_malloc(a, bytes); + if (!arr) return NULL; + arr->data = _upb_tag_arrptr(UPB_PTR_AT(arr, arr_size, void), elem_size_lg2); + arr->len = 0; + arr->size = init_size; + return arr; +} /* Resizes the capacity of the array to be at least min_size. */ bool _upb_array_realloc(upb_array *arr, size_t min_size, upb_arena *arena); /* Fallback functions for when the accessors require a resize. */ void *_upb_array_resize_fallback(upb_array **arr_ptr, size_t size, - upb_fieldtype_t type, upb_arena *arena); + int elem_size_lg2, upb_arena *arena); bool _upb_array_append_fallback(upb_array **arr_ptr, const void *value, - upb_fieldtype_t type, upb_arena *arena); + int elem_size_lg2, upb_arena *arena); + +UPB_INLINE bool _upb_array_reserve(upb_array *arr, size_t size, + upb_arena *arena) { + if (arr->size < size) return _upb_array_realloc(arr, size, arena); + return true; +} + +UPB_INLINE bool _upb_array_resize(upb_array *arr, size_t size, + upb_arena *arena) { + if (!_upb_array_reserve(arr, size, arena)) return false; + arr->len = size; + return true; +} UPB_INLINE const void *_upb_array_accessor(const void *msg, size_t ofs, size_t *size) { @@ -1088,29 +1299,28 @@ UPB_INLINE void *_upb_array_mutable_accessor(void *msg, size_t ofs, } } -UPB_INLINE void *_upb_array_resize_accessor(void *msg, size_t ofs, size_t size, - upb_fieldtype_t type, - upb_arena *arena) { - upb_array **arr_ptr = PTR_AT(msg, ofs, upb_array*); +UPB_INLINE void *_upb_array_resize_accessor2(void *msg, size_t ofs, size_t size, + int elem_size_lg2, + upb_arena *arena) { + upb_array **arr_ptr = PTR_AT(msg, ofs, upb_array *); upb_array *arr = *arr_ptr; if (!arr || arr->size < size) { - return _upb_array_resize_fallback(arr_ptr, size, type, arena); + return _upb_array_resize_fallback(arr_ptr, size, elem_size_lg2, arena); } arr->len = size; return _upb_array_ptr(arr); } - -UPB_INLINE bool _upb_array_append_accessor(void *msg, size_t ofs, - size_t elem_size, - upb_fieldtype_t type, - const void *value, - upb_arena *arena) { - upb_array **arr_ptr = PTR_AT(msg, ofs, upb_array*); +UPB_INLINE bool _upb_array_append_accessor2(void *msg, size_t ofs, + int elem_size_lg2, + const void *value, + upb_arena *arena) { + upb_array **arr_ptr = PTR_AT(msg, ofs, upb_array *); + size_t elem_size = 1 << elem_size_lg2; upb_array *arr = *arr_ptr; - void* ptr; + void *ptr; if (!arr || arr->len == arr->size) { - return _upb_array_append_fallback(arr_ptr, value, type, arena); + return _upb_array_append_fallback(arr_ptr, value, elem_size_lg2, arena); } ptr = _upb_array_ptr(arr); memcpy(PTR_AT(ptr, arr->len * elem_size, char), value, elem_size); @@ -1118,6 +1328,42 @@ UPB_INLINE bool _upb_array_append_accessor(void *msg, size_t ofs, return true; } +/* Used by old generated code, remove once all code has been regenerated. */ +UPB_INLINE int _upb_sizelg2(upb_fieldtype_t type) { + switch (type) { + case UPB_TYPE_BOOL: + return 0; + case UPB_TYPE_FLOAT: + case UPB_TYPE_INT32: + case UPB_TYPE_UINT32: + case UPB_TYPE_ENUM: + return 2; + case UPB_TYPE_MESSAGE: + return UPB_SIZE(2, 3); + case UPB_TYPE_DOUBLE: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT64: + return 3; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + return UPB_SIZE(3, 4); + } + UPB_UNREACHABLE(); +} +UPB_INLINE void *_upb_array_resize_accessor(void *msg, size_t ofs, size_t size, + upb_fieldtype_t type, + upb_arena *arena) { + return _upb_array_resize_accessor2(msg, ofs, size, _upb_sizelg2(type), arena); +} +UPB_INLINE bool _upb_array_append_accessor(void *msg, size_t ofs, + size_t elem_size, upb_fieldtype_t type, + const void *value, + upb_arena *arena) { + (void)elem_size; + return _upb_array_append_accessor2(msg, ofs, _upb_sizelg2(type), value, + arena); +} + /** upb_map *******************************************************************/ /* Right now we use strmaps for everything. We'll likely want to use @@ -1174,17 +1420,17 @@ UPB_INLINE void _upb_map_fromkey(upb_strview key, void* out, size_t size) { } } -UPB_INLINE upb_value _upb_map_tovalue(const void *val, size_t size, - upb_arena *a) { - upb_value ret = {0}; +UPB_INLINE bool _upb_map_tovalue(const void *val, size_t size, upb_value *msgval, + upb_arena *a) { if (size == UPB_MAPTYPE_STRING) { upb_strview *strp = (upb_strview*)upb_arena_malloc(a, sizeof(*strp)); + if (!strp) return false; *strp = *(upb_strview*)val; - memcpy(&ret, &strp, sizeof(strp)); + *msgval = upb_value_ptr(strp); } else { - memcpy(&ret, val, size); + memcpy(msgval, val, size); } - return ret; + return true; } UPB_INLINE void _upb_map_fromvalue(upb_value val, void* out, size_t size) { @@ -1207,7 +1453,7 @@ UPB_INLINE bool _upb_map_get(const upb_map *map, const void *key, upb_value tabval; upb_strview k = _upb_map_tokey(key, key_size); bool ret = upb_strtable_lookup2(&map->table, k.data, k.size, &tabval); - if (ret) { + if (ret && val) { _upb_map_fromvalue(tabval, val, val_size); } return ret; @@ -1218,15 +1464,16 @@ UPB_INLINE void* _upb_map_next(const upb_map *map, size_t *iter) { it.t = &map->table; it.index = *iter; upb_strtable_next(&it); - if (upb_strtable_done(&it)) return NULL; *iter = it.index; + if (upb_strtable_done(&it)) return NULL; return (void*)str_tabent(&it); } UPB_INLINE bool _upb_map_set(upb_map *map, const void *key, size_t key_size, void *val, size_t val_size, upb_arena *arena) { upb_strview strkey = _upb_map_tokey(key, key_size); - upb_value tabval = _upb_map_tovalue(val, val_size, arena); + upb_value tabval = {0}; + if (!_upb_map_tovalue(val, val_size, &tabval, arena)) return false; upb_alloc *a = upb_arena_alloc(arena); /* TODO(haberman): add overwrite operation to minimize number of lookups. */ @@ -1311,13 +1558,60 @@ UPB_INLINE void _upb_msg_map_set_value(void* msg, const void* val, size_t size) /* This is like _upb_map_tovalue() except the entry already exists so we can * reuse the allocated upb_strview for string fields. */ if (size == UPB_MAPTYPE_STRING) { - upb_strview *strp = (upb_strview*)ent->val.val; + upb_strview *strp = (upb_strview*)(uintptr_t)ent->val.val; memcpy(strp, val, sizeof(*strp)); } else { memcpy(&ent->val.val, val, size); } } +/** _upb_mapsorter *************************************************************/ + +/* _upb_mapsorter sorts maps and provides ordered iteration over the entries. + * Since maps can be recursive (map values can be messages which contain other maps). + * _upb_mapsorter can contain a stack of maps. */ + +typedef struct { + upb_tabent const**entries; + int size; + int cap; +} _upb_mapsorter; + +typedef struct { + int start; + int pos; + int end; +} _upb_sortedmap; + +UPB_INLINE void _upb_mapsorter_init(_upb_mapsorter *s) { + s->entries = NULL; + s->size = 0; + s->cap = 0; +} + +UPB_INLINE void _upb_mapsorter_destroy(_upb_mapsorter *s) { + if (s->entries) free(s->entries); +} + +bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type, + const upb_map *map, _upb_sortedmap *sorted); + +UPB_INLINE void _upb_mapsorter_popmap(_upb_mapsorter *s, _upb_sortedmap *sorted) { + s->size = sorted->start; +} + +UPB_INLINE bool _upb_sortedmap_next(_upb_mapsorter *s, const upb_map *map, + _upb_sortedmap *sorted, + upb_map_entry *ent) { + if (sorted->pos == sorted->end) return false; + const upb_tabent *tabent = s->entries[sorted->pos++]; + upb_strview key = upb_tabstrview(tabent->key); + _upb_map_fromkey(key, &ent->k, map->key_size); + upb_value val = {tabent->val.val}; + _upb_map_fromvalue(val, &ent->v, map->val_size); + return true; +} + #undef PTR_AT #ifdef __cplusplus @@ -1327,19 +1621,223 @@ UPB_INLINE void _upb_msg_map_set_value(void* msg, const void* val, size_t size) #endif /* UPB_MSG_H_ */ +/* Must be last. */ + #ifdef __cplusplus extern "C" { #endif +enum { + /* If set, strings will alias the input buffer instead of copying into the + * arena. */ + UPB_DECODE_ALIAS = 1, +}; + +#define UPB_DECODE_MAXDEPTH(depth) ((depth) << 16) + +bool _upb_decode(const char *buf, size_t size, upb_msg *msg, + const upb_msglayout *l, upb_arena *arena, int options); + +UPB_INLINE bool upb_decode(const char *buf, size_t size, upb_msg *msg, - const upb_msglayout *l, upb_arena *arena); + const upb_msglayout *l, upb_arena *arena) { + return _upb_decode(buf, size, msg, l, arena, 0); +} #ifdef __cplusplus } /* extern "C" */ #endif + #endif /* UPB_DECODE_H_ */ /* +** Internal implementation details of the decoder that are shared between +** decode.c and decode_fast.c. +*/ + +#ifndef UPB_DECODE_INT_H_ +#define UPB_DECODE_INT_H_ + +#include + + +#ifndef UPB_INT_H_ +#define UPB_INT_H_ + + +struct mem_block; +typedef struct mem_block mem_block; + +struct upb_arena { + _upb_arena_head head; + uint32_t *cleanups; + + /* Allocator to allocate arena blocks. We are responsible for freeing these + * when we are destroyed. */ + upb_alloc *block_alloc; + uint32_t last_size; + + /* When multiple arenas are fused together, each arena points to a parent + * arena (root points to itself). The root tracks how many live arenas + * reference it. */ + uint32_t refcount; /* Only used when a->parent == a */ + struct upb_arena *parent; + + /* Linked list of blocks to free/cleanup. */ + mem_block *freelist, *freelist_tail; +}; + +#endif /* UPB_INT_H_ */ + +/* Must be last. */ + +#define DECODE_NOGROUP (uint32_t)-1 + +typedef struct upb_decstate { + const char *end; /* Can read up to 16 bytes slop beyond this. */ + const char *limit_ptr; /* = end + UPB_MIN(limit, 0) */ + upb_msg *unknown_msg; /* If non-NULL, add unknown data at buffer flip. */ + const char *unknown; /* Start of unknown data. */ + int limit; /* Submessage limit relative to end. */ + int depth; + uint32_t end_group; /* field number of END_GROUP tag, else DECODE_NOGROUP */ + bool alias; + char patch[32]; + upb_arena arena; + jmp_buf err; +} upb_decstate; + +/* Error function that will abort decoding with longjmp(). We can't declare this + * UPB_NORETURN, even though it is appropriate, because if we do then compilers + * will "helpfully" refuse to tailcall to it + * (see: https://stackoverflow.com/a/55657013), which will defeat a major goal + * of our optimizations. That is also why we must declare it in a separate file, + * otherwise the compiler will see that it calls longjmp() and deduce that it is + * noreturn. */ +const char *fastdecode_err(upb_decstate *d); + +extern const uint8_t upb_utf8_offsets[]; + +UPB_INLINE +bool decode_verifyutf8_inl(const char *buf, int len) { + int i, j; + uint8_t offset; + + i = 0; + while (i < len) { + offset = upb_utf8_offsets[(uint8_t)buf[i]]; + if (offset == 0 || i + offset > len) { + return false; + } + for (j = i + 1; j < i + offset; j++) { + if ((buf[j] & 0xc0) != 0x80) { + return false; + } + } + i += offset; + } + return i == len; +} + +/* x86-64 pointers always have the high 16 bits matching. So we can shift + * left 8 and right 8 without loss of information. */ +UPB_INLINE intptr_t decode_totable(const upb_msglayout *tablep) { + return ((intptr_t)tablep << 8) | tablep->table_mask; +} + +UPB_INLINE const upb_msglayout *decode_totablep(intptr_t table) { + return (const upb_msglayout*)(table >> 8); +} + +UPB_INLINE +const char *decode_isdonefallback_inl(upb_decstate *d, const char *ptr, + int overrun) { + if (overrun < d->limit) { + /* Need to copy remaining data into patch buffer. */ + UPB_ASSERT(overrun < 16); + if (d->unknown_msg) { + if (!_upb_msg_addunknown(d->unknown_msg, d->unknown, ptr - d->unknown, + &d->arena)) { + return NULL; + } + d->unknown = &d->patch[0] + overrun; + } + memset(d->patch + 16, 0, 16); + memcpy(d->patch, d->end, 16); + ptr = &d->patch[0] + overrun; + d->end = &d->patch[16]; + d->limit -= 16; + d->limit_ptr = d->end + d->limit; + d->alias = false; + UPB_ASSERT(ptr < d->limit_ptr); + return ptr; + } else { + return NULL; + } +} + +const char *decode_isdonefallback(upb_decstate *d, const char *ptr, + int overrun); + +UPB_INLINE +bool decode_isdone(upb_decstate *d, const char **ptr) { + int overrun = *ptr - d->end; + if (UPB_LIKELY(*ptr < d->limit_ptr)) { + return false; + } else if (UPB_LIKELY(overrun == d->limit)) { + return true; + } else { + *ptr = decode_isdonefallback(d, *ptr, overrun); + return false; + } +} + +UPB_INLINE +const char *fastdecode_tagdispatch(upb_decstate *d, const char *ptr, + upb_msg *msg, intptr_t table, + uint64_t hasbits, uint32_t tag) { + const upb_msglayout *table_p = decode_totablep(table); + uint8_t mask = table; + uint64_t data; + size_t idx = tag & mask; + UPB_ASSUME((idx & 7) == 0); + idx >>= 3; + data = table_p->fasttable[idx].field_data ^ tag; + return table_p->fasttable[idx].field_parser(d, ptr, msg, table, hasbits, data); +} + +UPB_INLINE uint32_t fastdecode_loadtag(const char* ptr) { + uint16_t tag; + memcpy(&tag, ptr, 2); + return tag; +} + +UPB_INLINE void decode_checklimit(upb_decstate *d) { + UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit)); +} + +UPB_INLINE int decode_pushlimit(upb_decstate *d, const char *ptr, int size) { + int limit = size + (int)(ptr - d->end); + int delta = d->limit - limit; + decode_checklimit(d); + d->limit = limit; + d->limit_ptr = d->end + UPB_MIN(0, limit); + decode_checklimit(d); + return delta; +} + +UPB_INLINE void decode_poplimit(upb_decstate *d, const char *ptr, + int saved_delta) { + UPB_ASSERT(ptr - d->end == d->limit); + decode_checklimit(d); + d->limit += saved_delta; + d->limit_ptr = d->end + UPB_MIN(0, d->limit); + decode_checklimit(d); +} + + +#endif /* UPB_DECODE_INT_H_ */ +/* ** upb_encode: parsing into a upb_msg using a upb_msglayout. */ @@ -1347,18 +1845,166 @@ bool upb_decode(const char *buf, size_t size, upb_msg *msg, #define UPB_ENCODE_H_ +/* Must be last. */ + #ifdef __cplusplus extern "C" { #endif -char *upb_encode(const void *msg, const upb_msglayout *l, upb_arena *arena, - size_t *size); +enum { + /* If set, the results of serializing will be deterministic across all + * instances of this binary. There are no guarantees across different + * binary builds. + * + * If your proto contains maps, the encoder will need to malloc()/free() + * memory during encode. */ + UPB_ENCODE_DETERMINISTIC = 1, + + /* When set, unknown fields are not printed. */ + UPB_ENCODE_SKIPUNKNOWN = 2, +}; + +#define UPB_ENCODE_MAXDEPTH(depth) ((depth) << 16) + +char *upb_encode_ex(const void *msg, const upb_msglayout *l, int options, + upb_arena *arena, size_t *size); + +UPB_INLINE char *upb_encode(const void *msg, const upb_msglayout *l, + upb_arena *arena, size_t *size) { + return upb_encode_ex(msg, l, 0, arena, size); +} + #ifdef __cplusplus } /* extern "C" */ #endif #endif /* UPB_ENCODE_H_ */ +// These are the specialized field parser functions for the fast parser. +// Generated tables will refer to these by name. +// +// The function names are encoded with names like: +// +// // 123 4 +// upb_pss_1bt(); // Parse singular string, 1 byte tag. +// +// In position 1: +// - 'p' for parse, most function use this +// - 'c' for copy, for when we are copying strings instead of aliasing +// +// In position 2 (cardinality): +// - 's' for singular, with or without hasbit +// - 'o' for oneof +// - 'r' for non-packed repeated +// - 'p' for packed repeated +// +// In position 3 (type): +// - 'b1' for bool +// - 'v4' for 4-byte varint +// - 'v8' for 8-byte varint +// - 'z4' for zig-zag-encoded 4-byte varint +// - 'z8' for zig-zag-encoded 8-byte varint +// - 'f4' for 4-byte fixed +// - 'f8' for 8-byte fixed +// - 'm' for sub-message +// - 's' for string (validate UTF-8) +// - 'b' for bytes +// +// In position 4 (tag length): +// - '1' for one-byte tags (field numbers 1-15) +// - '2' for two-byte tags (field numbers 16-2048) + +#ifndef UPB_DECODE_FAST_H_ +#define UPB_DECODE_FAST_H_ + + +struct upb_decstate; + +// The fallback, generic parsing function that can handle any field type. +// This just uses the regular (non-fast) parser to parse a single field. +const char *fastdecode_generic(struct upb_decstate *d, const char *ptr, + upb_msg *msg, intptr_t table, uint64_t hasbits, + uint64_t data); + +#define UPB_PARSE_PARAMS \ + struct upb_decstate *d, const char *ptr, upb_msg *msg, intptr_t table, \ + uint64_t hasbits, uint64_t data + +/* primitive fields ***********************************************************/ + +#define F(card, type, valbytes, tagbytes) \ + const char *upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS); + +#define TYPES(card, tagbytes) \ + F(card, b, 1, tagbytes) \ + F(card, v, 4, tagbytes) \ + F(card, v, 8, tagbytes) \ + F(card, z, 4, tagbytes) \ + F(card, z, 8, tagbytes) \ + F(card, f, 4, tagbytes) \ + F(card, f, 8, tagbytes) + +#define TAGBYTES(card) \ + TYPES(card, 1) \ + TYPES(card, 2) + +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) +TAGBYTES(p) + +#undef F +#undef TYPES +#undef TAGBYTES + +/* string fields **************************************************************/ + +#define F(card, tagbytes, type) \ + const char *upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS); \ + const char *upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS); + +#define UTF8(card, tagbytes) \ + F(card, tagbytes, s) \ + F(card, tagbytes, b) + +#define TAGBYTES(card) \ + UTF8(card, 1) \ + UTF8(card, 2) + +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) + +#undef F +#undef TAGBYTES + +/* sub-message fields *********************************************************/ + +#define F(card, tagbytes, size_ceil, ceil_arg) \ + const char *upb_p##card##m_##tagbytes##bt_max##size_ceil##b(UPB_PARSE_PARAMS); + +#define SIZES(card, tagbytes) \ + F(card, tagbytes, 64, 64) \ + F(card, tagbytes, 128, 128) \ + F(card, tagbytes, 192, 192) \ + F(card, tagbytes, 256, 256) \ + F(card, tagbytes, max, -1) + +#define TAGBYTES(card) \ + SIZES(card, 1) \ + SIZES(card, 2) + +TAGBYTES(s) +TAGBYTES(o) +TAGBYTES(r) + +#undef TAGBYTES +#undef SIZES +#undef F + +#undef UPB_PARSE_PARAMS + +#endif /* UPB_DECODE_FAST_H_ */ /* This file was generated by upbc (the upb compiler) from the input * file: * @@ -1520,6 +2166,12 @@ UPB_INLINE google_protobuf_FileDescriptorSet *google_protobuf_FileDescriptorSet_ google_protobuf_FileDescriptorSet *ret = google_protobuf_FileDescriptorSet_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_FileDescriptorSet_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_FileDescriptorSet *google_protobuf_FileDescriptorSet_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_FileDescriptorSet *ret = google_protobuf_FileDescriptorSet_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_FileDescriptorSet_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_FileDescriptorSet_serialize(const google_protobuf_FileDescriptorSet *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_FileDescriptorSet_msginit, arena, len); } @@ -1531,12 +2183,12 @@ UPB_INLINE google_protobuf_FileDescriptorProto** google_protobuf_FileDescriptorS return (google_protobuf_FileDescriptorProto**)_upb_array_mutable_accessor(msg, UPB_SIZE(0, 0), len); } UPB_INLINE google_protobuf_FileDescriptorProto** google_protobuf_FileDescriptorSet_resize_file(google_protobuf_FileDescriptorSet *msg, size_t len, upb_arena *arena) { - return (google_protobuf_FileDescriptorProto**)_upb_array_resize_accessor(msg, UPB_SIZE(0, 0), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_FileDescriptorProto**)_upb_array_resize_accessor2(msg, UPB_SIZE(0, 0), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_FileDescriptorProto* google_protobuf_FileDescriptorSet_add_file(google_protobuf_FileDescriptorSet *msg, upb_arena *arena) { struct google_protobuf_FileDescriptorProto* sub = (struct google_protobuf_FileDescriptorProto*)_upb_msg_new(&google_protobuf_FileDescriptorProto_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(0, 0), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(0, 0), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -1551,13 +2203,19 @@ UPB_INLINE google_protobuf_FileDescriptorProto *google_protobuf_FileDescriptorPr google_protobuf_FileDescriptorProto *ret = google_protobuf_FileDescriptorProto_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_FileDescriptorProto_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_FileDescriptorProto *google_protobuf_FileDescriptorProto_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_FileDescriptorProto *ret = google_protobuf_FileDescriptorProto_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_FileDescriptorProto_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_FileDescriptorProto_serialize(const google_protobuf_FileDescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_FileDescriptorProto_msginit, arena, len); } -UPB_INLINE bool google_protobuf_FileDescriptorProto_has_name(const google_protobuf_FileDescriptorProto *msg) { return _upb_has_field(msg, 1); } +UPB_INLINE bool google_protobuf_FileDescriptorProto_has_name(const google_protobuf_FileDescriptorProto *msg) { return _upb_hasbit(msg, 1); } UPB_INLINE upb_strview google_protobuf_FileDescriptorProto_name(const google_protobuf_FileDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(4, 8), upb_strview); } -UPB_INLINE bool google_protobuf_FileDescriptorProto_has_package(const google_protobuf_FileDescriptorProto *msg) { return _upb_has_field(msg, 2); } +UPB_INLINE bool google_protobuf_FileDescriptorProto_has_package(const google_protobuf_FileDescriptorProto *msg) { return _upb_hasbit(msg, 2); } UPB_INLINE upb_strview google_protobuf_FileDescriptorProto_package(const google_protobuf_FileDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(12, 24), upb_strview); } UPB_INLINE upb_strview const* google_protobuf_FileDescriptorProto_dependency(const google_protobuf_FileDescriptorProto *msg, size_t *len) { return (upb_strview const*)_upb_array_accessor(msg, UPB_SIZE(36, 72), len); } UPB_INLINE bool google_protobuf_FileDescriptorProto_has_message_type(const google_protobuf_FileDescriptorProto *msg) { return _upb_has_submsg_nohasbit(msg, UPB_SIZE(40, 80)); } @@ -1568,13 +2226,13 @@ UPB_INLINE bool google_protobuf_FileDescriptorProto_has_service(const google_pro UPB_INLINE const google_protobuf_ServiceDescriptorProto* const* google_protobuf_FileDescriptorProto_service(const google_protobuf_FileDescriptorProto *msg, size_t *len) { return (const google_protobuf_ServiceDescriptorProto* const*)_upb_array_accessor(msg, UPB_SIZE(48, 96), len); } UPB_INLINE bool google_protobuf_FileDescriptorProto_has_extension(const google_protobuf_FileDescriptorProto *msg) { return _upb_has_submsg_nohasbit(msg, UPB_SIZE(52, 104)); } UPB_INLINE const google_protobuf_FieldDescriptorProto* const* google_protobuf_FileDescriptorProto_extension(const google_protobuf_FileDescriptorProto *msg, size_t *len) { return (const google_protobuf_FieldDescriptorProto* const*)_upb_array_accessor(msg, UPB_SIZE(52, 104), len); } -UPB_INLINE bool google_protobuf_FileDescriptorProto_has_options(const google_protobuf_FileDescriptorProto *msg) { return _upb_has_field(msg, 4); } +UPB_INLINE bool google_protobuf_FileDescriptorProto_has_options(const google_protobuf_FileDescriptorProto *msg) { return _upb_hasbit(msg, 3); } UPB_INLINE const google_protobuf_FileOptions* google_protobuf_FileDescriptorProto_options(const google_protobuf_FileDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(28, 56), const google_protobuf_FileOptions*); } -UPB_INLINE bool google_protobuf_FileDescriptorProto_has_source_code_info(const google_protobuf_FileDescriptorProto *msg) { return _upb_has_field(msg, 5); } +UPB_INLINE bool google_protobuf_FileDescriptorProto_has_source_code_info(const google_protobuf_FileDescriptorProto *msg) { return _upb_hasbit(msg, 4); } UPB_INLINE const google_protobuf_SourceCodeInfo* google_protobuf_FileDescriptorProto_source_code_info(const google_protobuf_FileDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(32, 64), const google_protobuf_SourceCodeInfo*); } UPB_INLINE int32_t const* google_protobuf_FileDescriptorProto_public_dependency(const google_protobuf_FileDescriptorProto *msg, size_t *len) { return (int32_t const*)_upb_array_accessor(msg, UPB_SIZE(56, 112), len); } UPB_INLINE int32_t const* google_protobuf_FileDescriptorProto_weak_dependency(const google_protobuf_FileDescriptorProto *msg, size_t *len) { return (int32_t const*)_upb_array_accessor(msg, UPB_SIZE(60, 120), len); } -UPB_INLINE bool google_protobuf_FileDescriptorProto_has_syntax(const google_protobuf_FileDescriptorProto *msg) { return _upb_has_field(msg, 3); } +UPB_INLINE bool google_protobuf_FileDescriptorProto_has_syntax(const google_protobuf_FileDescriptorProto *msg) { return _upb_hasbit(msg, 5); } UPB_INLINE upb_strview google_protobuf_FileDescriptorProto_syntax(const google_protobuf_FileDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(20, 40), upb_strview); } UPB_INLINE void google_protobuf_FileDescriptorProto_set_name(google_protobuf_FileDescriptorProto *msg, upb_strview value) { @@ -1589,22 +2247,22 @@ UPB_INLINE upb_strview* google_protobuf_FileDescriptorProto_mutable_dependency(g return (upb_strview*)_upb_array_mutable_accessor(msg, UPB_SIZE(36, 72), len); } UPB_INLINE upb_strview* google_protobuf_FileDescriptorProto_resize_dependency(google_protobuf_FileDescriptorProto *msg, size_t len, upb_arena *arena) { - return (upb_strview*)_upb_array_resize_accessor(msg, UPB_SIZE(36, 72), len, UPB_TYPE_STRING, arena); + return (upb_strview*)_upb_array_resize_accessor2(msg, UPB_SIZE(36, 72), len, UPB_SIZE(3, 4), arena); } UPB_INLINE bool google_protobuf_FileDescriptorProto_add_dependency(google_protobuf_FileDescriptorProto *msg, upb_strview val, upb_arena *arena) { - return _upb_array_append_accessor(msg, UPB_SIZE(36, 72), UPB_SIZE(8, 16), UPB_TYPE_STRING, &val, + return _upb_array_append_accessor2(msg, UPB_SIZE(36, 72), UPB_SIZE(3, 4), &val, arena); } UPB_INLINE google_protobuf_DescriptorProto** google_protobuf_FileDescriptorProto_mutable_message_type(google_protobuf_FileDescriptorProto *msg, size_t *len) { return (google_protobuf_DescriptorProto**)_upb_array_mutable_accessor(msg, UPB_SIZE(40, 80), len); } UPB_INLINE google_protobuf_DescriptorProto** google_protobuf_FileDescriptorProto_resize_message_type(google_protobuf_FileDescriptorProto *msg, size_t len, upb_arena *arena) { - return (google_protobuf_DescriptorProto**)_upb_array_resize_accessor(msg, UPB_SIZE(40, 80), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_DescriptorProto**)_upb_array_resize_accessor2(msg, UPB_SIZE(40, 80), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_DescriptorProto* google_protobuf_FileDescriptorProto_add_message_type(google_protobuf_FileDescriptorProto *msg, upb_arena *arena) { struct google_protobuf_DescriptorProto* sub = (struct google_protobuf_DescriptorProto*)_upb_msg_new(&google_protobuf_DescriptorProto_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(40, 80), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(40, 80), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -1612,12 +2270,12 @@ UPB_INLINE google_protobuf_EnumDescriptorProto** google_protobuf_FileDescriptorP return (google_protobuf_EnumDescriptorProto**)_upb_array_mutable_accessor(msg, UPB_SIZE(44, 88), len); } UPB_INLINE google_protobuf_EnumDescriptorProto** google_protobuf_FileDescriptorProto_resize_enum_type(google_protobuf_FileDescriptorProto *msg, size_t len, upb_arena *arena) { - return (google_protobuf_EnumDescriptorProto**)_upb_array_resize_accessor(msg, UPB_SIZE(44, 88), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_EnumDescriptorProto**)_upb_array_resize_accessor2(msg, UPB_SIZE(44, 88), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_EnumDescriptorProto* google_protobuf_FileDescriptorProto_add_enum_type(google_protobuf_FileDescriptorProto *msg, upb_arena *arena) { struct google_protobuf_EnumDescriptorProto* sub = (struct google_protobuf_EnumDescriptorProto*)_upb_msg_new(&google_protobuf_EnumDescriptorProto_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(44, 88), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(44, 88), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -1625,12 +2283,12 @@ UPB_INLINE google_protobuf_ServiceDescriptorProto** google_protobuf_FileDescript return (google_protobuf_ServiceDescriptorProto**)_upb_array_mutable_accessor(msg, UPB_SIZE(48, 96), len); } UPB_INLINE google_protobuf_ServiceDescriptorProto** google_protobuf_FileDescriptorProto_resize_service(google_protobuf_FileDescriptorProto *msg, size_t len, upb_arena *arena) { - return (google_protobuf_ServiceDescriptorProto**)_upb_array_resize_accessor(msg, UPB_SIZE(48, 96), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_ServiceDescriptorProto**)_upb_array_resize_accessor2(msg, UPB_SIZE(48, 96), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_ServiceDescriptorProto* google_protobuf_FileDescriptorProto_add_service(google_protobuf_FileDescriptorProto *msg, upb_arena *arena) { struct google_protobuf_ServiceDescriptorProto* sub = (struct google_protobuf_ServiceDescriptorProto*)_upb_msg_new(&google_protobuf_ServiceDescriptorProto_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(48, 96), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(48, 96), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -1638,17 +2296,17 @@ UPB_INLINE google_protobuf_FieldDescriptorProto** google_protobuf_FileDescriptor return (google_protobuf_FieldDescriptorProto**)_upb_array_mutable_accessor(msg, UPB_SIZE(52, 104), len); } UPB_INLINE google_protobuf_FieldDescriptorProto** google_protobuf_FileDescriptorProto_resize_extension(google_protobuf_FileDescriptorProto *msg, size_t len, upb_arena *arena) { - return (google_protobuf_FieldDescriptorProto**)_upb_array_resize_accessor(msg, UPB_SIZE(52, 104), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_FieldDescriptorProto**)_upb_array_resize_accessor2(msg, UPB_SIZE(52, 104), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_FieldDescriptorProto* google_protobuf_FileDescriptorProto_add_extension(google_protobuf_FileDescriptorProto *msg, upb_arena *arena) { struct google_protobuf_FieldDescriptorProto* sub = (struct google_protobuf_FieldDescriptorProto*)_upb_msg_new(&google_protobuf_FieldDescriptorProto_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(52, 104), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(52, 104), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } UPB_INLINE void google_protobuf_FileDescriptorProto_set_options(google_protobuf_FileDescriptorProto *msg, google_protobuf_FileOptions* value) { - _upb_sethas(msg, 4); + _upb_sethas(msg, 3); *UPB_PTR_AT(msg, UPB_SIZE(28, 56), google_protobuf_FileOptions*) = value; } UPB_INLINE struct google_protobuf_FileOptions* google_protobuf_FileDescriptorProto_mutable_options(google_protobuf_FileDescriptorProto *msg, upb_arena *arena) { @@ -1661,7 +2319,7 @@ UPB_INLINE struct google_protobuf_FileOptions* google_protobuf_FileDescriptorPro return sub; } UPB_INLINE void google_protobuf_FileDescriptorProto_set_source_code_info(google_protobuf_FileDescriptorProto *msg, google_protobuf_SourceCodeInfo* value) { - _upb_sethas(msg, 5); + _upb_sethas(msg, 4); *UPB_PTR_AT(msg, UPB_SIZE(32, 64), google_protobuf_SourceCodeInfo*) = value; } UPB_INLINE struct google_protobuf_SourceCodeInfo* google_protobuf_FileDescriptorProto_mutable_source_code_info(google_protobuf_FileDescriptorProto *msg, upb_arena *arena) { @@ -1677,24 +2335,24 @@ UPB_INLINE int32_t* google_protobuf_FileDescriptorProto_mutable_public_dependenc return (int32_t*)_upb_array_mutable_accessor(msg, UPB_SIZE(56, 112), len); } UPB_INLINE int32_t* google_protobuf_FileDescriptorProto_resize_public_dependency(google_protobuf_FileDescriptorProto *msg, size_t len, upb_arena *arena) { - return (int32_t*)_upb_array_resize_accessor(msg, UPB_SIZE(56, 112), len, UPB_TYPE_INT32, arena); + return (int32_t*)_upb_array_resize_accessor2(msg, UPB_SIZE(56, 112), len, 2, arena); } UPB_INLINE bool google_protobuf_FileDescriptorProto_add_public_dependency(google_protobuf_FileDescriptorProto *msg, int32_t val, upb_arena *arena) { - return _upb_array_append_accessor(msg, UPB_SIZE(56, 112), UPB_SIZE(4, 4), UPB_TYPE_INT32, &val, + return _upb_array_append_accessor2(msg, UPB_SIZE(56, 112), 2, &val, arena); } UPB_INLINE int32_t* google_protobuf_FileDescriptorProto_mutable_weak_dependency(google_protobuf_FileDescriptorProto *msg, size_t *len) { return (int32_t*)_upb_array_mutable_accessor(msg, UPB_SIZE(60, 120), len); } UPB_INLINE int32_t* google_protobuf_FileDescriptorProto_resize_weak_dependency(google_protobuf_FileDescriptorProto *msg, size_t len, upb_arena *arena) { - return (int32_t*)_upb_array_resize_accessor(msg, UPB_SIZE(60, 120), len, UPB_TYPE_INT32, arena); + return (int32_t*)_upb_array_resize_accessor2(msg, UPB_SIZE(60, 120), len, 2, arena); } UPB_INLINE bool google_protobuf_FileDescriptorProto_add_weak_dependency(google_protobuf_FileDescriptorProto *msg, int32_t val, upb_arena *arena) { - return _upb_array_append_accessor(msg, UPB_SIZE(60, 120), UPB_SIZE(4, 4), UPB_TYPE_INT32, &val, + return _upb_array_append_accessor2(msg, UPB_SIZE(60, 120), 2, &val, arena); } UPB_INLINE void google_protobuf_FileDescriptorProto_set_syntax(google_protobuf_FileDescriptorProto *msg, upb_strview value) { - _upb_sethas(msg, 3); + _upb_sethas(msg, 5); *UPB_PTR_AT(msg, UPB_SIZE(20, 40), upb_strview) = value; } @@ -1708,11 +2366,17 @@ UPB_INLINE google_protobuf_DescriptorProto *google_protobuf_DescriptorProto_pars google_protobuf_DescriptorProto *ret = google_protobuf_DescriptorProto_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_DescriptorProto_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_DescriptorProto *google_protobuf_DescriptorProto_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_DescriptorProto *ret = google_protobuf_DescriptorProto_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_DescriptorProto_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_DescriptorProto_serialize(const google_protobuf_DescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_DescriptorProto_msginit, arena, len); } -UPB_INLINE bool google_protobuf_DescriptorProto_has_name(const google_protobuf_DescriptorProto *msg) { return _upb_has_field(msg, 1); } +UPB_INLINE bool google_protobuf_DescriptorProto_has_name(const google_protobuf_DescriptorProto *msg) { return _upb_hasbit(msg, 1); } UPB_INLINE upb_strview google_protobuf_DescriptorProto_name(const google_protobuf_DescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(4, 8), upb_strview); } UPB_INLINE bool google_protobuf_DescriptorProto_has_field(const google_protobuf_DescriptorProto *msg) { return _upb_has_submsg_nohasbit(msg, UPB_SIZE(16, 32)); } UPB_INLINE const google_protobuf_FieldDescriptorProto* const* google_protobuf_DescriptorProto_field(const google_protobuf_DescriptorProto *msg, size_t *len) { return (const google_protobuf_FieldDescriptorProto* const*)_upb_array_accessor(msg, UPB_SIZE(16, 32), len); } @@ -1724,7 +2388,7 @@ UPB_INLINE bool google_protobuf_DescriptorProto_has_extension_range(const google UPB_INLINE const google_protobuf_DescriptorProto_ExtensionRange* const* google_protobuf_DescriptorProto_extension_range(const google_protobuf_DescriptorProto *msg, size_t *len) { return (const google_protobuf_DescriptorProto_ExtensionRange* const*)_upb_array_accessor(msg, UPB_SIZE(28, 56), len); } UPB_INLINE bool google_protobuf_DescriptorProto_has_extension(const google_protobuf_DescriptorProto *msg) { return _upb_has_submsg_nohasbit(msg, UPB_SIZE(32, 64)); } UPB_INLINE const google_protobuf_FieldDescriptorProto* const* google_protobuf_DescriptorProto_extension(const google_protobuf_DescriptorProto *msg, size_t *len) { return (const google_protobuf_FieldDescriptorProto* const*)_upb_array_accessor(msg, UPB_SIZE(32, 64), len); } -UPB_INLINE bool google_protobuf_DescriptorProto_has_options(const google_protobuf_DescriptorProto *msg) { return _upb_has_field(msg, 2); } +UPB_INLINE bool google_protobuf_DescriptorProto_has_options(const google_protobuf_DescriptorProto *msg) { return _upb_hasbit(msg, 2); } UPB_INLINE const google_protobuf_MessageOptions* google_protobuf_DescriptorProto_options(const google_protobuf_DescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(12, 24), const google_protobuf_MessageOptions*); } UPB_INLINE bool google_protobuf_DescriptorProto_has_oneof_decl(const google_protobuf_DescriptorProto *msg) { return _upb_has_submsg_nohasbit(msg, UPB_SIZE(36, 72)); } UPB_INLINE const google_protobuf_OneofDescriptorProto* const* google_protobuf_DescriptorProto_oneof_decl(const google_protobuf_DescriptorProto *msg, size_t *len) { return (const google_protobuf_OneofDescriptorProto* const*)_upb_array_accessor(msg, UPB_SIZE(36, 72), len); } @@ -1740,12 +2404,12 @@ UPB_INLINE google_protobuf_FieldDescriptorProto** google_protobuf_DescriptorProt return (google_protobuf_FieldDescriptorProto**)_upb_array_mutable_accessor(msg, UPB_SIZE(16, 32), len); } UPB_INLINE google_protobuf_FieldDescriptorProto** google_protobuf_DescriptorProto_resize_field(google_protobuf_DescriptorProto *msg, size_t len, upb_arena *arena) { - return (google_protobuf_FieldDescriptorProto**)_upb_array_resize_accessor(msg, UPB_SIZE(16, 32), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_FieldDescriptorProto**)_upb_array_resize_accessor2(msg, UPB_SIZE(16, 32), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_FieldDescriptorProto* google_protobuf_DescriptorProto_add_field(google_protobuf_DescriptorProto *msg, upb_arena *arena) { struct google_protobuf_FieldDescriptorProto* sub = (struct google_protobuf_FieldDescriptorProto*)_upb_msg_new(&google_protobuf_FieldDescriptorProto_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(16, 32), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(16, 32), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -1753,12 +2417,12 @@ UPB_INLINE google_protobuf_DescriptorProto** google_protobuf_DescriptorProto_mut return (google_protobuf_DescriptorProto**)_upb_array_mutable_accessor(msg, UPB_SIZE(20, 40), len); } UPB_INLINE google_protobuf_DescriptorProto** google_protobuf_DescriptorProto_resize_nested_type(google_protobuf_DescriptorProto *msg, size_t len, upb_arena *arena) { - return (google_protobuf_DescriptorProto**)_upb_array_resize_accessor(msg, UPB_SIZE(20, 40), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_DescriptorProto**)_upb_array_resize_accessor2(msg, UPB_SIZE(20, 40), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_DescriptorProto* google_protobuf_DescriptorProto_add_nested_type(google_protobuf_DescriptorProto *msg, upb_arena *arena) { struct google_protobuf_DescriptorProto* sub = (struct google_protobuf_DescriptorProto*)_upb_msg_new(&google_protobuf_DescriptorProto_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(20, 40), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(20, 40), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -1766,12 +2430,12 @@ UPB_INLINE google_protobuf_EnumDescriptorProto** google_protobuf_DescriptorProto return (google_protobuf_EnumDescriptorProto**)_upb_array_mutable_accessor(msg, UPB_SIZE(24, 48), len); } UPB_INLINE google_protobuf_EnumDescriptorProto** google_protobuf_DescriptorProto_resize_enum_type(google_protobuf_DescriptorProto *msg, size_t len, upb_arena *arena) { - return (google_protobuf_EnumDescriptorProto**)_upb_array_resize_accessor(msg, UPB_SIZE(24, 48), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_EnumDescriptorProto**)_upb_array_resize_accessor2(msg, UPB_SIZE(24, 48), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_EnumDescriptorProto* google_protobuf_DescriptorProto_add_enum_type(google_protobuf_DescriptorProto *msg, upb_arena *arena) { struct google_protobuf_EnumDescriptorProto* sub = (struct google_protobuf_EnumDescriptorProto*)_upb_msg_new(&google_protobuf_EnumDescriptorProto_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(24, 48), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(24, 48), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -1779,12 +2443,12 @@ UPB_INLINE google_protobuf_DescriptorProto_ExtensionRange** google_protobuf_Desc return (google_protobuf_DescriptorProto_ExtensionRange**)_upb_array_mutable_accessor(msg, UPB_SIZE(28, 56), len); } UPB_INLINE google_protobuf_DescriptorProto_ExtensionRange** google_protobuf_DescriptorProto_resize_extension_range(google_protobuf_DescriptorProto *msg, size_t len, upb_arena *arena) { - return (google_protobuf_DescriptorProto_ExtensionRange**)_upb_array_resize_accessor(msg, UPB_SIZE(28, 56), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_DescriptorProto_ExtensionRange**)_upb_array_resize_accessor2(msg, UPB_SIZE(28, 56), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_DescriptorProto_ExtensionRange* google_protobuf_DescriptorProto_add_extension_range(google_protobuf_DescriptorProto *msg, upb_arena *arena) { struct google_protobuf_DescriptorProto_ExtensionRange* sub = (struct google_protobuf_DescriptorProto_ExtensionRange*)_upb_msg_new(&google_protobuf_DescriptorProto_ExtensionRange_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(28, 56), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(28, 56), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -1792,12 +2456,12 @@ UPB_INLINE google_protobuf_FieldDescriptorProto** google_protobuf_DescriptorProt return (google_protobuf_FieldDescriptorProto**)_upb_array_mutable_accessor(msg, UPB_SIZE(32, 64), len); } UPB_INLINE google_protobuf_FieldDescriptorProto** google_protobuf_DescriptorProto_resize_extension(google_protobuf_DescriptorProto *msg, size_t len, upb_arena *arena) { - return (google_protobuf_FieldDescriptorProto**)_upb_array_resize_accessor(msg, UPB_SIZE(32, 64), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_FieldDescriptorProto**)_upb_array_resize_accessor2(msg, UPB_SIZE(32, 64), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_FieldDescriptorProto* google_protobuf_DescriptorProto_add_extension(google_protobuf_DescriptorProto *msg, upb_arena *arena) { struct google_protobuf_FieldDescriptorProto* sub = (struct google_protobuf_FieldDescriptorProto*)_upb_msg_new(&google_protobuf_FieldDescriptorProto_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(32, 64), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(32, 64), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -1818,12 +2482,12 @@ UPB_INLINE google_protobuf_OneofDescriptorProto** google_protobuf_DescriptorProt return (google_protobuf_OneofDescriptorProto**)_upb_array_mutable_accessor(msg, UPB_SIZE(36, 72), len); } UPB_INLINE google_protobuf_OneofDescriptorProto** google_protobuf_DescriptorProto_resize_oneof_decl(google_protobuf_DescriptorProto *msg, size_t len, upb_arena *arena) { - return (google_protobuf_OneofDescriptorProto**)_upb_array_resize_accessor(msg, UPB_SIZE(36, 72), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_OneofDescriptorProto**)_upb_array_resize_accessor2(msg, UPB_SIZE(36, 72), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_OneofDescriptorProto* google_protobuf_DescriptorProto_add_oneof_decl(google_protobuf_DescriptorProto *msg, upb_arena *arena) { struct google_protobuf_OneofDescriptorProto* sub = (struct google_protobuf_OneofDescriptorProto*)_upb_msg_new(&google_protobuf_OneofDescriptorProto_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(36, 72), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(36, 72), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -1831,12 +2495,12 @@ UPB_INLINE google_protobuf_DescriptorProto_ReservedRange** google_protobuf_Descr return (google_protobuf_DescriptorProto_ReservedRange**)_upb_array_mutable_accessor(msg, UPB_SIZE(40, 80), len); } UPB_INLINE google_protobuf_DescriptorProto_ReservedRange** google_protobuf_DescriptorProto_resize_reserved_range(google_protobuf_DescriptorProto *msg, size_t len, upb_arena *arena) { - return (google_protobuf_DescriptorProto_ReservedRange**)_upb_array_resize_accessor(msg, UPB_SIZE(40, 80), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_DescriptorProto_ReservedRange**)_upb_array_resize_accessor2(msg, UPB_SIZE(40, 80), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_DescriptorProto_ReservedRange* google_protobuf_DescriptorProto_add_reserved_range(google_protobuf_DescriptorProto *msg, upb_arena *arena) { struct google_protobuf_DescriptorProto_ReservedRange* sub = (struct google_protobuf_DescriptorProto_ReservedRange*)_upb_msg_new(&google_protobuf_DescriptorProto_ReservedRange_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(40, 80), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(40, 80), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -1844,10 +2508,10 @@ UPB_INLINE upb_strview* google_protobuf_DescriptorProto_mutable_reserved_name(go return (upb_strview*)_upb_array_mutable_accessor(msg, UPB_SIZE(44, 88), len); } UPB_INLINE upb_strview* google_protobuf_DescriptorProto_resize_reserved_name(google_protobuf_DescriptorProto *msg, size_t len, upb_arena *arena) { - return (upb_strview*)_upb_array_resize_accessor(msg, UPB_SIZE(44, 88), len, UPB_TYPE_STRING, arena); + return (upb_strview*)_upb_array_resize_accessor2(msg, UPB_SIZE(44, 88), len, UPB_SIZE(3, 4), arena); } UPB_INLINE bool google_protobuf_DescriptorProto_add_reserved_name(google_protobuf_DescriptorProto *msg, upb_strview val, upb_arena *arena) { - return _upb_array_append_accessor(msg, UPB_SIZE(44, 88), UPB_SIZE(8, 16), UPB_TYPE_STRING, &val, + return _upb_array_append_accessor2(msg, UPB_SIZE(44, 88), UPB_SIZE(3, 4), &val, arena); } @@ -1861,15 +2525,21 @@ UPB_INLINE google_protobuf_DescriptorProto_ExtensionRange *google_protobuf_Descr google_protobuf_DescriptorProto_ExtensionRange *ret = google_protobuf_DescriptorProto_ExtensionRange_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_DescriptorProto_ExtensionRange_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_DescriptorProto_ExtensionRange *google_protobuf_DescriptorProto_ExtensionRange_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_DescriptorProto_ExtensionRange *ret = google_protobuf_DescriptorProto_ExtensionRange_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_DescriptorProto_ExtensionRange_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_DescriptorProto_ExtensionRange_serialize(const google_protobuf_DescriptorProto_ExtensionRange *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_DescriptorProto_ExtensionRange_msginit, arena, len); } -UPB_INLINE bool google_protobuf_DescriptorProto_ExtensionRange_has_start(const google_protobuf_DescriptorProto_ExtensionRange *msg) { return _upb_has_field(msg, 1); } +UPB_INLINE bool google_protobuf_DescriptorProto_ExtensionRange_has_start(const google_protobuf_DescriptorProto_ExtensionRange *msg) { return _upb_hasbit(msg, 1); } UPB_INLINE int32_t google_protobuf_DescriptorProto_ExtensionRange_start(const google_protobuf_DescriptorProto_ExtensionRange *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(4, 4), int32_t); } -UPB_INLINE bool google_protobuf_DescriptorProto_ExtensionRange_has_end(const google_protobuf_DescriptorProto_ExtensionRange *msg) { return _upb_has_field(msg, 2); } +UPB_INLINE bool google_protobuf_DescriptorProto_ExtensionRange_has_end(const google_protobuf_DescriptorProto_ExtensionRange *msg) { return _upb_hasbit(msg, 2); } UPB_INLINE int32_t google_protobuf_DescriptorProto_ExtensionRange_end(const google_protobuf_DescriptorProto_ExtensionRange *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(8, 8), int32_t); } -UPB_INLINE bool google_protobuf_DescriptorProto_ExtensionRange_has_options(const google_protobuf_DescriptorProto_ExtensionRange *msg) { return _upb_has_field(msg, 3); } +UPB_INLINE bool google_protobuf_DescriptorProto_ExtensionRange_has_options(const google_protobuf_DescriptorProto_ExtensionRange *msg) { return _upb_hasbit(msg, 3); } UPB_INLINE const google_protobuf_ExtensionRangeOptions* google_protobuf_DescriptorProto_ExtensionRange_options(const google_protobuf_DescriptorProto_ExtensionRange *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(12, 16), const google_protobuf_ExtensionRangeOptions*); } UPB_INLINE void google_protobuf_DescriptorProto_ExtensionRange_set_start(google_protobuf_DescriptorProto_ExtensionRange *msg, int32_t value) { @@ -1904,13 +2574,19 @@ UPB_INLINE google_protobuf_DescriptorProto_ReservedRange *google_protobuf_Descri google_protobuf_DescriptorProto_ReservedRange *ret = google_protobuf_DescriptorProto_ReservedRange_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_DescriptorProto_ReservedRange_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_DescriptorProto_ReservedRange *google_protobuf_DescriptorProto_ReservedRange_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_DescriptorProto_ReservedRange *ret = google_protobuf_DescriptorProto_ReservedRange_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_DescriptorProto_ReservedRange_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_DescriptorProto_ReservedRange_serialize(const google_protobuf_DescriptorProto_ReservedRange *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_DescriptorProto_ReservedRange_msginit, arena, len); } -UPB_INLINE bool google_protobuf_DescriptorProto_ReservedRange_has_start(const google_protobuf_DescriptorProto_ReservedRange *msg) { return _upb_has_field(msg, 1); } +UPB_INLINE bool google_protobuf_DescriptorProto_ReservedRange_has_start(const google_protobuf_DescriptorProto_ReservedRange *msg) { return _upb_hasbit(msg, 1); } UPB_INLINE int32_t google_protobuf_DescriptorProto_ReservedRange_start(const google_protobuf_DescriptorProto_ReservedRange *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(4, 4), int32_t); } -UPB_INLINE bool google_protobuf_DescriptorProto_ReservedRange_has_end(const google_protobuf_DescriptorProto_ReservedRange *msg) { return _upb_has_field(msg, 2); } +UPB_INLINE bool google_protobuf_DescriptorProto_ReservedRange_has_end(const google_protobuf_DescriptorProto_ReservedRange *msg) { return _upb_hasbit(msg, 2); } UPB_INLINE int32_t google_protobuf_DescriptorProto_ReservedRange_end(const google_protobuf_DescriptorProto_ReservedRange *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(8, 8), int32_t); } UPB_INLINE void google_protobuf_DescriptorProto_ReservedRange_set_start(google_protobuf_DescriptorProto_ReservedRange *msg, int32_t value) { @@ -1932,7 +2608,13 @@ UPB_INLINE google_protobuf_ExtensionRangeOptions *google_protobuf_ExtensionRange google_protobuf_ExtensionRangeOptions *ret = google_protobuf_ExtensionRangeOptions_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_ExtensionRangeOptions_msginit, arena)) ? ret : NULL; } -UPB_INLINE char *google_protobuf_ExtensionRangeOptions_serialize(const google_protobuf_ExtensionRangeOptions *msg, upb_arena *arena, size_t *len) { +UPB_INLINE google_protobuf_ExtensionRangeOptions *google_protobuf_ExtensionRangeOptions_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_ExtensionRangeOptions *ret = google_protobuf_ExtensionRangeOptions_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_ExtensionRangeOptions_msginit, arena, options)) + ? ret : NULL; +} +UPB_INLINE char *google_protobuf_ExtensionRangeOptions_serialize(const google_protobuf_ExtensionRangeOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_ExtensionRangeOptions_msginit, arena, len); } @@ -1943,12 +2625,12 @@ UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_ExtensionRangeO return (google_protobuf_UninterpretedOption**)_upb_array_mutable_accessor(msg, UPB_SIZE(0, 0), len); } UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_ExtensionRangeOptions_resize_uninterpreted_option(google_protobuf_ExtensionRangeOptions *msg, size_t len, upb_arena *arena) { - return (google_protobuf_UninterpretedOption**)_upb_array_resize_accessor(msg, UPB_SIZE(0, 0), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_UninterpretedOption**)_upb_array_resize_accessor2(msg, UPB_SIZE(0, 0), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_UninterpretedOption* google_protobuf_ExtensionRangeOptions_add_uninterpreted_option(google_protobuf_ExtensionRangeOptions *msg, upb_arena *arena) { struct google_protobuf_UninterpretedOption* sub = (struct google_protobuf_UninterpretedOption*)_upb_msg_new(&google_protobuf_UninterpretedOption_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(0, 0), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(0, 0), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -1963,64 +2645,70 @@ UPB_INLINE google_protobuf_FieldDescriptorProto *google_protobuf_FieldDescriptor google_protobuf_FieldDescriptorProto *ret = google_protobuf_FieldDescriptorProto_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_FieldDescriptorProto_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_FieldDescriptorProto *google_protobuf_FieldDescriptorProto_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_FieldDescriptorProto *ret = google_protobuf_FieldDescriptorProto_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_FieldDescriptorProto_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_FieldDescriptorProto_serialize(const google_protobuf_FieldDescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_FieldDescriptorProto_msginit, arena, len); } -UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_name(const google_protobuf_FieldDescriptorProto *msg) { return _upb_has_field(msg, 6); } -UPB_INLINE upb_strview google_protobuf_FieldDescriptorProto_name(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(36, 40), upb_strview); } -UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_extendee(const google_protobuf_FieldDescriptorProto *msg) { return _upb_has_field(msg, 7); } -UPB_INLINE upb_strview google_protobuf_FieldDescriptorProto_extendee(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(44, 56), upb_strview); } -UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_number(const google_protobuf_FieldDescriptorProto *msg) { return _upb_has_field(msg, 3); } -UPB_INLINE int32_t google_protobuf_FieldDescriptorProto_number(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(24, 24), int32_t); } -UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_label(const google_protobuf_FieldDescriptorProto *msg) { return _upb_has_field(msg, 1); } -UPB_INLINE int32_t google_protobuf_FieldDescriptorProto_label(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(8, 8), int32_t); } -UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_type(const google_protobuf_FieldDescriptorProto *msg) { return _upb_has_field(msg, 2); } -UPB_INLINE int32_t google_protobuf_FieldDescriptorProto_type(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(16, 16), int32_t); } -UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_type_name(const google_protobuf_FieldDescriptorProto *msg) { return _upb_has_field(msg, 8); } -UPB_INLINE upb_strview google_protobuf_FieldDescriptorProto_type_name(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(52, 72), upb_strview); } -UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_default_value(const google_protobuf_FieldDescriptorProto *msg) { return _upb_has_field(msg, 9); } -UPB_INLINE upb_strview google_protobuf_FieldDescriptorProto_default_value(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(60, 88), upb_strview); } -UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_options(const google_protobuf_FieldDescriptorProto *msg) { return _upb_has_field(msg, 11); } -UPB_INLINE const google_protobuf_FieldOptions* google_protobuf_FieldDescriptorProto_options(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(76, 120), const google_protobuf_FieldOptions*); } -UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_oneof_index(const google_protobuf_FieldDescriptorProto *msg) { return _upb_has_field(msg, 4); } -UPB_INLINE int32_t google_protobuf_FieldDescriptorProto_oneof_index(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(28, 28), int32_t); } -UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_json_name(const google_protobuf_FieldDescriptorProto *msg) { return _upb_has_field(msg, 10); } -UPB_INLINE upb_strview google_protobuf_FieldDescriptorProto_json_name(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(68, 104), upb_strview); } -UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_proto3_optional(const google_protobuf_FieldDescriptorProto *msg) { return _upb_has_field(msg, 5); } -UPB_INLINE bool google_protobuf_FieldDescriptorProto_proto3_optional(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(32, 32), bool); } +UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_name(const google_protobuf_FieldDescriptorProto *msg) { return _upb_hasbit(msg, 1); } +UPB_INLINE upb_strview google_protobuf_FieldDescriptorProto_name(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(24, 24), upb_strview); } +UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_extendee(const google_protobuf_FieldDescriptorProto *msg) { return _upb_hasbit(msg, 2); } +UPB_INLINE upb_strview google_protobuf_FieldDescriptorProto_extendee(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(32, 40), upb_strview); } +UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_number(const google_protobuf_FieldDescriptorProto *msg) { return _upb_hasbit(msg, 3); } +UPB_INLINE int32_t google_protobuf_FieldDescriptorProto_number(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(12, 12), int32_t); } +UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_label(const google_protobuf_FieldDescriptorProto *msg) { return _upb_hasbit(msg, 4); } +UPB_INLINE int32_t google_protobuf_FieldDescriptorProto_label(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(4, 4), int32_t); } +UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_type(const google_protobuf_FieldDescriptorProto *msg) { return _upb_hasbit(msg, 5); } +UPB_INLINE int32_t google_protobuf_FieldDescriptorProto_type(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(8, 8), int32_t); } +UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_type_name(const google_protobuf_FieldDescriptorProto *msg) { return _upb_hasbit(msg, 6); } +UPB_INLINE upb_strview google_protobuf_FieldDescriptorProto_type_name(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(40, 56), upb_strview); } +UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_default_value(const google_protobuf_FieldDescriptorProto *msg) { return _upb_hasbit(msg, 7); } +UPB_INLINE upb_strview google_protobuf_FieldDescriptorProto_default_value(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(48, 72), upb_strview); } +UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_options(const google_protobuf_FieldDescriptorProto *msg) { return _upb_hasbit(msg, 8); } +UPB_INLINE const google_protobuf_FieldOptions* google_protobuf_FieldDescriptorProto_options(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(64, 104), const google_protobuf_FieldOptions*); } +UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_oneof_index(const google_protobuf_FieldDescriptorProto *msg) { return _upb_hasbit(msg, 9); } +UPB_INLINE int32_t google_protobuf_FieldDescriptorProto_oneof_index(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(16, 16), int32_t); } +UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_json_name(const google_protobuf_FieldDescriptorProto *msg) { return _upb_hasbit(msg, 10); } +UPB_INLINE upb_strview google_protobuf_FieldDescriptorProto_json_name(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(56, 88), upb_strview); } +UPB_INLINE bool google_protobuf_FieldDescriptorProto_has_proto3_optional(const google_protobuf_FieldDescriptorProto *msg) { return _upb_hasbit(msg, 11); } +UPB_INLINE bool google_protobuf_FieldDescriptorProto_proto3_optional(const google_protobuf_FieldDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(20, 20), bool); } UPB_INLINE void google_protobuf_FieldDescriptorProto_set_name(google_protobuf_FieldDescriptorProto *msg, upb_strview value) { - _upb_sethas(msg, 6); - *UPB_PTR_AT(msg, UPB_SIZE(36, 40), upb_strview) = value; + _upb_sethas(msg, 1); + *UPB_PTR_AT(msg, UPB_SIZE(24, 24), upb_strview) = value; } UPB_INLINE void google_protobuf_FieldDescriptorProto_set_extendee(google_protobuf_FieldDescriptorProto *msg, upb_strview value) { - _upb_sethas(msg, 7); - *UPB_PTR_AT(msg, UPB_SIZE(44, 56), upb_strview) = value; + _upb_sethas(msg, 2); + *UPB_PTR_AT(msg, UPB_SIZE(32, 40), upb_strview) = value; } UPB_INLINE void google_protobuf_FieldDescriptorProto_set_number(google_protobuf_FieldDescriptorProto *msg, int32_t value) { _upb_sethas(msg, 3); - *UPB_PTR_AT(msg, UPB_SIZE(24, 24), int32_t) = value; + *UPB_PTR_AT(msg, UPB_SIZE(12, 12), int32_t) = value; } UPB_INLINE void google_protobuf_FieldDescriptorProto_set_label(google_protobuf_FieldDescriptorProto *msg, int32_t value) { - _upb_sethas(msg, 1); - *UPB_PTR_AT(msg, UPB_SIZE(8, 8), int32_t) = value; + _upb_sethas(msg, 4); + *UPB_PTR_AT(msg, UPB_SIZE(4, 4), int32_t) = value; } UPB_INLINE void google_protobuf_FieldDescriptorProto_set_type(google_protobuf_FieldDescriptorProto *msg, int32_t value) { - _upb_sethas(msg, 2); - *UPB_PTR_AT(msg, UPB_SIZE(16, 16), int32_t) = value; + _upb_sethas(msg, 5); + *UPB_PTR_AT(msg, UPB_SIZE(8, 8), int32_t) = value; } UPB_INLINE void google_protobuf_FieldDescriptorProto_set_type_name(google_protobuf_FieldDescriptorProto *msg, upb_strview value) { - _upb_sethas(msg, 8); - *UPB_PTR_AT(msg, UPB_SIZE(52, 72), upb_strview) = value; + _upb_sethas(msg, 6); + *UPB_PTR_AT(msg, UPB_SIZE(40, 56), upb_strview) = value; } UPB_INLINE void google_protobuf_FieldDescriptorProto_set_default_value(google_protobuf_FieldDescriptorProto *msg, upb_strview value) { - _upb_sethas(msg, 9); - *UPB_PTR_AT(msg, UPB_SIZE(60, 88), upb_strview) = value; + _upb_sethas(msg, 7); + *UPB_PTR_AT(msg, UPB_SIZE(48, 72), upb_strview) = value; } UPB_INLINE void google_protobuf_FieldDescriptorProto_set_options(google_protobuf_FieldDescriptorProto *msg, google_protobuf_FieldOptions* value) { - _upb_sethas(msg, 11); - *UPB_PTR_AT(msg, UPB_SIZE(76, 120), google_protobuf_FieldOptions*) = value; + _upb_sethas(msg, 8); + *UPB_PTR_AT(msg, UPB_SIZE(64, 104), google_protobuf_FieldOptions*) = value; } UPB_INLINE struct google_protobuf_FieldOptions* google_protobuf_FieldDescriptorProto_mutable_options(google_protobuf_FieldDescriptorProto *msg, upb_arena *arena) { struct google_protobuf_FieldOptions* sub = (struct google_protobuf_FieldOptions*)google_protobuf_FieldDescriptorProto_options(msg); @@ -2032,16 +2720,16 @@ UPB_INLINE struct google_protobuf_FieldOptions* google_protobuf_FieldDescriptorP return sub; } UPB_INLINE void google_protobuf_FieldDescriptorProto_set_oneof_index(google_protobuf_FieldDescriptorProto *msg, int32_t value) { - _upb_sethas(msg, 4); - *UPB_PTR_AT(msg, UPB_SIZE(28, 28), int32_t) = value; + _upb_sethas(msg, 9); + *UPB_PTR_AT(msg, UPB_SIZE(16, 16), int32_t) = value; } UPB_INLINE void google_protobuf_FieldDescriptorProto_set_json_name(google_protobuf_FieldDescriptorProto *msg, upb_strview value) { _upb_sethas(msg, 10); - *UPB_PTR_AT(msg, UPB_SIZE(68, 104), upb_strview) = value; + *UPB_PTR_AT(msg, UPB_SIZE(56, 88), upb_strview) = value; } UPB_INLINE void google_protobuf_FieldDescriptorProto_set_proto3_optional(google_protobuf_FieldDescriptorProto *msg, bool value) { - _upb_sethas(msg, 5); - *UPB_PTR_AT(msg, UPB_SIZE(32, 32), bool) = value; + _upb_sethas(msg, 11); + *UPB_PTR_AT(msg, UPB_SIZE(20, 20), bool) = value; } /* google.protobuf.OneofDescriptorProto */ @@ -2054,13 +2742,19 @@ UPB_INLINE google_protobuf_OneofDescriptorProto *google_protobuf_OneofDescriptor google_protobuf_OneofDescriptorProto *ret = google_protobuf_OneofDescriptorProto_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_OneofDescriptorProto_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_OneofDescriptorProto *google_protobuf_OneofDescriptorProto_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_OneofDescriptorProto *ret = google_protobuf_OneofDescriptorProto_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_OneofDescriptorProto_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_OneofDescriptorProto_serialize(const google_protobuf_OneofDescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_OneofDescriptorProto_msginit, arena, len); } -UPB_INLINE bool google_protobuf_OneofDescriptorProto_has_name(const google_protobuf_OneofDescriptorProto *msg) { return _upb_has_field(msg, 1); } +UPB_INLINE bool google_protobuf_OneofDescriptorProto_has_name(const google_protobuf_OneofDescriptorProto *msg) { return _upb_hasbit(msg, 1); } UPB_INLINE upb_strview google_protobuf_OneofDescriptorProto_name(const google_protobuf_OneofDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(4, 8), upb_strview); } -UPB_INLINE bool google_protobuf_OneofDescriptorProto_has_options(const google_protobuf_OneofDescriptorProto *msg) { return _upb_has_field(msg, 2); } +UPB_INLINE bool google_protobuf_OneofDescriptorProto_has_options(const google_protobuf_OneofDescriptorProto *msg) { return _upb_hasbit(msg, 2); } UPB_INLINE const google_protobuf_OneofOptions* google_protobuf_OneofDescriptorProto_options(const google_protobuf_OneofDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(12, 24), const google_protobuf_OneofOptions*); } UPB_INLINE void google_protobuf_OneofDescriptorProto_set_name(google_protobuf_OneofDescriptorProto *msg, upb_strview value) { @@ -2091,15 +2785,21 @@ UPB_INLINE google_protobuf_EnumDescriptorProto *google_protobuf_EnumDescriptorPr google_protobuf_EnumDescriptorProto *ret = google_protobuf_EnumDescriptorProto_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_EnumDescriptorProto_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_EnumDescriptorProto *google_protobuf_EnumDescriptorProto_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_EnumDescriptorProto *ret = google_protobuf_EnumDescriptorProto_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_EnumDescriptorProto_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_EnumDescriptorProto_serialize(const google_protobuf_EnumDescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_EnumDescriptorProto_msginit, arena, len); } -UPB_INLINE bool google_protobuf_EnumDescriptorProto_has_name(const google_protobuf_EnumDescriptorProto *msg) { return _upb_has_field(msg, 1); } +UPB_INLINE bool google_protobuf_EnumDescriptorProto_has_name(const google_protobuf_EnumDescriptorProto *msg) { return _upb_hasbit(msg, 1); } UPB_INLINE upb_strview google_protobuf_EnumDescriptorProto_name(const google_protobuf_EnumDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(4, 8), upb_strview); } UPB_INLINE bool google_protobuf_EnumDescriptorProto_has_value(const google_protobuf_EnumDescriptorProto *msg) { return _upb_has_submsg_nohasbit(msg, UPB_SIZE(16, 32)); } UPB_INLINE const google_protobuf_EnumValueDescriptorProto* const* google_protobuf_EnumDescriptorProto_value(const google_protobuf_EnumDescriptorProto *msg, size_t *len) { return (const google_protobuf_EnumValueDescriptorProto* const*)_upb_array_accessor(msg, UPB_SIZE(16, 32), len); } -UPB_INLINE bool google_protobuf_EnumDescriptorProto_has_options(const google_protobuf_EnumDescriptorProto *msg) { return _upb_has_field(msg, 2); } +UPB_INLINE bool google_protobuf_EnumDescriptorProto_has_options(const google_protobuf_EnumDescriptorProto *msg) { return _upb_hasbit(msg, 2); } UPB_INLINE const google_protobuf_EnumOptions* google_protobuf_EnumDescriptorProto_options(const google_protobuf_EnumDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(12, 24), const google_protobuf_EnumOptions*); } UPB_INLINE bool google_protobuf_EnumDescriptorProto_has_reserved_range(const google_protobuf_EnumDescriptorProto *msg) { return _upb_has_submsg_nohasbit(msg, UPB_SIZE(20, 40)); } UPB_INLINE const google_protobuf_EnumDescriptorProto_EnumReservedRange* const* google_protobuf_EnumDescriptorProto_reserved_range(const google_protobuf_EnumDescriptorProto *msg, size_t *len) { return (const google_protobuf_EnumDescriptorProto_EnumReservedRange* const*)_upb_array_accessor(msg, UPB_SIZE(20, 40), len); } @@ -2113,12 +2813,12 @@ UPB_INLINE google_protobuf_EnumValueDescriptorProto** google_protobuf_EnumDescri return (google_protobuf_EnumValueDescriptorProto**)_upb_array_mutable_accessor(msg, UPB_SIZE(16, 32), len); } UPB_INLINE google_protobuf_EnumValueDescriptorProto** google_protobuf_EnumDescriptorProto_resize_value(google_protobuf_EnumDescriptorProto *msg, size_t len, upb_arena *arena) { - return (google_protobuf_EnumValueDescriptorProto**)_upb_array_resize_accessor(msg, UPB_SIZE(16, 32), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_EnumValueDescriptorProto**)_upb_array_resize_accessor2(msg, UPB_SIZE(16, 32), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_EnumValueDescriptorProto* google_protobuf_EnumDescriptorProto_add_value(google_protobuf_EnumDescriptorProto *msg, upb_arena *arena) { struct google_protobuf_EnumValueDescriptorProto* sub = (struct google_protobuf_EnumValueDescriptorProto*)_upb_msg_new(&google_protobuf_EnumValueDescriptorProto_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(16, 32), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(16, 32), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -2139,12 +2839,12 @@ UPB_INLINE google_protobuf_EnumDescriptorProto_EnumReservedRange** google_protob return (google_protobuf_EnumDescriptorProto_EnumReservedRange**)_upb_array_mutable_accessor(msg, UPB_SIZE(20, 40), len); } UPB_INLINE google_protobuf_EnumDescriptorProto_EnumReservedRange** google_protobuf_EnumDescriptorProto_resize_reserved_range(google_protobuf_EnumDescriptorProto *msg, size_t len, upb_arena *arena) { - return (google_protobuf_EnumDescriptorProto_EnumReservedRange**)_upb_array_resize_accessor(msg, UPB_SIZE(20, 40), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_EnumDescriptorProto_EnumReservedRange**)_upb_array_resize_accessor2(msg, UPB_SIZE(20, 40), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_EnumDescriptorProto_EnumReservedRange* google_protobuf_EnumDescriptorProto_add_reserved_range(google_protobuf_EnumDescriptorProto *msg, upb_arena *arena) { struct google_protobuf_EnumDescriptorProto_EnumReservedRange* sub = (struct google_protobuf_EnumDescriptorProto_EnumReservedRange*)_upb_msg_new(&google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(20, 40), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(20, 40), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -2152,10 +2852,10 @@ UPB_INLINE upb_strview* google_protobuf_EnumDescriptorProto_mutable_reserved_nam return (upb_strview*)_upb_array_mutable_accessor(msg, UPB_SIZE(24, 48), len); } UPB_INLINE upb_strview* google_protobuf_EnumDescriptorProto_resize_reserved_name(google_protobuf_EnumDescriptorProto *msg, size_t len, upb_arena *arena) { - return (upb_strview*)_upb_array_resize_accessor(msg, UPB_SIZE(24, 48), len, UPB_TYPE_STRING, arena); + return (upb_strview*)_upb_array_resize_accessor2(msg, UPB_SIZE(24, 48), len, UPB_SIZE(3, 4), arena); } UPB_INLINE bool google_protobuf_EnumDescriptorProto_add_reserved_name(google_protobuf_EnumDescriptorProto *msg, upb_strview val, upb_arena *arena) { - return _upb_array_append_accessor(msg, UPB_SIZE(24, 48), UPB_SIZE(8, 16), UPB_TYPE_STRING, &val, + return _upb_array_append_accessor2(msg, UPB_SIZE(24, 48), UPB_SIZE(3, 4), &val, arena); } @@ -2169,13 +2869,19 @@ UPB_INLINE google_protobuf_EnumDescriptorProto_EnumReservedRange *google_protobu google_protobuf_EnumDescriptorProto_EnumReservedRange *ret = google_protobuf_EnumDescriptorProto_EnumReservedRange_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_EnumDescriptorProto_EnumReservedRange *google_protobuf_EnumDescriptorProto_EnumReservedRange_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_EnumDescriptorProto_EnumReservedRange *ret = google_protobuf_EnumDescriptorProto_EnumReservedRange_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_EnumDescriptorProto_EnumReservedRange_serialize(const google_protobuf_EnumDescriptorProto_EnumReservedRange *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit, arena, len); } -UPB_INLINE bool google_protobuf_EnumDescriptorProto_EnumReservedRange_has_start(const google_protobuf_EnumDescriptorProto_EnumReservedRange *msg) { return _upb_has_field(msg, 1); } +UPB_INLINE bool google_protobuf_EnumDescriptorProto_EnumReservedRange_has_start(const google_protobuf_EnumDescriptorProto_EnumReservedRange *msg) { return _upb_hasbit(msg, 1); } UPB_INLINE int32_t google_protobuf_EnumDescriptorProto_EnumReservedRange_start(const google_protobuf_EnumDescriptorProto_EnumReservedRange *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(4, 4), int32_t); } -UPB_INLINE bool google_protobuf_EnumDescriptorProto_EnumReservedRange_has_end(const google_protobuf_EnumDescriptorProto_EnumReservedRange *msg) { return _upb_has_field(msg, 2); } +UPB_INLINE bool google_protobuf_EnumDescriptorProto_EnumReservedRange_has_end(const google_protobuf_EnumDescriptorProto_EnumReservedRange *msg) { return _upb_hasbit(msg, 2); } UPB_INLINE int32_t google_protobuf_EnumDescriptorProto_EnumReservedRange_end(const google_protobuf_EnumDescriptorProto_EnumReservedRange *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(8, 8), int32_t); } UPB_INLINE void google_protobuf_EnumDescriptorProto_EnumReservedRange_set_start(google_protobuf_EnumDescriptorProto_EnumReservedRange *msg, int32_t value) { @@ -2197,23 +2903,29 @@ UPB_INLINE google_protobuf_EnumValueDescriptorProto *google_protobuf_EnumValueDe google_protobuf_EnumValueDescriptorProto *ret = google_protobuf_EnumValueDescriptorProto_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_EnumValueDescriptorProto_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_EnumValueDescriptorProto *google_protobuf_EnumValueDescriptorProto_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_EnumValueDescriptorProto *ret = google_protobuf_EnumValueDescriptorProto_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_EnumValueDescriptorProto_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_EnumValueDescriptorProto_serialize(const google_protobuf_EnumValueDescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_EnumValueDescriptorProto_msginit, arena, len); } -UPB_INLINE bool google_protobuf_EnumValueDescriptorProto_has_name(const google_protobuf_EnumValueDescriptorProto *msg) { return _upb_has_field(msg, 2); } +UPB_INLINE bool google_protobuf_EnumValueDescriptorProto_has_name(const google_protobuf_EnumValueDescriptorProto *msg) { return _upb_hasbit(msg, 1); } UPB_INLINE upb_strview google_protobuf_EnumValueDescriptorProto_name(const google_protobuf_EnumValueDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(8, 8), upb_strview); } -UPB_INLINE bool google_protobuf_EnumValueDescriptorProto_has_number(const google_protobuf_EnumValueDescriptorProto *msg) { return _upb_has_field(msg, 1); } +UPB_INLINE bool google_protobuf_EnumValueDescriptorProto_has_number(const google_protobuf_EnumValueDescriptorProto *msg) { return _upb_hasbit(msg, 2); } UPB_INLINE int32_t google_protobuf_EnumValueDescriptorProto_number(const google_protobuf_EnumValueDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(4, 4), int32_t); } -UPB_INLINE bool google_protobuf_EnumValueDescriptorProto_has_options(const google_protobuf_EnumValueDescriptorProto *msg) { return _upb_has_field(msg, 3); } +UPB_INLINE bool google_protobuf_EnumValueDescriptorProto_has_options(const google_protobuf_EnumValueDescriptorProto *msg) { return _upb_hasbit(msg, 3); } UPB_INLINE const google_protobuf_EnumValueOptions* google_protobuf_EnumValueDescriptorProto_options(const google_protobuf_EnumValueDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(16, 24), const google_protobuf_EnumValueOptions*); } UPB_INLINE void google_protobuf_EnumValueDescriptorProto_set_name(google_protobuf_EnumValueDescriptorProto *msg, upb_strview value) { - _upb_sethas(msg, 2); + _upb_sethas(msg, 1); *UPB_PTR_AT(msg, UPB_SIZE(8, 8), upb_strview) = value; } UPB_INLINE void google_protobuf_EnumValueDescriptorProto_set_number(google_protobuf_EnumValueDescriptorProto *msg, int32_t value) { - _upb_sethas(msg, 1); + _upb_sethas(msg, 2); *UPB_PTR_AT(msg, UPB_SIZE(4, 4), int32_t) = value; } UPB_INLINE void google_protobuf_EnumValueDescriptorProto_set_options(google_protobuf_EnumValueDescriptorProto *msg, google_protobuf_EnumValueOptions* value) { @@ -2240,15 +2952,21 @@ UPB_INLINE google_protobuf_ServiceDescriptorProto *google_protobuf_ServiceDescri google_protobuf_ServiceDescriptorProto *ret = google_protobuf_ServiceDescriptorProto_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_ServiceDescriptorProto_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_ServiceDescriptorProto *google_protobuf_ServiceDescriptorProto_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_ServiceDescriptorProto *ret = google_protobuf_ServiceDescriptorProto_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_ServiceDescriptorProto_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_ServiceDescriptorProto_serialize(const google_protobuf_ServiceDescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_ServiceDescriptorProto_msginit, arena, len); } -UPB_INLINE bool google_protobuf_ServiceDescriptorProto_has_name(const google_protobuf_ServiceDescriptorProto *msg) { return _upb_has_field(msg, 1); } +UPB_INLINE bool google_protobuf_ServiceDescriptorProto_has_name(const google_protobuf_ServiceDescriptorProto *msg) { return _upb_hasbit(msg, 1); } UPB_INLINE upb_strview google_protobuf_ServiceDescriptorProto_name(const google_protobuf_ServiceDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(4, 8), upb_strview); } UPB_INLINE bool google_protobuf_ServiceDescriptorProto_has_method(const google_protobuf_ServiceDescriptorProto *msg) { return _upb_has_submsg_nohasbit(msg, UPB_SIZE(16, 32)); } UPB_INLINE const google_protobuf_MethodDescriptorProto* const* google_protobuf_ServiceDescriptorProto_method(const google_protobuf_ServiceDescriptorProto *msg, size_t *len) { return (const google_protobuf_MethodDescriptorProto* const*)_upb_array_accessor(msg, UPB_SIZE(16, 32), len); } -UPB_INLINE bool google_protobuf_ServiceDescriptorProto_has_options(const google_protobuf_ServiceDescriptorProto *msg) { return _upb_has_field(msg, 2); } +UPB_INLINE bool google_protobuf_ServiceDescriptorProto_has_options(const google_protobuf_ServiceDescriptorProto *msg) { return _upb_hasbit(msg, 2); } UPB_INLINE const google_protobuf_ServiceOptions* google_protobuf_ServiceDescriptorProto_options(const google_protobuf_ServiceDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(12, 24), const google_protobuf_ServiceOptions*); } UPB_INLINE void google_protobuf_ServiceDescriptorProto_set_name(google_protobuf_ServiceDescriptorProto *msg, upb_strview value) { @@ -2259,12 +2977,12 @@ UPB_INLINE google_protobuf_MethodDescriptorProto** google_protobuf_ServiceDescri return (google_protobuf_MethodDescriptorProto**)_upb_array_mutable_accessor(msg, UPB_SIZE(16, 32), len); } UPB_INLINE google_protobuf_MethodDescriptorProto** google_protobuf_ServiceDescriptorProto_resize_method(google_protobuf_ServiceDescriptorProto *msg, size_t len, upb_arena *arena) { - return (google_protobuf_MethodDescriptorProto**)_upb_array_resize_accessor(msg, UPB_SIZE(16, 32), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_MethodDescriptorProto**)_upb_array_resize_accessor2(msg, UPB_SIZE(16, 32), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_MethodDescriptorProto* google_protobuf_ServiceDescriptorProto_add_method(google_protobuf_ServiceDescriptorProto *msg, upb_arena *arena) { struct google_protobuf_MethodDescriptorProto* sub = (struct google_protobuf_MethodDescriptorProto*)_upb_msg_new(&google_protobuf_MethodDescriptorProto_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(16, 32), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(16, 32), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -2292,37 +3010,43 @@ UPB_INLINE google_protobuf_MethodDescriptorProto *google_protobuf_MethodDescript google_protobuf_MethodDescriptorProto *ret = google_protobuf_MethodDescriptorProto_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_MethodDescriptorProto_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_MethodDescriptorProto *google_protobuf_MethodDescriptorProto_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_MethodDescriptorProto *ret = google_protobuf_MethodDescriptorProto_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_MethodDescriptorProto_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_MethodDescriptorProto_serialize(const google_protobuf_MethodDescriptorProto *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_MethodDescriptorProto_msginit, arena, len); } -UPB_INLINE bool google_protobuf_MethodDescriptorProto_has_name(const google_protobuf_MethodDescriptorProto *msg) { return _upb_has_field(msg, 3); } +UPB_INLINE bool google_protobuf_MethodDescriptorProto_has_name(const google_protobuf_MethodDescriptorProto *msg) { return _upb_hasbit(msg, 1); } UPB_INLINE upb_strview google_protobuf_MethodDescriptorProto_name(const google_protobuf_MethodDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(4, 8), upb_strview); } -UPB_INLINE bool google_protobuf_MethodDescriptorProto_has_input_type(const google_protobuf_MethodDescriptorProto *msg) { return _upb_has_field(msg, 4); } +UPB_INLINE bool google_protobuf_MethodDescriptorProto_has_input_type(const google_protobuf_MethodDescriptorProto *msg) { return _upb_hasbit(msg, 2); } UPB_INLINE upb_strview google_protobuf_MethodDescriptorProto_input_type(const google_protobuf_MethodDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(12, 24), upb_strview); } -UPB_INLINE bool google_protobuf_MethodDescriptorProto_has_output_type(const google_protobuf_MethodDescriptorProto *msg) { return _upb_has_field(msg, 5); } +UPB_INLINE bool google_protobuf_MethodDescriptorProto_has_output_type(const google_protobuf_MethodDescriptorProto *msg) { return _upb_hasbit(msg, 3); } UPB_INLINE upb_strview google_protobuf_MethodDescriptorProto_output_type(const google_protobuf_MethodDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(20, 40), upb_strview); } -UPB_INLINE bool google_protobuf_MethodDescriptorProto_has_options(const google_protobuf_MethodDescriptorProto *msg) { return _upb_has_field(msg, 6); } +UPB_INLINE bool google_protobuf_MethodDescriptorProto_has_options(const google_protobuf_MethodDescriptorProto *msg) { return _upb_hasbit(msg, 4); } UPB_INLINE const google_protobuf_MethodOptions* google_protobuf_MethodDescriptorProto_options(const google_protobuf_MethodDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(28, 56), const google_protobuf_MethodOptions*); } -UPB_INLINE bool google_protobuf_MethodDescriptorProto_has_client_streaming(const google_protobuf_MethodDescriptorProto *msg) { return _upb_has_field(msg, 1); } +UPB_INLINE bool google_protobuf_MethodDescriptorProto_has_client_streaming(const google_protobuf_MethodDescriptorProto *msg) { return _upb_hasbit(msg, 5); } UPB_INLINE bool google_protobuf_MethodDescriptorProto_client_streaming(const google_protobuf_MethodDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(1, 1), bool); } -UPB_INLINE bool google_protobuf_MethodDescriptorProto_has_server_streaming(const google_protobuf_MethodDescriptorProto *msg) { return _upb_has_field(msg, 2); } +UPB_INLINE bool google_protobuf_MethodDescriptorProto_has_server_streaming(const google_protobuf_MethodDescriptorProto *msg) { return _upb_hasbit(msg, 6); } UPB_INLINE bool google_protobuf_MethodDescriptorProto_server_streaming(const google_protobuf_MethodDescriptorProto *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(2, 2), bool); } UPB_INLINE void google_protobuf_MethodDescriptorProto_set_name(google_protobuf_MethodDescriptorProto *msg, upb_strview value) { - _upb_sethas(msg, 3); + _upb_sethas(msg, 1); *UPB_PTR_AT(msg, UPB_SIZE(4, 8), upb_strview) = value; } UPB_INLINE void google_protobuf_MethodDescriptorProto_set_input_type(google_protobuf_MethodDescriptorProto *msg, upb_strview value) { - _upb_sethas(msg, 4); + _upb_sethas(msg, 2); *UPB_PTR_AT(msg, UPB_SIZE(12, 24), upb_strview) = value; } UPB_INLINE void google_protobuf_MethodDescriptorProto_set_output_type(google_protobuf_MethodDescriptorProto *msg, upb_strview value) { - _upb_sethas(msg, 5); + _upb_sethas(msg, 3); *UPB_PTR_AT(msg, UPB_SIZE(20, 40), upb_strview) = value; } UPB_INLINE void google_protobuf_MethodDescriptorProto_set_options(google_protobuf_MethodDescriptorProto *msg, google_protobuf_MethodOptions* value) { - _upb_sethas(msg, 6); + _upb_sethas(msg, 4); *UPB_PTR_AT(msg, UPB_SIZE(28, 56), google_protobuf_MethodOptions*) = value; } UPB_INLINE struct google_protobuf_MethodOptions* google_protobuf_MethodDescriptorProto_mutable_options(google_protobuf_MethodDescriptorProto *msg, upb_arena *arena) { @@ -2335,11 +3059,11 @@ UPB_INLINE struct google_protobuf_MethodOptions* google_protobuf_MethodDescripto return sub; } UPB_INLINE void google_protobuf_MethodDescriptorProto_set_client_streaming(google_protobuf_MethodDescriptorProto *msg, bool value) { - _upb_sethas(msg, 1); + _upb_sethas(msg, 5); *UPB_PTR_AT(msg, UPB_SIZE(1, 1), bool) = value; } UPB_INLINE void google_protobuf_MethodDescriptorProto_set_server_streaming(google_protobuf_MethodDescriptorProto *msg, bool value) { - _upb_sethas(msg, 2); + _upb_sethas(msg, 6); *UPB_PTR_AT(msg, UPB_SIZE(2, 2), bool) = value; } @@ -2353,143 +3077,149 @@ UPB_INLINE google_protobuf_FileOptions *google_protobuf_FileOptions_parse(const google_protobuf_FileOptions *ret = google_protobuf_FileOptions_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_FileOptions_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_FileOptions *google_protobuf_FileOptions_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_FileOptions *ret = google_protobuf_FileOptions_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_FileOptions_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_FileOptions_serialize(const google_protobuf_FileOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_FileOptions_msginit, arena, len); } -UPB_INLINE bool google_protobuf_FileOptions_has_java_package(const google_protobuf_FileOptions *msg) { return _upb_has_field(msg, 11); } -UPB_INLINE upb_strview google_protobuf_FileOptions_java_package(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(28, 32), upb_strview); } -UPB_INLINE bool google_protobuf_FileOptions_has_java_outer_classname(const google_protobuf_FileOptions *msg) { return _upb_has_field(msg, 12); } -UPB_INLINE upb_strview google_protobuf_FileOptions_java_outer_classname(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(36, 48), upb_strview); } -UPB_INLINE bool google_protobuf_FileOptions_has_optimize_for(const google_protobuf_FileOptions *msg) { return _upb_has_field(msg, 1); } -UPB_INLINE int32_t google_protobuf_FileOptions_optimize_for(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(8, 8), int32_t); } -UPB_INLINE bool google_protobuf_FileOptions_has_java_multiple_files(const google_protobuf_FileOptions *msg) { return _upb_has_field(msg, 2); } -UPB_INLINE bool google_protobuf_FileOptions_java_multiple_files(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(16, 16), bool); } -UPB_INLINE bool google_protobuf_FileOptions_has_go_package(const google_protobuf_FileOptions *msg) { return _upb_has_field(msg, 13); } -UPB_INLINE upb_strview google_protobuf_FileOptions_go_package(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(44, 64), upb_strview); } -UPB_INLINE bool google_protobuf_FileOptions_has_cc_generic_services(const google_protobuf_FileOptions *msg) { return _upb_has_field(msg, 3); } -UPB_INLINE bool google_protobuf_FileOptions_cc_generic_services(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(17, 17), bool); } -UPB_INLINE bool google_protobuf_FileOptions_has_java_generic_services(const google_protobuf_FileOptions *msg) { return _upb_has_field(msg, 4); } -UPB_INLINE bool google_protobuf_FileOptions_java_generic_services(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(18, 18), bool); } -UPB_INLINE bool google_protobuf_FileOptions_has_py_generic_services(const google_protobuf_FileOptions *msg) { return _upb_has_field(msg, 5); } -UPB_INLINE bool google_protobuf_FileOptions_py_generic_services(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(19, 19), bool); } -UPB_INLINE bool google_protobuf_FileOptions_has_java_generate_equals_and_hash(const google_protobuf_FileOptions *msg) { return _upb_has_field(msg, 6); } -UPB_INLINE bool google_protobuf_FileOptions_java_generate_equals_and_hash(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(20, 20), bool); } -UPB_INLINE bool google_protobuf_FileOptions_has_deprecated(const google_protobuf_FileOptions *msg) { return _upb_has_field(msg, 7); } -UPB_INLINE bool google_protobuf_FileOptions_deprecated(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(21, 21), bool); } -UPB_INLINE bool google_protobuf_FileOptions_has_java_string_check_utf8(const google_protobuf_FileOptions *msg) { return _upb_has_field(msg, 8); } -UPB_INLINE bool google_protobuf_FileOptions_java_string_check_utf8(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(22, 22), bool); } -UPB_INLINE bool google_protobuf_FileOptions_has_cc_enable_arenas(const google_protobuf_FileOptions *msg) { return _upb_has_field(msg, 9); } -UPB_INLINE bool google_protobuf_FileOptions_cc_enable_arenas(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(23, 23), bool); } -UPB_INLINE bool google_protobuf_FileOptions_has_objc_class_prefix(const google_protobuf_FileOptions *msg) { return _upb_has_field(msg, 14); } -UPB_INLINE upb_strview google_protobuf_FileOptions_objc_class_prefix(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(52, 80), upb_strview); } -UPB_INLINE bool google_protobuf_FileOptions_has_csharp_namespace(const google_protobuf_FileOptions *msg) { return _upb_has_field(msg, 15); } -UPB_INLINE upb_strview google_protobuf_FileOptions_csharp_namespace(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(60, 96), upb_strview); } -UPB_INLINE bool google_protobuf_FileOptions_has_swift_prefix(const google_protobuf_FileOptions *msg) { return _upb_has_field(msg, 16); } -UPB_INLINE upb_strview google_protobuf_FileOptions_swift_prefix(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(68, 112), upb_strview); } -UPB_INLINE bool google_protobuf_FileOptions_has_php_class_prefix(const google_protobuf_FileOptions *msg) { return _upb_has_field(msg, 17); } -UPB_INLINE upb_strview google_protobuf_FileOptions_php_class_prefix(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(76, 128), upb_strview); } -UPB_INLINE bool google_protobuf_FileOptions_has_php_namespace(const google_protobuf_FileOptions *msg) { return _upb_has_field(msg, 18); } -UPB_INLINE upb_strview google_protobuf_FileOptions_php_namespace(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(84, 144), upb_strview); } -UPB_INLINE bool google_protobuf_FileOptions_has_php_generic_services(const google_protobuf_FileOptions *msg) { return _upb_has_field(msg, 10); } -UPB_INLINE bool google_protobuf_FileOptions_php_generic_services(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(24, 24), bool); } -UPB_INLINE bool google_protobuf_FileOptions_has_php_metadata_namespace(const google_protobuf_FileOptions *msg) { return _upb_has_field(msg, 19); } -UPB_INLINE upb_strview google_protobuf_FileOptions_php_metadata_namespace(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(92, 160), upb_strview); } -UPB_INLINE bool google_protobuf_FileOptions_has_ruby_package(const google_protobuf_FileOptions *msg) { return _upb_has_field(msg, 20); } -UPB_INLINE upb_strview google_protobuf_FileOptions_ruby_package(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(100, 176), upb_strview); } -UPB_INLINE bool google_protobuf_FileOptions_has_uninterpreted_option(const google_protobuf_FileOptions *msg) { return _upb_has_submsg_nohasbit(msg, UPB_SIZE(108, 192)); } -UPB_INLINE const google_protobuf_UninterpretedOption* const* google_protobuf_FileOptions_uninterpreted_option(const google_protobuf_FileOptions *msg, size_t *len) { return (const google_protobuf_UninterpretedOption* const*)_upb_array_accessor(msg, UPB_SIZE(108, 192), len); } +UPB_INLINE bool google_protobuf_FileOptions_has_java_package(const google_protobuf_FileOptions *msg) { return _upb_hasbit(msg, 1); } +UPB_INLINE upb_strview google_protobuf_FileOptions_java_package(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(20, 24), upb_strview); } +UPB_INLINE bool google_protobuf_FileOptions_has_java_outer_classname(const google_protobuf_FileOptions *msg) { return _upb_hasbit(msg, 2); } +UPB_INLINE upb_strview google_protobuf_FileOptions_java_outer_classname(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(28, 40), upb_strview); } +UPB_INLINE bool google_protobuf_FileOptions_has_optimize_for(const google_protobuf_FileOptions *msg) { return _upb_hasbit(msg, 3); } +UPB_INLINE int32_t google_protobuf_FileOptions_optimize_for(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(4, 4), int32_t); } +UPB_INLINE bool google_protobuf_FileOptions_has_java_multiple_files(const google_protobuf_FileOptions *msg) { return _upb_hasbit(msg, 4); } +UPB_INLINE bool google_protobuf_FileOptions_java_multiple_files(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(8, 8), bool); } +UPB_INLINE bool google_protobuf_FileOptions_has_go_package(const google_protobuf_FileOptions *msg) { return _upb_hasbit(msg, 5); } +UPB_INLINE upb_strview google_protobuf_FileOptions_go_package(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(36, 56), upb_strview); } +UPB_INLINE bool google_protobuf_FileOptions_has_cc_generic_services(const google_protobuf_FileOptions *msg) { return _upb_hasbit(msg, 6); } +UPB_INLINE bool google_protobuf_FileOptions_cc_generic_services(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(9, 9), bool); } +UPB_INLINE bool google_protobuf_FileOptions_has_java_generic_services(const google_protobuf_FileOptions *msg) { return _upb_hasbit(msg, 7); } +UPB_INLINE bool google_protobuf_FileOptions_java_generic_services(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(10, 10), bool); } +UPB_INLINE bool google_protobuf_FileOptions_has_py_generic_services(const google_protobuf_FileOptions *msg) { return _upb_hasbit(msg, 8); } +UPB_INLINE bool google_protobuf_FileOptions_py_generic_services(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(11, 11), bool); } +UPB_INLINE bool google_protobuf_FileOptions_has_java_generate_equals_and_hash(const google_protobuf_FileOptions *msg) { return _upb_hasbit(msg, 9); } +UPB_INLINE bool google_protobuf_FileOptions_java_generate_equals_and_hash(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(12, 12), bool); } +UPB_INLINE bool google_protobuf_FileOptions_has_deprecated(const google_protobuf_FileOptions *msg) { return _upb_hasbit(msg, 10); } +UPB_INLINE bool google_protobuf_FileOptions_deprecated(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(13, 13), bool); } +UPB_INLINE bool google_protobuf_FileOptions_has_java_string_check_utf8(const google_protobuf_FileOptions *msg) { return _upb_hasbit(msg, 11); } +UPB_INLINE bool google_protobuf_FileOptions_java_string_check_utf8(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(14, 14), bool); } +UPB_INLINE bool google_protobuf_FileOptions_has_cc_enable_arenas(const google_protobuf_FileOptions *msg) { return _upb_hasbit(msg, 12); } +UPB_INLINE bool google_protobuf_FileOptions_cc_enable_arenas(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(15, 15), bool); } +UPB_INLINE bool google_protobuf_FileOptions_has_objc_class_prefix(const google_protobuf_FileOptions *msg) { return _upb_hasbit(msg, 13); } +UPB_INLINE upb_strview google_protobuf_FileOptions_objc_class_prefix(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(44, 72), upb_strview); } +UPB_INLINE bool google_protobuf_FileOptions_has_csharp_namespace(const google_protobuf_FileOptions *msg) { return _upb_hasbit(msg, 14); } +UPB_INLINE upb_strview google_protobuf_FileOptions_csharp_namespace(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(52, 88), upb_strview); } +UPB_INLINE bool google_protobuf_FileOptions_has_swift_prefix(const google_protobuf_FileOptions *msg) { return _upb_hasbit(msg, 15); } +UPB_INLINE upb_strview google_protobuf_FileOptions_swift_prefix(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(60, 104), upb_strview); } +UPB_INLINE bool google_protobuf_FileOptions_has_php_class_prefix(const google_protobuf_FileOptions *msg) { return _upb_hasbit(msg, 16); } +UPB_INLINE upb_strview google_protobuf_FileOptions_php_class_prefix(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(68, 120), upb_strview); } +UPB_INLINE bool google_protobuf_FileOptions_has_php_namespace(const google_protobuf_FileOptions *msg) { return _upb_hasbit(msg, 17); } +UPB_INLINE upb_strview google_protobuf_FileOptions_php_namespace(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(76, 136), upb_strview); } +UPB_INLINE bool google_protobuf_FileOptions_has_php_generic_services(const google_protobuf_FileOptions *msg) { return _upb_hasbit(msg, 18); } +UPB_INLINE bool google_protobuf_FileOptions_php_generic_services(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(16, 16), bool); } +UPB_INLINE bool google_protobuf_FileOptions_has_php_metadata_namespace(const google_protobuf_FileOptions *msg) { return _upb_hasbit(msg, 19); } +UPB_INLINE upb_strview google_protobuf_FileOptions_php_metadata_namespace(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(84, 152), upb_strview); } +UPB_INLINE bool google_protobuf_FileOptions_has_ruby_package(const google_protobuf_FileOptions *msg) { return _upb_hasbit(msg, 20); } +UPB_INLINE upb_strview google_protobuf_FileOptions_ruby_package(const google_protobuf_FileOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(92, 168), upb_strview); } +UPB_INLINE bool google_protobuf_FileOptions_has_uninterpreted_option(const google_protobuf_FileOptions *msg) { return _upb_has_submsg_nohasbit(msg, UPB_SIZE(100, 184)); } +UPB_INLINE const google_protobuf_UninterpretedOption* const* google_protobuf_FileOptions_uninterpreted_option(const google_protobuf_FileOptions *msg, size_t *len) { return (const google_protobuf_UninterpretedOption* const*)_upb_array_accessor(msg, UPB_SIZE(100, 184), len); } UPB_INLINE void google_protobuf_FileOptions_set_java_package(google_protobuf_FileOptions *msg, upb_strview value) { - _upb_sethas(msg, 11); - *UPB_PTR_AT(msg, UPB_SIZE(28, 32), upb_strview) = value; + _upb_sethas(msg, 1); + *UPB_PTR_AT(msg, UPB_SIZE(20, 24), upb_strview) = value; } UPB_INLINE void google_protobuf_FileOptions_set_java_outer_classname(google_protobuf_FileOptions *msg, upb_strview value) { - _upb_sethas(msg, 12); - *UPB_PTR_AT(msg, UPB_SIZE(36, 48), upb_strview) = value; + _upb_sethas(msg, 2); + *UPB_PTR_AT(msg, UPB_SIZE(28, 40), upb_strview) = value; } UPB_INLINE void google_protobuf_FileOptions_set_optimize_for(google_protobuf_FileOptions *msg, int32_t value) { - _upb_sethas(msg, 1); - *UPB_PTR_AT(msg, UPB_SIZE(8, 8), int32_t) = value; + _upb_sethas(msg, 3); + *UPB_PTR_AT(msg, UPB_SIZE(4, 4), int32_t) = value; } UPB_INLINE void google_protobuf_FileOptions_set_java_multiple_files(google_protobuf_FileOptions *msg, bool value) { - _upb_sethas(msg, 2); - *UPB_PTR_AT(msg, UPB_SIZE(16, 16), bool) = value; + _upb_sethas(msg, 4); + *UPB_PTR_AT(msg, UPB_SIZE(8, 8), bool) = value; } UPB_INLINE void google_protobuf_FileOptions_set_go_package(google_protobuf_FileOptions *msg, upb_strview value) { - _upb_sethas(msg, 13); - *UPB_PTR_AT(msg, UPB_SIZE(44, 64), upb_strview) = value; + _upb_sethas(msg, 5); + *UPB_PTR_AT(msg, UPB_SIZE(36, 56), upb_strview) = value; } UPB_INLINE void google_protobuf_FileOptions_set_cc_generic_services(google_protobuf_FileOptions *msg, bool value) { - _upb_sethas(msg, 3); - *UPB_PTR_AT(msg, UPB_SIZE(17, 17), bool) = value; + _upb_sethas(msg, 6); + *UPB_PTR_AT(msg, UPB_SIZE(9, 9), bool) = value; } UPB_INLINE void google_protobuf_FileOptions_set_java_generic_services(google_protobuf_FileOptions *msg, bool value) { - _upb_sethas(msg, 4); - *UPB_PTR_AT(msg, UPB_SIZE(18, 18), bool) = value; + _upb_sethas(msg, 7); + *UPB_PTR_AT(msg, UPB_SIZE(10, 10), bool) = value; } UPB_INLINE void google_protobuf_FileOptions_set_py_generic_services(google_protobuf_FileOptions *msg, bool value) { - _upb_sethas(msg, 5); - *UPB_PTR_AT(msg, UPB_SIZE(19, 19), bool) = value; + _upb_sethas(msg, 8); + *UPB_PTR_AT(msg, UPB_SIZE(11, 11), bool) = value; } UPB_INLINE void google_protobuf_FileOptions_set_java_generate_equals_and_hash(google_protobuf_FileOptions *msg, bool value) { - _upb_sethas(msg, 6); - *UPB_PTR_AT(msg, UPB_SIZE(20, 20), bool) = value; + _upb_sethas(msg, 9); + *UPB_PTR_AT(msg, UPB_SIZE(12, 12), bool) = value; } UPB_INLINE void google_protobuf_FileOptions_set_deprecated(google_protobuf_FileOptions *msg, bool value) { - _upb_sethas(msg, 7); - *UPB_PTR_AT(msg, UPB_SIZE(21, 21), bool) = value; + _upb_sethas(msg, 10); + *UPB_PTR_AT(msg, UPB_SIZE(13, 13), bool) = value; } UPB_INLINE void google_protobuf_FileOptions_set_java_string_check_utf8(google_protobuf_FileOptions *msg, bool value) { - _upb_sethas(msg, 8); - *UPB_PTR_AT(msg, UPB_SIZE(22, 22), bool) = value; + _upb_sethas(msg, 11); + *UPB_PTR_AT(msg, UPB_SIZE(14, 14), bool) = value; } UPB_INLINE void google_protobuf_FileOptions_set_cc_enable_arenas(google_protobuf_FileOptions *msg, bool value) { - _upb_sethas(msg, 9); - *UPB_PTR_AT(msg, UPB_SIZE(23, 23), bool) = value; + _upb_sethas(msg, 12); + *UPB_PTR_AT(msg, UPB_SIZE(15, 15), bool) = value; } UPB_INLINE void google_protobuf_FileOptions_set_objc_class_prefix(google_protobuf_FileOptions *msg, upb_strview value) { - _upb_sethas(msg, 14); - *UPB_PTR_AT(msg, UPB_SIZE(52, 80), upb_strview) = value; + _upb_sethas(msg, 13); + *UPB_PTR_AT(msg, UPB_SIZE(44, 72), upb_strview) = value; } UPB_INLINE void google_protobuf_FileOptions_set_csharp_namespace(google_protobuf_FileOptions *msg, upb_strview value) { - _upb_sethas(msg, 15); - *UPB_PTR_AT(msg, UPB_SIZE(60, 96), upb_strview) = value; + _upb_sethas(msg, 14); + *UPB_PTR_AT(msg, UPB_SIZE(52, 88), upb_strview) = value; } UPB_INLINE void google_protobuf_FileOptions_set_swift_prefix(google_protobuf_FileOptions *msg, upb_strview value) { - _upb_sethas(msg, 16); - *UPB_PTR_AT(msg, UPB_SIZE(68, 112), upb_strview) = value; + _upb_sethas(msg, 15); + *UPB_PTR_AT(msg, UPB_SIZE(60, 104), upb_strview) = value; } UPB_INLINE void google_protobuf_FileOptions_set_php_class_prefix(google_protobuf_FileOptions *msg, upb_strview value) { - _upb_sethas(msg, 17); - *UPB_PTR_AT(msg, UPB_SIZE(76, 128), upb_strview) = value; + _upb_sethas(msg, 16); + *UPB_PTR_AT(msg, UPB_SIZE(68, 120), upb_strview) = value; } UPB_INLINE void google_protobuf_FileOptions_set_php_namespace(google_protobuf_FileOptions *msg, upb_strview value) { - _upb_sethas(msg, 18); - *UPB_PTR_AT(msg, UPB_SIZE(84, 144), upb_strview) = value; + _upb_sethas(msg, 17); + *UPB_PTR_AT(msg, UPB_SIZE(76, 136), upb_strview) = value; } UPB_INLINE void google_protobuf_FileOptions_set_php_generic_services(google_protobuf_FileOptions *msg, bool value) { - _upb_sethas(msg, 10); - *UPB_PTR_AT(msg, UPB_SIZE(24, 24), bool) = value; + _upb_sethas(msg, 18); + *UPB_PTR_AT(msg, UPB_SIZE(16, 16), bool) = value; } UPB_INLINE void google_protobuf_FileOptions_set_php_metadata_namespace(google_protobuf_FileOptions *msg, upb_strview value) { _upb_sethas(msg, 19); - *UPB_PTR_AT(msg, UPB_SIZE(92, 160), upb_strview) = value; + *UPB_PTR_AT(msg, UPB_SIZE(84, 152), upb_strview) = value; } UPB_INLINE void google_protobuf_FileOptions_set_ruby_package(google_protobuf_FileOptions *msg, upb_strview value) { _upb_sethas(msg, 20); - *UPB_PTR_AT(msg, UPB_SIZE(100, 176), upb_strview) = value; + *UPB_PTR_AT(msg, UPB_SIZE(92, 168), upb_strview) = value; } UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_FileOptions_mutable_uninterpreted_option(google_protobuf_FileOptions *msg, size_t *len) { - return (google_protobuf_UninterpretedOption**)_upb_array_mutable_accessor(msg, UPB_SIZE(108, 192), len); + return (google_protobuf_UninterpretedOption**)_upb_array_mutable_accessor(msg, UPB_SIZE(100, 184), len); } UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_FileOptions_resize_uninterpreted_option(google_protobuf_FileOptions *msg, size_t len, upb_arena *arena) { - return (google_protobuf_UninterpretedOption**)_upb_array_resize_accessor(msg, UPB_SIZE(108, 192), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_UninterpretedOption**)_upb_array_resize_accessor2(msg, UPB_SIZE(100, 184), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_UninterpretedOption* google_protobuf_FileOptions_add_uninterpreted_option(google_protobuf_FileOptions *msg, upb_arena *arena) { struct google_protobuf_UninterpretedOption* sub = (struct google_protobuf_UninterpretedOption*)_upb_msg_new(&google_protobuf_UninterpretedOption_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(108, 192), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(100, 184), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -2504,17 +3234,23 @@ UPB_INLINE google_protobuf_MessageOptions *google_protobuf_MessageOptions_parse( google_protobuf_MessageOptions *ret = google_protobuf_MessageOptions_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_MessageOptions_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_MessageOptions *google_protobuf_MessageOptions_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_MessageOptions *ret = google_protobuf_MessageOptions_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_MessageOptions_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_MessageOptions_serialize(const google_protobuf_MessageOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_MessageOptions_msginit, arena, len); } -UPB_INLINE bool google_protobuf_MessageOptions_has_message_set_wire_format(const google_protobuf_MessageOptions *msg) { return _upb_has_field(msg, 1); } +UPB_INLINE bool google_protobuf_MessageOptions_has_message_set_wire_format(const google_protobuf_MessageOptions *msg) { return _upb_hasbit(msg, 1); } UPB_INLINE bool google_protobuf_MessageOptions_message_set_wire_format(const google_protobuf_MessageOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(1, 1), bool); } -UPB_INLINE bool google_protobuf_MessageOptions_has_no_standard_descriptor_accessor(const google_protobuf_MessageOptions *msg) { return _upb_has_field(msg, 2); } +UPB_INLINE bool google_protobuf_MessageOptions_has_no_standard_descriptor_accessor(const google_protobuf_MessageOptions *msg) { return _upb_hasbit(msg, 2); } UPB_INLINE bool google_protobuf_MessageOptions_no_standard_descriptor_accessor(const google_protobuf_MessageOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(2, 2), bool); } -UPB_INLINE bool google_protobuf_MessageOptions_has_deprecated(const google_protobuf_MessageOptions *msg) { return _upb_has_field(msg, 3); } +UPB_INLINE bool google_protobuf_MessageOptions_has_deprecated(const google_protobuf_MessageOptions *msg) { return _upb_hasbit(msg, 3); } UPB_INLINE bool google_protobuf_MessageOptions_deprecated(const google_protobuf_MessageOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(3, 3), bool); } -UPB_INLINE bool google_protobuf_MessageOptions_has_map_entry(const google_protobuf_MessageOptions *msg) { return _upb_has_field(msg, 4); } +UPB_INLINE bool google_protobuf_MessageOptions_has_map_entry(const google_protobuf_MessageOptions *msg) { return _upb_hasbit(msg, 4); } UPB_INLINE bool google_protobuf_MessageOptions_map_entry(const google_protobuf_MessageOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(4, 4), bool); } UPB_INLINE bool google_protobuf_MessageOptions_has_uninterpreted_option(const google_protobuf_MessageOptions *msg) { return _upb_has_submsg_nohasbit(msg, UPB_SIZE(8, 8)); } UPB_INLINE const google_protobuf_UninterpretedOption* const* google_protobuf_MessageOptions_uninterpreted_option(const google_protobuf_MessageOptions *msg, size_t *len) { return (const google_protobuf_UninterpretedOption* const*)_upb_array_accessor(msg, UPB_SIZE(8, 8), len); } @@ -2539,12 +3275,12 @@ UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_MessageOptions_ return (google_protobuf_UninterpretedOption**)_upb_array_mutable_accessor(msg, UPB_SIZE(8, 8), len); } UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_MessageOptions_resize_uninterpreted_option(google_protobuf_MessageOptions *msg, size_t len, upb_arena *arena) { - return (google_protobuf_UninterpretedOption**)_upb_array_resize_accessor(msg, UPB_SIZE(8, 8), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_UninterpretedOption**)_upb_array_resize_accessor2(msg, UPB_SIZE(8, 8), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_UninterpretedOption* google_protobuf_MessageOptions_add_uninterpreted_option(google_protobuf_MessageOptions *msg, upb_arena *arena) { struct google_protobuf_UninterpretedOption* sub = (struct google_protobuf_UninterpretedOption*)_upb_msg_new(&google_protobuf_UninterpretedOption_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(8, 8), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(8, 8), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -2559,59 +3295,65 @@ UPB_INLINE google_protobuf_FieldOptions *google_protobuf_FieldOptions_parse(cons google_protobuf_FieldOptions *ret = google_protobuf_FieldOptions_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_FieldOptions_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_FieldOptions *google_protobuf_FieldOptions_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_FieldOptions *ret = google_protobuf_FieldOptions_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_FieldOptions_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_FieldOptions_serialize(const google_protobuf_FieldOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_FieldOptions_msginit, arena, len); } -UPB_INLINE bool google_protobuf_FieldOptions_has_ctype(const google_protobuf_FieldOptions *msg) { return _upb_has_field(msg, 1); } -UPB_INLINE int32_t google_protobuf_FieldOptions_ctype(const google_protobuf_FieldOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(8, 8), int32_t); } -UPB_INLINE bool google_protobuf_FieldOptions_has_packed(const google_protobuf_FieldOptions *msg) { return _upb_has_field(msg, 3); } -UPB_INLINE bool google_protobuf_FieldOptions_packed(const google_protobuf_FieldOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(24, 24), bool); } -UPB_INLINE bool google_protobuf_FieldOptions_has_deprecated(const google_protobuf_FieldOptions *msg) { return _upb_has_field(msg, 4); } -UPB_INLINE bool google_protobuf_FieldOptions_deprecated(const google_protobuf_FieldOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(25, 25), bool); } -UPB_INLINE bool google_protobuf_FieldOptions_has_lazy(const google_protobuf_FieldOptions *msg) { return _upb_has_field(msg, 5); } -UPB_INLINE bool google_protobuf_FieldOptions_lazy(const google_protobuf_FieldOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(26, 26), bool); } -UPB_INLINE bool google_protobuf_FieldOptions_has_jstype(const google_protobuf_FieldOptions *msg) { return _upb_has_field(msg, 2); } -UPB_INLINE int32_t google_protobuf_FieldOptions_jstype(const google_protobuf_FieldOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(16, 16), int32_t); } -UPB_INLINE bool google_protobuf_FieldOptions_has_weak(const google_protobuf_FieldOptions *msg) { return _upb_has_field(msg, 6); } -UPB_INLINE bool google_protobuf_FieldOptions_weak(const google_protobuf_FieldOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(27, 27), bool); } -UPB_INLINE bool google_protobuf_FieldOptions_has_uninterpreted_option(const google_protobuf_FieldOptions *msg) { return _upb_has_submsg_nohasbit(msg, UPB_SIZE(28, 32)); } -UPB_INLINE const google_protobuf_UninterpretedOption* const* google_protobuf_FieldOptions_uninterpreted_option(const google_protobuf_FieldOptions *msg, size_t *len) { return (const google_protobuf_UninterpretedOption* const*)_upb_array_accessor(msg, UPB_SIZE(28, 32), len); } +UPB_INLINE bool google_protobuf_FieldOptions_has_ctype(const google_protobuf_FieldOptions *msg) { return _upb_hasbit(msg, 1); } +UPB_INLINE int32_t google_protobuf_FieldOptions_ctype(const google_protobuf_FieldOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(4, 4), int32_t); } +UPB_INLINE bool google_protobuf_FieldOptions_has_packed(const google_protobuf_FieldOptions *msg) { return _upb_hasbit(msg, 2); } +UPB_INLINE bool google_protobuf_FieldOptions_packed(const google_protobuf_FieldOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(12, 12), bool); } +UPB_INLINE bool google_protobuf_FieldOptions_has_deprecated(const google_protobuf_FieldOptions *msg) { return _upb_hasbit(msg, 3); } +UPB_INLINE bool google_protobuf_FieldOptions_deprecated(const google_protobuf_FieldOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(13, 13), bool); } +UPB_INLINE bool google_protobuf_FieldOptions_has_lazy(const google_protobuf_FieldOptions *msg) { return _upb_hasbit(msg, 4); } +UPB_INLINE bool google_protobuf_FieldOptions_lazy(const google_protobuf_FieldOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(14, 14), bool); } +UPB_INLINE bool google_protobuf_FieldOptions_has_jstype(const google_protobuf_FieldOptions *msg) { return _upb_hasbit(msg, 5); } +UPB_INLINE int32_t google_protobuf_FieldOptions_jstype(const google_protobuf_FieldOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(8, 8), int32_t); } +UPB_INLINE bool google_protobuf_FieldOptions_has_weak(const google_protobuf_FieldOptions *msg) { return _upb_hasbit(msg, 6); } +UPB_INLINE bool google_protobuf_FieldOptions_weak(const google_protobuf_FieldOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(15, 15), bool); } +UPB_INLINE bool google_protobuf_FieldOptions_has_uninterpreted_option(const google_protobuf_FieldOptions *msg) { return _upb_has_submsg_nohasbit(msg, UPB_SIZE(16, 16)); } +UPB_INLINE const google_protobuf_UninterpretedOption* const* google_protobuf_FieldOptions_uninterpreted_option(const google_protobuf_FieldOptions *msg, size_t *len) { return (const google_protobuf_UninterpretedOption* const*)_upb_array_accessor(msg, UPB_SIZE(16, 16), len); } UPB_INLINE void google_protobuf_FieldOptions_set_ctype(google_protobuf_FieldOptions *msg, int32_t value) { _upb_sethas(msg, 1); - *UPB_PTR_AT(msg, UPB_SIZE(8, 8), int32_t) = value; + *UPB_PTR_AT(msg, UPB_SIZE(4, 4), int32_t) = value; } UPB_INLINE void google_protobuf_FieldOptions_set_packed(google_protobuf_FieldOptions *msg, bool value) { - _upb_sethas(msg, 3); - *UPB_PTR_AT(msg, UPB_SIZE(24, 24), bool) = value; + _upb_sethas(msg, 2); + *UPB_PTR_AT(msg, UPB_SIZE(12, 12), bool) = value; } UPB_INLINE void google_protobuf_FieldOptions_set_deprecated(google_protobuf_FieldOptions *msg, bool value) { - _upb_sethas(msg, 4); - *UPB_PTR_AT(msg, UPB_SIZE(25, 25), bool) = value; + _upb_sethas(msg, 3); + *UPB_PTR_AT(msg, UPB_SIZE(13, 13), bool) = value; } UPB_INLINE void google_protobuf_FieldOptions_set_lazy(google_protobuf_FieldOptions *msg, bool value) { - _upb_sethas(msg, 5); - *UPB_PTR_AT(msg, UPB_SIZE(26, 26), bool) = value; + _upb_sethas(msg, 4); + *UPB_PTR_AT(msg, UPB_SIZE(14, 14), bool) = value; } UPB_INLINE void google_protobuf_FieldOptions_set_jstype(google_protobuf_FieldOptions *msg, int32_t value) { - _upb_sethas(msg, 2); - *UPB_PTR_AT(msg, UPB_SIZE(16, 16), int32_t) = value; + _upb_sethas(msg, 5); + *UPB_PTR_AT(msg, UPB_SIZE(8, 8), int32_t) = value; } UPB_INLINE void google_protobuf_FieldOptions_set_weak(google_protobuf_FieldOptions *msg, bool value) { _upb_sethas(msg, 6); - *UPB_PTR_AT(msg, UPB_SIZE(27, 27), bool) = value; + *UPB_PTR_AT(msg, UPB_SIZE(15, 15), bool) = value; } UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_FieldOptions_mutable_uninterpreted_option(google_protobuf_FieldOptions *msg, size_t *len) { - return (google_protobuf_UninterpretedOption**)_upb_array_mutable_accessor(msg, UPB_SIZE(28, 32), len); + return (google_protobuf_UninterpretedOption**)_upb_array_mutable_accessor(msg, UPB_SIZE(16, 16), len); } UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_FieldOptions_resize_uninterpreted_option(google_protobuf_FieldOptions *msg, size_t len, upb_arena *arena) { - return (google_protobuf_UninterpretedOption**)_upb_array_resize_accessor(msg, UPB_SIZE(28, 32), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_UninterpretedOption**)_upb_array_resize_accessor2(msg, UPB_SIZE(16, 16), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_UninterpretedOption* google_protobuf_FieldOptions_add_uninterpreted_option(google_protobuf_FieldOptions *msg, upb_arena *arena) { struct google_protobuf_UninterpretedOption* sub = (struct google_protobuf_UninterpretedOption*)_upb_msg_new(&google_protobuf_UninterpretedOption_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(28, 32), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(16, 16), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -2626,6 +3368,12 @@ UPB_INLINE google_protobuf_OneofOptions *google_protobuf_OneofOptions_parse(cons google_protobuf_OneofOptions *ret = google_protobuf_OneofOptions_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_OneofOptions_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_OneofOptions *google_protobuf_OneofOptions_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_OneofOptions *ret = google_protobuf_OneofOptions_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_OneofOptions_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_OneofOptions_serialize(const google_protobuf_OneofOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_OneofOptions_msginit, arena, len); } @@ -2637,12 +3385,12 @@ UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_OneofOptions_mu return (google_protobuf_UninterpretedOption**)_upb_array_mutable_accessor(msg, UPB_SIZE(0, 0), len); } UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_OneofOptions_resize_uninterpreted_option(google_protobuf_OneofOptions *msg, size_t len, upb_arena *arena) { - return (google_protobuf_UninterpretedOption**)_upb_array_resize_accessor(msg, UPB_SIZE(0, 0), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_UninterpretedOption**)_upb_array_resize_accessor2(msg, UPB_SIZE(0, 0), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_UninterpretedOption* google_protobuf_OneofOptions_add_uninterpreted_option(google_protobuf_OneofOptions *msg, upb_arena *arena) { struct google_protobuf_UninterpretedOption* sub = (struct google_protobuf_UninterpretedOption*)_upb_msg_new(&google_protobuf_UninterpretedOption_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(0, 0), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(0, 0), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -2657,13 +3405,19 @@ UPB_INLINE google_protobuf_EnumOptions *google_protobuf_EnumOptions_parse(const google_protobuf_EnumOptions *ret = google_protobuf_EnumOptions_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_EnumOptions_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_EnumOptions *google_protobuf_EnumOptions_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_EnumOptions *ret = google_protobuf_EnumOptions_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_EnumOptions_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_EnumOptions_serialize(const google_protobuf_EnumOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_EnumOptions_msginit, arena, len); } -UPB_INLINE bool google_protobuf_EnumOptions_has_allow_alias(const google_protobuf_EnumOptions *msg) { return _upb_has_field(msg, 1); } +UPB_INLINE bool google_protobuf_EnumOptions_has_allow_alias(const google_protobuf_EnumOptions *msg) { return _upb_hasbit(msg, 1); } UPB_INLINE bool google_protobuf_EnumOptions_allow_alias(const google_protobuf_EnumOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(1, 1), bool); } -UPB_INLINE bool google_protobuf_EnumOptions_has_deprecated(const google_protobuf_EnumOptions *msg) { return _upb_has_field(msg, 2); } +UPB_INLINE bool google_protobuf_EnumOptions_has_deprecated(const google_protobuf_EnumOptions *msg) { return _upb_hasbit(msg, 2); } UPB_INLINE bool google_protobuf_EnumOptions_deprecated(const google_protobuf_EnumOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(2, 2), bool); } UPB_INLINE bool google_protobuf_EnumOptions_has_uninterpreted_option(const google_protobuf_EnumOptions *msg) { return _upb_has_submsg_nohasbit(msg, UPB_SIZE(4, 8)); } UPB_INLINE const google_protobuf_UninterpretedOption* const* google_protobuf_EnumOptions_uninterpreted_option(const google_protobuf_EnumOptions *msg, size_t *len) { return (const google_protobuf_UninterpretedOption* const*)_upb_array_accessor(msg, UPB_SIZE(4, 8), len); } @@ -2680,12 +3434,12 @@ UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_EnumOptions_mut return (google_protobuf_UninterpretedOption**)_upb_array_mutable_accessor(msg, UPB_SIZE(4, 8), len); } UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_EnumOptions_resize_uninterpreted_option(google_protobuf_EnumOptions *msg, size_t len, upb_arena *arena) { - return (google_protobuf_UninterpretedOption**)_upb_array_resize_accessor(msg, UPB_SIZE(4, 8), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_UninterpretedOption**)_upb_array_resize_accessor2(msg, UPB_SIZE(4, 8), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_UninterpretedOption* google_protobuf_EnumOptions_add_uninterpreted_option(google_protobuf_EnumOptions *msg, upb_arena *arena) { struct google_protobuf_UninterpretedOption* sub = (struct google_protobuf_UninterpretedOption*)_upb_msg_new(&google_protobuf_UninterpretedOption_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(4, 8), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(4, 8), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -2700,11 +3454,17 @@ UPB_INLINE google_protobuf_EnumValueOptions *google_protobuf_EnumValueOptions_pa google_protobuf_EnumValueOptions *ret = google_protobuf_EnumValueOptions_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_EnumValueOptions_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_EnumValueOptions *google_protobuf_EnumValueOptions_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_EnumValueOptions *ret = google_protobuf_EnumValueOptions_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_EnumValueOptions_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_EnumValueOptions_serialize(const google_protobuf_EnumValueOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_EnumValueOptions_msginit, arena, len); } -UPB_INLINE bool google_protobuf_EnumValueOptions_has_deprecated(const google_protobuf_EnumValueOptions *msg) { return _upb_has_field(msg, 1); } +UPB_INLINE bool google_protobuf_EnumValueOptions_has_deprecated(const google_protobuf_EnumValueOptions *msg) { return _upb_hasbit(msg, 1); } UPB_INLINE bool google_protobuf_EnumValueOptions_deprecated(const google_protobuf_EnumValueOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(1, 1), bool); } UPB_INLINE bool google_protobuf_EnumValueOptions_has_uninterpreted_option(const google_protobuf_EnumValueOptions *msg) { return _upb_has_submsg_nohasbit(msg, UPB_SIZE(4, 8)); } UPB_INLINE const google_protobuf_UninterpretedOption* const* google_protobuf_EnumValueOptions_uninterpreted_option(const google_protobuf_EnumValueOptions *msg, size_t *len) { return (const google_protobuf_UninterpretedOption* const*)_upb_array_accessor(msg, UPB_SIZE(4, 8), len); } @@ -2717,12 +3477,12 @@ UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_EnumValueOption return (google_protobuf_UninterpretedOption**)_upb_array_mutable_accessor(msg, UPB_SIZE(4, 8), len); } UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_EnumValueOptions_resize_uninterpreted_option(google_protobuf_EnumValueOptions *msg, size_t len, upb_arena *arena) { - return (google_protobuf_UninterpretedOption**)_upb_array_resize_accessor(msg, UPB_SIZE(4, 8), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_UninterpretedOption**)_upb_array_resize_accessor2(msg, UPB_SIZE(4, 8), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_UninterpretedOption* google_protobuf_EnumValueOptions_add_uninterpreted_option(google_protobuf_EnumValueOptions *msg, upb_arena *arena) { struct google_protobuf_UninterpretedOption* sub = (struct google_protobuf_UninterpretedOption*)_upb_msg_new(&google_protobuf_UninterpretedOption_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(4, 8), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(4, 8), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -2737,11 +3497,17 @@ UPB_INLINE google_protobuf_ServiceOptions *google_protobuf_ServiceOptions_parse( google_protobuf_ServiceOptions *ret = google_protobuf_ServiceOptions_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_ServiceOptions_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_ServiceOptions *google_protobuf_ServiceOptions_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_ServiceOptions *ret = google_protobuf_ServiceOptions_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_ServiceOptions_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_ServiceOptions_serialize(const google_protobuf_ServiceOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_ServiceOptions_msginit, arena, len); } -UPB_INLINE bool google_protobuf_ServiceOptions_has_deprecated(const google_protobuf_ServiceOptions *msg) { return _upb_has_field(msg, 1); } +UPB_INLINE bool google_protobuf_ServiceOptions_has_deprecated(const google_protobuf_ServiceOptions *msg) { return _upb_hasbit(msg, 1); } UPB_INLINE bool google_protobuf_ServiceOptions_deprecated(const google_protobuf_ServiceOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(1, 1), bool); } UPB_INLINE bool google_protobuf_ServiceOptions_has_uninterpreted_option(const google_protobuf_ServiceOptions *msg) { return _upb_has_submsg_nohasbit(msg, UPB_SIZE(4, 8)); } UPB_INLINE const google_protobuf_UninterpretedOption* const* google_protobuf_ServiceOptions_uninterpreted_option(const google_protobuf_ServiceOptions *msg, size_t *len) { return (const google_protobuf_UninterpretedOption* const*)_upb_array_accessor(msg, UPB_SIZE(4, 8), len); } @@ -2754,12 +3520,12 @@ UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_ServiceOptions_ return (google_protobuf_UninterpretedOption**)_upb_array_mutable_accessor(msg, UPB_SIZE(4, 8), len); } UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_ServiceOptions_resize_uninterpreted_option(google_protobuf_ServiceOptions *msg, size_t len, upb_arena *arena) { - return (google_protobuf_UninterpretedOption**)_upb_array_resize_accessor(msg, UPB_SIZE(4, 8), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_UninterpretedOption**)_upb_array_resize_accessor2(msg, UPB_SIZE(4, 8), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_UninterpretedOption* google_protobuf_ServiceOptions_add_uninterpreted_option(google_protobuf_ServiceOptions *msg, upb_arena *arena) { struct google_protobuf_UninterpretedOption* sub = (struct google_protobuf_UninterpretedOption*)_upb_msg_new(&google_protobuf_UninterpretedOption_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(4, 8), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(4, 8), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -2774,35 +3540,41 @@ UPB_INLINE google_protobuf_MethodOptions *google_protobuf_MethodOptions_parse(co google_protobuf_MethodOptions *ret = google_protobuf_MethodOptions_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_MethodOptions_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_MethodOptions *google_protobuf_MethodOptions_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_MethodOptions *ret = google_protobuf_MethodOptions_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_MethodOptions_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_MethodOptions_serialize(const google_protobuf_MethodOptions *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_MethodOptions_msginit, arena, len); } -UPB_INLINE bool google_protobuf_MethodOptions_has_deprecated(const google_protobuf_MethodOptions *msg) { return _upb_has_field(msg, 2); } -UPB_INLINE bool google_protobuf_MethodOptions_deprecated(const google_protobuf_MethodOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(16, 16), bool); } -UPB_INLINE bool google_protobuf_MethodOptions_has_idempotency_level(const google_protobuf_MethodOptions *msg) { return _upb_has_field(msg, 1); } -UPB_INLINE int32_t google_protobuf_MethodOptions_idempotency_level(const google_protobuf_MethodOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(8, 8), int32_t); } -UPB_INLINE bool google_protobuf_MethodOptions_has_uninterpreted_option(const google_protobuf_MethodOptions *msg) { return _upb_has_submsg_nohasbit(msg, UPB_SIZE(20, 24)); } -UPB_INLINE const google_protobuf_UninterpretedOption* const* google_protobuf_MethodOptions_uninterpreted_option(const google_protobuf_MethodOptions *msg, size_t *len) { return (const google_protobuf_UninterpretedOption* const*)_upb_array_accessor(msg, UPB_SIZE(20, 24), len); } +UPB_INLINE bool google_protobuf_MethodOptions_has_deprecated(const google_protobuf_MethodOptions *msg) { return _upb_hasbit(msg, 1); } +UPB_INLINE bool google_protobuf_MethodOptions_deprecated(const google_protobuf_MethodOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(8, 8), bool); } +UPB_INLINE bool google_protobuf_MethodOptions_has_idempotency_level(const google_protobuf_MethodOptions *msg) { return _upb_hasbit(msg, 2); } +UPB_INLINE int32_t google_protobuf_MethodOptions_idempotency_level(const google_protobuf_MethodOptions *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(4, 4), int32_t); } +UPB_INLINE bool google_protobuf_MethodOptions_has_uninterpreted_option(const google_protobuf_MethodOptions *msg) { return _upb_has_submsg_nohasbit(msg, UPB_SIZE(12, 16)); } +UPB_INLINE const google_protobuf_UninterpretedOption* const* google_protobuf_MethodOptions_uninterpreted_option(const google_protobuf_MethodOptions *msg, size_t *len) { return (const google_protobuf_UninterpretedOption* const*)_upb_array_accessor(msg, UPB_SIZE(12, 16), len); } UPB_INLINE void google_protobuf_MethodOptions_set_deprecated(google_protobuf_MethodOptions *msg, bool value) { - _upb_sethas(msg, 2); - *UPB_PTR_AT(msg, UPB_SIZE(16, 16), bool) = value; + _upb_sethas(msg, 1); + *UPB_PTR_AT(msg, UPB_SIZE(8, 8), bool) = value; } UPB_INLINE void google_protobuf_MethodOptions_set_idempotency_level(google_protobuf_MethodOptions *msg, int32_t value) { - _upb_sethas(msg, 1); - *UPB_PTR_AT(msg, UPB_SIZE(8, 8), int32_t) = value; + _upb_sethas(msg, 2); + *UPB_PTR_AT(msg, UPB_SIZE(4, 4), int32_t) = value; } UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_MethodOptions_mutable_uninterpreted_option(google_protobuf_MethodOptions *msg, size_t *len) { - return (google_protobuf_UninterpretedOption**)_upb_array_mutable_accessor(msg, UPB_SIZE(20, 24), len); + return (google_protobuf_UninterpretedOption**)_upb_array_mutable_accessor(msg, UPB_SIZE(12, 16), len); } UPB_INLINE google_protobuf_UninterpretedOption** google_protobuf_MethodOptions_resize_uninterpreted_option(google_protobuf_MethodOptions *msg, size_t len, upb_arena *arena) { - return (google_protobuf_UninterpretedOption**)_upb_array_resize_accessor(msg, UPB_SIZE(20, 24), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_UninterpretedOption**)_upb_array_resize_accessor2(msg, UPB_SIZE(12, 16), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_UninterpretedOption* google_protobuf_MethodOptions_add_uninterpreted_option(google_protobuf_MethodOptions *msg, upb_arena *arena) { struct google_protobuf_UninterpretedOption* sub = (struct google_protobuf_UninterpretedOption*)_upb_msg_new(&google_protobuf_UninterpretedOption_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(20, 24), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(12, 16), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -2817,52 +3589,58 @@ UPB_INLINE google_protobuf_UninterpretedOption *google_protobuf_UninterpretedOpt google_protobuf_UninterpretedOption *ret = google_protobuf_UninterpretedOption_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_UninterpretedOption_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_UninterpretedOption *google_protobuf_UninterpretedOption_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_UninterpretedOption *ret = google_protobuf_UninterpretedOption_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_UninterpretedOption_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_UninterpretedOption_serialize(const google_protobuf_UninterpretedOption *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_UninterpretedOption_msginit, arena, len); } UPB_INLINE bool google_protobuf_UninterpretedOption_has_name(const google_protobuf_UninterpretedOption *msg) { return _upb_has_submsg_nohasbit(msg, UPB_SIZE(56, 80)); } UPB_INLINE const google_protobuf_UninterpretedOption_NamePart* const* google_protobuf_UninterpretedOption_name(const google_protobuf_UninterpretedOption *msg, size_t *len) { return (const google_protobuf_UninterpretedOption_NamePart* const*)_upb_array_accessor(msg, UPB_SIZE(56, 80), len); } -UPB_INLINE bool google_protobuf_UninterpretedOption_has_identifier_value(const google_protobuf_UninterpretedOption *msg) { return _upb_has_field(msg, 4); } +UPB_INLINE bool google_protobuf_UninterpretedOption_has_identifier_value(const google_protobuf_UninterpretedOption *msg) { return _upb_hasbit(msg, 1); } UPB_INLINE upb_strview google_protobuf_UninterpretedOption_identifier_value(const google_protobuf_UninterpretedOption *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(32, 32), upb_strview); } -UPB_INLINE bool google_protobuf_UninterpretedOption_has_positive_int_value(const google_protobuf_UninterpretedOption *msg) { return _upb_has_field(msg, 1); } +UPB_INLINE bool google_protobuf_UninterpretedOption_has_positive_int_value(const google_protobuf_UninterpretedOption *msg) { return _upb_hasbit(msg, 2); } UPB_INLINE uint64_t google_protobuf_UninterpretedOption_positive_int_value(const google_protobuf_UninterpretedOption *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(8, 8), uint64_t); } -UPB_INLINE bool google_protobuf_UninterpretedOption_has_negative_int_value(const google_protobuf_UninterpretedOption *msg) { return _upb_has_field(msg, 2); } +UPB_INLINE bool google_protobuf_UninterpretedOption_has_negative_int_value(const google_protobuf_UninterpretedOption *msg) { return _upb_hasbit(msg, 3); } UPB_INLINE int64_t google_protobuf_UninterpretedOption_negative_int_value(const google_protobuf_UninterpretedOption *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(16, 16), int64_t); } -UPB_INLINE bool google_protobuf_UninterpretedOption_has_double_value(const google_protobuf_UninterpretedOption *msg) { return _upb_has_field(msg, 3); } +UPB_INLINE bool google_protobuf_UninterpretedOption_has_double_value(const google_protobuf_UninterpretedOption *msg) { return _upb_hasbit(msg, 4); } UPB_INLINE double google_protobuf_UninterpretedOption_double_value(const google_protobuf_UninterpretedOption *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(24, 24), double); } -UPB_INLINE bool google_protobuf_UninterpretedOption_has_string_value(const google_protobuf_UninterpretedOption *msg) { return _upb_has_field(msg, 5); } +UPB_INLINE bool google_protobuf_UninterpretedOption_has_string_value(const google_protobuf_UninterpretedOption *msg) { return _upb_hasbit(msg, 5); } UPB_INLINE upb_strview google_protobuf_UninterpretedOption_string_value(const google_protobuf_UninterpretedOption *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(40, 48), upb_strview); } -UPB_INLINE bool google_protobuf_UninterpretedOption_has_aggregate_value(const google_protobuf_UninterpretedOption *msg) { return _upb_has_field(msg, 6); } +UPB_INLINE bool google_protobuf_UninterpretedOption_has_aggregate_value(const google_protobuf_UninterpretedOption *msg) { return _upb_hasbit(msg, 6); } UPB_INLINE upb_strview google_protobuf_UninterpretedOption_aggregate_value(const google_protobuf_UninterpretedOption *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(48, 64), upb_strview); } UPB_INLINE google_protobuf_UninterpretedOption_NamePart** google_protobuf_UninterpretedOption_mutable_name(google_protobuf_UninterpretedOption *msg, size_t *len) { return (google_protobuf_UninterpretedOption_NamePart**)_upb_array_mutable_accessor(msg, UPB_SIZE(56, 80), len); } UPB_INLINE google_protobuf_UninterpretedOption_NamePart** google_protobuf_UninterpretedOption_resize_name(google_protobuf_UninterpretedOption *msg, size_t len, upb_arena *arena) { - return (google_protobuf_UninterpretedOption_NamePart**)_upb_array_resize_accessor(msg, UPB_SIZE(56, 80), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_UninterpretedOption_NamePart**)_upb_array_resize_accessor2(msg, UPB_SIZE(56, 80), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_UninterpretedOption_NamePart* google_protobuf_UninterpretedOption_add_name(google_protobuf_UninterpretedOption *msg, upb_arena *arena) { struct google_protobuf_UninterpretedOption_NamePart* sub = (struct google_protobuf_UninterpretedOption_NamePart*)_upb_msg_new(&google_protobuf_UninterpretedOption_NamePart_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(56, 80), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(56, 80), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } UPB_INLINE void google_protobuf_UninterpretedOption_set_identifier_value(google_protobuf_UninterpretedOption *msg, upb_strview value) { - _upb_sethas(msg, 4); + _upb_sethas(msg, 1); *UPB_PTR_AT(msg, UPB_SIZE(32, 32), upb_strview) = value; } UPB_INLINE void google_protobuf_UninterpretedOption_set_positive_int_value(google_protobuf_UninterpretedOption *msg, uint64_t value) { - _upb_sethas(msg, 1); + _upb_sethas(msg, 2); *UPB_PTR_AT(msg, UPB_SIZE(8, 8), uint64_t) = value; } UPB_INLINE void google_protobuf_UninterpretedOption_set_negative_int_value(google_protobuf_UninterpretedOption *msg, int64_t value) { - _upb_sethas(msg, 2); + _upb_sethas(msg, 3); *UPB_PTR_AT(msg, UPB_SIZE(16, 16), int64_t) = value; } UPB_INLINE void google_protobuf_UninterpretedOption_set_double_value(google_protobuf_UninterpretedOption *msg, double value) { - _upb_sethas(msg, 3); + _upb_sethas(msg, 4); *UPB_PTR_AT(msg, UPB_SIZE(24, 24), double) = value; } UPB_INLINE void google_protobuf_UninterpretedOption_set_string_value(google_protobuf_UninterpretedOption *msg, upb_strview value) { @@ -2884,21 +3662,27 @@ UPB_INLINE google_protobuf_UninterpretedOption_NamePart *google_protobuf_Uninter google_protobuf_UninterpretedOption_NamePart *ret = google_protobuf_UninterpretedOption_NamePart_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_UninterpretedOption_NamePart_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_UninterpretedOption_NamePart *google_protobuf_UninterpretedOption_NamePart_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_UninterpretedOption_NamePart *ret = google_protobuf_UninterpretedOption_NamePart_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_UninterpretedOption_NamePart_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_UninterpretedOption_NamePart_serialize(const google_protobuf_UninterpretedOption_NamePart *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_UninterpretedOption_NamePart_msginit, arena, len); } -UPB_INLINE bool google_protobuf_UninterpretedOption_NamePart_has_name_part(const google_protobuf_UninterpretedOption_NamePart *msg) { return _upb_has_field(msg, 2); } +UPB_INLINE bool google_protobuf_UninterpretedOption_NamePart_has_name_part(const google_protobuf_UninterpretedOption_NamePart *msg) { return _upb_hasbit(msg, 1); } UPB_INLINE upb_strview google_protobuf_UninterpretedOption_NamePart_name_part(const google_protobuf_UninterpretedOption_NamePart *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(4, 8), upb_strview); } -UPB_INLINE bool google_protobuf_UninterpretedOption_NamePart_has_is_extension(const google_protobuf_UninterpretedOption_NamePart *msg) { return _upb_has_field(msg, 1); } +UPB_INLINE bool google_protobuf_UninterpretedOption_NamePart_has_is_extension(const google_protobuf_UninterpretedOption_NamePart *msg) { return _upb_hasbit(msg, 2); } UPB_INLINE bool google_protobuf_UninterpretedOption_NamePart_is_extension(const google_protobuf_UninterpretedOption_NamePart *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(1, 1), bool); } UPB_INLINE void google_protobuf_UninterpretedOption_NamePart_set_name_part(google_protobuf_UninterpretedOption_NamePart *msg, upb_strview value) { - _upb_sethas(msg, 2); + _upb_sethas(msg, 1); *UPB_PTR_AT(msg, UPB_SIZE(4, 8), upb_strview) = value; } UPB_INLINE void google_protobuf_UninterpretedOption_NamePart_set_is_extension(google_protobuf_UninterpretedOption_NamePart *msg, bool value) { - _upb_sethas(msg, 1); + _upb_sethas(msg, 2); *UPB_PTR_AT(msg, UPB_SIZE(1, 1), bool) = value; } @@ -2912,6 +3696,12 @@ UPB_INLINE google_protobuf_SourceCodeInfo *google_protobuf_SourceCodeInfo_parse( google_protobuf_SourceCodeInfo *ret = google_protobuf_SourceCodeInfo_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_SourceCodeInfo_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_SourceCodeInfo *google_protobuf_SourceCodeInfo_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_SourceCodeInfo *ret = google_protobuf_SourceCodeInfo_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_SourceCodeInfo_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_SourceCodeInfo_serialize(const google_protobuf_SourceCodeInfo *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_SourceCodeInfo_msginit, arena, len); } @@ -2923,12 +3713,12 @@ UPB_INLINE google_protobuf_SourceCodeInfo_Location** google_protobuf_SourceCodeI return (google_protobuf_SourceCodeInfo_Location**)_upb_array_mutable_accessor(msg, UPB_SIZE(0, 0), len); } UPB_INLINE google_protobuf_SourceCodeInfo_Location** google_protobuf_SourceCodeInfo_resize_location(google_protobuf_SourceCodeInfo *msg, size_t len, upb_arena *arena) { - return (google_protobuf_SourceCodeInfo_Location**)_upb_array_resize_accessor(msg, UPB_SIZE(0, 0), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_SourceCodeInfo_Location**)_upb_array_resize_accessor2(msg, UPB_SIZE(0, 0), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_SourceCodeInfo_Location* google_protobuf_SourceCodeInfo_add_location(google_protobuf_SourceCodeInfo *msg, upb_arena *arena) { struct google_protobuf_SourceCodeInfo_Location* sub = (struct google_protobuf_SourceCodeInfo_Location*)_upb_msg_new(&google_protobuf_SourceCodeInfo_Location_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(0, 0), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(0, 0), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -2943,15 +3733,21 @@ UPB_INLINE google_protobuf_SourceCodeInfo_Location *google_protobuf_SourceCodeIn google_protobuf_SourceCodeInfo_Location *ret = google_protobuf_SourceCodeInfo_Location_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_SourceCodeInfo_Location_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_SourceCodeInfo_Location *google_protobuf_SourceCodeInfo_Location_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_SourceCodeInfo_Location *ret = google_protobuf_SourceCodeInfo_Location_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_SourceCodeInfo_Location_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_SourceCodeInfo_Location_serialize(const google_protobuf_SourceCodeInfo_Location *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_SourceCodeInfo_Location_msginit, arena, len); } UPB_INLINE int32_t const* google_protobuf_SourceCodeInfo_Location_path(const google_protobuf_SourceCodeInfo_Location *msg, size_t *len) { return (int32_t const*)_upb_array_accessor(msg, UPB_SIZE(20, 40), len); } UPB_INLINE int32_t const* google_protobuf_SourceCodeInfo_Location_span(const google_protobuf_SourceCodeInfo_Location *msg, size_t *len) { return (int32_t const*)_upb_array_accessor(msg, UPB_SIZE(24, 48), len); } -UPB_INLINE bool google_protobuf_SourceCodeInfo_Location_has_leading_comments(const google_protobuf_SourceCodeInfo_Location *msg) { return _upb_has_field(msg, 1); } +UPB_INLINE bool google_protobuf_SourceCodeInfo_Location_has_leading_comments(const google_protobuf_SourceCodeInfo_Location *msg) { return _upb_hasbit(msg, 1); } UPB_INLINE upb_strview google_protobuf_SourceCodeInfo_Location_leading_comments(const google_protobuf_SourceCodeInfo_Location *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(4, 8), upb_strview); } -UPB_INLINE bool google_protobuf_SourceCodeInfo_Location_has_trailing_comments(const google_protobuf_SourceCodeInfo_Location *msg) { return _upb_has_field(msg, 2); } +UPB_INLINE bool google_protobuf_SourceCodeInfo_Location_has_trailing_comments(const google_protobuf_SourceCodeInfo_Location *msg) { return _upb_hasbit(msg, 2); } UPB_INLINE upb_strview google_protobuf_SourceCodeInfo_Location_trailing_comments(const google_protobuf_SourceCodeInfo_Location *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(12, 24), upb_strview); } UPB_INLINE upb_strview const* google_protobuf_SourceCodeInfo_Location_leading_detached_comments(const google_protobuf_SourceCodeInfo_Location *msg, size_t *len) { return (upb_strview const*)_upb_array_accessor(msg, UPB_SIZE(28, 56), len); } @@ -2959,20 +3755,20 @@ UPB_INLINE int32_t* google_protobuf_SourceCodeInfo_Location_mutable_path(google_ return (int32_t*)_upb_array_mutable_accessor(msg, UPB_SIZE(20, 40), len); } UPB_INLINE int32_t* google_protobuf_SourceCodeInfo_Location_resize_path(google_protobuf_SourceCodeInfo_Location *msg, size_t len, upb_arena *arena) { - return (int32_t*)_upb_array_resize_accessor(msg, UPB_SIZE(20, 40), len, UPB_TYPE_INT32, arena); + return (int32_t*)_upb_array_resize_accessor2(msg, UPB_SIZE(20, 40), len, 2, arena); } UPB_INLINE bool google_protobuf_SourceCodeInfo_Location_add_path(google_protobuf_SourceCodeInfo_Location *msg, int32_t val, upb_arena *arena) { - return _upb_array_append_accessor(msg, UPB_SIZE(20, 40), UPB_SIZE(4, 4), UPB_TYPE_INT32, &val, + return _upb_array_append_accessor2(msg, UPB_SIZE(20, 40), 2, &val, arena); } UPB_INLINE int32_t* google_protobuf_SourceCodeInfo_Location_mutable_span(google_protobuf_SourceCodeInfo_Location *msg, size_t *len) { return (int32_t*)_upb_array_mutable_accessor(msg, UPB_SIZE(24, 48), len); } UPB_INLINE int32_t* google_protobuf_SourceCodeInfo_Location_resize_span(google_protobuf_SourceCodeInfo_Location *msg, size_t len, upb_arena *arena) { - return (int32_t*)_upb_array_resize_accessor(msg, UPB_SIZE(24, 48), len, UPB_TYPE_INT32, arena); + return (int32_t*)_upb_array_resize_accessor2(msg, UPB_SIZE(24, 48), len, 2, arena); } UPB_INLINE bool google_protobuf_SourceCodeInfo_Location_add_span(google_protobuf_SourceCodeInfo_Location *msg, int32_t val, upb_arena *arena) { - return _upb_array_append_accessor(msg, UPB_SIZE(24, 48), UPB_SIZE(4, 4), UPB_TYPE_INT32, &val, + return _upb_array_append_accessor2(msg, UPB_SIZE(24, 48), 2, &val, arena); } UPB_INLINE void google_protobuf_SourceCodeInfo_Location_set_leading_comments(google_protobuf_SourceCodeInfo_Location *msg, upb_strview value) { @@ -2987,10 +3783,10 @@ UPB_INLINE upb_strview* google_protobuf_SourceCodeInfo_Location_mutable_leading_ return (upb_strview*)_upb_array_mutable_accessor(msg, UPB_SIZE(28, 56), len); } UPB_INLINE upb_strview* google_protobuf_SourceCodeInfo_Location_resize_leading_detached_comments(google_protobuf_SourceCodeInfo_Location *msg, size_t len, upb_arena *arena) { - return (upb_strview*)_upb_array_resize_accessor(msg, UPB_SIZE(28, 56), len, UPB_TYPE_STRING, arena); + return (upb_strview*)_upb_array_resize_accessor2(msg, UPB_SIZE(28, 56), len, UPB_SIZE(3, 4), arena); } UPB_INLINE bool google_protobuf_SourceCodeInfo_Location_add_leading_detached_comments(google_protobuf_SourceCodeInfo_Location *msg, upb_strview val, upb_arena *arena) { - return _upb_array_append_accessor(msg, UPB_SIZE(28, 56), UPB_SIZE(8, 16), UPB_TYPE_STRING, &val, + return _upb_array_append_accessor2(msg, UPB_SIZE(28, 56), UPB_SIZE(3, 4), &val, arena); } @@ -3004,6 +3800,12 @@ UPB_INLINE google_protobuf_GeneratedCodeInfo *google_protobuf_GeneratedCodeInfo_ google_protobuf_GeneratedCodeInfo *ret = google_protobuf_GeneratedCodeInfo_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_GeneratedCodeInfo_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_GeneratedCodeInfo *google_protobuf_GeneratedCodeInfo_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_GeneratedCodeInfo *ret = google_protobuf_GeneratedCodeInfo_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_GeneratedCodeInfo_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_GeneratedCodeInfo_serialize(const google_protobuf_GeneratedCodeInfo *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_GeneratedCodeInfo_msginit, arena, len); } @@ -3015,12 +3817,12 @@ UPB_INLINE google_protobuf_GeneratedCodeInfo_Annotation** google_protobuf_Genera return (google_protobuf_GeneratedCodeInfo_Annotation**)_upb_array_mutable_accessor(msg, UPB_SIZE(0, 0), len); } UPB_INLINE google_protobuf_GeneratedCodeInfo_Annotation** google_protobuf_GeneratedCodeInfo_resize_annotation(google_protobuf_GeneratedCodeInfo *msg, size_t len, upb_arena *arena) { - return (google_protobuf_GeneratedCodeInfo_Annotation**)_upb_array_resize_accessor(msg, UPB_SIZE(0, 0), len, UPB_TYPE_MESSAGE, arena); + return (google_protobuf_GeneratedCodeInfo_Annotation**)_upb_array_resize_accessor2(msg, UPB_SIZE(0, 0), len, UPB_SIZE(2, 3), arena); } UPB_INLINE struct google_protobuf_GeneratedCodeInfo_Annotation* google_protobuf_GeneratedCodeInfo_add_annotation(google_protobuf_GeneratedCodeInfo *msg, upb_arena *arena) { struct google_protobuf_GeneratedCodeInfo_Annotation* sub = (struct google_protobuf_GeneratedCodeInfo_Annotation*)_upb_msg_new(&google_protobuf_GeneratedCodeInfo_Annotation_msginit, arena); - bool ok = _upb_array_append_accessor( - msg, UPB_SIZE(0, 0), UPB_SIZE(4, 8), UPB_TYPE_MESSAGE, &sub, arena); + bool ok = _upb_array_append_accessor2( + msg, UPB_SIZE(0, 0), UPB_SIZE(2, 3), &sub, arena); if (!ok) return NULL; return sub; } @@ -3035,38 +3837,44 @@ UPB_INLINE google_protobuf_GeneratedCodeInfo_Annotation *google_protobuf_Generat google_protobuf_GeneratedCodeInfo_Annotation *ret = google_protobuf_GeneratedCodeInfo_Annotation_new(arena); return (ret && upb_decode(buf, size, ret, &google_protobuf_GeneratedCodeInfo_Annotation_msginit, arena)) ? ret : NULL; } +UPB_INLINE google_protobuf_GeneratedCodeInfo_Annotation *google_protobuf_GeneratedCodeInfo_Annotation_parse_ex(const char *buf, size_t size, + upb_arena *arena, int options) { + google_protobuf_GeneratedCodeInfo_Annotation *ret = google_protobuf_GeneratedCodeInfo_Annotation_new(arena); + return (ret && _upb_decode(buf, size, ret, &google_protobuf_GeneratedCodeInfo_Annotation_msginit, arena, options)) + ? ret : NULL; +} UPB_INLINE char *google_protobuf_GeneratedCodeInfo_Annotation_serialize(const google_protobuf_GeneratedCodeInfo_Annotation *msg, upb_arena *arena, size_t *len) { return upb_encode(msg, &google_protobuf_GeneratedCodeInfo_Annotation_msginit, arena, len); } UPB_INLINE int32_t const* google_protobuf_GeneratedCodeInfo_Annotation_path(const google_protobuf_GeneratedCodeInfo_Annotation *msg, size_t *len) { return (int32_t const*)_upb_array_accessor(msg, UPB_SIZE(20, 32), len); } -UPB_INLINE bool google_protobuf_GeneratedCodeInfo_Annotation_has_source_file(const google_protobuf_GeneratedCodeInfo_Annotation *msg) { return _upb_has_field(msg, 3); } +UPB_INLINE bool google_protobuf_GeneratedCodeInfo_Annotation_has_source_file(const google_protobuf_GeneratedCodeInfo_Annotation *msg) { return _upb_hasbit(msg, 1); } UPB_INLINE upb_strview google_protobuf_GeneratedCodeInfo_Annotation_source_file(const google_protobuf_GeneratedCodeInfo_Annotation *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(12, 16), upb_strview); } -UPB_INLINE bool google_protobuf_GeneratedCodeInfo_Annotation_has_begin(const google_protobuf_GeneratedCodeInfo_Annotation *msg) { return _upb_has_field(msg, 1); } +UPB_INLINE bool google_protobuf_GeneratedCodeInfo_Annotation_has_begin(const google_protobuf_GeneratedCodeInfo_Annotation *msg) { return _upb_hasbit(msg, 2); } UPB_INLINE int32_t google_protobuf_GeneratedCodeInfo_Annotation_begin(const google_protobuf_GeneratedCodeInfo_Annotation *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(4, 4), int32_t); } -UPB_INLINE bool google_protobuf_GeneratedCodeInfo_Annotation_has_end(const google_protobuf_GeneratedCodeInfo_Annotation *msg) { return _upb_has_field(msg, 2); } +UPB_INLINE bool google_protobuf_GeneratedCodeInfo_Annotation_has_end(const google_protobuf_GeneratedCodeInfo_Annotation *msg) { return _upb_hasbit(msg, 3); } UPB_INLINE int32_t google_protobuf_GeneratedCodeInfo_Annotation_end(const google_protobuf_GeneratedCodeInfo_Annotation *msg) { return *UPB_PTR_AT(msg, UPB_SIZE(8, 8), int32_t); } UPB_INLINE int32_t* google_protobuf_GeneratedCodeInfo_Annotation_mutable_path(google_protobuf_GeneratedCodeInfo_Annotation *msg, size_t *len) { return (int32_t*)_upb_array_mutable_accessor(msg, UPB_SIZE(20, 32), len); } UPB_INLINE int32_t* google_protobuf_GeneratedCodeInfo_Annotation_resize_path(google_protobuf_GeneratedCodeInfo_Annotation *msg, size_t len, upb_arena *arena) { - return (int32_t*)_upb_array_resize_accessor(msg, UPB_SIZE(20, 32), len, UPB_TYPE_INT32, arena); + return (int32_t*)_upb_array_resize_accessor2(msg, UPB_SIZE(20, 32), len, 2, arena); } UPB_INLINE bool google_protobuf_GeneratedCodeInfo_Annotation_add_path(google_protobuf_GeneratedCodeInfo_Annotation *msg, int32_t val, upb_arena *arena) { - return _upb_array_append_accessor(msg, UPB_SIZE(20, 32), UPB_SIZE(4, 4), UPB_TYPE_INT32, &val, + return _upb_array_append_accessor2(msg, UPB_SIZE(20, 32), 2, &val, arena); } UPB_INLINE void google_protobuf_GeneratedCodeInfo_Annotation_set_source_file(google_protobuf_GeneratedCodeInfo_Annotation *msg, upb_strview value) { - _upb_sethas(msg, 3); + _upb_sethas(msg, 1); *UPB_PTR_AT(msg, UPB_SIZE(12, 16), upb_strview) = value; } UPB_INLINE void google_protobuf_GeneratedCodeInfo_Annotation_set_begin(google_protobuf_GeneratedCodeInfo_Annotation *msg, int32_t value) { - _upb_sethas(msg, 1); + _upb_sethas(msg, 2); *UPB_PTR_AT(msg, UPB_SIZE(4, 4), int32_t) = value; } UPB_INLINE void google_protobuf_GeneratedCodeInfo_Annotation_set_end(google_protobuf_GeneratedCodeInfo_Annotation *msg, int32_t value) { - _upb_sethas(msg, 2); + _upb_sethas(msg, 3); *UPB_PTR_AT(msg, UPB_SIZE(8, 8), int32_t) = value; } @@ -3093,6 +3901,7 @@ UPB_INLINE void google_protobuf_GeneratedCodeInfo_Annotation_set_end(google_prot #define UPB_DEF_H_ +/* Must be last. */ #ifdef __cplusplus extern "C" { @@ -3191,9 +4000,10 @@ typedef upb_inttable_iter upb_oneof_iter; const char *upb_oneofdef_name(const upb_oneofdef *o); const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o); -int upb_oneofdef_numfields(const upb_oneofdef *o); uint32_t upb_oneofdef_index(const upb_oneofdef *o); bool upb_oneofdef_issynthetic(const upb_oneofdef *o); +int upb_oneofdef_fieldcount(const upb_oneofdef *o); +const upb_fielddef *upb_oneofdef_field(const upb_oneofdef *o, int i); /* Oneof lookups: * - ntof: look up a field by name. @@ -3207,11 +4017,8 @@ UPB_INLINE const upb_fielddef *upb_oneofdef_ntofz(const upb_oneofdef *o, } const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num); -/* upb_oneof_iter i; - * for(upb_oneof_begin(&i, e); !upb_oneof_done(&i); upb_oneof_next(&i)) { - * // ... - * } - */ +/* DEPRECATED, slated for removal. */ +int upb_oneofdef_numfields(const upb_oneofdef *o); void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o); void upb_oneof_next(upb_oneof_iter *iter); bool upb_oneof_done(upb_oneof_iter *iter); @@ -3219,6 +4026,7 @@ upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter); void upb_oneof_iter_setdone(upb_oneof_iter *iter); bool upb_oneof_iter_isequal(const upb_oneof_iter *iter1, const upb_oneof_iter *iter2); +/* END DEPRECATED */ /* upb_msgdef *****************************************************************/ @@ -3244,20 +4052,21 @@ typedef upb_strtable_iter upb_msg_oneof_iter; const char *upb_msgdef_fullname(const upb_msgdef *m); const upb_filedef *upb_msgdef_file(const upb_msgdef *m); const char *upb_msgdef_name(const upb_msgdef *m); -int upb_msgdef_numfields(const upb_msgdef *m); -int upb_msgdef_numoneofs(const upb_msgdef *m); -int upb_msgdef_numrealoneofs(const upb_msgdef *m); upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m); bool upb_msgdef_mapentry(const upb_msgdef *m); upb_wellknowntype_t upb_msgdef_wellknowntype(const upb_msgdef *m); +bool upb_msgdef_iswrapper(const upb_msgdef *m); bool upb_msgdef_isnumberwrapper(const upb_msgdef *m); +int upb_msgdef_fieldcount(const upb_msgdef *m); +int upb_msgdef_oneofcount(const upb_msgdef *m); +const upb_fielddef *upb_msgdef_field(const upb_msgdef *m, int i); +const upb_oneofdef *upb_msgdef_oneof(const upb_msgdef *m, int i); const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i); const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name, size_t len); const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name, size_t len); const upb_msglayout *upb_msgdef_layout(const upb_msgdef *m); -const upb_fielddef *_upb_msgdef_field(const upb_msgdef *m, int i); UPB_INLINE const upb_oneofdef *upb_msgdef_ntooz(const upb_msgdef *m, const char *name) { @@ -3289,19 +4098,10 @@ UPB_INLINE bool upb_msgdef_lookupnamez(const upb_msgdef *m, const char *name, const upb_fielddef *upb_msgdef_lookupjsonname(const upb_msgdef *m, const char *name, size_t len); -/* Iteration over fields and oneofs. For example: - * - * upb_msg_field_iter i; - * for(upb_msg_field_begin(&i, m); - * !upb_msg_field_done(&i); - * upb_msg_field_next(&i)) { - * upb_fielddef *f = upb_msg_iter_field(&i); - * // ... - * } - * - * For C we don't have separate iterators for const and non-const. - * It is the caller's responsibility to cast the upb_fielddef* to - * const if the upb_msgdef* is const. */ +/* DEPRECATED, slated for removal */ +int upb_msgdef_numfields(const upb_msgdef *m); +int upb_msgdef_numoneofs(const upb_msgdef *m); +int upb_msgdef_numrealoneofs(const upb_msgdef *m); void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m); void upb_msg_field_next(upb_msg_field_iter *iter); bool upb_msg_field_done(const upb_msg_field_iter *iter); @@ -3309,9 +4109,6 @@ upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter); void upb_msg_field_iter_setdone(upb_msg_field_iter *iter); bool upb_msg_field_iter_isequal(const upb_msg_field_iter * iter1, const upb_msg_field_iter * iter2); - -/* Similar to above, we also support iterating through the oneofs in a - * msgdef. */ void upb_msg_oneof_begin(upb_msg_oneof_iter * iter, const upb_msgdef *m); void upb_msg_oneof_next(upb_msg_oneof_iter * iter); bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter); @@ -3319,6 +4116,7 @@ const upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter); void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter * iter); bool upb_msg_oneof_iter_isequal(const upb_msg_oneof_iter *iter1, const upb_msg_oneof_iter *iter2); +/* END DEPRECATED */ /* upb_enumdef ****************************************************************/ @@ -3343,11 +4141,6 @@ UPB_INLINE bool upb_enumdef_ntoiz(const upb_enumdef *e, } const char *upb_enumdef_iton(const upb_enumdef *e, int32_t num); -/* upb_enum_iter i; - * for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { - * // ... - * } - */ void upb_enum_begin(upb_enum_iter *iter, const upb_enumdef *e); void upb_enum_next(upb_enum_iter *iter); bool upb_enum_done(upb_enum_iter *iter); @@ -3367,6 +4160,8 @@ int upb_filedef_enumcount(const upb_filedef *f); const upb_filedef *upb_filedef_dep(const upb_filedef *f, int i); const upb_msgdef *upb_filedef_msg(const upb_filedef *f, int i); const upb_enumdef *upb_filedef_enum(const upb_filedef *f, int i); +const upb_symtab *upb_filedef_symtab(const upb_filedef *f); +upb_arena *_upb_symtab_arena(const upb_symtab *s); /* upb_symtab *****************************************************************/ @@ -3377,10 +4172,13 @@ const upb_msgdef *upb_symtab_lookupmsg2( const upb_symtab *s, const char *sym, size_t len); const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym); const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, const char *name); +const upb_filedef *upb_symtab_lookupfile2( + const upb_symtab *s, const char *name, size_t len); int upb_symtab_filecount(const upb_symtab *s); const upb_filedef *upb_symtab_addfile( upb_symtab *s, const google_protobuf_FileDescriptorProto *file, upb_status *status); +size_t _upb_symtab_bytesloaded(const upb_symtab *s); /* For generated code only: loads a generated descriptor. */ typedef struct upb_def_init { @@ -3404,6 +4202,10 @@ bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init); +#ifdef __cplusplus +extern "C" { +#endif + typedef union { bool bool_val; float float_val; @@ -3424,6 +4226,8 @@ typedef union { upb_array* array; } upb_mutmsgval; +upb_msgval upb_fielddef_default(const upb_fielddef *f); + /** upb_msg *******************************************************************/ /* Creates a new message of the given type in the given arena. */ @@ -3440,8 +4244,9 @@ upb_mutmsgval upb_msg_mutable(upb_msg *msg, const upb_fielddef *f, upb_arena *a) /* May only be called for fields where upb_fielddef_haspresence(f) == true. */ bool upb_msg_has(const upb_msg *msg, const upb_fielddef *f); -/* Returns whether any field is set in the oneof. */ -bool upb_msg_hasoneof(const upb_msg *msg, const upb_oneofdef *o); +/* Returns the field that is set in the oneof, or NULL if none are set. */ +const upb_fielddef *upb_msg_whichoneof(const upb_msg *msg, + const upb_oneofdef *o); /* Sets the given field to the given value. For a msg/array/map/string, the * value must be in the same arena. */ @@ -3451,6 +4256,9 @@ void upb_msg_set(upb_msg *msg, const upb_fielddef *f, upb_msgval val, /* Clears any field presence and sets the value back to its default. */ void upb_msg_clearfield(upb_msg *msg, const upb_fielddef *f); +/* Clear all data and unknown fields. */ +void upb_msg_clear(upb_msg *msg, const upb_msgdef *m); + /* Iterate over present fields. * * size_t iter = UPB_MSG_BEGIN; @@ -3475,6 +4283,9 @@ bool upb_msg_next(const upb_msg *msg, const upb_msgdef *m, void upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, upb_arena *arena); +/* Clears all unknown field data from this message and all submessages. */ +bool upb_msg_discardunknown(upb_msg *msg, const upb_msgdef *m, int maxdepth); + /* Returns a reference to the message's unknown data. */ const char *upb_msg_getunknown(const upb_msg *msg, size_t *len); @@ -3539,6 +4350,11 @@ bool upb_map_delete(upb_map *map, upb_msgval key); /* Advances to the next entry. Returns false if no more entries are present. */ bool upb_mapiter_next(const upb_map *map, size_t *iter); +/* Returns true if the iterator still points to a valid entry, or false if the + * iterator is past the last element. It is an error to call this function with + * UPB_MAP_BEGIN (you must call next() at least once first). */ +bool upb_mapiter_done(const upb_map *map, size_t iter); + /* Returns the key and value for this entry of the map. */ upb_msgval upb_mapiter_key(const upb_map *map, size_t iter); upb_msgval upb_mapiter_value(const upb_map *map, size_t iter); @@ -3547,3211 +4363,69 @@ upb_msgval upb_mapiter_value(const upb_map *map, size_t iter); * iterator must not have been initialized const. */ void upb_mapiter_setvalue(upb_map *map, size_t iter, upb_msgval value); +#ifdef __cplusplus +} /* extern "C" */ +#endif -#endif /* UPB_REFLECTION_H_ */ -/* -** upb::Handlers (upb_handlers) -** -** A upb_handlers is like a virtual table for a upb_msgdef. Each field of the -** message can have associated functions that will be called when we are -** parsing or visiting a stream of data. This is similar to how handlers work -** in SAX (the Simple API for XML). -** -** The handlers have no idea where the data is coming from, so a single set of -** handlers could be used with two completely different data sources (for -** example, a parser and a visitor over in-memory objects). This decoupling is -** the most important feature of upb, because it allows parsers and serializers -** to be highly reusable. -** -** This is a mixed C/C++ interface that offers a full API to both languages. -** See the top-level README for more information. -*/ -#ifndef UPB_HANDLERS_H -#define UPB_HANDLERS_H +#endif /* UPB_REFLECTION_H_ */ +#ifndef UPB_JSONDECODE_H_ +#define UPB_JSONDECODE_H_ #ifdef __cplusplus -namespace upb { -class HandlersPtr; -class HandlerCache; -template class Handler; -template struct CanonicalType; -} /* namespace upb */ +extern "C" { #endif +enum { + UPB_JSONDEC_IGNOREUNKNOWN = 1 +}; -/* The maximum depth that the handler graph can have. This is a resource limit - * for the C stack since we sometimes need to recursively traverse the graph. - * Cycles are ok; the traversal will stop when it detects a cycle, but we must - * hit the cycle before the maximum depth is reached. - * - * If having a single static limit is too inflexible, we can add another variant - * of Handlers::Freeze that allows specifying this as a parameter. */ -#define UPB_MAX_HANDLER_DEPTH 64 - -/* All the different types of handlers that can be registered. - * Only needed for the advanced functions in upb::Handlers. */ -typedef enum { - UPB_HANDLER_INT32, - UPB_HANDLER_INT64, - UPB_HANDLER_UINT32, - UPB_HANDLER_UINT64, - UPB_HANDLER_FLOAT, - UPB_HANDLER_DOUBLE, - UPB_HANDLER_BOOL, - UPB_HANDLER_STARTSTR, - UPB_HANDLER_STRING, - UPB_HANDLER_ENDSTR, - UPB_HANDLER_STARTSUBMSG, - UPB_HANDLER_ENDSUBMSG, - UPB_HANDLER_STARTSEQ, - UPB_HANDLER_ENDSEQ -} upb_handlertype_t; - -#define UPB_HANDLER_MAX (UPB_HANDLER_ENDSEQ+1) - -#define UPB_BREAK NULL - -/* A convenient definition for when no closure is needed. */ -extern char _upb_noclosure; -#define UPB_NO_CLOSURE &_upb_noclosure - -/* A selector refers to a specific field handler in the Handlers object - * (for example: the STARTSUBMSG handler for field "field15"). */ -typedef int32_t upb_selector_t; - -/* Static selectors for upb::Handlers. */ -#define UPB_STARTMSG_SELECTOR 0 -#define UPB_ENDMSG_SELECTOR 1 -#define UPB_UNKNOWN_SELECTOR 2 -#define UPB_STATIC_SELECTOR_COUNT 3 /* Warning: also in upb/def.c. */ - -/* Static selectors for upb::BytesHandler. */ -#define UPB_STARTSTR_SELECTOR 0 -#define UPB_STRING_SELECTOR 1 -#define UPB_ENDSTR_SELECTOR 2 +bool upb_json_decode(const char *buf, size_t size, upb_msg *msg, + const upb_msgdef *m, const upb_symtab *any_pool, + int options, upb_arena *arena, upb_status *status); #ifdef __cplusplus -template const void *UniquePtrForType() { - static const char ch = 0; - return &ch; -} +} /* extern "C" */ #endif -/* upb_handlers ************************************************************/ - -/* Handler attributes, to be registered with the handler itself. */ -typedef struct { - const void *handler_data; - const void *closure_type; - const void *return_closure_type; - bool alwaysok; -} upb_handlerattr; - -#define UPB_HANDLERATTR_INIT {NULL, NULL, NULL, false} - -/* Bufhandle, data passed along with a buffer to indicate its provenance. */ -typedef struct { - /* The beginning of the buffer. This may be different than the pointer - * passed to a StringBuf handler because the handler may receive data - * that is from the middle or end of a larger buffer. */ - const char *buf; - - /* The offset within the attached object where this buffer begins. Only - * meaningful if there is an attached object. */ - size_t objofs; - - /* The attached object (if any) and a pointer representing its type. */ - const void *obj; - const void *objtype; +#endif /* UPB_JSONDECODE_H_ */ -#ifdef __cplusplus - template - void SetAttachedObject(const T* _obj) { - obj = _obj; - objtype = UniquePtrForType(); - } +#ifndef UPB_JSONENCODE_H_ +#define UPB_JSONENCODE_H_ - template - const T *GetAttachedObject() const { - return objtype == UniquePtrForType() ? static_cast(obj) - : NULL; - } -#endif -} upb_bufhandle; - -#define UPB_BUFHANDLE_INIT {NULL, 0, NULL, NULL} - -/* Handler function typedefs. */ -typedef void upb_handlerfree(void *d); -typedef bool upb_unknown_handlerfunc(void *c, const void *hd, const char *buf, - size_t n); -typedef bool upb_startmsg_handlerfunc(void *c, const void*); -typedef bool upb_endmsg_handlerfunc(void *c, const void *, upb_status *status); -typedef void* upb_startfield_handlerfunc(void *c, const void *hd); -typedef bool upb_endfield_handlerfunc(void *c, const void *hd); -typedef bool upb_int32_handlerfunc(void *c, const void *hd, int32_t val); -typedef bool upb_int64_handlerfunc(void *c, const void *hd, int64_t val); -typedef bool upb_uint32_handlerfunc(void *c, const void *hd, uint32_t val); -typedef bool upb_uint64_handlerfunc(void *c, const void *hd, uint64_t val); -typedef bool upb_float_handlerfunc(void *c, const void *hd, float val); -typedef bool upb_double_handlerfunc(void *c, const void *hd, double val); -typedef bool upb_bool_handlerfunc(void *c, const void *hd, bool val); -typedef void *upb_startstr_handlerfunc(void *c, const void *hd, - size_t size_hint); -typedef size_t upb_string_handlerfunc(void *c, const void *hd, const char *buf, - size_t n, const upb_bufhandle* handle); - -struct upb_handlers; -typedef struct upb_handlers upb_handlers; #ifdef __cplusplus extern "C" { #endif -/* Mutating accessors. */ -const upb_status *upb_handlers_status(upb_handlers *h); -void upb_handlers_clearerr(upb_handlers *h); -const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h); -bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *hfree); -bool upb_handlers_setunknown(upb_handlers *h, upb_unknown_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setint32(upb_handlers *h, const upb_fielddef *f, - upb_int32_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setint64(upb_handlers *h, const upb_fielddef *f, - upb_int64_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setuint32(upb_handlers *h, const upb_fielddef *f, - upb_uint32_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setuint64(upb_handlers *h, const upb_fielddef *f, - upb_uint64_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setfloat(upb_handlers *h, const upb_fielddef *f, - upb_float_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setdouble(upb_handlers *h, const upb_fielddef *f, - upb_double_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setbool(upb_handlers *h, const upb_fielddef *f, - upb_bool_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setstartstr(upb_handlers *h, const upb_fielddef *f, - upb_startstr_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setstring(upb_handlers *h, const upb_fielddef *f, - upb_string_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setendstr(upb_handlers *h, const upb_fielddef *f, - upb_endfield_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setstartseq(upb_handlers *h, const upb_fielddef *f, - upb_startfield_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setstartsubmsg(upb_handlers *h, const upb_fielddef *f, - upb_startfield_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setendsubmsg(upb_handlers *h, const upb_fielddef *f, - upb_endfield_handlerfunc *func, - const upb_handlerattr *attr); -bool upb_handlers_setendseq(upb_handlers *h, const upb_fielddef *f, - upb_endfield_handlerfunc *func, - const upb_handlerattr *attr); - -/* Read-only accessors. */ -const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h, - const upb_fielddef *f); -const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h, - upb_selector_t sel); -upb_func *upb_handlers_gethandler(const upb_handlers *h, upb_selector_t s, - const void **handler_data); -bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t s, - upb_handlerattr *attr); - -/* "Static" methods */ -upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f); -bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type, - upb_selector_t *s); -UPB_INLINE upb_selector_t upb_handlers_getendselector(upb_selector_t start) { - return start + 1; -} - -#ifdef __cplusplus -} /* extern "C" */ - -namespace upb { -typedef upb_handlers Handlers; -} - -/* Convenience macros for creating a Handler object that is wrapped with a - * type-safe wrapper function that converts the "void*" parameters/returns - * of the underlying C API into nice C++ function. - * - * Sample usage: - * void OnValue1(MyClosure* c, const MyHandlerData* d, int32_t val) { - * // do stuff ... - * } - * - * // Handler that doesn't need any data bound to it. - * void OnValue2(MyClosure* c, int32_t val) { - * // do stuff ... - * } - * - * // Handler that returns bool so it can return failure if necessary. - * bool OnValue3(MyClosure* c, int32_t val) { - * // do stuff ... - * return ok; - * } - * - * // Member function handler. - * class MyClosure { - * public: - * void OnValue(int32_t val) { - * // do stuff ... - * } - * }; - * - * // Takes ownership of the MyHandlerData. - * handlers->SetInt32Handler(f1, UpbBind(OnValue1, new MyHandlerData(...))); - * handlers->SetInt32Handler(f2, UpbMakeHandler(OnValue2)); - * handlers->SetInt32Handler(f1, UpbMakeHandler(OnValue3)); - * handlers->SetInt32Handler(f2, UpbMakeHandler(&MyClosure::OnValue)); - */ - -/* In C++11, the "template" disambiguator can appear even outside templates, - * so all calls can safely use this pair of macros. */ - -#define UpbMakeHandler(f) upb::MatchFunc(f).template GetFunc() - -/* We have to be careful to only evaluate "d" once. */ -#define UpbBind(f, d) upb::MatchFunc(f).template GetFunc((d)) - -/* Handler: a struct that contains the (handler, data, deleter) tuple that is - * used to register all handlers. Users can Make() these directly but it's - * more convenient to use the UpbMakeHandler/UpbBind macros above. */ -template class upb::Handler { - public: - /* The underlying, handler function signature that upb uses internally. */ - typedef T FuncPtr; - - /* Intentionally implicit. */ - template Handler(F func); - ~Handler() { UPB_ASSERT(registered_); } - - void AddCleanup(upb_handlers* h) const; - FuncPtr handler() const { return handler_; } - const upb_handlerattr& attr() const { return attr_; } - - private: - Handler(const Handler&) = delete; - Handler& operator=(const Handler&) = delete; +enum { + /* When set, emits 0/default values. TODO(haberman): proto3 only? */ + UPB_JSONENC_EMITDEFAULTS = 1, - FuncPtr handler_; - mutable upb_handlerattr attr_; - mutable bool registered_; - void *cleanup_data_; - upb_handlerfree *cleanup_func_; + /* When set, use normal (snake_caes) field names instead of JSON (camelCase) + names. */ + UPB_JSONENC_PROTONAMES = 2 }; -/* A upb::Handlers object represents the set of handlers associated with a - * message in the graph of messages. You can think of it as a big virtual - * table with functions corresponding to all the events that can fire while - * parsing or visiting a message of a specific type. +/* Encodes the given |msg| to JSON format. The message's reflection is given in + * |m|. The symtab in |symtab| is used to find extensions (if NULL, extensions + * will not be printed). * - * Any handlers that are not set behave as if they had successfully consumed - * the value. Any unset Start* handlers will propagate their closure to the - * inner frame. - * - * The easiest way to create the *Handler objects needed by the Set* methods is - * with the UpbBind() and UpbMakeHandler() macros; see below. */ -class upb::HandlersPtr { - public: - HandlersPtr(upb_handlers* ptr) : ptr_(ptr) {} - - upb_handlers* ptr() const { return ptr_; } - - typedef upb_selector_t Selector; - typedef upb_handlertype_t Type; - - typedef Handler StartFieldHandler; - typedef Handler EndFieldHandler; - typedef Handler StartMessageHandler; - typedef Handler - EndMessageHandler; - typedef Handler StartStringHandler; - typedef Handler - StringHandler; - - template struct ValueHandler { - typedef Handler H; - }; - - typedef ValueHandler::H Int32Handler; - typedef ValueHandler::H Int64Handler; - typedef ValueHandler::H UInt32Handler; - typedef ValueHandler::H UInt64Handler; - typedef ValueHandler::H FloatHandler; - typedef ValueHandler::H DoubleHandler; - typedef ValueHandler::H BoolHandler; - - /* Any function pointer can be converted to this and converted back to its - * correct type. */ - typedef void GenericFunction(); - - typedef void HandlersCallback(const void *closure, upb_handlers *h); - - /* Returns the msgdef associated with this handlers object. */ - MessageDefPtr message_def() const { - return MessageDefPtr(upb_handlers_msgdef(ptr())); - } - - /* Adds the given pointer and function to the list of cleanup functions that - * will be run when these handlers are freed. If this pointer has previously - * been registered, the function returns false and does nothing. */ - bool AddCleanup(void *ptr, upb_handlerfree *cleanup) { - return upb_handlers_addcleanup(ptr_, ptr, cleanup); - } - - /* Sets the startmsg handler for the message, which is defined as follows: - * - * bool startmsg(MyType* closure) { - * // Called when the message begins. Returns true if processing should - * // continue. - * return true; - * } - */ - bool SetStartMessageHandler(const StartMessageHandler &h) { - h.AddCleanup(ptr()); - return upb_handlers_setstartmsg(ptr(), h.handler(), &h.attr()); - } - - /* Sets the endmsg handler for the message, which is defined as follows: - * - * bool endmsg(MyType* closure, upb_status *status) { - * // Called when processing of this message ends, whether in success or - * // failure. "status" indicates the final status of processing, and - * // can also be modified in-place to update the final status. - * } - */ - bool SetEndMessageHandler(const EndMessageHandler& h) { - h.AddCleanup(ptr()); - return upb_handlers_setendmsg(ptr(), h.handler(), &h.attr()); - } - - /* Sets the value handler for the given field, which is defined as follows - * (this is for an int32 field; other field types will pass their native - * C/C++ type for "val"): - * - * bool OnValue(MyClosure* c, const MyHandlerData* d, int32_t val) { - * // Called when the field's value is encountered. "d" contains - * // whatever data was bound to this field when it was registered. - * // Returns true if processing should continue. - * return true; - * } - * - * handers->SetInt32Handler(f, UpbBind(OnValue, new MyHandlerData(...))); - * - * The value type must exactly match f->type(). - * For example, a handler that takes an int32_t parameter may only be used for - * fields of type UPB_TYPE_INT32 and UPB_TYPE_ENUM. - * - * Returns false if the handler failed to register; in this case the cleanup - * handler (if any) will be called immediately. - */ - bool SetInt32Handler(FieldDefPtr f, const Int32Handler &h) { - h.AddCleanup(ptr()); - return upb_handlers_setint32(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - bool SetInt64Handler (FieldDefPtr f, const Int64Handler& h) { - h.AddCleanup(ptr()); - return upb_handlers_setint64(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - bool SetUInt32Handler(FieldDefPtr f, const UInt32Handler& h) { - h.AddCleanup(ptr()); - return upb_handlers_setuint32(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - bool SetUInt64Handler(FieldDefPtr f, const UInt64Handler& h) { - h.AddCleanup(ptr()); - return upb_handlers_setuint64(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - bool SetFloatHandler (FieldDefPtr f, const FloatHandler& h) { - h.AddCleanup(ptr()); - return upb_handlers_setfloat(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - bool SetDoubleHandler(FieldDefPtr f, const DoubleHandler& h) { - h.AddCleanup(ptr()); - return upb_handlers_setdouble(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - bool SetBoolHandler(FieldDefPtr f, const BoolHandler &h) { - h.AddCleanup(ptr()); - return upb_handlers_setbool(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - /* Like the previous, but templated on the type on the value (ie. int32). - * This is mostly useful to call from other templates. To call this you must - * specify the template parameter explicitly, ie: - * h->SetValueHandler(f, UpbBind(MyHandler, MyData)); */ - template - bool SetValueHandler( - FieldDefPtr f, - const typename ValueHandler::Type>::H &handler); - - /* Sets handlers for a string field, which are defined as follows: - * - * MySubClosure* startstr(MyClosure* c, const MyHandlerData* d, - * size_t size_hint) { - * // Called when a string value begins. The return value indicates the - * // closure for the string. "size_hint" indicates the size of the - * // string if it is known, however if the string is length-delimited - * // and the end-of-string is not available size_hint will be zero. - * // This case is indistinguishable from the case where the size is - * // known to be zero. - * // - * // TODO(haberman): is it important to distinguish these cases? - * // If we had ssize_t as a type we could make -1 "unknown", but - * // ssize_t is POSIX (not ANSI) and therefore less portable. - * // In practice I suspect it won't be important to distinguish. - * return closure; - * } - * - * size_t str(MyClosure* closure, const MyHandlerData* d, - * const char *str, size_t len) { - * // Called for each buffer of string data; the multiple physical buffers - * // are all part of the same logical string. The return value indicates - * // how many bytes were consumed. If this number is less than "len", - * // this will also indicate that processing should be halted for now, - * // like returning false or UPB_BREAK from any other callback. If - * // number is greater than "len", the excess bytes will be skipped over - * // and not passed to the callback. - * return len; - * } - * - * bool endstr(MyClosure* c, const MyHandlerData* d) { - * // Called when a string value ends. Return value indicates whether - * // processing should continue. - * return true; - * } - */ - bool SetStartStringHandler(FieldDefPtr f, const StartStringHandler &h) { - h.AddCleanup(ptr()); - return upb_handlers_setstartstr(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - bool SetStringHandler(FieldDefPtr f, const StringHandler& h) { - h.AddCleanup(ptr()); - return upb_handlers_setstring(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - bool SetEndStringHandler(FieldDefPtr f, const EndFieldHandler& h) { - h.AddCleanup(ptr()); - return upb_handlers_setendstr(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - /* Sets the startseq handler, which is defined as follows: - * - * MySubClosure *startseq(MyClosure* c, const MyHandlerData* d) { - * // Called when a sequence (repeated field) begins. The returned - * // pointer indicates the closure for the sequence (or UPB_BREAK - * // to interrupt processing). - * return closure; - * } - * - * h->SetStartSequenceHandler(f, UpbBind(startseq, new MyHandlerData(...))); - * - * Returns "false" if "f" does not belong to this message or is not a - * repeated field. - */ - bool SetStartSequenceHandler(FieldDefPtr f, const StartFieldHandler &h) { - h.AddCleanup(ptr()); - return upb_handlers_setstartseq(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - /* Sets the startsubmsg handler for the given field, which is defined as - * follows: - * - * MySubClosure* startsubmsg(MyClosure* c, const MyHandlerData* d) { - * // Called when a submessage begins. The returned pointer indicates the - * // closure for the sequence (or UPB_BREAK to interrupt processing). - * return closure; - * } - * - * h->SetStartSubMessageHandler(f, UpbBind(startsubmsg, - * new MyHandlerData(...))); - * - * Returns "false" if "f" does not belong to this message or is not a - * submessage/group field. - */ - bool SetStartSubMessageHandler(FieldDefPtr f, const StartFieldHandler& h) { - h.AddCleanup(ptr()); - return upb_handlers_setstartsubmsg(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - /* Sets the endsubmsg handler for the given field, which is defined as - * follows: - * - * bool endsubmsg(MyClosure* c, const MyHandlerData* d) { - * // Called when a submessage ends. Returns true to continue processing. - * return true; - * } - * - * Returns "false" if "f" does not belong to this message or is not a - * submessage/group field. - */ - bool SetEndSubMessageHandler(FieldDefPtr f, const EndFieldHandler &h) { - h.AddCleanup(ptr()); - return upb_handlers_setendsubmsg(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - /* Starts the endsubseq handler for the given field, which is defined as - * follows: - * - * bool endseq(MyClosure* c, const MyHandlerData* d) { - * // Called when a sequence ends. Returns true continue processing. - * return true; - * } - * - * Returns "false" if "f" does not belong to this message or is not a - * repeated field. - */ - bool SetEndSequenceHandler(FieldDefPtr f, const EndFieldHandler &h) { - h.AddCleanup(ptr()); - return upb_handlers_setendseq(ptr(), f.ptr(), h.handler(), &h.attr()); - } - - private: - upb_handlers* ptr_; -}; - -#endif /* __cplusplus */ - -/* upb_handlercache ***********************************************************/ - -/* A upb_handlercache lazily builds and caches upb_handlers. You pass it a - * function (with optional closure) that can build handlers for a given - * message on-demand, and the cache maintains a map of msgdef->handlers. */ - -#ifdef __cplusplus -extern "C" { -#endif - -struct upb_handlercache; -typedef struct upb_handlercache upb_handlercache; - -typedef void upb_handlers_callback(const void *closure, upb_handlers *h); - -upb_handlercache *upb_handlercache_new(upb_handlers_callback *callback, - const void *closure); -void upb_handlercache_free(upb_handlercache *cache); -const upb_handlers *upb_handlercache_get(upb_handlercache *cache, - const upb_msgdef *md); -bool upb_handlercache_addcleanup(upb_handlercache *h, void *p, - upb_handlerfree *hfree); - -#ifdef __cplusplus -} /* extern "C" */ - -class upb::HandlerCache { - public: - HandlerCache(upb_handlers_callback *callback, const void *closure) - : ptr_(upb_handlercache_new(callback, closure), upb_handlercache_free) {} - HandlerCache(HandlerCache&&) = default; - HandlerCache& operator=(HandlerCache&&) = default; - HandlerCache(upb_handlercache* c) : ptr_(c, upb_handlercache_free) {} - - upb_handlercache* ptr() { return ptr_.get(); } - - const upb_handlers *Get(MessageDefPtr md) { - return upb_handlercache_get(ptr_.get(), md.ptr()); - } - - private: - std::unique_ptr ptr_; -}; - -#endif /* __cplusplus */ - -/* upb_byteshandler ***********************************************************/ - -typedef struct { - upb_func *func; - - /* It is wasteful to include the entire attributes here: - * - * * Some of the information is redundant (like storing the closure type - * separately for each handler that must match). - * * Some of the info is only needed prior to freeze() (like closure types). - * * alignment padding wastes a lot of space for alwaysok_. - * - * If/when the size and locality of handlers is an issue, we can optimize this - * not to store the entire attr like this. We do not expose the table's - * layout to allow this optimization in the future. */ - upb_handlerattr attr; -} upb_handlers_tabent; - -#define UPB_TABENT_INIT {NULL, UPB_HANDLERATTR_INIT} - -typedef struct { - upb_handlers_tabent table[3]; -} upb_byteshandler; - -#define UPB_BYTESHANDLER_INIT \ - { \ - { UPB_TABENT_INIT, UPB_TABENT_INIT, UPB_TABENT_INIT } \ - } - -UPB_INLINE void upb_byteshandler_init(upb_byteshandler *handler) { - upb_byteshandler init = UPB_BYTESHANDLER_INIT; - *handler = init; -} - -#ifdef __cplusplus -extern "C" { -#endif - -/* Caller must ensure that "d" outlives the handlers. */ -bool upb_byteshandler_setstartstr(upb_byteshandler *h, - upb_startstr_handlerfunc *func, void *d); -bool upb_byteshandler_setstring(upb_byteshandler *h, - upb_string_handlerfunc *func, void *d); -bool upb_byteshandler_setendstr(upb_byteshandler *h, - upb_endfield_handlerfunc *func, void *d); - -#ifdef __cplusplus -} /* extern "C" */ - -namespace upb { -typedef upb_byteshandler BytesHandler; -} -#endif - -/** Message handlers ******************************************************************/ - -#ifdef __cplusplus -extern "C" { -#endif - -/* These are the handlers used internally by upb_msgfactory_getmergehandlers(). - * They write scalar data to a known offset from the message pointer. - * - * These would be trivial for anyone to implement themselves, but it's better - * to use these because some JITs will recognize and specialize these instead - * of actually calling the function. */ - -/* Sets a handler for the given primitive field that will write the data at the - * given offset. If hasbit > 0, also sets a hasbit at the given bit offset - * (addressing each byte low to high). */ -bool upb_msg_setscalarhandler(upb_handlers *h, - const upb_fielddef *f, - size_t offset, - int32_t hasbit); - -/* If the given handler is a msghandlers_primitive field, returns true and sets - * *type, *offset and *hasbit. Otherwise returns false. */ -bool upb_msg_getscalarhandlerdata(const upb_handlers *h, - upb_selector_t s, - upb_fieldtype_t *type, - size_t *offset, - int32_t *hasbit); - - - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -/* -** Inline definitions for handlers.h, which are particularly long and a bit -** tricky. -*/ - -#ifndef UPB_HANDLERS_INL_H_ -#define UPB_HANDLERS_INL_H_ - -#include -#include - - -#ifdef __cplusplus - -/* Type detection and typedefs for integer types. - * For platforms where there are multiple 32-bit or 64-bit types, we need to be - * able to enumerate them so we can properly create overloads for all variants. - * - * If any platform existed where there were three integer types with the same - * size, this would have to become more complicated. For example, short, int, - * and long could all be 32-bits. Even more diabolically, short, int, long, - * and long long could all be 64 bits and still be standard-compliant. - * However, few platforms are this strange, and it's unlikely that upb will be - * used on the strangest ones. */ - -/* Can't count on stdint.h limits like INT32_MAX, because in C++ these are - * only defined when __STDC_LIMIT_MACROS are defined before the *first* include - * of stdint.h. We can't guarantee that someone else didn't include these first - * without defining __STDC_LIMIT_MACROS. */ -#define UPB_INT32_MAX 0x7fffffffLL -#define UPB_INT32_MIN (-UPB_INT32_MAX - 1) -#define UPB_INT64_MAX 0x7fffffffffffffffLL -#define UPB_INT64_MIN (-UPB_INT64_MAX - 1) - -#if INT_MAX == UPB_INT32_MAX && INT_MIN == UPB_INT32_MIN -#define UPB_INT_IS_32BITS 1 -#endif - -#if LONG_MAX == UPB_INT32_MAX && LONG_MIN == UPB_INT32_MIN -#define UPB_LONG_IS_32BITS 1 -#endif - -#if LONG_MAX == UPB_INT64_MAX && LONG_MIN == UPB_INT64_MIN -#define UPB_LONG_IS_64BITS 1 -#endif - -#if LLONG_MAX == UPB_INT64_MAX && LLONG_MIN == UPB_INT64_MIN -#define UPB_LLONG_IS_64BITS 1 -#endif - -/* We use macros instead of typedefs so we can undefine them later and avoid - * leaking them outside this header file. */ -#if UPB_INT_IS_32BITS -#define UPB_INT32_T int -#define UPB_UINT32_T unsigned int - -#if UPB_LONG_IS_32BITS -#define UPB_TWO_32BIT_TYPES 1 -#define UPB_INT32ALT_T long -#define UPB_UINT32ALT_T unsigned long -#endif /* UPB_LONG_IS_32BITS */ - -#elif UPB_LONG_IS_32BITS /* && !UPB_INT_IS_32BITS */ -#define UPB_INT32_T long -#define UPB_UINT32_T unsigned long -#endif /* UPB_INT_IS_32BITS */ - - -#if UPB_LONG_IS_64BITS -#define UPB_INT64_T long -#define UPB_UINT64_T unsigned long - -#if UPB_LLONG_IS_64BITS -#define UPB_TWO_64BIT_TYPES 1 -#define UPB_INT64ALT_T long long -#define UPB_UINT64ALT_T unsigned long long -#endif /* UPB_LLONG_IS_64BITS */ - -#elif UPB_LLONG_IS_64BITS /* && !UPB_LONG_IS_64BITS */ -#define UPB_INT64_T long long -#define UPB_UINT64_T unsigned long long -#endif /* UPB_LONG_IS_64BITS */ - -#undef UPB_INT32_MAX -#undef UPB_INT32_MIN -#undef UPB_INT64_MAX -#undef UPB_INT64_MIN -#undef UPB_INT_IS_32BITS -#undef UPB_LONG_IS_32BITS -#undef UPB_LONG_IS_64BITS -#undef UPB_LLONG_IS_64BITS - - -namespace upb { - -typedef void CleanupFunc(void *ptr); - -/* Template to remove "const" from "const T*" and just return "T*". - * - * We define a nonsense default because otherwise it will fail to instantiate as - * a function parameter type even in cases where we don't expect any caller to - * actually match the overload. */ -class CouldntRemoveConst {}; -template struct remove_constptr { typedef CouldntRemoveConst type; }; -template struct remove_constptr { typedef T *type; }; - -/* Template that we use below to remove a template specialization from - * consideration if it matches a specific type. */ -template struct disable_if_same { typedef void Type; }; -template struct disable_if_same {}; - -template void DeletePointer(void *p) { delete static_cast(p); } - -template -struct FirstUnlessVoidOrBool { - typedef T1 value; -}; - -template -struct FirstUnlessVoidOrBool { - typedef T2 value; -}; - -template -struct FirstUnlessVoidOrBool { - typedef T2 value; -}; - -template -struct is_same { - static bool value; -}; - -template -struct is_same { - static bool value; -}; - -template -bool is_same::value = false; - -template -bool is_same::value = true; - -/* FuncInfo *******************************************************************/ - -/* Info about the user's original, pre-wrapped function. */ -template -struct FuncInfo { - /* The type of the closure that the function takes (its first param). */ - typedef C Closure; - - /* The return type. */ - typedef R Return; -}; - -/* Func ***********************************************************************/ - -/* Func1, Func2, Func3: Template classes representing a function and its - * signature. - * - * Since the function is a template parameter, calling the function can be - * inlined at compile-time and does not require a function pointer at runtime. - * These functions are not bound to a handler data so have no data or cleanup - * handler. */ -struct UnboundFunc { - CleanupFunc *GetCleanup() { return nullptr; } - void *GetData() { return nullptr; } -}; - -template -struct Func1 : public UnboundFunc { - typedef R Return; - typedef I FuncInfo; - static R Call(P1 p1) { return F(p1); } -}; - -template -struct Func2 : public UnboundFunc { - typedef R Return; - typedef I FuncInfo; - static R Call(P1 p1, P2 p2) { return F(p1, p2); } -}; - -template -struct Func3 : public UnboundFunc { - typedef R Return; - typedef I FuncInfo; - static R Call(P1 p1, P2 p2, P3 p3) { return F(p1, p2, p3); } -}; - -template -struct Func4 : public UnboundFunc { - typedef R Return; - typedef I FuncInfo; - static R Call(P1 p1, P2 p2, P3 p3, P4 p4) { return F(p1, p2, p3, p4); } -}; - -template -struct Func5 : public UnboundFunc { - typedef R Return; - typedef I FuncInfo; - static R Call(P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) { - return F(p1, p2, p3, p4, p5); - } -}; - -/* BoundFunc ******************************************************************/ - -/* BoundFunc2, BoundFunc3: Like Func2/Func3 except also contains a value that - * shall be bound to the function's second parameter. - * - * Note that the second parameter is a const pointer, but our stored bound value - * is non-const so we can free it when the handlers are destroyed. */ -template -struct BoundFunc { - typedef typename remove_constptr::type MutableP2; - explicit BoundFunc(MutableP2 data_) : data(data_) {} - CleanupFunc *GetCleanup() { return &DeletePointer; } - MutableP2 GetData() { return data; } - MutableP2 data; -}; - -template -struct BoundFunc2 : public BoundFunc { - typedef BoundFunc Base; - typedef I FuncInfo; - explicit BoundFunc2(typename Base::MutableP2 arg) : Base(arg) {} -}; - -template -struct BoundFunc3 : public BoundFunc { - typedef BoundFunc Base; - typedef I FuncInfo; - explicit BoundFunc3(typename Base::MutableP2 arg) : Base(arg) {} -}; - -template -struct BoundFunc4 : public BoundFunc { - typedef BoundFunc Base; - typedef I FuncInfo; - explicit BoundFunc4(typename Base::MutableP2 arg) : Base(arg) {} -}; - -template -struct BoundFunc5 : public BoundFunc { - typedef BoundFunc Base; - typedef I FuncInfo; - explicit BoundFunc5(typename Base::MutableP2 arg) : Base(arg) {} -}; - -/* FuncSig ********************************************************************/ - -/* FuncSig1, FuncSig2, FuncSig3: template classes reflecting a function - * *signature*, but without a specific function attached. - * - * These classes contain member functions that can be invoked with a - * specific function to return a Func/BoundFunc class. */ -template -struct FuncSig1 { - template - Func1 > GetFunc() { - return Func1 >(); - } -}; - -template -struct FuncSig2 { - template - Func2 > GetFunc() { - return Func2 >(); - } - - template - BoundFunc2 > GetFunc( - typename remove_constptr::type param2) { - return BoundFunc2 >(param2); - } -}; - -template -struct FuncSig3 { - template - Func3 > GetFunc() { - return Func3 >(); - } - - template - BoundFunc3 > GetFunc( - typename remove_constptr::type param2) { - return BoundFunc3 >(param2); - } -}; - -template -struct FuncSig4 { - template - Func4 > GetFunc() { - return Func4 >(); - } - - template - BoundFunc4 > GetFunc( - typename remove_constptr::type param2) { - return BoundFunc4 >(param2); - } -}; - -template -struct FuncSig5 { - template - Func5 > GetFunc() { - return Func5 >(); - } - - template - BoundFunc5 > GetFunc( - typename remove_constptr::type param2) { - return BoundFunc5 >(param2); - } -}; - -/* Overloaded template function that can construct the appropriate FuncSig* - * class given a function pointer by deducing the template parameters. */ -template -inline FuncSig1 MatchFunc(R (*f)(P1)) { - UPB_UNUSED(f); /* Only used for template parameter deduction. */ - return FuncSig1(); -} - -template -inline FuncSig2 MatchFunc(R (*f)(P1, P2)) { - UPB_UNUSED(f); /* Only used for template parameter deduction. */ - return FuncSig2(); -} - -template -inline FuncSig3 MatchFunc(R (*f)(P1, P2, P3)) { - UPB_UNUSED(f); /* Only used for template parameter deduction. */ - return FuncSig3(); -} - -template -inline FuncSig4 MatchFunc(R (*f)(P1, P2, P3, P4)) { - UPB_UNUSED(f); /* Only used for template parameter deduction. */ - return FuncSig4(); -} - -template -inline FuncSig5 MatchFunc(R (*f)(P1, P2, P3, P4, P5)) { - UPB_UNUSED(f); /* Only used for template parameter deduction. */ - return FuncSig5(); -} - -/* MethodSig ******************************************************************/ - -/* CallMethod*: a function template that calls a given method. */ -template -R CallMethod0(C *obj) { - return ((*obj).*F)(); -} - -template -R CallMethod1(C *obj, P1 arg1) { - return ((*obj).*F)(arg1); -} - -template -R CallMethod2(C *obj, P1 arg1, P2 arg2) { - return ((*obj).*F)(arg1, arg2); -} - -template -R CallMethod3(C *obj, P1 arg1, P2 arg2, P3 arg3) { - return ((*obj).*F)(arg1, arg2, arg3); -} - -template -R CallMethod4(C *obj, P1 arg1, P2 arg2, P3 arg3, P4 arg4) { - return ((*obj).*F)(arg1, arg2, arg3, arg4); -} - -/* MethodSig: like FuncSig, but for member functions. - * - * GetFunc() returns a normal FuncN object, so after calling GetFunc() no - * more logic is required to special-case methods. */ -template -struct MethodSig0 { - template - Func1, FuncInfo > GetFunc() { - return Func1, FuncInfo >(); - } -}; - -template -struct MethodSig1 { - template - Func2, FuncInfo > GetFunc() { - return Func2, FuncInfo >(); - } - - template - BoundFunc2, FuncInfo > GetFunc( - typename remove_constptr::type param1) { - return BoundFunc2, FuncInfo >( - param1); - } -}; - -template -struct MethodSig2 { - template - Func3, FuncInfo > - GetFunc() { - return Func3, - FuncInfo >(); - } - - template - BoundFunc3, FuncInfo > - GetFunc(typename remove_constptr::type param1) { - return BoundFunc3, - FuncInfo >(param1); - } -}; - -template -struct MethodSig3 { - template - Func4, FuncInfo > - GetFunc() { - return Func4, - FuncInfo >(); - } - - template - BoundFunc4, - FuncInfo > - GetFunc(typename remove_constptr::type param1) { - return BoundFunc4, - FuncInfo >(param1); - } -}; - -template -struct MethodSig4 { - template - Func5, - FuncInfo > - GetFunc() { - return Func5, - FuncInfo >(); - } - - template - BoundFunc5, - FuncInfo > - GetFunc(typename remove_constptr::type param1) { - return BoundFunc5, FuncInfo >( - param1); - } -}; - -template -inline MethodSig0 MatchFunc(R (C::*f)()) { - UPB_UNUSED(f); /* Only used for template parameter deduction. */ - return MethodSig0(); -} - -template -inline MethodSig1 MatchFunc(R (C::*f)(P1)) { - UPB_UNUSED(f); /* Only used for template parameter deduction. */ - return MethodSig1(); -} - -template -inline MethodSig2 MatchFunc(R (C::*f)(P1, P2)) { - UPB_UNUSED(f); /* Only used for template parameter deduction. */ - return MethodSig2(); -} - -template -inline MethodSig3 MatchFunc(R (C::*f)(P1, P2, P3)) { - UPB_UNUSED(f); /* Only used for template parameter deduction. */ - return MethodSig3(); -} - -template -inline MethodSig4 MatchFunc(R (C::*f)(P1, P2, P3, P4)) { - UPB_UNUSED(f); /* Only used for template parameter deduction. */ - return MethodSig4(); -} - -/* MaybeWrapReturn ************************************************************/ - -/* Template class that attempts to wrap the return value of the function so it - * matches the expected type. There are two main adjustments it may make: - * - * 1. If the function returns void, make it return the expected type and with - * a value that always indicates success. - * 2. If the function returns bool, make it return the expected type with a - * value that indicates success or failure. - * - * The "expected type" for return is: - * 1. void* for start handlers. If the closure parameter has a different type - * we will cast it to void* for the return in the success case. - * 2. size_t for string buffer handlers. - * 3. bool for everything else. */ - -/* Template parameters are FuncN type and desired return type. */ -template -struct MaybeWrapReturn; - -/* If the return type matches, return the given function unwrapped. */ -template -struct MaybeWrapReturn { - typedef F Func; -}; - -/* Function wrapper that munges the return value from void to (bool)true. */ -template -bool ReturnTrue2(P1 p1, P2 p2) { - F(p1, p2); - return true; -} - -template -bool ReturnTrue3(P1 p1, P2 p2, P3 p3) { - F(p1, p2, p3); - return true; -} - -/* Function wrapper that munges the return value from void to (void*)arg1 */ -template -void *ReturnClosure2(P1 p1, P2 p2) { - F(p1, p2); - return p1; -} - -template -void *ReturnClosure3(P1 p1, P2 p2, P3 p3) { - F(p1, p2, p3); - return p1; -} - -/* Function wrapper that munges the return value from R to void*. */ -template -void *CastReturnToVoidPtr2(P1 p1, P2 p2) { - return F(p1, p2); -} - -template -void *CastReturnToVoidPtr3(P1 p1, P2 p2, P3 p3) { - return F(p1, p2, p3); -} - -/* Function wrapper that munges the return value from bool to void*. */ -template -void *ReturnClosureOrBreak2(P1 p1, P2 p2) { - return F(p1, p2) ? p1 : UPB_BREAK; -} - -template -void *ReturnClosureOrBreak3(P1 p1, P2 p2, P3 p3) { - return F(p1, p2, p3) ? p1 : UPB_BREAK; -} - -/* For the string callback, which takes five params, returns the size param. */ -template -size_t ReturnStringLen(P1 p1, P2 p2, const char *p3, size_t p4, - const upb_bufhandle *p5) { - F(p1, p2, p3, p4, p5); - return p4; -} - -/* For the string callback, which takes five params, returns the size param or - * zero. */ -template -size_t ReturnNOr0(P1 p1, P2 p2, const char *p3, size_t p4, - const upb_bufhandle *p5) { - return F(p1, p2, p3, p4, p5) ? p4 : 0; -} - -/* If we have a function returning void but want a function returning bool, wrap - * it in a function that returns true. */ -template -struct MaybeWrapReturn, bool> { - typedef Func2, I> Func; -}; - -template -struct MaybeWrapReturn, bool> { - typedef Func3, I> Func; -}; - -/* If our function returns void but we want one returning void*, wrap it in a - * function that returns the first argument. */ -template -struct MaybeWrapReturn, void *> { - typedef Func2, I> Func; -}; - -template -struct MaybeWrapReturn, void *> { - typedef Func3, I> Func; -}; - -/* If our function returns R* but we want one returning void*, wrap it in a - * function that casts to void*. */ -template -struct MaybeWrapReturn, void *, - typename disable_if_same::Type> { - typedef Func2, I> Func; -}; - -template -struct MaybeWrapReturn, void *, - typename disable_if_same::Type> { - typedef Func3, I> - Func; -}; - -/* If our function returns bool but we want one returning void*, wrap it in a - * function that returns either the first param or UPB_BREAK. */ -template -struct MaybeWrapReturn, void *> { - typedef Func2, I> Func; -}; - -template -struct MaybeWrapReturn, void *> { - typedef Func3, I> - Func; -}; - -/* If our function returns void but we want one returning size_t, wrap it in a - * function that returns the size argument. */ -template -struct MaybeWrapReturn< - Func5, - size_t> { - typedef Func5, I> Func; -}; - -/* If our function returns bool but we want one returning size_t, wrap it in a - * function that returns either 0 or the buf size. */ -template -struct MaybeWrapReturn< - Func5, - size_t> { - typedef Func5, I> Func; -}; - -/* ConvertParams **************************************************************/ - -/* Template class that converts the function parameters if necessary, and - * ignores the HandlerData parameter if appropriate. - * - * Template parameter is the are FuncN function type. */ -template -struct ConvertParams; - -/* Function that discards the handler data parameter. */ -template -R IgnoreHandlerData2(void *p1, const void *hd) { - UPB_UNUSED(hd); - return F(static_cast(p1)); -} - -template -R IgnoreHandlerData3(void *p1, const void *hd, P2Wrapper p2) { - UPB_UNUSED(hd); - return F(static_cast(p1), p2); -} - -template -R IgnoreHandlerData4(void *p1, const void *hd, P2 p2, P3 p3) { - UPB_UNUSED(hd); - return F(static_cast(p1), p2, p3); -} - -template -R IgnoreHandlerData5(void *p1, const void *hd, P2 p2, P3 p3, P4 p4) { - UPB_UNUSED(hd); - return F(static_cast(p1), p2, p3, p4); -} - -template -R IgnoreHandlerDataIgnoreHandle(void *p1, const void *hd, const char *p2, - size_t p3, const upb_bufhandle *handle) { - UPB_UNUSED(hd); - UPB_UNUSED(handle); - return F(static_cast(p1), p2, p3); -} - -/* Function that casts the handler data parameter. */ -template -R CastHandlerData2(void *c, const void *hd) { - return F(static_cast(c), static_cast(hd)); -} - -template -R CastHandlerData3(void *c, const void *hd, P3Wrapper p3) { - return F(static_cast(c), static_cast(hd), p3); -} - -template -R CastHandlerData5(void *c, const void *hd, P3 p3, P4 p4, P5 p5) { - return F(static_cast(c), static_cast(hd), p3, p4, p5); -} - -template -R CastHandlerDataIgnoreHandle(void *c, const void *hd, const char *p3, - size_t p4, const upb_bufhandle *handle) { - UPB_UNUSED(handle); - return F(static_cast(c), static_cast(hd), p3, p4); -} - -/* For unbound functions, ignore the handler data. */ -template -struct ConvertParams, T> { - typedef Func2, I> Func; -}; - -template -struct ConvertParams, - R2 (*)(P1_2, P2_2, P3_2)> { - typedef Func3, I> Func; -}; - -/* For StringBuffer only; this ignores both the handler data and the - * upb_bufhandle. */ -template -struct ConvertParams, T> { - typedef Func5, - I> Func; -}; - -template -struct ConvertParams, T> { - typedef Func5, I> Func; -}; - -/* For bound functions, cast the handler data. */ -template -struct ConvertParams, T> { - typedef Func2, I> - Func; -}; - -template -struct ConvertParams, - R2 (*)(P1_2, P2_2, P3_2)> { - typedef Func3, I> Func; -}; - -/* For StringBuffer only; this ignores the upb_bufhandle. */ -template -struct ConvertParams, T> { - typedef Func5, I> - Func; -}; - -template -struct ConvertParams, T> { - typedef Func5, I> Func; -}; - -/* utype/ltype are upper/lower-case, ctype is canonical C type, vtype is - * variant C type. */ -#define TYPE_METHODS(utype, ltype, ctype, vtype) \ - template <> \ - struct CanonicalType { \ - typedef ctype Type; \ - }; \ - template <> \ - inline bool HandlersPtr::SetValueHandler( \ - FieldDefPtr f, const HandlersPtr::utype##Handler &handler) { \ - handler.AddCleanup(ptr()); \ - return upb_handlers_set##ltype(ptr(), f.ptr(), handler.handler(), \ - &handler.attr()); \ - } - -TYPE_METHODS(Double, double, double, double) -TYPE_METHODS(Float, float, float, float) -TYPE_METHODS(UInt64, uint64, uint64_t, UPB_UINT64_T) -TYPE_METHODS(UInt32, uint32, uint32_t, UPB_UINT32_T) -TYPE_METHODS(Int64, int64, int64_t, UPB_INT64_T) -TYPE_METHODS(Int32, int32, int32_t, UPB_INT32_T) -TYPE_METHODS(Bool, bool, bool, bool) - -#ifdef UPB_TWO_32BIT_TYPES -TYPE_METHODS(Int32, int32, int32_t, UPB_INT32ALT_T) -TYPE_METHODS(UInt32, uint32, uint32_t, UPB_UINT32ALT_T) -#endif - -#ifdef UPB_TWO_64BIT_TYPES -TYPE_METHODS(Int64, int64, int64_t, UPB_INT64ALT_T) -TYPE_METHODS(UInt64, uint64, uint64_t, UPB_UINT64ALT_T) -#endif -#undef TYPE_METHODS - -template <> struct CanonicalType { - typedef Status* Type; -}; - -template struct ReturnOf; - -template -struct ReturnOf { - typedef R Return; -}; - -template -struct ReturnOf { - typedef R Return; -}; - -template -struct ReturnOf { - typedef R Return; -}; - -template -struct ReturnOf { - typedef R Return; -}; - - -template -template -inline Handler::Handler(F func) - : registered_(false), - cleanup_data_(func.GetData()), - cleanup_func_(func.GetCleanup()) { - attr_.handler_data = func.GetData(); - typedef typename ReturnOf::Return Return; - typedef typename ConvertParams::Func ConvertedParamsFunc; - typedef typename MaybeWrapReturn::Func - ReturnWrappedFunc; - handler_ = ReturnWrappedFunc().Call; - - /* Set attributes based on what templates can statically tell us about the - * user's function. */ - - /* If the original function returns void, then we know that we wrapped it to - * always return ok. */ - bool always_ok = is_same::value; - attr_.alwaysok = always_ok; - - /* Closure parameter and return type. */ - attr_.closure_type = UniquePtrForType(); - - /* We use the closure type (from the first parameter) if the return type is - * void or bool, since these are the two cases we wrap to return the closure's - * type anyway. - * - * This is all nonsense for non START* handlers, but it doesn't matter because - * in that case the value will be ignored. */ - typedef typename FirstUnlessVoidOrBool::value - EffectiveReturn; - attr_.return_closure_type = UniquePtrForType(); -} - -template -inline void Handler::AddCleanup(upb_handlers* h) const { - UPB_ASSERT(!registered_); - registered_ = true; - if (cleanup_func_) { - bool ok = upb_handlers_addcleanup(h, cleanup_data_, cleanup_func_); - UPB_ASSERT(ok); - } -} - -} /* namespace upb */ - -#endif /* __cplusplus */ - - -#undef UPB_TWO_32BIT_TYPES -#undef UPB_TWO_64BIT_TYPES -#undef UPB_INT32_T -#undef UPB_UINT32_T -#undef UPB_INT32ALT_T -#undef UPB_UINT32ALT_T -#undef UPB_INT64_T -#undef UPB_UINT64_T -#undef UPB_INT64ALT_T -#undef UPB_UINT64ALT_T - - -#endif /* UPB_HANDLERS_INL_H_ */ - -#endif /* UPB_HANDLERS_H */ -/* -** upb::Sink (upb_sink) -** upb::BytesSink (upb_bytessink) -** -** A upb_sink is an object that binds a upb_handlers object to some runtime -** state. It is the object that can actually receive data via the upb_handlers -** interface. -** -** Unlike upb_def and upb_handlers, upb_sink is never frozen, immutable, or -** thread-safe. You can create as many of them as you want, but each one may -** only be used in a single thread at a time. -** -** If we compare with class-based OOP, a you can think of a upb_def as an -** abstract base class, a upb_handlers as a concrete derived class, and a -** upb_sink as an object (class instance). -*/ - -#ifndef UPB_SINK_H -#define UPB_SINK_H - - - -#ifdef __cplusplus -namespace upb { -class BytesSink; -class Sink; -} -#endif - -/* upb_sink *******************************************************************/ - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - const upb_handlers *handlers; - void *closure; -} upb_sink; - -#define PUTVAL(type, ctype) \ - UPB_INLINE bool upb_sink_put##type(upb_sink s, upb_selector_t sel, \ - ctype val) { \ - typedef upb_##type##_handlerfunc functype; \ - functype *func; \ - const void *hd; \ - if (!s.handlers) return true; \ - func = (functype *)upb_handlers_gethandler(s.handlers, sel, &hd); \ - if (!func) return true; \ - return func(s.closure, hd, val); \ - } - -PUTVAL(int32, int32_t) -PUTVAL(int64, int64_t) -PUTVAL(uint32, uint32_t) -PUTVAL(uint64, uint64_t) -PUTVAL(float, float) -PUTVAL(double, double) -PUTVAL(bool, bool) -#undef PUTVAL - -UPB_INLINE void upb_sink_reset(upb_sink *s, const upb_handlers *h, void *c) { - s->handlers = h; - s->closure = c; -} - -UPB_INLINE size_t upb_sink_putstring(upb_sink s, upb_selector_t sel, - const char *buf, size_t n, - const upb_bufhandle *handle) { - typedef upb_string_handlerfunc func; - func *handler; - const void *hd; - if (!s.handlers) return n; - handler = (func *)upb_handlers_gethandler(s.handlers, sel, &hd); - - if (!handler) return n; - return handler(s.closure, hd, buf, n, handle); -} - -UPB_INLINE bool upb_sink_putunknown(upb_sink s, const char *buf, size_t n) { - typedef upb_unknown_handlerfunc func; - func *handler; - const void *hd; - if (!s.handlers) return true; - handler = - (func *)upb_handlers_gethandler(s.handlers, UPB_UNKNOWN_SELECTOR, &hd); - - if (!handler) return n; - return handler(s.closure, hd, buf, n); -} - -UPB_INLINE bool upb_sink_startmsg(upb_sink s) { - typedef upb_startmsg_handlerfunc func; - func *startmsg; - const void *hd; - if (!s.handlers) return true; - startmsg = - (func *)upb_handlers_gethandler(s.handlers, UPB_STARTMSG_SELECTOR, &hd); - - if (!startmsg) return true; - return startmsg(s.closure, hd); -} - -UPB_INLINE bool upb_sink_endmsg(upb_sink s, upb_status *status) { - typedef upb_endmsg_handlerfunc func; - func *endmsg; - const void *hd; - if (!s.handlers) return true; - endmsg = - (func *)upb_handlers_gethandler(s.handlers, UPB_ENDMSG_SELECTOR, &hd); - - if (!endmsg) return true; - return endmsg(s.closure, hd, status); -} - -UPB_INLINE bool upb_sink_startseq(upb_sink s, upb_selector_t sel, - upb_sink *sub) { - typedef upb_startfield_handlerfunc func; - func *startseq; - const void *hd; - sub->closure = s.closure; - sub->handlers = s.handlers; - if (!s.handlers) return true; - startseq = (func*)upb_handlers_gethandler(s.handlers, sel, &hd); - - if (!startseq) return true; - sub->closure = startseq(s.closure, hd); - return sub->closure ? true : false; -} - -UPB_INLINE bool upb_sink_endseq(upb_sink s, upb_selector_t sel) { - typedef upb_endfield_handlerfunc func; - func *endseq; - const void *hd; - if (!s.handlers) return true; - endseq = (func*)upb_handlers_gethandler(s.handlers, sel, &hd); - - if (!endseq) return true; - return endseq(s.closure, hd); -} - -UPB_INLINE bool upb_sink_startstr(upb_sink s, upb_selector_t sel, - size_t size_hint, upb_sink *sub) { - typedef upb_startstr_handlerfunc func; - func *startstr; - const void *hd; - sub->closure = s.closure; - sub->handlers = s.handlers; - if (!s.handlers) return true; - startstr = (func*)upb_handlers_gethandler(s.handlers, sel, &hd); - - if (!startstr) return true; - sub->closure = startstr(s.closure, hd, size_hint); - return sub->closure ? true : false; -} - -UPB_INLINE bool upb_sink_endstr(upb_sink s, upb_selector_t sel) { - typedef upb_endfield_handlerfunc func; - func *endstr; - const void *hd; - if (!s.handlers) return true; - endstr = (func*)upb_handlers_gethandler(s.handlers, sel, &hd); - - if (!endstr) return true; - return endstr(s.closure, hd); -} - -UPB_INLINE bool upb_sink_startsubmsg(upb_sink s, upb_selector_t sel, - upb_sink *sub) { - typedef upb_startfield_handlerfunc func; - func *startsubmsg; - const void *hd; - sub->closure = s.closure; - if (!s.handlers) { - sub->handlers = NULL; - return true; - } - sub->handlers = upb_handlers_getsubhandlers_sel(s.handlers, sel); - startsubmsg = (func*)upb_handlers_gethandler(s.handlers, sel, &hd); - - if (!startsubmsg) return true; - sub->closure = startsubmsg(s.closure, hd); - return sub->closure ? true : false; -} - -UPB_INLINE bool upb_sink_endsubmsg(upb_sink s, upb_sink sub, - upb_selector_t sel) { - typedef upb_endfield_handlerfunc func; - func *endsubmsg; - const void *hd; - if (!s.handlers) return true; - endsubmsg = (func*)upb_handlers_gethandler(s.handlers, sel, &hd); - - if (!endsubmsg) return true; - return endsubmsg(sub.closure, hd); -} - -#ifdef __cplusplus -} /* extern "C" */ - -/* A upb::Sink is an object that binds a upb::Handlers object to some runtime - * state. It represents an endpoint to which data can be sent. - * - * TODO(haberman): right now all of these functions take selectors. Should they - * take selectorbase instead? - * - * ie. instead of calling: - * sink->StartString(FOO_FIELD_START_STRING, ...) - * a selector base would let you say: - * sink->StartString(FOO_FIELD, ...) - * - * This would make call sites a little nicer and require emitting fewer selector - * definitions in .h files. - * - * But the current scheme has the benefit that you can retrieve a function - * pointer for any handler with handlers->GetHandler(selector), without having - * to have a separate GetHandler() function for each handler type. The JIT - * compiler uses this. To accommodate we'd have to expose a separate - * GetHandler() for every handler type. - * - * Also to ponder: selectors right now are independent of a specific Handlers - * instance. In other words, they allocate a number to every possible handler - * that *could* be registered, without knowing anything about what handlers - * *are* registered. That means that using selectors as table offsets prohibits - * us from compacting the handler table at Freeze() time. If the table is very - * sparse, this could be wasteful. - * - * Having another selector-like thing that is specific to a Handlers instance - * would allow this compacting, but then it would be impossible to write code - * ahead-of-time that can be bound to any Handlers instance at runtime. For - * example, a .proto file parser written as straight C will not know what - * Handlers it will be bound to, so when it calls sink->StartString() what - * selector will it pass? It needs a selector like we have today, that is - * independent of any particular upb::Handlers. - * - * Is there a way then to allow Handlers table compaction? */ -class upb::Sink { - public: - /* Constructor with no initialization; must be Reset() before use. */ - Sink() {} - - Sink(const Sink&) = default; - Sink& operator=(const Sink&) = default; - - Sink(const upb_sink& sink) : sink_(sink) {} - Sink &operator=(const upb_sink &sink) { - sink_ = sink; - return *this; - } - - upb_sink sink() { return sink_; } - - /* Constructs a new sink for the given frozen handlers and closure. - * - * TODO: once the Handlers know the expected closure type, verify that T - * matches it. */ - template Sink(const upb_handlers* handlers, T* closure) { - Reset(handlers, closure); - } - - upb_sink* ptr() { return &sink_; } - - /* Resets the value of the sink. */ - template void Reset(const upb_handlers* handlers, T* closure) { - upb_sink_reset(&sink_, handlers, closure); - } - - /* Returns the top-level object that is bound to this sink. - * - * TODO: once the Handlers know the expected closure type, verify that T - * matches it. */ - template T* GetObject() const { - return static_cast(sink_.closure); - } - - /* Functions for pushing data into the sink. - * - * These return false if processing should stop (either due to error or just - * to suspend). - * - * These may not be called from within one of the same sink's handlers (in - * other words, handlers are not re-entrant). */ - - /* Should be called at the start and end of every message; both the top-level - * message and submessages. This means that submessages should use the - * following sequence: - * sink->StartSubMessage(startsubmsg_selector); - * sink->StartMessage(); - * // ... - * sink->EndMessage(&status); - * sink->EndSubMessage(endsubmsg_selector); */ - bool StartMessage() { return upb_sink_startmsg(sink_); } - bool EndMessage(upb_status *status) { - return upb_sink_endmsg(sink_, status); - } - - /* Putting of individual values. These work for both repeated and - * non-repeated fields, but for repeated fields you must wrap them in - * calls to StartSequence()/EndSequence(). */ - bool PutInt32(HandlersPtr::Selector s, int32_t val) { - return upb_sink_putint32(sink_, s, val); - } - - bool PutInt64(HandlersPtr::Selector s, int64_t val) { - return upb_sink_putint64(sink_, s, val); - } - - bool PutUInt32(HandlersPtr::Selector s, uint32_t val) { - return upb_sink_putuint32(sink_, s, val); - } - - bool PutUInt64(HandlersPtr::Selector s, uint64_t val) { - return upb_sink_putuint64(sink_, s, val); - } - - bool PutFloat(HandlersPtr::Selector s, float val) { - return upb_sink_putfloat(sink_, s, val); - } - - bool PutDouble(HandlersPtr::Selector s, double val) { - return upb_sink_putdouble(sink_, s, val); - } - - bool PutBool(HandlersPtr::Selector s, bool val) { - return upb_sink_putbool(sink_, s, val); - } - - /* Putting of string/bytes values. Each string can consist of zero or more - * non-contiguous buffers of data. - * - * For StartString(), the function will write a sink for the string to "sub." - * The sub-sink must be used for any/all PutStringBuffer() calls. */ - bool StartString(HandlersPtr::Selector s, size_t size_hint, Sink* sub) { - upb_sink sub_c; - bool ret = upb_sink_startstr(sink_, s, size_hint, &sub_c); - *sub = sub_c; - return ret; - } - - size_t PutStringBuffer(HandlersPtr::Selector s, const char *buf, size_t len, - const upb_bufhandle *handle) { - return upb_sink_putstring(sink_, s, buf, len, handle); - } - - bool EndString(HandlersPtr::Selector s) { - return upb_sink_endstr(sink_, s); - } - - /* For submessage fields. - * - * For StartSubMessage(), the function will write a sink for the string to - * "sub." The sub-sink must be used for any/all handlers called within the - * submessage. */ - bool StartSubMessage(HandlersPtr::Selector s, Sink* sub) { - upb_sink sub_c; - bool ret = upb_sink_startsubmsg(sink_, s, &sub_c); - *sub = sub_c; - return ret; - } - - bool EndSubMessage(HandlersPtr::Selector s, Sink sub) { - return upb_sink_endsubmsg(sink_, sub.sink_, s); - } - - /* For repeated fields of any type, the sequence of values must be wrapped in - * these calls. - * - * For StartSequence(), the function will write a sink for the string to - * "sub." The sub-sink must be used for any/all handlers called within the - * sequence. */ - bool StartSequence(HandlersPtr::Selector s, Sink* sub) { - upb_sink sub_c; - bool ret = upb_sink_startseq(sink_, s, &sub_c); - *sub = sub_c; - return ret; - } - - bool EndSequence(HandlersPtr::Selector s) { - return upb_sink_endseq(sink_, s); - } - - /* Copy and assign specifically allowed. - * We don't even bother making these members private because so many - * functions need them and this is mainly just a dumb data container anyway. - */ - - private: - upb_sink sink_; -}; - -#endif /* __cplusplus */ - -/* upb_bytessink **************************************************************/ - -typedef struct { - const upb_byteshandler *handler; - void *closure; -} upb_bytessink ; - -UPB_INLINE void upb_bytessink_reset(upb_bytessink* s, const upb_byteshandler *h, - void *closure) { - s->handler = h; - s->closure = closure; -} - -UPB_INLINE bool upb_bytessink_start(upb_bytessink s, size_t size_hint, - void **subc) { - typedef upb_startstr_handlerfunc func; - func *start; - *subc = s.closure; - if (!s.handler) return true; - start = (func *)s.handler->table[UPB_STARTSTR_SELECTOR].func; - - if (!start) return true; - *subc = start(s.closure, - s.handler->table[UPB_STARTSTR_SELECTOR].attr.handler_data, - size_hint); - return *subc != NULL; -} - -UPB_INLINE size_t upb_bytessink_putbuf(upb_bytessink s, void *subc, - const char *buf, size_t size, - const upb_bufhandle* handle) { - typedef upb_string_handlerfunc func; - func *putbuf; - if (!s.handler) return true; - putbuf = (func *)s.handler->table[UPB_STRING_SELECTOR].func; - - if (!putbuf) return true; - return putbuf(subc, s.handler->table[UPB_STRING_SELECTOR].attr.handler_data, - buf, size, handle); -} - -UPB_INLINE bool upb_bytessink_end(upb_bytessink s) { - typedef upb_endfield_handlerfunc func; - func *end; - if (!s.handler) return true; - end = (func *)s.handler->table[UPB_ENDSTR_SELECTOR].func; - - if (!end) return true; - return end(s.closure, - s.handler->table[UPB_ENDSTR_SELECTOR].attr.handler_data); -} - -#ifdef __cplusplus - -class upb::BytesSink { - public: - BytesSink() {} - - BytesSink(const BytesSink&) = default; - BytesSink& operator=(const BytesSink&) = default; - - BytesSink(const upb_bytessink& sink) : sink_(sink) {} - BytesSink &operator=(const upb_bytessink &sink) { - sink_ = sink; - return *this; - } - - upb_bytessink sink() { return sink_; } - - /* Constructs a new sink for the given frozen handlers and closure. - * - * TODO(haberman): once the Handlers know the expected closure type, verify - * that T matches it. */ - template BytesSink(const upb_byteshandler* handler, T* closure) { - upb_bytessink_reset(sink_, handler, closure); - } - - /* Resets the value of the sink. */ - template void Reset(const upb_byteshandler* handler, T* closure) { - upb_bytessink_reset(&sink_, handler, closure); - } - - bool Start(size_t size_hint, void **subc) { - return upb_bytessink_start(sink_, size_hint, subc); - } - - size_t PutBuffer(void *subc, const char *buf, size_t len, - const upb_bufhandle *handle) { - return upb_bytessink_putbuf(sink_, subc, buf, len, handle); - } - - bool End() { - return upb_bytessink_end(sink_); - } - - private: - upb_bytessink sink_; -}; - -#endif /* __cplusplus */ - -/* upb_bufsrc *****************************************************************/ - -#ifdef __cplusplus -extern "C" { -#endif - -bool upb_bufsrc_putbuf(const char *buf, size_t len, upb_bytessink sink); - -#ifdef __cplusplus -} /* extern "C" */ - -namespace upb { -template bool PutBuffer(const T& str, BytesSink sink) { - return upb_bufsrc_putbuf(str.data(), str.size(), sink.sink()); -} -} - -#endif /* __cplusplus */ - - -#endif -/* -** Internal-only definitions for the decoder. -*/ - -#ifndef UPB_DECODER_INT_H_ -#define UPB_DECODER_INT_H_ - -/* -** upb::pb::Decoder -** -** A high performance, streaming, resumable decoder for the binary protobuf -** format. -** -** This interface works the same regardless of what decoder backend is being -** used. A client of this class does not need to know whether decoding is using -** a JITted decoder (DynASM, LLVM, etc) or an interpreted decoder. By default, -** it will always use the fastest available decoder. However, you can call -** set_allow_jit(false) to disable any JIT decoder that might be available. -** This is primarily useful for testing purposes. -*/ - -#ifndef UPB_DECODER_H_ -#define UPB_DECODER_H_ - - -#ifdef __cplusplus -namespace upb { -namespace pb { -class CodeCache; -class DecoderPtr; -class DecoderMethodPtr; -class DecoderMethodOptions; -} /* namespace pb */ -} /* namespace upb */ -#endif - -/* The maximum number of bytes we are required to buffer internally between - * calls to the decoder. The value is 14: a 5 byte unknown tag plus ten-byte - * varint, less one because we are buffering an incomplete value. - * - * Should only be used by unit tests. */ -#define UPB_DECODER_MAX_RESIDUAL_BYTES 14 - -/* upb_pbdecodermethod ********************************************************/ - -struct upb_pbdecodermethod; -typedef struct upb_pbdecodermethod upb_pbdecodermethod; - -#ifdef __cplusplus -extern "C" { -#endif - -const upb_handlers *upb_pbdecodermethod_desthandlers( - const upb_pbdecodermethod *m); -const upb_byteshandler *upb_pbdecodermethod_inputhandler( - const upb_pbdecodermethod *m); -bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m); - -#ifdef __cplusplus -} /* extern "C" */ - -/* Represents the code to parse a protobuf according to a destination - * Handlers. */ -class upb::pb::DecoderMethodPtr { - public: - DecoderMethodPtr() : ptr_(nullptr) {} - DecoderMethodPtr(const upb_pbdecodermethod* ptr) : ptr_(ptr) {} - - const upb_pbdecodermethod* ptr() { return ptr_; } - - /* The destination handlers that are statically bound to this method. - * This method is only capable of outputting to a sink that uses these - * handlers. */ - const Handlers *dest_handlers() const { - return upb_pbdecodermethod_desthandlers(ptr_); - } - - /* The input handlers for this decoder method. */ - const BytesHandler* input_handler() const { - return upb_pbdecodermethod_inputhandler(ptr_); - } - - /* Whether this method is native. */ - bool is_native() const { - return upb_pbdecodermethod_isnative(ptr_); - } - - private: - const upb_pbdecodermethod* ptr_; -}; - -#endif - -/* upb_pbdecoder **************************************************************/ - -/* Preallocation hint: decoder won't allocate more bytes than this when first - * constructed. This hint may be an overestimate for some build configurations. - * But if the decoder library is upgraded without recompiling the application, - * it may be an underestimate. */ -#define UPB_PB_DECODER_SIZE 4416 - -struct upb_pbdecoder; -typedef struct upb_pbdecoder upb_pbdecoder; - -#ifdef __cplusplus -extern "C" { -#endif - -upb_pbdecoder *upb_pbdecoder_create(upb_arena *arena, - const upb_pbdecodermethod *method, - upb_sink output, upb_status *status); -const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d); -upb_bytessink upb_pbdecoder_input(upb_pbdecoder *d); -uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d); -size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d); -bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max); -void upb_pbdecoder_reset(upb_pbdecoder *d); - -#ifdef __cplusplus -} /* extern "C" */ - -/* A Decoder receives binary protobuf data on its input sink and pushes the - * decoded data to its output sink. */ -class upb::pb::DecoderPtr { - public: - DecoderPtr() : ptr_(nullptr) {} - DecoderPtr(upb_pbdecoder* ptr) : ptr_(ptr) {} - - upb_pbdecoder* ptr() { return ptr_; } - - /* Constructs a decoder instance for the given method, which must outlive this - * decoder. Any errors during parsing will be set on the given status, which - * must also outlive this decoder. - * - * The sink must match the given method. */ - static DecoderPtr Create(Arena *arena, DecoderMethodPtr method, - upb::Sink output, Status *status) { - return DecoderPtr(upb_pbdecoder_create(arena->ptr(), method.ptr(), - output.sink(), status->ptr())); - } - - /* Returns the DecoderMethod this decoder is parsing from. */ - const DecoderMethodPtr method() const { - return DecoderMethodPtr(upb_pbdecoder_method(ptr_)); - } - - /* The sink on which this decoder receives input. */ - BytesSink input() { return BytesSink(upb_pbdecoder_input(ptr())); } - - /* Returns number of bytes successfully parsed. - * - * This can be useful for determining the stream position where an error - * occurred. - * - * This value may not be up-to-date when called from inside a parsing - * callback. */ - uint64_t BytesParsed() { return upb_pbdecoder_bytesparsed(ptr()); } - - /* Gets/sets the parsing nexting limit. If the total number of nested - * submessages and repeated fields hits this limit, parsing will fail. This - * is a resource limit that controls the amount of memory used by the parsing - * stack. - * - * Setting the limit will fail if the parser is currently suspended at a depth - * greater than this, or if memory allocation of the stack fails. */ - size_t max_nesting() { return upb_pbdecoder_maxnesting(ptr()); } - bool set_max_nesting(size_t max) { return upb_pbdecoder_maxnesting(ptr()); } - - void Reset() { upb_pbdecoder_reset(ptr()); } - - static const size_t kSize = UPB_PB_DECODER_SIZE; - - private: - upb_pbdecoder *ptr_; -}; - -#endif /* __cplusplus */ - -/* upb_pbcodecache ************************************************************/ - -/* Lazily builds and caches decoder methods that will push data to the given - * handlers. The destination handlercache must outlive this object. */ - -struct upb_pbcodecache; -typedef struct upb_pbcodecache upb_pbcodecache; - -#ifdef __cplusplus -extern "C" { -#endif - -upb_pbcodecache *upb_pbcodecache_new(upb_handlercache *dest); -void upb_pbcodecache_free(upb_pbcodecache *c); -bool upb_pbcodecache_allowjit(const upb_pbcodecache *c); -void upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow); -void upb_pbcodecache_setlazy(upb_pbcodecache *c, bool lazy); -const upb_pbdecodermethod *upb_pbcodecache_get(upb_pbcodecache *c, - const upb_msgdef *md); - -#ifdef __cplusplus -} /* extern "C" */ - -/* A class for caching protobuf processing code, whether bytecode for the - * interpreted decoder or machine code for the JIT. - * - * This class is not thread-safe. */ -class upb::pb::CodeCache { - public: - CodeCache(upb::HandlerCache *dest) - : ptr_(upb_pbcodecache_new(dest->ptr()), upb_pbcodecache_free) {} - CodeCache(CodeCache&&) = default; - CodeCache& operator=(CodeCache&&) = default; - - upb_pbcodecache* ptr() { return ptr_.get(); } - const upb_pbcodecache* ptr() const { return ptr_.get(); } - - /* Whether the cache is allowed to generate machine code. Defaults to true. - * There is no real reason to turn it off except for testing or if you are - * having a specific problem with the JIT. - * - * Note that allow_jit = true does not *guarantee* that the code will be JIT - * compiled. If this platform is not supported or the JIT was not compiled - * in, the code may still be interpreted. */ - bool allow_jit() const { return upb_pbcodecache_allowjit(ptr()); } - - /* This may only be called when the object is first constructed, and prior to - * any code generation. */ - void set_allow_jit(bool allow) { upb_pbcodecache_setallowjit(ptr(), allow); } - - /* Should the decoder push submessages to lazy handlers for fields that have - * them? The caller should set this iff the lazy handlers expect data that is - * in protobuf binary format and the caller wishes to lazy parse it. */ - void set_lazy(bool lazy) { upb_pbcodecache_setlazy(ptr(), lazy); } - - /* Returns a DecoderMethod that can push data to the given handlers. - * If a suitable method already exists, it will be returned from the cache. */ - const DecoderMethodPtr Get(MessageDefPtr md) { - return DecoderMethodPtr(upb_pbcodecache_get(ptr(), md.ptr())); - } - - private: - std::unique_ptr ptr_; -}; - -#endif /* __cplusplus */ - -#endif /* UPB_DECODER_H_ */ - - -/* Opcode definitions. The canonical meaning of each opcode is its - * implementation in the interpreter (the JIT is written to match this). - * - * All instructions have the opcode in the low byte. - * Instruction format for most instructions is: - * - * +-------------------+--------+ - * | arg (24) | op (8) | - * +-------------------+--------+ - * - * Exceptions are indicated below. A few opcodes are multi-word. */ -typedef enum { - /* Opcodes 1-8, 13, 15-18 parse their respective descriptor types. - * Arg for all of these is the upb selector for this field. */ -#define T(type) OP_PARSE_ ## type = UPB_DESCRIPTOR_TYPE_ ## type - T(DOUBLE), T(FLOAT), T(INT64), T(UINT64), T(INT32), T(FIXED64), T(FIXED32), - T(BOOL), T(UINT32), T(SFIXED32), T(SFIXED64), T(SINT32), T(SINT64), -#undef T - OP_STARTMSG = 9, /* No arg. */ - OP_ENDMSG = 10, /* No arg. */ - OP_STARTSEQ = 11, - OP_ENDSEQ = 12, - OP_STARTSUBMSG = 14, - OP_ENDSUBMSG = 19, - OP_STARTSTR = 20, - OP_STRING = 21, - OP_ENDSTR = 22, - - OP_PUSHTAGDELIM = 23, /* No arg. */ - OP_PUSHLENDELIM = 24, /* No arg. */ - OP_POP = 25, /* No arg. */ - OP_SETDELIM = 26, /* No arg. */ - OP_SETBIGGROUPNUM = 27, /* two words: - * | unused (24) | opc (8) | - * | groupnum (32) | */ - OP_CHECKDELIM = 28, - OP_CALL = 29, - OP_RET = 30, - OP_BRANCH = 31, - - /* Different opcodes depending on how many bytes expected. */ - OP_TAG1 = 32, /* | match tag (16) | jump target (8) | opc (8) | */ - OP_TAG2 = 33, /* | match tag (16) | jump target (8) | opc (8) | */ - OP_TAGN = 34, /* three words: */ - /* | unused (16) | jump target(8) | opc (8) | */ - /* | match tag 1 (32) | */ - /* | match tag 2 (32) | */ - - OP_SETDISPATCH = 35, /* N words: */ - /* | unused (24) | opc | */ - /* | upb_inttable* (32 or 64) | */ - - OP_DISPATCH = 36, /* No arg. */ - - OP_HALT = 37 /* No arg. */ -} opcode; - -#define OP_MAX OP_HALT - -UPB_INLINE opcode getop(uint32_t instr) { return (opcode)(instr & 0xff); } - -struct upb_pbcodecache { - upb_arena *arena; - upb_handlercache *dest; - bool allow_jit; - bool lazy; - - /* Map of upb_msgdef -> mgroup. */ - upb_inttable groups; -}; - -/* Method group; represents a set of decoder methods that had their code - * emitted together. Immutable once created. */ -typedef struct { - /* Maps upb_msgdef/upb_handlers -> upb_pbdecodermethod. Owned by us. - * - * Ideally this would be on pbcodecache (if we were actually caching code). - * Right now we don't actually cache anything, which is wasteful. */ - upb_inttable methods; - - /* The bytecode for our methods, if any exists. Owned by us. */ - uint32_t *bytecode; - uint32_t *bytecode_end; -} mgroup; - -/* The maximum that any submessages can be nested. Matches proto2's limit. - * This specifies the size of the decoder's statically-sized array and therefore - * setting it high will cause the upb::pb::Decoder object to be larger. - * - * If necessary we can add a runtime-settable property to Decoder that allow - * this to be larger than the compile-time setting, but this would add - * complexity, particularly since we would have to decide how/if to give users - * the ability to set a custom memory allocation function. */ -#define UPB_DECODER_MAX_NESTING 64 - -/* Internal-only struct used by the decoder. */ -typedef struct { - /* Space optimization note: we store two pointers here that the JIT - * doesn't need at all; the upb_handlers* inside the sink and - * the dispatch table pointer. We can optimize so that the JIT uses - * smaller stack frames than the interpreter. The only thing we need - * to guarantee is that the fallback routines can find end_ofs. */ - upb_sink sink; - - /* The absolute stream offset of the end-of-frame delimiter. - * Non-delimited frames (groups and non-packed repeated fields) reuse the - * delimiter of their parent, even though the frame may not end there. - * - * NOTE: the JIT stores a slightly different value here for non-top frames. - * It stores the value relative to the end of the enclosed message. But the - * top frame is still stored the same way, which is important for ensuring - * that calls from the JIT into C work correctly. */ - uint64_t end_ofs; - const uint32_t *base; - - /* 0 indicates a length-delimited field. - * A positive number indicates a known group. - * A negative number indicates an unknown group. */ - int32_t groupnum; - upb_inttable *dispatch; /* Not used by the JIT. */ -} upb_pbdecoder_frame; - -struct upb_pbdecodermethod { - /* While compiling, the base is relative in "ofs", after compiling it is - * absolute in "ptr". */ - union { - uint32_t ofs; /* PC offset of method. */ - void *ptr; /* Pointer to bytecode or machine code for this method. */ - } code_base; - - /* The decoder method group to which this method belongs. */ - const mgroup *group; - - /* Whether this method is native code or bytecode. */ - bool is_native_; - - /* The handler one calls to invoke this method. */ - upb_byteshandler input_handler_; - - /* The destination handlers this method is bound to. We own a ref. */ - const upb_handlers *dest_handlers_; - - /* Dispatch table -- used by both bytecode decoder and JIT when encountering a - * field number that wasn't the one we were expecting to see. See - * decoder.int.h for the layout of this table. */ - upb_inttable dispatch; -}; - -struct upb_pbdecoder { - upb_arena *arena; - - /* Our input sink. */ - upb_bytessink input_; - - /* The decoder method we are parsing with (owned). */ - const upb_pbdecodermethod *method_; - - size_t call_len; - const uint32_t *pc, *last; - - /* Current input buffer and its stream offset. */ - const char *buf, *ptr, *end, *checkpoint; - - /* End of the delimited region, relative to ptr, NULL if not in this buf. */ - const char *delim_end; - - /* End of the delimited region, relative to ptr, end if not in this buf. */ - const char *data_end; - - /* Overall stream offset of "buf." */ - uint64_t bufstart_ofs; - - /* Buffer for residual bytes not parsed from the previous buffer. */ - char residual[UPB_DECODER_MAX_RESIDUAL_BYTES]; - char *residual_end; - - /* Bytes of data that should be discarded from the input beore we start - * parsing again. We set this when we internally determine that we can - * safely skip the next N bytes, but this region extends past the current - * user buffer. */ - size_t skip; - - /* Stores the user buffer passed to our decode function. */ - const char *buf_param; - size_t size_param; - const upb_bufhandle *handle; - - /* Our internal stack. */ - upb_pbdecoder_frame *stack, *top, *limit; - const uint32_t **callstack; - size_t stack_size; - - upb_status *status; -}; - -/* Decoder entry points; used as handlers. */ -void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint); -size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf, - size_t size, const upb_bufhandle *handle); -bool upb_pbdecoder_end(void *closure, const void *handler_data); - -/* Decoder-internal functions that the JIT calls to handle fallback paths. */ -int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, - size_t size, const upb_bufhandle *handle); -size_t upb_pbdecoder_suspend(upb_pbdecoder *d); -int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum, - uint8_t wire_type); -int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, uint64_t expected); -int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, uint64_t *u64); -int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32); -int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64); -void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg); - -/* Error messages that are shared between the bytecode and JIT decoders. */ -extern const char *kPbDecoderStackOverflow; -extern const char *kPbDecoderSubmessageTooLong; - -/* Access to decoderplan members needed by the decoder. */ -const char *upb_pbdecoder_getopname(unsigned int op); - -/* A special label that means "do field dispatch for this message and branch to - * wherever that takes you." */ -#define LABEL_DISPATCH 0 - -/* A special slot in the dispatch table that stores the epilogue (ENDMSG and/or - * RET) for branching to when we find an appropriate ENDGROUP tag. */ -#define DISPATCH_ENDMSG 0 - -/* It's important to use this invalid wire type instead of 0 (which is a valid - * wire type). */ -#define NO_WIRE_TYPE 0xff - -/* The dispatch table layout is: - * [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ] - * - * If wt1 matches, jump to the 48-bit offset. If wt2 matches, lookup - * (UPB_MAX_FIELDNUMBER + fieldnum) and jump there. - * - * We need two wire types because of packed/non-packed compatibility. A - * primitive repeated field can use either wire type and be valid. While we - * could key the table on fieldnum+wiretype, the table would be 8x sparser. - * - * Storing two wire types in the primary value allows us to quickly rule out - * the second wire type without needing to do a separate lookup (this case is - * less common than an unknown field). */ -UPB_INLINE uint64_t upb_pbdecoder_packdispatch(uint64_t ofs, uint8_t wt1, - uint8_t wt2) { - return (ofs << 16) | (wt2 << 8) | wt1; -} - -UPB_INLINE void upb_pbdecoder_unpackdispatch(uint64_t dispatch, uint64_t *ofs, - uint8_t *wt1, uint8_t *wt2) { - *wt1 = (uint8_t)dispatch; - *wt2 = (uint8_t)(dispatch >> 8); - *ofs = dispatch >> 16; -} - -/* All of the functions in decoder.c that return int32_t return values according - * to the following scheme: - * 1. negative values indicate a return code from the following list. - * 2. positive values indicate that error or end of buffer was hit, and - * that the decode function should immediately return the given value - * (the decoder state has already been suspended and is ready to be - * resumed). */ -#define DECODE_OK -1 -#define DECODE_MISMATCH -2 /* Used only from checktag_slow(). */ -#define DECODE_ENDGROUP -3 /* Used only from checkunknown(). */ - -#define CHECK_RETURN(x) { int32_t ret = x; if (ret >= 0) return ret; } - - -#endif /* UPB_DECODER_INT_H_ */ -/* -** A number of routines for varint manipulation (we keep them all around to -** have multiple approaches available for benchmarking). -*/ - -#ifndef UPB_VARINT_DECODER_H_ -#define UPB_VARINT_DECODER_H_ - -#include -#include -#include - - -#ifdef __cplusplus -extern "C" { -#endif - -#define UPB_MAX_WIRE_TYPE 5 - -/* The maximum number of bytes that it takes to encode a 64-bit varint. */ -#define UPB_PB_VARINT_MAX_LEN 10 - -/* Array of the "native" (ie. non-packed-repeated) wire type for the given a - * descriptor type (upb_descriptortype_t). */ -extern const uint8_t upb_pb_native_wire_types[]; - -UPB_INLINE uint64_t byteswap64(uint64_t val) { - uint64_t byte = 0xff; - return (val & (byte << 56) >> 56) - | (val & (byte << 48) >> 40) - | (val & (byte << 40) >> 24) - | (val & (byte << 32) >> 8) - | (val & (byte << 24) << 8) - | (val & (byte << 16) << 24) - | (val & (byte << 8) << 40) - | (val & (byte << 0) << 56); -} - -/* Zig-zag encoding/decoding **************************************************/ - -UPB_INLINE int32_t upb_zzdec_32(uint64_t _n) { - uint32_t n = (uint32_t)_n; - return (n >> 1) ^ -(int32_t)(n & 1); -} -UPB_INLINE int64_t upb_zzdec_64(uint64_t n) { - return (n >> 1) ^ -(int64_t)(n & 1); -} -UPB_INLINE uint32_t upb_zzenc_32(int32_t n) { - return ((uint32_t)n << 1) ^ (n >> 31); -} -UPB_INLINE uint64_t upb_zzenc_64(int64_t n) { - return ((uint64_t)n << 1) ^ (n >> 63); -} - -/* Decoding *******************************************************************/ - -/* All decoding functions return this struct by value. */ -typedef struct { - const char *p; /* NULL if the varint was unterminated. */ - uint64_t val; -} upb_decoderet; - -UPB_INLINE upb_decoderet upb_decoderet_make(const char *p, uint64_t val) { - upb_decoderet ret; - ret.p = p; - ret.val = val; - return ret; -} - -upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r); -upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r); - -/* Template for a function that checks the first two bytes with branching - * and dispatches 2-10 bytes with a separate function. Note that this may read - * up to 10 bytes, so it must not be used unless there are at least ten bytes - * left in the buffer! */ -#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function) \ -UPB_INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) { \ - uint8_t *p = (uint8_t*)_p; \ - upb_decoderet r; \ - if ((*p & 0x80) == 0) { \ - /* Common case: one-byte varint. */ \ - return upb_decoderet_make(_p + 1, *p & 0x7fU); \ - } \ - r = upb_decoderet_make(_p + 2, (*p & 0x7fU) | ((*(p + 1) & 0x7fU) << 7)); \ - if ((*(p + 1) & 0x80) == 0) { \ - /* Two-byte varint. */ \ - return r; \ - } \ - /* Longer varint, fallback to out-of-line function. */ \ - return decode_max8_function(r); \ -} - -UPB_VARINT_DECODER_CHECK2(branch32, upb_vdecode_max8_branch32) -UPB_VARINT_DECODER_CHECK2(branch64, upb_vdecode_max8_branch64) -#undef UPB_VARINT_DECODER_CHECK2 - -/* Our canonical functions for decoding varints, based on the currently - * favored best-performing implementations. */ -UPB_INLINE upb_decoderet upb_vdecode_fast(const char *p) { - if (sizeof(long) == 8) - return upb_vdecode_check2_branch64(p); - else - return upb_vdecode_check2_branch32(p); -} - - -/* Encoding *******************************************************************/ - -UPB_INLINE int upb_value_size(uint64_t val) { -#ifdef __GNUC__ - int high_bit = 63 - __builtin_clzll(val); /* 0-based, undef if val == 0. */ -#else - int high_bit = 0; - uint64_t tmp = val; - while(tmp >>= 1) high_bit++; -#endif - return val == 0 ? 1 : high_bit / 8 + 1; -} - -/* Encodes a 64-bit varint into buf (which must be >=UPB_PB_VARINT_MAX_LEN - * bytes long), returning how many bytes were used. - * - * TODO: benchmark and optimize if necessary. */ -UPB_INLINE size_t upb_vencode64(uint64_t val, char *buf) { - size_t i; - if (val == 0) { buf[0] = 0; return 1; } - i = 0; - while (val) { - uint8_t byte = val & 0x7fU; - val >>= 7; - if (val) byte |= 0x80U; - buf[i++] = byte; - } - return i; -} - -UPB_INLINE size_t upb_varint_size(uint64_t val) { - char buf[UPB_PB_VARINT_MAX_LEN]; - return upb_vencode64(val, buf); -} - -/* Encodes a 32-bit varint, *not* sign-extended. */ -UPB_INLINE uint64_t upb_vencode32(uint32_t val) { - char buf[UPB_PB_VARINT_MAX_LEN]; - size_t bytes = upb_vencode64(val, buf); - uint64_t ret = 0; - UPB_ASSERT(bytes <= 5); - memcpy(&ret, buf, bytes); -#ifdef UPB_BIG_ENDIAN - ret = byteswap64(ret); -#endif - UPB_ASSERT(ret <= 0xffffffffffU); - return ret; -} - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* UPB_VARINT_DECODER_H_ */ -/* -** upb::pb::Encoder (upb_pb_encoder) -** -** Implements a set of upb_handlers that write protobuf data to the binary wire -** format. -** -** This encoder implementation does not have any access to any out-of-band or -** precomputed lengths for submessages, so it must buffer submessages internally -** before it can emit the first byte. -*/ - -#ifndef UPB_ENCODER_H_ -#define UPB_ENCODER_H_ - - -#ifdef __cplusplus -namespace upb { -namespace pb { -class EncoderPtr; -} /* namespace pb */ -} /* namespace upb */ -#endif - -#define UPB_PBENCODER_MAX_NESTING 100 - -/* upb_pb_encoder *************************************************************/ - -/* Preallocation hint: decoder won't allocate more bytes than this when first - * constructed. This hint may be an overestimate for some build configurations. - * But if the decoder library is upgraded without recompiling the application, - * it may be an underestimate. */ -#define UPB_PB_ENCODER_SIZE 784 - -struct upb_pb_encoder; -typedef struct upb_pb_encoder upb_pb_encoder; - -#ifdef __cplusplus -extern "C" { -#endif - -upb_sink upb_pb_encoder_input(upb_pb_encoder *p); -upb_pb_encoder* upb_pb_encoder_create(upb_arena* a, const upb_handlers* h, - upb_bytessink output); - -/* Lazily builds and caches handlers that will push encoded data to a bytessink. - * Any msgdef objects used with this object must outlive it. */ -upb_handlercache *upb_pb_encoder_newcache(void); - -#ifdef __cplusplus -} /* extern "C" { */ - -class upb::pb::EncoderPtr { - public: - EncoderPtr(upb_pb_encoder* ptr) : ptr_(ptr) {} - - upb_pb_encoder* ptr() { return ptr_; } - - /* Creates a new encoder in the given environment. The Handlers must have - * come from NewHandlers() below. */ - static EncoderPtr Create(Arena* arena, const Handlers* handlers, - BytesSink output) { - return EncoderPtr( - upb_pb_encoder_create(arena->ptr(), handlers, output.sink())); - } - - /* The input to the encoder. */ - upb::Sink input() { return upb_pb_encoder_input(ptr()); } - - /* Creates a new set of handlers for this MessageDef. */ - static HandlerCache NewCache() { - return HandlerCache(upb_pb_encoder_newcache()); - } - - static const size_t kSize = UPB_PB_ENCODER_SIZE; - - private: - upb_pb_encoder* ptr_; -}; - -#endif /* __cplusplus */ - -#endif /* UPB_ENCODER_H_ */ -/* -** upb::pb::TextPrinter (upb_textprinter) -** -** Handlers for writing to protobuf text format. -*/ - -#ifndef UPB_TEXT_H_ -#define UPB_TEXT_H_ - - -#ifdef __cplusplus -namespace upb { -namespace pb { -class TextPrinterPtr; -} /* namespace pb */ -} /* namespace upb */ -#endif - -/* upb_textprinter ************************************************************/ - -struct upb_textprinter; -typedef struct upb_textprinter upb_textprinter; - -#ifdef __cplusplus -extern "C" { -#endif - -/* C API. */ -upb_textprinter *upb_textprinter_create(upb_arena *arena, const upb_handlers *h, - upb_bytessink output); -void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line); -upb_sink upb_textprinter_input(upb_textprinter *p); -upb_handlercache *upb_textprinter_newcache(void); - -#ifdef __cplusplus -} /* extern "C" */ - -class upb::pb::TextPrinterPtr { - public: - TextPrinterPtr(upb_textprinter* ptr) : ptr_(ptr) {} - - /* The given handlers must have come from NewHandlers(). It must outlive the - * TextPrinter. */ - static TextPrinterPtr Create(Arena *arena, upb::HandlersPtr *handlers, - BytesSink output) { - return TextPrinterPtr( - upb_textprinter_create(arena->ptr(), handlers->ptr(), output.sink())); - } - - void SetSingleLineMode(bool single_line) { - upb_textprinter_setsingleline(ptr_, single_line); - } - - Sink input() { return upb_textprinter_input(ptr_); } - - /* If handler caching becomes a requirement we can add a code cache as in - * decoder.h */ - static HandlerCache NewCache() { - return HandlerCache(upb_textprinter_newcache()); - } - - private: - upb_textprinter* ptr_; -}; - -#endif - -#endif /* UPB_TEXT_H_ */ -/* -** upb::json::Parser (upb_json_parser) -** -** Parses JSON according to a specific schema. -** Support for parsing arbitrary JSON (schema-less) will be added later. -*/ - -#ifndef UPB_JSON_PARSER_H_ -#define UPB_JSON_PARSER_H_ - - -#ifdef __cplusplus -namespace upb { -namespace json { -class CodeCache; -class ParserPtr; -class ParserMethodPtr; -} /* namespace json */ -} /* namespace upb */ -#endif - -/* upb_json_parsermethod ******************************************************/ - -struct upb_json_parsermethod; -typedef struct upb_json_parsermethod upb_json_parsermethod; - -#ifdef __cplusplus -extern "C" { -#endif - -const upb_byteshandler* upb_json_parsermethod_inputhandler( - const upb_json_parsermethod* m); - -#ifdef __cplusplus -} /* extern "C" */ - -class upb::json::ParserMethodPtr { - public: - ParserMethodPtr() : ptr_(nullptr) {} - ParserMethodPtr(const upb_json_parsermethod* ptr) : ptr_(ptr) {} - - const upb_json_parsermethod* ptr() const { return ptr_; } - - const BytesHandler* input_handler() const { - return upb_json_parsermethod_inputhandler(ptr()); - } - - private: - const upb_json_parsermethod* ptr_; -}; - -#endif /* __cplusplus */ - -/* upb_json_parser ************************************************************/ - -/* Preallocation hint: parser won't allocate more bytes than this when first - * constructed. This hint may be an overestimate for some build configurations. - * But if the parser library is upgraded without recompiling the application, - * it may be an underestimate. */ -#define UPB_JSON_PARSER_SIZE 5712 - -struct upb_json_parser; -typedef struct upb_json_parser upb_json_parser; - -#ifdef __cplusplus -extern "C" { -#endif - -upb_json_parser* upb_json_parser_create(upb_arena* a, - const upb_json_parsermethod* m, - const upb_symtab* symtab, - upb_sink output, - upb_status *status, - bool ignore_json_unknown); -upb_bytessink upb_json_parser_input(upb_json_parser* p); - -#ifdef __cplusplus -} /* extern "C" */ - -/* Parses an incoming BytesStream, pushing the results to the destination - * sink. */ -class upb::json::ParserPtr { - public: - ParserPtr(upb_json_parser* ptr) : ptr_(ptr) {} - - static ParserPtr Create(Arena* arena, ParserMethodPtr method, - SymbolTable* symtab, Sink output, Status* status, - bool ignore_json_unknown) { - upb_symtab* symtab_ptr = symtab ? symtab->ptr() : nullptr; - return ParserPtr(upb_json_parser_create( - arena->ptr(), method.ptr(), symtab_ptr, output.sink(), status->ptr(), - ignore_json_unknown)); - } - - BytesSink input() { return upb_json_parser_input(ptr_); } - - private: - upb_json_parser* ptr_; -}; - -#endif /* __cplusplus */ - -/* upb_json_codecache *********************************************************/ - -/* Lazily builds and caches decoder methods that will push data to the given - * handlers. The upb_symtab object(s) must outlive this object. */ - -struct upb_json_codecache; -typedef struct upb_json_codecache upb_json_codecache; - -#ifdef __cplusplus -extern "C" { -#endif - -upb_json_codecache *upb_json_codecache_new(void); -void upb_json_codecache_free(upb_json_codecache *cache); -const upb_json_parsermethod* upb_json_codecache_get(upb_json_codecache* cache, - const upb_msgdef* md); + * Output is placed in the given buffer, and always NULL-terminated. The output + * size (excluding NULL) is returned. This means that a return value >= |size| + * implies that the output was truncated. (These are the same semantics as + * snprintf()). */ +size_t upb_json_encode(const upb_msg *msg, const upb_msgdef *m, + const upb_symtab *ext_pool, int options, char *buf, + size_t size, upb_status *status); #ifdef __cplusplus } /* extern "C" */ - -class upb::json::CodeCache { - public: - CodeCache() : ptr_(upb_json_codecache_new(), upb_json_codecache_free) {} - - /* Returns a DecoderMethod that can push data to the given handlers. - * If a suitable method already exists, it will be returned from the cache. */ - ParserMethodPtr Get(MessageDefPtr md) { - return upb_json_codecache_get(ptr_.get(), md.ptr()); - } - - private: - std::unique_ptr ptr_; -}; - #endif -#endif /* UPB_JSON_PARSER_H_ */ -/* -** upb::json::Printer -** -** Handlers that emit JSON according to a specific protobuf schema. -*/ - -#ifndef UPB_JSON_TYPED_PRINTER_H_ -#define UPB_JSON_TYPED_PRINTER_H_ - - -#ifdef __cplusplus -namespace upb { -namespace json { -class PrinterPtr; -} /* namespace json */ -} /* namespace upb */ -#endif - -/* upb_json_printer ***********************************************************/ - -#define UPB_JSON_PRINTER_SIZE 192 - -struct upb_json_printer; -typedef struct upb_json_printer upb_json_printer; - -#ifdef __cplusplus -extern "C" { -#endif - -/* Native C API. */ -upb_json_printer *upb_json_printer_create(upb_arena *a, const upb_handlers *h, - upb_bytessink output); -upb_sink upb_json_printer_input(upb_json_printer *p); -const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md, - bool preserve_fieldnames, - const void *owner); - -/* Lazily builds and caches handlers that will push encoded data to a bytessink. - * Any msgdef objects used with this object must outlive it. */ -upb_handlercache *upb_json_printer_newcache(bool preserve_proto_fieldnames); - -#ifdef __cplusplus -} /* extern "C" */ - -/* Prints an incoming stream of data to a BytesSink in JSON format. */ -class upb::json::PrinterPtr { - public: - PrinterPtr(upb_json_printer* ptr) : ptr_(ptr) {} - - static PrinterPtr Create(Arena *arena, const upb::Handlers *handlers, - BytesSink output) { - return PrinterPtr( - upb_json_printer_create(arena->ptr(), handlers, output.sink())); - } - - /* The input to the printer. */ - Sink input() { return upb_json_printer_input(ptr_); } - - static const size_t kSize = UPB_JSON_PRINTER_SIZE; - - static HandlerCache NewCache(bool preserve_proto_fieldnames) { - return upb_json_printer_newcache(preserve_proto_fieldnames); - } - - private: - upb_json_printer* ptr_; -}; - -#endif /* __cplusplus */ - -#endif /* UPB_JSON_TYPED_PRINTER_H_ */ +#endif /* UPB_JSONENCODE_H_ */ /* See port_def.inc. This should #undef all macros #defined there. */ #undef UPB_MAPTYPE_STRING @@ -6760,6 +4434,10 @@ class upb::json::PrinterPtr { #undef UPB_READ_ONEOF #undef UPB_WRITE_ONEOF #undef UPB_INLINE +#undef UPB_ALIGN_UP +#undef UPB_ALIGN_DOWN +#undef UPB_ALIGN_MALLOC +#undef UPB_ALIGN_OF #undef UPB_FORCEINLINE #undef UPB_NOINLINE #undef UPB_NORETURN @@ -6768,10 +4446,7 @@ class upb::json::PrinterPtr { #undef UPB_UNUSED #undef UPB_ASSUME #undef UPB_ASSERT -#undef UPB_ASSERT_DEBUGVAR #undef UPB_UNREACHABLE -#undef UPB_INFINITY -#undef UPB_MSVC_VSNPRINTF -#undef _upb_snprintf -#undef _upb_vsnprintf -#undef _upb_va_copy +#undef UPB_POISON_MEMORY_REGION +#undef UPB_UNPOISON_MEMORY_REGION +#undef UPB_ASAN diff --git a/ruby/ext/google/protobuf_c/storage.c b/ruby/ext/google/protobuf_c/storage.c deleted file mode 100644 index a3805c96a34f..000000000000 --- a/ruby/ext/google/protobuf_c/storage.c +++ /dev/null @@ -1,1198 +0,0 @@ -// Protocol Buffers - Google's data interchange format -// Copyright 2014 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "protobuf.h" - -#include - -#include - -// ----------------------------------------------------------------------------- -// Ruby <-> native slot management. -// ----------------------------------------------------------------------------- - -#define CHARPTR_AT(msg, ofs) ((char*)msg + ofs) -#define DEREF_OFFSET(msg, ofs, type) *(type*)CHARPTR_AT(msg, ofs) -#define DEREF(memory, type) *(type*)(memory) - -size_t native_slot_size(upb_fieldtype_t type) { - switch (type) { - case UPB_TYPE_FLOAT: return 4; - case UPB_TYPE_DOUBLE: return 8; - case UPB_TYPE_BOOL: return 1; - case UPB_TYPE_STRING: return sizeof(VALUE); - case UPB_TYPE_BYTES: return sizeof(VALUE); - case UPB_TYPE_MESSAGE: return sizeof(VALUE); - case UPB_TYPE_ENUM: return 4; - case UPB_TYPE_INT32: return 4; - case UPB_TYPE_INT64: return 8; - case UPB_TYPE_UINT32: return 4; - case UPB_TYPE_UINT64: return 8; - default: return 0; - } -} - -static bool is_ruby_num(VALUE value) { - return (TYPE(value) == T_FLOAT || - TYPE(value) == T_FIXNUM || - TYPE(value) == T_BIGNUM); -} - -void native_slot_check_int_range_precision(const char* name, upb_fieldtype_t type, VALUE val) { - if (!is_ruby_num(val)) { - rb_raise(cTypeError, "Expected number type for integral field '%s' (given %s).", - name, rb_class2name(CLASS_OF(val))); - } - - // NUM2{INT,UINT,LL,ULL} macros do the appropriate range checks on upper - // bound; we just need to do precision checks (i.e., disallow rounding) and - // check for < 0 on unsigned types. - if (TYPE(val) == T_FLOAT) { - double dbl_val = NUM2DBL(val); - if (floor(dbl_val) != dbl_val) { - rb_raise(rb_eRangeError, - "Non-integral floating point value assigned to integer field '%s' (given %s).", - name, rb_class2name(CLASS_OF(val))); - } - } - if (type == UPB_TYPE_UINT32 || type == UPB_TYPE_UINT64) { - if (NUM2DBL(val) < 0) { - rb_raise(rb_eRangeError, - "Assigning negative value to unsigned integer field '%s' (given %s).", - name, rb_class2name(CLASS_OF(val))); - } - } -} - -VALUE native_slot_encode_and_freeze_string(upb_fieldtype_t type, VALUE value) { - rb_encoding* desired_encoding = (type == UPB_TYPE_STRING) ? - kRubyStringUtf8Encoding : kRubyString8bitEncoding; - VALUE desired_encoding_value = rb_enc_from_encoding(desired_encoding); - - if (rb_obj_encoding(value) != desired_encoding_value || !OBJ_FROZEN(value)) { - // Note: this will not duplicate underlying string data unless necessary. - value = rb_str_encode(value, desired_encoding_value, 0, Qnil); - - if (type == UPB_TYPE_STRING && - rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) { - rb_raise(rb_eEncodingError, "String is invalid UTF-8"); - } - - // Ensure the data remains valid. Since we called #encode a moment ago, - // this does not freeze the string the user assigned. - rb_obj_freeze(value); - } - - return value; -} - -void native_slot_set(const char* name, - upb_fieldtype_t type, VALUE type_class, - void* memory, VALUE value) { - native_slot_set_value_and_case(name, type, type_class, memory, value, NULL, 0); -} - -void native_slot_set_value_and_case(const char* name, - upb_fieldtype_t type, VALUE type_class, - void* memory, VALUE value, - uint32_t* case_memory, - uint32_t case_number) { - // Note that in order to atomically change the value in memory and the case - // value (w.r.t. Ruby VM calls), we must set the value at |memory| only after - // all Ruby VM calls are complete. The case is then set at the bottom of this - // function. - switch (type) { - case UPB_TYPE_FLOAT: - if (!is_ruby_num(value)) { - rb_raise(cTypeError, "Expected number type for float field '%s' (given %s).", - name, rb_class2name(CLASS_OF(value))); - } - DEREF(memory, float) = NUM2DBL(value); - break; - case UPB_TYPE_DOUBLE: - if (!is_ruby_num(value)) { - rb_raise(cTypeError, "Expected number type for double field '%s' (given %s).", - name, rb_class2name(CLASS_OF(value))); - } - DEREF(memory, double) = NUM2DBL(value); - break; - case UPB_TYPE_BOOL: { - int8_t val = -1; - if (value == Qtrue) { - val = 1; - } else if (value == Qfalse) { - val = 0; - } else { - rb_raise(cTypeError, "Invalid argument for boolean field '%s' (given %s).", - name, rb_class2name(CLASS_OF(value))); - } - DEREF(memory, int8_t) = val; - break; - } - case UPB_TYPE_STRING: - if (CLASS_OF(value) == rb_cSymbol) { - value = rb_funcall(value, rb_intern("to_s"), 0); - } else if (CLASS_OF(value) != rb_cString) { - rb_raise(cTypeError, "Invalid argument for string field '%s' (given %s).", - name, rb_class2name(CLASS_OF(value))); - } - - DEREF(memory, VALUE) = native_slot_encode_and_freeze_string(type, value); - break; - - case UPB_TYPE_BYTES: { - if (CLASS_OF(value) != rb_cString) { - rb_raise(cTypeError, "Invalid argument for bytes field '%s' (given %s).", - name, rb_class2name(CLASS_OF(value))); - } - - DEREF(memory, VALUE) = native_slot_encode_and_freeze_string(type, value); - break; - } - case UPB_TYPE_MESSAGE: { - if (CLASS_OF(value) == CLASS_OF(Qnil)) { - value = Qnil; - } else if (CLASS_OF(value) != type_class) { - // check for possible implicit conversions - VALUE converted_value = Qnil; - const char* field_type_name = rb_class2name(type_class); - - if (strcmp(field_type_name, "Google::Protobuf::Timestamp") == 0 && - rb_obj_is_kind_of(value, rb_cTime)) { - // Time -> Google::Protobuf::Timestamp - VALUE hash = rb_hash_new(); - rb_hash_aset(hash, rb_str_new2("seconds"), - rb_funcall(value, rb_intern("to_i"), 0)); - rb_hash_aset(hash, rb_str_new2("nanos"), - rb_funcall(value, rb_intern("nsec"), 0)); - { - VALUE args[1] = {hash}; - converted_value = rb_class_new_instance(1, args, type_class); - } - } else if (strcmp(field_type_name, "Google::Protobuf::Duration") == 0 && - rb_obj_is_kind_of(value, rb_cNumeric)) { - // Numeric -> Google::Protobuf::Duration - VALUE hash = rb_hash_new(); - rb_hash_aset(hash, rb_str_new2("seconds"), - rb_funcall(value, rb_intern("to_i"), 0)); - { - VALUE n_value = - rb_funcall(value, rb_intern("remainder"), 1, INT2NUM(1)); - n_value = - rb_funcall(n_value, rb_intern("*"), 1, INT2NUM(1000000000)); - n_value = rb_funcall(n_value, rb_intern("round"), 0); - rb_hash_aset(hash, rb_str_new2("nanos"), n_value); - } - { - VALUE args[1] = { hash }; - converted_value = rb_class_new_instance(1, args, type_class); - } - } - - // raise if no suitable conversaion could be found - if (converted_value == Qnil) { - rb_raise(cTypeError, - "Invalid type %s to assign to submessage field '%s'.", - rb_class2name(CLASS_OF(value)), name); - } else { - value = converted_value; - } - } - DEREF(memory, VALUE) = value; - break; - } - case UPB_TYPE_ENUM: { - int32_t int_val = 0; - if (TYPE(value) == T_STRING) { - value = rb_funcall(value, rb_intern("to_sym"), 0); - } else if (!is_ruby_num(value) && TYPE(value) != T_SYMBOL) { - rb_raise(cTypeError, - "Expected number or symbol type for enum field '%s'.", name); - } - if (TYPE(value) == T_SYMBOL) { - // Ensure that the given symbol exists in the enum module. - VALUE lookup = rb_funcall(type_class, rb_intern("resolve"), 1, value); - if (lookup == Qnil) { - rb_raise(rb_eRangeError, "Unknown symbol value for enum field '%s'.", name); - } else { - int_val = NUM2INT(lookup); - } - } else { - native_slot_check_int_range_precision(name, UPB_TYPE_INT32, value); - int_val = NUM2INT(value); - } - DEREF(memory, int32_t) = int_val; - break; - } - case UPB_TYPE_INT32: - case UPB_TYPE_INT64: - case UPB_TYPE_UINT32: - case UPB_TYPE_UINT64: - native_slot_check_int_range_precision(name, type, value); - switch (type) { - case UPB_TYPE_INT32: - DEREF(memory, int32_t) = NUM2INT(value); - break; - case UPB_TYPE_INT64: - DEREF(memory, int64_t) = NUM2LL(value); - break; - case UPB_TYPE_UINT32: - DEREF(memory, uint32_t) = NUM2UINT(value); - break; - case UPB_TYPE_UINT64: - DEREF(memory, uint64_t) = NUM2ULL(value); - break; - default: - break; - } - break; - default: - break; - } - - if (case_memory != NULL) { - *case_memory = case_number; - } -} - -VALUE native_slot_get(upb_fieldtype_t type, - VALUE type_class, - const void* memory) { - switch (type) { - case UPB_TYPE_FLOAT: - return DBL2NUM(DEREF(memory, float)); - case UPB_TYPE_DOUBLE: - return DBL2NUM(DEREF(memory, double)); - case UPB_TYPE_BOOL: - return DEREF(memory, int8_t) ? Qtrue : Qfalse; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - return DEREF(memory, VALUE); - case UPB_TYPE_MESSAGE: { - VALUE val = DEREF(memory, VALUE); - - // Lazily expand wrapper type if necessary. - int type = TYPE(val); - if (type != T_DATA && type != T_NIL) { - // This must be a wrapper type. - val = ruby_wrapper_type(type_class, val); - DEREF(memory, VALUE) = val; - } - - return val; - } - case UPB_TYPE_ENUM: { - int32_t val = DEREF(memory, int32_t); - VALUE symbol = enum_lookup(type_class, INT2NUM(val)); - if (symbol == Qnil) { - return INT2NUM(val); - } else { - return symbol; - } - } - case UPB_TYPE_INT32: - return INT2NUM(DEREF(memory, int32_t)); - case UPB_TYPE_INT64: - return LL2NUM(DEREF(memory, int64_t)); - case UPB_TYPE_UINT32: - return UINT2NUM(DEREF(memory, uint32_t)); - case UPB_TYPE_UINT64: - return ULL2NUM(DEREF(memory, uint64_t)); - default: - return Qnil; - } -} - -void native_slot_init(upb_fieldtype_t type, void* memory) { - switch (type) { - case UPB_TYPE_FLOAT: - DEREF(memory, float) = 0.0; - break; - case UPB_TYPE_DOUBLE: - DEREF(memory, double) = 0.0; - break; - case UPB_TYPE_BOOL: - DEREF(memory, int8_t) = 0; - break; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - DEREF(memory, VALUE) = rb_str_new2(""); - rb_enc_associate(DEREF(memory, VALUE), (type == UPB_TYPE_BYTES) ? - kRubyString8bitEncoding : kRubyStringUtf8Encoding); - break; - case UPB_TYPE_MESSAGE: - DEREF(memory, VALUE) = Qnil; - break; - case UPB_TYPE_ENUM: - case UPB_TYPE_INT32: - DEREF(memory, int32_t) = 0; - break; - case UPB_TYPE_INT64: - DEREF(memory, int64_t) = 0; - break; - case UPB_TYPE_UINT32: - DEREF(memory, uint32_t) = 0; - break; - case UPB_TYPE_UINT64: - DEREF(memory, uint64_t) = 0; - break; - default: - break; - } -} - -void native_slot_mark(upb_fieldtype_t type, void* memory) { - switch (type) { - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - case UPB_TYPE_MESSAGE: - rb_gc_mark(DEREF(memory, VALUE)); - break; - default: - break; - } -} - -void native_slot_dup(upb_fieldtype_t type, void* to, void* from) { - memcpy(to, from, native_slot_size(type)); -} - -void native_slot_deep_copy(upb_fieldtype_t type, VALUE type_class, void* to, - void* from) { - switch (type) { - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: { - VALUE from_val = DEREF(from, VALUE); - DEREF(to, VALUE) = (from_val != Qnil) ? - rb_funcall(from_val, rb_intern("dup"), 0) : Qnil; - break; - } - case UPB_TYPE_MESSAGE: { - VALUE from_val = native_slot_get(type, type_class, from); - DEREF(to, VALUE) = (from_val != Qnil) ? - Message_deep_copy(from_val) : Qnil; - break; - } - default: - memcpy(to, from, native_slot_size(type)); - } -} - -bool native_slot_eq(upb_fieldtype_t type, VALUE type_class, void* mem1, - void* mem2) { - switch (type) { - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - case UPB_TYPE_MESSAGE: { - VALUE val1 = native_slot_get(type, type_class, mem1); - VALUE val2 = native_slot_get(type, type_class, mem2); - VALUE ret = rb_funcall(val1, rb_intern("=="), 1, val2); - return ret == Qtrue; - } - default: - return !memcmp(mem1, mem2, native_slot_size(type)); - } -} - -// ----------------------------------------------------------------------------- -// Map field utilities. -// ----------------------------------------------------------------------------- - -const upb_msgdef* tryget_map_entry_msgdef(const upb_fielddef* field) { - const upb_msgdef* subdef; - if (upb_fielddef_label(field) != UPB_LABEL_REPEATED || - upb_fielddef_type(field) != UPB_TYPE_MESSAGE) { - return NULL; - } - subdef = upb_fielddef_msgsubdef(field); - return upb_msgdef_mapentry(subdef) ? subdef : NULL; -} - -const upb_msgdef *map_entry_msgdef(const upb_fielddef* field) { - const upb_msgdef* subdef = tryget_map_entry_msgdef(field); - assert(subdef); - return subdef; -} - -bool is_map_field(const upb_fielddef *field) { - const upb_msgdef* subdef = tryget_map_entry_msgdef(field); - if (subdef == NULL) return false; - - // Map fields are a proto3 feature. - // If we're using proto2 syntax we need to fallback to the repeated field. - return upb_msgdef_syntax(subdef) == UPB_SYNTAX_PROTO3; -} - -const upb_fielddef* map_field_key(const upb_fielddef* field) { - const upb_msgdef* subdef = map_entry_msgdef(field); - return map_entry_key(subdef); -} - -const upb_fielddef* map_field_value(const upb_fielddef* field) { - const upb_msgdef* subdef = map_entry_msgdef(field); - return map_entry_value(subdef); -} - -const upb_fielddef* map_entry_key(const upb_msgdef* msgdef) { - const upb_fielddef* key_field = upb_msgdef_itof(msgdef, MAP_KEY_FIELD); - assert(key_field != NULL); - return key_field; -} - -const upb_fielddef* map_entry_value(const upb_msgdef* msgdef) { - const upb_fielddef* value_field = upb_msgdef_itof(msgdef, MAP_VALUE_FIELD); - assert(value_field != NULL); - return value_field; -} - -// ----------------------------------------------------------------------------- -// Memory layout management. -// ----------------------------------------------------------------------------- - -bool field_contains_hasbit(MessageLayout* layout, - const upb_fielddef* field) { - return layout->fields[upb_fielddef_index(field)].hasbit != - MESSAGE_FIELD_NO_HASBIT; -} - -static size_t align_up_to(size_t offset, size_t granularity) { - // Granularity must be a power of two. - return (offset + granularity - 1) & ~(granularity - 1); -} - -bool is_value_field(const upb_fielddef* f) { - return upb_fielddef_isseq(f) || upb_fielddef_issubmsg(f) || - upb_fielddef_isstring(f); -} - -void create_layout(Descriptor* desc) { - const upb_msgdef *msgdef = desc->msgdef; - MessageLayout* layout = ALLOC(MessageLayout); - int nfields = upb_msgdef_numfields(msgdef); - int noneofs = upb_msgdef_numrealoneofs(msgdef); - upb_msg_field_iter it; - upb_msg_oneof_iter oit; - size_t off = 0; - size_t hasbit = 0; - int i; - - (void)i; - - layout->empty_template = NULL; - layout->desc = desc; - desc->layout = layout; - - layout->fields = ALLOC_N(MessageField, nfields); - layout->oneofs = NULL; - - if (noneofs > 0) { - layout->oneofs = ALLOC_N(MessageOneof, noneofs); - } - -#ifndef NDEBUG - for (i = 0; i < nfields; i++) { - layout->fields[i].offset = -1; - } - - for (i = 0; i < noneofs; i++) { - layout->oneofs[i].offset = -1; - } -#endif - - for (upb_msg_field_begin(&it, msgdef); - !upb_msg_field_done(&it); - upb_msg_field_next(&it)) { - const upb_fielddef* field = upb_msg_iter_field(&it); - if (upb_fielddef_haspresence(field) && - !upb_fielddef_realcontainingoneof(field)) { - layout->fields[upb_fielddef_index(field)].hasbit = hasbit++; - } else { - layout->fields[upb_fielddef_index(field)].hasbit = - MESSAGE_FIELD_NO_HASBIT; - } - } - - if (hasbit != 0) { - off += (hasbit + 8 - 1) / 8; - } - - off = align_up_to(off, sizeof(VALUE)); - layout->value_offset = off; - layout->repeated_count = 0; - layout->map_count = 0; - layout->value_count = 0; - - // Place all VALUE fields for repeated fields. - for (upb_msg_field_begin(&it, msgdef); - !upb_msg_field_done(&it); - upb_msg_field_next(&it)) { - const upb_fielddef* field = upb_msg_iter_field(&it); - if (upb_fielddef_realcontainingoneof(field) || !upb_fielddef_isseq(field) || - upb_fielddef_ismap(field)) { - continue; - } - - layout->fields[upb_fielddef_index(field)].offset = off; - off += sizeof(VALUE); - layout->repeated_count++; - } - - // Place all VALUE fields for map fields. - for (upb_msg_field_begin(&it, msgdef); - !upb_msg_field_done(&it); - upb_msg_field_next(&it)) { - const upb_fielddef* field = upb_msg_iter_field(&it); - if (upb_fielddef_realcontainingoneof(field) || !upb_fielddef_isseq(field) || - !upb_fielddef_ismap(field)) { - continue; - } - - layout->fields[upb_fielddef_index(field)].offset = off; - off += sizeof(VALUE); - layout->map_count++; - } - - layout->value_count = layout->repeated_count + layout->map_count; - - // Next place all other (non-oneof) VALUE fields. - for (upb_msg_field_begin(&it, msgdef); - !upb_msg_field_done(&it); - upb_msg_field_next(&it)) { - const upb_fielddef* field = upb_msg_iter_field(&it); - if (upb_fielddef_realcontainingoneof(field) || !is_value_field(field) || - upb_fielddef_isseq(field)) { - continue; - } - - layout->fields[upb_fielddef_index(field)].offset = off; - off += sizeof(VALUE); - layout->value_count++; - } - - // Now place all other (non-oneof) fields. - for (upb_msg_field_begin(&it, msgdef); - !upb_msg_field_done(&it); - upb_msg_field_next(&it)) { - const upb_fielddef* field = upb_msg_iter_field(&it); - size_t field_size; - - if (upb_fielddef_realcontainingoneof(field) || is_value_field(field)) { - continue; - } - - // Allocate |field_size| bytes for this field in the layout. - field_size = native_slot_size(upb_fielddef_type(field)); - - // Align current offset up to |size| granularity. - off = align_up_to(off, field_size); - layout->fields[upb_fielddef_index(field)].offset = off; - off += field_size; - } - - // Handle oneofs now -- we iterate over oneofs specifically and allocate only - // one slot per oneof. - // - // We assign all value slots first, then pack the 'case' fields at the end, - // since in the common case (modern 64-bit platform) these are 8 bytes and 4 - // bytes respectively and we want to avoid alignment overhead. - // - // Note that we reserve 4 bytes (a uint32) per 'case' slot because the value - // space for oneof cases is conceptually as wide as field tag numbers. In - // practice, it's unlikely that a oneof would have more than e.g. 256 or 64K - // members (8 or 16 bits respectively), so conceivably we could assign - // consecutive case numbers and then pick a smaller oneof case slot size, but - // the complexity to implement this indirection is probably not worthwhile. - for (upb_msg_oneof_begin(&oit, msgdef); - !upb_msg_oneof_done(&oit); - upb_msg_oneof_next(&oit)) { - const upb_oneofdef* oneof = upb_msg_iter_oneof(&oit); - upb_oneof_iter fit; - - // Always allocate NATIVE_SLOT_MAX_SIZE bytes, but share the slot between - // all fields. - size_t field_size = NATIVE_SLOT_MAX_SIZE; - - if (upb_oneofdef_issynthetic(oneof)) continue; - assert(upb_oneofdef_index(oneof) < noneofs); - - // Align the offset. - off = align_up_to(off, field_size); - // Assign all fields in the oneof this same offset. - for (upb_oneof_begin(&fit, oneof); - !upb_oneof_done(&fit); - upb_oneof_next(&fit)) { - const upb_fielddef* field = upb_oneof_iter_field(&fit); - layout->fields[upb_fielddef_index(field)].offset = off; - layout->oneofs[upb_oneofdef_index(oneof)].offset = off; - } - off += field_size; - } - - // Now the case fields. - for (upb_msg_oneof_begin(&oit, msgdef); - !upb_msg_oneof_done(&oit); - upb_msg_oneof_next(&oit)) { - const upb_oneofdef* oneof = upb_msg_iter_oneof(&oit); - size_t field_size = sizeof(uint32_t); - if (upb_oneofdef_issynthetic(oneof)) continue; - assert(upb_oneofdef_index(oneof) < noneofs); - // Align the offset. - off = (off + field_size - 1) & ~(field_size - 1); - layout->oneofs[upb_oneofdef_index(oneof)].case_offset = off; - off += field_size; - } - - layout->size = off; - layout->msgdef = msgdef; - -#ifndef NDEBUG - for (i = 0; i < nfields; i++) { - assert(layout->fields[i].offset != -1); - } - - for (i = 0; i < noneofs; i++) { - assert(layout->oneofs[i].offset != -1); - } -#endif - - // Create the empty message template. - layout->empty_template = ALLOC_N(char, layout->size); - memset(layout->empty_template, 0, layout->size); - - for (upb_msg_field_begin(&it, layout->msgdef); - !upb_msg_field_done(&it); - upb_msg_field_next(&it)) { - layout_clear(layout, layout->empty_template, upb_msg_iter_field(&it)); - } -} - -void free_layout(MessageLayout* layout) { - xfree(layout->empty_template); - xfree(layout->fields); - xfree(layout->oneofs); - xfree(layout); -} - -VALUE field_type_class(const MessageLayout* layout, const upb_fielddef* field) { - VALUE type_class = Qnil; - if (upb_fielddef_type(field) == UPB_TYPE_MESSAGE) { - VALUE submsgdesc = get_msgdef_obj(layout->desc->descriptor_pool, - upb_fielddef_msgsubdef(field)); - type_class = Descriptor_msgclass(submsgdesc); - } else if (upb_fielddef_type(field) == UPB_TYPE_ENUM) { - VALUE subenumdesc = get_enumdef_obj(layout->desc->descriptor_pool, - upb_fielddef_enumsubdef(field)); - type_class = EnumDescriptor_enummodule(subenumdesc); - } - return type_class; -} - -static void* slot_memory(MessageLayout* layout, - const void* storage, - const upb_fielddef* field) { - return ((uint8_t *)storage) + - layout->fields[upb_fielddef_index(field)].offset; -} - -static uint32_t* slot_oneof_case(MessageLayout* layout, - const void* storage, - const upb_oneofdef* oneof) { - return (uint32_t*)(((uint8_t*)storage) + - layout->oneofs[upb_oneofdef_index(oneof)].case_offset); -} - -uint32_t slot_read_oneof_case(MessageLayout* layout, const void* storage, - const upb_oneofdef* oneof) { - uint32_t* ptr = slot_oneof_case(layout, storage, oneof); - return *ptr & ~ONEOF_CASE_MASK; -} - -static void slot_set_hasbit(MessageLayout* layout, - const void* storage, - const upb_fielddef* field) { - size_t hasbit = layout->fields[upb_fielddef_index(field)].hasbit; - assert(hasbit != MESSAGE_FIELD_NO_HASBIT); - - ((uint8_t*)storage)[hasbit / 8] |= 1 << (hasbit % 8); -} - -static void slot_clear_hasbit(MessageLayout* layout, - const void* storage, - const upb_fielddef* field) { - size_t hasbit = layout->fields[upb_fielddef_index(field)].hasbit; - assert(hasbit != MESSAGE_FIELD_NO_HASBIT); - ((uint8_t*)storage)[hasbit / 8] &= ~(1 << (hasbit % 8)); -} - -static bool slot_is_hasbit_set(MessageLayout* layout, - const void* storage, - const upb_fielddef* field) { - size_t hasbit = layout->fields[upb_fielddef_index(field)].hasbit; - assert(field_contains_hasbit(layout, field)); - return DEREF_OFFSET( - (uint8_t*)storage, hasbit / 8, char) & (1 << (hasbit % 8)); -} - -VALUE layout_has(MessageLayout* layout, - const void* storage, - const upb_fielddef* field) { - const upb_oneofdef* oneof = upb_fielddef_realcontainingoneof(field); - assert(upb_fielddef_haspresence(field)); - if (oneof) { - uint32_t oneof_case = slot_read_oneof_case(layout, storage, oneof); - return oneof_case == upb_fielddef_number(field) ? Qtrue : Qfalse; - } else { - return slot_is_hasbit_set(layout, storage, field) ? Qtrue : Qfalse; - } -} - -void layout_clear(MessageLayout* layout, - const void* storage, - const upb_fielddef* field) { - void* memory = slot_memory(layout, storage, field); - const upb_oneofdef* oneof = upb_fielddef_realcontainingoneof(field); - - if (field_contains_hasbit(layout, field)) { - slot_clear_hasbit(layout, storage, field); - } - - if (oneof) { - uint32_t* oneof_case = slot_oneof_case(layout, storage, oneof); - memset(memory, 0, NATIVE_SLOT_MAX_SIZE); - *oneof_case = ONEOF_CASE_NONE; - } else if (is_map_field(field)) { - VALUE map = Qnil; - - const upb_fielddef* key_field = map_field_key(field); - const upb_fielddef* value_field = map_field_value(field); - VALUE type_class = field_type_class(layout, value_field); - - if (type_class != Qnil) { - VALUE args[3] = { - fieldtype_to_ruby(upb_fielddef_type(key_field)), - fieldtype_to_ruby(upb_fielddef_type(value_field)), - type_class, - }; - map = rb_class_new_instance(3, args, cMap); - } else { - VALUE args[2] = { - fieldtype_to_ruby(upb_fielddef_type(key_field)), - fieldtype_to_ruby(upb_fielddef_type(value_field)), - }; - map = rb_class_new_instance(2, args, cMap); - } - - DEREF(memory, VALUE) = map; - } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { - VALUE ary = Qnil; - - VALUE type_class = field_type_class(layout, field); - - if (type_class != Qnil) { - VALUE args[2] = { - fieldtype_to_ruby(upb_fielddef_type(field)), - type_class, - }; - ary = rb_class_new_instance(2, args, cRepeatedField); - } else { - VALUE args[1] = { fieldtype_to_ruby(upb_fielddef_type(field)) }; - ary = rb_class_new_instance(1, args, cRepeatedField); - } - - DEREF(memory, VALUE) = ary; - } else { - native_slot_set(upb_fielddef_name(field), upb_fielddef_type(field), - field_type_class(layout, field), memory, - layout_get_default(field)); - } -} - -VALUE layout_get_default(const upb_fielddef *field) { - switch (upb_fielddef_type(field)) { - case UPB_TYPE_FLOAT: return DBL2NUM(upb_fielddef_defaultfloat(field)); - case UPB_TYPE_DOUBLE: return DBL2NUM(upb_fielddef_defaultdouble(field)); - case UPB_TYPE_BOOL: - return upb_fielddef_defaultbool(field) ? Qtrue : Qfalse; - case UPB_TYPE_MESSAGE: return Qnil; - case UPB_TYPE_ENUM: { - const upb_enumdef *enumdef = upb_fielddef_enumsubdef(field); - int32_t num = upb_fielddef_defaultint32(field); - const char *label = upb_enumdef_iton(enumdef, num); - if (label) { - return ID2SYM(rb_intern(label)); - } else { - return INT2NUM(num); - } - } - case UPB_TYPE_INT32: return INT2NUM(upb_fielddef_defaultint32(field)); - case UPB_TYPE_INT64: return LL2NUM(upb_fielddef_defaultint64(field));; - case UPB_TYPE_UINT32: return UINT2NUM(upb_fielddef_defaultuint32(field)); - case UPB_TYPE_UINT64: return ULL2NUM(upb_fielddef_defaultuint64(field)); - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: { - size_t size; - const char *str = upb_fielddef_defaultstr(field, &size); - return get_frozen_string(str, size, - upb_fielddef_type(field) == UPB_TYPE_BYTES); - } - default: return Qnil; - } -} - -VALUE layout_get(MessageLayout* layout, - const void* storage, - const upb_fielddef* field) { - void* memory = slot_memory(layout, storage, field); - const upb_oneofdef* oneof = upb_fielddef_realcontainingoneof(field); - bool field_set; - if (field_contains_hasbit(layout, field)) { - field_set = slot_is_hasbit_set(layout, storage, field); - } else { - field_set = true; - } - - if (oneof) { - uint32_t oneof_case = slot_read_oneof_case(layout, storage, oneof); - if (oneof_case != upb_fielddef_number(field)) { - return layout_get_default(field); - } - return native_slot_get(upb_fielddef_type(field), - field_type_class(layout, field), memory); - } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { - return *((VALUE *)memory); - } else if (!field_set) { - return layout_get_default(field); - } else { - return native_slot_get(upb_fielddef_type(field), - field_type_class(layout, field), memory); - } -} - -static void check_repeated_field_type(const MessageLayout* layout, VALUE val, - const upb_fielddef* field) { - RepeatedField* self; - assert(upb_fielddef_label(field) == UPB_LABEL_REPEATED); - - if (!RB_TYPE_P(val, T_DATA) || !RTYPEDDATA_P(val) || - RTYPEDDATA_TYPE(val) != &RepeatedField_type) { - rb_raise(cTypeError, "Expected repeated field array"); - } - - self = ruby_to_RepeatedField(val); - if (self->field_type != upb_fielddef_type(field)) { - rb_raise(cTypeError, "Repeated field array has wrong element type"); - } - - if (self->field_type_class != field_type_class(layout, field)) { - rb_raise(cTypeError, "Repeated field array has wrong message/enum class"); - } -} - -static void check_map_field_type(const MessageLayout* layout, VALUE val, - const upb_fielddef* field) { - const upb_fielddef* key_field = map_field_key(field); - const upb_fielddef* value_field = map_field_value(field); - Map* self; - - if (!RB_TYPE_P(val, T_DATA) || !RTYPEDDATA_P(val) || - RTYPEDDATA_TYPE(val) != &Map_type) { - rb_raise(cTypeError, "Expected Map instance"); - } - - self = ruby_to_Map(val); - if (self->key_type != upb_fielddef_type(key_field)) { - rb_raise(cTypeError, "Map key type does not match field's key type"); - } - if (self->value_type != upb_fielddef_type(value_field)) { - rb_raise(cTypeError, "Map value type does not match field's value type"); - } - if (self->value_type_class != field_type_class(layout, value_field)) { - rb_raise(cTypeError, "Map value type has wrong message/enum class"); - } -} - -void layout_set(MessageLayout* layout, - void* storage, - const upb_fielddef* field, - VALUE val) { - void* memory = slot_memory(layout, storage, field); - const upb_oneofdef* oneof = upb_fielddef_realcontainingoneof(field); - - if (oneof) { - uint32_t* oneof_case = slot_oneof_case(layout, storage, oneof); - if (val == Qnil) { - // Assigning nil to a oneof field clears the oneof completely. - *oneof_case = ONEOF_CASE_NONE; - memset(memory, 0, NATIVE_SLOT_MAX_SIZE); - } else { - // The transition between field types for a single oneof (union) slot is - // somewhat complex because we need to ensure that a GC triggered at any - // point by a call into the Ruby VM sees a valid state for this field and - // does not either go off into the weeds (following what it thinks is a - // VALUE but is actually a different field type) or miss an object (seeing - // what it thinks is a primitive field but is actually a VALUE for the new - // field type). - // - // In order for the transition to be safe, the oneof case slot must be in - // sync with the value slot whenever the Ruby VM has been called. Thus, we - // use native_slot_set_value_and_case(), which ensures that both the value - // and case number are altered atomically (w.r.t. the Ruby VM). - uint32_t case_value = upb_fielddef_number(field); - if (upb_fielddef_issubmsg(field) || upb_fielddef_isstring(field)) { - case_value |= ONEOF_CASE_MASK; - } - - native_slot_set_value_and_case( - upb_fielddef_name(field), upb_fielddef_type(field), - field_type_class(layout, field), memory, val, oneof_case, case_value); - } - } else if (is_map_field(field)) { - check_map_field_type(layout, val, field); - DEREF(memory, VALUE) = val; - } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { - check_repeated_field_type(layout, val, field); - DEREF(memory, VALUE) = val; - } else { - native_slot_set(upb_fielddef_name(field), upb_fielddef_type(field), - field_type_class(layout, field), memory, val); - } - - if (layout->fields[upb_fielddef_index(field)].hasbit != - MESSAGE_FIELD_NO_HASBIT) { - if (val == Qnil) { - // No other field type has a hasbit and allows nil assignment. - if (upb_fielddef_type(field) != UPB_TYPE_MESSAGE) { - fprintf(stderr, "field: %s\n", upb_fielddef_fullname(field)); - } - assert(upb_fielddef_type(field) == UPB_TYPE_MESSAGE); - slot_clear_hasbit(layout, storage, field); - } else { - slot_set_hasbit(layout, storage, field); - } - } -} - -void layout_init(MessageLayout* layout, void* storage) { - VALUE* value = (VALUE*)CHARPTR_AT(storage, layout->value_offset); - int i; - - for (i = 0; i < layout->repeated_count; i++, value++) { - *value = RepeatedField_new_this_type(*value); - } - - for (i = 0; i < layout->map_count; i++, value++) { - *value = Map_new_this_type(*value); - } -} - -void layout_mark(MessageLayout* layout, void* storage) { - VALUE* values = (VALUE*)CHARPTR_AT(storage, layout->value_offset); - int noneofs = upb_msgdef_numrealoneofs(layout->msgdef); - int i; - - for (i = 0; i < layout->value_count; i++) { - rb_gc_mark(values[i]); - } - - for (i = 0; i < noneofs; i++) { - MessageOneof* oneof = &layout->oneofs[i]; - uint32_t* case_ptr = (uint32_t*)CHARPTR_AT(storage, oneof->case_offset); - if (*case_ptr & ONEOF_CASE_MASK) { - rb_gc_mark(DEREF_OFFSET(storage, oneof->offset, VALUE)); - } - } -} - -void layout_dup(MessageLayout* layout, void* to, void* from) { - upb_msg_field_iter it; - for (upb_msg_field_begin(&it, layout->msgdef); - !upb_msg_field_done(&it); - upb_msg_field_next(&it)) { - const upb_fielddef* field = upb_msg_iter_field(&it); - const upb_oneofdef* oneof = upb_fielddef_realcontainingoneof(field); - - void* to_memory = slot_memory(layout, to, field); - void* from_memory = slot_memory(layout, from, field); - - if (oneof) { - uint32_t* to_oneof_case = slot_oneof_case(layout, to, oneof); - uint32_t* from_oneof_case = slot_oneof_case(layout, from, oneof); - if (slot_read_oneof_case(layout, from, oneof) == - upb_fielddef_number(field)) { - *to_oneof_case = *from_oneof_case; - native_slot_dup(upb_fielddef_type(field), to_memory, from_memory); - } - } else if (is_map_field(field)) { - DEREF(to_memory, VALUE) = Map_dup(DEREF(from_memory, VALUE)); - } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { - DEREF(to_memory, VALUE) = RepeatedField_dup(DEREF(from_memory, VALUE)); - } else { - if (field_contains_hasbit(layout, field)) { - if (!slot_is_hasbit_set(layout, from, field)) continue; - slot_set_hasbit(layout, to, field); - } - - native_slot_dup(upb_fielddef_type(field), to_memory, from_memory); - } - } -} - -void layout_deep_copy(MessageLayout* layout, void* to, void* from) { - upb_msg_field_iter it; - for (upb_msg_field_begin(&it, layout->msgdef); - !upb_msg_field_done(&it); - upb_msg_field_next(&it)) { - const upb_fielddef* field = upb_msg_iter_field(&it); - const upb_oneofdef* oneof = upb_fielddef_realcontainingoneof(field); - - void* to_memory = slot_memory(layout, to, field); - void* from_memory = slot_memory(layout, from, field); - - if (oneof) { - uint32_t* to_oneof_case = slot_oneof_case(layout, to, oneof); - uint32_t* from_oneof_case = slot_oneof_case(layout, from, oneof); - if (slot_read_oneof_case(layout, from, oneof) == - upb_fielddef_number(field)) { - *to_oneof_case = *from_oneof_case; - native_slot_deep_copy(upb_fielddef_type(field), - field_type_class(layout, field), to_memory, - from_memory); - } - } else if (is_map_field(field)) { - DEREF(to_memory, VALUE) = - Map_deep_copy(DEREF(from_memory, VALUE)); - } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { - DEREF(to_memory, VALUE) = - RepeatedField_deep_copy(DEREF(from_memory, VALUE)); - } else { - if (field_contains_hasbit(layout, field)) { - if (!slot_is_hasbit_set(layout, from, field)) continue; - slot_set_hasbit(layout, to, field); - } - - native_slot_deep_copy(upb_fielddef_type(field), - field_type_class(layout, field), to_memory, - from_memory); - } - } -} - -VALUE layout_eq(MessageLayout* layout, void* msg1, void* msg2) { - upb_msg_field_iter it; - for (upb_msg_field_begin(&it, layout->msgdef); - !upb_msg_field_done(&it); - upb_msg_field_next(&it)) { - const upb_fielddef* field = upb_msg_iter_field(&it); - const upb_oneofdef* oneof = upb_fielddef_realcontainingoneof(field); - - void* msg1_memory = slot_memory(layout, msg1, field); - void* msg2_memory = slot_memory(layout, msg2, field); - - if (oneof) { - uint32_t* msg1_oneof_case = slot_oneof_case(layout, msg1, oneof); - uint32_t* msg2_oneof_case = slot_oneof_case(layout, msg2, oneof); - if (*msg1_oneof_case != *msg2_oneof_case || - (slot_read_oneof_case(layout, msg1, oneof) == - upb_fielddef_number(field) && - !native_slot_eq(upb_fielddef_type(field), - field_type_class(layout, field), msg1_memory, - msg2_memory))) { - return Qfalse; - } - } else if (is_map_field(field)) { - if (!Map_eq(DEREF(msg1_memory, VALUE), - DEREF(msg2_memory, VALUE))) { - return Qfalse; - } - } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { - if (!RepeatedField_eq(DEREF(msg1_memory, VALUE), - DEREF(msg2_memory, VALUE))) { - return Qfalse; - } - } else { - if (field_contains_hasbit(layout, field) && - slot_is_hasbit_set(layout, msg1, field) != - slot_is_hasbit_set(layout, msg2, field)) { - // TODO(haberman): I don't think we should actually care about hasbits - // here: an unset default should be able to equal a set default. But we - // can address this later (will also have to make sure defaults are - // being properly set when hasbit is clear). - return Qfalse; - } - if (!native_slot_eq(upb_fielddef_type(field), - field_type_class(layout, field), msg1_memory, - msg2_memory)) { - return Qfalse; - } - } - } - return Qtrue; -} - -VALUE layout_hash(MessageLayout* layout, void* storage) { - upb_msg_field_iter it; - st_index_t h = rb_hash_start(0); - VALUE hash_sym = rb_intern("hash"); - for (upb_msg_field_begin(&it, layout->msgdef); - !upb_msg_field_done(&it); - upb_msg_field_next(&it)) { - const upb_fielddef* field = upb_msg_iter_field(&it); - VALUE field_val = layout_get(layout, storage, field); - h = rb_hash_uint(h, NUM2LONG(rb_funcall(field_val, hash_sym, 0))); - } - h = rb_hash_end(h); - - return INT2FIX(h); -} - -VALUE layout_inspect(MessageLayout* layout, void* storage) { - VALUE str = rb_str_new2(""); - - upb_msg_field_iter it; - bool first = true; - for (upb_msg_field_begin(&it, layout->msgdef); - !upb_msg_field_done(&it); - upb_msg_field_next(&it)) { - const upb_fielddef* field = upb_msg_iter_field(&it); - VALUE field_val = layout_get(layout, storage, field); - - if (!first) { - str = rb_str_cat2(str, ", "); - } else { - first = false; - } - str = rb_str_cat2(str, upb_fielddef_name(field)); - str = rb_str_cat2(str, ": "); - - str = rb_str_append(str, rb_funcall(field_val, rb_intern("inspect"), 0)); - } - - return str; -} diff --git a/ruby/ext/google/protobuf_c/upb.c b/ruby/ext/google/protobuf_c/upb.c deleted file mode 100644 index fb5009cb2cc6..000000000000 --- a/ruby/ext/google/protobuf_c/upb.c +++ /dev/null @@ -1,13817 +0,0 @@ -/* Amalgamated source file */ -#include "upb.h" -/* -* This is where we define macros used across upb. -* -* All of these macros are undef'd in port_undef.inc to avoid leaking them to -* users. -* -* The correct usage is: -* -* #include "upb/foobar.h" -* #include "upb/baz.h" -* -* // MUST be last included header. -* #include "upb/port_def.inc" -* -* // Code for this file. -* // <...> -* -* // Can be omitted for .c files, required for .h. -* #include "upb/port_undef.inc" -* -* This file is private and must not be included by users! -*/ -#include - -#if UINTPTR_MAX == 0xffffffff -#define UPB_SIZE(size32, size64) size32 -#else -#define UPB_SIZE(size32, size64) size64 -#endif - -/* If we always read/write as a consistent type to each address, this shouldn't - * violate aliasing. - */ -#define UPB_PTR_AT(msg, ofs, type) ((type*)((char*)(msg) + (ofs))) - -#define UPB_READ_ONEOF(msg, fieldtype, offset, case_offset, case_val, default) \ - *UPB_PTR_AT(msg, case_offset, int) == case_val \ - ? *UPB_PTR_AT(msg, offset, fieldtype) \ - : default - -#define UPB_WRITE_ONEOF(msg, fieldtype, offset, value, case_offset, case_val) \ - *UPB_PTR_AT(msg, case_offset, int) = case_val; \ - *UPB_PTR_AT(msg, offset, fieldtype) = value; - -#define UPB_MAPTYPE_STRING 0 - -/* UPB_INLINE: inline if possible, emit standalone code if required. */ -#ifdef __cplusplus -#define UPB_INLINE inline -#elif defined (__GNUC__) || defined(__clang__) -#define UPB_INLINE static __inline__ -#else -#define UPB_INLINE static -#endif - -/* Hints to the compiler about likely/unlikely branches. */ -#if defined (__GNUC__) || defined(__clang__) -#define UPB_LIKELY(x) __builtin_expect((x),1) -#define UPB_UNLIKELY(x) __builtin_expect((x),0) -#else -#define UPB_LIKELY(x) (x) -#define UPB_UNLIKELY(x) (x) -#endif - -/* Define UPB_BIG_ENDIAN manually if you're on big endian and your compiler - * doesn't provide these preprocessor symbols. */ -#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -#define UPB_BIG_ENDIAN -#endif - -/* Macros for function attributes on compilers that support them. */ -#ifdef __GNUC__ -#define UPB_FORCEINLINE __inline__ __attribute__((always_inline)) -#define UPB_NOINLINE __attribute__((noinline)) -#define UPB_NORETURN __attribute__((__noreturn__)) -#else /* !defined(__GNUC__) */ -#define UPB_FORCEINLINE -#define UPB_NOINLINE -#define UPB_NORETURN -#endif - -#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L -/* C99/C++11 versions. */ -#include -#define _upb_snprintf snprintf -#define _upb_vsnprintf vsnprintf -#define _upb_va_copy(a, b) va_copy(a, b) -#elif defined(_MSC_VER) -/* Microsoft C/C++ versions. */ -#include -#include -#if _MSC_VER < 1900 -int msvc_snprintf(char* s, size_t n, const char* format, ...); -int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg); -#define UPB_MSVC_VSNPRINTF -#define _upb_snprintf msvc_snprintf -#define _upb_vsnprintf msvc_vsnprintf -#else -#define _upb_snprintf snprintf -#define _upb_vsnprintf vsnprintf -#endif -#define _upb_va_copy(a, b) va_copy(a, b) -#elif defined __GNUC__ -/* A few hacky workarounds for functions not in C89. - * For internal use only! - * TODO(haberman): fix these by including our own implementations, or finding - * another workaround. - */ -#define _upb_snprintf __builtin_snprintf -#define _upb_vsnprintf __builtin_vsnprintf -#define _upb_va_copy(a, b) __va_copy(a, b) -#else -#error Need implementations of [v]snprintf and va_copy -#endif - -#ifdef __cplusplus -#if __cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__) || \ - (defined(_MSC_VER) && _MSC_VER >= 1900) -/* C++11 is present */ -#else -#error upb requires C++11 for C++ support -#endif -#endif - -#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) -#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y)) - -#define UPB_UNUSED(var) (void)var - -/* UPB_ASSUME(): in release mode, we tell the compiler to assume this is true. - */ -#ifdef NDEBUG -#ifdef __GNUC__ -#define UPB_ASSUME(expr) if (!(expr)) __builtin_unreachable() -#else -#define UPB_ASSUME(expr) do {} if (false && (expr)) -#endif -#else -#define UPB_ASSUME(expr) assert(expr) -#endif - -/* UPB_ASSERT(): in release mode, we use the expression without letting it be - * evaluated. This prevents "unused variable" warnings. */ -#ifdef NDEBUG -#define UPB_ASSERT(expr) do {} while (false && (expr)) -#else -#define UPB_ASSERT(expr) assert(expr) -#endif - -/* UPB_ASSERT_DEBUGVAR(): assert that uses functions or variables that only - * exist in debug mode. This turns into regular assert. */ -#define UPB_ASSERT_DEBUGVAR(expr) assert(expr) - -#if defined(__GNUC__) || defined(__clang__) -#define UPB_UNREACHABLE() do { assert(0); __builtin_unreachable(); } while(0) -#else -#define UPB_UNREACHABLE() do { assert(0); } while(0) -#endif - -/* UPB_INFINITY representing floating-point positive infinity. */ -#include -#ifdef INFINITY -#define UPB_INFINITY INFINITY -#else -#define UPB_INFINITY (1.0 / 0.0) -#endif - -#include -#include - - - -/* Maps descriptor type -> upb field type. */ -static const uint8_t desctype_to_fieldtype[] = { - -1, /* invalid descriptor type */ - UPB_TYPE_DOUBLE, /* DOUBLE */ - UPB_TYPE_FLOAT, /* FLOAT */ - UPB_TYPE_INT64, /* INT64 */ - UPB_TYPE_UINT64, /* UINT64 */ - UPB_TYPE_INT32, /* INT32 */ - UPB_TYPE_UINT64, /* FIXED64 */ - UPB_TYPE_UINT32, /* FIXED32 */ - UPB_TYPE_BOOL, /* BOOL */ - UPB_TYPE_STRING, /* STRING */ - UPB_TYPE_MESSAGE, /* GROUP */ - UPB_TYPE_MESSAGE, /* MESSAGE */ - UPB_TYPE_BYTES, /* BYTES */ - UPB_TYPE_UINT32, /* UINT32 */ - UPB_TYPE_ENUM, /* ENUM */ - UPB_TYPE_INT32, /* SFIXED32 */ - UPB_TYPE_INT64, /* SFIXED64 */ - UPB_TYPE_INT32, /* SINT32 */ - UPB_TYPE_INT64, /* SINT64 */ -}; - -/* Maps descriptor type -> upb map size. */ -static const uint8_t desctype_to_mapsize[] = { - -1, /* invalid descriptor type */ - 8, /* DOUBLE */ - 4, /* FLOAT */ - 8, /* INT64 */ - 8, /* UINT64 */ - 4, /* INT32 */ - 8, /* FIXED64 */ - 4, /* FIXED32 */ - 1, /* BOOL */ - UPB_MAPTYPE_STRING, /* STRING */ - sizeof(void *), /* GROUP */ - sizeof(void *), /* MESSAGE */ - UPB_MAPTYPE_STRING, /* BYTES */ - 4, /* UINT32 */ - 4, /* ENUM */ - 4, /* SFIXED32 */ - 8, /* SFIXED64 */ - 4, /* SINT32 */ - 8, /* SINT64 */ -}; - -static const unsigned fixed32_ok = (1 << UPB_DTYPE_FLOAT) | - (1 << UPB_DTYPE_FIXED32) | - (1 << UPB_DTYPE_SFIXED32); - -static const unsigned fixed64_ok = (1 << UPB_DTYPE_DOUBLE) | - (1 << UPB_DTYPE_FIXED64) | - (1 << UPB_DTYPE_SFIXED64); - -/* Op: an action to be performed for a wire-type/field-type combination. */ -#define OP_SCALAR_LG2(n) (n) -#define OP_FIXPCK_LG2(n) (n + 4) -#define OP_VARPCK_LG2(n) (n + 8) -#define OP_STRING 4 -#define OP_SUBMSG 5 - -static const int8_t varint_ops[19] = { - -1, /* field not found */ - -1, /* DOUBLE */ - -1, /* FLOAT */ - OP_SCALAR_LG2(3), /* INT64 */ - OP_SCALAR_LG2(3), /* UINT64 */ - OP_SCALAR_LG2(2), /* INT32 */ - -1, /* FIXED64 */ - -1, /* FIXED32 */ - OP_SCALAR_LG2(0), /* BOOL */ - -1, /* STRING */ - -1, /* GROUP */ - -1, /* MESSAGE */ - -1, /* BYTES */ - OP_SCALAR_LG2(2), /* UINT32 */ - OP_SCALAR_LG2(2), /* ENUM */ - -1, /* SFIXED32 */ - -1, /* SFIXED64 */ - OP_SCALAR_LG2(2), /* SINT32 */ - OP_SCALAR_LG2(3), /* SINT64 */ -}; - -static const int8_t delim_ops[37] = { - /* For non-repeated field type. */ - -1, /* field not found */ - -1, /* DOUBLE */ - -1, /* FLOAT */ - -1, /* INT64 */ - -1, /* UINT64 */ - -1, /* INT32 */ - -1, /* FIXED64 */ - -1, /* FIXED32 */ - -1, /* BOOL */ - OP_STRING, /* STRING */ - -1, /* GROUP */ - OP_SUBMSG, /* MESSAGE */ - OP_STRING, /* BYTES */ - -1, /* UINT32 */ - -1, /* ENUM */ - -1, /* SFIXED32 */ - -1, /* SFIXED64 */ - -1, /* SINT32 */ - -1, /* SINT64 */ - /* For repeated field type. */ - OP_FIXPCK_LG2(3), /* REPEATED DOUBLE */ - OP_FIXPCK_LG2(2), /* REPEATED FLOAT */ - OP_VARPCK_LG2(3), /* REPEATED INT64 */ - OP_VARPCK_LG2(3), /* REPEATED UINT64 */ - OP_VARPCK_LG2(2), /* REPEATED INT32 */ - OP_FIXPCK_LG2(3), /* REPEATED FIXED64 */ - OP_FIXPCK_LG2(2), /* REPEATED FIXED32 */ - OP_VARPCK_LG2(0), /* REPEATED BOOL */ - OP_STRING, /* REPEATED STRING */ - OP_SUBMSG, /* REPEATED GROUP */ - OP_SUBMSG, /* REPEATED MESSAGE */ - OP_STRING, /* REPEATED BYTES */ - OP_VARPCK_LG2(2), /* REPEATED UINT32 */ - OP_VARPCK_LG2(2), /* REPEATED ENUM */ - OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */ - OP_FIXPCK_LG2(3), /* REPEATED SFIXED64 */ - OP_VARPCK_LG2(2), /* REPEATED SINT32 */ - OP_VARPCK_LG2(3), /* REPEATED SINT64 */ -}; - -/* Data pertaining to the parse. */ -typedef struct { - const char *limit; /* End of delimited region or end of buffer. */ - upb_arena *arena; - int depth; - uint32_t end_group; /* Set to field number of END_GROUP tag, if any. */ - jmp_buf err; -} upb_decstate; - -typedef union { - bool bool_val; - int32_t int32_val; - int64_t int64_val; - uint32_t uint32_val; - uint64_t uint64_val; - upb_strview str_val; -} wireval; - -static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg, - const upb_msglayout *layout); - -UPB_NORETURN static void decode_err(upb_decstate *d) { longjmp(d->err, 1); } - -static bool decode_reserve(upb_decstate *d, upb_array *arr, int elem) { - bool need_realloc = arr->size - arr->len < elem; - if (need_realloc && !_upb_array_realloc(arr, arr->len + elem, d->arena)) { - decode_err(d); - } - return need_realloc; -} - -UPB_NOINLINE -static const char *decode_longvarint64(upb_decstate *d, const char *ptr, - const char *limit, uint64_t *val) { - uint8_t byte; - int bitpos = 0; - uint64_t out = 0; - - do { - if (bitpos >= 70 || ptr == limit) decode_err(d); - byte = *ptr; - out |= (uint64_t)(byte & 0x7F) << bitpos; - ptr++; - bitpos += 7; - } while (byte & 0x80); - - *val = out; - return ptr; -} - -UPB_FORCEINLINE -static const char *decode_varint64(upb_decstate *d, const char *ptr, - const char *limit, uint64_t *val) { - if (UPB_LIKELY(ptr < limit && (*ptr & 0x80) == 0)) { - *val = (uint8_t)*ptr; - return ptr + 1; - } else { - return decode_longvarint64(d, ptr, limit, val); - } -} - -static const char *decode_varint32(upb_decstate *d, const char *ptr, - const char *limit, uint32_t *val) { - uint64_t u64; - ptr = decode_varint64(d, ptr, limit, &u64); - if (u64 > UINT32_MAX) decode_err(d); - *val = (uint32_t)u64; - return ptr; -} - -static void decode_munge(int type, wireval *val) { - switch (type) { - case UPB_DESCRIPTOR_TYPE_BOOL: - val->bool_val = val->uint64_val != 0; - break; - case UPB_DESCRIPTOR_TYPE_SINT32: { - uint32_t n = val->uint32_val; - val->int32_val = (n >> 1) ^ -(int32_t)(n & 1); - break; - } - case UPB_DESCRIPTOR_TYPE_SINT64: { - uint64_t n = val->uint64_val; - val->int64_val = (n >> 1) ^ -(int64_t)(n & 1); - break; - } - } -} - -static const upb_msglayout_field *upb_find_field(const upb_msglayout *l, - uint32_t field_number) { - static upb_msglayout_field none = {0}; - - /* Lots of optimization opportunities here. */ - int i; - if (l == NULL) return &none; - for (i = 0; i < l->field_count; i++) { - if (l->fields[i].number == field_number) { - return &l->fields[i]; - } - } - - return &none; /* Unknown field. */ -} - -static upb_msg *decode_newsubmsg(upb_decstate *d, const upb_msglayout *layout, - const upb_msglayout_field *field) { - const upb_msglayout *subl = layout->submsgs[field->submsg_index]; - return _upb_msg_new(subl, d->arena); -} - -static void decode_tosubmsg(upb_decstate *d, upb_msg *submsg, - const upb_msglayout *layout, - const upb_msglayout_field *field, upb_strview val) { - const upb_msglayout *subl = layout->submsgs[field->submsg_index]; - const char *saved_limit = d->limit; - if (--d->depth < 0) decode_err(d); - d->limit = val.data + val.size; - decode_msg(d, val.data, submsg, subl); - d->limit = saved_limit; - if (d->end_group != 0) decode_err(d); - d->depth++; -} - -static const char *decode_group(upb_decstate *d, const char *ptr, - upb_msg *submsg, const upb_msglayout *subl, - uint32_t number) { - if (--d->depth < 0) decode_err(d); - ptr = decode_msg(d, ptr, submsg, subl); - if (d->end_group != number) decode_err(d); - d->end_group = 0; - d->depth++; - return ptr; -} - -static const char *decode_togroup(upb_decstate *d, const char *ptr, - upb_msg *submsg, const upb_msglayout *layout, - const upb_msglayout_field *field) { - const upb_msglayout *subl = layout->submsgs[field->submsg_index]; - return decode_group(d, ptr, submsg, subl, field->number); -} - -static const char *decode_toarray(upb_decstate *d, const char *ptr, - upb_msg *msg, const upb_msglayout *layout, - const upb_msglayout_field *field, wireval val, - int op) { - upb_array **arrp = UPB_PTR_AT(msg, field->offset, void); - upb_array *arr = *arrp; - void *mem; - - if (!arr) { - upb_fieldtype_t type = desctype_to_fieldtype[field->descriptortype]; - arr = _upb_array_new(d->arena, type); - if (!arr) decode_err(d); - *arrp = arr; - } - - decode_reserve(d, arr, 1); - - switch (op) { - case OP_SCALAR_LG2(0): - case OP_SCALAR_LG2(2): - case OP_SCALAR_LG2(3): - /* Append scalar value. */ - mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << op, void); - arr->len++; - memcpy(mem, &val, 1 << op); - return ptr; - case OP_STRING: - /* Append string. */ - mem = - UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(upb_strview), void); - arr->len++; - memcpy(mem, &val, sizeof(upb_strview)); - return ptr; - case OP_SUBMSG: { - /* Append submessage / group. */ - upb_msg *submsg = decode_newsubmsg(d, layout, field); - *UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(void *), upb_msg *) = - submsg; - arr->len++; - if (UPB_UNLIKELY(field->descriptortype == UPB_DTYPE_GROUP)) { - ptr = decode_togroup(d, ptr, submsg, layout, field); - } else { - decode_tosubmsg(d, submsg, layout, field, val.str_val); - } - return ptr; - } - case OP_FIXPCK_LG2(2): - case OP_FIXPCK_LG2(3): { - /* Fixed packed. */ - int lg2 = op - OP_FIXPCK_LG2(0); - int mask = (1 << lg2) - 1; - int count = val.str_val.size >> lg2; - if ((val.str_val.size & mask) != 0) { - decode_err(d); /* Length isn't a round multiple of elem size. */ - } - decode_reserve(d, arr, count); - mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void); - arr->len += count; - memcpy(mem, val.str_val.data, count << op); - return ptr; - } - case OP_VARPCK_LG2(0): - case OP_VARPCK_LG2(2): - case OP_VARPCK_LG2(3): { - /* Varint packed. */ - int lg2 = op - OP_VARPCK_LG2(0); - int scale = 1 << lg2; - const char *ptr = val.str_val.data; - const char *end = ptr + val.str_val.size; - char *out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void); - while (ptr < end) { - wireval elem; - ptr = decode_varint64(d, ptr, end, &elem.uint64_val); - decode_munge(field->descriptortype, &elem); - if (decode_reserve(d, arr, 1)) { - out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void); - } - arr->len++; - memcpy(out, &elem, scale); - out += scale; - } - if (ptr != end) decode_err(d); - return ptr; - } - default: - UPB_UNREACHABLE(); - } -} - -static void decode_tomap(upb_decstate *d, upb_msg *msg, - const upb_msglayout *layout, - const upb_msglayout_field *field, wireval val) { - upb_map **map_p = UPB_PTR_AT(msg, field->offset, upb_map *); - upb_map *map = *map_p; - upb_map_entry ent; - const upb_msglayout *entry = layout->submsgs[field->submsg_index]; - - if (!map) { - /* Lazily create map. */ - const upb_msglayout *entry = layout->submsgs[field->submsg_index]; - const upb_msglayout_field *key_field = &entry->fields[0]; - const upb_msglayout_field *val_field = &entry->fields[1]; - char key_size = desctype_to_mapsize[key_field->descriptortype]; - char val_size = desctype_to_mapsize[val_field->descriptortype]; - UPB_ASSERT(key_field->offset == 0); - UPB_ASSERT(val_field->offset == sizeof(upb_strview)); - map = _upb_map_new(d->arena, key_size, val_size); - *map_p = map; - } - - /* Parse map entry. */ - memset(&ent, 0, sizeof(ent)); - - if (entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE || - entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_GROUP) { - /* Create proactively to handle the case where it doesn't appear. */ - ent.v.val.val = (uint64_t)_upb_msg_new(entry->submsgs[0], d->arena); - } - - decode_tosubmsg(d, &ent.k, layout, field, val.str_val); - - /* Insert into map. */ - _upb_map_set(map, &ent.k, map->key_size, &ent.v, map->val_size, d->arena); -} - -static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg, - const upb_msglayout *layout, - const upb_msglayout_field *field, wireval val, - int op) { - void *mem = UPB_PTR_AT(msg, field->offset, void); - int type = field->descriptortype; - - /* Set presence if necessary. */ - if (field->presence < 0) { - /* Oneof case */ - *UPB_PTR_AT(msg, -field->presence, int32_t) = field->number; - } else if (field->presence > 0) { - /* Hasbit */ - uint32_t hasbit = field->presence; - *UPB_PTR_AT(msg, hasbit / 8, uint8_t) |= (1 << (hasbit % 8)); - } - - /* Store into message. */ - switch (op) { - case OP_SUBMSG: { - upb_msg **submsgp = mem; - upb_msg *submsg = *submsgp; - if (!submsg) { - submsg = decode_newsubmsg(d, layout, field); - *submsgp = submsg; - } - if (UPB_UNLIKELY(type == UPB_DTYPE_GROUP)) { - ptr = decode_togroup(d, ptr, submsg, layout, field); - } else { - decode_tosubmsg(d, submsg, layout, field, val.str_val); - } - break; - } - case OP_STRING: - memcpy(mem, &val, sizeof(upb_strview)); - break; - case OP_SCALAR_LG2(3): - memcpy(mem, &val, 8); - break; - case OP_SCALAR_LG2(2): - memcpy(mem, &val, 4); - break; - case OP_SCALAR_LG2(0): - memcpy(mem, &val, 1); - break; - default: - UPB_UNREACHABLE(); - } - - return ptr; -} - -static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg, - const upb_msglayout *layout) { - while (ptr < d->limit) { - uint32_t tag; - const upb_msglayout_field *field; - int field_number; - int wire_type; - const char *field_start = ptr; - wireval val; - int op; - - ptr = decode_varint32(d, ptr, d->limit, &tag); - field_number = tag >> 3; - wire_type = tag & 7; - - field = upb_find_field(layout, field_number); - - switch (wire_type) { - case UPB_WIRE_TYPE_VARINT: - ptr = decode_varint64(d, ptr, d->limit, &val.uint64_val); - op = varint_ops[field->descriptortype]; - decode_munge(field->descriptortype, &val); - break; - case UPB_WIRE_TYPE_32BIT: - if (d->limit - ptr < 4) decode_err(d); - memcpy(&val, ptr, 4); - ptr += 4; - op = OP_SCALAR_LG2(2); - if (((1 << field->descriptortype) & fixed32_ok) == 0) goto unknown; - break; - case UPB_WIRE_TYPE_64BIT: - if (d->limit - ptr < 8) decode_err(d); - memcpy(&val, ptr, 8); - ptr += 8; - op = OP_SCALAR_LG2(3); - if (((1 << field->descriptortype) & fixed64_ok) == 0) goto unknown; - break; - case UPB_WIRE_TYPE_DELIMITED: { - uint32_t size; - int ndx = field->descriptortype; - if (_upb_isrepeated(field)) ndx += 18; - ptr = decode_varint32(d, ptr, d->limit, &size); - if (size >= INT32_MAX || (size_t)(d->limit - ptr) < size) { - decode_err(d); /* Length overflow. */ - } - val.str_val.data = ptr; - val.str_val.size = size; - ptr += size; - op = delim_ops[ndx]; - break; - } - case UPB_WIRE_TYPE_START_GROUP: - val.int32_val = field_number; - op = OP_SUBMSG; - if (field->descriptortype != UPB_DTYPE_GROUP) goto unknown; - break; - case UPB_WIRE_TYPE_END_GROUP: - d->end_group = field_number; - return ptr; - default: - decode_err(d); - } - - if (op >= 0) { - /* Parse, using op for dispatch. */ - switch (field->label) { - case UPB_LABEL_REPEATED: - case _UPB_LABEL_PACKED: - ptr = decode_toarray(d, ptr, msg, layout, field, val, op); - break; - case _UPB_LABEL_MAP: - decode_tomap(d, msg, layout, field, val); - break; - default: - ptr = decode_tomsg(d, ptr, msg, layout, field, val, op); - break; - } - } else { - unknown: - /* Skip unknown field. */ - if (field_number == 0) decode_err(d); - if (wire_type == UPB_WIRE_TYPE_START_GROUP) { - ptr = decode_group(d, ptr, NULL, NULL, field_number); - } - if (msg) { - if (!_upb_msg_addunknown(msg, field_start, ptr - field_start, - d->arena)) { - decode_err(d); - } - } - } - } - - if (ptr != d->limit) decode_err(d); - return ptr; -} - -bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l, - upb_arena *arena) { - upb_decstate state; - state.limit = buf + size; - state.arena = arena; - state.depth = 64; - state.end_group = 0; - - if (setjmp(state.err)) return false; - - if (size == 0) return true; - decode_msg(&state, buf, msg, l); - - return state.end_group == 0; -} - -#undef OP_SCALAR_LG2 -#undef OP_FIXPCK_LG2 -#undef OP_VARPCK_LG2 -#undef OP_STRING -#undef OP_SUBMSG -/* We encode backwards, to avoid pre-computing lengths (one-pass encode). */ - - -#include - - - -#define UPB_PB_VARINT_MAX_LEN 10 -#define CHK(x) do { if (!(x)) { return false; } } while(0) - -static size_t upb_encode_varint(uint64_t val, char *buf) { - size_t i; - if (val < 128) { buf[0] = val; return 1; } - i = 0; - while (val) { - uint8_t byte = val & 0x7fU; - val >>= 7; - if (val) byte |= 0x80U; - buf[i++] = byte; - } - return i; -} - -static uint32_t upb_zzencode_32(int32_t n) { return ((uint32_t)n << 1) ^ (n >> 31); } -static uint64_t upb_zzencode_64(int64_t n) { return ((uint64_t)n << 1) ^ (n >> 63); } - -typedef struct { - upb_alloc *alloc; - char *buf, *ptr, *limit; -} upb_encstate; - -static size_t upb_roundup_pow2(size_t bytes) { - size_t ret = 128; - while (ret < bytes) { - ret *= 2; - } - return ret; -} - -static bool upb_encode_growbuffer(upb_encstate *e, size_t bytes) { - size_t old_size = e->limit - e->buf; - size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr)); - char *new_buf = upb_realloc(e->alloc, e->buf, old_size, new_size); - CHK(new_buf); - - /* We want previous data at the end, realloc() put it at the beginning. */ - if (old_size > 0) { - memmove(new_buf + new_size - old_size, e->buf, old_size); - } - - e->ptr = new_buf + new_size - (e->limit - e->ptr); - e->limit = new_buf + new_size; - e->buf = new_buf; - return true; -} - -/* Call to ensure that at least "bytes" bytes are available for writing at - * e->ptr. Returns false if the bytes could not be allocated. */ -static bool upb_encode_reserve(upb_encstate *e, size_t bytes) { - CHK(UPB_LIKELY((size_t)(e->ptr - e->buf) >= bytes) || - upb_encode_growbuffer(e, bytes)); - - e->ptr -= bytes; - return true; -} - -/* Writes the given bytes to the buffer, handling reserve/advance. */ -static bool upb_put_bytes(upb_encstate *e, const void *data, size_t len) { - if (len == 0) return true; - CHK(upb_encode_reserve(e, len)); - memcpy(e->ptr, data, len); - return true; -} - -static bool upb_put_fixed64(upb_encstate *e, uint64_t val) { - /* TODO(haberman): byte-swap for big endian. */ - return upb_put_bytes(e, &val, sizeof(uint64_t)); -} - -static bool upb_put_fixed32(upb_encstate *e, uint32_t val) { - /* TODO(haberman): byte-swap for big endian. */ - return upb_put_bytes(e, &val, sizeof(uint32_t)); -} - -static bool upb_put_varint(upb_encstate *e, uint64_t val) { - size_t len; - char *start; - CHK(upb_encode_reserve(e, UPB_PB_VARINT_MAX_LEN)); - len = upb_encode_varint(val, e->ptr); - start = e->ptr + UPB_PB_VARINT_MAX_LEN - len; - memmove(start, e->ptr, len); - e->ptr = start; - return true; -} - -static bool upb_put_double(upb_encstate *e, double d) { - uint64_t u64; - UPB_ASSERT(sizeof(double) == sizeof(uint64_t)); - memcpy(&u64, &d, sizeof(uint64_t)); - return upb_put_fixed64(e, u64); -} - -static bool upb_put_float(upb_encstate *e, float d) { - uint32_t u32; - UPB_ASSERT(sizeof(float) == sizeof(uint32_t)); - memcpy(&u32, &d, sizeof(uint32_t)); - return upb_put_fixed32(e, u32); -} - -static uint32_t upb_readcase(const char *msg, const upb_msglayout_field *f) { - uint32_t ret; - memcpy(&ret, msg - f->presence, sizeof(ret)); - return ret; -} - -static bool upb_readhasbit(const char *msg, const upb_msglayout_field *f) { - uint32_t hasbit = f->presence; - UPB_ASSERT(f->presence > 0); - return (*UPB_PTR_AT(msg, hasbit / 8, uint8_t)) & (1 << (hasbit % 8)); -} - -static bool upb_put_tag(upb_encstate *e, int field_number, int wire_type) { - return upb_put_varint(e, (field_number << 3) | wire_type); -} - -static bool upb_put_fixedarray(upb_encstate *e, const upb_array *arr, - size_t elem_size, uint32_t tag) { - size_t bytes = arr->len * elem_size; - const char* data = _upb_array_constptr(arr); - const char* ptr = data + bytes - elem_size; - if (tag) { - while (true) { - CHK(upb_put_bytes(e, ptr, elem_size) && upb_put_varint(e, tag)); - if (ptr == data) break; - ptr -= elem_size; - } - return true; - } else { - return upb_put_bytes(e, data, bytes) && upb_put_varint(e, bytes); - } -} - -bool upb_encode_message(upb_encstate *e, const char *msg, - const upb_msglayout *m, size_t *size); - -static bool upb_encode_scalarfield(upb_encstate *e, const void *_field_mem, - const upb_msglayout *m, - const upb_msglayout_field *f, - bool skip_zero_value) { - const char *field_mem = _field_mem; -#define CASE(ctype, type, wire_type, encodeval) do { \ - ctype val = *(ctype*)field_mem; \ - if (skip_zero_value && val == 0) { \ - return true; \ - } \ - return upb_put_ ## type(e, encodeval) && \ - upb_put_tag(e, f->number, wire_type); \ -} while(0) - - switch (f->descriptortype) { - case UPB_DESCRIPTOR_TYPE_DOUBLE: - CASE(double, double, UPB_WIRE_TYPE_64BIT, val); - case UPB_DESCRIPTOR_TYPE_FLOAT: - CASE(float, float, UPB_WIRE_TYPE_32BIT, val); - case UPB_DESCRIPTOR_TYPE_INT64: - case UPB_DESCRIPTOR_TYPE_UINT64: - CASE(uint64_t, varint, UPB_WIRE_TYPE_VARINT, val); - case UPB_DESCRIPTOR_TYPE_UINT32: - CASE(uint32_t, varint, UPB_WIRE_TYPE_VARINT, val); - case UPB_DESCRIPTOR_TYPE_INT32: - case UPB_DESCRIPTOR_TYPE_ENUM: - CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, (int64_t)val); - case UPB_DESCRIPTOR_TYPE_SFIXED64: - case UPB_DESCRIPTOR_TYPE_FIXED64: - CASE(uint64_t, fixed64, UPB_WIRE_TYPE_64BIT, val); - case UPB_DESCRIPTOR_TYPE_FIXED32: - case UPB_DESCRIPTOR_TYPE_SFIXED32: - CASE(uint32_t, fixed32, UPB_WIRE_TYPE_32BIT, val); - case UPB_DESCRIPTOR_TYPE_BOOL: - CASE(bool, varint, UPB_WIRE_TYPE_VARINT, val); - case UPB_DESCRIPTOR_TYPE_SINT32: - CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_32(val)); - case UPB_DESCRIPTOR_TYPE_SINT64: - CASE(int64_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_64(val)); - case UPB_DESCRIPTOR_TYPE_STRING: - case UPB_DESCRIPTOR_TYPE_BYTES: { - upb_strview view = *(upb_strview*)field_mem; - if (skip_zero_value && view.size == 0) { - return true; - } - return upb_put_bytes(e, view.data, view.size) && - upb_put_varint(e, view.size) && - upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED); - } - case UPB_DESCRIPTOR_TYPE_GROUP: { - size_t size; - void *submsg = *(void **)field_mem; - const upb_msglayout *subm = m->submsgs[f->submsg_index]; - if (submsg == NULL) { - return true; - } - return upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) && - upb_encode_message(e, submsg, subm, &size) && - upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP); - } - case UPB_DESCRIPTOR_TYPE_MESSAGE: { - size_t size; - void *submsg = *(void **)field_mem; - const upb_msglayout *subm = m->submsgs[f->submsg_index]; - if (submsg == NULL) { - return true; - } - return upb_encode_message(e, submsg, subm, &size) && - upb_put_varint(e, size) && - upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED); - } - } -#undef CASE - UPB_UNREACHABLE(); -} - -static bool upb_encode_array(upb_encstate *e, const char *field_mem, - const upb_msglayout *m, - const upb_msglayout_field *f) { - const upb_array *arr = *(const upb_array**)field_mem; - bool packed = f->label == _UPB_LABEL_PACKED; - - if (arr == NULL || arr->len == 0) { - return true; - } - -#define VARINT_CASE(ctype, encode) \ - { \ - const ctype *start = _upb_array_constptr(arr); \ - const ctype *ptr = start + arr->len; \ - size_t pre_len = e->limit - e->ptr; \ - uint32_t tag = packed ? 0 : (f->number << 3) | UPB_WIRE_TYPE_VARINT; \ - do { \ - ptr--; \ - CHK(upb_put_varint(e, encode)); \ - if (tag) CHK(upb_put_varint(e, tag)); \ - } while (ptr != start); \ - if (!tag) CHK(upb_put_varint(e, e->limit - e->ptr - pre_len)); \ - } \ - break; \ - do { \ - ; \ - } while (0) - -#define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type)) - - switch (f->descriptortype) { - case UPB_DESCRIPTOR_TYPE_DOUBLE: - CHK(upb_put_fixedarray(e, arr, sizeof(double), TAG(UPB_WIRE_TYPE_64BIT))); - break; - case UPB_DESCRIPTOR_TYPE_FLOAT: - CHK(upb_put_fixedarray(e, arr, sizeof(float), TAG(UPB_WIRE_TYPE_32BIT))); - break; - case UPB_DESCRIPTOR_TYPE_SFIXED64: - case UPB_DESCRIPTOR_TYPE_FIXED64: - CHK(upb_put_fixedarray(e, arr, sizeof(uint64_t), TAG(UPB_WIRE_TYPE_64BIT))); - break; - case UPB_DESCRIPTOR_TYPE_FIXED32: - case UPB_DESCRIPTOR_TYPE_SFIXED32: - CHK(upb_put_fixedarray(e, arr, sizeof(uint32_t), TAG(UPB_WIRE_TYPE_32BIT))); - break; - case UPB_DESCRIPTOR_TYPE_INT64: - case UPB_DESCRIPTOR_TYPE_UINT64: - VARINT_CASE(uint64_t, *ptr); - case UPB_DESCRIPTOR_TYPE_UINT32: - VARINT_CASE(uint32_t, *ptr); - case UPB_DESCRIPTOR_TYPE_INT32: - case UPB_DESCRIPTOR_TYPE_ENUM: - VARINT_CASE(int32_t, (int64_t)*ptr); - case UPB_DESCRIPTOR_TYPE_BOOL: - VARINT_CASE(bool, *ptr); - case UPB_DESCRIPTOR_TYPE_SINT32: - VARINT_CASE(int32_t, upb_zzencode_32(*ptr)); - case UPB_DESCRIPTOR_TYPE_SINT64: - VARINT_CASE(int64_t, upb_zzencode_64(*ptr)); - case UPB_DESCRIPTOR_TYPE_STRING: - case UPB_DESCRIPTOR_TYPE_BYTES: { - const upb_strview *start = _upb_array_constptr(arr); - const upb_strview *ptr = start + arr->len; - do { - ptr--; - CHK(upb_put_bytes(e, ptr->data, ptr->size) && - upb_put_varint(e, ptr->size) && - upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)); - } while (ptr != start); - return true; - } - case UPB_DESCRIPTOR_TYPE_GROUP: { - const void *const*start = _upb_array_constptr(arr); - const void *const*ptr = start + arr->len; - const upb_msglayout *subm = m->submsgs[f->submsg_index]; - do { - size_t size; - ptr--; - CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) && - upb_encode_message(e, *ptr, subm, &size) && - upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP)); - } while (ptr != start); - return true; - } - case UPB_DESCRIPTOR_TYPE_MESSAGE: { - const void *const*start = _upb_array_constptr(arr); - const void *const*ptr = start + arr->len; - const upb_msglayout *subm = m->submsgs[f->submsg_index]; - do { - size_t size; - ptr--; - CHK(upb_encode_message(e, *ptr, subm, &size) && - upb_put_varint(e, size) && - upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)); - } while (ptr != start); - return true; - } - } -#undef VARINT_CASE - - if (packed) { - CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)); - } - return true; -} - -static bool upb_encode_map(upb_encstate *e, const char *field_mem, - const upb_msglayout *m, - const upb_msglayout_field *f) { - const upb_map *map = *(const upb_map**)field_mem; - const upb_msglayout *entry = m->submsgs[f->submsg_index]; - const upb_msglayout_field *key_field = &entry->fields[0]; - const upb_msglayout_field *val_field = &entry->fields[1]; - upb_strtable_iter i; - if (map == NULL) { - return true; - } - - upb_strtable_begin(&i, &map->table); - for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { - size_t pre_len = e->limit - e->ptr; - size_t size; - upb_strview key = upb_strtable_iter_key(&i); - const upb_value val = upb_strtable_iter_value(&i); - const void *keyp = - map->key_size == UPB_MAPTYPE_STRING ? (void *)&key : key.data; - const void *valp = - map->val_size == UPB_MAPTYPE_STRING ? upb_value_getptr(val) : &val; - - CHK(upb_encode_scalarfield(e, valp, entry, val_field, false)); - CHK(upb_encode_scalarfield(e, keyp, entry, key_field, false)); - size = (e->limit - e->ptr) - pre_len; - CHK(upb_put_varint(e, size)); - CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)); - } - - return true; -} - - -bool upb_encode_message(upb_encstate *e, const char *msg, - const upb_msglayout *m, size_t *size) { - int i; - size_t pre_len = e->limit - e->ptr; - const char *unknown; - size_t unknown_size; - - unknown = upb_msg_getunknown(msg, &unknown_size); - - if (unknown) { - upb_put_bytes(e, unknown, unknown_size); - } - - for (i = m->field_count - 1; i >= 0; i--) { - const upb_msglayout_field *f = &m->fields[i]; - - if (_upb_isrepeated(f)) { - CHK(upb_encode_array(e, msg + f->offset, m, f)); - } else if (f->label == _UPB_LABEL_MAP) { - CHK(upb_encode_map(e, msg + f->offset, m, f)); - } else { - bool skip_empty = false; - if (f->presence == 0) { - /* Proto3 presence. */ - skip_empty = true; - } else if (f->presence > 0) { - /* Proto2 presence: hasbit. */ - if (!upb_readhasbit(msg, f)) { - continue; - } - } else { - /* Field is in a oneof. */ - if (upb_readcase(msg, f) != f->number) { - continue; - } - } - CHK(upb_encode_scalarfield(e, msg + f->offset, m, f, skip_empty)); - } - } - - *size = (e->limit - e->ptr) - pre_len; - return true; -} - -char *upb_encode(const void *msg, const upb_msglayout *m, upb_arena *arena, - size_t *size) { - upb_encstate e; - e.alloc = upb_arena_alloc(arena); - e.buf = NULL; - e.limit = NULL; - e.ptr = NULL; - - if (!upb_encode_message(&e, msg, m, size)) { - *size = 0; - return NULL; - } - - *size = e.limit - e.ptr; - - if (*size == 0) { - static char ch; - return &ch; - } else { - UPB_ASSERT(e.ptr); - return e.ptr; - } -} - -#undef CHK - - - - -/** upb_msg *******************************************************************/ - -static const char _upb_fieldtype_to_sizelg2[12] = { - 0, - 0, /* UPB_TYPE_BOOL */ - 2, /* UPB_TYPE_FLOAT */ - 2, /* UPB_TYPE_INT32 */ - 2, /* UPB_TYPE_UINT32 */ - 2, /* UPB_TYPE_ENUM */ - UPB_SIZE(2, 3), /* UPB_TYPE_MESSAGE */ - 3, /* UPB_TYPE_DOUBLE */ - 3, /* UPB_TYPE_INT64 */ - 3, /* UPB_TYPE_UINT64 */ - UPB_SIZE(3, 4), /* UPB_TYPE_STRING */ - UPB_SIZE(3, 4), /* UPB_TYPE_BYTES */ -}; - -static uintptr_t tag_arrptr(void* ptr, int elem_size_lg2) { - UPB_ASSERT(elem_size_lg2 <= 4); - return (uintptr_t)ptr | elem_size_lg2; -} - -static int upb_msg_internalsize(const upb_msglayout *l) { - return sizeof(upb_msg_internal) - l->extendable * sizeof(void *); -} - -static size_t upb_msg_sizeof(const upb_msglayout *l) { - return l->size + upb_msg_internalsize(l); -} - -static upb_msg_internal *upb_msg_getinternal(upb_msg *msg) { - return UPB_PTR_AT(msg, -sizeof(upb_msg_internal), upb_msg_internal); -} - -static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) { - return UPB_PTR_AT(msg, -sizeof(upb_msg_internal), upb_msg_internal); -} - -static upb_msg_internal_withext *upb_msg_getinternalwithext( - upb_msg *msg, const upb_msglayout *l) { - UPB_ASSERT(l->extendable); - return UPB_PTR_AT(msg, -sizeof(upb_msg_internal_withext), - upb_msg_internal_withext); -} - -upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a) { - void *mem = upb_arena_malloc(a, upb_msg_sizeof(l)); - upb_msg_internal *in; - upb_msg *msg; - - if (!mem) { - return NULL; - } - - msg = UPB_PTR_AT(mem, upb_msg_internalsize(l), upb_msg); - - /* Initialize normal members. */ - memset(msg, 0, l->size); - - /* Initialize internal members. */ - in = upb_msg_getinternal(msg); - in->unknown = NULL; - in->unknown_len = 0; - in->unknown_size = 0; - - if (l->extendable) { - upb_msg_getinternalwithext(msg, l)->extdict = NULL; - } - - return msg; -} - -bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, - upb_arena *arena) { - upb_msg_internal *in = upb_msg_getinternal(msg); - if (len > in->unknown_size - in->unknown_len) { - upb_alloc *alloc = upb_arena_alloc(arena); - size_t need = in->unknown_size + len; - size_t newsize = UPB_MAX(in->unknown_size * 2, need); - void *mem = upb_realloc(alloc, in->unknown, in->unknown_size, newsize); - if (!mem) return false; - in->unknown = mem; - in->unknown_size = newsize; - } - memcpy(in->unknown + in->unknown_len, data, len); - in->unknown_len += len; - return true; -} - -const char *upb_msg_getunknown(const upb_msg *msg, size_t *len) { - const upb_msg_internal *in = upb_msg_getinternal_const(msg); - *len = in->unknown_len; - return in->unknown; -} - -/** upb_array *****************************************************************/ - -upb_array *_upb_array_new(upb_arena *a, upb_fieldtype_t type) { - upb_array *arr = upb_arena_malloc(a, sizeof(upb_array)); - - if (!arr) { - return NULL; - } - - arr->data = tag_arrptr(NULL, _upb_fieldtype_to_sizelg2[type]); - arr->len = 0; - arr->size = 0; - - return arr; -} - -bool _upb_array_realloc(upb_array *arr, size_t min_size, upb_arena *arena) { - size_t new_size = UPB_MAX(arr->size, 4); - int elem_size_lg2 = arr->data & 7; - size_t old_bytes = arr->size << elem_size_lg2; - size_t new_bytes; - void* ptr = _upb_array_ptr(arr); - - /* Log2 ceiling of size. */ - while (new_size < min_size) new_size *= 2; - - new_bytes = new_size << elem_size_lg2; - ptr = upb_arena_realloc(arena, ptr, old_bytes, new_bytes); - - if (!ptr) { - return false; - } - - arr->data = tag_arrptr(ptr, elem_size_lg2); - arr->size = new_size; - return true; -} - -static upb_array *getorcreate_array(upb_array **arr_ptr, upb_fieldtype_t type, - upb_arena *arena) { - upb_array *arr = *arr_ptr; - if (!arr) { - arr = _upb_array_new(arena, type); - if (!arr) return NULL; - *arr_ptr = arr; - } - return arr; -} - -static bool resize_array(upb_array *arr, size_t size, upb_arena *arena) { - if (size > arr->size && !_upb_array_realloc(arr, size, arena)) { - return false; - } - - arr->len = size; - return true; -} - -void *_upb_array_resize_fallback(upb_array **arr_ptr, size_t size, - upb_fieldtype_t type, upb_arena *arena) { - upb_array *arr = getorcreate_array(arr_ptr, type, arena); - return arr && resize_array(arr, size, arena) ? _upb_array_ptr(arr) : NULL; -} - -bool _upb_array_append_fallback(upb_array **arr_ptr, const void *value, - upb_fieldtype_t type, upb_arena *arena) { - upb_array *arr = getorcreate_array(arr_ptr, type, arena); - size_t elem = arr->len; - int lg2 = _upb_fieldtype_to_sizelg2[type]; - char *data; - - if (!arr || !resize_array(arr, elem + 1, arena)) return false; - - data = _upb_array_ptr(arr); - memcpy(data + (elem << lg2), value, 1 << lg2); - return true; -} - -/** upb_map *******************************************************************/ - -upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size) { - upb_map *map = upb_arena_malloc(a, sizeof(upb_map)); - - if (!map) { - return NULL; - } - - upb_strtable_init2(&map->table, UPB_CTYPE_INT32, upb_arena_alloc(a)); - map->key_size = key_size; - map->val_size = value_size; - - return map; -} -/* -** upb_table Implementation -** -** Implementation is heavily inspired by Lua's ltable.c. -*/ - - -#include - - -#define UPB_MAXARRSIZE 16 /* 64k. */ - -/* From Chromium. */ -#define ARRAY_SIZE(x) \ - ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x]))))) - -static const double MAX_LOAD = 0.85; - -/* The minimum utilization of the array part of a mixed hash/array table. This - * is a speed/memory-usage tradeoff (though it's not straightforward because of - * cache effects). The lower this is, the more memory we'll use. */ -static const double MIN_DENSITY = 0.1; - -bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; } - -int log2ceil(uint64_t v) { - int ret = 0; - bool pow2 = is_pow2(v); - while (v >>= 1) ret++; - ret = pow2 ? ret : ret + 1; /* Ceiling. */ - return UPB_MIN(UPB_MAXARRSIZE, ret); -} - -char *upb_strdup(const char *s, upb_alloc *a) { - return upb_strdup2(s, strlen(s), a); -} - -char *upb_strdup2(const char *s, size_t len, upb_alloc *a) { - size_t n; - char *p; - - /* Prevent overflow errors. */ - if (len == SIZE_MAX) return NULL; - /* Always null-terminate, even if binary data; but don't rely on the input to - * have a null-terminating byte since it may be a raw binary buffer. */ - n = len + 1; - p = upb_malloc(a, n); - if (p) { - memcpy(p, s, len); - p[len] = 0; - } - return p; -} - -/* A type to represent the lookup key of either a strtable or an inttable. */ -typedef union { - uintptr_t num; - struct { - const char *str; - size_t len; - } str; -} lookupkey_t; - -static lookupkey_t strkey2(const char *str, size_t len) { - lookupkey_t k; - k.str.str = str; - k.str.len = len; - return k; -} - -static lookupkey_t intkey(uintptr_t key) { - lookupkey_t k; - k.num = key; - return k; -} - -typedef uint32_t hashfunc_t(upb_tabkey key); -typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2); - -/* Base table (shared code) ***************************************************/ - -/* For when we need to cast away const. */ -static upb_tabent *mutable_entries(upb_table *t) { - return (upb_tabent*)t->entries; -} - -static bool isfull(upb_table *t) { - if (upb_table_size(t) == 0) { - return true; - } else { - return ((double)(t->count + 1) / upb_table_size(t)) > MAX_LOAD; - } -} - -static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) { - size_t bytes; - - t->count = 0; - t->size_lg2 = size_lg2; - t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0; - bytes = upb_table_size(t) * sizeof(upb_tabent); - if (bytes > 0) { - t->entries = upb_malloc(a, bytes); - if (!t->entries) return false; - memset(mutable_entries(t), 0, bytes); - } else { - t->entries = NULL; - } - return true; -} - -static void uninit(upb_table *t, upb_alloc *a) { - upb_free(a, mutable_entries(t)); -} - -static upb_tabent *emptyent(upb_table *t) { - upb_tabent *e = mutable_entries(t) + upb_table_size(t); - while (1) { if (upb_tabent_isempty(--e)) return e; UPB_ASSERT(e > t->entries); } -} - -static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) { - return (upb_tabent*)upb_getentry(t, hash); -} - -static const upb_tabent *findentry(const upb_table *t, lookupkey_t key, - uint32_t hash, eqlfunc_t *eql) { - const upb_tabent *e; - - if (t->size_lg2 == 0) return NULL; - e = upb_getentry(t, hash); - if (upb_tabent_isempty(e)) return NULL; - while (1) { - if (eql(e->key, key)) return e; - if ((e = e->next) == NULL) return NULL; - } -} - -static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key, - uint32_t hash, eqlfunc_t *eql) { - return (upb_tabent*)findentry(t, key, hash, eql); -} - -static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v, - uint32_t hash, eqlfunc_t *eql) { - const upb_tabent *e = findentry(t, key, hash, eql); - if (e) { - if (v) { - _upb_value_setval(v, e->val.val); - } - return true; - } else { - return false; - } -} - -/* The given key must not already exist in the table. */ -static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey, - upb_value val, uint32_t hash, - hashfunc_t *hashfunc, eqlfunc_t *eql) { - upb_tabent *mainpos_e; - upb_tabent *our_e; - - UPB_ASSERT(findentry(t, key, hash, eql) == NULL); - - t->count++; - mainpos_e = getentry_mutable(t, hash); - our_e = mainpos_e; - - if (upb_tabent_isempty(mainpos_e)) { - /* Our main position is empty; use it. */ - our_e->next = NULL; - } else { - /* Collision. */ - upb_tabent *new_e = emptyent(t); - /* Head of collider's chain. */ - upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key)); - if (chain == mainpos_e) { - /* Existing ent is in its main posisiton (it has the same hash as us, and - * is the head of our chain). Insert to new ent and append to this chain. */ - new_e->next = mainpos_e->next; - mainpos_e->next = new_e; - our_e = new_e; - } else { - /* Existing ent is not in its main position (it is a node in some other - * chain). This implies that no existing ent in the table has our hash. - * Evict it (updating its chain) and use its ent for head of our chain. */ - *new_e = *mainpos_e; /* copies next. */ - while (chain->next != mainpos_e) { - chain = (upb_tabent*)chain->next; - UPB_ASSERT(chain); - } - chain->next = new_e; - our_e = mainpos_e; - our_e->next = NULL; - } - } - our_e->key = tabkey; - our_e->val.val = val.val; - UPB_ASSERT(findentry(t, key, hash, eql) == our_e); -} - -static bool rm(upb_table *t, lookupkey_t key, upb_value *val, - upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) { - upb_tabent *chain = getentry_mutable(t, hash); - if (upb_tabent_isempty(chain)) return false; - if (eql(chain->key, key)) { - /* Element to remove is at the head of its chain. */ - t->count--; - if (val) _upb_value_setval(val, chain->val.val); - if (removed) *removed = chain->key; - if (chain->next) { - upb_tabent *move = (upb_tabent*)chain->next; - *chain = *move; - move->key = 0; /* Make the slot empty. */ - } else { - chain->key = 0; /* Make the slot empty. */ - } - return true; - } else { - /* Element to remove is either in a non-head position or not in the - * table. */ - while (chain->next && !eql(chain->next->key, key)) { - chain = (upb_tabent*)chain->next; - } - if (chain->next) { - /* Found element to remove. */ - upb_tabent *rm = (upb_tabent*)chain->next; - t->count--; - if (val) _upb_value_setval(val, chain->next->val.val); - if (removed) *removed = rm->key; - rm->key = 0; /* Make the slot empty. */ - chain->next = rm->next; - return true; - } else { - /* Element to remove is not in the table. */ - return false; - } - } -} - -static size_t next(const upb_table *t, size_t i) { - do { - if (++i >= upb_table_size(t)) - return SIZE_MAX; - } while(upb_tabent_isempty(&t->entries[i])); - - return i; -} - -static size_t begin(const upb_table *t) { - return next(t, -1); -} - - -/* upb_strtable ***************************************************************/ - -/* A simple "subclass" of upb_table that only adds a hash function for strings. */ - -static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) { - uint32_t len = (uint32_t) k2.str.len; - char *str = upb_malloc(a, k2.str.len + sizeof(uint32_t) + 1); - if (str == NULL) return 0; - memcpy(str, &len, sizeof(uint32_t)); - memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len); - str[sizeof(uint32_t) + k2.str.len] = '\0'; - return (uintptr_t)str; -} - -static uint32_t strhash(upb_tabkey key) { - uint32_t len; - char *str = upb_tabstr(key, &len); - return upb_murmur_hash2(str, len, 0); -} - -static bool streql(upb_tabkey k1, lookupkey_t k2) { - uint32_t len; - char *str = upb_tabstr(k1, &len); - return len == k2.str.len && memcmp(str, k2.str.str, len) == 0; -} - -bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, upb_alloc *a) { - return init(&t->t, 2, a); -} - -void upb_strtable_clear(upb_strtable *t) { - size_t bytes = upb_table_size(&t->t) * sizeof(upb_tabent); - t->t.count = 0; - memset((char*)t->t.entries, 0, bytes); -} - -void upb_strtable_uninit2(upb_strtable *t, upb_alloc *a) { - size_t i; - for (i = 0; i < upb_table_size(&t->t); i++) - upb_free(a, (void*)t->t.entries[i].key); - uninit(&t->t, a); -} - -bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) { - upb_strtable new_table; - upb_strtable_iter i; - - if (!init(&new_table.t, size_lg2, a)) - return false; - upb_strtable_begin(&i, t); - for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) { - upb_strview key = upb_strtable_iter_key(&i); - upb_strtable_insert3( - &new_table, key.data, key.size, - upb_strtable_iter_value(&i), a); - } - upb_strtable_uninit2(t, a); - *t = new_table; - return true; -} - -bool upb_strtable_insert3(upb_strtable *t, const char *k, size_t len, - upb_value v, upb_alloc *a) { - lookupkey_t key; - upb_tabkey tabkey; - uint32_t hash; - - if (isfull(&t->t)) { - /* Need to resize. New table of double the size, add old elements to it. */ - if (!upb_strtable_resize(t, t->t.size_lg2 + 1, a)) { - return false; - } - } - - key = strkey2(k, len); - tabkey = strcopy(key, a); - if (tabkey == 0) return false; - - hash = upb_murmur_hash2(key.str.str, key.str.len, 0); - insert(&t->t, key, tabkey, v, hash, &strhash, &streql); - return true; -} - -bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len, - upb_value *v) { - uint32_t hash = upb_murmur_hash2(key, len, 0); - return lookup(&t->t, strkey2(key, len), v, hash, &streql); -} - -bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len, - upb_value *val, upb_alloc *alloc) { - uint32_t hash = upb_murmur_hash2(key, len, 0); - upb_tabkey tabkey; - if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) { - if (alloc) { - /* Arena-based allocs don't need to free and won't pass this. */ - upb_free(alloc, (void*)tabkey); - } - return true; - } else { - return false; - } -} - -/* Iteration */ - -void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) { - i->t = t; - i->index = begin(&t->t); -} - -void upb_strtable_next(upb_strtable_iter *i) { - i->index = next(&i->t->t, i->index); -} - -bool upb_strtable_done(const upb_strtable_iter *i) { - if (!i->t) return true; - return i->index >= upb_table_size(&i->t->t) || - upb_tabent_isempty(str_tabent(i)); -} - -upb_strview upb_strtable_iter_key(const upb_strtable_iter *i) { - upb_strview key; - uint32_t len; - UPB_ASSERT(!upb_strtable_done(i)); - key.data = upb_tabstr(str_tabent(i)->key, &len); - key.size = len; - return key; -} - -upb_value upb_strtable_iter_value(const upb_strtable_iter *i) { - UPB_ASSERT(!upb_strtable_done(i)); - return _upb_value_val(str_tabent(i)->val.val); -} - -void upb_strtable_iter_setdone(upb_strtable_iter *i) { - i->t = NULL; - i->index = SIZE_MAX; -} - -bool upb_strtable_iter_isequal(const upb_strtable_iter *i1, - const upb_strtable_iter *i2) { - if (upb_strtable_done(i1) && upb_strtable_done(i2)) - return true; - return i1->t == i2->t && i1->index == i2->index; -} - - -/* upb_inttable ***************************************************************/ - -/* For inttables we use a hybrid structure where small keys are kept in an - * array and large keys are put in the hash table. */ - -static uint32_t inthash(upb_tabkey key) { return upb_inthash(key); } - -static bool inteql(upb_tabkey k1, lookupkey_t k2) { - return k1 == k2.num; -} - -static upb_tabval *mutable_array(upb_inttable *t) { - return (upb_tabval*)t->array; -} - -static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) { - if (key < t->array_size) { - return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL; - } else { - upb_tabent *e = - findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql); - return e ? &e->val : NULL; - } -} - -static const upb_tabval *inttable_val_const(const upb_inttable *t, - uintptr_t key) { - return inttable_val((upb_inttable*)t, key); -} - -size_t upb_inttable_count(const upb_inttable *t) { - return t->t.count + t->array_count; -} - -static void check(upb_inttable *t) { - UPB_UNUSED(t); -#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG) - { - /* This check is very expensive (makes inserts/deletes O(N)). */ - size_t count = 0; - upb_inttable_iter i; - upb_inttable_begin(&i, t); - for(; !upb_inttable_done(&i); upb_inttable_next(&i), count++) { - UPB_ASSERT(upb_inttable_lookup(t, upb_inttable_iter_key(&i), NULL)); - } - UPB_ASSERT(count == upb_inttable_count(t)); - } -#endif -} - -bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2, - upb_alloc *a) { - size_t array_bytes; - - if (!init(&t->t, hsize_lg2, a)) return false; - /* Always make the array part at least 1 long, so that we know key 0 - * won't be in the hash part, which simplifies things. */ - t->array_size = UPB_MAX(1, asize); - t->array_count = 0; - array_bytes = t->array_size * sizeof(upb_value); - t->array = upb_malloc(a, array_bytes); - if (!t->array) { - uninit(&t->t, a); - return false; - } - memset(mutable_array(t), 0xff, array_bytes); - check(t); - return true; -} - -bool upb_inttable_init2(upb_inttable *t, upb_ctype_t ctype, upb_alloc *a) { - return upb_inttable_sizedinit(t, 0, 4, a); -} - -void upb_inttable_uninit2(upb_inttable *t, upb_alloc *a) { - uninit(&t->t, a); - upb_free(a, mutable_array(t)); -} - -bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val, - upb_alloc *a) { - upb_tabval tabval; - tabval.val = val.val; - UPB_ASSERT(upb_arrhas(tabval)); /* This will reject (uint64_t)-1. Fix this. */ - - if (key < t->array_size) { - UPB_ASSERT(!upb_arrhas(t->array[key])); - t->array_count++; - mutable_array(t)[key].val = val.val; - } else { - if (isfull(&t->t)) { - /* Need to resize the hash part, but we re-use the array part. */ - size_t i; - upb_table new_table; - - if (!init(&new_table, t->t.size_lg2 + 1, a)) { - return false; - } - - for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) { - const upb_tabent *e = &t->t.entries[i]; - uint32_t hash; - upb_value v; - - _upb_value_setval(&v, e->val.val); - hash = upb_inthash(e->key); - insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql); - } - - UPB_ASSERT(t->t.count == new_table.count); - - uninit(&t->t, a); - t->t = new_table; - } - insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql); - } - check(t); - return true; -} - -bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) { - const upb_tabval *table_v = inttable_val_const(t, key); - if (!table_v) return false; - if (v) _upb_value_setval(v, table_v->val); - return true; -} - -bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) { - upb_tabval *table_v = inttable_val(t, key); - if (!table_v) return false; - table_v->val = val.val; - return true; -} - -bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) { - bool success; - if (key < t->array_size) { - if (upb_arrhas(t->array[key])) { - upb_tabval empty = UPB_TABVALUE_EMPTY_INIT; - t->array_count--; - if (val) { - _upb_value_setval(val, t->array[key].val); - } - mutable_array(t)[key] = empty; - success = true; - } else { - success = false; - } - } else { - success = rm(&t->t, intkey(key), val, NULL, upb_inthash(key), &inteql); - } - check(t); - return success; -} - -bool upb_inttable_push2(upb_inttable *t, upb_value val, upb_alloc *a) { - return upb_inttable_insert2(t, upb_inttable_count(t), val, a); -} - -upb_value upb_inttable_pop(upb_inttable *t) { - upb_value val; - bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val); - UPB_ASSERT(ok); - return val; -} - -bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val, - upb_alloc *a) { - return upb_inttable_insert2(t, (uintptr_t)key, val, a); -} - -bool upb_inttable_lookupptr(const upb_inttable *t, const void *key, - upb_value *v) { - return upb_inttable_lookup(t, (uintptr_t)key, v); -} - -bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) { - return upb_inttable_remove(t, (uintptr_t)key, val); -} - -void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) { - /* A power-of-two histogram of the table keys. */ - size_t counts[UPB_MAXARRSIZE + 1] = {0}; - - /* The max key in each bucket. */ - uintptr_t max[UPB_MAXARRSIZE + 1] = {0}; - - upb_inttable_iter i; - size_t arr_count; - int size_lg2; - upb_inttable new_t; - - upb_inttable_begin(&i, t); - for (; !upb_inttable_done(&i); upb_inttable_next(&i)) { - uintptr_t key = upb_inttable_iter_key(&i); - int bucket = log2ceil(key); - max[bucket] = UPB_MAX(max[bucket], key); - counts[bucket]++; - } - - /* Find the largest power of two that satisfies the MIN_DENSITY - * definition (while actually having some keys). */ - arr_count = upb_inttable_count(t); - - for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) { - if (counts[size_lg2] == 0) { - /* We can halve again without losing any entries. */ - continue; - } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) { - break; - } - - arr_count -= counts[size_lg2]; - } - - UPB_ASSERT(arr_count <= upb_inttable_count(t)); - - { - /* Insert all elements into new, perfectly-sized table. */ - size_t arr_size = max[size_lg2] + 1; /* +1 so arr[max] will fit. */ - size_t hash_count = upb_inttable_count(t) - arr_count; - size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0; - int hashsize_lg2 = log2ceil(hash_size); - - upb_inttable_sizedinit(&new_t, arr_size, hashsize_lg2, a); - upb_inttable_begin(&i, t); - for (; !upb_inttable_done(&i); upb_inttable_next(&i)) { - uintptr_t k = upb_inttable_iter_key(&i); - upb_inttable_insert2(&new_t, k, upb_inttable_iter_value(&i), a); - } - UPB_ASSERT(new_t.array_size == arr_size); - UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2); - } - upb_inttable_uninit2(t, a); - *t = new_t; -} - -/* Iteration. */ - -static const upb_tabent *int_tabent(const upb_inttable_iter *i) { - UPB_ASSERT(!i->array_part); - return &i->t->t.entries[i->index]; -} - -static upb_tabval int_arrent(const upb_inttable_iter *i) { - UPB_ASSERT(i->array_part); - return i->t->array[i->index]; -} - -void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) { - i->t = t; - i->index = -1; - i->array_part = true; - upb_inttable_next(i); -} - -void upb_inttable_next(upb_inttable_iter *iter) { - const upb_inttable *t = iter->t; - if (iter->array_part) { - while (++iter->index < t->array_size) { - if (upb_arrhas(int_arrent(iter))) { - return; - } - } - iter->array_part = false; - iter->index = begin(&t->t); - } else { - iter->index = next(&t->t, iter->index); - } -} - -bool upb_inttable_done(const upb_inttable_iter *i) { - if (!i->t) return true; - if (i->array_part) { - return i->index >= i->t->array_size || - !upb_arrhas(int_arrent(i)); - } else { - return i->index >= upb_table_size(&i->t->t) || - upb_tabent_isempty(int_tabent(i)); - } -} - -uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) { - UPB_ASSERT(!upb_inttable_done(i)); - return i->array_part ? i->index : int_tabent(i)->key; -} - -upb_value upb_inttable_iter_value(const upb_inttable_iter *i) { - UPB_ASSERT(!upb_inttable_done(i)); - return _upb_value_val( - i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val); -} - -void upb_inttable_iter_setdone(upb_inttable_iter *i) { - i->t = NULL; - i->index = SIZE_MAX; - i->array_part = false; -} - -bool upb_inttable_iter_isequal(const upb_inttable_iter *i1, - const upb_inttable_iter *i2) { - if (upb_inttable_done(i1) && upb_inttable_done(i2)) - return true; - return i1->t == i2->t && i1->index == i2->index && - i1->array_part == i2->array_part; -} - -#if defined(UPB_UNALIGNED_READS_OK) || defined(__s390x__) -/* ----------------------------------------------------------------------------- - * MurmurHash2, by Austin Appleby (released as public domain). - * Reformatted and C99-ified by Joshua Haberman. - * Note - This code makes a few assumptions about how your machine behaves - - * 1. We can read a 4-byte value from any address without crashing - * 2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t - * And it has a few limitations - - * 1. It will not work incrementally. - * 2. It will not produce the same results on little-endian and big-endian - * machines. */ -uint32_t upb_murmur_hash2(const void *key, size_t len, uint32_t seed) { - /* 'm' and 'r' are mixing constants generated offline. - * They're not really 'magic', they just happen to work well. */ - const uint32_t m = 0x5bd1e995; - const int32_t r = 24; - - /* Initialize the hash to a 'random' value */ - uint32_t h = seed ^ len; - - /* Mix 4 bytes at a time into the hash */ - const uint8_t * data = (const uint8_t *)key; - while(len >= 4) { - uint32_t k; - memcpy(&k, data, sizeof(k)); - - k *= m; - k ^= k >> r; - k *= m; - - h *= m; - h ^= k; - - data += 4; - len -= 4; - } - - /* Handle the last few bytes of the input array */ - switch(len) { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; h *= m; - }; - - /* Do a few final mixes of the hash to ensure the last few - * bytes are well-incorporated. */ - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return h; -} - -#else /* !UPB_UNALIGNED_READS_OK */ - -/* ----------------------------------------------------------------------------- - * MurmurHashAligned2, by Austin Appleby - * Same algorithm as MurmurHash2, but only does aligned reads - should be safer - * on certain platforms. - * Performance will be lower than MurmurHash2 */ - -#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } - -uint32_t upb_murmur_hash2(const void * key, size_t len, uint32_t seed) { - const uint32_t m = 0x5bd1e995; - const int32_t r = 24; - const uint8_t * data = (const uint8_t *)key; - uint32_t h = (uint32_t)(seed ^ len); - uint8_t align = (uintptr_t)data & 3; - - if(align && (len >= 4)) { - /* Pre-load the temp registers */ - uint32_t t = 0, d = 0; - int32_t sl; - int32_t sr; - - switch(align) { - case 1: t |= data[2] << 16; - case 2: t |= data[1] << 8; - case 3: t |= data[0]; - } - - t <<= (8 * align); - - data += 4-align; - len -= 4-align; - - sl = 8 * (4-align); - sr = 8 * align; - - /* Mix */ - - while(len >= 4) { - uint32_t k; - - d = *(uint32_t *)data; - t = (t >> sr) | (d << sl); - - k = t; - - MIX(h,k,m); - - t = d; - - data += 4; - len -= 4; - } - - /* Handle leftover data in temp registers */ - - d = 0; - - if(len >= align) { - uint32_t k; - - switch(align) { - case 3: d |= data[2] << 16; - case 2: d |= data[1] << 8; - case 1: d |= data[0]; - } - - k = (t >> sr) | (d << sl); - MIX(h,k,m); - - data += align; - len -= align; - - /* ---------- - * Handle tail bytes */ - - switch(len) { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; h *= m; - }; - } else { - switch(len) { - case 3: d |= data[2] << 16; - case 2: d |= data[1] << 8; - case 1: d |= data[0]; - case 0: h ^= (t >> sr) | (d << sl); h *= m; - } - } - - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return h; - } else { - while(len >= 4) { - uint32_t k = *(uint32_t *)data; - - MIX(h,k,m); - - data += 4; - len -= 4; - } - - /* ---------- - * Handle tail bytes */ - - switch(len) { - case 3: h ^= data[2] << 16; - case 2: h ^= data[1] << 8; - case 1: h ^= data[0]; h *= m; - }; - - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - return h; - } -} -#undef MIX - -#endif /* UPB_UNALIGNED_READS_OK */ - - -#include -#include -#include -#include -#include -#include -#include - - -/* upb_status *****************************************************************/ - -void upb_status_clear(upb_status *status) { - if (!status) return; - status->ok = true; - status->msg[0] = '\0'; -} - -bool upb_ok(const upb_status *status) { return status->ok; } - -const char *upb_status_errmsg(const upb_status *status) { return status->msg; } - -void upb_status_seterrmsg(upb_status *status, const char *msg) { - if (!status) return; - status->ok = false; - strncpy(status->msg, msg, UPB_STATUS_MAX_MESSAGE - 1); - status->msg[UPB_STATUS_MAX_MESSAGE - 1] = '\0'; -} - -void upb_status_seterrf(upb_status *status, const char *fmt, ...) { - va_list args; - va_start(args, fmt); - upb_status_vseterrf(status, fmt, args); - va_end(args); -} - -void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) { - if (!status) return; - status->ok = false; - _upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args); - status->msg[UPB_STATUS_MAX_MESSAGE - 1] = '\0'; -} - -/* upb_alloc ******************************************************************/ - -static void *upb_global_allocfunc(upb_alloc *alloc, void *ptr, size_t oldsize, - size_t size) { - UPB_UNUSED(alloc); - UPB_UNUSED(oldsize); - if (size == 0) { - free(ptr); - return NULL; - } else { - return realloc(ptr, size); - } -} - -upb_alloc upb_alloc_global = {&upb_global_allocfunc}; - -/* upb_arena ******************************************************************/ - -/* Be conservative and choose 16 in case anyone is using SSE. */ - -struct upb_arena { - _upb_arena_head head; - char *start; - - /* Allocator to allocate arena blocks. We are responsible for freeing these - * when we are destroyed. */ - upb_alloc *block_alloc; - - size_t bytes_allocated; - size_t next_block_size; - size_t max_block_size; - - /* Linked list of blocks. Points to an arena_block, defined in env.c */ - void *block_head; - - /* Cleanup entries. Pointer to a cleanup_ent, defined in env.c */ - void *cleanup_head; -}; - -typedef struct mem_block { - struct mem_block *next; - bool owned; - /* Data follows. */ -} mem_block; - -typedef struct cleanup_ent { - struct cleanup_ent *next; - upb_cleanup_func *cleanup; - void *ud; -} cleanup_ent; - -static void upb_arena_addblock(upb_arena *a, void *ptr, size_t size, - bool owned) { - mem_block *block = ptr; - - if (a->block_head) { - a->bytes_allocated += a->head.ptr - a->start; - } - - block->next = a->block_head; - block->owned = owned; - - a->block_head = block; - a->start = (char*)block + _upb_arena_alignup(sizeof(mem_block)); - a->head.ptr = a->start; - a->head.end = (char*)block + size; - - /* TODO(haberman): ASAN poison. */ -} - -static mem_block *upb_arena_allocblock(upb_arena *a, size_t size) { - size_t block_size = UPB_MAX(size, a->next_block_size) + _upb_arena_alignup(sizeof(mem_block)); - mem_block *block = upb_malloc(a->block_alloc, block_size); - - if (!block) { - return NULL; - } - - upb_arena_addblock(a, block, block_size, true); - a->next_block_size = UPB_MIN(block_size * 2, a->max_block_size); - - return block; -} - -void *_upb_arena_slowmalloc(upb_arena *a, size_t size) { - mem_block *block = upb_arena_allocblock(a, size); - if (!block) return NULL; /* Out of memory. */ - return upb_arena_malloc(a, size); -} - -static void *upb_arena_doalloc(upb_alloc *alloc, void *ptr, size_t oldsize, - size_t size) { - upb_arena *a = (upb_arena*)alloc; /* upb_alloc is initial member. */ - void *ret; - - if (size == 0) { - return NULL; /* We are an arena, don't need individual frees. */ - } - - ret = upb_arena_malloc(a, size); - if (!ret) return NULL; - - /* TODO(haberman): special-case if this is a realloc of the last alloc? */ - - if (oldsize > 0) { - memcpy(ret, ptr, oldsize); /* Preserve existing data. */ - } - - /* TODO(haberman): ASAN unpoison. */ - return ret; -} - -/* Public Arena API ***********************************************************/ - -#define upb_alignof(type) offsetof (struct { char c; type member; }, member) - -upb_arena *upb_arena_init(void *mem, size_t n, upb_alloc *alloc) { - const size_t first_block_overhead = sizeof(upb_arena) + sizeof(mem_block); - upb_arena *a; - bool owned = false; - - /* Round block size down to alignof(*a) since we will allocate the arena - * itself at the end. */ - n &= ~(upb_alignof(upb_arena) - 1); - - if (n < first_block_overhead) { - /* We need to malloc the initial block. */ - n = first_block_overhead + 256; - owned = true; - if (!alloc || !(mem = upb_malloc(alloc, n))) { - return NULL; - } - } - - a = (void*)((char*)mem + n - sizeof(*a)); - n -= sizeof(*a); - - a->head.alloc.func = &upb_arena_doalloc; - a->head.ptr = NULL; - a->head.end = NULL; - a->start = NULL; - a->block_alloc = &upb_alloc_global; - a->bytes_allocated = 0; - a->next_block_size = 256; - a->max_block_size = 16384; - a->cleanup_head = NULL; - a->block_head = NULL; - a->block_alloc = alloc; - - upb_arena_addblock(a, mem, n, owned); - - return a; -} - -#undef upb_alignof - -void upb_arena_free(upb_arena *a) { - cleanup_ent *ent = a->cleanup_head; - mem_block *block = a->block_head; - - while (ent) { - ent->cleanup(ent->ud); - ent = ent->next; - } - - /* Must do this after running cleanup functions, because this will delete - * the memory we store our cleanup entries in! */ - while (block) { - /* Load first since we are deleting block. */ - mem_block *next = block->next; - - if (block->owned) { - upb_free(a->block_alloc, block); - } - - block = next; - } -} - -bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func) { - cleanup_ent *ent = upb_malloc(&a->head.alloc, sizeof(cleanup_ent)); - if (!ent) { - return false; /* Out of memory. */ - } - - ent->cleanup = func; - ent->ud = ud; - ent->next = a->cleanup_head; - a->cleanup_head = ent; - - return true; -} - -size_t upb_arena_bytesallocated(const upb_arena *a) { - return a->bytes_allocated + (a->head.ptr - a->start); -} -/* This file was generated by upbc (the upb compiler) from the input - * file: - * - * google/protobuf/descriptor.proto - * - * Do not edit -- your changes will be discarded when the file is - * regenerated. */ - -#include - - -static const upb_msglayout *const google_protobuf_FileDescriptorSet_submsgs[1] = { - &google_protobuf_FileDescriptorProto_msginit, -}; - -static const upb_msglayout_field google_protobuf_FileDescriptorSet__fields[1] = { - {1, UPB_SIZE(0, 0), 0, 0, 11, 3}, -}; - -const upb_msglayout google_protobuf_FileDescriptorSet_msginit = { - &google_protobuf_FileDescriptorSet_submsgs[0], - &google_protobuf_FileDescriptorSet__fields[0], - UPB_SIZE(4, 8), 1, false, -}; - -static const upb_msglayout *const google_protobuf_FileDescriptorProto_submsgs[6] = { - &google_protobuf_DescriptorProto_msginit, - &google_protobuf_EnumDescriptorProto_msginit, - &google_protobuf_FieldDescriptorProto_msginit, - &google_protobuf_FileOptions_msginit, - &google_protobuf_ServiceDescriptorProto_msginit, - &google_protobuf_SourceCodeInfo_msginit, -}; - -static const upb_msglayout_field google_protobuf_FileDescriptorProto__fields[12] = { - {1, UPB_SIZE(4, 8), 1, 0, 9, 1}, - {2, UPB_SIZE(12, 24), 2, 0, 9, 1}, - {3, UPB_SIZE(36, 72), 0, 0, 9, 3}, - {4, UPB_SIZE(40, 80), 0, 0, 11, 3}, - {5, UPB_SIZE(44, 88), 0, 1, 11, 3}, - {6, UPB_SIZE(48, 96), 0, 4, 11, 3}, - {7, UPB_SIZE(52, 104), 0, 2, 11, 3}, - {8, UPB_SIZE(28, 56), 4, 3, 11, 1}, - {9, UPB_SIZE(32, 64), 5, 5, 11, 1}, - {10, UPB_SIZE(56, 112), 0, 0, 5, 3}, - {11, UPB_SIZE(60, 120), 0, 0, 5, 3}, - {12, UPB_SIZE(20, 40), 3, 0, 9, 1}, -}; - -const upb_msglayout google_protobuf_FileDescriptorProto_msginit = { - &google_protobuf_FileDescriptorProto_submsgs[0], - &google_protobuf_FileDescriptorProto__fields[0], - UPB_SIZE(64, 128), 12, false, -}; - -static const upb_msglayout *const google_protobuf_DescriptorProto_submsgs[8] = { - &google_protobuf_DescriptorProto_msginit, - &google_protobuf_DescriptorProto_ExtensionRange_msginit, - &google_protobuf_DescriptorProto_ReservedRange_msginit, - &google_protobuf_EnumDescriptorProto_msginit, - &google_protobuf_FieldDescriptorProto_msginit, - &google_protobuf_MessageOptions_msginit, - &google_protobuf_OneofDescriptorProto_msginit, -}; - -static const upb_msglayout_field google_protobuf_DescriptorProto__fields[10] = { - {1, UPB_SIZE(4, 8), 1, 0, 9, 1}, - {2, UPB_SIZE(16, 32), 0, 4, 11, 3}, - {3, UPB_SIZE(20, 40), 0, 0, 11, 3}, - {4, UPB_SIZE(24, 48), 0, 3, 11, 3}, - {5, UPB_SIZE(28, 56), 0, 1, 11, 3}, - {6, UPB_SIZE(32, 64), 0, 4, 11, 3}, - {7, UPB_SIZE(12, 24), 2, 5, 11, 1}, - {8, UPB_SIZE(36, 72), 0, 6, 11, 3}, - {9, UPB_SIZE(40, 80), 0, 2, 11, 3}, - {10, UPB_SIZE(44, 88), 0, 0, 9, 3}, -}; - -const upb_msglayout google_protobuf_DescriptorProto_msginit = { - &google_protobuf_DescriptorProto_submsgs[0], - &google_protobuf_DescriptorProto__fields[0], - UPB_SIZE(48, 96), 10, false, -}; - -static const upb_msglayout *const google_protobuf_DescriptorProto_ExtensionRange_submsgs[1] = { - &google_protobuf_ExtensionRangeOptions_msginit, -}; - -static const upb_msglayout_field google_protobuf_DescriptorProto_ExtensionRange__fields[3] = { - {1, UPB_SIZE(4, 4), 1, 0, 5, 1}, - {2, UPB_SIZE(8, 8), 2, 0, 5, 1}, - {3, UPB_SIZE(12, 16), 3, 0, 11, 1}, -}; - -const upb_msglayout google_protobuf_DescriptorProto_ExtensionRange_msginit = { - &google_protobuf_DescriptorProto_ExtensionRange_submsgs[0], - &google_protobuf_DescriptorProto_ExtensionRange__fields[0], - UPB_SIZE(16, 24), 3, false, -}; - -static const upb_msglayout_field google_protobuf_DescriptorProto_ReservedRange__fields[2] = { - {1, UPB_SIZE(4, 4), 1, 0, 5, 1}, - {2, UPB_SIZE(8, 8), 2, 0, 5, 1}, -}; - -const upb_msglayout google_protobuf_DescriptorProto_ReservedRange_msginit = { - NULL, - &google_protobuf_DescriptorProto_ReservedRange__fields[0], - UPB_SIZE(12, 12), 2, false, -}; - -static const upb_msglayout *const google_protobuf_ExtensionRangeOptions_submsgs[1] = { - &google_protobuf_UninterpretedOption_msginit, -}; - -static const upb_msglayout_field google_protobuf_ExtensionRangeOptions__fields[1] = { - {999, UPB_SIZE(0, 0), 0, 0, 11, 3}, -}; - -const upb_msglayout google_protobuf_ExtensionRangeOptions_msginit = { - &google_protobuf_ExtensionRangeOptions_submsgs[0], - &google_protobuf_ExtensionRangeOptions__fields[0], - UPB_SIZE(4, 8), 1, false, -}; - -static const upb_msglayout *const google_protobuf_FieldDescriptorProto_submsgs[1] = { - &google_protobuf_FieldOptions_msginit, -}; - -static const upb_msglayout_field google_protobuf_FieldDescriptorProto__fields[11] = { - {1, UPB_SIZE(36, 40), 6, 0, 9, 1}, - {2, UPB_SIZE(44, 56), 7, 0, 9, 1}, - {3, UPB_SIZE(24, 24), 3, 0, 5, 1}, - {4, UPB_SIZE(8, 8), 1, 0, 14, 1}, - {5, UPB_SIZE(16, 16), 2, 0, 14, 1}, - {6, UPB_SIZE(52, 72), 8, 0, 9, 1}, - {7, UPB_SIZE(60, 88), 9, 0, 9, 1}, - {8, UPB_SIZE(76, 120), 11, 0, 11, 1}, - {9, UPB_SIZE(28, 28), 4, 0, 5, 1}, - {10, UPB_SIZE(68, 104), 10, 0, 9, 1}, - {17, UPB_SIZE(32, 32), 5, 0, 8, 1}, -}; - -const upb_msglayout google_protobuf_FieldDescriptorProto_msginit = { - &google_protobuf_FieldDescriptorProto_submsgs[0], - &google_protobuf_FieldDescriptorProto__fields[0], - UPB_SIZE(80, 128), 11, false, -}; - -static const upb_msglayout *const google_protobuf_OneofDescriptorProto_submsgs[1] = { - &google_protobuf_OneofOptions_msginit, -}; - -static const upb_msglayout_field google_protobuf_OneofDescriptorProto__fields[2] = { - {1, UPB_SIZE(4, 8), 1, 0, 9, 1}, - {2, UPB_SIZE(12, 24), 2, 0, 11, 1}, -}; - -const upb_msglayout google_protobuf_OneofDescriptorProto_msginit = { - &google_protobuf_OneofDescriptorProto_submsgs[0], - &google_protobuf_OneofDescriptorProto__fields[0], - UPB_SIZE(16, 32), 2, false, -}; - -static const upb_msglayout *const google_protobuf_EnumDescriptorProto_submsgs[3] = { - &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit, - &google_protobuf_EnumOptions_msginit, - &google_protobuf_EnumValueDescriptorProto_msginit, -}; - -static const upb_msglayout_field google_protobuf_EnumDescriptorProto__fields[5] = { - {1, UPB_SIZE(4, 8), 1, 0, 9, 1}, - {2, UPB_SIZE(16, 32), 0, 2, 11, 3}, - {3, UPB_SIZE(12, 24), 2, 1, 11, 1}, - {4, UPB_SIZE(20, 40), 0, 0, 11, 3}, - {5, UPB_SIZE(24, 48), 0, 0, 9, 3}, -}; - -const upb_msglayout google_protobuf_EnumDescriptorProto_msginit = { - &google_protobuf_EnumDescriptorProto_submsgs[0], - &google_protobuf_EnumDescriptorProto__fields[0], - UPB_SIZE(32, 64), 5, false, -}; - -static const upb_msglayout_field google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[2] = { - {1, UPB_SIZE(4, 4), 1, 0, 5, 1}, - {2, UPB_SIZE(8, 8), 2, 0, 5, 1}, -}; - -const upb_msglayout google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit = { - NULL, - &google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[0], - UPB_SIZE(12, 12), 2, false, -}; - -static const upb_msglayout *const google_protobuf_EnumValueDescriptorProto_submsgs[1] = { - &google_protobuf_EnumValueOptions_msginit, -}; - -static const upb_msglayout_field google_protobuf_EnumValueDescriptorProto__fields[3] = { - {1, UPB_SIZE(8, 8), 2, 0, 9, 1}, - {2, UPB_SIZE(4, 4), 1, 0, 5, 1}, - {3, UPB_SIZE(16, 24), 3, 0, 11, 1}, -}; - -const upb_msglayout google_protobuf_EnumValueDescriptorProto_msginit = { - &google_protobuf_EnumValueDescriptorProto_submsgs[0], - &google_protobuf_EnumValueDescriptorProto__fields[0], - UPB_SIZE(24, 32), 3, false, -}; - -static const upb_msglayout *const google_protobuf_ServiceDescriptorProto_submsgs[2] = { - &google_protobuf_MethodDescriptorProto_msginit, - &google_protobuf_ServiceOptions_msginit, -}; - -static const upb_msglayout_field google_protobuf_ServiceDescriptorProto__fields[3] = { - {1, UPB_SIZE(4, 8), 1, 0, 9, 1}, - {2, UPB_SIZE(16, 32), 0, 0, 11, 3}, - {3, UPB_SIZE(12, 24), 2, 1, 11, 1}, -}; - -const upb_msglayout google_protobuf_ServiceDescriptorProto_msginit = { - &google_protobuf_ServiceDescriptorProto_submsgs[0], - &google_protobuf_ServiceDescriptorProto__fields[0], - UPB_SIZE(24, 48), 3, false, -}; - -static const upb_msglayout *const google_protobuf_MethodDescriptorProto_submsgs[1] = { - &google_protobuf_MethodOptions_msginit, -}; - -static const upb_msglayout_field google_protobuf_MethodDescriptorProto__fields[6] = { - {1, UPB_SIZE(4, 8), 3, 0, 9, 1}, - {2, UPB_SIZE(12, 24), 4, 0, 9, 1}, - {3, UPB_SIZE(20, 40), 5, 0, 9, 1}, - {4, UPB_SIZE(28, 56), 6, 0, 11, 1}, - {5, UPB_SIZE(1, 1), 1, 0, 8, 1}, - {6, UPB_SIZE(2, 2), 2, 0, 8, 1}, -}; - -const upb_msglayout google_protobuf_MethodDescriptorProto_msginit = { - &google_protobuf_MethodDescriptorProto_submsgs[0], - &google_protobuf_MethodDescriptorProto__fields[0], - UPB_SIZE(32, 64), 6, false, -}; - -static const upb_msglayout *const google_protobuf_FileOptions_submsgs[1] = { - &google_protobuf_UninterpretedOption_msginit, -}; - -static const upb_msglayout_field google_protobuf_FileOptions__fields[21] = { - {1, UPB_SIZE(28, 32), 11, 0, 9, 1}, - {8, UPB_SIZE(36, 48), 12, 0, 9, 1}, - {9, UPB_SIZE(8, 8), 1, 0, 14, 1}, - {10, UPB_SIZE(16, 16), 2, 0, 8, 1}, - {11, UPB_SIZE(44, 64), 13, 0, 9, 1}, - {16, UPB_SIZE(17, 17), 3, 0, 8, 1}, - {17, UPB_SIZE(18, 18), 4, 0, 8, 1}, - {18, UPB_SIZE(19, 19), 5, 0, 8, 1}, - {20, UPB_SIZE(20, 20), 6, 0, 8, 1}, - {23, UPB_SIZE(21, 21), 7, 0, 8, 1}, - {27, UPB_SIZE(22, 22), 8, 0, 8, 1}, - {31, UPB_SIZE(23, 23), 9, 0, 8, 1}, - {36, UPB_SIZE(52, 80), 14, 0, 9, 1}, - {37, UPB_SIZE(60, 96), 15, 0, 9, 1}, - {39, UPB_SIZE(68, 112), 16, 0, 9, 1}, - {40, UPB_SIZE(76, 128), 17, 0, 9, 1}, - {41, UPB_SIZE(84, 144), 18, 0, 9, 1}, - {42, UPB_SIZE(24, 24), 10, 0, 8, 1}, - {44, UPB_SIZE(92, 160), 19, 0, 9, 1}, - {45, UPB_SIZE(100, 176), 20, 0, 9, 1}, - {999, UPB_SIZE(108, 192), 0, 0, 11, 3}, -}; - -const upb_msglayout google_protobuf_FileOptions_msginit = { - &google_protobuf_FileOptions_submsgs[0], - &google_protobuf_FileOptions__fields[0], - UPB_SIZE(112, 208), 21, false, -}; - -static const upb_msglayout *const google_protobuf_MessageOptions_submsgs[1] = { - &google_protobuf_UninterpretedOption_msginit, -}; - -static const upb_msglayout_field google_protobuf_MessageOptions__fields[5] = { - {1, UPB_SIZE(1, 1), 1, 0, 8, 1}, - {2, UPB_SIZE(2, 2), 2, 0, 8, 1}, - {3, UPB_SIZE(3, 3), 3, 0, 8, 1}, - {7, UPB_SIZE(4, 4), 4, 0, 8, 1}, - {999, UPB_SIZE(8, 8), 0, 0, 11, 3}, -}; - -const upb_msglayout google_protobuf_MessageOptions_msginit = { - &google_protobuf_MessageOptions_submsgs[0], - &google_protobuf_MessageOptions__fields[0], - UPB_SIZE(12, 16), 5, false, -}; - -static const upb_msglayout *const google_protobuf_FieldOptions_submsgs[1] = { - &google_protobuf_UninterpretedOption_msginit, -}; - -static const upb_msglayout_field google_protobuf_FieldOptions__fields[7] = { - {1, UPB_SIZE(8, 8), 1, 0, 14, 1}, - {2, UPB_SIZE(24, 24), 3, 0, 8, 1}, - {3, UPB_SIZE(25, 25), 4, 0, 8, 1}, - {5, UPB_SIZE(26, 26), 5, 0, 8, 1}, - {6, UPB_SIZE(16, 16), 2, 0, 14, 1}, - {10, UPB_SIZE(27, 27), 6, 0, 8, 1}, - {999, UPB_SIZE(28, 32), 0, 0, 11, 3}, -}; - -const upb_msglayout google_protobuf_FieldOptions_msginit = { - &google_protobuf_FieldOptions_submsgs[0], - &google_protobuf_FieldOptions__fields[0], - UPB_SIZE(32, 40), 7, false, -}; - -static const upb_msglayout *const google_protobuf_OneofOptions_submsgs[1] = { - &google_protobuf_UninterpretedOption_msginit, -}; - -static const upb_msglayout_field google_protobuf_OneofOptions__fields[1] = { - {999, UPB_SIZE(0, 0), 0, 0, 11, 3}, -}; - -const upb_msglayout google_protobuf_OneofOptions_msginit = { - &google_protobuf_OneofOptions_submsgs[0], - &google_protobuf_OneofOptions__fields[0], - UPB_SIZE(4, 8), 1, false, -}; - -static const upb_msglayout *const google_protobuf_EnumOptions_submsgs[1] = { - &google_protobuf_UninterpretedOption_msginit, -}; - -static const upb_msglayout_field google_protobuf_EnumOptions__fields[3] = { - {2, UPB_SIZE(1, 1), 1, 0, 8, 1}, - {3, UPB_SIZE(2, 2), 2, 0, 8, 1}, - {999, UPB_SIZE(4, 8), 0, 0, 11, 3}, -}; - -const upb_msglayout google_protobuf_EnumOptions_msginit = { - &google_protobuf_EnumOptions_submsgs[0], - &google_protobuf_EnumOptions__fields[0], - UPB_SIZE(8, 16), 3, false, -}; - -static const upb_msglayout *const google_protobuf_EnumValueOptions_submsgs[1] = { - &google_protobuf_UninterpretedOption_msginit, -}; - -static const upb_msglayout_field google_protobuf_EnumValueOptions__fields[2] = { - {1, UPB_SIZE(1, 1), 1, 0, 8, 1}, - {999, UPB_SIZE(4, 8), 0, 0, 11, 3}, -}; - -const upb_msglayout google_protobuf_EnumValueOptions_msginit = { - &google_protobuf_EnumValueOptions_submsgs[0], - &google_protobuf_EnumValueOptions__fields[0], - UPB_SIZE(8, 16), 2, false, -}; - -static const upb_msglayout *const google_protobuf_ServiceOptions_submsgs[1] = { - &google_protobuf_UninterpretedOption_msginit, -}; - -static const upb_msglayout_field google_protobuf_ServiceOptions__fields[2] = { - {33, UPB_SIZE(1, 1), 1, 0, 8, 1}, - {999, UPB_SIZE(4, 8), 0, 0, 11, 3}, -}; - -const upb_msglayout google_protobuf_ServiceOptions_msginit = { - &google_protobuf_ServiceOptions_submsgs[0], - &google_protobuf_ServiceOptions__fields[0], - UPB_SIZE(8, 16), 2, false, -}; - -static const upb_msglayout *const google_protobuf_MethodOptions_submsgs[1] = { - &google_protobuf_UninterpretedOption_msginit, -}; - -static const upb_msglayout_field google_protobuf_MethodOptions__fields[3] = { - {33, UPB_SIZE(16, 16), 2, 0, 8, 1}, - {34, UPB_SIZE(8, 8), 1, 0, 14, 1}, - {999, UPB_SIZE(20, 24), 0, 0, 11, 3}, -}; - -const upb_msglayout google_protobuf_MethodOptions_msginit = { - &google_protobuf_MethodOptions_submsgs[0], - &google_protobuf_MethodOptions__fields[0], - UPB_SIZE(24, 32), 3, false, -}; - -static const upb_msglayout *const google_protobuf_UninterpretedOption_submsgs[1] = { - &google_protobuf_UninterpretedOption_NamePart_msginit, -}; - -static const upb_msglayout_field google_protobuf_UninterpretedOption__fields[7] = { - {2, UPB_SIZE(56, 80), 0, 0, 11, 3}, - {3, UPB_SIZE(32, 32), 4, 0, 9, 1}, - {4, UPB_SIZE(8, 8), 1, 0, 4, 1}, - {5, UPB_SIZE(16, 16), 2, 0, 3, 1}, - {6, UPB_SIZE(24, 24), 3, 0, 1, 1}, - {7, UPB_SIZE(40, 48), 5, 0, 12, 1}, - {8, UPB_SIZE(48, 64), 6, 0, 9, 1}, -}; - -const upb_msglayout google_protobuf_UninterpretedOption_msginit = { - &google_protobuf_UninterpretedOption_submsgs[0], - &google_protobuf_UninterpretedOption__fields[0], - UPB_SIZE(64, 96), 7, false, -}; - -static const upb_msglayout_field google_protobuf_UninterpretedOption_NamePart__fields[2] = { - {1, UPB_SIZE(4, 8), 2, 0, 9, 2}, - {2, UPB_SIZE(1, 1), 1, 0, 8, 2}, -}; - -const upb_msglayout google_protobuf_UninterpretedOption_NamePart_msginit = { - NULL, - &google_protobuf_UninterpretedOption_NamePart__fields[0], - UPB_SIZE(16, 32), 2, false, -}; - -static const upb_msglayout *const google_protobuf_SourceCodeInfo_submsgs[1] = { - &google_protobuf_SourceCodeInfo_Location_msginit, -}; - -static const upb_msglayout_field google_protobuf_SourceCodeInfo__fields[1] = { - {1, UPB_SIZE(0, 0), 0, 0, 11, 3}, -}; - -const upb_msglayout google_protobuf_SourceCodeInfo_msginit = { - &google_protobuf_SourceCodeInfo_submsgs[0], - &google_protobuf_SourceCodeInfo__fields[0], - UPB_SIZE(4, 8), 1, false, -}; - -static const upb_msglayout_field google_protobuf_SourceCodeInfo_Location__fields[5] = { - {1, UPB_SIZE(20, 40), 0, 0, 5, _UPB_LABEL_PACKED}, - {2, UPB_SIZE(24, 48), 0, 0, 5, _UPB_LABEL_PACKED}, - {3, UPB_SIZE(4, 8), 1, 0, 9, 1}, - {4, UPB_SIZE(12, 24), 2, 0, 9, 1}, - {6, UPB_SIZE(28, 56), 0, 0, 9, 3}, -}; - -const upb_msglayout google_protobuf_SourceCodeInfo_Location_msginit = { - NULL, - &google_protobuf_SourceCodeInfo_Location__fields[0], - UPB_SIZE(32, 64), 5, false, -}; - -static const upb_msglayout *const google_protobuf_GeneratedCodeInfo_submsgs[1] = { - &google_protobuf_GeneratedCodeInfo_Annotation_msginit, -}; - -static const upb_msglayout_field google_protobuf_GeneratedCodeInfo__fields[1] = { - {1, UPB_SIZE(0, 0), 0, 0, 11, 3}, -}; - -const upb_msglayout google_protobuf_GeneratedCodeInfo_msginit = { - &google_protobuf_GeneratedCodeInfo_submsgs[0], - &google_protobuf_GeneratedCodeInfo__fields[0], - UPB_SIZE(4, 8), 1, false, -}; - -static const upb_msglayout_field google_protobuf_GeneratedCodeInfo_Annotation__fields[4] = { - {1, UPB_SIZE(20, 32), 0, 0, 5, _UPB_LABEL_PACKED}, - {2, UPB_SIZE(12, 16), 3, 0, 9, 1}, - {3, UPB_SIZE(4, 4), 1, 0, 5, 1}, - {4, UPB_SIZE(8, 8), 2, 0, 5, 1}, -}; - -const upb_msglayout google_protobuf_GeneratedCodeInfo_Annotation_msginit = { - NULL, - &google_protobuf_GeneratedCodeInfo_Annotation__fields[0], - UPB_SIZE(24, 48), 4, false, -}; - - - - -#include -#include -#include -#include - - -typedef struct { - size_t len; - char str[1]; /* Null-terminated string data follows. */ -} str_t; - -static str_t *newstr(upb_alloc *alloc, const char *data, size_t len) { - str_t *ret = upb_malloc(alloc, sizeof(*ret) + len); - if (!ret) return NULL; - ret->len = len; - memcpy(ret->str, data, len); - ret->str[len] = '\0'; - return ret; -} - -struct upb_fielddef { - const upb_filedef *file; - const upb_msgdef *msgdef; - const char *full_name; - const char *json_name; - union { - int64_t sint; - uint64_t uint; - double dbl; - float flt; - bool boolean; - str_t *str; - } defaultval; - const upb_oneofdef *oneof; - union { - const upb_msgdef *msgdef; - const upb_enumdef *enumdef; - const google_protobuf_FieldDescriptorProto *unresolved; - } sub; - uint32_t number_; - uint16_t index_; - uint16_t layout_index; - uint32_t selector_base; /* Used to index into a upb::Handlers table. */ - bool is_extension_; - bool lazy_; - bool packed_; - bool proto3_optional_; - upb_descriptortype_t type_; - upb_label_t label_; -}; - -struct upb_msgdef { - const upb_msglayout *layout; - const upb_filedef *file; - const char *full_name; - uint32_t selector_count; - uint32_t submsg_field_count; - - /* Tables for looking up fields by number and name. */ - upb_inttable itof; - upb_strtable ntof; - - const upb_fielddef *fields; - const upb_oneofdef *oneofs; - int field_count; - int oneof_count; - int real_oneof_count; - - /* Is this a map-entry message? */ - bool map_entry; - upb_wellknowntype_t well_known_type; - - /* TODO(haberman): proper extension ranges (there can be multiple). */ -}; - -struct upb_enumdef { - const upb_filedef *file; - const char *full_name; - upb_strtable ntoi; - upb_inttable iton; - int32_t defaultval; -}; - -struct upb_oneofdef { - const upb_msgdef *parent; - const char *full_name; - uint32_t index; - upb_strtable ntof; - upb_inttable itof; -}; - -struct upb_filedef { - const char *name; - const char *package; - const char *phpprefix; - const char *phpnamespace; - upb_syntax_t syntax; - - const upb_filedef **deps; - const upb_msgdef *msgs; - const upb_enumdef *enums; - const upb_fielddef *exts; - - int dep_count; - int msg_count; - int enum_count; - int ext_count; -}; - -struct upb_symtab { - upb_arena *arena; - upb_strtable syms; /* full_name -> packed def ptr */ - upb_strtable files; /* file_name -> upb_filedef* */ -}; - -/* Inside a symtab we store tagged pointers to specific def types. */ -typedef enum { - UPB_DEFTYPE_FIELD = 0, - - /* Only inside symtab table. */ - UPB_DEFTYPE_MSG = 1, - UPB_DEFTYPE_ENUM = 2, - - /* Only inside message table. */ - UPB_DEFTYPE_ONEOF = 1, - UPB_DEFTYPE_FIELD_JSONNAME = 2 -} upb_deftype_t; - -static const void *unpack_def(upb_value v, upb_deftype_t type) { - uintptr_t num = (uintptr_t)upb_value_getconstptr(v); - return (num & 3) == type ? (const void*)(num & ~3) : NULL; -} - -static upb_value pack_def(const void *ptr, upb_deftype_t type) { - uintptr_t num = (uintptr_t)ptr | type; - return upb_value_constptr((const void*)num); -} - -/* isalpha() etc. from are locale-dependent, which we don't want. */ -static bool upb_isbetween(char c, char low, char high) { - return c >= low && c <= high; -} - -static bool upb_isletter(char c) { - return upb_isbetween(c, 'A', 'Z') || upb_isbetween(c, 'a', 'z') || c == '_'; -} - -static bool upb_isalphanum(char c) { - return upb_isletter(c) || upb_isbetween(c, '0', '9'); -} - -static bool upb_isident(upb_strview name, bool full, upb_status *s) { - const char *str = name.data; - size_t len = name.size; - bool start = true; - size_t i; - for (i = 0; i < len; i++) { - char c = str[i]; - if (c == '.') { - if (start || !full) { - upb_status_seterrf(s, "invalid name: unexpected '.' (%s)", str); - return false; - } - start = true; - } else if (start) { - if (!upb_isletter(c)) { - upb_status_seterrf( - s, "invalid name: path components must start with a letter (%s)", - str); - return false; - } - start = false; - } else { - if (!upb_isalphanum(c)) { - upb_status_seterrf(s, "invalid name: non-alphanumeric character (%s)", - str); - return false; - } - } - } - return !start; -} - -static const char *shortdefname(const char *fullname) { - const char *p; - - if (fullname == NULL) { - return NULL; - } else if ((p = strrchr(fullname, '.')) == NULL) { - /* No '.' in the name, return the full string. */ - return fullname; - } else { - /* Return one past the last '.'. */ - return p + 1; - } -} - -/* All submessage fields are lower than all other fields. - * Secondly, fields are increasing in order. */ -uint32_t field_rank(const upb_fielddef *f) { - uint32_t ret = upb_fielddef_number(f); - const uint32_t high_bit = 1 << 30; - UPB_ASSERT(ret < high_bit); - if (!upb_fielddef_issubmsg(f)) - ret |= high_bit; - return ret; -} - -int cmp_fields(const void *p1, const void *p2) { - const upb_fielddef *f1 = *(upb_fielddef*const*)p1; - const upb_fielddef *f2 = *(upb_fielddef*const*)p2; - return field_rank(f1) - field_rank(f2); -} - -/* A few implementation details of handlers. We put these here to avoid - * a def -> handlers dependency. */ - -#define UPB_STATIC_SELECTOR_COUNT 3 /* Warning: also in upb/handlers.h. */ - -static uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) { - return upb_fielddef_isseq(f) ? 2 : 0; -} - -static uint32_t upb_handlers_selectorcount(const upb_fielddef *f) { - uint32_t ret = 1; - if (upb_fielddef_isseq(f)) ret += 2; /* STARTSEQ/ENDSEQ */ - if (upb_fielddef_isstring(f)) ret += 2; /* [STRING]/STARTSTR/ENDSTR */ - if (upb_fielddef_issubmsg(f)) { - /* ENDSUBMSG (STARTSUBMSG is at table beginning) */ - ret += 0; - if (upb_fielddef_lazy(f)) { - /* STARTSTR/ENDSTR/STRING (for lazy) */ - ret += 3; - } - } - return ret; -} - -static void upb_status_setoom(upb_status *status) { - upb_status_seterrmsg(status, "out of memory"); -} - -static bool assign_msg_indices(upb_msgdef *m, upb_status *s) { - /* Sort fields. upb internally relies on UPB_TYPE_MESSAGE fields having the - * lowest indexes, but we do not publicly guarantee this. */ - upb_msg_field_iter j; - int i; - uint32_t selector; - int n = upb_msgdef_numfields(m); - upb_fielddef **fields; - - if (n == 0) { - m->selector_count = UPB_STATIC_SELECTOR_COUNT; - m->submsg_field_count = 0; - return true; - } - - fields = upb_gmalloc(n * sizeof(*fields)); - if (!fields) { - upb_status_setoom(s); - return false; - } - - m->submsg_field_count = 0; - for(i = 0, upb_msg_field_begin(&j, m); - !upb_msg_field_done(&j); - upb_msg_field_next(&j), i++) { - upb_fielddef *f = upb_msg_iter_field(&j); - UPB_ASSERT(f->msgdef == m); - if (upb_fielddef_issubmsg(f)) { - m->submsg_field_count++; - } - fields[i] = f; - } - - qsort(fields, n, sizeof(*fields), cmp_fields); - - selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count; - for (i = 0; i < n; i++) { - upb_fielddef *f = fields[i]; - f->index_ = i; - f->selector_base = selector + upb_handlers_selectorbaseoffset(f); - selector += upb_handlers_selectorcount(f); - } - m->selector_count = selector; - - upb_gfree(fields); - return true; -} - -static bool check_oneofs(upb_msgdef *m, upb_status *s) { - int i; - int first_synthetic = -1; - upb_oneofdef *mutable_oneofs = (upb_oneofdef*)m->oneofs; - - for (i = 0; i < m->oneof_count; i++) { - mutable_oneofs[i].index = i; - - if (upb_oneofdef_issynthetic(&mutable_oneofs[i])) { - if (first_synthetic == -1) { - first_synthetic = i; - } - } else { - if (first_synthetic != -1) { - upb_status_seterrf( - s, "Synthetic oneofs must be after all other oneofs: %s", - upb_oneofdef_name(&mutable_oneofs[i])); - return false; - } - } - } - - if (first_synthetic == -1) { - m->real_oneof_count = m->oneof_count; - } else { - m->real_oneof_count = first_synthetic; - } - - return true; -} - -static void assign_msg_wellknowntype(upb_msgdef *m) { - const char *name = upb_msgdef_fullname(m); - if (name == NULL) { - m->well_known_type = UPB_WELLKNOWN_UNSPECIFIED; - return; - } - if (!strcmp(name, "google.protobuf.Any")) { - m->well_known_type = UPB_WELLKNOWN_ANY; - } else if (!strcmp(name, "google.protobuf.FieldMask")) { - m->well_known_type = UPB_WELLKNOWN_FIELDMASK; - } else if (!strcmp(name, "google.protobuf.Duration")) { - m->well_known_type = UPB_WELLKNOWN_DURATION; - } else if (!strcmp(name, "google.protobuf.Timestamp")) { - m->well_known_type = UPB_WELLKNOWN_TIMESTAMP; - } else if (!strcmp(name, "google.protobuf.DoubleValue")) { - m->well_known_type = UPB_WELLKNOWN_DOUBLEVALUE; - } else if (!strcmp(name, "google.protobuf.FloatValue")) { - m->well_known_type = UPB_WELLKNOWN_FLOATVALUE; - } else if (!strcmp(name, "google.protobuf.Int64Value")) { - m->well_known_type = UPB_WELLKNOWN_INT64VALUE; - } else if (!strcmp(name, "google.protobuf.UInt64Value")) { - m->well_known_type = UPB_WELLKNOWN_UINT64VALUE; - } else if (!strcmp(name, "google.protobuf.Int32Value")) { - m->well_known_type = UPB_WELLKNOWN_INT32VALUE; - } else if (!strcmp(name, "google.protobuf.UInt32Value")) { - m->well_known_type = UPB_WELLKNOWN_UINT32VALUE; - } else if (!strcmp(name, "google.protobuf.BoolValue")) { - m->well_known_type = UPB_WELLKNOWN_BOOLVALUE; - } else if (!strcmp(name, "google.protobuf.StringValue")) { - m->well_known_type = UPB_WELLKNOWN_STRINGVALUE; - } else if (!strcmp(name, "google.protobuf.BytesValue")) { - m->well_known_type = UPB_WELLKNOWN_BYTESVALUE; - } else if (!strcmp(name, "google.protobuf.Value")) { - m->well_known_type = UPB_WELLKNOWN_VALUE; - } else if (!strcmp(name, "google.protobuf.ListValue")) { - m->well_known_type = UPB_WELLKNOWN_LISTVALUE; - } else if (!strcmp(name, "google.protobuf.Struct")) { - m->well_known_type = UPB_WELLKNOWN_STRUCT; - } else { - m->well_known_type = UPB_WELLKNOWN_UNSPECIFIED; - } -} - - -/* upb_enumdef ****************************************************************/ - -const char *upb_enumdef_fullname(const upb_enumdef *e) { - return e->full_name; -} - -const char *upb_enumdef_name(const upb_enumdef *e) { - return shortdefname(e->full_name); -} - -const upb_filedef *upb_enumdef_file(const upb_enumdef *e) { - return e->file; -} - -int32_t upb_enumdef_default(const upb_enumdef *e) { - UPB_ASSERT(upb_enumdef_iton(e, e->defaultval)); - return e->defaultval; -} - -int upb_enumdef_numvals(const upb_enumdef *e) { - return (int)upb_strtable_count(&e->ntoi); -} - -void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) { - /* We iterate over the ntoi table, to account for duplicate numbers. */ - upb_strtable_begin(i, &e->ntoi); -} - -void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); } -bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); } - -bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, - size_t len, int32_t *num) { - upb_value v; - if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) { - return false; - } - if (num) *num = upb_value_getint32(v); - return true; -} - -const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) { - upb_value v; - return upb_inttable_lookup32(&def->iton, num, &v) ? - upb_value_getcstr(v) : NULL; -} - -const char *upb_enum_iter_name(upb_enum_iter *iter) { - return upb_strtable_iter_key(iter).data; -} - -int32_t upb_enum_iter_number(upb_enum_iter *iter) { - return upb_value_getint32(upb_strtable_iter_value(iter)); -} - - -/* upb_fielddef ***************************************************************/ - -const char *upb_fielddef_fullname(const upb_fielddef *f) { - return f->full_name; -} - -upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) { - switch (f->type_) { - case UPB_DESCRIPTOR_TYPE_DOUBLE: - return UPB_TYPE_DOUBLE; - case UPB_DESCRIPTOR_TYPE_FLOAT: - return UPB_TYPE_FLOAT; - case UPB_DESCRIPTOR_TYPE_INT64: - case UPB_DESCRIPTOR_TYPE_SINT64: - case UPB_DESCRIPTOR_TYPE_SFIXED64: - return UPB_TYPE_INT64; - case UPB_DESCRIPTOR_TYPE_INT32: - case UPB_DESCRIPTOR_TYPE_SFIXED32: - case UPB_DESCRIPTOR_TYPE_SINT32: - return UPB_TYPE_INT32; - case UPB_DESCRIPTOR_TYPE_UINT64: - case UPB_DESCRIPTOR_TYPE_FIXED64: - return UPB_TYPE_UINT64; - case UPB_DESCRIPTOR_TYPE_UINT32: - case UPB_DESCRIPTOR_TYPE_FIXED32: - return UPB_TYPE_UINT32; - case UPB_DESCRIPTOR_TYPE_ENUM: - return UPB_TYPE_ENUM; - case UPB_DESCRIPTOR_TYPE_BOOL: - return UPB_TYPE_BOOL; - case UPB_DESCRIPTOR_TYPE_STRING: - return UPB_TYPE_STRING; - case UPB_DESCRIPTOR_TYPE_BYTES: - return UPB_TYPE_BYTES; - case UPB_DESCRIPTOR_TYPE_GROUP: - case UPB_DESCRIPTOR_TYPE_MESSAGE: - return UPB_TYPE_MESSAGE; - } - UPB_UNREACHABLE(); -} - -upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) { - return f->type_; -} - -uint32_t upb_fielddef_index(const upb_fielddef *f) { - return f->index_; -} - -upb_label_t upb_fielddef_label(const upb_fielddef *f) { - return f->label_; -} - -uint32_t upb_fielddef_number(const upb_fielddef *f) { - return f->number_; -} - -bool upb_fielddef_isextension(const upb_fielddef *f) { - return f->is_extension_; -} - -bool upb_fielddef_lazy(const upb_fielddef *f) { - return f->lazy_; -} - -bool upb_fielddef_packed(const upb_fielddef *f) { - return f->packed_; -} - -const char *upb_fielddef_name(const upb_fielddef *f) { - return shortdefname(f->full_name); -} - -const char *upb_fielddef_jsonname(const upb_fielddef *f) { - return f->json_name; -} - -uint32_t upb_fielddef_selectorbase(const upb_fielddef *f) { - return f->selector_base; -} - -const upb_filedef *upb_fielddef_file(const upb_fielddef *f) { - return f->file; -} - -const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) { - return f->msgdef; -} - -const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) { - return f->oneof; -} - -const upb_oneofdef *upb_fielddef_realcontainingoneof(const upb_fielddef *f) { - if (!f->oneof || upb_oneofdef_issynthetic(f->oneof)) return NULL; - return f->oneof; -} - -static void chkdefaulttype(const upb_fielddef *f, int ctype) { - UPB_UNUSED(f); - UPB_UNUSED(ctype); -} - -int64_t upb_fielddef_defaultint64(const upb_fielddef *f) { - chkdefaulttype(f, UPB_TYPE_INT64); - return f->defaultval.sint; -} - -int32_t upb_fielddef_defaultint32(const upb_fielddef *f) { - chkdefaulttype(f, UPB_TYPE_INT32); - return (int32_t)f->defaultval.sint; -} - -uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) { - chkdefaulttype(f, UPB_TYPE_UINT64); - return f->defaultval.uint; -} - -uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) { - chkdefaulttype(f, UPB_TYPE_UINT32); - return (uint32_t)f->defaultval.uint; -} - -bool upb_fielddef_defaultbool(const upb_fielddef *f) { - chkdefaulttype(f, UPB_TYPE_BOOL); - return f->defaultval.boolean; -} - -float upb_fielddef_defaultfloat(const upb_fielddef *f) { - chkdefaulttype(f, UPB_TYPE_FLOAT); - return f->defaultval.flt; -} - -double upb_fielddef_defaultdouble(const upb_fielddef *f) { - chkdefaulttype(f, UPB_TYPE_DOUBLE); - return f->defaultval.dbl; -} - -const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) { - str_t *str = f->defaultval.str; - UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_STRING || - upb_fielddef_type(f) == UPB_TYPE_BYTES || - upb_fielddef_type(f) == UPB_TYPE_ENUM); - if (str) { - if (len) *len = str->len; - return str->str; - } else { - if (len) *len = 0; - return NULL; - } -} - -const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) { - UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_MESSAGE); - return f->sub.msgdef; -} - -const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) { - UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_ENUM); - return f->sub.enumdef; -} - -const upb_msglayout_field *upb_fielddef_layout(const upb_fielddef *f) { - return &f->msgdef->layout->fields[f->layout_index]; -} - -bool upb_fielddef_issubmsg(const upb_fielddef *f) { - return upb_fielddef_type(f) == UPB_TYPE_MESSAGE; -} - -bool upb_fielddef_isstring(const upb_fielddef *f) { - return upb_fielddef_type(f) == UPB_TYPE_STRING || - upb_fielddef_type(f) == UPB_TYPE_BYTES; -} - -bool upb_fielddef_isseq(const upb_fielddef *f) { - return upb_fielddef_label(f) == UPB_LABEL_REPEATED; -} - -bool upb_fielddef_isprimitive(const upb_fielddef *f) { - return !upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f); -} - -bool upb_fielddef_ismap(const upb_fielddef *f) { - return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) && - upb_msgdef_mapentry(upb_fielddef_msgsubdef(f)); -} - -bool upb_fielddef_hassubdef(const upb_fielddef *f) { - return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM; -} - -bool upb_fielddef_haspresence(const upb_fielddef *f) { - if (upb_fielddef_isseq(f)) return false; - return upb_fielddef_issubmsg(f) || upb_fielddef_containingoneof(f) || - f->file->syntax == UPB_SYNTAX_PROTO2; -} - -static bool between(int32_t x, int32_t low, int32_t high) { - return x >= low && x <= high; -} - -bool upb_fielddef_checklabel(int32_t label) { return between(label, 1, 3); } -bool upb_fielddef_checktype(int32_t type) { return between(type, 1, 11); } -bool upb_fielddef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); } - -bool upb_fielddef_checkdescriptortype(int32_t type) { - return between(type, 1, 18); -} - -/* upb_msgdef *****************************************************************/ - -const char *upb_msgdef_fullname(const upb_msgdef *m) { - return m->full_name; -} - -const upb_filedef *upb_msgdef_file(const upb_msgdef *m) { - return m->file; -} - -const char *upb_msgdef_name(const upb_msgdef *m) { - return shortdefname(m->full_name); -} - -upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) { - return m->file->syntax; -} - -size_t upb_msgdef_selectorcount(const upb_msgdef *m) { - return m->selector_count; -} - -uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m) { - return m->submsg_field_count; -} - -const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) { - upb_value val; - return upb_inttable_lookup32(&m->itof, i, &val) ? - upb_value_getconstptr(val) : NULL; -} - -const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name, - size_t len) { - upb_value val; - - if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { - return NULL; - } - - return unpack_def(val, UPB_DEFTYPE_FIELD); -} - -const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name, - size_t len) { - upb_value val; - - if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { - return NULL; - } - - return unpack_def(val, UPB_DEFTYPE_ONEOF); -} - -bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len, - const upb_fielddef **f, const upb_oneofdef **o) { - upb_value val; - - if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { - return false; - } - - *o = unpack_def(val, UPB_DEFTYPE_ONEOF); - *f = unpack_def(val, UPB_DEFTYPE_FIELD); - return *o || *f; /* False if this was a JSON name. */ -} - -const upb_fielddef *upb_msgdef_lookupjsonname(const upb_msgdef *m, - const char *name, size_t len) { - upb_value val; - const upb_fielddef* f; - - if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { - return NULL; - } - - f = unpack_def(val, UPB_DEFTYPE_FIELD); - if (!f) f = unpack_def(val, UPB_DEFTYPE_FIELD_JSONNAME); - - return f; -} - -int upb_msgdef_numfields(const upb_msgdef *m) { - return m->field_count; -} - -int upb_msgdef_numoneofs(const upb_msgdef *m) { - return m->oneof_count; -} - -int upb_msgdef_numrealoneofs(const upb_msgdef *m) { - return m->real_oneof_count; -} - -const upb_msglayout *upb_msgdef_layout(const upb_msgdef *m) { - return m->layout; -} - -const upb_fielddef *_upb_msgdef_field(const upb_msgdef *m, int i) { - if (i >= m->field_count) return NULL; - return &m->fields[i]; -} - -bool upb_msgdef_mapentry(const upb_msgdef *m) { - return m->map_entry; -} - -upb_wellknowntype_t upb_msgdef_wellknowntype(const upb_msgdef *m) { - return m->well_known_type; -} - -bool upb_msgdef_isnumberwrapper(const upb_msgdef *m) { - upb_wellknowntype_t type = upb_msgdef_wellknowntype(m); - return type >= UPB_WELLKNOWN_DOUBLEVALUE && - type <= UPB_WELLKNOWN_UINT32VALUE; -} - -void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) { - upb_inttable_begin(iter, &m->itof); -} - -void upb_msg_field_next(upb_msg_field_iter *iter) { upb_inttable_next(iter); } - -bool upb_msg_field_done(const upb_msg_field_iter *iter) { - return upb_inttable_done(iter); -} - -upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) { - return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter)); -} - -void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) { - upb_inttable_iter_setdone(iter); -} - -bool upb_msg_field_iter_isequal(const upb_msg_field_iter * iter1, - const upb_msg_field_iter * iter2) { - return upb_inttable_iter_isequal(iter1, iter2); -} - -void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) { - upb_strtable_begin(iter, &m->ntof); - /* We need to skip past any initial fields. */ - while (!upb_strtable_done(iter) && - !unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF)) { - upb_strtable_next(iter); - } -} - -void upb_msg_oneof_next(upb_msg_oneof_iter *iter) { - /* We need to skip past fields to return only oneofs. */ - do { - upb_strtable_next(iter); - } while (!upb_strtable_done(iter) && - !unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF)); -} - -bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) { - return upb_strtable_done(iter); -} - -const upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) { - return unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF); -} - -void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) { - upb_strtable_iter_setdone(iter); -} - -bool upb_msg_oneof_iter_isequal(const upb_msg_oneof_iter *iter1, - const upb_msg_oneof_iter *iter2) { - return upb_strtable_iter_isequal(iter1, iter2); -} - -/* upb_oneofdef ***************************************************************/ - -const char *upb_oneofdef_name(const upb_oneofdef *o) { - return shortdefname(o->full_name); -} - -const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) { - return o->parent; -} - -int upb_oneofdef_numfields(const upb_oneofdef *o) { - return (int)upb_strtable_count(&o->ntof); -} - -uint32_t upb_oneofdef_index(const upb_oneofdef *o) { - return o->index; -} - -bool upb_oneofdef_issynthetic(const upb_oneofdef *o) { - upb_inttable_iter iter; - const upb_fielddef *f; - upb_inttable_begin(&iter, &o->itof); - if (upb_oneofdef_numfields(o) != 1) return false; - f = upb_value_getptr(upb_inttable_iter_value(&iter)); - UPB_ASSERT(f); - return f->proto3_optional_; -} - -const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o, - const char *name, size_t length) { - upb_value val; - return upb_strtable_lookup2(&o->ntof, name, length, &val) ? - upb_value_getptr(val) : NULL; -} - -const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) { - upb_value val; - return upb_inttable_lookup32(&o->itof, num, &val) ? - upb_value_getptr(val) : NULL; -} - -void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) { - upb_inttable_begin(iter, &o->itof); -} - -void upb_oneof_next(upb_oneof_iter *iter) { - upb_inttable_next(iter); -} - -bool upb_oneof_done(upb_oneof_iter *iter) { - return upb_inttable_done(iter); -} - -upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) { - return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter)); -} - -void upb_oneof_iter_setdone(upb_oneof_iter *iter) { - upb_inttable_iter_setdone(iter); -} - -/* Dynamic Layout Generation. *************************************************/ - -static bool is_power_of_two(size_t val) { - return (val & (val - 1)) == 0; -} - -/* Align up to the given power of 2. */ -static size_t align_up(size_t val, size_t align) { - UPB_ASSERT(is_power_of_two(align)); - return (val + align - 1) & ~(align - 1); -} - -static size_t div_round_up(size_t n, size_t d) { - return (n + d - 1) / d; -} - -static size_t upb_msgval_sizeof(upb_fieldtype_t type) { - switch (type) { - case UPB_TYPE_DOUBLE: - case UPB_TYPE_INT64: - case UPB_TYPE_UINT64: - return 8; - case UPB_TYPE_ENUM: - case UPB_TYPE_INT32: - case UPB_TYPE_UINT32: - case UPB_TYPE_FLOAT: - return 4; - case UPB_TYPE_BOOL: - return 1; - case UPB_TYPE_MESSAGE: - return sizeof(void*); - case UPB_TYPE_BYTES: - case UPB_TYPE_STRING: - return sizeof(upb_strview); - } - UPB_UNREACHABLE(); -} - -static uint8_t upb_msg_fielddefsize(const upb_fielddef *f) { - if (upb_msgdef_mapentry(upb_fielddef_containingtype(f))) { - upb_map_entry ent; - UPB_ASSERT(sizeof(ent.k) == sizeof(ent.v)); - return sizeof(ent.k); - } else if (upb_fielddef_isseq(f)) { - return sizeof(void*); - } else { - return upb_msgval_sizeof(upb_fielddef_type(f)); - } -} - -static uint32_t upb_msglayout_place(upb_msglayout *l, size_t size) { - uint32_t ret; - - l->size = align_up(l->size, size); - ret = l->size; - l->size += size; - return ret; -} - -/* This function is the dynamic equivalent of message_layout.{cc,h} in upbc. - * It computes a dynamic layout for all of the fields in |m|. */ -static bool make_layout(const upb_symtab *symtab, const upb_msgdef *m) { - upb_msglayout *l = (upb_msglayout*)m->layout; - upb_msg_field_iter it; - upb_msg_oneof_iter oit; - size_t hasbit; - size_t submsg_count = m->submsg_field_count; - const upb_msglayout **submsgs; - upb_msglayout_field *fields; - upb_alloc *alloc = upb_arena_alloc(symtab->arena); - - memset(l, 0, sizeof(*l)); - - fields = upb_malloc(alloc, upb_msgdef_numfields(m) * sizeof(*fields)); - submsgs = upb_malloc(alloc, submsg_count * sizeof(*submsgs)); - - if ((!fields && upb_msgdef_numfields(m)) || - (!submsgs && submsg_count)) { - /* OOM. */ - return false; - } - - l->field_count = upb_msgdef_numfields(m); - l->fields = fields; - l->submsgs = submsgs; - - if (upb_msgdef_mapentry(m)) { - /* TODO(haberman): refactor this method so this special case is more - * elegant. */ - const upb_fielddef *key = upb_msgdef_itof(m, 1); - const upb_fielddef *val = upb_msgdef_itof(m, 2); - fields[0].number = 1; - fields[1].number = 2; - fields[0].label = UPB_LABEL_OPTIONAL; - fields[1].label = UPB_LABEL_OPTIONAL; - fields[0].presence = 0; - fields[1].presence = 0; - fields[0].descriptortype = upb_fielddef_descriptortype(key); - fields[1].descriptortype = upb_fielddef_descriptortype(val); - fields[0].offset = 0; - fields[1].offset = sizeof(upb_strview); - fields[1].submsg_index = 0; - - if (upb_fielddef_type(val) == UPB_TYPE_MESSAGE) { - submsgs[0] = upb_fielddef_msgsubdef(val)->layout; - } - - l->field_count = 2; - l->size = 2 * sizeof(upb_strview);align_up(l->size, 8); - return true; - } - - /* Allocate data offsets in three stages: - * - * 1. hasbits. - * 2. regular fields. - * 3. oneof fields. - * - * OPT: There is a lot of room for optimization here to minimize the size. - */ - - /* Allocate hasbits and set basic field attributes. */ - submsg_count = 0; - for (upb_msg_field_begin(&it, m), hasbit = 0; - !upb_msg_field_done(&it); - upb_msg_field_next(&it)) { - upb_fielddef* f = upb_msg_iter_field(&it); - upb_msglayout_field *field = &fields[upb_fielddef_index(f)]; - - field->number = upb_fielddef_number(f); - field->descriptortype = upb_fielddef_descriptortype(f); - field->label = upb_fielddef_label(f); - - if (upb_fielddef_ismap(f)) { - field->label = _UPB_LABEL_MAP; - } else if (upb_fielddef_packed(f)) { - field->label = _UPB_LABEL_PACKED; - } - - /* TODO: we probably should sort the fields by field number to match the - * output of upbc, and to improve search speed for the table parser. */ - f->layout_index = f->index_; - - if (upb_fielddef_issubmsg(f)) { - const upb_msgdef *subm = upb_fielddef_msgsubdef(f); - field->submsg_index = submsg_count++; - submsgs[field->submsg_index] = subm->layout; - } - - if (upb_fielddef_haspresence(f) && !upb_fielddef_realcontainingoneof(f)) { - /* We don't use hasbit 0, so that 0 can indicate "no presence" in the - * table. This wastes one hasbit, but we don't worry about it for now. */ - field->presence = ++hasbit; - } else { - field->presence = 0; - } - } - - /* Account for space used by hasbits. */ - l->size = div_round_up(hasbit, 8); - - /* Allocate non-oneof fields. */ - for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it); - upb_msg_field_next(&it)) { - const upb_fielddef* f = upb_msg_iter_field(&it); - size_t field_size = upb_msg_fielddefsize(f); - size_t index = upb_fielddef_index(f); - - if (upb_fielddef_realcontainingoneof(f)) { - /* Oneofs are handled separately below. */ - continue; - } - - fields[index].offset = upb_msglayout_place(l, field_size); - } - - /* Allocate oneof fields. Each oneof field consists of a uint32 for the case - * and space for the actual data. */ - for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit); - upb_msg_oneof_next(&oit)) { - const upb_oneofdef* o = upb_msg_iter_oneof(&oit); - upb_oneof_iter fit; - - if (upb_oneofdef_issynthetic(o)) continue; - - size_t case_size = sizeof(uint32_t); /* Could potentially optimize this. */ - size_t field_size = 0; - uint32_t case_offset; - uint32_t data_offset; - - /* Calculate field size: the max of all field sizes. */ - for (upb_oneof_begin(&fit, o); - !upb_oneof_done(&fit); - upb_oneof_next(&fit)) { - const upb_fielddef* f = upb_oneof_iter_field(&fit); - field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f)); - } - - /* Align and allocate case offset. */ - case_offset = upb_msglayout_place(l, case_size); - data_offset = upb_msglayout_place(l, field_size); - - for (upb_oneof_begin(&fit, o); - !upb_oneof_done(&fit); - upb_oneof_next(&fit)) { - const upb_fielddef* f = upb_oneof_iter_field(&fit); - fields[upb_fielddef_index(f)].offset = data_offset; - fields[upb_fielddef_index(f)].presence = ~case_offset; - } - } - - /* Size of the entire structure should be a multiple of its greatest - * alignment. TODO: track overall alignment for real? */ - l->size = align_up(l->size, 8); - - return true; -} - -/* Code to build defs from descriptor protos. *********************************/ - -/* There is a question of how much validation to do here. It will be difficult - * to perfectly match the amount of validation performed by proto2. But since - * this code is used to directly build defs from Ruby (for example) we do need - * to validate important constraints like uniqueness of names and numbers. */ - -#define CHK(x) if (!(x)) { return false; } -#define CHK_OOM(x) if (!(x)) { upb_status_setoom(ctx->status); return false; } - -typedef struct { - const upb_symtab *symtab; - upb_filedef *file; /* File we are building. */ - upb_alloc *alloc; /* Allocate defs here. */ - upb_alloc *tmp; /* Alloc for addtab and any other tmp data. */ - upb_strtable *addtab; /* full_name -> packed def ptr for new defs */ - const upb_msglayout **layouts; /* NULL if we should build layouts. */ - upb_status *status; /* Record errors here. */ -} symtab_addctx; - -static char* strviewdup(const symtab_addctx *ctx, upb_strview view) { - return upb_strdup2(view.data, view.size, ctx->alloc); -} - -static bool streql2(const char *a, size_t n, const char *b) { - return n == strlen(b) && memcmp(a, b, n) == 0; -} - -static bool streql_view(upb_strview view, const char *b) { - return streql2(view.data, view.size, b); -} - -static const char *makefullname(const symtab_addctx *ctx, const char *prefix, - upb_strview name) { - if (prefix) { - /* ret = prefix + '.' + name; */ - size_t n = strlen(prefix); - char *ret = upb_malloc(ctx->alloc, n + name.size + 2); - CHK_OOM(ret); - strcpy(ret, prefix); - ret[n] = '.'; - memcpy(&ret[n + 1], name.data, name.size); - ret[n + 1 + name.size] = '\0'; - return ret; - } else { - return strviewdup(ctx, name); - } -} - -size_t getjsonname(const char *name, char *buf, size_t len) { - size_t src, dst = 0; - bool ucase_next = false; - -#define WRITE(byte) \ - ++dst; \ - if (dst < len) buf[dst - 1] = byte; \ - else if (dst == len) buf[dst - 1] = '\0' - - if (!name) { - WRITE('\0'); - return 0; - } - - /* Implement the transformation as described in the spec: - * 1. upper case all letters after an underscore. - * 2. remove all underscores. - */ - for (src = 0; name[src]; src++) { - if (name[src] == '_') { - ucase_next = true; - continue; - } - - if (ucase_next) { - WRITE(toupper(name[src])); - ucase_next = false; - } else { - WRITE(name[src]); - } - } - - WRITE('\0'); - return dst; - -#undef WRITE -} - -static char* makejsonname(const char* name, upb_alloc *alloc) { - size_t size = getjsonname(name, NULL, 0); - char* json_name = upb_malloc(alloc, size); - getjsonname(name, json_name, size); - return json_name; -} - -static bool symtab_add(const symtab_addctx *ctx, const char *name, - upb_value v) { - upb_value tmp; - if (upb_strtable_lookup(ctx->addtab, name, &tmp) || - upb_strtable_lookup(&ctx->symtab->syms, name, &tmp)) { - upb_status_seterrf(ctx->status, "duplicate symbol '%s'", name); - return false; - } - - CHK_OOM(upb_strtable_insert3(ctx->addtab, name, strlen(name), v, ctx->tmp)); - return true; -} - -/* Given a symbol and the base symbol inside which it is defined, find the - * symbol's definition in t. */ -static bool resolvename(const upb_strtable *t, const upb_fielddef *f, - const char *base, upb_strview sym, - upb_deftype_t type, upb_status *status, - const void **def) { - if(sym.size == 0) return NULL; - if(sym.data[0] == '.') { - /* Symbols starting with '.' are absolute, so we do a single lookup. - * Slice to omit the leading '.' */ - upb_value v; - if (!upb_strtable_lookup2(t, sym.data + 1, sym.size - 1, &v)) { - return false; - } - - *def = unpack_def(v, type); - - if (!*def) { - upb_status_seterrf(status, - "type mismatch when resolving field %s, name %s", - f->full_name, sym.data); - return false; - } - - return true; - } else { - /* Remove components from base until we find an entry or run out. - * TODO: This branch is totally broken, but currently not used. */ - (void)base; - UPB_ASSERT(false); - return false; - } -} - -const void *symtab_resolve(const symtab_addctx *ctx, const upb_fielddef *f, - const char *base, upb_strview sym, - upb_deftype_t type) { - const void *ret; - if (!resolvename(ctx->addtab, f, base, sym, type, ctx->status, &ret) && - !resolvename(&ctx->symtab->syms, f, base, sym, type, ctx->status, &ret)) { - if (upb_ok(ctx->status)) { - upb_status_seterrf(ctx->status, "couldn't resolve name '%s'", sym.data); - } - return false; - } - return ret; -} - -static bool create_oneofdef( - const symtab_addctx *ctx, upb_msgdef *m, - const google_protobuf_OneofDescriptorProto *oneof_proto) { - upb_oneofdef *o; - upb_strview name = google_protobuf_OneofDescriptorProto_name(oneof_proto); - upb_value v; - - o = (upb_oneofdef*)&m->oneofs[m->oneof_count++]; - o->parent = m; - o->full_name = makefullname(ctx, m->full_name, name); - - v = pack_def(o, UPB_DEFTYPE_ONEOF); - CHK_OOM(symtab_add(ctx, o->full_name, v)); - CHK_OOM(upb_strtable_insert3(&m->ntof, name.data, name.size, v, ctx->alloc)); - - CHK_OOM(upb_inttable_init2(&o->itof, UPB_CTYPE_CONSTPTR, ctx->alloc)); - CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc)); - - return true; -} - -static bool parse_default(const symtab_addctx *ctx, const char *str, size_t len, - upb_fielddef *f) { - char *end; - char nullz[64]; - errno = 0; - - switch (upb_fielddef_type(f)) { - case UPB_TYPE_INT32: - case UPB_TYPE_INT64: - case UPB_TYPE_UINT32: - case UPB_TYPE_UINT64: - case UPB_TYPE_DOUBLE: - case UPB_TYPE_FLOAT: - /* Standard C number parsing functions expect null-terminated strings. */ - if (len >= sizeof(nullz) - 1) { - return false; - } - memcpy(nullz, str, len); - nullz[len] = '\0'; - str = nullz; - break; - default: - break; - } - - switch (upb_fielddef_type(f)) { - case UPB_TYPE_INT32: { - long val = strtol(str, &end, 0); - CHK(val <= INT32_MAX && val >= INT32_MIN && errno != ERANGE && !*end); - f->defaultval.sint = val; - break; - } - case UPB_TYPE_ENUM: { - const upb_enumdef *e = f->sub.enumdef; - int32_t val; - CHK(upb_enumdef_ntoi(e, str, len, &val)); - f->defaultval.sint = val; - break; - } - case UPB_TYPE_INT64: { - /* XXX: Need to write our own strtoll, since it's not available in c89. */ - int64_t val = strtol(str, &end, 0); - CHK(val <= INT64_MAX && val >= INT64_MIN && errno != ERANGE && !*end); - f->defaultval.sint = val; - break; - } - case UPB_TYPE_UINT32: { - unsigned long val = strtoul(str, &end, 0); - CHK(val <= UINT32_MAX && errno != ERANGE && !*end); - f->defaultval.uint = val; - break; - } - case UPB_TYPE_UINT64: { - /* XXX: Need to write our own strtoull, since it's not available in c89. */ - uint64_t val = strtoul(str, &end, 0); - CHK(val <= UINT64_MAX && errno != ERANGE && !*end); - f->defaultval.uint = val; - break; - } - case UPB_TYPE_DOUBLE: { - double val = strtod(str, &end); - CHK(errno != ERANGE && !*end); - f->defaultval.dbl = val; - break; - } - case UPB_TYPE_FLOAT: { - /* XXX: Need to write our own strtof, since it's not available in c89. */ - float val = strtod(str, &end); - CHK(errno != ERANGE && !*end); - f->defaultval.flt = val; - break; - } - case UPB_TYPE_BOOL: { - if (streql2(str, len, "false")) { - f->defaultval.boolean = false; - } else if (streql2(str, len, "true")) { - f->defaultval.boolean = true; - } else { - return false; - } - break; - } - case UPB_TYPE_STRING: - f->defaultval.str = newstr(ctx->alloc, str, len); - break; - case UPB_TYPE_BYTES: - /* XXX: need to interpret the C-escaped value. */ - f->defaultval.str = newstr(ctx->alloc, str, len); - break; - case UPB_TYPE_MESSAGE: - /* Should not have a default value. */ - return false; - } - return true; -} - -static void set_default_default(const symtab_addctx *ctx, upb_fielddef *f) { - switch (upb_fielddef_type(f)) { - case UPB_TYPE_INT32: - case UPB_TYPE_INT64: - case UPB_TYPE_ENUM: - f->defaultval.sint = 0; - break; - case UPB_TYPE_UINT64: - case UPB_TYPE_UINT32: - f->defaultval.uint = 0; - break; - case UPB_TYPE_DOUBLE: - case UPB_TYPE_FLOAT: - f->defaultval.dbl = 0; - break; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - f->defaultval.str = newstr(ctx->alloc, NULL, 0); - break; - case UPB_TYPE_BOOL: - f->defaultval.boolean = false; - break; - case UPB_TYPE_MESSAGE: - break; - } -} - -static bool create_fielddef( - const symtab_addctx *ctx, const char *prefix, upb_msgdef *m, - const google_protobuf_FieldDescriptorProto *field_proto) { - upb_alloc *alloc = ctx->alloc; - upb_fielddef *f; - const google_protobuf_FieldOptions *options; - upb_strview name; - const char *full_name; - const char *json_name; - const char *shortname; - uint32_t field_number; - - if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) { - upb_status_seterrmsg(ctx->status, "field has no name"); - return false; - } - - name = google_protobuf_FieldDescriptorProto_name(field_proto); - CHK(upb_isident(name, false, ctx->status)); - full_name = makefullname(ctx, prefix, name); - shortname = shortdefname(full_name); - - if (google_protobuf_FieldDescriptorProto_has_json_name(field_proto)) { - json_name = strviewdup( - ctx, google_protobuf_FieldDescriptorProto_json_name(field_proto)); - } else { - json_name = makejsonname(shortname, ctx->alloc); - } - - field_number = google_protobuf_FieldDescriptorProto_number(field_proto); - - if (field_number == 0 || field_number > UPB_MAX_FIELDNUMBER) { - upb_status_seterrf(ctx->status, "invalid field number (%u)", field_number); - return false; - } - - if (m) { - /* direct message field. */ - upb_value v, field_v, json_v; - size_t json_size; - - f = (upb_fielddef*)&m->fields[m->field_count++]; - f->msgdef = m; - f->is_extension_ = false; - - if (upb_strtable_lookup(&m->ntof, shortname, NULL)) { - upb_status_seterrf(ctx->status, "duplicate field name (%s)", shortname); - return false; - } - - if (upb_strtable_lookup(&m->ntof, json_name, NULL)) { - upb_status_seterrf(ctx->status, "duplicate json_name (%s)", json_name); - return false; - } - - if (upb_inttable_lookup(&m->itof, field_number, NULL)) { - upb_status_seterrf(ctx->status, "duplicate field number (%u)", - field_number); - return false; - } - - field_v = pack_def(f, UPB_DEFTYPE_FIELD); - json_v = pack_def(f, UPB_DEFTYPE_FIELD_JSONNAME); - v = upb_value_constptr(f); - json_size = strlen(json_name); - - CHK_OOM( - upb_strtable_insert3(&m->ntof, name.data, name.size, field_v, alloc)); - CHK_OOM(upb_inttable_insert2(&m->itof, field_number, v, alloc)); - - if (strcmp(shortname, json_name) != 0) { - upb_strtable_insert3(&m->ntof, json_name, json_size, json_v, alloc); - } - - if (ctx->layouts) { - const upb_msglayout_field *fields = m->layout->fields; - int count = m->layout->field_count; - bool found = false; - int i; - for (i = 0; i < count; i++) { - if (fields[i].number == field_number) { - f->layout_index = i; - found = true; - break; - } - } - UPB_ASSERT(found); - } - } else { - /* extension field. */ - f = (upb_fielddef*)&ctx->file->exts[ctx->file->ext_count++]; - f->is_extension_ = true; - CHK_OOM(symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_FIELD))); - } - - f->full_name = full_name; - f->json_name = json_name; - f->file = ctx->file; - f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto); - f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto); - f->number_ = field_number; - f->oneof = NULL; - f->proto3_optional_ = - google_protobuf_FieldDescriptorProto_proto3_optional(field_proto); - - /* We can't resolve the subdef or (in the case of extensions) the containing - * message yet, because it may not have been defined yet. We stash a pointer - * to the field_proto until later when we can properly resolve it. */ - f->sub.unresolved = field_proto; - - if (f->label_ == UPB_LABEL_REQUIRED && f->file->syntax == UPB_SYNTAX_PROTO3) { - upb_status_seterrf(ctx->status, "proto3 fields cannot be required (%s)", - f->full_name); - return false; - } - - if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) { - int oneof_index = - google_protobuf_FieldDescriptorProto_oneof_index(field_proto); - upb_oneofdef *oneof; - upb_value v = upb_value_constptr(f); - - if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) { - upb_status_seterrf(ctx->status, - "fields in oneof must have OPTIONAL label (%s)", - f->full_name); - return false; - } - - if (!m) { - upb_status_seterrf(ctx->status, - "oneof_index provided for extension field (%s)", - f->full_name); - return false; - } - - if (oneof_index >= m->oneof_count) { - upb_status_seterrf(ctx->status, "oneof_index out of range (%s)", - f->full_name); - return false; - } - - oneof = (upb_oneofdef*)&m->oneofs[oneof_index]; - f->oneof = oneof; - - CHK(upb_inttable_insert2(&oneof->itof, f->number_, v, alloc)); - CHK(upb_strtable_insert3(&oneof->ntof, name.data, name.size, v, alloc)); - } else { - f->oneof = NULL; - } - - if (google_protobuf_FieldDescriptorProto_has_options(field_proto)) { - options = google_protobuf_FieldDescriptorProto_options(field_proto); - f->lazy_ = google_protobuf_FieldOptions_lazy(options); - f->packed_ = google_protobuf_FieldOptions_packed(options); - } else { - f->lazy_ = false; - f->packed_ = false; - } - - return true; -} - -static bool create_enumdef( - const symtab_addctx *ctx, const char *prefix, - const google_protobuf_EnumDescriptorProto *enum_proto) { - upb_enumdef *e; - const google_protobuf_EnumValueDescriptorProto *const *values; - upb_strview name; - size_t i, n; - - name = google_protobuf_EnumDescriptorProto_name(enum_proto); - CHK(upb_isident(name, false, ctx->status)); - - e = (upb_enumdef*)&ctx->file->enums[ctx->file->enum_count++]; - e->full_name = makefullname(ctx, prefix, name); - CHK_OOM(symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM))); - - CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, ctx->alloc)); - CHK_OOM(upb_inttable_init2(&e->iton, UPB_CTYPE_CSTR, ctx->alloc)); - - e->file = ctx->file; - e->defaultval = 0; - - values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n); - - if (n == 0) { - upb_status_seterrf(ctx->status, - "enums must contain at least one value (%s)", - e->full_name); - return false; - } - - for (i = 0; i < n; i++) { - const google_protobuf_EnumValueDescriptorProto *value = values[i]; - upb_strview name = google_protobuf_EnumValueDescriptorProto_name(value); - char *name2 = strviewdup(ctx, name); - int32_t num = google_protobuf_EnumValueDescriptorProto_number(value); - upb_value v = upb_value_int32(num); - - if (i == 0 && e->file->syntax == UPB_SYNTAX_PROTO3 && num != 0) { - upb_status_seterrf(ctx->status, - "for proto3, the first enum value must be zero (%s)", - e->full_name); - return false; - } - - if (upb_strtable_lookup(&e->ntoi, name2, NULL)) { - upb_status_seterrf(ctx->status, "duplicate enum label '%s'", name2); - return false; - } - - CHK_OOM(name2) - CHK_OOM( - upb_strtable_insert3(&e->ntoi, name2, strlen(name2), v, ctx->alloc)); - - if (!upb_inttable_lookup(&e->iton, num, NULL)) { - upb_value v = upb_value_cstr(name2); - CHK_OOM(upb_inttable_insert2(&e->iton, num, v, ctx->alloc)); - } - } - - upb_inttable_compact2(&e->iton, ctx->alloc); - - return true; -} - -static bool create_msgdef(symtab_addctx *ctx, const char *prefix, - const google_protobuf_DescriptorProto *msg_proto) { - upb_msgdef *m; - const google_protobuf_MessageOptions *options; - const google_protobuf_OneofDescriptorProto *const *oneofs; - const google_protobuf_FieldDescriptorProto *const *fields; - const google_protobuf_EnumDescriptorProto *const *enums; - const google_protobuf_DescriptorProto *const *msgs; - size_t i, n; - upb_strview name; - - name = google_protobuf_DescriptorProto_name(msg_proto); - CHK(upb_isident(name, false, ctx->status)); - - m = (upb_msgdef*)&ctx->file->msgs[ctx->file->msg_count++]; - m->full_name = makefullname(ctx, prefix, name); - CHK_OOM(symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG))); - - CHK_OOM(upb_inttable_init2(&m->itof, UPB_CTYPE_CONSTPTR, ctx->alloc)); - CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc)); - - m->file = ctx->file; - m->map_entry = false; - - options = google_protobuf_DescriptorProto_options(msg_proto); - - if (options) { - m->map_entry = google_protobuf_MessageOptions_map_entry(options); - } - - if (ctx->layouts) { - m->layout = *ctx->layouts; - ctx->layouts++; - } else { - /* Allocate now (to allow cross-linking), populate later. */ - m->layout = upb_malloc(ctx->alloc, sizeof(*m->layout)); - } - - oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n); - m->oneof_count = 0; - m->oneofs = upb_malloc(ctx->alloc, sizeof(*m->oneofs) * n); - for (i = 0; i < n; i++) { - CHK(create_oneofdef(ctx, m, oneofs[i])); - } - - fields = google_protobuf_DescriptorProto_field(msg_proto, &n); - m->field_count = 0; - m->fields = upb_malloc(ctx->alloc, sizeof(*m->fields) * n); - for (i = 0; i < n; i++) { - CHK(create_fielddef(ctx, m->full_name, m, fields[i])); - } - - CHK(assign_msg_indices(m, ctx->status)); - CHK(check_oneofs(m, ctx->status)); - assign_msg_wellknowntype(m); - upb_inttable_compact2(&m->itof, ctx->alloc); - - /* This message is built. Now build nested messages and enums. */ - - enums = google_protobuf_DescriptorProto_enum_type(msg_proto, &n); - for (i = 0; i < n; i++) { - CHK(create_enumdef(ctx, m->full_name, enums[i])); - } - - msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n); - for (i = 0; i < n; i++) { - CHK(create_msgdef(ctx, m->full_name, msgs[i])); - } - - return true; -} - -typedef struct { - int msg_count; - int enum_count; - int ext_count; -} decl_counts; - -static void count_types_in_msg(const google_protobuf_DescriptorProto *msg_proto, - decl_counts *counts) { - const google_protobuf_DescriptorProto *const *msgs; - size_t i, n; - - counts->msg_count++; - - msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n); - for (i = 0; i < n; i++) { - count_types_in_msg(msgs[i], counts); - } - - google_protobuf_DescriptorProto_enum_type(msg_proto, &n); - counts->enum_count += n; - - google_protobuf_DescriptorProto_extension(msg_proto, &n); - counts->ext_count += n; -} - -static void count_types_in_file( - const google_protobuf_FileDescriptorProto *file_proto, - decl_counts *counts) { - const google_protobuf_DescriptorProto *const *msgs; - size_t i, n; - - msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n); - for (i = 0; i < n; i++) { - count_types_in_msg(msgs[i], counts); - } - - google_protobuf_FileDescriptorProto_enum_type(file_proto, &n); - counts->enum_count += n; - - google_protobuf_FileDescriptorProto_extension(file_proto, &n); - counts->ext_count += n; -} - -static bool resolve_fielddef(const symtab_addctx *ctx, const char *prefix, - upb_fielddef *f) { - upb_strview name; - const google_protobuf_FieldDescriptorProto *field_proto = f->sub.unresolved; - - if (f->is_extension_) { - if (!google_protobuf_FieldDescriptorProto_has_extendee(field_proto)) { - upb_status_seterrf(ctx->status, - "extension for field '%s' had no extendee", - f->full_name); - return false; - } - - name = google_protobuf_FieldDescriptorProto_extendee(field_proto); - f->msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG); - CHK(f->msgdef); - } - - if ((upb_fielddef_issubmsg(f) || f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) && - !google_protobuf_FieldDescriptorProto_has_type_name(field_proto)) { - upb_status_seterrf(ctx->status, "field '%s' is missing type name", - f->full_name); - return false; - } - - name = google_protobuf_FieldDescriptorProto_type_name(field_proto); - - if (upb_fielddef_issubmsg(f)) { - f->sub.msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG); - CHK(f->sub.msgdef); - } else if (f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) { - f->sub.enumdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_ENUM); - CHK(f->sub.enumdef); - } - - /* Have to delay resolving of the default value until now because of the enum - * case, since enum defaults are specified with a label. */ - if (google_protobuf_FieldDescriptorProto_has_default_value(field_proto)) { - upb_strview defaultval = - google_protobuf_FieldDescriptorProto_default_value(field_proto); - - if (f->file->syntax == UPB_SYNTAX_PROTO3) { - upb_status_seterrf(ctx->status, - "proto3 fields cannot have explicit defaults (%s)", - f->full_name); - return false; - } - - if (upb_fielddef_issubmsg(f)) { - upb_status_seterrf(ctx->status, - "message fields cannot have explicit defaults (%s)", - f->full_name); - return false; - } - - if (!parse_default(ctx, defaultval.data, defaultval.size, f)) { - upb_status_seterrf(ctx->status, - "couldn't parse default '" UPB_STRVIEW_FORMAT - "' for field (%s)", - UPB_STRVIEW_ARGS(defaultval), f->full_name); - return false; - } - } else { - set_default_default(ctx, f); - } - - return true; -} - -static bool build_filedef( - symtab_addctx *ctx, upb_filedef *file, - const google_protobuf_FileDescriptorProto *file_proto) { - upb_alloc *alloc = ctx->alloc; - const google_protobuf_FileOptions *file_options_proto; - const google_protobuf_DescriptorProto *const *msgs; - const google_protobuf_EnumDescriptorProto *const *enums; - const google_protobuf_FieldDescriptorProto *const *exts; - const upb_strview* strs; - size_t i, n; - decl_counts counts = {0}; - - count_types_in_file(file_proto, &counts); - - file->msgs = upb_malloc(alloc, sizeof(*file->msgs) * counts.msg_count); - file->enums = upb_malloc(alloc, sizeof(*file->enums) * counts.enum_count); - file->exts = upb_malloc(alloc, sizeof(*file->exts) * counts.ext_count); - - CHK_OOM(counts.msg_count == 0 || file->msgs); - CHK_OOM(counts.enum_count == 0 || file->enums); - CHK_OOM(counts.ext_count == 0 || file->exts); - - /* We increment these as defs are added. */ - file->msg_count = 0; - file->enum_count = 0; - file->ext_count = 0; - - if (!google_protobuf_FileDescriptorProto_has_name(file_proto)) { - upb_status_seterrmsg(ctx->status, "File has no name"); - return false; - } - - file->name = - strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto)); - file->phpprefix = NULL; - file->phpnamespace = NULL; - - if (google_protobuf_FileDescriptorProto_has_package(file_proto)) { - upb_strview package = - google_protobuf_FileDescriptorProto_package(file_proto); - CHK(upb_isident(package, true, ctx->status)); - file->package = strviewdup(ctx, package); - } else { - file->package = NULL; - } - - if (google_protobuf_FileDescriptorProto_has_syntax(file_proto)) { - upb_strview syntax = - google_protobuf_FileDescriptorProto_syntax(file_proto); - - if (streql_view(syntax, "proto2")) { - file->syntax = UPB_SYNTAX_PROTO2; - } else if (streql_view(syntax, "proto3")) { - file->syntax = UPB_SYNTAX_PROTO3; - } else { - upb_status_seterrf(ctx->status, "Invalid syntax '" UPB_STRVIEW_FORMAT "'", - UPB_STRVIEW_ARGS(syntax)); - return false; - } - } else { - file->syntax = UPB_SYNTAX_PROTO2; - } - - /* Read options. */ - file_options_proto = google_protobuf_FileDescriptorProto_options(file_proto); - if (file_options_proto) { - if (google_protobuf_FileOptions_has_php_class_prefix(file_options_proto)) { - file->phpprefix = strviewdup( - ctx, - google_protobuf_FileOptions_php_class_prefix(file_options_proto)); - } - if (google_protobuf_FileOptions_has_php_namespace(file_options_proto)) { - file->phpnamespace = strviewdup( - ctx, google_protobuf_FileOptions_php_namespace(file_options_proto)); - } - } - - /* Verify dependencies. */ - strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n); - file->deps = upb_malloc(alloc, sizeof(*file->deps) * n) ; - CHK_OOM(n == 0 || file->deps); - - for (i = 0; i < n; i++) { - upb_strview dep_name = strs[i]; - upb_value v; - if (!upb_strtable_lookup2(&ctx->symtab->files, dep_name.data, - dep_name.size, &v)) { - upb_status_seterrf(ctx->status, - "Depends on file '" UPB_STRVIEW_FORMAT - "', but it has not been loaded", - UPB_STRVIEW_ARGS(dep_name)); - return false; - } - file->deps[i] = upb_value_getconstptr(v); - } - - /* Create messages. */ - msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n); - for (i = 0; i < n; i++) { - CHK(create_msgdef(ctx, file->package, msgs[i])); - } - - /* Create enums. */ - enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n); - for (i = 0; i < n; i++) { - CHK(create_enumdef(ctx, file->package, enums[i])); - } - - /* Create extensions. */ - exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n); - file->exts = upb_malloc(alloc, sizeof(*file->exts) * n); - CHK_OOM(n == 0 || file->exts); - for (i = 0; i < n; i++) { - CHK(create_fielddef(ctx, file->package, NULL, exts[i])); - } - - /* Now that all names are in the table, build layouts and resolve refs. */ - for (i = 0; i < file->ext_count; i++) { - CHK(resolve_fielddef(ctx, file->package, (upb_fielddef*)&file->exts[i])); - } - - for (i = 0; i < file->msg_count; i++) { - const upb_msgdef *m = &file->msgs[i]; - int j; - for (j = 0; j < m->field_count; j++) { - CHK(resolve_fielddef(ctx, m->full_name, (upb_fielddef*)&m->fields[j])); - } - } - - if (!ctx->layouts) { - for (i = 0; i < file->msg_count; i++) { - const upb_msgdef *m = &file->msgs[i]; - make_layout(ctx->symtab, m); - } - } - - return true; - } - -static bool upb_symtab_addtotabs(upb_symtab *s, symtab_addctx *ctx, - upb_status *status) { - const upb_filedef *file = ctx->file; - upb_alloc *alloc = upb_arena_alloc(s->arena); - upb_strtable_iter iter; - - CHK_OOM(upb_strtable_insert3(&s->files, file->name, strlen(file->name), - upb_value_constptr(file), alloc)); - - upb_strtable_begin(&iter, ctx->addtab); - for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { - upb_strview key = upb_strtable_iter_key(&iter); - upb_value value = upb_strtable_iter_value(&iter); - CHK_OOM(upb_strtable_insert3(&s->syms, key.data, key.size, value, alloc)); - } - - return true; -} - -/* upb_filedef ****************************************************************/ - -const char *upb_filedef_name(const upb_filedef *f) { - return f->name; -} - -const char *upb_filedef_package(const upb_filedef *f) { - return f->package; -} - -const char *upb_filedef_phpprefix(const upb_filedef *f) { - return f->phpprefix; -} - -const char *upb_filedef_phpnamespace(const upb_filedef *f) { - return f->phpnamespace; -} - -upb_syntax_t upb_filedef_syntax(const upb_filedef *f) { - return f->syntax; -} - -int upb_filedef_msgcount(const upb_filedef *f) { - return f->msg_count; -} - -int upb_filedef_depcount(const upb_filedef *f) { - return f->dep_count; -} - -int upb_filedef_enumcount(const upb_filedef *f) { - return f->enum_count; -} - -const upb_filedef *upb_filedef_dep(const upb_filedef *f, int i) { - return i < 0 || i >= f->dep_count ? NULL : f->deps[i]; -} - -const upb_msgdef *upb_filedef_msg(const upb_filedef *f, int i) { - return i < 0 || i >= f->msg_count ? NULL : &f->msgs[i]; -} - -const upb_enumdef *upb_filedef_enum(const upb_filedef *f, int i) { - return i < 0 || i >= f->enum_count ? NULL : &f->enums[i]; -} - -void upb_symtab_free(upb_symtab *s) { - upb_arena_free(s->arena); - upb_gfree(s); -} - -upb_symtab *upb_symtab_new(void) { - upb_symtab *s = upb_gmalloc(sizeof(*s)); - upb_alloc *alloc; - - if (!s) { - return NULL; - } - - s->arena = upb_arena_new(); - alloc = upb_arena_alloc(s->arena); - - if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, alloc) || - !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, alloc)) { - upb_arena_free(s->arena); - upb_gfree(s); - s = NULL; - } - return s; -} - -const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) { - upb_value v; - return upb_strtable_lookup(&s->syms, sym, &v) ? - unpack_def(v, UPB_DEFTYPE_MSG) : NULL; -} - -const upb_msgdef *upb_symtab_lookupmsg2(const upb_symtab *s, const char *sym, - size_t len) { - upb_value v; - return upb_strtable_lookup2(&s->syms, sym, len, &v) ? - unpack_def(v, UPB_DEFTYPE_MSG) : NULL; -} - -const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) { - upb_value v; - return upb_strtable_lookup(&s->syms, sym, &v) ? - unpack_def(v, UPB_DEFTYPE_ENUM) : NULL; -} - -const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, const char *name) { - upb_value v; - return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v) - : NULL; -} - -int upb_symtab_filecount(const upb_symtab *s) { - return (int)upb_strtable_count(&s->files); -} - -static const upb_filedef *_upb_symtab_addfile( - upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto, - const upb_msglayout **layouts, upb_status *status) { - upb_arena *tmparena = upb_arena_new(); - upb_strtable addtab; - upb_alloc *alloc = upb_arena_alloc(s->arena); - upb_filedef *file = upb_malloc(alloc, sizeof(*file)); - bool ok; - symtab_addctx ctx; - - ctx.file = file; - ctx.symtab = s; - ctx.alloc = alloc; - ctx.tmp = upb_arena_alloc(tmparena); - ctx.addtab = &addtab; - ctx.layouts = layouts; - ctx.status = status; - - ok = file && - upb_strtable_init2(&addtab, UPB_CTYPE_CONSTPTR, ctx.tmp) && - build_filedef(&ctx, file, file_proto) && - upb_symtab_addtotabs(s, &ctx, status); - - upb_arena_free(tmparena); - return ok ? file : NULL; -} - -const upb_filedef *upb_symtab_addfile( - upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto, - upb_status *status) { - return _upb_symtab_addfile(s, file_proto, NULL, status); -} - -/* Include here since we want most of this file to be stdio-free. */ -#include - -bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init) { - /* Since this function should never fail (it would indicate a bug in upb) we - * print errors to stderr instead of returning error status to the user. */ - upb_def_init **deps = init->deps; - google_protobuf_FileDescriptorProto *file; - upb_arena *arena; - upb_status status; - - upb_status_clear(&status); - - if (upb_strtable_lookup(&s->files, init->filename, NULL)) { - return true; - } - - arena = upb_arena_new(); - - for (; *deps; deps++) { - if (!_upb_symtab_loaddefinit(s, *deps)) goto err; - } - - file = google_protobuf_FileDescriptorProto_parse( - init->descriptor.data, init->descriptor.size, arena); - - if (!file) { - upb_status_seterrf( - &status, - "Failed to parse compiled-in descriptor for file '%s'. This should " - "never happen.", - init->filename); - goto err; - } - - if (!_upb_symtab_addfile(s, file, init->layouts, &status)) goto err; - - upb_arena_free(arena); - return true; - -err: - fprintf(stderr, "Error loading compiled-in descriptor: %s\n", - upb_status_errmsg(&status)); - upb_arena_free(arena); - return false; -} - -#undef CHK -#undef CHK_OOM - - -#include - - -static char field_size[] = { - 0,/* 0 */ - 8, /* UPB_DESCRIPTOR_TYPE_DOUBLE */ - 4, /* UPB_DESCRIPTOR_TYPE_FLOAT */ - 8, /* UPB_DESCRIPTOR_TYPE_INT64 */ - 8, /* UPB_DESCRIPTOR_TYPE_UINT64 */ - 4, /* UPB_DESCRIPTOR_TYPE_INT32 */ - 8, /* UPB_DESCRIPTOR_TYPE_FIXED64 */ - 4, /* UPB_DESCRIPTOR_TYPE_FIXED32 */ - 1, /* UPB_DESCRIPTOR_TYPE_BOOL */ - sizeof(upb_strview), /* UPB_DESCRIPTOR_TYPE_STRING */ - sizeof(void*), /* UPB_DESCRIPTOR_TYPE_GROUP */ - sizeof(void*), /* UPB_DESCRIPTOR_TYPE_MESSAGE */ - sizeof(upb_strview), /* UPB_DESCRIPTOR_TYPE_BYTES */ - 4, /* UPB_DESCRIPTOR_TYPE_UINT32 */ - 4, /* UPB_DESCRIPTOR_TYPE_ENUM */ - 4, /* UPB_DESCRIPTOR_TYPE_SFIXED32 */ - 8, /* UPB_DESCRIPTOR_TYPE_SFIXED64 */ - 4, /* UPB_DESCRIPTOR_TYPE_SINT32 */ - 8, /* UPB_DESCRIPTOR_TYPE_SINT64 */ -}; - -/* Strings/bytes are special-cased in maps. */ -static char _upb_fieldtype_to_mapsize[12] = { - 0, - 1, /* UPB_TYPE_BOOL */ - 4, /* UPB_TYPE_FLOAT */ - 4, /* UPB_TYPE_INT32 */ - 4, /* UPB_TYPE_UINT32 */ - 4, /* UPB_TYPE_ENUM */ - sizeof(void*), /* UPB_TYPE_MESSAGE */ - 8, /* UPB_TYPE_DOUBLE */ - 8, /* UPB_TYPE_INT64 */ - 8, /* UPB_TYPE_UINT64 */ - 0, /* UPB_TYPE_STRING */ - 0, /* UPB_TYPE_BYTES */ -}; - -/** upb_msg *******************************************************************/ - -upb_msg *upb_msg_new(const upb_msgdef *m, upb_arena *a) { - return _upb_msg_new(upb_msgdef_layout(m), a); -} - -static bool in_oneof(const upb_msglayout_field *field) { - return field->presence < 0; -} - -static uint32_t *oneofcase(const upb_msg *msg, - const upb_msglayout_field *field) { - UPB_ASSERT(in_oneof(field)); - return UPB_PTR_AT(msg, -field->presence, uint32_t); -} - -static upb_msgval _upb_msg_getraw(const upb_msg *msg, const upb_fielddef *f) { - const upb_msglayout_field *field = upb_fielddef_layout(f); - const char *mem = UPB_PTR_AT(msg, field->offset, char); - upb_msgval val = {0}; - int size = upb_fielddef_isseq(f) ? sizeof(void *) - : field_size[field->descriptortype]; - memcpy(&val, mem, size); - return val; -} - -bool upb_msg_has(const upb_msg *msg, const upb_fielddef *f) { - const upb_msglayout_field *field = upb_fielddef_layout(f); - if (in_oneof(field)) { - return *oneofcase(msg, field) == field->number; - } else if (field->presence > 0) { - uint32_t hasbit = field->presence; - return *UPB_PTR_AT(msg, hasbit / 8, uint8_t) & (1 << (hasbit % 8)); - } else { - UPB_ASSERT(field->descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE || - field->descriptortype == UPB_DESCRIPTOR_TYPE_GROUP); - return _upb_msg_getraw(msg, f).msg_val != NULL; - } -} - -bool upb_msg_hasoneof(const upb_msg *msg, const upb_oneofdef *o) { - upb_oneof_iter i; - const upb_fielddef *f; - const upb_msglayout_field *field; - - upb_oneof_begin(&i, o); - if (upb_oneof_done(&i)) return false; - f = upb_oneof_iter_field(&i); - field = upb_fielddef_layout(f); - return *oneofcase(msg, field) != 0; -} - -upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f) { - if (!upb_fielddef_haspresence(f) || upb_msg_has(msg, f)) { - return _upb_msg_getraw(msg, f); - } else { - /* TODO(haberman): change upb_fielddef to not require this switch(). */ - upb_msgval val = {0}; - switch (upb_fielddef_type(f)) { - case UPB_TYPE_INT32: - case UPB_TYPE_ENUM: - val.int32_val = upb_fielddef_defaultint32(f); - break; - case UPB_TYPE_INT64: - val.int64_val = upb_fielddef_defaultint64(f); - break; - case UPB_TYPE_UINT32: - val.uint32_val = upb_fielddef_defaultuint32(f); - break; - case UPB_TYPE_UINT64: - val.uint64_val = upb_fielddef_defaultuint64(f); - break; - case UPB_TYPE_FLOAT: - val.float_val = upb_fielddef_defaultfloat(f); - break; - case UPB_TYPE_DOUBLE: - val.double_val = upb_fielddef_defaultdouble(f); - break; - case UPB_TYPE_BOOL: - val.double_val = upb_fielddef_defaultbool(f); - break; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - val.str_val.data = upb_fielddef_defaultstr(f, &val.str_val.size); - break; - case UPB_TYPE_MESSAGE: - val.msg_val = NULL; - break; - } - return val; - } -} - -upb_mutmsgval upb_msg_mutable(upb_msg *msg, const upb_fielddef *f, - upb_arena *a) { - const upb_msglayout_field *field = upb_fielddef_layout(f); - upb_mutmsgval ret; - char *mem = UPB_PTR_AT(msg, field->offset, char); - bool wrong_oneof = in_oneof(field) && *oneofcase(msg, field) != field->number; - - memcpy(&ret, mem, sizeof(void*)); - - if (a && (!ret.msg || wrong_oneof)) { - if (upb_fielddef_ismap(f)) { - const upb_msgdef *entry = upb_fielddef_msgsubdef(f); - const upb_fielddef *key = upb_msgdef_itof(entry, UPB_MAPENTRY_KEY); - const upb_fielddef *value = upb_msgdef_itof(entry, UPB_MAPENTRY_VALUE); - ret.map = upb_map_new(a, upb_fielddef_type(key), upb_fielddef_type(value)); - } else if (upb_fielddef_isseq(f)) { - ret.array = upb_array_new(a, upb_fielddef_type(f)); - } else { - UPB_ASSERT(upb_fielddef_issubmsg(f)); - ret.msg = upb_msg_new(upb_fielddef_msgsubdef(f), a); - } - - memcpy(mem, &ret, sizeof(void*)); - - if (wrong_oneof) { - *oneofcase(msg, field) = field->number; - } - } - return ret; -} - -void upb_msg_set(upb_msg *msg, const upb_fielddef *f, upb_msgval val, - upb_arena *a) { - const upb_msglayout_field *field = upb_fielddef_layout(f); - char *mem = UPB_PTR_AT(msg, field->offset, char); - int size = upb_fielddef_isseq(f) ? sizeof(void *) - : field_size[field->descriptortype]; - memcpy(mem, &val, size); - if (in_oneof(field)) { - *oneofcase(msg, field) = field->number; - } -} - -bool upb_msg_next(const upb_msg *msg, const upb_msgdef *m, - const upb_symtab *ext_pool, const upb_fielddef **out_f, - upb_msgval *out_val, size_t *iter) { - size_t i = *iter; - const upb_msgval zero = {0}; - const upb_fielddef *f; - while ((f = _upb_msgdef_field(m, (int)++i)) != NULL) { - upb_msgval val = _upb_msg_getraw(msg, f); - - /* Skip field if unset or empty. */ - if (upb_fielddef_haspresence(f)) { - if (!upb_msg_has(msg, f)) continue; - } else { - upb_msgval test = val; - if (upb_fielddef_isstring(f) && !upb_fielddef_isseq(f)) { - /* Clear string pointer, only size matters (ptr could be non-NULL). */ - test.str_val.data = NULL; - } - /* Continue if NULL or 0. */ - if (memcmp(&test, &zero, sizeof(test)) == 0) continue; - - /* Continue on empty array or map. */ - if (upb_fielddef_ismap(f)) { - if (upb_map_size(test.map_val) == 0) continue; - } else if (upb_fielddef_isseq(f)) { - if (upb_array_size(test.array_val) == 0) continue; - } - } - - *out_val = val; - *out_f = f; - *iter = i; - return true; - } - *iter = i; - return false; -} - -/** upb_array *****************************************************************/ - -upb_array *upb_array_new(upb_arena *a, upb_fieldtype_t type) { - return _upb_array_new(a, type); -} - -size_t upb_array_size(const upb_array *arr) { - return arr->len; -} - -upb_msgval upb_array_get(const upb_array *arr, size_t i) { - upb_msgval ret; - const char* data = _upb_array_constptr(arr); - int lg2 = arr->data & 7; - UPB_ASSERT(i < arr->len); - memcpy(&ret, data + (i << lg2), 1 << lg2); - return ret; -} - -void upb_array_set(upb_array *arr, size_t i, upb_msgval val) { - char* data = _upb_array_ptr(arr); - int lg2 = arr->data & 7; - UPB_ASSERT(i < arr->len); - memcpy(data + (i << lg2), &val, 1 << lg2); -} - -bool upb_array_append(upb_array *arr, upb_msgval val, upb_arena *arena) { - if (!_upb_array_realloc(arr, arr->len + 1, arena)) { - return false; - } - arr->len++; - upb_array_set(arr, arr->len - 1, val); - return true; -} - -/* Resizes the array to the given size, reallocating if necessary, and returns a - * pointer to the new array elements. */ -bool upb_array_resize(upb_array *arr, size_t size, upb_arena *arena) { - return _upb_array_realloc(arr, size, arena); -} - -/** upb_map *******************************************************************/ - -upb_map *upb_map_new(upb_arena *a, upb_fieldtype_t key_type, - upb_fieldtype_t value_type) { - return _upb_map_new(a, _upb_fieldtype_to_mapsize[key_type], - _upb_fieldtype_to_mapsize[value_type]); -} - -size_t upb_map_size(const upb_map *map) { - return _upb_map_size(map); -} - -bool upb_map_get(const upb_map *map, upb_msgval key, upb_msgval *val) { - return _upb_map_get(map, &key, map->key_size, val, map->val_size); -} - -bool upb_map_set(upb_map *map, upb_msgval key, upb_msgval val, - upb_arena *arena) { - return _upb_map_set(map, &key, map->key_size, &val, map->val_size, arena); -} - -bool upb_map_delete(upb_map *map, upb_msgval key) { - return _upb_map_delete(map, &key, map->key_size); -} - -bool upb_mapiter_next(const upb_map *map, size_t *iter) { - return _upb_map_next(map, iter); -} - -/* Returns the key and value for this entry of the map. */ -upb_msgval upb_mapiter_key(const upb_map *map, size_t iter) { - upb_strtable_iter i; - upb_msgval ret; - i.t = &map->table; - i.index = iter; - _upb_map_fromkey(upb_strtable_iter_key(&i), &ret, map->key_size); - return ret; -} - -upb_msgval upb_mapiter_value(const upb_map *map, size_t iter) { - upb_strtable_iter i; - upb_msgval ret; - i.t = &map->table; - i.index = iter; - _upb_map_fromvalue(upb_strtable_iter_value(&i), &ret, map->val_size); - return ret; -} - -/* void upb_mapiter_setvalue(upb_map *map, size_t iter, upb_msgval value); */ -/* -** TODO(haberman): it's unclear whether a lot of the consistency checks should -** UPB_ASSERT() or return false. -*/ - - -#include - - - -struct upb_handlers { - upb_handlercache *cache; - const upb_msgdef *msg; - const upb_handlers **sub; - const void *top_closure_type; - upb_handlers_tabent table[1]; /* Dynamically-sized field handler array. */ -}; - -static void *upb_calloc(upb_arena *arena, size_t size) { - void *mem = upb_malloc(upb_arena_alloc(arena), size); - if (mem) { - memset(mem, 0, size); - } - return mem; -} - -/* Defined for the sole purpose of having a unique pointer value for - * UPB_NO_CLOSURE. */ -char _upb_noclosure; - -/* Given a selector for a STARTSUBMSG handler, resolves to a pointer to the - * subhandlers for this submessage field. */ -#define SUBH(h, selector) (h->sub[selector]) - -/* The selector for a submessage field is the field index. */ -#define SUBH_F(h, f) SUBH(h, upb_fielddef_index(f)) - -static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f, - upb_handlertype_t type) { - upb_selector_t sel; - bool ok; - - ok = upb_handlers_getselector(f, type, &sel); - - UPB_ASSERT(upb_handlers_msgdef(h) == upb_fielddef_containingtype(f)); - UPB_ASSERT(ok); - - return sel; -} - -static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f, - upb_handlertype_t type) { - int32_t sel = trygetsel(h, f, type); - UPB_ASSERT(sel >= 0); - return sel; -} - -static const void **returntype(upb_handlers *h, const upb_fielddef *f, - upb_handlertype_t type) { - return &h->table[handlers_getsel(h, f, type)].attr.return_closure_type; -} - -static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f, - upb_handlertype_t type, upb_func *func, - const upb_handlerattr *attr) { - upb_handlerattr set_attr = UPB_HANDLERATTR_INIT; - const void *closure_type; - const void **context_closure_type; - - UPB_ASSERT(!h->table[sel].func); - - if (attr) { - set_attr = *attr; - } - - /* Check that the given closure type matches the closure type that has been - * established for this context (if any). */ - closure_type = set_attr.closure_type; - - if (type == UPB_HANDLER_STRING) { - context_closure_type = returntype(h, f, UPB_HANDLER_STARTSTR); - } else if (f && upb_fielddef_isseq(f) && - type != UPB_HANDLER_STARTSEQ && - type != UPB_HANDLER_ENDSEQ) { - context_closure_type = returntype(h, f, UPB_HANDLER_STARTSEQ); - } else { - context_closure_type = &h->top_closure_type; - } - - if (closure_type && *context_closure_type && - closure_type != *context_closure_type) { - return false; - } - - if (closure_type) - *context_closure_type = closure_type; - - /* If this is a STARTSEQ or STARTSTR handler, check that the returned pointer - * matches any pre-existing expectations about what type is expected. */ - if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) { - const void *return_type = set_attr.return_closure_type; - const void *table_return_type = h->table[sel].attr.return_closure_type; - if (return_type && table_return_type && return_type != table_return_type) { - return false; - } - - if (table_return_type && !return_type) { - set_attr.return_closure_type = table_return_type; - } - } - - h->table[sel].func = (upb_func*)func; - h->table[sel].attr = set_attr; - return true; -} - -/* Returns the effective closure type for this handler (which will propagate - * from outer frames if this frame has no START* handler). Not implemented for - * UPB_HANDLER_STRING at the moment since this is not needed. Returns NULL is - * the effective closure type is unspecified (either no handler was registered - * to specify it or the handler that was registered did not specify the closure - * type). */ -const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f, - upb_handlertype_t type) { - const void *ret; - upb_selector_t sel; - - UPB_ASSERT(type != UPB_HANDLER_STRING); - ret = h->top_closure_type; - - if (upb_fielddef_isseq(f) && - type != UPB_HANDLER_STARTSEQ && - type != UPB_HANDLER_ENDSEQ && - h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)].func) { - ret = h->table[sel].attr.return_closure_type; - } - - if (type == UPB_HANDLER_STRING && - h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR)].func) { - ret = h->table[sel].attr.return_closure_type; - } - - /* The effective type of the submessage; not used yet. - * if (type == SUBMESSAGE && - * h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) { - * ret = h->table[sel].attr.return_closure_type; - * } */ - - return ret; -} - -/* Checks whether the START* handler specified by f & type is missing even - * though it is required to convert the established type of an outer frame - * ("closure_type") into the established type of an inner frame (represented in - * the return closure type of this handler's attr. */ -bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type, - upb_status *status) { - const void *closure_type; - const upb_handlerattr *attr; - const void *return_closure_type; - - upb_selector_t sel = handlers_getsel(h, f, type); - if (h->table[sel].func) return true; - closure_type = effective_closure_type(h, f, type); - attr = &h->table[sel].attr; - return_closure_type = attr->return_closure_type; - if (closure_type && return_closure_type && - closure_type != return_closure_type) { - return false; - } - return true; -} - -static upb_handlers *upb_handlers_new(const upb_msgdef *md, - upb_handlercache *cache, - upb_arena *arena) { - int extra; - upb_handlers *h; - - extra = - (int)(sizeof(upb_handlers_tabent) * (upb_msgdef_selectorcount(md) - 1)); - h = upb_calloc(arena, sizeof(*h) + extra); - if (!h) return NULL; - - h->cache = cache; - h->msg = md; - - if (upb_msgdef_submsgfieldcount(md) > 0) { - size_t bytes = upb_msgdef_submsgfieldcount(md) * sizeof(*h->sub); - h->sub = upb_calloc(arena, bytes); - if (!h->sub) return NULL; - } else { - h->sub = 0; - } - - /* calloc() above initialized all handlers to NULL. */ - return h; -} - -/* Public interface ***********************************************************/ - -#define SETTER(name, handlerctype, handlertype) \ - bool upb_handlers_set##name(upb_handlers *h, const upb_fielddef *f, \ - handlerctype func, \ - const upb_handlerattr *attr) { \ - int32_t sel = trygetsel(h, f, handlertype); \ - return doset(h, sel, f, handlertype, (upb_func *)func, attr); \ - } - -SETTER(int32, upb_int32_handlerfunc*, UPB_HANDLER_INT32) -SETTER(int64, upb_int64_handlerfunc*, UPB_HANDLER_INT64) -SETTER(uint32, upb_uint32_handlerfunc*, UPB_HANDLER_UINT32) -SETTER(uint64, upb_uint64_handlerfunc*, UPB_HANDLER_UINT64) -SETTER(float, upb_float_handlerfunc*, UPB_HANDLER_FLOAT) -SETTER(double, upb_double_handlerfunc*, UPB_HANDLER_DOUBLE) -SETTER(bool, upb_bool_handlerfunc*, UPB_HANDLER_BOOL) -SETTER(startstr, upb_startstr_handlerfunc*, UPB_HANDLER_STARTSTR) -SETTER(string, upb_string_handlerfunc*, UPB_HANDLER_STRING) -SETTER(endstr, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSTR) -SETTER(startseq, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSEQ) -SETTER(startsubmsg, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSUBMSG) -SETTER(endsubmsg, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSUBMSG) -SETTER(endseq, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSEQ) - -#undef SETTER - -bool upb_handlers_setunknown(upb_handlers *h, upb_unknown_handlerfunc *func, - const upb_handlerattr *attr) { - return doset(h, UPB_UNKNOWN_SELECTOR, NULL, UPB_HANDLER_INT32, - (upb_func *)func, attr); -} - -bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func, - const upb_handlerattr *attr) { - return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32, - (upb_func *)func, attr); -} - -bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func, - const upb_handlerattr *attr) { - return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32, - (upb_func *)func, attr); -} - -bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f, - const upb_handlers *sub) { - UPB_ASSERT(sub); - UPB_ASSERT(upb_fielddef_issubmsg(f)); - if (SUBH_F(h, f)) return false; /* Can't reset. */ - if (upb_handlers_msgdef(sub) != upb_fielddef_msgsubdef(f)) { - return false; - } - SUBH_F(h, f) = sub; - return true; -} - -const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h, - const upb_fielddef *f) { - UPB_ASSERT(upb_fielddef_issubmsg(f)); - return SUBH_F(h, f); -} - -upb_func *upb_handlers_gethandler(const upb_handlers *h, upb_selector_t s, - const void **handler_data) { - upb_func *ret = (upb_func *)h->table[s].func; - if (ret && handler_data) { - *handler_data = h->table[s].attr.handler_data; - } - return ret; -} - -bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel, - upb_handlerattr *attr) { - if (!upb_handlers_gethandler(h, sel, NULL)) - return false; - *attr = h->table[sel].attr; - return true; -} - -const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h, - upb_selector_t sel) { - /* STARTSUBMSG selector in sel is the field's selector base. */ - return SUBH(h, sel - UPB_STATIC_SELECTOR_COUNT); -} - -const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; } - -bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) { - return upb_handlercache_addcleanup(h->cache, p, func); -} - -upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) { - switch (upb_fielddef_type(f)) { - case UPB_TYPE_INT32: - case UPB_TYPE_ENUM: return UPB_HANDLER_INT32; - case UPB_TYPE_INT64: return UPB_HANDLER_INT64; - case UPB_TYPE_UINT32: return UPB_HANDLER_UINT32; - case UPB_TYPE_UINT64: return UPB_HANDLER_UINT64; - case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT; - case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE; - case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL; - default: UPB_ASSERT(false); return -1; /* Invalid input. */ - } -} - -bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type, - upb_selector_t *s) { - uint32_t selector_base = upb_fielddef_selectorbase(f); - switch (type) { - case UPB_HANDLER_INT32: - case UPB_HANDLER_INT64: - case UPB_HANDLER_UINT32: - case UPB_HANDLER_UINT64: - case UPB_HANDLER_FLOAT: - case UPB_HANDLER_DOUBLE: - case UPB_HANDLER_BOOL: - if (!upb_fielddef_isprimitive(f) || - upb_handlers_getprimitivehandlertype(f) != type) - return false; - *s = selector_base; - break; - case UPB_HANDLER_STRING: - if (upb_fielddef_isstring(f)) { - *s = selector_base; - } else if (upb_fielddef_lazy(f)) { - *s = selector_base + 3; - } else { - return false; - } - break; - case UPB_HANDLER_STARTSTR: - if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) { - *s = selector_base + 1; - } else { - return false; - } - break; - case UPB_HANDLER_ENDSTR: - if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) { - *s = selector_base + 2; - } else { - return false; - } - break; - case UPB_HANDLER_STARTSEQ: - if (!upb_fielddef_isseq(f)) return false; - *s = selector_base - 2; - break; - case UPB_HANDLER_ENDSEQ: - if (!upb_fielddef_isseq(f)) return false; - *s = selector_base - 1; - break; - case UPB_HANDLER_STARTSUBMSG: - if (!upb_fielddef_issubmsg(f)) return false; - /* Selectors for STARTSUBMSG are at the beginning of the table so that the - * selector can also be used as an index into the "sub" array of - * subhandlers. The indexes for the two into these two tables are the - * same, except that in the handler table the static selectors come first. */ - *s = upb_fielddef_index(f) + UPB_STATIC_SELECTOR_COUNT; - break; - case UPB_HANDLER_ENDSUBMSG: - if (!upb_fielddef_issubmsg(f)) return false; - *s = selector_base; - break; - } - UPB_ASSERT((size_t)*s < upb_msgdef_selectorcount(upb_fielddef_containingtype(f))); - return true; -} - -/* upb_handlercache ***********************************************************/ - -struct upb_handlercache { - upb_arena *arena; - upb_inttable tab; /* maps upb_msgdef* -> upb_handlers*. */ - upb_handlers_callback *callback; - const void *closure; -}; - -const upb_handlers *upb_handlercache_get(upb_handlercache *c, - const upb_msgdef *md) { - upb_msg_field_iter i; - upb_value v; - upb_handlers *h; - - if (upb_inttable_lookupptr(&c->tab, md, &v)) { - return upb_value_getptr(v); - } - - h = upb_handlers_new(md, c, c->arena); - v = upb_value_ptr(h); - - if (!h) return NULL; - if (!upb_inttable_insertptr(&c->tab, md, v)) return NULL; - - c->callback(c->closure, h); - - /* For each submessage field, get or create a handlers object and set it as - * the subhandlers. */ - for(upb_msg_field_begin(&i, md); - !upb_msg_field_done(&i); - upb_msg_field_next(&i)) { - upb_fielddef *f = upb_msg_iter_field(&i); - - if (upb_fielddef_issubmsg(f)) { - const upb_msgdef *subdef = upb_fielddef_msgsubdef(f); - const upb_handlers *sub_mh = upb_handlercache_get(c, subdef); - - if (!sub_mh) return NULL; - - upb_handlers_setsubhandlers(h, f, sub_mh); - } - } - - return h; -} - - -upb_handlercache *upb_handlercache_new(upb_handlers_callback *callback, - const void *closure) { - upb_handlercache *cache = upb_gmalloc(sizeof(*cache)); - - if (!cache) return NULL; - - cache->arena = upb_arena_new(); - - cache->callback = callback; - cache->closure = closure; - - if (!upb_inttable_init(&cache->tab, UPB_CTYPE_PTR)) goto oom; - - return cache; - -oom: - upb_gfree(cache); - return NULL; -} - -void upb_handlercache_free(upb_handlercache *cache) { - upb_inttable_uninit(&cache->tab); - upb_arena_free(cache->arena); - upb_gfree(cache); -} - -bool upb_handlercache_addcleanup(upb_handlercache *c, void *p, - upb_handlerfree *func) { - return upb_arena_addcleanup(c->arena, p, func); -} - -/* upb_byteshandler ***********************************************************/ - -bool upb_byteshandler_setstartstr(upb_byteshandler *h, - upb_startstr_handlerfunc *func, void *d) { - h->table[UPB_STARTSTR_SELECTOR].func = (upb_func*)func; - h->table[UPB_STARTSTR_SELECTOR].attr.handler_data = d; - return true; -} - -bool upb_byteshandler_setstring(upb_byteshandler *h, - upb_string_handlerfunc *func, void *d) { - h->table[UPB_STRING_SELECTOR].func = (upb_func*)func; - h->table[UPB_STRING_SELECTOR].attr.handler_data = d; - return true; -} - -bool upb_byteshandler_setendstr(upb_byteshandler *h, - upb_endfield_handlerfunc *func, void *d) { - h->table[UPB_ENDSTR_SELECTOR].func = (upb_func*)func; - h->table[UPB_ENDSTR_SELECTOR].attr.handler_data = d; - return true; -} - -/** Handlers for upb_msg ******************************************************/ - -typedef struct { - size_t offset; - int32_t hasbit; -} upb_msg_handlerdata; - -/* Fallback implementation if the handler is not specialized by the producer. */ -#define MSG_WRITER(type, ctype) \ - bool upb_msg_set ## type (void *c, const void *hd, ctype val) { \ - uint8_t *m = c; \ - const upb_msg_handlerdata *d = hd; \ - if (d->hasbit > 0) \ - *(uint8_t*)&m[d->hasbit / 8] |= 1 << (d->hasbit % 8); \ - *(ctype*)&m[d->offset] = val; \ - return true; \ - } \ - -MSG_WRITER(double, double) -MSG_WRITER(float, float) -MSG_WRITER(int32, int32_t) -MSG_WRITER(int64, int64_t) -MSG_WRITER(uint32, uint32_t) -MSG_WRITER(uint64, uint64_t) -MSG_WRITER(bool, bool) - -bool upb_msg_setscalarhandler(upb_handlers *h, const upb_fielddef *f, - size_t offset, int32_t hasbit) { - upb_handlerattr attr = UPB_HANDLERATTR_INIT; - bool ok; - - upb_msg_handlerdata *d = upb_gmalloc(sizeof(*d)); - if (!d) return false; - d->offset = offset; - d->hasbit = hasbit; - - attr.handler_data = d; - attr.alwaysok = true; - upb_handlers_addcleanup(h, d, upb_gfree); - -#define TYPE(u, l) \ - case UPB_TYPE_##u: \ - ok = upb_handlers_set##l(h, f, upb_msg_set##l, &attr); break; - - ok = false; - - switch (upb_fielddef_type(f)) { - TYPE(INT64, int64); - TYPE(INT32, int32); - TYPE(ENUM, int32); - TYPE(UINT64, uint64); - TYPE(UINT32, uint32); - TYPE(DOUBLE, double); - TYPE(FLOAT, float); - TYPE(BOOL, bool); - default: UPB_ASSERT(false); break; - } -#undef TYPE - - return ok; -} - -bool upb_msg_getscalarhandlerdata(const upb_handlers *h, - upb_selector_t s, - upb_fieldtype_t *type, - size_t *offset, - int32_t *hasbit) { - const upb_msg_handlerdata *d; - const void *p; - upb_func *f = upb_handlers_gethandler(h, s, &p); - - if ((upb_int64_handlerfunc*)f == upb_msg_setint64) { - *type = UPB_TYPE_INT64; - } else if ((upb_int32_handlerfunc*)f == upb_msg_setint32) { - *type = UPB_TYPE_INT32; - } else if ((upb_uint64_handlerfunc*)f == upb_msg_setuint64) { - *type = UPB_TYPE_UINT64; - } else if ((upb_uint32_handlerfunc*)f == upb_msg_setuint32) { - *type = UPB_TYPE_UINT32; - } else if ((upb_double_handlerfunc*)f == upb_msg_setdouble) { - *type = UPB_TYPE_DOUBLE; - } else if ((upb_float_handlerfunc*)f == upb_msg_setfloat) { - *type = UPB_TYPE_FLOAT; - } else if ((upb_bool_handlerfunc*)f == upb_msg_setbool) { - *type = UPB_TYPE_BOOL; - } else { - return false; - } - - d = p; - *offset = d->offset; - *hasbit = d->hasbit; - return true; -} - - -bool upb_bufsrc_putbuf(const char *buf, size_t len, upb_bytessink sink) { - void *subc; - bool ret; - upb_bufhandle handle = UPB_BUFHANDLE_INIT; - handle.buf = buf; - ret = upb_bytessink_start(sink, len, &subc); - if (ret && len != 0) { - ret = (upb_bytessink_putbuf(sink, subc, buf, len, &handle) >= len); - } - if (ret) { - ret = upb_bytessink_end(sink); - } - return ret; -} - - -#ifdef UPB_MSVC_VSNPRINTF -/* Visual C++ earlier than 2015 doesn't have standard C99 snprintf and - * vsnprintf. To support them, missing functions are manually implemented - * using the existing secure functions. */ -int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg) { - if (!s) { - return _vscprintf(format, arg); - } - int ret = _vsnprintf_s(s, n, _TRUNCATE, format, arg); - if (ret < 0) { - ret = _vscprintf(format, arg); - } - return ret; -} - -int msvc_snprintf(char* s, size_t n, const char* format, ...) { - va_list arg; - va_start(arg, format); - int ret = msvc_vsnprintf(s, n, format, arg); - va_end(arg); - return ret; -} -#endif -/* -** protobuf decoder bytecode compiler -** -** Code to compile a upb::Handlers into bytecode for decoding a protobuf -** according to that specific schema and destination handlers. -** -** Bytecode definition is in decoder.int.h. -*/ - -#include - -#ifdef UPB_DUMP_BYTECODE -#include -#endif - - -#define MAXLABEL 5 -#define EMPTYLABEL -1 - -/* upb_pbdecodermethod ********************************************************/ - -static void freemethod(upb_pbdecodermethod *method) { - upb_inttable_uninit(&method->dispatch); - upb_gfree(method); -} - -static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers, - mgroup *group) { - upb_pbdecodermethod *ret = upb_gmalloc(sizeof(*ret)); - upb_byteshandler_init(&ret->input_handler_); - - ret->group = group; - ret->dest_handlers_ = dest_handlers; - upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64); - - return ret; -} - -const upb_handlers *upb_pbdecodermethod_desthandlers( - const upb_pbdecodermethod *m) { - return m->dest_handlers_; -} - -const upb_byteshandler *upb_pbdecodermethod_inputhandler( - const upb_pbdecodermethod *m) { - return &m->input_handler_; -} - -bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) { - return m->is_native_; -} - - -/* mgroup *********************************************************************/ - -static void freegroup(mgroup *g) { - upb_inttable_iter i; - - upb_inttable_begin(&i, &g->methods); - for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { - freemethod(upb_value_getptr(upb_inttable_iter_value(&i))); - } - - upb_inttable_uninit(&g->methods); - upb_gfree(g->bytecode); - upb_gfree(g); -} - -mgroup *newgroup(void) { - mgroup *g = upb_gmalloc(sizeof(*g)); - upb_inttable_init(&g->methods, UPB_CTYPE_PTR); - g->bytecode = NULL; - g->bytecode_end = NULL; - return g; -} - - -/* bytecode compiler **********************************************************/ - -/* Data used only at compilation time. */ -typedef struct { - mgroup *group; - - uint32_t *pc; - int fwd_labels[MAXLABEL]; - int back_labels[MAXLABEL]; - - /* For fields marked "lazy", parse them lazily or eagerly? */ - bool lazy; -} compiler; - -static compiler *newcompiler(mgroup *group, bool lazy) { - compiler *ret = upb_gmalloc(sizeof(*ret)); - int i; - - ret->group = group; - ret->lazy = lazy; - for (i = 0; i < MAXLABEL; i++) { - ret->fwd_labels[i] = EMPTYLABEL; - ret->back_labels[i] = EMPTYLABEL; - } - return ret; -} - -static void freecompiler(compiler *c) { - upb_gfree(c); -} - -const size_t ptr_words = sizeof(void*) / sizeof(uint32_t); - -/* How many words an instruction is. */ -static int instruction_len(uint32_t instr) { - switch (getop(instr)) { - case OP_SETDISPATCH: return 1 + ptr_words; - case OP_TAGN: return 3; - case OP_SETBIGGROUPNUM: return 2; - default: return 1; - } -} - -bool op_has_longofs(int32_t instruction) { - switch (getop(instruction)) { - case OP_CALL: - case OP_BRANCH: - case OP_CHECKDELIM: - return true; - /* The "tag" instructions only have 8 bytes available for the jump target, - * but that is ok because these opcodes only require short jumps. */ - case OP_TAG1: - case OP_TAG2: - case OP_TAGN: - return false; - default: - UPB_ASSERT(false); - return false; - } -} - -static int32_t getofs(uint32_t instruction) { - if (op_has_longofs(instruction)) { - return (int32_t)instruction >> 8; - } else { - return (int8_t)(instruction >> 8); - } -} - -static void setofs(uint32_t *instruction, int32_t ofs) { - if (op_has_longofs(*instruction)) { - *instruction = getop(*instruction) | (uint32_t)ofs << 8; - } else { - *instruction = (*instruction & ~0xff00) | ((ofs & 0xff) << 8); - } - UPB_ASSERT(getofs(*instruction) == ofs); /* Would fail in cases of overflow. */ -} - -static uint32_t pcofs(compiler *c) { - return (uint32_t)(c->pc - c->group->bytecode); -} - -/* Defines a local label at the current PC location. All previous forward - * references are updated to point to this location. The location is noted - * for any future backward references. */ -static void label(compiler *c, unsigned int label) { - int val; - uint32_t *codep; - - UPB_ASSERT(label < MAXLABEL); - val = c->fwd_labels[label]; - codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val; - while (codep) { - int ofs = getofs(*codep); - setofs(codep, (int32_t)(c->pc - codep - instruction_len(*codep))); - codep = ofs ? codep + ofs : NULL; - } - c->fwd_labels[label] = EMPTYLABEL; - c->back_labels[label] = pcofs(c); -} - -/* Creates a reference to a numbered label; either a forward reference - * (positive arg) or backward reference (negative arg). For forward references - * the value returned now is actually a "next" pointer into a linked list of all - * instructions that use this label and will be patched later when the label is - * defined with label(). - * - * The returned value is the offset that should be written into the instruction. - */ -static int32_t labelref(compiler *c, int label) { - UPB_ASSERT(label < MAXLABEL); - if (label == LABEL_DISPATCH) { - /* No resolving required. */ - return 0; - } else if (label < 0) { - /* Backward local label. Relative to the next instruction. */ - uint32_t from = (uint32_t)((c->pc + 1) - c->group->bytecode); - return c->back_labels[-label] - from; - } else { - /* Forward local label: prepend to (possibly-empty) linked list. */ - int *lptr = &c->fwd_labels[label]; - int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c); - *lptr = pcofs(c); - return ret; - } -} - -static void put32(compiler *c, uint32_t v) { - mgroup *g = c->group; - if (c->pc == g->bytecode_end) { - int ofs = pcofs(c); - size_t oldsize = g->bytecode_end - g->bytecode; - size_t newsize = UPB_MAX(oldsize * 2, 64); - /* TODO(haberman): handle OOM. */ - g->bytecode = upb_grealloc(g->bytecode, oldsize * sizeof(uint32_t), - newsize * sizeof(uint32_t)); - g->bytecode_end = g->bytecode + newsize; - c->pc = g->bytecode + ofs; - } - *c->pc++ = v; -} - -static void putop(compiler *c, int op, ...) { - va_list ap; - va_start(ap, op); - - switch (op) { - case OP_SETDISPATCH: { - uintptr_t ptr = (uintptr_t)va_arg(ap, void*); - put32(c, OP_SETDISPATCH); - put32(c, (uint32_t)ptr); - if (sizeof(uintptr_t) > sizeof(uint32_t)) - put32(c, (uint64_t)ptr >> 32); - break; - } - case OP_STARTMSG: - case OP_ENDMSG: - case OP_PUSHLENDELIM: - case OP_POP: - case OP_SETDELIM: - case OP_HALT: - case OP_RET: - case OP_DISPATCH: - put32(c, op); - break; - case OP_PARSE_DOUBLE: - case OP_PARSE_FLOAT: - case OP_PARSE_INT64: - case OP_PARSE_UINT64: - case OP_PARSE_INT32: - case OP_PARSE_FIXED64: - case OP_PARSE_FIXED32: - case OP_PARSE_BOOL: - case OP_PARSE_UINT32: - case OP_PARSE_SFIXED32: - case OP_PARSE_SFIXED64: - case OP_PARSE_SINT32: - case OP_PARSE_SINT64: - case OP_STARTSEQ: - case OP_ENDSEQ: - case OP_STARTSUBMSG: - case OP_ENDSUBMSG: - case OP_STARTSTR: - case OP_STRING: - case OP_ENDSTR: - case OP_PUSHTAGDELIM: - put32(c, op | va_arg(ap, upb_selector_t) << 8); - break; - case OP_SETBIGGROUPNUM: - put32(c, op); - put32(c, va_arg(ap, int)); - break; - case OP_CALL: { - const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *); - put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8); - break; - } - case OP_CHECKDELIM: - case OP_BRANCH: { - uint32_t instruction = op; - int label = va_arg(ap, int); - setofs(&instruction, labelref(c, label)); - put32(c, instruction); - break; - } - case OP_TAG1: - case OP_TAG2: { - int label = va_arg(ap, int); - uint64_t tag = va_arg(ap, uint64_t); - uint32_t instruction = (uint32_t)(op | (tag << 16)); - UPB_ASSERT(tag <= 0xffff); - setofs(&instruction, labelref(c, label)); - put32(c, instruction); - break; - } - case OP_TAGN: { - int label = va_arg(ap, int); - uint64_t tag = va_arg(ap, uint64_t); - uint32_t instruction = op | (upb_value_size(tag) << 16); - setofs(&instruction, labelref(c, label)); - put32(c, instruction); - put32(c, (uint32_t)tag); - put32(c, tag >> 32); - break; - } - } - - va_end(ap); -} - -#if defined(UPB_DUMP_BYTECODE) - -const char *upb_pbdecoder_getopname(unsigned int op) { -#define QUOTE(x) #x -#define EXPAND_AND_QUOTE(x) QUOTE(x) -#define OPNAME(x) OP_##x -#define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x)); -#define T(x) OP(PARSE_##x) - /* Keep in sync with list in decoder.int.h. */ - switch ((opcode)op) { - T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32) - T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64) - OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG) - OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET) - OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM) - OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP) - OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT) - } - return ""; -#undef OP -#undef T -} - -#endif - -#ifdef UPB_DUMP_BYTECODE - -static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) { - - uint32_t *begin = p; - - while (p < end) { - fprintf(f, "%p %8tx", p, p - begin); - uint32_t instr = *p++; - uint8_t op = getop(instr); - fprintf(f, " %s", upb_pbdecoder_getopname(op)); - switch ((opcode)op) { - case OP_SETDISPATCH: { - const upb_inttable *dispatch; - memcpy(&dispatch, p, sizeof(void*)); - p += ptr_words; - const upb_pbdecodermethod *method = - (void *)((char *)dispatch - - offsetof(upb_pbdecodermethod, dispatch)); - fprintf(f, " %s", upb_msgdef_fullname( - upb_handlers_msgdef(method->dest_handlers_))); - break; - } - case OP_DISPATCH: - case OP_STARTMSG: - case OP_ENDMSG: - case OP_PUSHLENDELIM: - case OP_POP: - case OP_SETDELIM: - case OP_HALT: - case OP_RET: - break; - case OP_PARSE_DOUBLE: - case OP_PARSE_FLOAT: - case OP_PARSE_INT64: - case OP_PARSE_UINT64: - case OP_PARSE_INT32: - case OP_PARSE_FIXED64: - case OP_PARSE_FIXED32: - case OP_PARSE_BOOL: - case OP_PARSE_UINT32: - case OP_PARSE_SFIXED32: - case OP_PARSE_SFIXED64: - case OP_PARSE_SINT32: - case OP_PARSE_SINT64: - case OP_STARTSEQ: - case OP_ENDSEQ: - case OP_STARTSUBMSG: - case OP_ENDSUBMSG: - case OP_STARTSTR: - case OP_STRING: - case OP_ENDSTR: - case OP_PUSHTAGDELIM: - fprintf(f, " %d", instr >> 8); - break; - case OP_SETBIGGROUPNUM: - fprintf(f, " %d", *p++); - break; - case OP_CHECKDELIM: - case OP_CALL: - case OP_BRANCH: - fprintf(f, " =>0x%tx", p + getofs(instr) - begin); - break; - case OP_TAG1: - case OP_TAG2: { - fprintf(f, " tag:0x%x", instr >> 16); - if (getofs(instr)) { - fprintf(f, " =>0x%tx", p + getofs(instr) - begin); - } - break; - } - case OP_TAGN: { - uint64_t tag = *p++; - tag |= (uint64_t)*p++ << 32; - fprintf(f, " tag:0x%llx", (long long)tag); - fprintf(f, " n:%d", instr >> 16); - if (getofs(instr)) { - fprintf(f, " =>0x%tx", p + getofs(instr) - begin); - } - break; - } - } - fputs("\n", f); - } -} - -#endif - -static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) { - uint32_t tag = (upb_fielddef_number(f) << 3) | wire_type; - uint64_t encoded_tag = upb_vencode32(tag); - /* No tag should be greater than 5 bytes. */ - UPB_ASSERT(encoded_tag <= 0xffffffffff); - return encoded_tag; -} - -static void putchecktag(compiler *c, const upb_fielddef *f, - int wire_type, int dest) { - uint64_t tag = get_encoded_tag(f, wire_type); - switch (upb_value_size(tag)) { - case 1: - putop(c, OP_TAG1, dest, tag); - break; - case 2: - putop(c, OP_TAG2, dest, tag); - break; - default: - putop(c, OP_TAGN, dest, tag); - break; - } -} - -static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) { - upb_selector_t selector; - bool ok = upb_handlers_getselector(f, type, &selector); - UPB_ASSERT(ok); - return selector; -} - -/* Takes an existing, primary dispatch table entry and repacks it with a - * different alternate wire type. Called when we are inserting a secondary - * dispatch table entry for an alternate wire type. */ -static uint64_t repack(uint64_t dispatch, int new_wt2) { - uint64_t ofs; - uint8_t wt1; - uint8_t old_wt2; - upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2); - UPB_ASSERT(old_wt2 == NO_WIRE_TYPE); /* wt2 should not be set yet. */ - return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2); -} - -/* Marks the current bytecode position as the dispatch target for this message, - * field, and wire type. */ -static void dispatchtarget(compiler *c, upb_pbdecodermethod *method, - const upb_fielddef *f, int wire_type) { - /* Offset is relative to msg base. */ - uint64_t ofs = pcofs(c) - method->code_base.ofs; - uint32_t fn = upb_fielddef_number(f); - upb_inttable *d = &method->dispatch; - upb_value v; - if (upb_inttable_remove(d, fn, &v)) { - /* TODO: prioritize based on packed setting in .proto file. */ - uint64_t repacked = repack(upb_value_getuint64(v), wire_type); - upb_inttable_insert(d, fn, upb_value_uint64(repacked)); - upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs)); - } else { - uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE); - upb_inttable_insert(d, fn, upb_value_uint64(val)); - } -} - -static void putpush(compiler *c, const upb_fielddef *f) { - if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) { - putop(c, OP_PUSHLENDELIM); - } else { - uint32_t fn = upb_fielddef_number(f); - if (fn >= 1 << 24) { - putop(c, OP_PUSHTAGDELIM, 0); - putop(c, OP_SETBIGGROUPNUM, fn); - } else { - putop(c, OP_PUSHTAGDELIM, fn); - } - } -} - -static upb_pbdecodermethod *find_submethod(const compiler *c, - const upb_pbdecodermethod *method, - const upb_fielddef *f) { - const upb_handlers *sub = - upb_handlers_getsubhandlers(method->dest_handlers_, f); - upb_value v; - return upb_inttable_lookupptr(&c->group->methods, sub, &v) - ? upb_value_getptr(v) - : NULL; -} - -static void putsel(compiler *c, opcode op, upb_selector_t sel, - const upb_handlers *h) { - if (upb_handlers_gethandler(h, sel, NULL)) { - putop(c, op, sel); - } -} - -/* Puts an opcode to call a callback, but only if a callback actually exists for - * this field and handler type. */ -static void maybeput(compiler *c, opcode op, const upb_handlers *h, - const upb_fielddef *f, upb_handlertype_t type) { - putsel(c, op, getsel(f, type), h); -} - -static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) { - if (!upb_fielddef_lazy(f)) - return false; - - return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR), NULL) || - upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING), NULL) || - upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR), NULL); -} - - -/* bytecode compiler code generation ******************************************/ - -/* Symbolic names for our local labels. */ -#define LABEL_LOOPSTART 1 /* Top of a repeated field loop. */ -#define LABEL_LOOPBREAK 2 /* To jump out of a repeated loop */ -#define LABEL_FIELD 3 /* Jump backward to find the most recent field. */ -#define LABEL_ENDMSG 4 /* To reach the OP_ENDMSG instr for this msg. */ - -/* Generates bytecode to parse a single non-lazy message field. */ -static void generate_msgfield(compiler *c, const upb_fielddef *f, - upb_pbdecodermethod *method) { - const upb_handlers *h = upb_pbdecodermethod_desthandlers(method); - const upb_pbdecodermethod *sub_m = find_submethod(c, method, f); - int wire_type; - - if (!sub_m) { - /* Don't emit any code for this field at all; it will be parsed as an - * unknown field. - * - * TODO(haberman): we should change this to parse it as a string field - * instead. It will probably be faster, but more importantly, once we - * start vending unknown fields, a field shouldn't be treated as unknown - * just because it doesn't have subhandlers registered. */ - return; - } - - label(c, LABEL_FIELD); - - wire_type = - (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) - ? UPB_WIRE_TYPE_DELIMITED - : UPB_WIRE_TYPE_START_GROUP; - - if (upb_fielddef_isseq(f)) { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, wire_type, LABEL_DISPATCH); - dispatchtarget(c, method, f, wire_type); - putop(c, OP_PUSHTAGDELIM, 0); - putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); - label(c, LABEL_LOOPSTART); - putpush(c, f); - putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG)); - putop(c, OP_CALL, sub_m); - putop(c, OP_POP); - maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG); - if (wire_type == UPB_WIRE_TYPE_DELIMITED) { - putop(c, OP_SETDELIM); - } - putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); - putchecktag(c, f, wire_type, LABEL_LOOPBREAK); - putop(c, OP_BRANCH, -LABEL_LOOPSTART); - label(c, LABEL_LOOPBREAK); - putop(c, OP_POP); - maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ); - } else { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, wire_type, LABEL_DISPATCH); - dispatchtarget(c, method, f, wire_type); - putpush(c, f); - putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG)); - putop(c, OP_CALL, sub_m); - putop(c, OP_POP); - maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG); - if (wire_type == UPB_WIRE_TYPE_DELIMITED) { - putop(c, OP_SETDELIM); - } - } -} - -/* Generates bytecode to parse a single string or lazy submessage field. */ -static void generate_delimfield(compiler *c, const upb_fielddef *f, - upb_pbdecodermethod *method) { - const upb_handlers *h = upb_pbdecodermethod_desthandlers(method); - - label(c, LABEL_FIELD); - if (upb_fielddef_isseq(f)) { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); - dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); - putop(c, OP_PUSHTAGDELIM, 0); - putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); - label(c, LABEL_LOOPSTART); - putop(c, OP_PUSHLENDELIM); - putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR)); - /* Need to emit even if no handler to skip past the string. */ - putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING)); - maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR); - putop(c, OP_POP); - putop(c, OP_SETDELIM); - putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); - putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK); - putop(c, OP_BRANCH, -LABEL_LOOPSTART); - label(c, LABEL_LOOPBREAK); - putop(c, OP_POP); - maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ); - } else { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); - dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); - putop(c, OP_PUSHLENDELIM); - putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR)); - putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING)); - maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR); - putop(c, OP_POP); - putop(c, OP_SETDELIM); - } -} - -/* Generates bytecode to parse a single primitive field. */ -static void generate_primitivefield(compiler *c, const upb_fielddef *f, - upb_pbdecodermethod *method) { - const upb_handlers *h = upb_pbdecodermethod_desthandlers(method); - upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f); - opcode parse_type; - upb_selector_t sel; - int wire_type; - - label(c, LABEL_FIELD); - - /* From a decoding perspective, ENUM is the same as INT32. */ - if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM) - descriptor_type = UPB_DESCRIPTOR_TYPE_INT32; - - parse_type = (opcode)descriptor_type; - - /* TODO(haberman): generate packed or non-packed first depending on "packed" - * setting in the fielddef. This will favor (in speed) whichever was - * specified. */ - - UPB_ASSERT((int)parse_type >= 0 && parse_type <= OP_MAX); - sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); - wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)]; - if (upb_fielddef_isseq(f)) { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); - dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); - putop(c, OP_PUSHLENDELIM); - putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Packed */ - label(c, LABEL_LOOPSTART); - putop(c, parse_type, sel); - putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); - putop(c, OP_BRANCH, -LABEL_LOOPSTART); - dispatchtarget(c, method, f, wire_type); - putop(c, OP_PUSHTAGDELIM, 0); - putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Non-packed */ - label(c, LABEL_LOOPSTART); - putop(c, parse_type, sel); - putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); - putchecktag(c, f, wire_type, LABEL_LOOPBREAK); - putop(c, OP_BRANCH, -LABEL_LOOPSTART); - label(c, LABEL_LOOPBREAK); - putop(c, OP_POP); /* Packed and non-packed join. */ - maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ); - putop(c, OP_SETDELIM); /* Could remove for non-packed by dup ENDSEQ. */ - } else { - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - putchecktag(c, f, wire_type, LABEL_DISPATCH); - dispatchtarget(c, method, f, wire_type); - putop(c, parse_type, sel); - } -} - -/* Adds bytecode for parsing the given message to the given decoderplan, - * while adding all dispatch targets to this message's dispatch table. */ -static void compile_method(compiler *c, upb_pbdecodermethod *method) { - const upb_handlers *h; - const upb_msgdef *md; - uint32_t* start_pc; - upb_msg_field_iter i; - upb_value val; - - UPB_ASSERT(method); - - /* Clear all entries in the dispatch table. */ - upb_inttable_uninit(&method->dispatch); - upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64); - - h = upb_pbdecodermethod_desthandlers(method); - md = upb_handlers_msgdef(h); - - method->code_base.ofs = pcofs(c); - putop(c, OP_SETDISPATCH, &method->dispatch); - putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h); - label(c, LABEL_FIELD); - start_pc = c->pc; - for(upb_msg_field_begin(&i, md); - !upb_msg_field_done(&i); - upb_msg_field_next(&i)) { - const upb_fielddef *f = upb_msg_iter_field(&i); - upb_fieldtype_t type = upb_fielddef_type(f); - - if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) { - generate_msgfield(c, f, method); - } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES || - type == UPB_TYPE_MESSAGE) { - generate_delimfield(c, f, method); - } else { - generate_primitivefield(c, f, method); - } - } - - /* If there were no fields, or if no handlers were defined, we need to - * generate a non-empty loop body so that we can at least dispatch for unknown - * fields and check for the end of the message. */ - if (c->pc == start_pc) { - /* Check for end-of-message. */ - putop(c, OP_CHECKDELIM, LABEL_ENDMSG); - /* Unconditionally dispatch. */ - putop(c, OP_DISPATCH, 0); - } - - /* For now we just loop back to the last field of the message (or if none, - * the DISPATCH opcode for the message). */ - putop(c, OP_BRANCH, -LABEL_FIELD); - - /* Insert both a label and a dispatch table entry for this end-of-msg. */ - label(c, LABEL_ENDMSG); - val = upb_value_uint64(pcofs(c) - method->code_base.ofs); - upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val); - - putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h); - putop(c, OP_RET); - - upb_inttable_compact(&method->dispatch); -} - -/* Populate "methods" with new upb_pbdecodermethod objects reachable from "h". - * Returns the method for these handlers. - * - * Generates a new method for every destination handlers reachable from "h". */ -static void find_methods(compiler *c, const upb_handlers *h) { - upb_value v; - upb_msg_field_iter i; - const upb_msgdef *md; - upb_pbdecodermethod *method; - - if (upb_inttable_lookupptr(&c->group->methods, h, &v)) - return; - - method = newmethod(h, c->group); - upb_inttable_insertptr(&c->group->methods, h, upb_value_ptr(method)); - - /* Find submethods. */ - md = upb_handlers_msgdef(h); - for(upb_msg_field_begin(&i, md); - !upb_msg_field_done(&i); - upb_msg_field_next(&i)) { - const upb_fielddef *f = upb_msg_iter_field(&i); - const upb_handlers *sub_h; - if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE && - (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) { - /* We only generate a decoder method for submessages with handlers. - * Others will be parsed as unknown fields. */ - find_methods(c, sub_h); - } - } -} - -/* (Re-)compile bytecode for all messages in "msgs." - * Overwrites any existing bytecode in "c". */ -static void compile_methods(compiler *c) { - upb_inttable_iter i; - - /* Start over at the beginning of the bytecode. */ - c->pc = c->group->bytecode; - - upb_inttable_begin(&i, &c->group->methods); - for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { - upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i)); - compile_method(c, method); - } -} - -static void set_bytecode_handlers(mgroup *g) { - upb_inttable_iter i; - upb_inttable_begin(&i, &g->methods); - for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { - upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&i)); - upb_byteshandler *h = &m->input_handler_; - - m->code_base.ptr = g->bytecode + m->code_base.ofs; - - upb_byteshandler_setstartstr(h, upb_pbdecoder_startbc, m->code_base.ptr); - upb_byteshandler_setstring(h, upb_pbdecoder_decode, g); - upb_byteshandler_setendstr(h, upb_pbdecoder_end, m); - } -} - - -/* TODO(haberman): allow this to be constructed for an arbitrary set of dest - * handlers and other mgroups (but verify we have a transitive closure). */ -const mgroup *mgroup_new(const upb_handlers *dest, bool lazy) { - mgroup *g; - compiler *c; - - g = newgroup(); - c = newcompiler(g, lazy); - find_methods(c, dest); - - /* We compile in two passes: - * 1. all messages are assigned relative offsets from the beginning of the - * bytecode (saved in method->code_base). - * 2. forwards OP_CALL instructions can be correctly linked since message - * offsets have been previously assigned. - * - * Could avoid the second pass by linking OP_CALL instructions somehow. */ - compile_methods(c); - compile_methods(c); - g->bytecode_end = c->pc; - freecompiler(c); - -#ifdef UPB_DUMP_BYTECODE - { - FILE *f = fopen("/tmp/upb-bytecode", "w"); - UPB_ASSERT(f); - dumpbc(g->bytecode, g->bytecode_end, stderr); - dumpbc(g->bytecode, g->bytecode_end, f); - fclose(f); - - f = fopen("/tmp/upb-bytecode.bin", "wb"); - UPB_ASSERT(f); - fwrite(g->bytecode, 1, g->bytecode_end - g->bytecode, f); - fclose(f); - } -#endif - - set_bytecode_handlers(g); - return g; -} - - -/* upb_pbcodecache ************************************************************/ - -upb_pbcodecache *upb_pbcodecache_new(upb_handlercache *dest) { - upb_pbcodecache *c = upb_gmalloc(sizeof(*c)); - - if (!c) return NULL; - - c->dest = dest; - c->lazy = false; - - c->arena = upb_arena_new(); - if (!upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR)) return NULL; - - return c; -} - -void upb_pbcodecache_free(upb_pbcodecache *c) { - upb_inttable_iter i; - - upb_inttable_begin(&i, &c->groups); - for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { - upb_value val = upb_inttable_iter_value(&i); - freegroup((void*)upb_value_getconstptr(val)); - } - - upb_inttable_uninit(&c->groups); - upb_arena_free(c->arena); - upb_gfree(c); -} - -void upb_pbdecodermethodopts_setlazy(upb_pbcodecache *c, bool lazy) { - UPB_ASSERT(upb_inttable_count(&c->groups) == 0); - c->lazy = lazy; -} - -const upb_pbdecodermethod *upb_pbcodecache_get(upb_pbcodecache *c, - const upb_msgdef *md) { - upb_value v; - bool ok; - const upb_handlers *h; - const mgroup *g; - - h = upb_handlercache_get(c->dest, md); - if (upb_inttable_lookupptr(&c->groups, md, &v)) { - g = upb_value_getconstptr(v); - } else { - g = mgroup_new(h, c->lazy); - ok = upb_inttable_insertptr(&c->groups, md, upb_value_constptr(g)); - UPB_ASSUME(ok); - } - - ok = upb_inttable_lookupptr(&g->methods, h, &v); - UPB_ASSUME(ok); - return upb_value_getptr(v); -} -/* -** upb::Decoder (Bytecode Decoder VM) -** -** Bytecode must previously have been generated using the bytecode compiler in -** compile_decoder.c. This decoder then walks through the bytecode op-by-op to -** parse the input. -** -** Decoding is fully resumable; we just keep a pointer to the current bytecode -** instruction and resume from there. A fair amount of the logic here is to -** handle the fact that values can span buffer seams and we have to be able to -** be capable of suspending/resuming from any byte in the stream. This -** sometimes requires keeping a few trailing bytes from the last buffer around -** in the "residual" buffer. -*/ - -#include -#include - -#ifdef UPB_DUMP_BYTECODE -#include -#endif - - -#define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d); - -/* Error messages that are shared between the bytecode and JIT decoders. */ -const char *kPbDecoderStackOverflow = "Nesting too deep."; -const char *kPbDecoderSubmessageTooLong = - "Submessage end extends past enclosing submessage."; - -/* Error messages shared within this file. */ -static const char *kUnterminatedVarint = "Unterminated varint."; - -/* upb_pbdecoder **************************************************************/ - -static opcode halt = OP_HALT; - -/* A dummy character we can point to when the user passes us a NULL buffer. - * We need this because in C (NULL + 0) and (NULL - NULL) are undefined - * behavior, which would invalidate functions like curbufleft(). */ -static const char dummy_char; - -/* Whether an op consumes any of the input buffer. */ -static bool consumes_input(opcode op) { - switch (op) { - case OP_SETDISPATCH: - case OP_STARTMSG: - case OP_ENDMSG: - case OP_STARTSEQ: - case OP_ENDSEQ: - case OP_STARTSUBMSG: - case OP_ENDSUBMSG: - case OP_STARTSTR: - case OP_ENDSTR: - case OP_PUSHTAGDELIM: - case OP_POP: - case OP_SETDELIM: - case OP_SETBIGGROUPNUM: - case OP_CHECKDELIM: - case OP_CALL: - case OP_RET: - case OP_BRANCH: - return false; - default: - return true; - } -} - -static size_t stacksize(upb_pbdecoder *d, size_t entries) { - UPB_UNUSED(d); - return entries * sizeof(upb_pbdecoder_frame); -} - -static size_t callstacksize(upb_pbdecoder *d, size_t entries) { - UPB_UNUSED(d); - - return entries * sizeof(uint32_t*); -} - - -static bool in_residual_buf(const upb_pbdecoder *d, const char *p); - -/* It's unfortunate that we have to micro-manage the compiler with - * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily - * specific to one hardware configuration. But empirically on a Core i7, - * performance increases 30-50% with these annotations. Every instance where - * these appear, gcc 4.2.1 made the wrong decision and degraded performance in - * benchmarks. */ - -static void seterr(upb_pbdecoder *d, const char *msg) { - upb_status_seterrmsg(d->status, msg); -} - -void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) { - seterr(d, msg); -} - - -/* Buffering ******************************************************************/ - -/* We operate on one buffer at a time, which is either the user's buffer passed - * to our "decode" callback or some residual bytes from the previous buffer. */ - -/* How many bytes can be safely read from d->ptr without reading past end-of-buf - * or past the current delimited end. */ -static size_t curbufleft(const upb_pbdecoder *d) { - UPB_ASSERT(d->data_end >= d->ptr); - return d->data_end - d->ptr; -} - -/* How many bytes are available before end-of-buffer. */ -static size_t bufleft(const upb_pbdecoder *d) { - return d->end - d->ptr; -} - -/* Overall stream offset of d->ptr. */ -uint64_t offset(const upb_pbdecoder *d) { - return d->bufstart_ofs + (d->ptr - d->buf); -} - -/* How many bytes are available before the end of this delimited region. */ -size_t delim_remaining(const upb_pbdecoder *d) { - return d->top->end_ofs - offset(d); -} - -/* Advances d->ptr. */ -static void advance(upb_pbdecoder *d, size_t len) { - UPB_ASSERT(curbufleft(d) >= len); - d->ptr += len; -} - -static bool in_buf(const char *p, const char *buf, const char *end) { - return p >= buf && p <= end; -} - -static bool in_residual_buf(const upb_pbdecoder *d, const char *p) { - return in_buf(p, d->residual, d->residual_end); -} - -/* Calculates the delim_end value, which is affected by both the current buffer - * and the parsing stack, so must be called whenever either is updated. */ -static void set_delim_end(upb_pbdecoder *d) { - size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs; - if (delim_ofs <= (size_t)(d->end - d->buf)) { - d->delim_end = d->buf + delim_ofs; - d->data_end = d->delim_end; - } else { - d->data_end = d->end; - d->delim_end = NULL; - } -} - -static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) { - d->ptr = buf; - d->buf = buf; - d->end = end; - set_delim_end(d); -} - -static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) { - UPB_ASSERT(curbufleft(d) == 0); - d->bufstart_ofs += (d->end - d->buf); - switchtobuf(d, buf, buf + len); -} - -static void checkpoint(upb_pbdecoder *d) { - /* The assertion here is in the interests of efficiency, not correctness. - * We are trying to ensure that we don't checkpoint() more often than - * necessary. */ - UPB_ASSERT(d->checkpoint != d->ptr); - d->checkpoint = d->ptr; -} - -/* Skips "bytes" bytes in the stream, which may be more than available. If we - * skip more bytes than are available, we return a long read count to the caller - * indicating how many bytes can be skipped over before passing actual data - * again. Skipped bytes can pass a NULL buffer and the decoder guarantees they - * won't actually be read. - */ -static int32_t skip(upb_pbdecoder *d, size_t bytes) { - UPB_ASSERT(!in_residual_buf(d, d->ptr) || d->size_param == 0); - UPB_ASSERT(d->skip == 0); - if (bytes > delim_remaining(d)) { - seterr(d, "Skipped value extended beyond enclosing submessage."); - return (int32_t)upb_pbdecoder_suspend(d); - } else if (bufleft(d) >= bytes) { - /* Skipped data is all in current buffer, and more is still available. */ - advance(d, bytes); - d->skip = 0; - return DECODE_OK; - } else { - /* Skipped data extends beyond currently available buffers. */ - d->pc = d->last; - d->skip = bytes - curbufleft(d); - d->bufstart_ofs += (d->end - d->buf); - d->residual_end = d->residual; - switchtobuf(d, d->residual, d->residual_end); - return (int32_t)(d->size_param + d->skip); - } -} - - -/* Resumes the decoder from an initial state or from a previous suspend. */ -int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, - size_t size, const upb_bufhandle *handle) { - UPB_UNUSED(p); /* Useless; just for the benefit of the JIT. */ - - /* d->skip and d->residual_end could probably elegantly be represented - * as a single variable, to more easily represent this invariant. */ - UPB_ASSERT(!(d->skip && d->residual_end > d->residual)); - - /* We need to remember the original size_param, so that the value we return - * is relative to it, even if we do some skipping first. */ - d->size_param = size; - d->handle = handle; - - /* Have to handle this case specially (ie. not with skip()) because the user - * is allowed to pass a NULL buffer here, which won't allow us to safely - * calculate a d->end or use our normal functions like curbufleft(). */ - if (d->skip && d->skip >= size) { - d->skip -= size; - d->bufstart_ofs += size; - buf = &dummy_char; - size = 0; - - /* We can't just return now, because we might need to execute some ops - * like CHECKDELIM, which could call some callbacks and pop the stack. */ - } - - /* We need to pretend that this was the actual buffer param, since some of the - * calculations assume that d->ptr/d->buf is relative to this. */ - d->buf_param = buf; - - if (!buf) { - /* NULL buf is ok if its entire span is covered by the "skip" above, but - * by this point we know that "skip" doesn't cover the buffer. */ - seterr(d, "Passed NULL buffer over non-skippable region."); - return (int32_t)upb_pbdecoder_suspend(d); - } - - if (d->residual_end > d->residual) { - /* We have residual bytes from the last buffer. */ - UPB_ASSERT(d->ptr == d->residual); - } else { - switchtobuf(d, buf, buf + size); - } - - d->checkpoint = d->ptr; - - /* Handle skips that don't cover the whole buffer (as above). */ - if (d->skip) { - size_t skip_bytes = d->skip; - d->skip = 0; - CHECK_RETURN(skip(d, skip_bytes)); - checkpoint(d); - } - - /* If we're inside an unknown group, continue to parse unknown values. */ - if (d->top->groupnum < 0) { - CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0)); - checkpoint(d); - } - - return DECODE_OK; -} - -/* Suspends the decoder at the last checkpoint, without saving any residual - * bytes. If there are any unconsumed bytes, returns a short byte count. */ -size_t upb_pbdecoder_suspend(upb_pbdecoder *d) { - d->pc = d->last; - if (d->checkpoint == d->residual) { - /* Checkpoint was in residual buf; no user bytes were consumed. */ - d->ptr = d->residual; - return 0; - } else { - size_t ret = d->size_param - (d->end - d->checkpoint); - UPB_ASSERT(!in_residual_buf(d, d->checkpoint)); - UPB_ASSERT(d->buf == d->buf_param || d->buf == &dummy_char); - - d->bufstart_ofs += (d->checkpoint - d->buf); - d->residual_end = d->residual; - switchtobuf(d, d->residual, d->residual_end); - return ret; - } -} - -/* Suspends the decoder at the last checkpoint, and saves any unconsumed - * bytes in our residual buffer. This is necessary if we need more user - * bytes to form a complete value, which might not be contiguous in the - * user's buffers. Always consumes all user bytes. */ -static size_t suspend_save(upb_pbdecoder *d) { - /* We hit end-of-buffer before we could parse a full value. - * Save any unconsumed bytes (if any) to the residual buffer. */ - d->pc = d->last; - - if (d->checkpoint == d->residual) { - /* Checkpoint was in residual buf; append user byte(s) to residual buf. */ - UPB_ASSERT((d->residual_end - d->residual) + d->size_param <= - sizeof(d->residual)); - if (!in_residual_buf(d, d->ptr)) { - d->bufstart_ofs -= (d->residual_end - d->residual); - } - memcpy(d->residual_end, d->buf_param, d->size_param); - d->residual_end += d->size_param; - } else { - /* Checkpoint was in user buf; old residual bytes not needed. */ - size_t save; - UPB_ASSERT(!in_residual_buf(d, d->checkpoint)); - - d->ptr = d->checkpoint; - save = curbufleft(d); - UPB_ASSERT(save <= sizeof(d->residual)); - memcpy(d->residual, d->ptr, save); - d->residual_end = d->residual + save; - d->bufstart_ofs = offset(d); - } - - switchtobuf(d, d->residual, d->residual_end); - return d->size_param; -} - -/* Copies the next "bytes" bytes into "buf" and advances the stream. - * Requires that this many bytes are available in the current buffer. */ -UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf, - size_t bytes) { - UPB_ASSERT(bytes <= curbufleft(d)); - memcpy(buf, d->ptr, bytes); - advance(d, bytes); -} - -/* Slow path for getting the next "bytes" bytes, regardless of whether they are - * available in the current buffer or not. Returns a status code as described - * in decoder.int.h. */ -UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf, - size_t bytes) { - const size_t avail = curbufleft(d); - consumebytes(d, buf, avail); - bytes -= avail; - UPB_ASSERT(bytes > 0); - if (in_residual_buf(d, d->ptr)) { - advancetobuf(d, d->buf_param, d->size_param); - } - if (curbufleft(d) >= bytes) { - consumebytes(d, (char *)buf + avail, bytes); - return DECODE_OK; - } else if (d->data_end == d->delim_end) { - seterr(d, "Submessage ended in the middle of a value or group"); - return (int32_t)upb_pbdecoder_suspend(d); - } else { - return (int32_t)suspend_save(d); - } -} - -/* Gets the next "bytes" bytes, regardless of whether they are available in the - * current buffer or not. Returns a status code as described in decoder.int.h. - */ -UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf, - size_t bytes) { - if (curbufleft(d) >= bytes) { - /* Buffer has enough data to satisfy. */ - consumebytes(d, buf, bytes); - return DECODE_OK; - } else { - return getbytes_slow(d, buf, bytes); - } -} - -UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf, - size_t bytes) { - size_t ret = curbufleft(d); - memcpy(buf, d->ptr, ret); - if (in_residual_buf(d, d->ptr)) { - size_t copy = UPB_MIN(bytes - ret, d->size_param); - memcpy((char *)buf + ret, d->buf_param, copy); - ret += copy; - } - return ret; -} - -UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf, - size_t bytes) { - if (curbufleft(d) >= bytes) { - memcpy(buf, d->ptr, bytes); - return bytes; - } else { - return peekbytes_slow(d, buf, bytes); - } -} - - -/* Decoding of wire types *****************************************************/ - -/* Slow path for decoding a varint from the current buffer position. - * Returns a status code as described in decoder.int.h. */ -UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, - uint64_t *u64) { - uint8_t byte = 0x80; - int bitpos; - *u64 = 0; - for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) { - CHECK_RETURN(getbytes(d, &byte, 1)); - *u64 |= (uint64_t)(byte & 0x7F) << bitpos; - } - if(bitpos == 70 && (byte & 0x80)) { - seterr(d, kUnterminatedVarint); - return (int32_t)upb_pbdecoder_suspend(d); - } - return DECODE_OK; -} - -/* Decodes a varint from the current buffer position. - * Returns a status code as described in decoder.int.h. */ -UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) { - if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) { - *u64 = *d->ptr; - advance(d, 1); - return DECODE_OK; - } else if (curbufleft(d) >= 10) { - /* Fast case. */ - upb_decoderet r = upb_vdecode_fast(d->ptr); - if (r.p == NULL) { - seterr(d, kUnterminatedVarint); - return (int32_t)upb_pbdecoder_suspend(d); - } - advance(d, r.p - d->ptr); - *u64 = r.val; - return DECODE_OK; - } else { - /* Slow case -- varint spans buffer seam. */ - return upb_pbdecoder_decode_varint_slow(d, u64); - } -} - -/* Decodes a 32-bit varint from the current buffer position. - * Returns a status code as described in decoder.int.h. */ -UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) { - uint64_t u64; - int32_t ret = decode_varint(d, &u64); - if (ret >= 0) return ret; - if (u64 > UINT32_MAX) { - seterr(d, "Unterminated 32-bit varint"); - /* TODO(haberman) guarantee that this function return is >= 0 somehow, - * so we know this path will always be treated as error by our caller. - * Right now the size_t -> int32_t can overflow and produce negative values. - */ - *u32 = 0; - return (int32_t)upb_pbdecoder_suspend(d); - } - *u32 = (uint32_t)u64; - return DECODE_OK; -} - -/* Decodes a fixed32 from the current buffer position. - * Returns a status code as described in decoder.int.h. - * TODO: proper byte swapping for big-endian machines. */ -UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) { - return getbytes(d, u32, 4); -} - -/* Decodes a fixed64 from the current buffer position. - * Returns a status code as described in decoder.int.h. - * TODO: proper byte swapping for big-endian machines. */ -UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) { - return getbytes(d, u64, 8); -} - -/* Non-static versions of the above functions. - * These are called by the JIT for fallback paths. */ -int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) { - return decode_fixed32(d, u32); -} - -int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) { - return decode_fixed64(d, u64); -} - -static double as_double(uint64_t n) { double d; memcpy(&d, &n, 8); return d; } -static float as_float(uint32_t n) { float f; memcpy(&f, &n, 4); return f; } - -/* Pushes a frame onto the decoder stack. */ -static bool decoder_push(upb_pbdecoder *d, uint64_t end) { - upb_pbdecoder_frame *fr = d->top; - - if (end > fr->end_ofs) { - seterr(d, kPbDecoderSubmessageTooLong); - return false; - } else if (fr == d->limit) { - seterr(d, kPbDecoderStackOverflow); - return false; - } - - fr++; - fr->end_ofs = end; - fr->dispatch = NULL; - fr->groupnum = 0; - d->top = fr; - return true; -} - -static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) { - /* While we expect to see an "end" tag (either ENDGROUP or a non-sequence - * field number) prior to hitting any enclosing submessage end, pushing our - * existing delim end prevents us from continuing to parse values from a - * corrupt proto that doesn't give us an END tag in time. */ - if (!decoder_push(d, d->top->end_ofs)) - return false; - d->top->groupnum = arg; - return true; -} - -/* Pops a frame from the decoder stack. */ -static void decoder_pop(upb_pbdecoder *d) { d->top--; } - -UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, - uint64_t expected) { - uint64_t data = 0; - size_t bytes = upb_value_size(expected); - size_t read = peekbytes(d, &data, bytes); - if (read == bytes && data == expected) { - /* Advance past matched bytes. */ - int32_t ok = getbytes(d, &data, read); - UPB_ASSERT(ok < 0); - return DECODE_OK; - } else if (read < bytes && memcmp(&data, &expected, read) == 0) { - return (int32_t)suspend_save(d); - } else { - return DECODE_MISMATCH; - } -} - -int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum, - uint8_t wire_type) { - if (fieldnum >= 0) - goto have_tag; - - while (true) { - uint32_t tag; - CHECK_RETURN(decode_v32(d, &tag)); - wire_type = tag & 0x7; - fieldnum = tag >> 3; - -have_tag: - if (fieldnum == 0) { - seterr(d, "Saw invalid field number (0)"); - return (int32_t)upb_pbdecoder_suspend(d); - } - - switch (wire_type) { - case UPB_WIRE_TYPE_32BIT: - CHECK_RETURN(skip(d, 4)); - break; - case UPB_WIRE_TYPE_64BIT: - CHECK_RETURN(skip(d, 8)); - break; - case UPB_WIRE_TYPE_VARINT: { - uint64_t u64; - CHECK_RETURN(decode_varint(d, &u64)); - break; - } - case UPB_WIRE_TYPE_DELIMITED: { - uint32_t len; - CHECK_RETURN(decode_v32(d, &len)); - CHECK_RETURN(skip(d, len)); - break; - } - case UPB_WIRE_TYPE_START_GROUP: - if (!pushtagdelim(d, -fieldnum)) { - return (int32_t)upb_pbdecoder_suspend(d); - } - break; - case UPB_WIRE_TYPE_END_GROUP: - if (fieldnum == -d->top->groupnum) { - decoder_pop(d); - } else if (fieldnum == d->top->groupnum) { - return DECODE_ENDGROUP; - } else { - seterr(d, "Unmatched ENDGROUP tag."); - return (int32_t)upb_pbdecoder_suspend(d); - } - break; - default: - seterr(d, "Invalid wire type"); - return (int32_t)upb_pbdecoder_suspend(d); - } - - if (d->top->groupnum >= 0) { - /* TODO: More code needed for handling unknown groups. */ - upb_sink_putunknown(d->top->sink, d->checkpoint, d->ptr - d->checkpoint); - return DECODE_OK; - } - - /* Unknown group -- continue looping over unknown fields. */ - checkpoint(d); - } -} - -static void goto_endmsg(upb_pbdecoder *d) { - upb_value v; - bool found = upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &v); - UPB_ASSERT(found); - d->pc = d->top->base + upb_value_getuint64(v); -} - -/* Parses a tag and jumps to the corresponding bytecode instruction for this - * field. - * - * If the tag is unknown (or the wire type doesn't match), parses the field as - * unknown. If the tag is a valid ENDGROUP tag, jumps to the bytecode - * instruction for the end of message. */ -static int32_t dispatch(upb_pbdecoder *d) { - upb_inttable *dispatch = d->top->dispatch; - uint32_t tag; - uint8_t wire_type; - uint32_t fieldnum; - upb_value val; - int32_t retval; - - /* Decode tag. */ - CHECK_RETURN(decode_v32(d, &tag)); - wire_type = tag & 0x7; - fieldnum = tag >> 3; - - /* Lookup tag. Because of packed/non-packed compatibility, we have to - * check the wire type against two possibilities. */ - if (fieldnum != DISPATCH_ENDMSG && - upb_inttable_lookup32(dispatch, fieldnum, &val)) { - uint64_t v = upb_value_getuint64(val); - if (wire_type == (v & 0xff)) { - d->pc = d->top->base + (v >> 16); - return DECODE_OK; - } else if (wire_type == ((v >> 8) & 0xff)) { - bool found = - upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val); - UPB_ASSERT(found); - d->pc = d->top->base + upb_value_getuint64(val); - return DECODE_OK; - } - } - - /* We have some unknown fields (or ENDGROUP) to parse. The DISPATCH or TAG - * bytecode that triggered this is preceded by a CHECKDELIM bytecode which - * we need to back up to, so that when we're done skipping unknown data we - * can re-check the delimited end. */ - d->last--; /* Necessary if we get suspended */ - d->pc = d->last; - UPB_ASSERT(getop(*d->last) == OP_CHECKDELIM); - - /* Unknown field or ENDGROUP. */ - retval = upb_pbdecoder_skipunknown(d, fieldnum, wire_type); - - CHECK_RETURN(retval); - - if (retval == DECODE_ENDGROUP) { - goto_endmsg(d); - return DECODE_OK; - } - - return DECODE_OK; -} - -/* Callers know that the stack is more than one deep because the opcodes that - * call this only occur after PUSH operations. */ -upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) { - UPB_ASSERT(d->top != d->stack); - return d->top - 1; -} - - -/* The main decoding loop *****************************************************/ - -/* The main decoder VM function. Uses traditional bytecode dispatch loop with a - * switch() statement. */ -size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group, - const upb_bufhandle* handle) { - -#define VMCASE(op, code) \ - case op: { code; if (consumes_input(op)) checkpoint(d); break; } -#define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \ - VMCASE(OP_PARSE_ ## type, { \ - ctype val; \ - CHECK_RETURN(decode_ ## wt(d, &val)); \ - upb_sink_put ## name(d->top->sink, arg, (convfunc)(val)); \ - }) - - while(1) { - int32_t instruction; - opcode op; - uint32_t arg; - int32_t longofs; - - d->last = d->pc; - instruction = *d->pc++; - op = getop(instruction); - arg = instruction >> 8; - longofs = arg; - UPB_ASSERT(d->ptr != d->residual_end); - UPB_UNUSED(group); -#ifdef UPB_DUMP_BYTECODE - fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d " - "%x %s (%d)\n", - (int)offset(d), - (int)(d->ptr - d->buf), - (int)(d->data_end - d->ptr), - (int)(d->end - d->ptr), - (int)((d->top->end_ofs - d->bufstart_ofs) - (d->ptr - d->buf)), - (int)(d->pc - 1 - group->bytecode), - upb_pbdecoder_getopname(op), - arg); -#endif - switch (op) { - /* Technically, we are losing data if we see a 32-bit varint that is not - * properly sign-extended. We could detect this and error about the data - * loss, but proto2 does not do this, so we pass. */ - PRIMITIVE_OP(INT32, varint, int32, int32_t, uint64_t) - PRIMITIVE_OP(INT64, varint, int64, int64_t, uint64_t) - PRIMITIVE_OP(UINT32, varint, uint32, uint32_t, uint64_t) - PRIMITIVE_OP(UINT64, varint, uint64, uint64_t, uint64_t) - PRIMITIVE_OP(FIXED32, fixed32, uint32, uint32_t, uint32_t) - PRIMITIVE_OP(FIXED64, fixed64, uint64, uint64_t, uint64_t) - PRIMITIVE_OP(SFIXED32, fixed32, int32, int32_t, uint32_t) - PRIMITIVE_OP(SFIXED64, fixed64, int64, int64_t, uint64_t) - PRIMITIVE_OP(BOOL, varint, bool, bool, uint64_t) - PRIMITIVE_OP(DOUBLE, fixed64, double, as_double, uint64_t) - PRIMITIVE_OP(FLOAT, fixed32, float, as_float, uint32_t) - PRIMITIVE_OP(SINT32, varint, int32, upb_zzdec_32, uint64_t) - PRIMITIVE_OP(SINT64, varint, int64, upb_zzdec_64, uint64_t) - - VMCASE(OP_SETDISPATCH, - d->top->base = d->pc - 1; - memcpy(&d->top->dispatch, d->pc, sizeof(void*)); - d->pc += sizeof(void*) / sizeof(uint32_t); - ) - VMCASE(OP_STARTMSG, - CHECK_SUSPEND(upb_sink_startmsg(d->top->sink)); - ) - VMCASE(OP_ENDMSG, - CHECK_SUSPEND(upb_sink_endmsg(d->top->sink, d->status)); - ) - VMCASE(OP_STARTSEQ, - upb_pbdecoder_frame *outer = outer_frame(d); - CHECK_SUSPEND(upb_sink_startseq(outer->sink, arg, &d->top->sink)); - ) - VMCASE(OP_ENDSEQ, - CHECK_SUSPEND(upb_sink_endseq(d->top->sink, arg)); - ) - VMCASE(OP_STARTSUBMSG, - upb_pbdecoder_frame *outer = outer_frame(d); - CHECK_SUSPEND(upb_sink_startsubmsg(outer->sink, arg, &d->top->sink)); - ) - VMCASE(OP_ENDSUBMSG, - upb_sink subsink = (d->top + 1)->sink; - CHECK_SUSPEND(upb_sink_endsubmsg(d->top->sink, subsink, arg)); - ) - VMCASE(OP_STARTSTR, - uint32_t len = (uint32_t)delim_remaining(d); - upb_pbdecoder_frame *outer = outer_frame(d); - CHECK_SUSPEND(upb_sink_startstr(outer->sink, arg, len, &d->top->sink)); - if (len == 0) { - d->pc++; /* Skip OP_STRING. */ - } - ) - VMCASE(OP_STRING, - uint32_t len = (uint32_t)curbufleft(d); - size_t n = upb_sink_putstring(d->top->sink, arg, d->ptr, len, handle); - if (n > len) { - if (n > delim_remaining(d)) { - seterr(d, "Tried to skip past end of string."); - return upb_pbdecoder_suspend(d); - } else { - int32_t ret = skip(d, n); - /* This shouldn't return DECODE_OK, because n > len. */ - UPB_ASSERT(ret >= 0); - return ret; - } - } - advance(d, n); - if (n < len || d->delim_end == NULL) { - /* We aren't finished with this string yet. */ - d->pc--; /* Repeat OP_STRING. */ - if (n > 0) checkpoint(d); - return upb_pbdecoder_suspend(d); - } - ) - VMCASE(OP_ENDSTR, - CHECK_SUSPEND(upb_sink_endstr(d->top->sink, arg)); - ) - VMCASE(OP_PUSHTAGDELIM, - CHECK_SUSPEND(pushtagdelim(d, arg)); - ) - VMCASE(OP_SETBIGGROUPNUM, - d->top->groupnum = *d->pc++; - ) - VMCASE(OP_POP, - UPB_ASSERT(d->top > d->stack); - decoder_pop(d); - ) - VMCASE(OP_PUSHLENDELIM, - uint32_t len; - CHECK_RETURN(decode_v32(d, &len)); - CHECK_SUSPEND(decoder_push(d, offset(d) + len)); - set_delim_end(d); - ) - VMCASE(OP_SETDELIM, - set_delim_end(d); - ) - VMCASE(OP_CHECKDELIM, - /* We are guaranteed of this assert because we never allow ourselves to - * consume bytes beyond data_end, which covers delim_end when non-NULL. - */ - UPB_ASSERT(!(d->delim_end && d->ptr > d->delim_end)); - if (d->ptr == d->delim_end) - d->pc += longofs; - ) - VMCASE(OP_CALL, - d->callstack[d->call_len++] = d->pc; - d->pc += longofs; - ) - VMCASE(OP_RET, - UPB_ASSERT(d->call_len > 0); - d->pc = d->callstack[--d->call_len]; - ) - VMCASE(OP_BRANCH, - d->pc += longofs; - ) - VMCASE(OP_TAG1, - uint8_t expected; - CHECK_SUSPEND(curbufleft(d) > 0); - expected = (arg >> 8) & 0xff; - if (*d->ptr == expected) { - advance(d, 1); - } else { - int8_t shortofs; - badtag: - shortofs = arg; - if (shortofs == LABEL_DISPATCH) { - CHECK_RETURN(dispatch(d)); - } else { - d->pc += shortofs; - break; /* Avoid checkpoint(). */ - } - } - ) - VMCASE(OP_TAG2, - uint16_t expected; - CHECK_SUSPEND(curbufleft(d) > 0); - expected = (arg >> 8) & 0xffff; - if (curbufleft(d) >= 2) { - uint16_t actual; - memcpy(&actual, d->ptr, 2); - if (expected == actual) { - advance(d, 2); - } else { - goto badtag; - } - } else { - int32_t result = upb_pbdecoder_checktag_slow(d, expected); - if (result == DECODE_MISMATCH) goto badtag; - if (result >= 0) return result; - } - ) - VMCASE(OP_TAGN, { - uint64_t expected; - int32_t result; - memcpy(&expected, d->pc, 8); - d->pc += 2; - result = upb_pbdecoder_checktag_slow(d, expected); - if (result == DECODE_MISMATCH) goto badtag; - if (result >= 0) return result; - }) - VMCASE(OP_DISPATCH, { - CHECK_RETURN(dispatch(d)); - }) - VMCASE(OP_HALT, { - return d->size_param; - }) - } - } -} - - -/* BytesHandler handlers ******************************************************/ - -void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) { - upb_pbdecoder *d = closure; - UPB_UNUSED(size_hint); - d->top->end_ofs = UINT64_MAX; - d->bufstart_ofs = 0; - d->call_len = 1; - d->callstack[0] = &halt; - d->pc = pc; - d->skip = 0; - return d; -} - -bool upb_pbdecoder_end(void *closure, const void *handler_data) { - upb_pbdecoder *d = closure; - const upb_pbdecodermethod *method = handler_data; - uint64_t end; - char dummy; - - if (d->residual_end > d->residual) { - seterr(d, "Unexpected EOF: decoder still has buffered unparsed data"); - return false; - } - - if (d->skip) { - seterr(d, "Unexpected EOF inside skipped data"); - return false; - } - - if (d->top->end_ofs != UINT64_MAX) { - seterr(d, "Unexpected EOF inside delimited string"); - return false; - } - - /* The user's end() call indicates that the message ends here. */ - end = offset(d); - d->top->end_ofs = end; - - { - const uint32_t *p = d->pc; - d->stack->end_ofs = end; - /* Check the previous bytecode, but guard against beginning. */ - if (p != method->code_base.ptr) p--; - if (getop(*p) == OP_CHECKDELIM) { - /* Rewind from OP_TAG* to OP_CHECKDELIM. */ - UPB_ASSERT(getop(*d->pc) == OP_TAG1 || - getop(*d->pc) == OP_TAG2 || - getop(*d->pc) == OP_TAGN || - getop(*d->pc) == OP_DISPATCH); - d->pc = p; - } - upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL); - } - - if (d->call_len != 0) { - seterr(d, "Unexpected EOF inside submessage or group"); - return false; - } - - return true; -} - -size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf, - size_t size, const upb_bufhandle *handle) { - int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle); - - if (result == DECODE_ENDGROUP) goto_endmsg(decoder); - CHECK_RETURN(result); - - return run_decoder_vm(decoder, group, handle); -} - - -/* Public API *****************************************************************/ - -void upb_pbdecoder_reset(upb_pbdecoder *d) { - d->top = d->stack; - d->top->groupnum = 0; - d->ptr = d->residual; - d->buf = d->residual; - d->end = d->residual; - d->residual_end = d->residual; -} - -upb_pbdecoder *upb_pbdecoder_create(upb_arena *a, const upb_pbdecodermethod *m, - upb_sink sink, upb_status *status) { - const size_t default_max_nesting = 64; -#ifndef NDEBUG - size_t size_before = upb_arena_bytesallocated(a); -#endif - - upb_pbdecoder *d = upb_arena_malloc(a, sizeof(upb_pbdecoder)); - if (!d) return NULL; - - d->method_ = m; - d->callstack = upb_arena_malloc(a, callstacksize(d, default_max_nesting)); - d->stack = upb_arena_malloc(a, stacksize(d, default_max_nesting)); - if (!d->stack || !d->callstack) { - return NULL; - } - - d->arena = a; - d->limit = d->stack + default_max_nesting - 1; - d->stack_size = default_max_nesting; - d->status = status; - - upb_pbdecoder_reset(d); - upb_bytessink_reset(&d->input_, &m->input_handler_, d); - - if (d->method_->dest_handlers_) { - if (sink.handlers != d->method_->dest_handlers_) - return NULL; - } - d->top->sink = sink; - - /* If this fails, increase the value in decoder.h. */ - UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(a) - size_before <= - UPB_PB_DECODER_SIZE); - return d; -} - -uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) { - return offset(d); -} - -const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) { - return d->method_; -} - -upb_bytessink upb_pbdecoder_input(upb_pbdecoder *d) { - return d->input_; -} - -size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) { - return d->stack_size; -} - -bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) { - UPB_ASSERT(d->top >= d->stack); - - if (max < (size_t)(d->top - d->stack)) { - /* Can't set a limit smaller than what we are currently at. */ - return false; - } - - if (max > d->stack_size) { - /* Need to reallocate stack and callstack to accommodate. */ - size_t old_size = stacksize(d, d->stack_size); - size_t new_size = stacksize(d, max); - void *p = upb_arena_realloc(d->arena, d->stack, old_size, new_size); - if (!p) { - return false; - } - d->stack = p; - - old_size = callstacksize(d, d->stack_size); - new_size = callstacksize(d, max); - p = upb_arena_realloc(d->arena, d->callstack, old_size, new_size); - if (!p) { - return false; - } - d->callstack = p; - - d->stack_size = max; - } - - d->limit = d->stack + max - 1; - return true; -} -/* -** upb::Encoder -** -** Since we are implementing pure handlers (ie. without any out-of-band access -** to pre-computed lengths), we have to buffer all submessages before we can -** emit even their first byte. -** -** Not knowing the size of submessages also means we can't write a perfect -** zero-copy implementation, even with buffering. Lengths are stored as -** varints, which means that we don't know how many bytes to reserve for the -** length until we know what the length is. -** -** This leaves us with three main choices: -** -** 1. buffer all submessage data in a temporary buffer, then copy it exactly -** once into the output buffer. -** -** 2. attempt to buffer data directly into the output buffer, estimating how -** many bytes each length will take. When our guesses are wrong, use -** memmove() to grow or shrink the allotted space. -** -** 3. buffer directly into the output buffer, allocating a max length -** ahead-of-time for each submessage length. If we overallocated, we waste -** space, but no memcpy() or memmove() is required. This approach requires -** defining a maximum size for submessages and rejecting submessages that -** exceed that size. -** -** (2) and (3) have the potential to have better performance, but they are more -** complicated and subtle to implement: -** -** (3) requires making an arbitrary choice of the maximum message size; it -** wastes space when submessages are shorter than this and fails -** completely when they are longer. This makes it more finicky and -** requires configuration based on the input. It also makes it impossible -** to perfectly match the output of reference encoders that always use the -** optimal amount of space for each length. -** -** (2) requires guessing the the size upfront, and if multiple lengths are -** guessed wrong the minimum required number of memmove() operations may -** be complicated to compute correctly. Implemented properly, it may have -** a useful amortized or average cost, but more investigation is required -** to determine this and what the optimal algorithm is to achieve it. -** -** (1) makes you always pay for exactly one copy, but its implementation is -** the simplest and its performance is predictable. -** -** So for now, we implement (1) only. If we wish to optimize later, we should -** be able to do it without affecting users. -** -** The strategy is to buffer the segments of data that do *not* depend on -** unknown lengths in one buffer, and keep a separate buffer of segment pointers -** and lengths. When the top-level submessage ends, we can go beginning to end, -** alternating the writing of lengths with memcpy() of the rest of the data. -** At the top level though, no buffering is required. -*/ - - - -/* The output buffer is divided into segments; a segment is a string of data - * that is "ready to go" -- it does not need any varint lengths inserted into - * the middle. The seams between segments are where varints will be inserted - * once they are known. - * - * We also use the concept of a "run", which is a range of encoded bytes that - * occur at a single submessage level. Every segment contains one or more runs. - * - * A segment can span messages. Consider: - * - * .--Submessage lengths---------. - * | | | - * | V V - * V | |--------------- | |----------------- - * Submessages: | |----------------------------------------------- - * Top-level msg: ------------------------------------------------------------ - * - * Segments: ----- ------------------- ----------------- - * Runs: *---- *--------------*--- *---------------- - * (* marks the start) - * - * Note that the top-level menssage is not in any segment because it does not - * have any length preceding it. - * - * A segment is only interrupted when another length needs to be inserted. So - * observe how the second segment spans both the inner submessage and part of - * the next enclosing message. */ -typedef struct { - uint32_t msglen; /* The length to varint-encode before this segment. */ - uint32_t seglen; /* Length of the segment. */ -} upb_pb_encoder_segment; - -struct upb_pb_encoder { - upb_arena *arena; - - /* Our input and output. */ - upb_sink input_; - upb_bytessink output_; - - /* The "subclosure" -- used as the inner closure as part of the bytessink - * protocol. */ - void *subc; - - /* The output buffer and limit, and our current write position. "buf" - * initially points to "initbuf", but is dynamically allocated if we need to - * grow beyond the initial size. */ - char *buf, *ptr, *limit; - - /* The beginning of the current run, or undefined if we are at the top - * level. */ - char *runbegin; - - /* The list of segments we are accumulating. */ - upb_pb_encoder_segment *segbuf, *segptr, *seglimit; - - /* The stack of enclosing submessages. Each entry in the stack points to the - * segment where this submessage's length is being accumulated. */ - int *stack, *top, *stacklimit; - - /* Depth of startmsg/endmsg calls. */ - int depth; -}; - -/* low-level buffering ********************************************************/ - -/* Low-level functions for interacting with the output buffer. */ - -/* TODO(haberman): handle pushback */ -static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) { - size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL); - UPB_ASSERT(n == len); -} - -static upb_pb_encoder_segment *top(upb_pb_encoder *e) { - return &e->segbuf[*e->top]; -} - -/* Call to ensure that at least "bytes" bytes are available for writing at - * e->ptr. Returns false if the bytes could not be allocated. */ -static bool reserve(upb_pb_encoder *e, size_t bytes) { - if ((size_t)(e->limit - e->ptr) < bytes) { - /* Grow buffer. */ - char *new_buf; - size_t needed = bytes + (e->ptr - e->buf); - size_t old_size = e->limit - e->buf; - - size_t new_size = old_size; - - while (new_size < needed) { - new_size *= 2; - } - - new_buf = upb_arena_realloc(e->arena, e->buf, old_size, new_size); - - if (new_buf == NULL) { - return false; - } - - e->ptr = new_buf + (e->ptr - e->buf); - e->runbegin = new_buf + (e->runbegin - e->buf); - e->limit = new_buf + new_size; - e->buf = new_buf; - } - - return true; -} - -/* Call when "bytes" bytes have been writte at e->ptr. The caller *must* have - * previously called reserve() with at least this many bytes. */ -static void encoder_advance(upb_pb_encoder *e, size_t bytes) { - UPB_ASSERT((size_t)(e->limit - e->ptr) >= bytes); - e->ptr += bytes; -} - -/* Call when all of the bytes for a handler have been written. Flushes the - * bytes if possible and necessary, returning false if this failed. */ -static bool commit(upb_pb_encoder *e) { - if (!e->top) { - /* We aren't inside a delimited region. Flush our accumulated bytes to - * the output. - * - * TODO(haberman): in the future we may want to delay flushing for - * efficiency reasons. */ - putbuf(e, e->buf, e->ptr - e->buf); - e->ptr = e->buf; - } - - return true; -} - -/* Writes the given bytes to the buffer, handling reserve/advance. */ -static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) { - if (!reserve(e, len)) { - return false; - } - - memcpy(e->ptr, data, len); - encoder_advance(e, len); - return true; -} - -/* Finish the current run by adding the run totals to the segment and message - * length. */ -static void accumulate(upb_pb_encoder *e) { - size_t run_len; - UPB_ASSERT(e->ptr >= e->runbegin); - run_len = e->ptr - e->runbegin; - e->segptr->seglen += run_len; - top(e)->msglen += run_len; - e->runbegin = e->ptr; -} - -/* Call to indicate the start of delimited region for which the full length is - * not yet known. All data will be buffered until the length is known. - * Delimited regions may be nested; their lengths will all be tracked properly. */ -static bool start_delim(upb_pb_encoder *e) { - if (e->top) { - /* We are already buffering, advance to the next segment and push it on the - * stack. */ - accumulate(e); - - if (++e->top == e->stacklimit) { - /* TODO(haberman): grow stack? */ - return false; - } - - if (++e->segptr == e->seglimit) { - /* Grow segment buffer. */ - size_t old_size = - (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment); - size_t new_size = old_size * 2; - upb_pb_encoder_segment *new_buf = - upb_arena_realloc(e->arena, e->segbuf, old_size, new_size); - - if (new_buf == NULL) { - return false; - } - - e->segptr = new_buf + (e->segptr - e->segbuf); - e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment)); - e->segbuf = new_buf; - } - } else { - /* We were previously at the top level, start buffering. */ - e->segptr = e->segbuf; - e->top = e->stack; - e->runbegin = e->ptr; - } - - *e->top = (int)(e->segptr - e->segbuf); - e->segptr->seglen = 0; - e->segptr->msglen = 0; - - return true; -} - -/* Call to indicate the end of a delimited region. We now know the length of - * the delimited region. If we are not nested inside any other delimited - * regions, we can now emit all of the buffered data we accumulated. */ -static bool end_delim(upb_pb_encoder *e) { - size_t msglen; - accumulate(e); - msglen = top(e)->msglen; - - if (e->top == e->stack) { - /* All lengths are now available, emit all buffered data. */ - char buf[UPB_PB_VARINT_MAX_LEN]; - upb_pb_encoder_segment *s; - const char *ptr = e->buf; - for (s = e->segbuf; s <= e->segptr; s++) { - size_t lenbytes = upb_vencode64(s->msglen, buf); - putbuf(e, buf, lenbytes); - putbuf(e, ptr, s->seglen); - ptr += s->seglen; - } - - e->ptr = e->buf; - e->top = NULL; - } else { - /* Need to keep buffering; propagate length info into enclosing - * submessages. */ - --e->top; - top(e)->msglen += msglen + upb_varint_size(msglen); - } - - return true; -} - - -/* tag_t **********************************************************************/ - -/* A precomputed (pre-encoded) tag and length. */ - -typedef struct { - uint8_t bytes; - char tag[7]; -} tag_t; - -/* Allocates a new tag for this field, and sets it in these handlerattr. */ -static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt, - upb_handlerattr *attr) { - uint32_t n = upb_fielddef_number(f); - - tag_t *tag = upb_gmalloc(sizeof(tag_t)); - tag->bytes = upb_vencode64((n << 3) | wt, tag->tag); - - attr->handler_data = tag; - upb_handlers_addcleanup(h, tag, upb_gfree); -} - -static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) { - return encode_bytes(e, tag->tag, tag->bytes); -} - - -/* encoding of wire types *****************************************************/ - -static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) { - /* TODO(haberman): byte-swap for big endian. */ - return encode_bytes(e, &val, sizeof(uint64_t)); -} - -static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) { - /* TODO(haberman): byte-swap for big endian. */ - return encode_bytes(e, &val, sizeof(uint32_t)); -} - -static bool encode_varint(upb_pb_encoder *e, uint64_t val) { - if (!reserve(e, UPB_PB_VARINT_MAX_LEN)) { - return false; - } - - encoder_advance(e, upb_vencode64(val, e->ptr)); - return true; -} - -static uint64_t dbl2uint64(double d) { - uint64_t ret; - memcpy(&ret, &d, sizeof(uint64_t)); - return ret; -} - -static uint32_t flt2uint32(float d) { - uint32_t ret; - memcpy(&ret, &d, sizeof(uint32_t)); - return ret; -} - - -/* encoding of proto types ****************************************************/ - -static bool startmsg(void *c, const void *hd) { - upb_pb_encoder *e = c; - UPB_UNUSED(hd); - if (e->depth++ == 0) { - upb_bytessink_start(e->output_, 0, &e->subc); - } - return true; -} - -static bool endmsg(void *c, const void *hd, upb_status *status) { - upb_pb_encoder *e = c; - UPB_UNUSED(hd); - UPB_UNUSED(status); - if (--e->depth == 0) { - upb_bytessink_end(e->output_); - } - return true; -} - -static void *encode_startdelimfield(void *c, const void *hd) { - bool ok = encode_tag(c, hd) && commit(c) && start_delim(c); - return ok ? c : UPB_BREAK; -} - -static bool encode_unknown(void *c, const void *hd, const char *buf, - size_t len) { - UPB_UNUSED(hd); - return encode_bytes(c, buf, len) && commit(c); -} - -static bool encode_enddelimfield(void *c, const void *hd) { - UPB_UNUSED(hd); - return end_delim(c); -} - -static void *encode_startgroup(void *c, const void *hd) { - return (encode_tag(c, hd) && commit(c)) ? c : UPB_BREAK; -} - -static bool encode_endgroup(void *c, const void *hd) { - return encode_tag(c, hd) && commit(c); -} - -static void *encode_startstr(void *c, const void *hd, size_t size_hint) { - UPB_UNUSED(size_hint); - return encode_startdelimfield(c, hd); -} - -static size_t encode_strbuf(void *c, const void *hd, const char *buf, - size_t len, const upb_bufhandle *h) { - UPB_UNUSED(hd); - UPB_UNUSED(h); - return encode_bytes(c, buf, len) ? len : 0; -} - -#define T(type, ctype, convert, encode) \ - static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \ - return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e); \ - } \ - static bool encode_packed_##type(void *e, const void *hd, ctype val) { \ - UPB_UNUSED(hd); \ - return encode(e, (convert)(val)); \ - } - -T(double, double, dbl2uint64, encode_fixed64) -T(float, float, flt2uint32, encode_fixed32) -T(int64, int64_t, uint64_t, encode_varint) -T(int32, int32_t, int64_t, encode_varint) -T(fixed64, uint64_t, uint64_t, encode_fixed64) -T(fixed32, uint32_t, uint32_t, encode_fixed32) -T(bool, bool, bool, encode_varint) -T(uint32, uint32_t, uint32_t, encode_varint) -T(uint64, uint64_t, uint64_t, encode_varint) -T(enum, int32_t, uint32_t, encode_varint) -T(sfixed32, int32_t, uint32_t, encode_fixed32) -T(sfixed64, int64_t, uint64_t, encode_fixed64) -T(sint32, int32_t, upb_zzenc_32, encode_varint) -T(sint64, int64_t, upb_zzenc_64, encode_varint) - -#undef T - - -/* code to build the handlers *************************************************/ - -#include -static void newhandlers_callback(const void *closure, upb_handlers *h) { - const upb_msgdef *m; - upb_msg_field_iter i; - - UPB_UNUSED(closure); - - upb_handlers_setstartmsg(h, startmsg, NULL); - upb_handlers_setendmsg(h, endmsg, NULL); - upb_handlers_setunknown(h, encode_unknown, NULL); - - m = upb_handlers_msgdef(h); - for(upb_msg_field_begin(&i, m); - !upb_msg_field_done(&i); - upb_msg_field_next(&i)) { - const upb_fielddef *f = upb_msg_iter_field(&i); - bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) && - upb_fielddef_packed(f); - upb_handlerattr attr = UPB_HANDLERATTR_INIT; - upb_wiretype_t wt = - packed ? UPB_WIRE_TYPE_DELIMITED - : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)]; - - /* Pre-encode the tag for this field. */ - new_tag(h, f, wt, &attr); - - if (packed) { - upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr); - upb_handlers_setendseq(h, f, encode_enddelimfield, &attr); - } - -#define T(upper, lower, upbtype) \ - case UPB_DESCRIPTOR_TYPE_##upper: \ - if (packed) { \ - upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \ - } else { \ - upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \ - } \ - break; - - switch (upb_fielddef_descriptortype(f)) { - T(DOUBLE, double, double); - T(FLOAT, float, float); - T(INT64, int64, int64); - T(INT32, int32, int32); - T(FIXED64, fixed64, uint64); - T(FIXED32, fixed32, uint32); - T(BOOL, bool, bool); - T(UINT32, uint32, uint32); - T(UINT64, uint64, uint64); - T(ENUM, enum, int32); - T(SFIXED32, sfixed32, int32); - T(SFIXED64, sfixed64, int64); - T(SINT32, sint32, int32); - T(SINT64, sint64, int64); - case UPB_DESCRIPTOR_TYPE_STRING: - case UPB_DESCRIPTOR_TYPE_BYTES: - upb_handlers_setstartstr(h, f, encode_startstr, &attr); - upb_handlers_setendstr(h, f, encode_enddelimfield, &attr); - upb_handlers_setstring(h, f, encode_strbuf, &attr); - break; - case UPB_DESCRIPTOR_TYPE_MESSAGE: - upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr); - upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr); - break; - case UPB_DESCRIPTOR_TYPE_GROUP: { - /* Endgroup takes a different tag (wire_type = END_GROUP). */ - upb_handlerattr attr2 = UPB_HANDLERATTR_INIT; - new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2); - - upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr); - upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2); - - break; - } - } - -#undef T - } -} - -void upb_pb_encoder_reset(upb_pb_encoder *e) { - e->segptr = NULL; - e->top = NULL; - e->depth = 0; -} - - -/* public API *****************************************************************/ - -upb_handlercache *upb_pb_encoder_newcache(void) { - return upb_handlercache_new(newhandlers_callback, NULL); -} - -upb_pb_encoder *upb_pb_encoder_create(upb_arena *arena, const upb_handlers *h, - upb_bytessink output) { - const size_t initial_bufsize = 256; - const size_t initial_segbufsize = 16; - /* TODO(haberman): make this configurable. */ - const size_t stack_size = 64; -#ifndef NDEBUG - const size_t size_before = upb_arena_bytesallocated(arena); -#endif - - upb_pb_encoder *e = upb_arena_malloc(arena, sizeof(upb_pb_encoder)); - if (!e) return NULL; - - e->buf = upb_arena_malloc(arena, initial_bufsize); - e->segbuf = upb_arena_malloc(arena, initial_segbufsize * sizeof(*e->segbuf)); - e->stack = upb_arena_malloc(arena, stack_size * sizeof(*e->stack)); - - if (!e->buf || !e->segbuf || !e->stack) { - return NULL; - } - - e->limit = e->buf + initial_bufsize; - e->seglimit = e->segbuf + initial_segbufsize; - e->stacklimit = e->stack + stack_size; - - upb_pb_encoder_reset(e); - upb_sink_reset(&e->input_, h, e); - - e->arena = arena; - e->output_ = output; - e->subc = output.closure; - e->ptr = e->buf; - - /* If this fails, increase the value in encoder.h. */ - UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(arena) - size_before <= - UPB_PB_ENCODER_SIZE); - return e; -} - -upb_sink upb_pb_encoder_input(upb_pb_encoder *e) { return e->input_; } -/* - * upb::pb::TextPrinter - * - * OPT: This is not optimized at all. It uses printf() which parses the format - * string every time, and it allocates memory for every put. - */ - - -#include -#include -#include -#include -#include -#include - - - -struct upb_textprinter { - upb_sink input_; - upb_bytessink output_; - int indent_depth_; - bool single_line_; - void *subc; -}; - -#define CHECK(x) if ((x) < 0) goto err; - -static const char *shortname(const char *longname) { - const char *last = strrchr(longname, '.'); - return last ? last + 1 : longname; -} - -static int indent(upb_textprinter *p) { - int i; - if (!p->single_line_) - for (i = 0; i < p->indent_depth_; i++) - upb_bytessink_putbuf(p->output_, p->subc, " ", 2, NULL); - return 0; -} - -static int endfield(upb_textprinter *p) { - const char ch = (p->single_line_ ? ' ' : '\n'); - upb_bytessink_putbuf(p->output_, p->subc, &ch, 1, NULL); - return 0; -} - -static int putescaped(upb_textprinter *p, const char *buf, size_t len, - bool preserve_utf8) { - /* Based on CEscapeInternal() from Google's protobuf release. */ - char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf); - const char *end = buf + len; - - /* I think hex is prettier and more useful, but proto2 uses octal; should - * investigate whether it can parse hex also. */ - const bool use_hex = false; - bool last_hex_escape = false; /* true if last output char was \xNN */ - - for (; buf < end; buf++) { - bool is_hex_escape; - - if (dstend - dst < 4) { - upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL); - dst = dstbuf; - } - - is_hex_escape = false; - switch (*buf) { - case '\n': *(dst++) = '\\'; *(dst++) = 'n'; break; - case '\r': *(dst++) = '\\'; *(dst++) = 'r'; break; - case '\t': *(dst++) = '\\'; *(dst++) = 't'; break; - case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break; - case '\'': *(dst++) = '\\'; *(dst++) = '\''; break; - case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break; - default: - /* Note that if we emit \xNN and the buf character after that is a hex - * digit then that digit must be escaped too to prevent it being - * interpreted as part of the character code by C. */ - if ((!preserve_utf8 || (uint8_t)*buf < 0x80) && - (!isprint(*buf) || (last_hex_escape && isxdigit(*buf)))) { - sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (uint8_t)*buf); - is_hex_escape = use_hex; - dst += 4; - } else { - *(dst++) = *buf; break; - } - } - last_hex_escape = is_hex_escape; - } - /* Flush remaining data. */ - upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL); - return 0; -} - -bool putf(upb_textprinter *p, const char *fmt, ...) { - va_list args; - va_list args_copy; - char *str; - int written; - int len; - bool ok; - - va_start(args, fmt); - - /* Run once to get the length of the string. */ - _upb_va_copy(args_copy, args); - len = _upb_vsnprintf(NULL, 0, fmt, args_copy); - va_end(args_copy); - - /* + 1 for NULL terminator (vsprintf() requires it even if we don't). */ - str = upb_gmalloc(len + 1); - if (!str) return false; - written = vsprintf(str, fmt, args); - va_end(args); - UPB_ASSERT(written == len); - - ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL); - upb_gfree(str); - return ok; -} - - -/* handlers *******************************************************************/ - -static bool textprinter_startmsg(void *c, const void *hd) { - upb_textprinter *p = c; - UPB_UNUSED(hd); - if (p->indent_depth_ == 0) { - upb_bytessink_start(p->output_, 0, &p->subc); - } - return true; -} - -static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) { - upb_textprinter *p = c; - UPB_UNUSED(hd); - UPB_UNUSED(s); - if (p->indent_depth_ == 0) { - upb_bytessink_end(p->output_); - } - return true; -} - -#define TYPE(name, ctype, fmt) \ - static bool textprinter_put ## name(void *closure, const void *handler_data, \ - ctype val) { \ - upb_textprinter *p = closure; \ - const upb_fielddef *f = handler_data; \ - CHECK(indent(p)); \ - putf(p, "%s: " fmt, upb_fielddef_name(f), val); \ - CHECK(endfield(p)); \ - return true; \ - err: \ - return false; \ -} - -static bool textprinter_putbool(void *closure, const void *handler_data, - bool val) { - upb_textprinter *p = closure; - const upb_fielddef *f = handler_data; - CHECK(indent(p)); - putf(p, "%s: %s", upb_fielddef_name(f), val ? "true" : "false"); - CHECK(endfield(p)); - return true; -err: - return false; -} - -#define STRINGIFY_HELPER(x) #x -#define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x) - -TYPE(int32, int32_t, "%" PRId32) -TYPE(int64, int64_t, "%" PRId64) -TYPE(uint32, uint32_t, "%" PRIu32) -TYPE(uint64, uint64_t, "%" PRIu64) -TYPE(float, float, "%." STRINGIFY_MACROVAL(FLT_DIG) "g") -TYPE(double, double, "%." STRINGIFY_MACROVAL(DBL_DIG) "g") - -#undef TYPE - -/* Output a symbolic value from the enum if found, else just print as int32. */ -static bool textprinter_putenum(void *closure, const void *handler_data, - int32_t val) { - upb_textprinter *p = closure; - const upb_fielddef *f = handler_data; - const upb_enumdef *enum_def = upb_fielddef_enumsubdef(f); - const char *label = upb_enumdef_iton(enum_def, val); - if (label) { - indent(p); - putf(p, "%s: %s", upb_fielddef_name(f), label); - endfield(p); - } else { - if (!textprinter_putint32(closure, handler_data, val)) - return false; - } - return true; -} - -static void *textprinter_startstr(void *closure, const void *handler_data, - size_t size_hint) { - upb_textprinter *p = closure; - const upb_fielddef *f = handler_data; - UPB_UNUSED(size_hint); - indent(p); - putf(p, "%s: \"", upb_fielddef_name(f)); - return p; -} - -static bool textprinter_endstr(void *closure, const void *handler_data) { - upb_textprinter *p = closure; - UPB_UNUSED(handler_data); - putf(p, "\""); - endfield(p); - return true; -} - -static size_t textprinter_putstr(void *closure, const void *hd, const char *buf, - size_t len, const upb_bufhandle *handle) { - upb_textprinter *p = closure; - const upb_fielddef *f = hd; - UPB_UNUSED(handle); - CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING)); - return len; -err: - return 0; -} - -static void *textprinter_startsubmsg(void *closure, const void *handler_data) { - upb_textprinter *p = closure; - const char *name = handler_data; - CHECK(indent(p)); - putf(p, "%s {%c", name, p->single_line_ ? ' ' : '\n'); - p->indent_depth_++; - return p; -err: - return UPB_BREAK; -} - -static bool textprinter_endsubmsg(void *closure, const void *handler_data) { - upb_textprinter *p = closure; - UPB_UNUSED(handler_data); - p->indent_depth_--; - CHECK(indent(p)); - upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL); - CHECK(endfield(p)); - return true; -err: - return false; -} - -static void onmreg(const void *c, upb_handlers *h) { - const upb_msgdef *m = upb_handlers_msgdef(h); - upb_msg_field_iter i; - UPB_UNUSED(c); - - upb_handlers_setstartmsg(h, textprinter_startmsg, NULL); - upb_handlers_setendmsg(h, textprinter_endmsg, NULL); - - for(upb_msg_field_begin(&i, m); - !upb_msg_field_done(&i); - upb_msg_field_next(&i)) { - upb_fielddef *f = upb_msg_iter_field(&i); - upb_handlerattr attr = UPB_HANDLERATTR_INIT; - attr.handler_data = f; - switch (upb_fielddef_type(f)) { - case UPB_TYPE_INT32: - upb_handlers_setint32(h, f, textprinter_putint32, &attr); - break; - case UPB_TYPE_INT64: - upb_handlers_setint64(h, f, textprinter_putint64, &attr); - break; - case UPB_TYPE_UINT32: - upb_handlers_setuint32(h, f, textprinter_putuint32, &attr); - break; - case UPB_TYPE_UINT64: - upb_handlers_setuint64(h, f, textprinter_putuint64, &attr); - break; - case UPB_TYPE_FLOAT: - upb_handlers_setfloat(h, f, textprinter_putfloat, &attr); - break; - case UPB_TYPE_DOUBLE: - upb_handlers_setdouble(h, f, textprinter_putdouble, &attr); - break; - case UPB_TYPE_BOOL: - upb_handlers_setbool(h, f, textprinter_putbool, &attr); - break; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: - upb_handlers_setstartstr(h, f, textprinter_startstr, &attr); - upb_handlers_setstring(h, f, textprinter_putstr, &attr); - upb_handlers_setendstr(h, f, textprinter_endstr, &attr); - break; - case UPB_TYPE_MESSAGE: { - const char *name = - upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_GROUP - ? shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(f))) - : upb_fielddef_name(f); - attr.handler_data = name; - upb_handlers_setstartsubmsg(h, f, textprinter_startsubmsg, &attr); - upb_handlers_setendsubmsg(h, f, textprinter_endsubmsg, &attr); - break; - } - case UPB_TYPE_ENUM: - upb_handlers_setint32(h, f, textprinter_putenum, &attr); - break; - } - } -} - -static void textprinter_reset(upb_textprinter *p, bool single_line) { - p->single_line_ = single_line; - p->indent_depth_ = 0; -} - - -/* Public API *****************************************************************/ - -upb_textprinter *upb_textprinter_create(upb_arena *arena, const upb_handlers *h, - upb_bytessink output) { - upb_textprinter *p = upb_arena_malloc(arena, sizeof(upb_textprinter)); - if (!p) return NULL; - - p->output_ = output; - upb_sink_reset(&p->input_, h, p); - textprinter_reset(p, false); - - return p; -} - -upb_handlercache *upb_textprinter_newcache(void) { - return upb_handlercache_new(&onmreg, NULL); -} - -upb_sink upb_textprinter_input(upb_textprinter *p) { return p->input_; } - -void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) { - p->single_line_ = single_line; -} - - -/* Index is descriptor type. */ -const uint8_t upb_pb_native_wire_types[] = { - UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */ - UPB_WIRE_TYPE_64BIT, /* DOUBLE */ - UPB_WIRE_TYPE_32BIT, /* FLOAT */ - UPB_WIRE_TYPE_VARINT, /* INT64 */ - UPB_WIRE_TYPE_VARINT, /* UINT64 */ - UPB_WIRE_TYPE_VARINT, /* INT32 */ - UPB_WIRE_TYPE_64BIT, /* FIXED64 */ - UPB_WIRE_TYPE_32BIT, /* FIXED32 */ - UPB_WIRE_TYPE_VARINT, /* BOOL */ - UPB_WIRE_TYPE_DELIMITED, /* STRING */ - UPB_WIRE_TYPE_START_GROUP, /* GROUP */ - UPB_WIRE_TYPE_DELIMITED, /* MESSAGE */ - UPB_WIRE_TYPE_DELIMITED, /* BYTES */ - UPB_WIRE_TYPE_VARINT, /* UINT32 */ - UPB_WIRE_TYPE_VARINT, /* ENUM */ - UPB_WIRE_TYPE_32BIT, /* SFIXED32 */ - UPB_WIRE_TYPE_64BIT, /* SFIXED64 */ - UPB_WIRE_TYPE_VARINT, /* SINT32 */ - UPB_WIRE_TYPE_VARINT, /* SINT64 */ -}; - -/* A basic branch-based decoder, uses 32-bit values to get good performance - * on 32-bit architectures (but performs well on 64-bits also). - * This scheme comes from the original Google Protobuf implementation - * (proto2). */ -upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) { - upb_decoderet err = {NULL, 0}; - const char *p = r.p; - uint32_t low = (uint32_t)r.val; - uint32_t high = 0; - uint32_t b; - b = *(p++); low |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done; - b = *(p++); low |= (b & 0x7fU) << 28; - high = (b & 0x7fU) >> 4; if (!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7fU) << 3; if (!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done; - b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done; - return err; - -done: - r.val = ((uint64_t)high << 32) | low; - r.p = p; - return r; -} - -/* Like the previous, but uses 64-bit values. */ -upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) { - const char *p = r.p; - uint64_t val = r.val; - uint64_t b; - upb_decoderet err = {NULL, 0}; - b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done; - b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done; - return err; - -done: - r.val = val; - r.p = p; - return r; -} - -#line 1 "upb/json/parser.rl" -/* -** upb::json::Parser (upb_json_parser) -** -** A parser that uses the Ragel State Machine Compiler to generate -** the finite automata. -** -** Ragel only natively handles regular languages, but we can manually -** program it a bit to handle context-free languages like JSON, by using -** the "fcall" and "fret" constructs. -** -** This parser can handle the basics, but needs several things to be fleshed -** out: -** -** - handling of unicode escape sequences (including high surrogate pairs). -** - properly check and report errors for unknown fields, stack overflow, -** improper array nesting (or lack of nesting). -** - handling of base64 sequences with padding characters. -** - handling of push-back (non-success returns from sink functions). -** - handling of keys/escape-sequences/etc that span input buffers. -*/ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - - - -#define UPB_JSON_MAX_DEPTH 64 - -/* Type of value message */ -enum { - VALUE_NULLVALUE = 0, - VALUE_NUMBERVALUE = 1, - VALUE_STRINGVALUE = 2, - VALUE_BOOLVALUE = 3, - VALUE_STRUCTVALUE = 4, - VALUE_LISTVALUE = 5 -}; - -/* Forward declare */ -static bool is_top_level(upb_json_parser *p); -static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type); -static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type); - -static bool is_number_wrapper_object(upb_json_parser *p); -static bool does_number_wrapper_start(upb_json_parser *p); -static bool does_number_wrapper_end(upb_json_parser *p); - -static bool is_string_wrapper_object(upb_json_parser *p); -static bool does_string_wrapper_start(upb_json_parser *p); -static bool does_string_wrapper_end(upb_json_parser *p); - -static bool does_fieldmask_start(upb_json_parser *p); -static bool does_fieldmask_end(upb_json_parser *p); -static void start_fieldmask_object(upb_json_parser *p); -static void end_fieldmask_object(upb_json_parser *p); - -static void start_wrapper_object(upb_json_parser *p); -static void end_wrapper_object(upb_json_parser *p); - -static void start_value_object(upb_json_parser *p, int value_type); -static void end_value_object(upb_json_parser *p); - -static void start_listvalue_object(upb_json_parser *p); -static void end_listvalue_object(upb_json_parser *p); - -static void start_structvalue_object(upb_json_parser *p); -static void end_structvalue_object(upb_json_parser *p); - -static void start_object(upb_json_parser *p); -static void end_object(upb_json_parser *p); - -static void start_any_object(upb_json_parser *p, const char *ptr); -static bool end_any_object(upb_json_parser *p, const char *ptr); - -static bool start_subobject(upb_json_parser *p); -static void end_subobject(upb_json_parser *p); - -static void start_member(upb_json_parser *p); -static void end_member(upb_json_parser *p); -static bool end_membername(upb_json_parser *p); - -static void start_any_member(upb_json_parser *p, const char *ptr); -static void end_any_member(upb_json_parser *p, const char *ptr); -static bool end_any_membername(upb_json_parser *p); - -size_t parse(void *closure, const void *hd, const char *buf, size_t size, - const upb_bufhandle *handle); -static bool end(void *closure, const void *hd); - -static const char eof_ch = 'e'; - -/* stringsink */ -typedef struct { - upb_byteshandler handler; - upb_bytessink sink; - char *ptr; - size_t len, size; -} upb_stringsink; - - -static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) { - upb_stringsink *sink = _sink; - sink->len = 0; - UPB_UNUSED(hd); - UPB_UNUSED(size_hint); - return sink; -} - -static size_t stringsink_string(void *_sink, const void *hd, const char *ptr, - size_t len, const upb_bufhandle *handle) { - upb_stringsink *sink = _sink; - size_t new_size = sink->size; - - UPB_UNUSED(hd); - UPB_UNUSED(handle); - - while (sink->len + len > new_size) { - new_size *= 2; - } - - if (new_size != sink->size) { - sink->ptr = realloc(sink->ptr, new_size); - sink->size = new_size; - } - - memcpy(sink->ptr + sink->len, ptr, len); - sink->len += len; - - return len; -} - -void upb_stringsink_init(upb_stringsink *sink) { - upb_byteshandler_init(&sink->handler); - upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL); - upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL); - - upb_bytessink_reset(&sink->sink, &sink->handler, sink); - - sink->size = 32; - sink->ptr = malloc(sink->size); - sink->len = 0; -} - -void upb_stringsink_uninit(upb_stringsink *sink) { free(sink->ptr); } - -typedef struct { - /* For encoding Any value field in binary format. */ - upb_handlercache *encoder_handlercache; - upb_stringsink stringsink; - - /* For decoding Any value field in json format. */ - upb_json_codecache *parser_codecache; - upb_sink sink; - upb_json_parser *parser; - - /* Mark the range of uninterpreted values in json input before type url. */ - const char *before_type_url_start; - const char *before_type_url_end; - - /* Mark the range of uninterpreted values in json input after type url. */ - const char *after_type_url_start; -} upb_jsonparser_any_frame; - -typedef struct { - upb_sink sink; - - /* The current message in which we're parsing, and the field whose value we're - * expecting next. */ - const upb_msgdef *m; - const upb_fielddef *f; - - /* The table mapping json name to fielddef for this message. */ - const upb_strtable *name_table; - - /* We are in a repeated-field context. We need this flag to decide whether to - * handle the array as a normal repeated field or a - * google.protobuf.ListValue/google.protobuf.Value. */ - bool is_repeated; - - /* We are in a repeated-field context, ready to emit mapentries as - * submessages. This flag alters the start-of-object (open-brace) behavior to - * begin a sequence of mapentry messages rather than a single submessage. */ - bool is_map; - - /* We are in a map-entry message context. This flag is set when parsing the - * value field of a single map entry and indicates to all value-field parsers - * (subobjects, strings, numbers, and bools) that the map-entry submessage - * should end as soon as the value is parsed. */ - bool is_mapentry; - - /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent - * message's map field that we're currently parsing. This differs from |f| - * because |f| is the field in the *current* message (i.e., the map-entry - * message itself), not the parent's field that leads to this map. */ - const upb_fielddef *mapfield; - - /* We are in an Any message context. This flag is set when parsing the Any - * message and indicates to all field parsers (subobjects, strings, numbers, - * and bools) that the parsed field should be serialized as binary data or - * cached (type url not found yet). */ - bool is_any; - - /* The type of packed message in Any. */ - upb_jsonparser_any_frame *any_frame; - - /* True if the field to be parsed is unknown. */ - bool is_unknown_field; -} upb_jsonparser_frame; - -static void init_frame(upb_jsonparser_frame* frame) { - frame->m = NULL; - frame->f = NULL; - frame->name_table = NULL; - frame->is_repeated = false; - frame->is_map = false; - frame->is_mapentry = false; - frame->mapfield = NULL; - frame->is_any = false; - frame->any_frame = NULL; - frame->is_unknown_field = false; -} - -struct upb_json_parser { - upb_arena *arena; - const upb_json_parsermethod *method; - upb_bytessink input_; - - /* Stack to track the JSON scopes we are in. */ - upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH]; - upb_jsonparser_frame *top; - upb_jsonparser_frame *limit; - - upb_status *status; - - /* Ragel's internal parsing stack for the parsing state machine. */ - int current_state; - int parser_stack[UPB_JSON_MAX_DEPTH]; - int parser_top; - - /* The handle for the current buffer. */ - const upb_bufhandle *handle; - - /* Accumulate buffer. See details in parser.rl. */ - const char *accumulated; - size_t accumulated_len; - char *accumulate_buf; - size_t accumulate_buf_size; - - /* Multi-part text data. See details in parser.rl. */ - int multipart_state; - upb_selector_t string_selector; - - /* Input capture. See details in parser.rl. */ - const char *capture; - - /* Intermediate result of parsing a unicode escape sequence. */ - uint32_t digit; - - /* For resolve type url in Any. */ - const upb_symtab *symtab; - - /* Whether to proceed if unknown field is met. */ - bool ignore_json_unknown; - - /* Cache for parsing timestamp due to base and zone are handled in different - * handlers. */ - struct tm tm; -}; - -static upb_jsonparser_frame* start_jsonparser_frame(upb_json_parser *p) { - upb_jsonparser_frame *inner; - inner = p->top + 1; - init_frame(inner); - return inner; -} - -struct upb_json_codecache { - upb_arena *arena; - upb_inttable methods; /* upb_msgdef* -> upb_json_parsermethod* */ -}; - -struct upb_json_parsermethod { - const upb_json_codecache *cache; - upb_byteshandler input_handler_; - - /* Maps json_name -> fielddef */ - upb_strtable name_table; -}; - -#define PARSER_CHECK_RETURN(x) if (!(x)) return false - -static upb_jsonparser_any_frame *json_parser_any_frame_new( - upb_json_parser *p) { - upb_jsonparser_any_frame *frame; - - frame = upb_arena_malloc(p->arena, sizeof(upb_jsonparser_any_frame)); - - frame->encoder_handlercache = upb_pb_encoder_newcache(); - frame->parser_codecache = upb_json_codecache_new(); - frame->parser = NULL; - frame->before_type_url_start = NULL; - frame->before_type_url_end = NULL; - frame->after_type_url_start = NULL; - - upb_stringsink_init(&frame->stringsink); - - return frame; -} - -static void json_parser_any_frame_set_payload_type( - upb_json_parser *p, - upb_jsonparser_any_frame *frame, - const upb_msgdef *payload_type) { - const upb_handlers *h; - const upb_json_parsermethod *parser_method; - upb_pb_encoder *encoder; - - /* Initialize encoder. */ - h = upb_handlercache_get(frame->encoder_handlercache, payload_type); - encoder = upb_pb_encoder_create(p->arena, h, frame->stringsink.sink); - - /* Initialize parser. */ - parser_method = upb_json_codecache_get(frame->parser_codecache, payload_type); - upb_sink_reset(&frame->sink, h, encoder); - frame->parser = - upb_json_parser_create(p->arena, parser_method, p->symtab, frame->sink, - p->status, p->ignore_json_unknown); -} - -static void json_parser_any_frame_free(upb_jsonparser_any_frame *frame) { - upb_handlercache_free(frame->encoder_handlercache); - upb_json_codecache_free(frame->parser_codecache); - upb_stringsink_uninit(&frame->stringsink); -} - -static bool json_parser_any_frame_has_type_url( - upb_jsonparser_any_frame *frame) { - return frame->parser != NULL; -} - -static bool json_parser_any_frame_has_value_before_type_url( - upb_jsonparser_any_frame *frame) { - return frame->before_type_url_start != frame->before_type_url_end; -} - -static bool json_parser_any_frame_has_value_after_type_url( - upb_jsonparser_any_frame *frame) { - return frame->after_type_url_start != NULL; -} - -static bool json_parser_any_frame_has_value( - upb_jsonparser_any_frame *frame) { - return json_parser_any_frame_has_value_before_type_url(frame) || - json_parser_any_frame_has_value_after_type_url(frame); -} - -static void json_parser_any_frame_set_before_type_url_end( - upb_jsonparser_any_frame *frame, - const char *ptr) { - if (frame->parser == NULL) { - frame->before_type_url_end = ptr; - } -} - -static void json_parser_any_frame_set_after_type_url_start_once( - upb_jsonparser_any_frame *frame, - const char *ptr) { - if (json_parser_any_frame_has_type_url(frame) && - frame->after_type_url_start == NULL) { - frame->after_type_url_start = ptr; - } -} - -/* Used to signal that a capture has been suspended. */ -static char suspend_capture; - -static upb_selector_t getsel_for_handlertype(upb_json_parser *p, - upb_handlertype_t type) { - upb_selector_t sel; - bool ok = upb_handlers_getselector(p->top->f, type, &sel); - UPB_ASSUME(ok); - return sel; -} - -static upb_selector_t parser_getsel(upb_json_parser *p) { - return getsel_for_handlertype( - p, upb_handlers_getprimitivehandlertype(p->top->f)); -} - -static bool check_stack(upb_json_parser *p) { - if ((p->top + 1) == p->limit) { - upb_status_seterrmsg(p->status, "Nesting too deep"); - return false; - } - - return true; -} - -static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) { - upb_value v; - const upb_json_codecache *cache = p->method->cache; - bool ok; - const upb_json_parsermethod *method; - - ok = upb_inttable_lookupptr(&cache->methods, frame->m, &v); - UPB_ASSUME(ok); - method = upb_value_getconstptr(v); - - frame->name_table = &method->name_table; -} - -/* There are GCC/Clang built-ins for overflow checking which we could start - * using if there was any performance benefit to it. */ - -static bool checked_add(size_t a, size_t b, size_t *c) { - if (SIZE_MAX - a < b) return false; - *c = a + b; - return true; -} - -static size_t saturating_multiply(size_t a, size_t b) { - /* size_t is unsigned, so this is defined behavior even on overflow. */ - size_t ret = a * b; - if (b != 0 && ret / b != a) { - ret = SIZE_MAX; - } - return ret; -} - - -/* Base64 decoding ************************************************************/ - -/* TODO(haberman): make this streaming. */ - -static const signed char b64table[] = { - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 62/*+*/, -1, -1, -1, 63/*/ */, - 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, - 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1, - -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, - 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, - 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, - 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, -1, - -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, - 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/, - 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/, - 49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1 -}; - -/* Returns the table value sign-extended to 32 bits. Knowing that the upper - * bits will be 1 for unrecognized characters makes it easier to check for - * this error condition later (see below). */ -int32_t b64lookup(unsigned char ch) { return b64table[ch]; } - -/* Returns true if the given character is not a valid base64 character or - * padding. */ -bool nonbase64(unsigned char ch) { return b64lookup(ch) == -1 && ch != '='; } - -static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr, - size_t len) { - const char *limit = ptr + len; - for (; ptr < limit; ptr += 4) { - uint32_t val; - char output[3]; - - if (limit - ptr < 4) { - upb_status_seterrf(p->status, - "Base64 input for bytes field not a multiple of 4: %s", - upb_fielddef_name(p->top->f)); - return false; - } - - val = b64lookup(ptr[0]) << 18 | - b64lookup(ptr[1]) << 12 | - b64lookup(ptr[2]) << 6 | - b64lookup(ptr[3]); - - /* Test the upper bit; returns true if any of the characters returned -1. */ - if (val & 0x80000000) { - goto otherchar; - } - - output[0] = val >> 16; - output[1] = (val >> 8) & 0xff; - output[2] = val & 0xff; - upb_sink_putstring(p->top->sink, sel, output, 3, NULL); - } - return true; - -otherchar: - if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) || - nonbase64(ptr[3]) ) { - upb_status_seterrf(p->status, - "Non-base64 characters in bytes field: %s", - upb_fielddef_name(p->top->f)); - return false; - } if (ptr[2] == '=') { - uint32_t val; - char output; - - /* Last group contains only two input bytes, one output byte. */ - if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') { - goto badpadding; - } - - val = b64lookup(ptr[0]) << 18 | - b64lookup(ptr[1]) << 12; - - UPB_ASSERT(!(val & 0x80000000)); - output = val >> 16; - upb_sink_putstring(p->top->sink, sel, &output, 1, NULL); - return true; - } else { - uint32_t val; - char output[2]; - - /* Last group contains only three input bytes, two output bytes. */ - if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') { - goto badpadding; - } - - val = b64lookup(ptr[0]) << 18 | - b64lookup(ptr[1]) << 12 | - b64lookup(ptr[2]) << 6; - - output[0] = val >> 16; - output[1] = (val >> 8) & 0xff; - upb_sink_putstring(p->top->sink, sel, output, 2, NULL); - return true; - } - -badpadding: - upb_status_seterrf(p->status, - "Incorrect base64 padding for field: %s (%.*s)", - upb_fielddef_name(p->top->f), - 4, ptr); - return false; -} - - -/* Accumulate buffer **********************************************************/ - -/* Functionality for accumulating a buffer. - * - * Some parts of the parser need an entire value as a contiguous string. For - * example, to look up a member name in a hash table, or to turn a string into - * a number, the relevant library routines need the input string to be in - * contiguous memory, even if the value spanned two or more buffers in the - * input. These routines handle that. - * - * In the common case we can just point to the input buffer to get this - * contiguous string and avoid any actual copy. So we optimistically begin - * this way. But there are a few cases where we must instead copy into a - * separate buffer: - * - * 1. The string was not contiguous in the input (it spanned buffers). - * - * 2. The string included escape sequences that need to be interpreted to get - * the true value in a contiguous buffer. */ - -static void assert_accumulate_empty(upb_json_parser *p) { - UPB_ASSERT(p->accumulated == NULL); - UPB_ASSERT(p->accumulated_len == 0); -} - -static void accumulate_clear(upb_json_parser *p) { - p->accumulated = NULL; - p->accumulated_len = 0; -} - -/* Used internally by accumulate_append(). */ -static bool accumulate_realloc(upb_json_parser *p, size_t need) { - void *mem; - size_t old_size = p->accumulate_buf_size; - size_t new_size = UPB_MAX(old_size, 128); - while (new_size < need) { - new_size = saturating_multiply(new_size, 2); - } - - mem = upb_arena_realloc(p->arena, p->accumulate_buf, old_size, new_size); - if (!mem) { - upb_status_seterrmsg(p->status, "Out of memory allocating buffer."); - return false; - } - - p->accumulate_buf = mem; - p->accumulate_buf_size = new_size; - return true; -} - -/* Logically appends the given data to the append buffer. - * If "can_alias" is true, we will try to avoid actually copying, but the buffer - * must be valid until the next accumulate_append() call (if any). */ -static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len, - bool can_alias) { - size_t need; - - if (!p->accumulated && can_alias) { - p->accumulated = buf; - p->accumulated_len = len; - return true; - } - - if (!checked_add(p->accumulated_len, len, &need)) { - upb_status_seterrmsg(p->status, "Integer overflow."); - return false; - } - - if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) { - return false; - } - - if (p->accumulated != p->accumulate_buf) { - memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len); - p->accumulated = p->accumulate_buf; - } - - memcpy(p->accumulate_buf + p->accumulated_len, buf, len); - p->accumulated_len += len; - return true; -} - -/* Returns a pointer to the data accumulated since the last accumulate_clear() - * call, and writes the length to *len. This with point either to the input - * buffer or a temporary accumulate buffer. */ -static const char *accumulate_getptr(upb_json_parser *p, size_t *len) { - UPB_ASSERT(p->accumulated); - *len = p->accumulated_len; - return p->accumulated; -} - - -/* Mult-part text data ********************************************************/ - -/* When we have text data in the input, it can often come in multiple segments. - * For example, there may be some raw string data followed by an escape - * sequence. The two segments are processed with different logic. Also buffer - * seams in the input can cause multiple segments. - * - * As we see segments, there are two main cases for how we want to process them: - * - * 1. we want to push the captured input directly to string handlers. - * - * 2. we need to accumulate all the parts into a contiguous buffer for further - * processing (field name lookup, string->number conversion, etc). */ - -/* This is the set of states for p->multipart_state. */ -enum { - /* We are not currently processing multipart data. */ - MULTIPART_INACTIVE = 0, - - /* We are processing multipart data by accumulating it into a contiguous - * buffer. */ - MULTIPART_ACCUMULATE = 1, - - /* We are processing multipart data by pushing each part directly to the - * current string handlers. */ - MULTIPART_PUSHEAGERLY = 2 -}; - -/* Start a multi-part text value where we accumulate the data for processing at - * the end. */ -static void multipart_startaccum(upb_json_parser *p) { - assert_accumulate_empty(p); - UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE); - p->multipart_state = MULTIPART_ACCUMULATE; -} - -/* Start a multi-part text value where we immediately push text data to a string - * value with the given selector. */ -static void multipart_start(upb_json_parser *p, upb_selector_t sel) { - assert_accumulate_empty(p); - UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE); - p->multipart_state = MULTIPART_PUSHEAGERLY; - p->string_selector = sel; -} - -static bool multipart_text(upb_json_parser *p, const char *buf, size_t len, - bool can_alias) { - switch (p->multipart_state) { - case MULTIPART_INACTIVE: - upb_status_seterrmsg( - p->status, "Internal error: unexpected state MULTIPART_INACTIVE"); - return false; - - case MULTIPART_ACCUMULATE: - if (!accumulate_append(p, buf, len, can_alias)) { - return false; - } - break; - - case MULTIPART_PUSHEAGERLY: { - const upb_bufhandle *handle = can_alias ? p->handle : NULL; - upb_sink_putstring(p->top->sink, p->string_selector, buf, len, handle); - break; - } - } - - return true; -} - -/* Note: this invalidates the accumulate buffer! Call only after reading its - * contents. */ -static void multipart_end(upb_json_parser *p) { - /* UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE); */ - p->multipart_state = MULTIPART_INACTIVE; - accumulate_clear(p); -} - - -/* Input capture **************************************************************/ - -/* Functionality for capturing a region of the input as text. Gracefully - * handles the case where a buffer seam occurs in the middle of the captured - * region. */ - -static void capture_begin(upb_json_parser *p, const char *ptr) { - UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE); - UPB_ASSERT(p->capture == NULL); - p->capture = ptr; -} - -static bool capture_end(upb_json_parser *p, const char *ptr) { - UPB_ASSERT(p->capture); - if (multipart_text(p, p->capture, ptr - p->capture, true)) { - p->capture = NULL; - return true; - } else { - return false; - } -} - -/* This is called at the end of each input buffer (ie. when we have hit a - * buffer seam). If we are in the middle of capturing the input, this - * processes the unprocessed capture region. */ -static void capture_suspend(upb_json_parser *p, const char **ptr) { - if (!p->capture) return; - - if (multipart_text(p, p->capture, *ptr - p->capture, false)) { - /* We use this as a signal that we were in the middle of capturing, and - * that capturing should resume at the beginning of the next buffer. - * - * We can't use *ptr here, because we have no guarantee that this pointer - * will be valid when we resume (if the underlying memory is freed, then - * using the pointer at all, even to compare to NULL, is likely undefined - * behavior). */ - p->capture = &suspend_capture; - } else { - /* Need to back up the pointer to the beginning of the capture, since - * we were not able to actually preserve it. */ - *ptr = p->capture; - } -} - -static void capture_resume(upb_json_parser *p, const char *ptr) { - if (p->capture) { - UPB_ASSERT(p->capture == &suspend_capture); - p->capture = ptr; - } -} - - -/* Callbacks from the parser **************************************************/ - -/* These are the functions called directly from the parser itself. - * We define these in the same order as their declarations in the parser. */ - -static char escape_char(char in) { - switch (in) { - case 'r': return '\r'; - case 't': return '\t'; - case 'n': return '\n'; - case 'f': return '\f'; - case 'b': return '\b'; - case '/': return '/'; - case '"': return '"'; - case '\\': return '\\'; - default: - UPB_ASSERT(0); - return 'x'; - } -} - -static bool escape(upb_json_parser *p, const char *ptr) { - char ch = escape_char(*ptr); - return multipart_text(p, &ch, 1, false); -} - -static void start_hex(upb_json_parser *p) { - p->digit = 0; -} - -static void hexdigit(upb_json_parser *p, const char *ptr) { - char ch = *ptr; - - p->digit <<= 4; - - if (ch >= '0' && ch <= '9') { - p->digit += (ch - '0'); - } else if (ch >= 'a' && ch <= 'f') { - p->digit += ((ch - 'a') + 10); - } else { - UPB_ASSERT(ch >= 'A' && ch <= 'F'); - p->digit += ((ch - 'A') + 10); - } -} - -static bool end_hex(upb_json_parser *p) { - uint32_t codepoint = p->digit; - - /* emit the codepoint as UTF-8. */ - char utf8[3]; /* support \u0000 -- \uFFFF -- need only three bytes. */ - int length = 0; - if (codepoint <= 0x7F) { - utf8[0] = codepoint; - length = 1; - } else if (codepoint <= 0x07FF) { - utf8[1] = (codepoint & 0x3F) | 0x80; - codepoint >>= 6; - utf8[0] = (codepoint & 0x1F) | 0xC0; - length = 2; - } else /* codepoint <= 0xFFFF */ { - utf8[2] = (codepoint & 0x3F) | 0x80; - codepoint >>= 6; - utf8[1] = (codepoint & 0x3F) | 0x80; - codepoint >>= 6; - utf8[0] = (codepoint & 0x0F) | 0xE0; - length = 3; - } - /* TODO(haberman): Handle high surrogates: if codepoint is a high surrogate - * we have to wait for the next escape to get the full code point). */ - - return multipart_text(p, utf8, length, false); -} - -static void start_text(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_text(upb_json_parser *p, const char *ptr) { - return capture_end(p, ptr); -} - -static bool start_number(upb_json_parser *p, const char *ptr) { - if (is_top_level(p)) { - if (is_number_wrapper_object(p)) { - start_wrapper_object(p); - } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - start_value_object(p, VALUE_NUMBERVALUE); - } else { - return false; - } - } else if (does_number_wrapper_start(p)) { - if (!start_subobject(p)) { - return false; - } - start_wrapper_object(p); - } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { - if (!start_subobject(p)) { - return false; - } - start_value_object(p, VALUE_NUMBERVALUE); - } - - multipart_startaccum(p); - capture_begin(p, ptr); - return true; -} - -static bool parse_number(upb_json_parser *p, bool is_quoted); - -static bool end_number_nontop(upb_json_parser *p, const char *ptr) { - if (!capture_end(p, ptr)) { - return false; - } - - if (p->top->f == NULL) { - multipart_end(p); - return true; - } - - return parse_number(p, false); -} - -static bool end_number(upb_json_parser *p, const char *ptr) { - if (!end_number_nontop(p, ptr)) { - return false; - } - - if (does_number_wrapper_end(p)) { - end_wrapper_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - end_value_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - return true; -} - -/* |buf| is NULL-terminated. |buf| itself will never include quotes; - * |is_quoted| tells us whether this text originally appeared inside quotes. */ -static bool parse_number_from_buffer(upb_json_parser *p, const char *buf, - bool is_quoted) { - size_t len = strlen(buf); - const char *bufend = buf + len; - char *end; - upb_fieldtype_t type = upb_fielddef_type(p->top->f); - double val; - double dummy; - double inf = UPB_INFINITY; - - errno = 0; - - if (len == 0 || buf[0] == ' ') { - return false; - } - - /* For integer types, first try parsing with integer-specific routines. - * If these succeed, they will be more accurate for int64/uint64 than - * strtod(). - */ - switch (type) { - case UPB_TYPE_ENUM: - case UPB_TYPE_INT32: { - long val = strtol(buf, &end, 0); - if (errno == ERANGE || end != bufend) { - break; - } else if (val > INT32_MAX || val < INT32_MIN) { - return false; - } else { - upb_sink_putint32(p->top->sink, parser_getsel(p), (int32_t)val); - return true; - } - } - case UPB_TYPE_UINT32: { - unsigned long val = strtoul(buf, &end, 0); - if (end != bufend) { - break; - } else if (val > UINT32_MAX || errno == ERANGE) { - return false; - } else { - upb_sink_putuint32(p->top->sink, parser_getsel(p), (uint32_t)val); - return true; - } - } - /* XXX: We can't handle [u]int64 properly on 32-bit machines because - * strto[u]ll isn't in C89. */ - case UPB_TYPE_INT64: { - long val = strtol(buf, &end, 0); - if (errno == ERANGE || end != bufend) { - break; - } else { - upb_sink_putint64(p->top->sink, parser_getsel(p), val); - return true; - } - } - case UPB_TYPE_UINT64: { - unsigned long val = strtoul(p->accumulated, &end, 0); - if (end != bufend) { - break; - } else if (errno == ERANGE) { - return false; - } else { - upb_sink_putuint64(p->top->sink, parser_getsel(p), val); - return true; - } - } - default: - break; - } - - if (type != UPB_TYPE_DOUBLE && type != UPB_TYPE_FLOAT && is_quoted) { - /* Quoted numbers for integer types are not allowed to be in double form. */ - return false; - } - - if (len == strlen("Infinity") && strcmp(buf, "Infinity") == 0) { - /* C89 does not have an INFINITY macro. */ - val = inf; - } else if (len == strlen("-Infinity") && strcmp(buf, "-Infinity") == 0) { - val = -inf; - } else { - val = strtod(buf, &end); - if (errno == ERANGE || end != bufend) { - return false; - } - } - - switch (type) { -#define CASE(capitaltype, smalltype, ctype, min, max) \ - case UPB_TYPE_ ## capitaltype: { \ - if (modf(val, &dummy) != 0 || val > max || val < min) { \ - return false; \ - } else { \ - upb_sink_put ## smalltype(p->top->sink, parser_getsel(p), \ - (ctype)val); \ - return true; \ - } \ - break; \ - } - case UPB_TYPE_ENUM: - CASE(INT32, int32, int32_t, INT32_MIN, INT32_MAX); - CASE(INT64, int64, int64_t, INT64_MIN, INT64_MAX); - CASE(UINT32, uint32, uint32_t, 0, UINT32_MAX); - CASE(UINT64, uint64, uint64_t, 0, UINT64_MAX); -#undef CASE - - case UPB_TYPE_DOUBLE: - upb_sink_putdouble(p->top->sink, parser_getsel(p), val); - return true; - case UPB_TYPE_FLOAT: - if ((val > FLT_MAX || val < -FLT_MAX) && val != inf && val != -inf) { - return false; - } else { - upb_sink_putfloat(p->top->sink, parser_getsel(p), val); - return true; - } - default: - return false; - } -} - -static bool parse_number(upb_json_parser *p, bool is_quoted) { - size_t len; - const char *buf; - - /* strtol() and friends unfortunately do not support specifying the length of - * the input string, so we need to force a copy into a NULL-terminated buffer. */ - if (!multipart_text(p, "\0", 1, false)) { - return false; - } - - buf = accumulate_getptr(p, &len); - - if (parse_number_from_buffer(p, buf, is_quoted)) { - multipart_end(p); - return true; - } else { - upb_status_seterrf(p->status, "error parsing number: %s", buf); - multipart_end(p); - return false; - } -} - -static bool parser_putbool(upb_json_parser *p, bool val) { - bool ok; - - if (p->top->f == NULL) { - return true; - } - - if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) { - upb_status_seterrf(p->status, - "Boolean value specified for non-bool field: %s", - upb_fielddef_name(p->top->f)); - return false; - } - - ok = upb_sink_putbool(p->top->sink, parser_getsel(p), val); - UPB_ASSERT(ok); - - return true; -} - -static bool end_bool(upb_json_parser *p, bool val) { - if (is_top_level(p)) { - if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) { - start_wrapper_object(p); - } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - start_value_object(p, VALUE_BOOLVALUE); - } else { - return false; - } - } else if (is_wellknown_field(p, UPB_WELLKNOWN_BOOLVALUE)) { - if (!start_subobject(p)) { - return false; - } - start_wrapper_object(p); - } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { - if (!start_subobject(p)) { - return false; - } - start_value_object(p, VALUE_BOOLVALUE); - } - - if (p->top->is_unknown_field) { - return true; - } - - if (!parser_putbool(p, val)) { - return false; - } - - if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) { - end_wrapper_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - end_value_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - return true; -} - -static bool end_null(upb_json_parser *p) { - const char *zero_ptr = "0"; - - if (is_top_level(p)) { - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - start_value_object(p, VALUE_NULLVALUE); - } else { - return true; - } - } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { - if (!start_subobject(p)) { - return false; - } - start_value_object(p, VALUE_NULLVALUE); - } else { - return true; - } - - /* Fill null_value field. */ - multipart_startaccum(p); - capture_begin(p, zero_ptr); - capture_end(p, zero_ptr + 1); - parse_number(p, false); - - end_value_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - - return true; -} - -static bool start_any_stringval(upb_json_parser *p) { - multipart_startaccum(p); - return true; -} - -static bool start_stringval(upb_json_parser *p) { - if (is_top_level(p)) { - if (is_string_wrapper_object(p) || - is_number_wrapper_object(p)) { - start_wrapper_object(p); - } else if (is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) { - start_fieldmask_object(p); - return true; - } else if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) || - is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) { - start_object(p); - } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - start_value_object(p, VALUE_STRINGVALUE); - } else { - return false; - } - } else if (does_string_wrapper_start(p) || - does_number_wrapper_start(p)) { - if (!start_subobject(p)) { - return false; - } - start_wrapper_object(p); - } else if (does_fieldmask_start(p)) { - if (!start_subobject(p)) { - return false; - } - start_fieldmask_object(p); - return true; - } else if (is_wellknown_field(p, UPB_WELLKNOWN_TIMESTAMP) || - is_wellknown_field(p, UPB_WELLKNOWN_DURATION)) { - if (!start_subobject(p)) { - return false; - } - start_object(p); - } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { - if (!start_subobject(p)) { - return false; - } - start_value_object(p, VALUE_STRINGVALUE); - } - - if (p->top->f == NULL) { - multipart_startaccum(p); - return true; - } - - if (p->top->is_any) { - return start_any_stringval(p); - } - - if (upb_fielddef_isstring(p->top->f)) { - upb_jsonparser_frame *inner; - upb_selector_t sel; - - if (!check_stack(p)) return false; - - /* Start a new parser frame: parser frames correspond one-to-one with - * handler frames, and string events occur in a sub-frame. */ - inner = start_jsonparser_frame(p); - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(p->top->sink, sel, 0, &inner->sink); - inner->m = p->top->m; - inner->f = p->top->f; - p->top = inner; - - if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) { - /* For STRING fields we push data directly to the handlers as it is - * parsed. We don't do this yet for BYTES fields, because our base64 - * decoder is not streaming. - * - * TODO(haberman): make base64 decoding streaming also. */ - multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING)); - return true; - } else { - multipart_startaccum(p); - return true; - } - } else if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL && - upb_fielddef_type(p->top->f) != UPB_TYPE_MESSAGE) { - /* No need to push a frame -- numeric values in quotes remain in the - * current parser frame. These values must accmulate so we can convert - * them all at once at the end. */ - multipart_startaccum(p); - return true; - } else { - upb_status_seterrf(p->status, - "String specified for bool or submessage field: %s", - upb_fielddef_name(p->top->f)); - return false; - } -} - -static bool end_any_stringval(upb_json_parser *p) { - size_t len; - const char *buf = accumulate_getptr(p, &len); - - /* Set type_url */ - upb_selector_t sel; - upb_jsonparser_frame *inner; - if (!check_stack(p)) return false; - inner = p->top + 1; - - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(p->top->sink, sel, 0, &inner->sink); - sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); - upb_sink_putstring(inner->sink, sel, buf, len, NULL); - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(inner->sink, sel); - - multipart_end(p); - - /* Resolve type url */ - if (strncmp(buf, "type.googleapis.com/", 20) == 0 && len > 20) { - const upb_msgdef *payload_type = NULL; - buf += 20; - len -= 20; - - payload_type = upb_symtab_lookupmsg2(p->symtab, buf, len); - if (payload_type == NULL) { - upb_status_seterrf( - p->status, "Cannot find packed type: %.*s\n", (int)len, buf); - return false; - } - - json_parser_any_frame_set_payload_type(p, p->top->any_frame, payload_type); - - return true; - } else { - upb_status_seterrf( - p->status, "Invalid type url: %.*s\n", (int)len, buf); - return false; - } -} - -static bool end_stringval_nontop(upb_json_parser *p) { - bool ok = true; - - if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) || - is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) { - multipart_end(p); - return true; - } - - if (p->top->f == NULL) { - multipart_end(p); - return true; - } - - if (p->top->is_any) { - return end_any_stringval(p); - } - - switch (upb_fielddef_type(p->top->f)) { - case UPB_TYPE_BYTES: - if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING), - p->accumulated, p->accumulated_len)) { - return false; - } - /* Fall through. */ - - case UPB_TYPE_STRING: { - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(p->top->sink, sel); - p->top--; - break; - } - - case UPB_TYPE_ENUM: { - /* Resolve enum symbolic name to integer value. */ - const upb_enumdef *enumdef = upb_fielddef_enumsubdef(p->top->f); - - size_t len; - const char *buf = accumulate_getptr(p, &len); - - int32_t int_val = 0; - ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val); - - if (ok) { - upb_selector_t sel = parser_getsel(p); - upb_sink_putint32(p->top->sink, sel, int_val); - } else { - upb_status_seterrf(p->status, "Enum value unknown: '%.*s'", len, buf); - } - - break; - } - - case UPB_TYPE_INT32: - case UPB_TYPE_INT64: - case UPB_TYPE_UINT32: - case UPB_TYPE_UINT64: - case UPB_TYPE_DOUBLE: - case UPB_TYPE_FLOAT: - ok = parse_number(p, true); - break; - - default: - UPB_ASSERT(false); - upb_status_seterrmsg(p->status, "Internal error in JSON decoder"); - ok = false; - break; - } - - multipart_end(p); - - return ok; -} - -static bool end_stringval(upb_json_parser *p) { - /* FieldMask's stringvals have been ended when handling them. Only need to - * close FieldMask here.*/ - if (does_fieldmask_end(p)) { - end_fieldmask_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - if (!end_stringval_nontop(p)) { - return false; - } - - if (does_string_wrapper_end(p) || - does_number_wrapper_end(p)) { - end_wrapper_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - end_value_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) || - is_wellknown_msg(p, UPB_WELLKNOWN_DURATION) || - is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) { - end_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - return true; - } - - return true; -} - -static void start_duration_base(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_duration_base(upb_json_parser *p, const char *ptr) { - size_t len; - const char *buf; - char seconds_buf[14]; - char nanos_buf[12]; - char *end; - int64_t seconds = 0; - int32_t nanos = 0; - double val = 0.0; - const char *seconds_membername = "seconds"; - const char *nanos_membername = "nanos"; - size_t fraction_start; - - if (!capture_end(p, ptr)) { - return false; - } - - buf = accumulate_getptr(p, &len); - - memset(seconds_buf, 0, 14); - memset(nanos_buf, 0, 12); - - /* Find out base end. The maximus duration is 315576000000, which cannot be - * represented by double without losing precision. Thus, we need to handle - * fraction and base separately. */ - for (fraction_start = 0; fraction_start < len && buf[fraction_start] != '.'; - fraction_start++); - - /* Parse base */ - memcpy(seconds_buf, buf, fraction_start); - seconds = strtol(seconds_buf, &end, 10); - if (errno == ERANGE || end != seconds_buf + fraction_start) { - upb_status_seterrf(p->status, "error parsing duration: %s", - seconds_buf); - return false; - } - - if (seconds > 315576000000) { - upb_status_seterrf(p->status, "error parsing duration: " - "maximum acceptable value is " - "315576000000"); - return false; - } - - if (seconds < -315576000000) { - upb_status_seterrf(p->status, "error parsing duration: " - "minimum acceptable value is " - "-315576000000"); - return false; - } - - /* Parse fraction */ - nanos_buf[0] = '0'; - memcpy(nanos_buf + 1, buf + fraction_start, len - fraction_start); - val = strtod(nanos_buf, &end); - if (errno == ERANGE || end != nanos_buf + len - fraction_start + 1) { - upb_status_seterrf(p->status, "error parsing duration: %s", - nanos_buf); - return false; - } - - nanos = val * 1000000000; - if (seconds < 0) nanos = -nanos; - - /* Clean up buffer */ - multipart_end(p); - - /* Set seconds */ - start_member(p); - capture_begin(p, seconds_membername); - capture_end(p, seconds_membername + 7); - end_membername(p); - upb_sink_putint64(p->top->sink, parser_getsel(p), seconds); - end_member(p); - - /* Set nanos */ - start_member(p); - capture_begin(p, nanos_membername); - capture_end(p, nanos_membername + 5); - end_membername(p); - upb_sink_putint32(p->top->sink, parser_getsel(p), nanos); - end_member(p); - - /* Continue previous arena */ - multipart_startaccum(p); - - return true; -} - -static int parse_timestamp_number(upb_json_parser *p) { - size_t len; - const char *buf; - int val; - - /* atoi() and friends unfortunately do not support specifying the length of - * the input string, so we need to force a copy into a NULL-terminated buffer. */ - multipart_text(p, "\0", 1, false); - - buf = accumulate_getptr(p, &len); - val = atoi(buf); - multipart_end(p); - multipart_startaccum(p); - - return val; -} - -static void start_year(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_year(upb_json_parser *p, const char *ptr) { - if (!capture_end(p, ptr)) { - return false; - } - p->tm.tm_year = parse_timestamp_number(p) - 1900; - return true; -} - -static void start_month(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_month(upb_json_parser *p, const char *ptr) { - if (!capture_end(p, ptr)) { - return false; - } - p->tm.tm_mon = parse_timestamp_number(p) - 1; - return true; -} - -static void start_day(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_day(upb_json_parser *p, const char *ptr) { - if (!capture_end(p, ptr)) { - return false; - } - p->tm.tm_mday = parse_timestamp_number(p); - return true; -} - -static void start_hour(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_hour(upb_json_parser *p, const char *ptr) { - if (!capture_end(p, ptr)) { - return false; - } - p->tm.tm_hour = parse_timestamp_number(p); - return true; -} - -static void start_minute(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_minute(upb_json_parser *p, const char *ptr) { - if (!capture_end(p, ptr)) { - return false; - } - p->tm.tm_min = parse_timestamp_number(p); - return true; -} - -static void start_second(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_second(upb_json_parser *p, const char *ptr) { - if (!capture_end(p, ptr)) { - return false; - } - p->tm.tm_sec = parse_timestamp_number(p); - return true; -} - -static void start_timestamp_base(upb_json_parser *p) { - memset(&p->tm, 0, sizeof(struct tm)); -} - -static void start_timestamp_fraction(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_timestamp_fraction(upb_json_parser *p, const char *ptr) { - size_t len; - const char *buf; - char nanos_buf[12]; - char *end; - double val = 0.0; - int32_t nanos; - const char *nanos_membername = "nanos"; - - memset(nanos_buf, 0, 12); - - if (!capture_end(p, ptr)) { - return false; - } - - buf = accumulate_getptr(p, &len); - - if (len > 10) { - upb_status_seterrf(p->status, - "error parsing timestamp: at most 9-digit fraction."); - return false; - } - - /* Parse nanos */ - nanos_buf[0] = '0'; - memcpy(nanos_buf + 1, buf, len); - val = strtod(nanos_buf, &end); - - if (errno == ERANGE || end != nanos_buf + len + 1) { - upb_status_seterrf(p->status, "error parsing timestamp nanos: %s", - nanos_buf); - return false; - } - - nanos = val * 1000000000; - - /* Clean up previous environment */ - multipart_end(p); - - /* Set nanos */ - start_member(p); - capture_begin(p, nanos_membername); - capture_end(p, nanos_membername + 5); - end_membername(p); - upb_sink_putint32(p->top->sink, parser_getsel(p), nanos); - end_member(p); - - /* Continue previous environment */ - multipart_startaccum(p); - - return true; -} - -static void start_timestamp_zone(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static int div_round_up2(int n, int d) { - return (n + d - 1) / d; -} - -/* epoch_days(1970, 1, 1) == 1970-01-01 == 0. */ -static int epoch_days(int year, int month, int day) { - static const uint16_t month_yday[12] = {0, 31, 59, 90, 120, 151, - 181, 212, 243, 273, 304, 334}; - int febs_since_0 = month > 2 ? year + 1 : year; - int leap_days_since_0 = div_round_up2(febs_since_0, 4) - - div_round_up2(febs_since_0, 100) + - div_round_up2(febs_since_0, 400); - int days_since_0 = - 365 * year + month_yday[month - 1] + (day - 1) + leap_days_since_0; - - /* Convert from 0-epoch (0001-01-01 BC) to Unix Epoch (1970-01-01 AD). - * Since the "BC" system does not have a year zero, 1 BC == year zero. */ - return days_since_0 - 719528; -} - -static int64_t upb_timegm(const struct tm *tp) { - int64_t ret = epoch_days(tp->tm_year + 1900, tp->tm_mon + 1, tp->tm_mday); - ret = (ret * 24) + tp->tm_hour; - ret = (ret * 60) + tp->tm_min; - ret = (ret * 60) + tp->tm_sec; - return ret; -} - -static bool end_timestamp_zone(upb_json_parser *p, const char *ptr) { - size_t len; - const char *buf; - int hours; - int64_t seconds; - const char *seconds_membername = "seconds"; - - if (!capture_end(p, ptr)) { - return false; - } - - buf = accumulate_getptr(p, &len); - - if (buf[0] != 'Z') { - if (sscanf(buf + 1, "%2d:00", &hours) != 1) { - upb_status_seterrf(p->status, "error parsing timestamp offset"); - return false; - } - - if (buf[0] == '+') { - hours = -hours; - } - - p->tm.tm_hour += hours; - } - - /* Normalize tm */ - seconds = upb_timegm(&p->tm); - - /* Check timestamp boundary */ - if (seconds < -62135596800) { - upb_status_seterrf(p->status, "error parsing timestamp: " - "minimum acceptable value is " - "0001-01-01T00:00:00Z"); - return false; - } - - /* Clean up previous environment */ - multipart_end(p); - - /* Set seconds */ - start_member(p); - capture_begin(p, seconds_membername); - capture_end(p, seconds_membername + 7); - end_membername(p); - upb_sink_putint64(p->top->sink, parser_getsel(p), seconds); - end_member(p); - - /* Continue previous environment */ - multipart_startaccum(p); - - return true; -} - -static void start_fieldmask_path_text(upb_json_parser *p, const char *ptr) { - capture_begin(p, ptr); -} - -static bool end_fieldmask_path_text(upb_json_parser *p, const char *ptr) { - return capture_end(p, ptr); -} - -static bool start_fieldmask_path(upb_json_parser *p) { - upb_jsonparser_frame *inner; - upb_selector_t sel; - - if (!check_stack(p)) return false; - - /* Start a new parser frame: parser frames correspond one-to-one with - * handler frames, and string events occur in a sub-frame. */ - inner = start_jsonparser_frame(p); - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(p->top->sink, sel, 0, &inner->sink); - inner->m = p->top->m; - inner->f = p->top->f; - p->top = inner; - - multipart_startaccum(p); - return true; -} - -static bool lower_camel_push( - upb_json_parser *p, upb_selector_t sel, const char *ptr, size_t len) { - const char *limit = ptr + len; - bool first = true; - for (;ptr < limit; ptr++) { - if (*ptr >= 'A' && *ptr <= 'Z' && !first) { - char lower = tolower(*ptr); - upb_sink_putstring(p->top->sink, sel, "_", 1, NULL); - upb_sink_putstring(p->top->sink, sel, &lower, 1, NULL); - } else { - upb_sink_putstring(p->top->sink, sel, ptr, 1, NULL); - } - first = false; - } - return true; -} - -static bool end_fieldmask_path(upb_json_parser *p) { - upb_selector_t sel; - - if (!lower_camel_push( - p, getsel_for_handlertype(p, UPB_HANDLER_STRING), - p->accumulated, p->accumulated_len)) { - return false; - } - - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(p->top->sink, sel); - p->top--; - - multipart_end(p); - return true; -} - -static void start_member(upb_json_parser *p) { - UPB_ASSERT(!p->top->f); - multipart_startaccum(p); -} - -/* Helper: invoked during parse_mapentry() to emit the mapentry message's key - * field based on the current contents of the accumulate buffer. */ -static bool parse_mapentry_key(upb_json_parser *p) { - - size_t len; - const char *buf = accumulate_getptr(p, &len); - - /* Emit the key field. We do a bit of ad-hoc parsing here because the - * parser state machine has already decided that this is a string field - * name, and we are reinterpreting it as some arbitrary key type. In - * particular, integer and bool keys are quoted, so we need to parse the - * quoted string contents here. */ - - p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY); - if (p->top->f == NULL) { - upb_status_seterrmsg(p->status, "mapentry message has no key"); - return false; - } - switch (upb_fielddef_type(p->top->f)) { - case UPB_TYPE_INT32: - case UPB_TYPE_INT64: - case UPB_TYPE_UINT32: - case UPB_TYPE_UINT64: - /* Invoke end_number. The accum buffer has the number's text already. */ - if (!parse_number(p, true)) { - return false; - } - break; - case UPB_TYPE_BOOL: - if (len == 4 && !strncmp(buf, "true", 4)) { - if (!parser_putbool(p, true)) { - return false; - } - } else if (len == 5 && !strncmp(buf, "false", 5)) { - if (!parser_putbool(p, false)) { - return false; - } - } else { - upb_status_seterrmsg(p->status, - "Map bool key not 'true' or 'false'"); - return false; - } - multipart_end(p); - break; - case UPB_TYPE_STRING: - case UPB_TYPE_BYTES: { - upb_sink subsink; - upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(p->top->sink, sel, len, &subsink); - sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); - upb_sink_putstring(subsink, sel, buf, len, NULL); - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(subsink, sel); - multipart_end(p); - break; - } - default: - upb_status_seterrmsg(p->status, "Invalid field type for map key"); - return false; - } - - return true; -} - -/* Helper: emit one map entry (as a submessage in the map field sequence). This - * is invoked from end_membername(), at the end of the map entry's key string, - * with the map key in the accumulate buffer. It parses the key from that - * buffer, emits the handler calls to start the mapentry submessage (setting up - * its subframe in the process), and sets up state in the subframe so that the - * value parser (invoked next) will emit the mapentry's value field and then - * end the mapentry message. */ - -static bool handle_mapentry(upb_json_parser *p) { - const upb_fielddef *mapfield; - const upb_msgdef *mapentrymsg; - upb_jsonparser_frame *inner; - upb_selector_t sel; - - /* Map entry: p->top->sink is the seq frame, so we need to start a frame - * for the mapentry itself, and then set |f| in that frame so that the map - * value field is parsed, and also set a flag to end the frame after the - * map-entry value is parsed. */ - if (!check_stack(p)) return false; - - mapfield = p->top->mapfield; - mapentrymsg = upb_fielddef_msgsubdef(mapfield); - - inner = start_jsonparser_frame(p); - p->top->f = mapfield; - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); - upb_sink_startsubmsg(p->top->sink, sel, &inner->sink); - inner->m = mapentrymsg; - inner->mapfield = mapfield; - - /* Don't set this to true *yet* -- we reuse parsing handlers below to push - * the key field value to the sink, and these handlers will pop the frame - * if they see is_mapentry (when invoked by the parser state machine, they - * would have just seen the map-entry value, not key). */ - inner->is_mapentry = false; - p->top = inner; - - /* send STARTMSG in submsg frame. */ - upb_sink_startmsg(p->top->sink); - - parse_mapentry_key(p); - - /* Set up the value field to receive the map-entry value. */ - p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE); - p->top->is_mapentry = true; /* set up to pop frame after value is parsed. */ - p->top->mapfield = mapfield; - if (p->top->f == NULL) { - upb_status_seterrmsg(p->status, "mapentry message has no value"); - return false; - } - - return true; -} - -static bool end_membername(upb_json_parser *p) { - UPB_ASSERT(!p->top->f); - - if (!p->top->m) { - p->top->is_unknown_field = true; - multipart_end(p); - return true; - } - - if (p->top->is_any) { - return end_any_membername(p); - } else if (p->top->is_map) { - return handle_mapentry(p); - } else { - size_t len; - const char *buf = accumulate_getptr(p, &len); - upb_value v; - - if (upb_strtable_lookup2(p->top->name_table, buf, len, &v)) { - p->top->f = upb_value_getconstptr(v); - multipart_end(p); - - return true; - } else if (p->ignore_json_unknown) { - p->top->is_unknown_field = true; - multipart_end(p); - return true; - } else { - upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf); - return false; - } - } -} - -static bool end_any_membername(upb_json_parser *p) { - size_t len; - const char *buf = accumulate_getptr(p, &len); - upb_value v; - - if (len == 5 && strncmp(buf, "@type", len) == 0) { - upb_strtable_lookup2(p->top->name_table, "type_url", 8, &v); - p->top->f = upb_value_getconstptr(v); - multipart_end(p); - return true; - } else { - p->top->is_unknown_field = true; - multipart_end(p); - return true; - } -} - -static void end_member(upb_json_parser *p) { - /* If we just parsed a map-entry value, end that frame too. */ - if (p->top->is_mapentry) { - upb_selector_t sel; - bool ok; - const upb_fielddef *mapfield; - - UPB_ASSERT(p->top > p->stack); - /* send ENDMSG on submsg. */ - upb_sink_endmsg(p->top->sink, p->status); - mapfield = p->top->mapfield; - - /* send ENDSUBMSG in repeated-field-of-mapentries frame. */ - p->top--; - ok = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &sel); - UPB_ASSUME(ok); - upb_sink_endsubmsg(p->top->sink, (p->top + 1)->sink, sel); - } - - p->top->f = NULL; - p->top->is_unknown_field = false; -} - -static void start_any_member(upb_json_parser *p, const char *ptr) { - start_member(p); - json_parser_any_frame_set_after_type_url_start_once(p->top->any_frame, ptr); -} - -static void end_any_member(upb_json_parser *p, const char *ptr) { - json_parser_any_frame_set_before_type_url_end(p->top->any_frame, ptr); - end_member(p); -} - -static bool start_subobject(upb_json_parser *p) { - if (p->top->is_unknown_field) { - if (!check_stack(p)) return false; - - p->top = start_jsonparser_frame(p); - return true; - } - - if (upb_fielddef_ismap(p->top->f)) { - upb_jsonparser_frame *inner; - upb_selector_t sel; - - /* Beginning of a map. Start a new parser frame in a repeated-field - * context. */ - if (!check_stack(p)) return false; - - inner = start_jsonparser_frame(p); - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); - upb_sink_startseq(p->top->sink, sel, &inner->sink); - inner->m = upb_fielddef_msgsubdef(p->top->f); - inner->mapfield = p->top->f; - inner->is_map = true; - p->top = inner; - - return true; - } else if (upb_fielddef_issubmsg(p->top->f)) { - upb_jsonparser_frame *inner; - upb_selector_t sel; - - /* Beginning of a subobject. Start a new parser frame in the submsg - * context. */ - if (!check_stack(p)) return false; - - inner = start_jsonparser_frame(p); - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); - upb_sink_startsubmsg(p->top->sink, sel, &inner->sink); - inner->m = upb_fielddef_msgsubdef(p->top->f); - set_name_table(p, inner); - p->top = inner; - - if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) { - p->top->is_any = true; - p->top->any_frame = json_parser_any_frame_new(p); - } else { - p->top->is_any = false; - p->top->any_frame = NULL; - } - - return true; - } else { - upb_status_seterrf(p->status, - "Object specified for non-message/group field: %s", - upb_fielddef_name(p->top->f)); - return false; - } -} - -static bool start_subobject_full(upb_json_parser *p) { - if (is_top_level(p)) { - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - start_value_object(p, VALUE_STRUCTVALUE); - if (!start_subobject(p)) return false; - start_structvalue_object(p); - } else if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) { - start_structvalue_object(p); - } else { - return true; - } - } else if (is_wellknown_field(p, UPB_WELLKNOWN_STRUCT)) { - if (!start_subobject(p)) return false; - start_structvalue_object(p); - } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { - if (!start_subobject(p)) return false; - start_value_object(p, VALUE_STRUCTVALUE); - if (!start_subobject(p)) return false; - start_structvalue_object(p); - } - - return start_subobject(p); -} - -static void end_subobject(upb_json_parser *p) { - if (is_top_level(p)) { - return; - } - - if (p->top->is_map) { - upb_selector_t sel; - p->top--; - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); - upb_sink_endseq(p->top->sink, sel); - } else { - upb_selector_t sel; - bool is_unknown = p->top->m == NULL; - p->top--; - if (!is_unknown) { - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); - upb_sink_endsubmsg(p->top->sink, (p->top + 1)->sink, sel); - } - } -} - -static void end_subobject_full(upb_json_parser *p) { - end_subobject(p); - - if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) { - end_structvalue_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - } - - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - end_value_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - } -} - -static bool start_array(upb_json_parser *p) { - upb_jsonparser_frame *inner; - upb_selector_t sel; - - if (is_top_level(p)) { - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - start_value_object(p, VALUE_LISTVALUE); - if (!start_subobject(p)) return false; - start_listvalue_object(p); - } else if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) { - start_listvalue_object(p); - } else { - return false; - } - } else if (is_wellknown_field(p, UPB_WELLKNOWN_LISTVALUE) && - (!upb_fielddef_isseq(p->top->f) || - p->top->is_repeated)) { - if (!start_subobject(p)) return false; - start_listvalue_object(p); - } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE) && - (!upb_fielddef_isseq(p->top->f) || - p->top->is_repeated)) { - if (!start_subobject(p)) return false; - start_value_object(p, VALUE_LISTVALUE); - if (!start_subobject(p)) return false; - start_listvalue_object(p); - } - - if (p->top->is_unknown_field) { - inner = start_jsonparser_frame(p); - inner->is_unknown_field = true; - p->top = inner; - - return true; - } - - if (!upb_fielddef_isseq(p->top->f)) { - upb_status_seterrf(p->status, - "Array specified for non-repeated field: %s", - upb_fielddef_name(p->top->f)); - return false; - } - - if (!check_stack(p)) return false; - - inner = start_jsonparser_frame(p); - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); - upb_sink_startseq(p->top->sink, sel, &inner->sink); - inner->m = p->top->m; - inner->f = p->top->f; - inner->is_repeated = true; - p->top = inner; - - return true; -} - -static void end_array(upb_json_parser *p) { - upb_selector_t sel; - - UPB_ASSERT(p->top > p->stack); - - p->top--; - - if (p->top->is_unknown_field) { - return; - } - - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); - upb_sink_endseq(p->top->sink, sel); - - if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) { - end_listvalue_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - } - - if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { - end_value_object(p); - if (!is_top_level(p)) { - end_subobject(p); - } - } -} - -static void start_object(upb_json_parser *p) { - if (!p->top->is_map && p->top->m != NULL) { - upb_sink_startmsg(p->top->sink); - } -} - -static void end_object(upb_json_parser *p) { - if (!p->top->is_map && p->top->m != NULL) { - upb_sink_endmsg(p->top->sink, p->status); - } -} - -static void start_any_object(upb_json_parser *p, const char *ptr) { - start_object(p); - p->top->any_frame->before_type_url_start = ptr; - p->top->any_frame->before_type_url_end = ptr; -} - -static bool end_any_object(upb_json_parser *p, const char *ptr) { - const char *value_membername = "value"; - bool is_well_known_packed = false; - const char *packed_end = ptr + 1; - upb_selector_t sel; - upb_jsonparser_frame *inner; - - if (json_parser_any_frame_has_value(p->top->any_frame) && - !json_parser_any_frame_has_type_url(p->top->any_frame)) { - upb_status_seterrmsg(p->status, "No valid type url"); - return false; - } - - /* Well known types data is represented as value field. */ - if (upb_msgdef_wellknowntype(p->top->any_frame->parser->top->m) != - UPB_WELLKNOWN_UNSPECIFIED) { - is_well_known_packed = true; - - if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) { - p->top->any_frame->before_type_url_start = - memchr(p->top->any_frame->before_type_url_start, ':', - p->top->any_frame->before_type_url_end - - p->top->any_frame->before_type_url_start); - if (p->top->any_frame->before_type_url_start == NULL) { - upb_status_seterrmsg(p->status, "invalid data for well known type."); - return false; - } - p->top->any_frame->before_type_url_start++; - } - - if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) { - p->top->any_frame->after_type_url_start = - memchr(p->top->any_frame->after_type_url_start, ':', - (ptr + 1) - - p->top->any_frame->after_type_url_start); - if (p->top->any_frame->after_type_url_start == NULL) { - upb_status_seterrmsg(p->status, "Invalid data for well known type."); - return false; - } - p->top->any_frame->after_type_url_start++; - packed_end = ptr; - } - } - - if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) { - if (!parse(p->top->any_frame->parser, NULL, - p->top->any_frame->before_type_url_start, - p->top->any_frame->before_type_url_end - - p->top->any_frame->before_type_url_start, NULL)) { - return false; - } - } else { - if (!is_well_known_packed) { - if (!parse(p->top->any_frame->parser, NULL, "{", 1, NULL)) { - return false; - } - } - } - - if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame) && - json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) { - if (!parse(p->top->any_frame->parser, NULL, ",", 1, NULL)) { - return false; - } - } - - if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) { - if (!parse(p->top->any_frame->parser, NULL, - p->top->any_frame->after_type_url_start, - packed_end - p->top->any_frame->after_type_url_start, NULL)) { - return false; - } - } else { - if (!is_well_known_packed) { - if (!parse(p->top->any_frame->parser, NULL, "}", 1, NULL)) { - return false; - } - } - } - - if (!end(p->top->any_frame->parser, NULL)) { - return false; - } - - p->top->is_any = false; - - /* Set value */ - start_member(p); - capture_begin(p, value_membername); - capture_end(p, value_membername + 5); - end_membername(p); - - if (!check_stack(p)) return false; - inner = p->top + 1; - - sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); - upb_sink_startstr(p->top->sink, sel, 0, &inner->sink); - sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); - upb_sink_putstring(inner->sink, sel, p->top->any_frame->stringsink.ptr, - p->top->any_frame->stringsink.len, NULL); - sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(inner->sink, sel); - - end_member(p); - - end_object(p); - - /* Deallocate any parse frame. */ - json_parser_any_frame_free(p->top->any_frame); - - return true; -} - -static bool is_string_wrapper(const upb_msgdef *m) { - upb_wellknowntype_t type = upb_msgdef_wellknowntype(m); - return type == UPB_WELLKNOWN_STRINGVALUE || - type == UPB_WELLKNOWN_BYTESVALUE; -} - -static bool is_fieldmask(const upb_msgdef *m) { - upb_wellknowntype_t type = upb_msgdef_wellknowntype(m); - return type == UPB_WELLKNOWN_FIELDMASK; -} - -static void start_fieldmask_object(upb_json_parser *p) { - const char *membername = "paths"; - - start_object(p); - - /* Set up context for parsing value */ - start_member(p); - capture_begin(p, membername); - capture_end(p, membername + 5); - end_membername(p); - - start_array(p); -} - -static void end_fieldmask_object(upb_json_parser *p) { - end_array(p); - end_member(p); - end_object(p); -} - -static void start_wrapper_object(upb_json_parser *p) { - const char *membername = "value"; - - start_object(p); - - /* Set up context for parsing value */ - start_member(p); - capture_begin(p, membername); - capture_end(p, membername + 5); - end_membername(p); -} - -static void end_wrapper_object(upb_json_parser *p) { - end_member(p); - end_object(p); -} - -static void start_value_object(upb_json_parser *p, int value_type) { - const char *nullmember = "null_value"; - const char *numbermember = "number_value"; - const char *stringmember = "string_value"; - const char *boolmember = "bool_value"; - const char *structmember = "struct_value"; - const char *listmember = "list_value"; - const char *membername = ""; - - switch (value_type) { - case VALUE_NULLVALUE: - membername = nullmember; - break; - case VALUE_NUMBERVALUE: - membername = numbermember; - break; - case VALUE_STRINGVALUE: - membername = stringmember; - break; - case VALUE_BOOLVALUE: - membername = boolmember; - break; - case VALUE_STRUCTVALUE: - membername = structmember; - break; - case VALUE_LISTVALUE: - membername = listmember; - break; - } - - start_object(p); - - /* Set up context for parsing value */ - start_member(p); - capture_begin(p, membername); - capture_end(p, membername + strlen(membername)); - end_membername(p); -} - -static void end_value_object(upb_json_parser *p) { - end_member(p); - end_object(p); -} - -static void start_listvalue_object(upb_json_parser *p) { - const char *membername = "values"; - - start_object(p); - - /* Set up context for parsing value */ - start_member(p); - capture_begin(p, membername); - capture_end(p, membername + strlen(membername)); - end_membername(p); -} - -static void end_listvalue_object(upb_json_parser *p) { - end_member(p); - end_object(p); -} - -static void start_structvalue_object(upb_json_parser *p) { - const char *membername = "fields"; - - start_object(p); - - /* Set up context for parsing value */ - start_member(p); - capture_begin(p, membername); - capture_end(p, membername + strlen(membername)); - end_membername(p); -} - -static void end_structvalue_object(upb_json_parser *p) { - end_member(p); - end_object(p); -} - -static bool is_top_level(upb_json_parser *p) { - return p->top == p->stack && p->top->f == NULL && !p->top->is_unknown_field; -} - -static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type) { - return p->top->m != NULL && upb_msgdef_wellknowntype(p->top->m) == type; -} - -static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type) { - return p->top->f != NULL && - upb_fielddef_issubmsg(p->top->f) && - (upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(p->top->f)) - == type); -} - -static bool does_number_wrapper_start(upb_json_parser *p) { - return p->top->f != NULL && - upb_fielddef_issubmsg(p->top->f) && - upb_msgdef_isnumberwrapper(upb_fielddef_msgsubdef(p->top->f)); -} - -static bool does_number_wrapper_end(upb_json_parser *p) { - return p->top->m != NULL && upb_msgdef_isnumberwrapper(p->top->m); -} - -static bool is_number_wrapper_object(upb_json_parser *p) { - return p->top->m != NULL && upb_msgdef_isnumberwrapper(p->top->m); -} - -static bool does_string_wrapper_start(upb_json_parser *p) { - return p->top->f != NULL && - upb_fielddef_issubmsg(p->top->f) && - is_string_wrapper(upb_fielddef_msgsubdef(p->top->f)); -} - -static bool does_string_wrapper_end(upb_json_parser *p) { - return p->top->m != NULL && is_string_wrapper(p->top->m); -} - -static bool is_string_wrapper_object(upb_json_parser *p) { - return p->top->m != NULL && is_string_wrapper(p->top->m); -} - -static bool does_fieldmask_start(upb_json_parser *p) { - return p->top->f != NULL && - upb_fielddef_issubmsg(p->top->f) && - is_fieldmask(upb_fielddef_msgsubdef(p->top->f)); -} - -static bool does_fieldmask_end(upb_json_parser *p) { - return p->top->m != NULL && is_fieldmask(p->top->m); -} - -#define CHECK_RETURN_TOP(x) if (!(x)) goto error - - -/* The actual parser **********************************************************/ - -/* What follows is the Ragel parser itself. The language is specified in Ragel - * and the actions call our C functions above. - * - * Ragel has an extensive set of functionality, and we use only a small part of - * it. There are many action types but we only use a few: - * - * ">" -- transition into a machine - * "%" -- transition out of a machine - * "@" -- transition into a final state of a machine. - * - * "@" transitions are tricky because a machine can transition into a final - * state repeatedly. But in some cases we know this can't happen, for example - * a string which is delimited by a final '"' can only transition into its - * final state once, when the closing '"' is seen. */ - - -#line 2780 "upb/json/parser.rl" - - - -#line 2583 "upb/json/parser.c" -static const char _json_actions[] = { - 0, 1, 0, 1, 1, 1, 3, 1, - 4, 1, 6, 1, 7, 1, 8, 1, - 9, 1, 11, 1, 12, 1, 13, 1, - 14, 1, 15, 1, 16, 1, 17, 1, - 18, 1, 19, 1, 20, 1, 22, 1, - 23, 1, 24, 1, 35, 1, 37, 1, - 39, 1, 40, 1, 42, 1, 43, 1, - 44, 1, 46, 1, 48, 1, 49, 1, - 50, 1, 51, 1, 53, 1, 54, 2, - 4, 9, 2, 5, 6, 2, 7, 3, - 2, 7, 9, 2, 21, 26, 2, 25, - 10, 2, 27, 28, 2, 29, 30, 2, - 32, 34, 2, 33, 31, 2, 38, 36, - 2, 40, 42, 2, 45, 2, 2, 46, - 54, 2, 47, 36, 2, 49, 54, 2, - 50, 54, 2, 51, 54, 2, 52, 41, - 2, 53, 54, 3, 32, 34, 35, 4, - 21, 26, 27, 28 -}; - -static const short _json_key_offsets[] = { - 0, 0, 12, 13, 18, 23, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, - 38, 43, 44, 48, 53, 58, 63, 67, - 71, 74, 77, 79, 83, 87, 89, 91, - 96, 98, 100, 109, 115, 121, 127, 133, - 135, 139, 142, 144, 146, 149, 150, 154, - 156, 158, 160, 162, 163, 165, 167, 168, - 170, 172, 173, 175, 177, 178, 180, 182, - 183, 185, 187, 191, 193, 195, 196, 197, - 198, 199, 201, 206, 208, 210, 212, 221, - 222, 222, 222, 227, 232, 237, 238, 239, - 240, 241, 241, 242, 243, 244, 244, 245, - 246, 247, 247, 252, 253, 257, 262, 267, - 272, 276, 276, 279, 282, 285, 288, 291, - 294, 294, 294, 294, 294, 294 -}; - -static const char _json_trans_keys[] = { - 32, 34, 45, 91, 102, 110, 116, 123, - 9, 13, 48, 57, 34, 32, 93, 125, - 9, 13, 32, 44, 93, 9, 13, 32, - 93, 125, 9, 13, 97, 108, 115, 101, - 117, 108, 108, 114, 117, 101, 32, 34, - 125, 9, 13, 34, 32, 58, 9, 13, - 32, 93, 125, 9, 13, 32, 44, 125, - 9, 13, 32, 44, 125, 9, 13, 32, - 34, 9, 13, 45, 48, 49, 57, 48, - 49, 57, 46, 69, 101, 48, 57, 69, - 101, 48, 57, 43, 45, 48, 57, 48, - 57, 48, 57, 46, 69, 101, 48, 57, - 34, 92, 34, 92, 34, 47, 92, 98, - 102, 110, 114, 116, 117, 48, 57, 65, - 70, 97, 102, 48, 57, 65, 70, 97, - 102, 48, 57, 65, 70, 97, 102, 48, - 57, 65, 70, 97, 102, 34, 92, 45, - 48, 49, 57, 48, 49, 57, 46, 115, - 48, 57, 115, 48, 57, 34, 46, 115, - 48, 57, 48, 57, 48, 57, 48, 57, - 48, 57, 45, 48, 57, 48, 57, 45, - 48, 57, 48, 57, 84, 48, 57, 48, - 57, 58, 48, 57, 48, 57, 58, 48, - 57, 48, 57, 43, 45, 46, 90, 48, - 57, 48, 57, 58, 48, 48, 34, 48, - 57, 43, 45, 90, 48, 57, 34, 44, - 34, 44, 34, 44, 34, 45, 91, 102, - 110, 116, 123, 48, 57, 34, 32, 93, - 125, 9, 13, 32, 44, 93, 9, 13, - 32, 93, 125, 9, 13, 97, 108, 115, - 101, 117, 108, 108, 114, 117, 101, 32, - 34, 125, 9, 13, 34, 32, 58, 9, - 13, 32, 93, 125, 9, 13, 32, 44, - 125, 9, 13, 32, 44, 125, 9, 13, - 32, 34, 9, 13, 32, 9, 13, 32, - 9, 13, 32, 9, 13, 32, 9, 13, - 32, 9, 13, 32, 9, 13, 0 -}; - -static const char _json_single_lengths[] = { - 0, 8, 1, 3, 3, 3, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 3, 1, 2, 3, 3, 3, 2, 2, - 1, 3, 0, 2, 2, 0, 0, 3, - 2, 2, 9, 0, 0, 0, 0, 2, - 2, 1, 2, 0, 1, 1, 2, 0, - 0, 0, 0, 1, 0, 0, 1, 0, - 0, 1, 0, 0, 1, 0, 0, 1, - 0, 0, 4, 0, 0, 1, 1, 1, - 1, 0, 3, 2, 2, 2, 7, 1, - 0, 0, 3, 3, 3, 1, 1, 1, - 1, 0, 1, 1, 1, 0, 1, 1, - 1, 0, 3, 1, 2, 3, 3, 3, - 2, 0, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0 -}; - -static const char _json_range_lengths[] = { - 0, 2, 0, 1, 1, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 1, 1, 1, 1, 1, 1, - 1, 0, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 3, 3, 3, 3, 0, - 1, 1, 0, 1, 1, 0, 1, 1, - 1, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 1, 1, 0, 1, 1, 0, - 1, 1, 0, 1, 1, 0, 0, 0, - 0, 1, 1, 0, 0, 0, 1, 0, - 0, 0, 1, 1, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 1, 1, 1, 1, - 1, 0, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0 -}; - -static const short _json_index_offsets[] = { - 0, 0, 11, 13, 18, 23, 28, 30, - 32, 34, 36, 38, 40, 42, 44, 46, - 48, 53, 55, 59, 64, 69, 74, 78, - 82, 85, 89, 91, 95, 99, 101, 103, - 108, 111, 114, 124, 128, 132, 136, 140, - 143, 147, 150, 153, 155, 158, 160, 164, - 166, 168, 170, 172, 174, 176, 178, 180, - 182, 184, 186, 188, 190, 192, 194, 196, - 198, 200, 202, 207, 209, 211, 213, 215, - 217, 219, 221, 226, 229, 232, 235, 244, - 246, 247, 248, 253, 258, 263, 265, 267, - 269, 271, 272, 274, 276, 278, 279, 281, - 283, 285, 286, 291, 293, 297, 302, 307, - 312, 316, 317, 320, 323, 326, 329, 332, - 335, 336, 337, 338, 339, 340 -}; - -static const unsigned char _json_indicies[] = { - 0, 2, 3, 4, 5, 6, 7, 8, - 0, 3, 1, 9, 1, 11, 12, 1, - 11, 10, 13, 14, 12, 13, 1, 14, - 1, 1, 14, 10, 15, 1, 16, 1, - 17, 1, 18, 1, 19, 1, 20, 1, - 21, 1, 22, 1, 23, 1, 24, 1, - 25, 26, 27, 25, 1, 28, 1, 29, - 30, 29, 1, 30, 1, 1, 30, 31, - 32, 33, 34, 32, 1, 35, 36, 27, - 35, 1, 36, 26, 36, 1, 37, 38, - 39, 1, 38, 39, 1, 41, 42, 42, - 40, 43, 1, 42, 42, 43, 40, 44, - 44, 45, 1, 45, 1, 45, 40, 41, - 42, 42, 39, 40, 47, 48, 46, 50, - 51, 49, 52, 52, 52, 52, 52, 52, - 52, 52, 53, 1, 54, 54, 54, 1, - 55, 55, 55, 1, 56, 56, 56, 1, - 57, 57, 57, 1, 59, 60, 58, 61, - 62, 63, 1, 64, 65, 1, 66, 67, - 1, 68, 1, 67, 68, 1, 69, 1, - 66, 67, 65, 1, 70, 1, 71, 1, - 72, 1, 73, 1, 74, 1, 75, 1, - 76, 1, 77, 1, 78, 1, 79, 1, - 80, 1, 81, 1, 82, 1, 83, 1, - 84, 1, 85, 1, 86, 1, 87, 1, - 88, 1, 89, 89, 90, 91, 1, 92, - 1, 93, 1, 94, 1, 95, 1, 96, - 1, 97, 1, 98, 1, 99, 99, 100, - 98, 1, 102, 1, 101, 104, 105, 103, - 1, 1, 101, 106, 107, 108, 109, 110, - 111, 112, 107, 1, 113, 1, 114, 115, - 117, 118, 1, 117, 116, 119, 120, 118, - 119, 1, 120, 1, 1, 120, 116, 121, - 1, 122, 1, 123, 1, 124, 1, 125, - 126, 1, 127, 1, 128, 1, 129, 130, - 1, 131, 1, 132, 1, 133, 134, 135, - 136, 134, 1, 137, 1, 138, 139, 138, - 1, 139, 1, 1, 139, 140, 141, 142, - 143, 141, 1, 144, 145, 136, 144, 1, - 145, 135, 145, 1, 146, 147, 147, 1, - 148, 148, 1, 149, 149, 1, 150, 150, - 1, 151, 151, 1, 152, 152, 1, 1, - 1, 1, 1, 1, 1, 0 -}; - -static const char _json_trans_targs[] = { - 1, 0, 2, 107, 3, 6, 10, 13, - 16, 106, 4, 3, 106, 4, 5, 7, - 8, 9, 108, 11, 12, 109, 14, 15, - 110, 16, 17, 111, 18, 18, 19, 20, - 21, 22, 111, 21, 22, 24, 25, 31, - 112, 26, 28, 27, 29, 30, 33, 113, - 34, 33, 113, 34, 32, 35, 36, 37, - 38, 39, 33, 113, 34, 41, 42, 46, - 42, 46, 43, 45, 44, 114, 48, 49, - 50, 51, 52, 53, 54, 55, 56, 57, - 58, 59, 60, 61, 62, 63, 64, 65, - 66, 67, 73, 72, 68, 69, 70, 71, - 72, 115, 74, 67, 72, 76, 116, 76, - 116, 77, 79, 81, 82, 85, 90, 94, - 98, 80, 117, 117, 83, 82, 80, 83, - 84, 86, 87, 88, 89, 117, 91, 92, - 93, 117, 95, 96, 97, 117, 98, 99, - 105, 100, 100, 101, 102, 103, 104, 105, - 103, 104, 117, 106, 106, 106, 106, 106, - 106 -}; - -static const unsigned char _json_trans_actions[] = { - 0, 0, 113, 107, 53, 0, 0, 0, - 125, 59, 45, 0, 55, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 101, 51, 47, 0, 0, 45, - 49, 49, 104, 0, 0, 0, 0, 0, - 3, 0, 0, 0, 0, 0, 5, 15, - 0, 0, 71, 7, 13, 0, 74, 9, - 9, 9, 77, 80, 11, 37, 37, 37, - 0, 0, 0, 39, 0, 41, 86, 0, - 0, 0, 17, 19, 0, 21, 23, 0, - 25, 27, 0, 29, 31, 0, 33, 35, - 0, 135, 83, 135, 0, 0, 0, 0, - 0, 92, 0, 89, 89, 98, 43, 0, - 131, 95, 113, 107, 53, 0, 0, 0, - 125, 59, 69, 110, 45, 0, 55, 0, - 0, 0, 0, 0, 0, 119, 0, 0, - 0, 122, 0, 0, 0, 116, 0, 101, - 51, 47, 0, 0, 45, 49, 49, 104, - 0, 0, 128, 0, 57, 63, 65, 61, - 67 -}; - -static const unsigned char _json_eof_actions[] = { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 0, 1, 0, 0, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 57, 63, 65, 61, 67, - 0, 0, 0, 0, 0, 0 -}; - -static const int json_start = 1; - -static const int json_en_number_machine = 23; -static const int json_en_string_machine = 32; -static const int json_en_duration_machine = 40; -static const int json_en_timestamp_machine = 47; -static const int json_en_fieldmask_machine = 75; -static const int json_en_value_machine = 78; -static const int json_en_main = 1; - - -#line 2783 "upb/json/parser.rl" - -size_t parse(void *closure, const void *hd, const char *buf, size_t size, - const upb_bufhandle *handle) { - upb_json_parser *parser = closure; - - /* Variables used by Ragel's generated code. */ - int cs = parser->current_state; - int *stack = parser->parser_stack; - int top = parser->parser_top; - - const char *p = buf; - const char *pe = buf + size; - const char *eof = &eof_ch; - - parser->handle = handle; - - UPB_UNUSED(hd); - UPB_UNUSED(handle); - - capture_resume(parser, buf); - - -#line 2861 "upb/json/parser.c" - { - int _klen; - unsigned int _trans; - const char *_acts; - unsigned int _nacts; - const char *_keys; - - if ( p == pe ) - goto _test_eof; - if ( cs == 0 ) - goto _out; -_resume: - _keys = _json_trans_keys + _json_key_offsets[cs]; - _trans = _json_index_offsets[cs]; - - _klen = _json_single_lengths[cs]; - if ( _klen > 0 ) { - const char *_lower = _keys; - const char *_mid; - const char *_upper = _keys + _klen - 1; - while (1) { - if ( _upper < _lower ) - break; - - _mid = _lower + ((_upper-_lower) >> 1); - if ( (*p) < *_mid ) - _upper = _mid - 1; - else if ( (*p) > *_mid ) - _lower = _mid + 1; - else { - _trans += (unsigned int)(_mid - _keys); - goto _match; - } - } - _keys += _klen; - _trans += _klen; - } - - _klen = _json_range_lengths[cs]; - if ( _klen > 0 ) { - const char *_lower = _keys; - const char *_mid; - const char *_upper = _keys + (_klen<<1) - 2; - while (1) { - if ( _upper < _lower ) - break; - - _mid = _lower + (((_upper-_lower) >> 1) & ~1); - if ( (*p) < _mid[0] ) - _upper = _mid - 2; - else if ( (*p) > _mid[1] ) - _lower = _mid + 2; - else { - _trans += (unsigned int)((_mid - _keys)>>1); - goto _match; - } - } - _trans += _klen; - } - -_match: - _trans = _json_indicies[_trans]; - cs = _json_trans_targs[_trans]; - - if ( _json_trans_actions[_trans] == 0 ) - goto _again; - - _acts = _json_actions + _json_trans_actions[_trans]; - _nacts = (unsigned int) *_acts++; - while ( _nacts-- > 0 ) - { - switch ( *_acts++ ) - { - case 1: -#line 2588 "upb/json/parser.rl" - { p--; {cs = stack[--top]; goto _again;} } - break; - case 2: -#line 2590 "upb/json/parser.rl" - { p--; {stack[top++] = cs; cs = 23;goto _again;} } - break; - case 3: -#line 2594 "upb/json/parser.rl" - { start_text(parser, p); } - break; - case 4: -#line 2595 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_text(parser, p)); } - break; - case 5: -#line 2601 "upb/json/parser.rl" - { start_hex(parser); } - break; - case 6: -#line 2602 "upb/json/parser.rl" - { hexdigit(parser, p); } - break; - case 7: -#line 2603 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_hex(parser)); } - break; - case 8: -#line 2609 "upb/json/parser.rl" - { CHECK_RETURN_TOP(escape(parser, p)); } - break; - case 9: -#line 2615 "upb/json/parser.rl" - { p--; {cs = stack[--top]; goto _again;} } - break; - case 10: -#line 2620 "upb/json/parser.rl" - { start_year(parser, p); } - break; - case 11: -#line 2621 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_year(parser, p)); } - break; - case 12: -#line 2625 "upb/json/parser.rl" - { start_month(parser, p); } - break; - case 13: -#line 2626 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_month(parser, p)); } - break; - case 14: -#line 2630 "upb/json/parser.rl" - { start_day(parser, p); } - break; - case 15: -#line 2631 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_day(parser, p)); } - break; - case 16: -#line 2635 "upb/json/parser.rl" - { start_hour(parser, p); } - break; - case 17: -#line 2636 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_hour(parser, p)); } - break; - case 18: -#line 2640 "upb/json/parser.rl" - { start_minute(parser, p); } - break; - case 19: -#line 2641 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_minute(parser, p)); } - break; - case 20: -#line 2645 "upb/json/parser.rl" - { start_second(parser, p); } - break; - case 21: -#line 2646 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_second(parser, p)); } - break; - case 22: -#line 2651 "upb/json/parser.rl" - { start_duration_base(parser, p); } - break; - case 23: -#line 2652 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_duration_base(parser, p)); } - break; - case 24: -#line 2654 "upb/json/parser.rl" - { p--; {cs = stack[--top]; goto _again;} } - break; - case 25: -#line 2659 "upb/json/parser.rl" - { start_timestamp_base(parser); } - break; - case 26: -#line 2661 "upb/json/parser.rl" - { start_timestamp_fraction(parser, p); } - break; - case 27: -#line 2662 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_timestamp_fraction(parser, p)); } - break; - case 28: -#line 2664 "upb/json/parser.rl" - { start_timestamp_zone(parser, p); } - break; - case 29: -#line 2665 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_timestamp_zone(parser, p)); } - break; - case 30: -#line 2667 "upb/json/parser.rl" - { p--; {cs = stack[--top]; goto _again;} } - break; - case 31: -#line 2672 "upb/json/parser.rl" - { start_fieldmask_path_text(parser, p); } - break; - case 32: -#line 2673 "upb/json/parser.rl" - { end_fieldmask_path_text(parser, p); } - break; - case 33: -#line 2678 "upb/json/parser.rl" - { start_fieldmask_path(parser); } - break; - case 34: -#line 2679 "upb/json/parser.rl" - { end_fieldmask_path(parser); } - break; - case 35: -#line 2685 "upb/json/parser.rl" - { p--; {cs = stack[--top]; goto _again;} } - break; - case 36: -#line 2690 "upb/json/parser.rl" - { - if (is_wellknown_msg(parser, UPB_WELLKNOWN_TIMESTAMP)) { - {stack[top++] = cs; cs = 47;goto _again;} - } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_DURATION)) { - {stack[top++] = cs; cs = 40;goto _again;} - } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_FIELDMASK)) { - {stack[top++] = cs; cs = 75;goto _again;} - } else { - {stack[top++] = cs; cs = 32;goto _again;} - } - } - break; - case 37: -#line 2703 "upb/json/parser.rl" - { p--; {stack[top++] = cs; cs = 78;goto _again;} } - break; - case 38: -#line 2708 "upb/json/parser.rl" - { - if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { - start_any_member(parser, p); - } else { - start_member(parser); - } - } - break; - case 39: -#line 2715 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_membername(parser)); } - break; - case 40: -#line 2718 "upb/json/parser.rl" - { - if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { - end_any_member(parser, p); - } else { - end_member(parser); - } - } - break; - case 41: -#line 2729 "upb/json/parser.rl" - { - if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { - start_any_object(parser, p); - } else { - start_object(parser); - } - } - break; - case 42: -#line 2738 "upb/json/parser.rl" - { - if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { - CHECK_RETURN_TOP(end_any_object(parser, p)); - } else { - end_object(parser); - } - } - break; - case 43: -#line 2750 "upb/json/parser.rl" - { CHECK_RETURN_TOP(start_array(parser)); } - break; - case 44: -#line 2754 "upb/json/parser.rl" - { end_array(parser); } - break; - case 45: -#line 2759 "upb/json/parser.rl" - { CHECK_RETURN_TOP(start_number(parser, p)); } - break; - case 46: -#line 2760 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_number(parser, p)); } - break; - case 47: -#line 2762 "upb/json/parser.rl" - { CHECK_RETURN_TOP(start_stringval(parser)); } - break; - case 48: -#line 2763 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_stringval(parser)); } - break; - case 49: -#line 2765 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_bool(parser, true)); } - break; - case 50: -#line 2767 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_bool(parser, false)); } - break; - case 51: -#line 2769 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_null(parser)); } - break; - case 52: -#line 2771 "upb/json/parser.rl" - { CHECK_RETURN_TOP(start_subobject_full(parser)); } - break; - case 53: -#line 2772 "upb/json/parser.rl" - { end_subobject_full(parser); } - break; - case 54: -#line 2777 "upb/json/parser.rl" - { p--; {cs = stack[--top]; goto _again;} } - break; -#line 3185 "upb/json/parser.c" - } - } - -_again: - if ( cs == 0 ) - goto _out; - if ( ++p != pe ) - goto _resume; - _test_eof: {} - if ( p == eof ) - { - const char *__acts = _json_actions + _json_eof_actions[cs]; - unsigned int __nacts = (unsigned int) *__acts++; - while ( __nacts-- > 0 ) { - switch ( *__acts++ ) { - case 0: -#line 2586 "upb/json/parser.rl" - { p--; {cs = stack[--top]; if ( p == pe ) - goto _test_eof; -goto _again;} } - break; - case 46: -#line 2760 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_number(parser, p)); } - break; - case 49: -#line 2765 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_bool(parser, true)); } - break; - case 50: -#line 2767 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_bool(parser, false)); } - break; - case 51: -#line 2769 "upb/json/parser.rl" - { CHECK_RETURN_TOP(end_null(parser)); } - break; - case 53: -#line 2772 "upb/json/parser.rl" - { end_subobject_full(parser); } - break; -#line 3227 "upb/json/parser.c" - } - } - } - - _out: {} - } - -#line 2805 "upb/json/parser.rl" - - if (p != pe) { - upb_status_seterrf(parser->status, "Parse error at '%.*s'\n", pe - p, p); - } else { - capture_suspend(parser, &p); - } - -error: - /* Save parsing state back to parser. */ - parser->current_state = cs; - parser->parser_top = top; - - return p - buf; -} - -static bool end(void *closure, const void *hd) { - upb_json_parser *parser = closure; - - /* Prevent compile warning on unused static constants. */ - UPB_UNUSED(json_start); - UPB_UNUSED(json_en_duration_machine); - UPB_UNUSED(json_en_fieldmask_machine); - UPB_UNUSED(json_en_number_machine); - UPB_UNUSED(json_en_string_machine); - UPB_UNUSED(json_en_timestamp_machine); - UPB_UNUSED(json_en_value_machine); - UPB_UNUSED(json_en_main); - - parse(parser, hd, &eof_ch, 0, NULL); - - return parser->current_state >= 106; -} - -static void json_parser_reset(upb_json_parser *p) { - int cs; - int top; - - p->top = p->stack; - init_frame(p->top); - - /* Emit Ragel initialization of the parser. */ - -#line 3278 "upb/json/parser.c" - { - cs = json_start; - top = 0; - } - -#line 2847 "upb/json/parser.rl" - p->current_state = cs; - p->parser_top = top; - accumulate_clear(p); - p->multipart_state = MULTIPART_INACTIVE; - p->capture = NULL; - p->accumulated = NULL; -} - -static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c, - const upb_msgdef *md) { - upb_msg_field_iter i; - upb_alloc *alloc = upb_arena_alloc(c->arena); - - upb_json_parsermethod *m = upb_malloc(alloc, sizeof(*m)); - - m->cache = c; - - upb_byteshandler_init(&m->input_handler_); - upb_byteshandler_setstring(&m->input_handler_, parse, m); - upb_byteshandler_setendstr(&m->input_handler_, end, m); - - upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, alloc); - - /* Build name_table */ - - for(upb_msg_field_begin(&i, md); - !upb_msg_field_done(&i); - upb_msg_field_next(&i)) { - const upb_fielddef *f = upb_msg_iter_field(&i); - upb_value v = upb_value_constptr(f); - const char *name; - - /* Add an entry for the JSON name. */ - name = upb_fielddef_jsonname(f); - upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc); - - if (strcmp(name, upb_fielddef_name(f)) != 0) { - /* Since the JSON name is different from the regular field name, add an - * entry for the raw name (compliant proto3 JSON parsers must accept - * both). */ - const char *name = upb_fielddef_name(f); - upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc); - } - } - - return m; -} - -/* Public API *****************************************************************/ - -upb_json_parser *upb_json_parser_create(upb_arena *arena, - const upb_json_parsermethod *method, - const upb_symtab* symtab, - upb_sink output, - upb_status *status, - bool ignore_json_unknown) { -#ifndef NDEBUG - const size_t size_before = upb_arena_bytesallocated(arena); -#endif - upb_json_parser *p = upb_arena_malloc(arena, sizeof(upb_json_parser)); - if (!p) return false; - - p->arena = arena; - p->method = method; - p->status = status; - p->limit = p->stack + UPB_JSON_MAX_DEPTH; - p->accumulate_buf = NULL; - p->accumulate_buf_size = 0; - upb_bytessink_reset(&p->input_, &method->input_handler_, p); - - json_parser_reset(p); - p->top->sink = output; - p->top->m = upb_handlers_msgdef(output.handlers); - if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) { - p->top->is_any = true; - p->top->any_frame = json_parser_any_frame_new(p); - } else { - p->top->is_any = false; - p->top->any_frame = NULL; - } - set_name_table(p, p->top); - p->symtab = symtab; - - p->ignore_json_unknown = ignore_json_unknown; - - /* If this fails, uncomment and increase the value in parser.h. */ - /* fprintf(stderr, "%zd\n", upb_arena_bytesallocated(arena) - size_before); */ - UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(arena) - size_before <= - UPB_JSON_PARSER_SIZE); - return p; -} - -upb_bytessink upb_json_parser_input(upb_json_parser *p) { - return p->input_; -} - -const upb_byteshandler *upb_json_parsermethod_inputhandler( - const upb_json_parsermethod *m) { - return &m->input_handler_; -} - -upb_json_codecache *upb_json_codecache_new(void) { - upb_alloc *alloc; - upb_json_codecache *c; - - c = upb_gmalloc(sizeof(*c)); - - c->arena = upb_arena_new(); - alloc = upb_arena_alloc(c->arena); - - upb_inttable_init2(&c->methods, UPB_CTYPE_CONSTPTR, alloc); - - return c; -} - -void upb_json_codecache_free(upb_json_codecache *c) { - upb_arena_free(c->arena); - upb_gfree(c); -} - -const upb_json_parsermethod *upb_json_codecache_get(upb_json_codecache *c, - const upb_msgdef *md) { - upb_json_parsermethod *m; - upb_value v; - upb_msg_field_iter i; - upb_alloc *alloc = upb_arena_alloc(c->arena); - - if (upb_inttable_lookupptr(&c->methods, md, &v)) { - return upb_value_getconstptr(v); - } - - m = parsermethod_new(c, md); - v = upb_value_constptr(m); - - if (!m) return NULL; - if (!upb_inttable_insertptr2(&c->methods, md, v, alloc)) return NULL; - - /* Populate parser methods for all submessages, so the name tables will - * be available during parsing. */ - for(upb_msg_field_begin(&i, md); - !upb_msg_field_done(&i); - upb_msg_field_next(&i)) { - upb_fielddef *f = upb_msg_iter_field(&i); - - if (upb_fielddef_issubmsg(f)) { - const upb_msgdef *subdef = upb_fielddef_msgsubdef(f); - const upb_json_parsermethod *sub_method = - upb_json_codecache_get(c, subdef); - - if (!sub_method) return NULL; - } - } - - return m; -} -/* -** This currently uses snprintf() to format primitives, and could be optimized -** further. -*/ - - -#include -#include -#include -#include -#include - - -struct upb_json_printer { - upb_sink input_; - /* BytesSink closure. */ - void *subc_; - upb_bytessink output_; - - /* We track the depth so that we know when to emit startstr/endstr on the - * output. */ - int depth_; - - /* Have we emitted the first element? This state is necessary to emit commas - * without leaving a trailing comma in arrays/maps. We keep this state per - * frame depth. - * - * Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages. - * We count frames (contexts in which we separate elements by commas) as both - * repeated fields and messages (maps), and the worst case is a - * message->repeated field->submessage->repeated field->... nesting. */ - bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2]; - - /* To print timestamp, printer needs to cache its seconds and nanos values - * and convert them when ending timestamp message. See comments of - * printer_sethandlers_timestamp for more detail. */ - int64_t seconds; - int32_t nanos; -}; - -/* StringPiece; a pointer plus a length. */ -typedef struct { - char *ptr; - size_t len; -} strpc; - -void freestrpc(void *ptr) { - strpc *pc = ptr; - upb_gfree(pc->ptr); - upb_gfree(pc); -} - -typedef struct { - bool preserve_fieldnames; -} upb_json_printercache; - -/* Convert fielddef name to JSON name and return as a string piece. */ -strpc *newstrpc(upb_handlers *h, const upb_fielddef *f, - bool preserve_fieldnames) { - /* TODO(haberman): handle malloc failure. */ - strpc *ret = upb_gmalloc(sizeof(*ret)); - if (preserve_fieldnames) { - ret->ptr = upb_gstrdup(upb_fielddef_name(f)); - ret->len = strlen(ret->ptr); - } else { - ret->ptr = upb_gstrdup(upb_fielddef_jsonname(f)); - ret->len = strlen(ret->ptr); - } - - upb_handlers_addcleanup(h, ret, freestrpc); - return ret; -} - -/* Convert a null-terminated const char* to a string piece. */ -strpc *newstrpc_str(upb_handlers *h, const char * str) { - strpc * ret = upb_gmalloc(sizeof(*ret)); - ret->ptr = upb_gstrdup(str); - ret->len = strlen(str); - upb_handlers_addcleanup(h, ret, freestrpc); - return ret; -} - -/* ------------ JSON string printing: values, maps, arrays ------------------ */ - -static void print_data( - upb_json_printer *p, const char *buf, size_t len) { - /* TODO: Will need to change if we support pushback from the sink. */ - size_t n = upb_bytessink_putbuf(p->output_, p->subc_, buf, len, NULL); - UPB_ASSERT(n == len); -} - -static void print_comma(upb_json_printer *p) { - if (!p->first_elem_[p->depth_]) { - print_data(p, ",", 1); - } - p->first_elem_[p->depth_] = false; -} - -/* Helpers that print properly formatted elements to the JSON output stream. */ - -/* Used for escaping control chars in strings. */ -static const char kControlCharLimit = 0x20; - -UPB_INLINE bool is_json_escaped(char c) { - /* See RFC 4627. */ - unsigned char uc = (unsigned char)c; - return uc < kControlCharLimit || uc == '"' || uc == '\\'; -} - -UPB_INLINE const char* json_nice_escape(char c) { - switch (c) { - case '"': return "\\\""; - case '\\': return "\\\\"; - case '\b': return "\\b"; - case '\f': return "\\f"; - case '\n': return "\\n"; - case '\r': return "\\r"; - case '\t': return "\\t"; - default: return NULL; - } -} - -/* Write a properly escaped string chunk. The surrounding quotes are *not* - * printed; this is so that the caller has the option of emitting the string - * content in chunks. */ -static void putstring(upb_json_printer *p, const char *buf, size_t len) { - const char* unescaped_run = NULL; - unsigned int i; - for (i = 0; i < len; i++) { - char c = buf[i]; - /* Handle escaping. */ - if (is_json_escaped(c)) { - /* Use a "nice" escape, like \n, if one exists for this character. */ - const char* escape = json_nice_escape(c); - /* If we don't have a specific 'nice' escape code, use a \uXXXX-style - * escape. */ - char escape_buf[8]; - if (!escape) { - unsigned char byte = (unsigned char)c; - _upb_snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte); - escape = escape_buf; - } - - /* N.B. that we assume that the input encoding is equal to the output - * encoding (both UTF-8 for now), so for chars >= 0x20 and != \, ", we - * can simply pass the bytes through. */ - - /* If there's a current run of unescaped chars, print that run first. */ - if (unescaped_run) { - print_data(p, unescaped_run, &buf[i] - unescaped_run); - unescaped_run = NULL; - } - /* Then print the escape code. */ - print_data(p, escape, strlen(escape)); - } else { - /* Add to the current unescaped run of characters. */ - if (unescaped_run == NULL) { - unescaped_run = &buf[i]; - } - } - } - - /* If the string ended in a run of unescaped characters, print that last run. */ - if (unescaped_run) { - print_data(p, unescaped_run, &buf[len] - unescaped_run); - } -} - -#define CHKLENGTH(x) if (!(x)) return -1; - -/* Helpers that format floating point values according to our custom formats. - * Right now we use %.8g and %.17g for float/double, respectively, to match - * proto2::util::JsonFormat's defaults. May want to change this later. */ - -const char neginf[] = "\"-Infinity\""; -const char inf[] = "\"Infinity\""; - -static size_t fmt_double(double val, char* buf, size_t length) { - if (val == UPB_INFINITY) { - CHKLENGTH(length >= strlen(inf)); - strcpy(buf, inf); - return strlen(inf); - } else if (val == -UPB_INFINITY) { - CHKLENGTH(length >= strlen(neginf)); - strcpy(buf, neginf); - return strlen(neginf); - } else { - size_t n = _upb_snprintf(buf, length, "%.17g", val); - CHKLENGTH(n > 0 && n < length); - return n; - } -} - -static size_t fmt_float(float val, char* buf, size_t length) { - size_t n = _upb_snprintf(buf, length, "%.8g", val); - CHKLENGTH(n > 0 && n < length); - return n; -} - -static size_t fmt_bool(bool val, char* buf, size_t length) { - size_t n = _upb_snprintf(buf, length, "%s", (val ? "true" : "false")); - CHKLENGTH(n > 0 && n < length); - return n; -} - -static size_t fmt_int64_as_number(int64_t val, char* buf, size_t length) { - size_t n = _upb_snprintf(buf, length, "%" PRId64, val); - CHKLENGTH(n > 0 && n < length); - return n; -} - -static size_t fmt_uint64_as_number(uint64_t val, char* buf, size_t length) { - size_t n = _upb_snprintf(buf, length, "%" PRIu64, val); - CHKLENGTH(n > 0 && n < length); - return n; -} - -static size_t fmt_int64_as_string(int64_t val, char* buf, size_t length) { - size_t n = _upb_snprintf(buf, length, "\"%" PRId64 "\"", val); - CHKLENGTH(n > 0 && n < length); - return n; -} - -static size_t fmt_uint64_as_string(uint64_t val, char* buf, size_t length) { - size_t n = _upb_snprintf(buf, length, "\"%" PRIu64 "\"", val); - CHKLENGTH(n > 0 && n < length); - return n; -} - -/* Print a map key given a field name. Called by scalar field handlers and by - * startseq for repeated fields. */ -static bool putkey(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - const strpc *key = handler_data; - print_comma(p); - print_data(p, "\"", 1); - putstring(p, key->ptr, key->len); - print_data(p, "\":", 2); - return true; -} - -#define CHKFMT(val) if ((val) == (size_t)-1) return false; -#define CHK(val) if (!(val)) return false; - -#define TYPE_HANDLERS(type, fmt_func) \ - static bool put##type(void *closure, const void *handler_data, type val) { \ - upb_json_printer *p = closure; \ - char data[64]; \ - size_t length = fmt_func(val, data, sizeof(data)); \ - UPB_UNUSED(handler_data); \ - CHKFMT(length); \ - print_data(p, data, length); \ - return true; \ - } \ - static bool scalar_##type(void *closure, const void *handler_data, \ - type val) { \ - CHK(putkey(closure, handler_data)); \ - CHK(put##type(closure, handler_data, val)); \ - return true; \ - } \ - static bool repeated_##type(void *closure, const void *handler_data, \ - type val) { \ - upb_json_printer *p = closure; \ - print_comma(p); \ - CHK(put##type(closure, handler_data, val)); \ - return true; \ - } - -#define TYPE_HANDLERS_MAPKEY(type, fmt_func) \ - static bool putmapkey_##type(void *closure, const void *handler_data, \ - type val) { \ - upb_json_printer *p = closure; \ - char data[64]; \ - size_t length = fmt_func(val, data, sizeof(data)); \ - UPB_UNUSED(handler_data); \ - print_data(p, "\"", 1); \ - print_data(p, data, length); \ - print_data(p, "\":", 2); \ - return true; \ - } - -TYPE_HANDLERS(double, fmt_double) -TYPE_HANDLERS(float, fmt_float) -TYPE_HANDLERS(bool, fmt_bool) -TYPE_HANDLERS(int32_t, fmt_int64_as_number) -TYPE_HANDLERS(uint32_t, fmt_int64_as_number) -TYPE_HANDLERS(int64_t, fmt_int64_as_string) -TYPE_HANDLERS(uint64_t, fmt_uint64_as_string) - -/* double and float are not allowed to be map keys. */ -TYPE_HANDLERS_MAPKEY(bool, fmt_bool) -TYPE_HANDLERS_MAPKEY(int32_t, fmt_int64_as_number) -TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64_as_number) -TYPE_HANDLERS_MAPKEY(int64_t, fmt_int64_as_number) -TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64_as_number) - -#undef TYPE_HANDLERS -#undef TYPE_HANDLERS_MAPKEY - -typedef struct { - void *keyname; - const upb_enumdef *enumdef; -} EnumHandlerData; - -static bool scalar_enum(void *closure, const void *handler_data, - int32_t val) { - const EnumHandlerData *hd = handler_data; - upb_json_printer *p = closure; - const char *symbolic_name; - - CHK(putkey(closure, hd->keyname)); - - symbolic_name = upb_enumdef_iton(hd->enumdef, val); - if (symbolic_name) { - print_data(p, "\"", 1); - putstring(p, symbolic_name, strlen(symbolic_name)); - print_data(p, "\"", 1); - } else { - putint32_t(closure, NULL, val); - } - - return true; -} - -static void print_enum_symbolic_name(upb_json_printer *p, - const upb_enumdef *def, - int32_t val) { - const char *symbolic_name = upb_enumdef_iton(def, val); - if (symbolic_name) { - print_data(p, "\"", 1); - putstring(p, symbolic_name, strlen(symbolic_name)); - print_data(p, "\"", 1); - } else { - putint32_t(p, NULL, val); - } -} - -static bool repeated_enum(void *closure, const void *handler_data, - int32_t val) { - const EnumHandlerData *hd = handler_data; - upb_json_printer *p = closure; - print_comma(p); - - print_enum_symbolic_name(p, hd->enumdef, val); - - return true; -} - -static bool mapvalue_enum(void *closure, const void *handler_data, - int32_t val) { - const EnumHandlerData *hd = handler_data; - upb_json_printer *p = closure; - - print_enum_symbolic_name(p, hd->enumdef, val); - - return true; -} - -static void *scalar_startsubmsg(void *closure, const void *handler_data) { - return putkey(closure, handler_data) ? closure : UPB_BREAK; -} - -static void *repeated_startsubmsg(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - print_comma(p); - return closure; -} - -static void start_frame(upb_json_printer *p) { - p->depth_++; - p->first_elem_[p->depth_] = true; - print_data(p, "{", 1); -} - -static void end_frame(upb_json_printer *p) { - print_data(p, "}", 1); - p->depth_--; -} - -static bool printer_startmsg(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - if (p->depth_ == 0) { - upb_bytessink_start(p->output_, 0, &p->subc_); - } - start_frame(p); - return true; -} - -static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - UPB_UNUSED(s); - end_frame(p); - if (p->depth_ == 0) { - upb_bytessink_end(p->output_); - } - return true; -} - -static void *startseq(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - CHK(putkey(closure, handler_data)); - p->depth_++; - p->first_elem_[p->depth_] = true; - print_data(p, "[", 1); - return closure; -} - -static bool endseq(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - print_data(p, "]", 1); - p->depth_--; - return true; -} - -static void *startmap(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - CHK(putkey(closure, handler_data)); - p->depth_++; - p->first_elem_[p->depth_] = true; - print_data(p, "{", 1); - return closure; -} - -static bool endmap(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - print_data(p, "}", 1); - p->depth_--; - return true; -} - -static size_t putstr(void *closure, const void *handler_data, const char *str, - size_t len, const upb_bufhandle *handle) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - UPB_UNUSED(handle); - putstring(p, str, len); - return len; -} - -/* This has to Base64 encode the bytes, because JSON has no "bytes" type. */ -static size_t putbytes(void *closure, const void *handler_data, const char *str, - size_t len, const upb_bufhandle *handle) { - upb_json_printer *p = closure; - - /* This is the regular base64, not the "web-safe" version. */ - static const char base64[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - - /* Base64-encode. */ - char data[16000]; - const char *limit = data + sizeof(data); - const unsigned char *from = (const unsigned char*)str; - char *to = data; - size_t remaining = len; - size_t bytes; - - UPB_UNUSED(handler_data); - UPB_UNUSED(handle); - - print_data(p, "\"", 1); - - while (remaining > 2) { - if (limit - to < 4) { - bytes = to - data; - putstring(p, data, bytes); - to = data; - } - - to[0] = base64[from[0] >> 2]; - to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)]; - to[2] = base64[((from[1] & 0xf) << 2) | (from[2] >> 6)]; - to[3] = base64[from[2] & 0x3f]; - - remaining -= 3; - to += 4; - from += 3; - } - - switch (remaining) { - case 2: - to[0] = base64[from[0] >> 2]; - to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)]; - to[2] = base64[(from[1] & 0xf) << 2]; - to[3] = '='; - to += 4; - from += 2; - break; - case 1: - to[0] = base64[from[0] >> 2]; - to[1] = base64[((from[0] & 0x3) << 4)]; - to[2] = '='; - to[3] = '='; - to += 4; - from += 1; - break; - } - - bytes = to - data; - putstring(p, data, bytes); - print_data(p, "\"", 1); - return len; -} - -static void *scalar_startstr(void *closure, const void *handler_data, - size_t size_hint) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - UPB_UNUSED(size_hint); - CHK(putkey(closure, handler_data)); - print_data(p, "\"", 1); - return p; -} - -static size_t scalar_str(void *closure, const void *handler_data, - const char *str, size_t len, - const upb_bufhandle *handle) { - CHK(putstr(closure, handler_data, str, len, handle)); - return len; -} - -static bool scalar_endstr(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - print_data(p, "\"", 1); - return true; -} - -static void *repeated_startstr(void *closure, const void *handler_data, - size_t size_hint) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - UPB_UNUSED(size_hint); - print_comma(p); - print_data(p, "\"", 1); - return p; -} - -static size_t repeated_str(void *closure, const void *handler_data, - const char *str, size_t len, - const upb_bufhandle *handle) { - CHK(putstr(closure, handler_data, str, len, handle)); - return len; -} - -static bool repeated_endstr(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - print_data(p, "\"", 1); - return true; -} - -static void *mapkeyval_startstr(void *closure, const void *handler_data, - size_t size_hint) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - UPB_UNUSED(size_hint); - print_data(p, "\"", 1); - return p; -} - -static size_t mapkey_str(void *closure, const void *handler_data, - const char *str, size_t len, - const upb_bufhandle *handle) { - CHK(putstr(closure, handler_data, str, len, handle)); - return len; -} - -static bool mapkey_endstr(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - print_data(p, "\":", 2); - return true; -} - -static bool mapvalue_endstr(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - print_data(p, "\"", 1); - return true; -} - -static size_t scalar_bytes(void *closure, const void *handler_data, - const char *str, size_t len, - const upb_bufhandle *handle) { - CHK(putkey(closure, handler_data)); - CHK(putbytes(closure, handler_data, str, len, handle)); - return len; -} - -static size_t repeated_bytes(void *closure, const void *handler_data, - const char *str, size_t len, - const upb_bufhandle *handle) { - upb_json_printer *p = closure; - print_comma(p); - CHK(putbytes(closure, handler_data, str, len, handle)); - return len; -} - -static size_t mapkey_bytes(void *closure, const void *handler_data, - const char *str, size_t len, - const upb_bufhandle *handle) { - upb_json_printer *p = closure; - CHK(putbytes(closure, handler_data, str, len, handle)); - print_data(p, ":", 1); - return len; -} - -static void set_enum_hd(upb_handlers *h, - const upb_fielddef *f, - bool preserve_fieldnames, - upb_handlerattr *attr) { - EnumHandlerData *hd = upb_gmalloc(sizeof(EnumHandlerData)); - hd->enumdef = upb_fielddef_enumsubdef(f); - hd->keyname = newstrpc(h, f, preserve_fieldnames); - upb_handlers_addcleanup(h, hd, upb_gfree); - attr->handler_data = hd; -} - -/* Set up handlers for a mapentry submessage (i.e., an individual key/value pair - * in a map). - * - * TODO: Handle missing key, missing value, out-of-order key/value, or repeated - * key or value cases properly. The right way to do this is to allocate a - * temporary structure at the start of a mapentry submessage, store key and - * value data in it as key and value handlers are called, and then print the - * key/value pair once at the end of the submessage. If we don't do this, we - * should at least detect the case and throw an error. However, so far all of - * our sources that emit mapentry messages do so canonically (with one key - * field, and then one value field), so this is not a pressing concern at the - * moment. */ -void printer_sethandlers_mapentry(const void *closure, bool preserve_fieldnames, - upb_handlers *h) { - const upb_msgdef *md = upb_handlers_msgdef(h); - - /* A mapentry message is printed simply as '"key": value'. Rather than - * special-case key and value for every type below, we just handle both - * fields explicitly here. */ - const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY); - const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE); - - upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; - - UPB_UNUSED(closure); - - switch (upb_fielddef_type(key_field)) { - case UPB_TYPE_INT32: - upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr); - break; - case UPB_TYPE_INT64: - upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr); - break; - case UPB_TYPE_UINT32: - upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr); - break; - case UPB_TYPE_UINT64: - upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr); - break; - case UPB_TYPE_BOOL: - upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr); - break; - case UPB_TYPE_STRING: - upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr); - upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr); - upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr); - break; - case UPB_TYPE_BYTES: - upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr); - break; - default: - UPB_ASSERT(false); - break; - } - - switch (upb_fielddef_type(value_field)) { - case UPB_TYPE_INT32: - upb_handlers_setint32(h, value_field, putint32_t, &empty_attr); - break; - case UPB_TYPE_INT64: - upb_handlers_setint64(h, value_field, putint64_t, &empty_attr); - break; - case UPB_TYPE_UINT32: - upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr); - break; - case UPB_TYPE_UINT64: - upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr); - break; - case UPB_TYPE_BOOL: - upb_handlers_setbool(h, value_field, putbool, &empty_attr); - break; - case UPB_TYPE_FLOAT: - upb_handlers_setfloat(h, value_field, putfloat, &empty_attr); - break; - case UPB_TYPE_DOUBLE: - upb_handlers_setdouble(h, value_field, putdouble, &empty_attr); - break; - case UPB_TYPE_STRING: - upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr); - upb_handlers_setstring(h, value_field, putstr, &empty_attr); - upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr); - break; - case UPB_TYPE_BYTES: - upb_handlers_setstring(h, value_field, putbytes, &empty_attr); - break; - case UPB_TYPE_ENUM: { - upb_handlerattr enum_attr = UPB_HANDLERATTR_INIT; - set_enum_hd(h, value_field, preserve_fieldnames, &enum_attr); - upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr); - break; - } - case UPB_TYPE_MESSAGE: - /* No handler necessary -- the submsg handlers will print the message - * as appropriate. */ - break; - } -} - -static bool putseconds(void *closure, const void *handler_data, - int64_t seconds) { - upb_json_printer *p = closure; - p->seconds = seconds; - UPB_UNUSED(handler_data); - return true; -} - -static bool putnanos(void *closure, const void *handler_data, - int32_t nanos) { - upb_json_printer *p = closure; - p->nanos = nanos; - UPB_UNUSED(handler_data); - return true; -} - -static void *scalar_startstr_nokey(void *closure, const void *handler_data, - size_t size_hint) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - UPB_UNUSED(size_hint); - print_data(p, "\"", 1); - return p; -} - -static size_t putstr_nokey(void *closure, const void *handler_data, - const char *str, size_t len, - const upb_bufhandle *handle) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - UPB_UNUSED(handle); - print_data(p, "\"", 1); - putstring(p, str, len); - print_data(p, "\"", 1); - return len + 2; -} - -static void *startseq_nokey(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - p->depth_++; - p->first_elem_[p->depth_] = true; - print_data(p, "[", 1); - return closure; -} - -static void *startseq_fieldmask(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - p->depth_++; - p->first_elem_[p->depth_] = true; - return closure; -} - -static bool endseq_fieldmask(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - p->depth_--; - return true; -} - -static void *repeated_startstr_fieldmask( - void *closure, const void *handler_data, - size_t size_hint) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - UPB_UNUSED(size_hint); - print_comma(p); - return p; -} - -static size_t repeated_str_fieldmask( - void *closure, const void *handler_data, - const char *str, size_t len, - const upb_bufhandle *handle) { - const char* limit = str + len; - bool upper = false; - size_t result_len = 0; - for (; str < limit; str++) { - if (*str == '_') { - upper = true; - continue; - } - if (upper && *str >= 'a' && *str <= 'z') { - char upper_char = toupper(*str); - CHK(putstr(closure, handler_data, &upper_char, 1, handle)); - } else { - CHK(putstr(closure, handler_data, str, 1, handle)); - } - upper = false; - result_len++; - } - return result_len; -} - -static void *startmap_nokey(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - p->depth_++; - p->first_elem_[p->depth_] = true; - print_data(p, "{", 1); - return closure; -} - -static bool putnull(void *closure, const void *handler_data, - int32_t null) { - upb_json_printer *p = closure; - print_data(p, "null", 4); - UPB_UNUSED(handler_data); - UPB_UNUSED(null); - return true; -} - -static bool printer_startdurationmsg(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - if (p->depth_ == 0) { - upb_bytessink_start(p->output_, 0, &p->subc_); - } - return true; -} - -#define UPB_DURATION_MAX_JSON_LEN 23 -#define UPB_DURATION_MAX_NANO_LEN 9 - -static bool printer_enddurationmsg(void *closure, const void *handler_data, - upb_status *s) { - upb_json_printer *p = closure; - char buffer[UPB_DURATION_MAX_JSON_LEN]; - size_t base_len; - size_t curr; - size_t i; - - memset(buffer, 0, UPB_DURATION_MAX_JSON_LEN); - - if (p->seconds < -315576000000) { - upb_status_seterrf(s, "error parsing duration: " - "minimum acceptable value is " - "-315576000000"); - return false; - } - - if (p->seconds > 315576000000) { - upb_status_seterrf(s, "error serializing duration: " - "maximum acceptable value is " - "315576000000"); - return false; - } - - _upb_snprintf(buffer, sizeof(buffer), "%ld", (long)p->seconds); - base_len = strlen(buffer); - - if (p->nanos != 0) { - char nanos_buffer[UPB_DURATION_MAX_NANO_LEN + 3]; - _upb_snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f", - p->nanos / 1000000000.0); - /* Remove trailing 0. */ - for (i = UPB_DURATION_MAX_NANO_LEN + 2; - nanos_buffer[i] == '0'; i--) { - nanos_buffer[i] = 0; - } - strcpy(buffer + base_len, nanos_buffer + 1); - } - - curr = strlen(buffer); - strcpy(buffer + curr, "s"); - - p->seconds = 0; - p->nanos = 0; - - print_data(p, "\"", 1); - print_data(p, buffer, strlen(buffer)); - print_data(p, "\"", 1); - - if (p->depth_ == 0) { - upb_bytessink_end(p->output_); - } - - UPB_UNUSED(handler_data); - return true; -} - -static bool printer_starttimestampmsg(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - if (p->depth_ == 0) { - upb_bytessink_start(p->output_, 0, &p->subc_); - } - return true; -} - -#define UPB_TIMESTAMP_MAX_JSON_LEN 31 -#define UPB_TIMESTAMP_BEFORE_NANO_LEN 19 -#define UPB_TIMESTAMP_MAX_NANO_LEN 9 - -static bool printer_endtimestampmsg(void *closure, const void *handler_data, - upb_status *s) { - upb_json_printer *p = closure; - char buffer[UPB_TIMESTAMP_MAX_JSON_LEN]; - time_t time = p->seconds; - size_t curr; - size_t i; - size_t year_length = - strftime(buffer, UPB_TIMESTAMP_MAX_JSON_LEN, "%Y", gmtime(&time)); - - if (p->seconds < -62135596800) { - upb_status_seterrf(s, "error parsing timestamp: " - "minimum acceptable value is " - "0001-01-01T00:00:00Z"); - return false; - } - - if (p->seconds > 253402300799) { - upb_status_seterrf(s, "error parsing timestamp: " - "maximum acceptable value is " - "9999-12-31T23:59:59Z"); - return false; - } - - /* strftime doesn't guarantee 4 digits for year. Prepend 0 by ourselves. */ - for (i = 0; i < 4 - year_length; i++) { - buffer[i] = '0'; - } - - strftime(buffer + (4 - year_length), UPB_TIMESTAMP_MAX_JSON_LEN, - "%Y-%m-%dT%H:%M:%S", gmtime(&time)); - if (p->nanos != 0) { - char nanos_buffer[UPB_TIMESTAMP_MAX_NANO_LEN + 3]; - _upb_snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f", - p->nanos / 1000000000.0); - /* Remove trailing 0. */ - for (i = UPB_TIMESTAMP_MAX_NANO_LEN + 2; - nanos_buffer[i] == '0'; i--) { - nanos_buffer[i] = 0; - } - strcpy(buffer + UPB_TIMESTAMP_BEFORE_NANO_LEN, nanos_buffer + 1); - } - - curr = strlen(buffer); - strcpy(buffer + curr, "Z"); - - p->seconds = 0; - p->nanos = 0; - - print_data(p, "\"", 1); - print_data(p, buffer, strlen(buffer)); - print_data(p, "\"", 1); - - if (p->depth_ == 0) { - upb_bytessink_end(p->output_); - } - - UPB_UNUSED(handler_data); - UPB_UNUSED(s); - return true; -} - -static bool printer_startmsg_noframe(void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - if (p->depth_ == 0) { - upb_bytessink_start(p->output_, 0, &p->subc_); - } - return true; -} - -static bool printer_endmsg_noframe( - void *closure, const void *handler_data, upb_status *s) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - UPB_UNUSED(s); - if (p->depth_ == 0) { - upb_bytessink_end(p->output_); - } - return true; -} - -static bool printer_startmsg_fieldmask( - void *closure, const void *handler_data) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - if (p->depth_ == 0) { - upb_bytessink_start(p->output_, 0, &p->subc_); - } - print_data(p, "\"", 1); - return true; -} - -static bool printer_endmsg_fieldmask( - void *closure, const void *handler_data, upb_status *s) { - upb_json_printer *p = closure; - UPB_UNUSED(handler_data); - UPB_UNUSED(s); - print_data(p, "\"", 1); - if (p->depth_ == 0) { - upb_bytessink_end(p->output_); - } - return true; -} - -static void *scalar_startstr_onlykey( - void *closure, const void *handler_data, size_t size_hint) { - upb_json_printer *p = closure; - UPB_UNUSED(size_hint); - CHK(putkey(closure, handler_data)); - return p; -} - -/* Set up handlers for an Any submessage. */ -void printer_sethandlers_any(const void *closure, upb_handlers *h) { - const upb_msgdef *md = upb_handlers_msgdef(h); - - const upb_fielddef* type_field = upb_msgdef_itof(md, UPB_ANY_TYPE); - const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_ANY_VALUE); - - upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; - - /* type_url's json name is "@type" */ - upb_handlerattr type_name_attr = UPB_HANDLERATTR_INIT; - upb_handlerattr value_name_attr = UPB_HANDLERATTR_INIT; - strpc *type_url_json_name = newstrpc_str(h, "@type"); - strpc *value_json_name = newstrpc_str(h, "value"); - - type_name_attr.handler_data = type_url_json_name; - value_name_attr.handler_data = value_json_name; - - /* Set up handlers. */ - upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr); - upb_handlers_setendmsg(h, printer_endmsg, &empty_attr); - - upb_handlers_setstartstr(h, type_field, scalar_startstr, &type_name_attr); - upb_handlers_setstring(h, type_field, scalar_str, &empty_attr); - upb_handlers_setendstr(h, type_field, scalar_endstr, &empty_attr); - - /* This is not the full and correct JSON encoding for the Any value field. It - * requires further processing by the wrapper code based on the type URL. - */ - upb_handlers_setstartstr(h, value_field, scalar_startstr_onlykey, - &value_name_attr); - - UPB_UNUSED(closure); -} - -/* Set up handlers for a fieldmask submessage. */ -void printer_sethandlers_fieldmask(const void *closure, upb_handlers *h) { - const upb_msgdef *md = upb_handlers_msgdef(h); - const upb_fielddef* f = upb_msgdef_itof(md, 1); - - upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; - - upb_handlers_setstartseq(h, f, startseq_fieldmask, &empty_attr); - upb_handlers_setendseq(h, f, endseq_fieldmask, &empty_attr); - - upb_handlers_setstartmsg(h, printer_startmsg_fieldmask, &empty_attr); - upb_handlers_setendmsg(h, printer_endmsg_fieldmask, &empty_attr); - - upb_handlers_setstartstr(h, f, repeated_startstr_fieldmask, &empty_attr); - upb_handlers_setstring(h, f, repeated_str_fieldmask, &empty_attr); - - UPB_UNUSED(closure); -} - -/* Set up handlers for a duration submessage. */ -void printer_sethandlers_duration(const void *closure, upb_handlers *h) { - const upb_msgdef *md = upb_handlers_msgdef(h); - - const upb_fielddef* seconds_field = - upb_msgdef_itof(md, UPB_DURATION_SECONDS); - const upb_fielddef* nanos_field = - upb_msgdef_itof(md, UPB_DURATION_NANOS); - - upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; - - upb_handlers_setstartmsg(h, printer_startdurationmsg, &empty_attr); - upb_handlers_setint64(h, seconds_field, putseconds, &empty_attr); - upb_handlers_setint32(h, nanos_field, putnanos, &empty_attr); - upb_handlers_setendmsg(h, printer_enddurationmsg, &empty_attr); - - UPB_UNUSED(closure); -} - -/* Set up handlers for a timestamp submessage. Instead of printing fields - * separately, the json representation of timestamp follows RFC 3339 */ -void printer_sethandlers_timestamp(const void *closure, upb_handlers *h) { - const upb_msgdef *md = upb_handlers_msgdef(h); - - const upb_fielddef* seconds_field = - upb_msgdef_itof(md, UPB_TIMESTAMP_SECONDS); - const upb_fielddef* nanos_field = - upb_msgdef_itof(md, UPB_TIMESTAMP_NANOS); - - upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; - - upb_handlers_setstartmsg(h, printer_starttimestampmsg, &empty_attr); - upb_handlers_setint64(h, seconds_field, putseconds, &empty_attr); - upb_handlers_setint32(h, nanos_field, putnanos, &empty_attr); - upb_handlers_setendmsg(h, printer_endtimestampmsg, &empty_attr); - - UPB_UNUSED(closure); -} - -void printer_sethandlers_value(const void *closure, upb_handlers *h) { - const upb_msgdef *md = upb_handlers_msgdef(h); - upb_msg_field_iter i; - - upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; - - upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr); - upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr); - - upb_msg_field_begin(&i, md); - for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) { - const upb_fielddef *f = upb_msg_iter_field(&i); - - switch (upb_fielddef_type(f)) { - case UPB_TYPE_ENUM: - upb_handlers_setint32(h, f, putnull, &empty_attr); - break; - case UPB_TYPE_DOUBLE: - upb_handlers_setdouble(h, f, putdouble, &empty_attr); - break; - case UPB_TYPE_STRING: - upb_handlers_setstartstr(h, f, scalar_startstr_nokey, &empty_attr); - upb_handlers_setstring(h, f, scalar_str, &empty_attr); - upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr); - break; - case UPB_TYPE_BOOL: - upb_handlers_setbool(h, f, putbool, &empty_attr); - break; - case UPB_TYPE_MESSAGE: - break; - default: - UPB_ASSERT(false); - break; - } - } - - UPB_UNUSED(closure); -} - -#define WRAPPER_SETHANDLERS(wrapper, type, putmethod) \ -void printer_sethandlers_##wrapper(const void *closure, upb_handlers *h) { \ - const upb_msgdef *md = upb_handlers_msgdef(h); \ - const upb_fielddef* f = upb_msgdef_itof(md, 1); \ - upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; \ - upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr); \ - upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr); \ - upb_handlers_set##type(h, f, putmethod, &empty_attr); \ - UPB_UNUSED(closure); \ -} - -WRAPPER_SETHANDLERS(doublevalue, double, putdouble) -WRAPPER_SETHANDLERS(floatvalue, float, putfloat) -WRAPPER_SETHANDLERS(int64value, int64, putint64_t) -WRAPPER_SETHANDLERS(uint64value, uint64, putuint64_t) -WRAPPER_SETHANDLERS(int32value, int32, putint32_t) -WRAPPER_SETHANDLERS(uint32value, uint32, putuint32_t) -WRAPPER_SETHANDLERS(boolvalue, bool, putbool) -WRAPPER_SETHANDLERS(stringvalue, string, putstr_nokey) -WRAPPER_SETHANDLERS(bytesvalue, string, putbytes) - -#undef WRAPPER_SETHANDLERS - -void printer_sethandlers_listvalue(const void *closure, upb_handlers *h) { - const upb_msgdef *md = upb_handlers_msgdef(h); - const upb_fielddef* f = upb_msgdef_itof(md, 1); - - upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; - - upb_handlers_setstartseq(h, f, startseq_nokey, &empty_attr); - upb_handlers_setendseq(h, f, endseq, &empty_attr); - - upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr); - upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr); - - upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &empty_attr); - - UPB_UNUSED(closure); -} - -void printer_sethandlers_structvalue(const void *closure, upb_handlers *h) { - const upb_msgdef *md = upb_handlers_msgdef(h); - const upb_fielddef* f = upb_msgdef_itof(md, 1); - - upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; - - upb_handlers_setstartseq(h, f, startmap_nokey, &empty_attr); - upb_handlers_setendseq(h, f, endmap, &empty_attr); - - upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr); - upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr); - - upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &empty_attr); - - UPB_UNUSED(closure); -} - -void printer_sethandlers(const void *closure, upb_handlers *h) { - const upb_msgdef *md = upb_handlers_msgdef(h); - bool is_mapentry = upb_msgdef_mapentry(md); - upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; - upb_msg_field_iter i; - const upb_json_printercache *cache = closure; - const bool preserve_fieldnames = cache->preserve_fieldnames; - - if (is_mapentry) { - /* mapentry messages are sufficiently different that we handle them - * separately. */ - printer_sethandlers_mapentry(closure, preserve_fieldnames, h); - return; - } - - switch (upb_msgdef_wellknowntype(md)) { - case UPB_WELLKNOWN_UNSPECIFIED: - break; - case UPB_WELLKNOWN_ANY: - printer_sethandlers_any(closure, h); - return; - case UPB_WELLKNOWN_FIELDMASK: - printer_sethandlers_fieldmask(closure, h); - return; - case UPB_WELLKNOWN_DURATION: - printer_sethandlers_duration(closure, h); - return; - case UPB_WELLKNOWN_TIMESTAMP: - printer_sethandlers_timestamp(closure, h); - return; - case UPB_WELLKNOWN_VALUE: - printer_sethandlers_value(closure, h); - return; - case UPB_WELLKNOWN_LISTVALUE: - printer_sethandlers_listvalue(closure, h); - return; - case UPB_WELLKNOWN_STRUCT: - printer_sethandlers_structvalue(closure, h); - return; -#define WRAPPER(wellknowntype, name) \ - case wellknowntype: \ - printer_sethandlers_##name(closure, h); \ - return; \ - - WRAPPER(UPB_WELLKNOWN_DOUBLEVALUE, doublevalue); - WRAPPER(UPB_WELLKNOWN_FLOATVALUE, floatvalue); - WRAPPER(UPB_WELLKNOWN_INT64VALUE, int64value); - WRAPPER(UPB_WELLKNOWN_UINT64VALUE, uint64value); - WRAPPER(UPB_WELLKNOWN_INT32VALUE, int32value); - WRAPPER(UPB_WELLKNOWN_UINT32VALUE, uint32value); - WRAPPER(UPB_WELLKNOWN_BOOLVALUE, boolvalue); - WRAPPER(UPB_WELLKNOWN_STRINGVALUE, stringvalue); - WRAPPER(UPB_WELLKNOWN_BYTESVALUE, bytesvalue); - -#undef WRAPPER - } - - upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr); - upb_handlers_setendmsg(h, printer_endmsg, &empty_attr); - -#define TYPE(type, name, ctype) \ - case type: \ - if (upb_fielddef_isseq(f)) { \ - upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr); \ - } else { \ - upb_handlers_set##name(h, f, scalar_##ctype, &name_attr); \ - } \ - break; - - upb_msg_field_begin(&i, md); - for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) { - const upb_fielddef *f = upb_msg_iter_field(&i); - - upb_handlerattr name_attr = UPB_HANDLERATTR_INIT; - name_attr.handler_data = newstrpc(h, f, preserve_fieldnames); - - if (upb_fielddef_ismap(f)) { - upb_handlers_setstartseq(h, f, startmap, &name_attr); - upb_handlers_setendseq(h, f, endmap, &name_attr); - } else if (upb_fielddef_isseq(f)) { - upb_handlers_setstartseq(h, f, startseq, &name_attr); - upb_handlers_setendseq(h, f, endseq, &empty_attr); - } - - switch (upb_fielddef_type(f)) { - TYPE(UPB_TYPE_FLOAT, float, float); - TYPE(UPB_TYPE_DOUBLE, double, double); - TYPE(UPB_TYPE_BOOL, bool, bool); - TYPE(UPB_TYPE_INT32, int32, int32_t); - TYPE(UPB_TYPE_UINT32, uint32, uint32_t); - TYPE(UPB_TYPE_INT64, int64, int64_t); - TYPE(UPB_TYPE_UINT64, uint64, uint64_t); - case UPB_TYPE_ENUM: { - /* For now, we always emit symbolic names for enums. We may want an - * option later to control this behavior, but we will wait for a real - * need first. */ - upb_handlerattr enum_attr = UPB_HANDLERATTR_INIT; - set_enum_hd(h, f, preserve_fieldnames, &enum_attr); - - if (upb_fielddef_isseq(f)) { - upb_handlers_setint32(h, f, repeated_enum, &enum_attr); - } else { - upb_handlers_setint32(h, f, scalar_enum, &enum_attr); - } - - break; - } - case UPB_TYPE_STRING: - if (upb_fielddef_isseq(f)) { - upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr); - upb_handlers_setstring(h, f, repeated_str, &empty_attr); - upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr); - } else { - upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr); - upb_handlers_setstring(h, f, scalar_str, &empty_attr); - upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr); - } - break; - case UPB_TYPE_BYTES: - /* XXX: this doesn't support strings that span buffers yet. The base64 - * encoder will need to be made resumable for this to work properly. */ - if (upb_fielddef_isseq(f)) { - upb_handlers_setstring(h, f, repeated_bytes, &empty_attr); - } else { - upb_handlers_setstring(h, f, scalar_bytes, &name_attr); - } - break; - case UPB_TYPE_MESSAGE: - if (upb_fielddef_isseq(f)) { - upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &name_attr); - } else { - upb_handlers_setstartsubmsg(h, f, scalar_startsubmsg, &name_attr); - } - break; - } - } - -#undef TYPE -} - -static void json_printer_reset(upb_json_printer *p) { - p->depth_ = 0; -} - - -/* Public API *****************************************************************/ - -upb_json_printer *upb_json_printer_create(upb_arena *a, const upb_handlers *h, - upb_bytessink output) { -#ifndef NDEBUG - size_t size_before = upb_arena_bytesallocated(a); -#endif - - upb_json_printer *p = upb_arena_malloc(a, sizeof(upb_json_printer)); - if (!p) return NULL; - - p->output_ = output; - json_printer_reset(p); - upb_sink_reset(&p->input_, h, p); - p->seconds = 0; - p->nanos = 0; - - /* If this fails, increase the value in printer.h. */ - UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(a) - size_before <= - UPB_JSON_PRINTER_SIZE); - return p; -} - -upb_sink upb_json_printer_input(upb_json_printer *p) { - return p->input_; -} - -upb_handlercache *upb_json_printer_newcache(bool preserve_proto_fieldnames) { - upb_json_printercache *cache = upb_gmalloc(sizeof(*cache)); - upb_handlercache *ret = upb_handlercache_new(printer_sethandlers, cache); - - cache->preserve_fieldnames = preserve_proto_fieldnames; - upb_handlercache_addcleanup(ret, cache, upb_gfree); - - return ret; -} -/* See port_def.inc. This should #undef all macros #defined there. */ - -#undef UPB_MAPTYPE_STRING -#undef UPB_SIZE -#undef UPB_PTR_AT -#undef UPB_READ_ONEOF -#undef UPB_WRITE_ONEOF -#undef UPB_INLINE -#undef UPB_FORCEINLINE -#undef UPB_NOINLINE -#undef UPB_NORETURN -#undef UPB_MAX -#undef UPB_MIN -#undef UPB_UNUSED -#undef UPB_ASSUME -#undef UPB_ASSERT -#undef UPB_ASSERT_DEBUGVAR -#undef UPB_UNREACHABLE -#undef UPB_INFINITY -#undef UPB_MSVC_VSNPRINTF -#undef _upb_snprintf -#undef _upb_vsnprintf -#undef _upb_va_copy diff --git a/ruby/google-protobuf.gemspec b/ruby/google-protobuf.gemspec index 1a18da3e952c..4852f7d587c7 100644 --- a/ruby/google-protobuf.gemspec +++ b/ruby/google-protobuf.gemspec @@ -17,7 +17,7 @@ Gem::Specification.new do |s| else s.files += Dir.glob('ext/**/*') s.extensions= ["ext/google/protobuf_c/extconf.rb"] - s.add_development_dependency "rake-compiler-dock", ">= 1.0.1", "< 2.0" + s.add_development_dependency "rake-compiler-dock", ">= 1.1.0", "< 2.0" end s.test_files = ["tests/basic.rb", "tests/stress.rb", diff --git a/ruby/tests/basic.rb b/ruby/tests/basic.rb index 60a705019ae1..098ac41e7e9c 100755 --- a/ruby/tests/basic.rb +++ b/ruby/tests/basic.rb @@ -216,7 +216,7 @@ def test_map_field m.map_string_int32 = {} end - assert_raise TypeError do + assert_raise Google::Protobuf::TypeError do m = MapMessage.new(:map_string_int32 => { 1 => "I am not a number" }) end end @@ -447,6 +447,7 @@ def test_to_h :optional_int32=>0, :optional_int64=>0, :optional_msg=>nil, + :optional_msg2=>nil, :optional_string=>"foo", :optional_uint32=>0, :optional_uint64=>0, @@ -484,9 +485,9 @@ def test_json_maps m = MapMessage.new(:map_string_int32 => {"a" => 1}) expected = {mapStringInt32: {a: 1}, mapStringMsg: {}, mapStringEnum: {}} expected_preserve = {map_string_int32: {a: 1}, map_string_msg: {}, map_string_enum: {}} - assert_equal JSON.parse(MapMessage.encode_json(m), :symbolize_names => true), expected + assert_equal JSON.parse(MapMessage.encode_json(m, :emit_defaults=>true), :symbolize_names => true), expected - json = MapMessage.encode_json(m, :preserve_proto_fieldnames => true) + json = MapMessage.encode_json(m, :preserve_proto_fieldnames => true, :emit_defaults=>true) assert_equal JSON.parse(json, :symbolize_names => true), expected_preserve m2 = MapMessage.decode_json(MapMessage.encode_json(m)) @@ -496,7 +497,7 @@ def test_json_maps def test_json_maps_emit_defaults_submsg # TODO: Fix JSON in JRuby version. return if RUBY_PLATFORM == "java" - m = MapMessage.new(:map_string_msg => {"a" => TestMessage2.new}) + m = MapMessage.new(:map_string_msg => {"a" => TestMessage2.new(foo: 0)}) expected = {mapStringInt32: {}, mapStringMsg: {a: {foo: 0}}, mapStringEnum: {}} actual = MapMessage.encode_json(m, :emit_defaults => true) @@ -504,6 +505,30 @@ def test_json_maps_emit_defaults_submsg assert_equal JSON.parse(actual, :symbolize_names => true), expected end + def test_json_emit_defaults_submsg + # TODO: Fix JSON in JRuby version. + return if RUBY_PLATFORM == "java" + m = TestSingularFields.new(singular_msg: proto_module::TestMessage2.new) + + expected = { + singularInt32: 0, + singularInt64: "0", + singularUint32: 0, + singularUint64: "0", + singularBool: false, + singularFloat: 0, + singularDouble: 0, + singularString: "", + singularBytes: "", + singularMsg: {}, + singularEnum: "Default", + } + + actual = proto_module::TestMessage.encode_json(m, :emit_defaults => true) + + assert_equal expected, JSON.parse(actual, :symbolize_names => true) + end + def test_respond_to # This test fails with JRuby 1.7.23, likely because of an old JRuby bug. return if RUBY_PLATFORM == "java" diff --git a/ruby/tests/basic_proto2.rb b/ruby/tests/basic_proto2.rb index 2d30a0894414..a7114ea6edf5 100755 --- a/ruby/tests/basic_proto2.rb +++ b/ruby/tests/basic_proto2.rb @@ -237,23 +237,6 @@ def test_to_h assert_equal expected_result, m.to_h end - def test_map_keyword_disabled - pool = Google::Protobuf::DescriptorPool.new - - e = assert_raise ArgumentError do - pool.build do - add_file 'test_file.proto', syntax: :proto2 do - add_message "MapMessage" do - map :map_string_int32, :string, :int32, 1 - map :map_string_msg, :string, :message, 2, "TestMessage2" - end - end - end - end - - assert_match(/Cannot add a native map/, e.message) - end - def test_respond_to # This test fails with JRuby 1.7.23, likely because of an old JRuby bug. return if RUBY_PLATFORM == "java" diff --git a/ruby/tests/basic_test.proto b/ruby/tests/basic_test.proto index 6086879d0923..5b4ed6311c78 100644 --- a/ruby/tests/basic_test.proto +++ b/ruby/tests/basic_test.proto @@ -44,6 +44,8 @@ message TestMessage { repeated bytes repeated_bytes = 20; repeated TestMessage2 repeated_msg = 21; repeated TestEnum repeated_enum = 22; + + optional TestSingularFields optional_msg2 = 23; } message TestSingularFields { @@ -61,7 +63,7 @@ message TestSingularFields { } message TestMessage2 { - int32 foo = 1; + optional int32 foo = 1; } enum TestEnum { diff --git a/ruby/tests/common_tests.rb b/ruby/tests/common_tests.rb index 1f5013a529ad..589934b0e92f 100644 --- a/ruby/tests/common_tests.rb +++ b/ruby/tests/common_tests.rb @@ -115,14 +115,14 @@ def test_inspect_eq_to_s m = proto_module::TestMessage.new( :optional_int32 => -42, :optional_enum => :A, - :optional_msg => proto_module::TestMessage2.new, + :optional_msg => proto_module::TestMessage2.new(foo: 0), :repeated_string => ["hello", "there", "world"]) - expected = "<#{proto_module}::TestMessage: optional_int32: -42, optional_int64: 0, optional_uint32: 0, optional_uint64: 0, optional_bool: false, optional_float: 0.0, optional_double: 0.0, optional_string: \"\", optional_bytes: \"\", optional_msg: <#{proto_module}::TestMessage2: foo: 0>, optional_enum: :A, repeated_int32: [], repeated_int64: [], repeated_uint32: [], repeated_uint64: [], repeated_bool: [], repeated_float: [], repeated_double: [], repeated_string: [\"hello\", \"there\", \"world\"], repeated_bytes: [], repeated_msg: [], repeated_enum: []>" + expected = "<#{proto_module}::TestMessage: optional_int32: -42, optional_msg: <#{proto_module}::TestMessage2: foo: 0>, optional_enum: :A, repeated_int32: [], repeated_int64: [], repeated_uint32: [], repeated_uint64: [], repeated_bool: [], repeated_float: [], repeated_double: [], repeated_string: [\"hello\", \"there\", \"world\"], repeated_bytes: [], repeated_msg: [], repeated_enum: []>" assert_equal expected, m.inspect assert_equal expected, m.to_s m = proto_module::OneofMessage.new(:b => -42) - expected = "<#{proto_module}::OneofMessage: a: \"\", b: -42, c: nil, d: :Default>" + expected = "<#{proto_module}::OneofMessage: b: -42>" assert_equal expected, m.inspect assert_equal expected, m.to_s end @@ -428,7 +428,7 @@ def test_map_basic assert m.length == 0 assert m == {} - assert_raise TypeError do + assert_raise Google::Protobuf::TypeError do m[1] = 1 end assert_raise RangeError do @@ -492,7 +492,7 @@ def test_map_keytypes m = Google::Protobuf::Map.new(:string, :int32) m["asdf"] = 1 - assert_raise TypeError do + assert_raise Google::Protobuf::TypeError do m[1] = 1 end assert_raise Encoding::UndefinedConversionError do @@ -505,7 +505,7 @@ def test_map_keytypes m[bytestring] = 1 # Allowed -- we will automatically convert to ASCII-8BIT. m["asdf"] = 1 - assert_raise TypeError do + assert_raise Google::Protobuf::TypeError do m[1] = 1 end end @@ -547,6 +547,7 @@ def test_map_dup_deep_copy "b" => proto_module::TestMessage.new(:optional_int32 => 84) }) m2 = m.dup + assert m.to_h == m2.to_h assert m == m2 assert m.object_id != m2.object_id assert m["a"].object_id == m2["a"].object_id @@ -1103,16 +1104,6 @@ def test_json_emit_defaults m = proto_module::TestMessage.new expected = { - optionalInt32: 0, - optionalInt64: "0", - optionalUint32: 0, - optionalUint64: "0", - optionalBool: false, - optionalFloat: 0, - optionalDouble: 0, - optionalString: "", - optionalBytes: "", - optionalEnum: "Default", repeatedInt32: [], repeatedInt64: [], repeatedUint32: [], @@ -1137,17 +1128,7 @@ def test_json_emit_defaults_submsg m = proto_module::TestMessage.new(optional_msg: proto_module::TestMessage2.new) expected = { - optionalInt32: 0, - optionalInt64: "0", - optionalUint32: 0, - optionalUint64: "0", - optionalBool: false, - optionalFloat: 0, - optionalDouble: 0, - optionalString: "", - optionalBytes: "", - optionalMsg: {foo: 0}, - optionalEnum: "Default", + optionalMsg: {}, repeatedInt32: [], repeatedInt64: [], repeatedUint32: [], @@ -1172,16 +1153,6 @@ def test_json_emit_defaults_repeated_submsg m = proto_module::TestMessage.new(repeated_msg: [proto_module::TestMessage2.new]) expected = { - optionalInt32: 0, - optionalInt64: "0", - optionalUint32: 0, - optionalUint64: "0", - optionalBool: false, - optionalFloat: 0, - optionalDouble: 0, - optionalString: "", - optionalBytes: "", - optionalEnum: "Default", repeatedInt32: [], repeatedInt64: [], repeatedUint32: [], @@ -1191,7 +1162,7 @@ def test_json_emit_defaults_repeated_submsg repeatedDouble: [], repeatedString: [], repeatedBytes: [], - repeatedMsg: [{foo: 0}], + repeatedMsg: [{}], repeatedEnum: [] } @@ -1256,8 +1227,9 @@ def test_deep_json assert_equal json, struct.to_json assert_raise(RuntimeError, "Maximum recursion depth exceeded during encoding") do - proto_module::MyRepeatedStruct.encode( - proto_module::MyRepeatedStruct.new(structs: [proto_module::MyStruct.new(struct: struct)])) + struct = Google::Protobuf::Struct.new + struct.fields["foobar"] = Google::Protobuf::Value.new(struct_value: struct) + Google::Protobuf::Struct.encode(struct) end end @@ -1786,4 +1758,25 @@ def test_eq assert m1.hash != m2.hash assert_nil h[m2] end + + def test_object_gc + m = proto_module::TestMessage.new(optional_msg: proto_module::TestMessage2.new) + m.optional_msg + GC.start(full_mark: true, immediate_sweep: true) + m.optional_msg.inspect + end + + def test_object_gc_freeze + m = proto_module::TestMessage.new + m.repeated_float.freeze + GC.start(full_mark: true) + + # Make sure we remember that the object is frozen. + # The wrapper object contains this information, so we need to ensure that + # the previous GC did not collect it. + assert m.repeated_float.frozen? + + GC.start(full_mark: true, immediate_sweep: true) + assert m.repeated_float.frozen? + end end diff --git a/ruby/travis-test.sh b/ruby/travis-test.sh index 513c0ec37265..faf0f9d0c733 100755 --- a/ruby/travis-test.sh +++ b/ruby/travis-test.sh @@ -18,7 +18,7 @@ test_version() { rake gc_test && cd ../conformance && make test_jruby && cd ../ruby/compatibility_tests/v3.0.0 && ./test.sh" - elif [ "$version" == "ruby-2.6.0" -o "$version" == "ruby-2.7.0" ] ; then + elif [ "$version" == "ruby-2.6.0" -o "$version" == "ruby-2.7.0" -o "$version" == "ruby-3.0.0" ] ; then bash --login -c \ "rvm install $version && rvm use $version && \ which ruby && \ diff --git a/src/google/protobuf/compiler/ruby/ruby_generator.cc b/src/google/protobuf/compiler/ruby/ruby_generator.cc index ebc972c14856..a6935c76584d 100644 --- a/src/google/protobuf/compiler/ruby/ruby_generator.cc +++ b/src/google/protobuf/compiler/ruby/ruby_generator.cc @@ -242,8 +242,7 @@ void GenerateOneof(const OneofDescriptor* oneof, io::Printer* printer) { bool GenerateMessage(const Descriptor* message, io::Printer* printer, std::string* error) { if (message->extension_range_count() > 0 || message->extension_count() > 0) { - *error = "Extensions are not yet supported for proto2 .proto files."; - return false; + GOOGLE_LOG(WARNING) << "Extensions are not yet supported for proto2 .proto files."; } // Don't generate MapEntry messages -- we use the Ruby extension's native @@ -543,8 +542,7 @@ bool GenerateFile(const FileDescriptor* file, io::Printer* printer, // TODO: Remove this when ruby supports extensions for proto2 syntax. if (file->syntax() == FileDescriptor::SYNTAX_PROTO2 && file->extension_count() > 0) { - *error = "Extensions are not yet supported for proto2 .proto files."; - return false; + GOOGLE_LOG(WARNING) << "Extensions are not yet supported for proto2 .proto files."; } printer->Print("Google::Protobuf::DescriptorPool.generated_pool.build do\n"); diff --git a/tests.sh b/tests.sh index f1242a5c5717..e445384e53dc 100755 --- a/tests.sh +++ b/tests.sh @@ -441,6 +441,10 @@ build_ruby27() { internal_build_cpp # For conformance tests. cd ruby && bash travis-test.sh ruby-2.7.0 && cd .. } +build_ruby30() { + internal_build_cpp # For conformance tests. + cd ruby && bash travis-test.sh ruby-3.0.0 && cd .. +} build_jruby() { internal_build_cpp # For conformance tests. @@ -723,6 +727,7 @@ Usage: $0 { cpp | ruby25 | ruby26 | ruby27 | + ruby30 | jruby | ruby_all | php7.0 | diff --git a/third_party/wyhash/wyhash.h b/third_party/wyhash/wyhash.h index 5658f02df3ce..bb247a9af50d 100644 --- a/third_party/wyhash/wyhash.h +++ b/third_party/wyhash/wyhash.h @@ -116,7 +116,7 @@ static inline uint64_t wyhash(const void *key, uint64_t len, uint64_t seed, cons return _wyfinish(p,len,seed,secret,i); } //utility functions -const uint64_t _wyp[5] = {0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull, 0x1d8e4e27c47d124full}; +static const uint64_t _wyp[5] = {0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull, 0x1d8e4e27c47d124full}; static inline uint64_t wyhash64(uint64_t A, uint64_t B){ A^=_wyp[0]; B^=_wyp[1]; _wymum(&A,&B); return _wymix(A^_wyp[0],B^_wyp[1]);} static inline uint64_t wyrand(uint64_t *seed){ *seed+=_wyp[0]; return _wymix(*seed,*seed^_wyp[1]);} static inline double wy2u01(uint64_t r){ const double _wynorm=1.0/(1ull<<52); return (r>>12)*_wynorm;}