From c558aa75a36703cc1949acc6fe6b0cf22b80433c Mon Sep 17 00:00:00 2001 From: Penelope Phippen Date: Mon, 6 Apr 2020 11:33:50 -0400 Subject: [PATCH] Call "Class#new" over rb_class_new_instance in decoding (#7352) This patch has almost no change in behaviour where users have not patched the implementation of new on either a specific proto object class, or `Google::Protobuf::MessageExts::ClassMethods`. The default implementation of `new`, and `rb_class_new_instance` have the same behaviour. By default when we call `new` on a class in Ruby, it goes to the `Class` class's implementation: ```ruby class Foo end >> Foo.method(:new).owner => Class ``` the `Class` implementation of `new` is (pseudocode, it's actually in c): ```ruby class Class def new(*args, &blk) instance = alloc instance.initialize(*args, &blk) instance end end ``` `rb_class_new_instance` does the same thing, it calls down to [`rb_class_s_new`](https://github.com/ruby/ruby/blob/v2_5_5/object.c#L2147), which calls `rb_class_alloc`, then `rb_obj_call_init`. `rb_funcall` is a variadic c function for calling a ruby method on an object, it takes: * A `VALUE` on to which the method should be called * An `ID` which should be an ID of a method, usually created with `rb_intern`, to get an ID from a string * An integer, the number of arguments calling the method with, * A number of `VALUE`s, to send to the method call. `rb_funcall` is the same as calling a method directly in Ruby, and will perform ancestor chain respecting method lookup. This means that in C extensions, if nobody has defined the `new` method on any classes or modules in a class's inheritance chain calling `rb_class_new_instance` is the same as calling `rb_funcall(klass, rb_intern("new"))`, *however* this removes the ability for users to define or monkey patch their own constructors in to the objects created by the C extension. In Ads, we define [`new`](https://git.io/JvFC9) on `Google::Protobuf::MessageExts::ClassMethods` to allow us to insert a monkeypatch which makes it possible to assign primitive values to wrapper type fields (e.g. Google::Protobuf::StringValue). The monkeypatch we apply works for objects that we create for the user via the `new` method. Before this commit, however, the patch does not work for the `decode` method, for the reasons outlined above. Before this commit, protobuf uses `rb_class_new_instance`. After this commit, we use `rb_funcall(klass, rb_intern("new"), 0);` to construct protobuf objects during decoding. While I haven't measured it this will have a very minor performance impact for decoding (`rb_funcall` will have to go to the method cache, which `rb_class_new_instance` will not). This does however do the "more rubyish" thing of respecting the protobuf object's inheritance chain to construct them during decoding. I have run both Ads and Cloud's test suites for Ruby libraries against this patch, as well as the protobuf Ruby gem's test suite locally. --- ruby/ext/google/protobuf_c/encode_decode.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c index 142fd578739a..8b0a22efd1ac 100644 --- a/ruby/ext/google/protobuf_c/encode_decode.c +++ b/ruby/ext/google/protobuf_c/encode_decode.c @@ -30,6 +30,10 @@ #include "protobuf.h" +VALUE initialize_rb_class_with_no_args(VALUE klass) { + return rb_funcall(klass, rb_intern("new"), 0); +} + // This function is equivalent to rb_str_cat(), but unlike the real // rb_str_cat(), it doesn't leak memory in some versions of Ruby. // For more information, see: @@ -295,7 +299,7 @@ static void *appendsubmsg_handler(void *closure, const void *hd) { const submsg_handlerdata_t *submsgdata = hd; MessageHeader* submsg; - VALUE submsg_rb = rb_class_new_instance(0, NULL, submsgdata->subklass); + VALUE submsg_rb = initialize_rb_class_with_no_args(submsgdata->subklass); RepeatedField_push(ary, submsg_rb); TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg); @@ -322,7 +326,7 @@ static void *submsg_handler(void *closure, const void *hd) { if (DEREF(msg, submsgdata->ofs, VALUE) == Qnil) { DEREF(msg, submsgdata->ofs, VALUE) = - rb_class_new_instance(0, NULL, submsgdata->subklass); + initialize_rb_class_with_no_args(submsgdata->subklass); } set_hasbit(closure, submsgdata->hasbit); @@ -549,7 +553,7 @@ static void *oneofsubmsg_handler(void *closure, if (oldcase != oneofdata->oneof_case_num || DEREF(msg, oneofdata->ofs, VALUE) == Qnil) { DEREF(msg, oneofdata->ofs, VALUE) = - rb_class_new_instance(0, NULL, oneofdata->subklass); + initialize_rb_class_with_no_args(oneofdata->subklass); } // Set the oneof case *after* allocating the new class instance -- otherwise, // if the Ruby GC is invoked as part of a call into the VM, it might invoke @@ -1038,7 +1042,7 @@ VALUE Message_decode(VALUE klass, VALUE data) { rb_raise(rb_eArgError, "Expected string for binary protobuf data."); } - msg_rb = rb_class_new_instance(0, NULL, msgklass); + msg_rb = initialize_rb_class_with_no_args(msgklass); TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); { @@ -1114,7 +1118,7 @@ VALUE Message_decode_json(int argc, VALUE* argv, VALUE klass) { // convert, because string handlers pass data directly to message string // fields. - msg_rb = rb_class_new_instance(0, NULL, msgklass); + msg_rb = initialize_rb_class_with_no_args(msgklass); TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); {