diff --git a/ruby/ext/google/protobuf_c/protobuf.c b/ruby/ext/google/protobuf_c/protobuf.c index fd964c781e13..82e3d2a5dab2 100644 --- a/ruby/ext/google/protobuf_c/protobuf.c +++ b/ruby/ext/google/protobuf_c/protobuf.c @@ -51,6 +51,32 @@ VALUE get_def_obj(const void* def) { return rb_hash_aref(upb_def_to_ruby_obj_map, ULL2NUM((intptr_t)def)); } +static VALUE cached_empty_string = Qnil; +static VALUE cached_empty_bytes = Qnil; + +static VALUE create_frozen_string(const char* str, size_t size, bool binary) { + VALUE str_rb = rb_str_new(str, size); + + rb_enc_associate(str_rb, + binary ? kRubyString8bitEncoding : kRubyStringUtf8Encoding); + rb_obj_freeze(str_rb); + return str_rb; +} + +VALUE get_frozen_string(const char* str, size_t size, bool binary) { + if (size == 0) { + return binary ? cached_empty_bytes : cached_empty_string; + } else { + // It is harder to memoize non-empty strings. The obvious approach would be + // to use a Ruby hash keyed by string as memo table, but looking up in such a table + // requires constructing a string (the very thing we're trying to avoid). + // + // Since few fields have defaults, we will just optimize the empty string + // case for now. + return create_frozen_string(str, size, binary); + } +} + // ----------------------------------------------------------------------------- // Utilities. // ----------------------------------------------------------------------------- @@ -118,4 +144,9 @@ void Init_protobuf_c() { rb_gc_register_address(&upb_def_to_ruby_obj_map); upb_def_to_ruby_obj_map = rb_hash_new(); + + rb_gc_register_address(&cached_empty_string); + rb_gc_register_address(&cached_empty_bytes); + cached_empty_string = create_frozen_string("", 0, false); + cached_empty_bytes = create_frozen_string("", 0, true); } diff --git a/ruby/ext/google/protobuf_c/protobuf.h b/ruby/ext/google/protobuf_c/protobuf.h index 8731eeb47b09..db9c1eac0e04 100644 --- a/ruby/ext/google/protobuf_c/protobuf.h +++ b/ruby/ext/google/protobuf_c/protobuf.h @@ -591,6 +591,11 @@ const upb_pbdecodermethod *new_fillmsg_decodermethod( // cycles. #define ENCODE_MAX_NESTING 63 +// ----------------------------------------------------------------------------- +// A cache of frozen string objects to use as field defaults. +// ----------------------------------------------------------------------------- +VALUE get_frozen_string(const char* data, size_t size, bool binary); + // ----------------------------------------------------------------------------- // Global map from upb {msg,enum}defs to wrapper Descriptor/EnumDescriptor // instances. diff --git a/ruby/ext/google/protobuf_c/storage.c b/ruby/ext/google/protobuf_c/storage.c index ba4f831b9ae2..e9c70a282753 100644 --- a/ruby/ext/google/protobuf_c/storage.c +++ b/ruby/ext/google/protobuf_c/storage.c @@ -96,17 +96,19 @@ VALUE native_slot_encode_and_freeze_string(upb_fieldtype_t type, VALUE value) { kRubyStringUtf8Encoding : kRubyString8bitEncoding; VALUE desired_encoding_value = rb_enc_from_encoding(desired_encoding); - // Note: this will not duplicate underlying string data unless necessary. - value = rb_str_encode(value, desired_encoding_value, 0, Qnil); + if (rb_obj_encoding(value) != desired_encoding_value || !OBJ_FROZEN(value)) { + // Note: this will not duplicate underlying string data unless necessary. + value = rb_str_encode(value, desired_encoding_value, 0, Qnil); - if (type == UPB_TYPE_STRING && - rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) { - rb_raise(rb_eEncodingError, "String is invalid UTF-8"); - } + if (type == UPB_TYPE_STRING && + rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) { + rb_raise(rb_eEncodingError, "String is invalid UTF-8"); + } - // Ensure the data remains valid. Since we called #encode a moment ago, - // this does not freeze the string the user assigned. - rb_obj_freeze(value); + // Ensure the data remains valid. Since we called #encode a moment ago, + // this does not freeze the string the user assigned. + rb_obj_freeze(value); + } return value; } @@ -729,12 +731,8 @@ VALUE layout_get_default(const upb_fielddef *field) { case UPB_TYPE_BYTES: { size_t size; const char *str = upb_fielddef_defaultstr(field, &size); - VALUE str_rb = rb_str_new(str, size); - - rb_enc_associate(str_rb, (upb_fielddef_type(field) == UPB_TYPE_BYTES) ? - kRubyString8bitEncoding : kRubyStringUtf8Encoding); - rb_obj_freeze(str_rb); - return str_rb; + return get_frozen_string(str, size, + upb_fielddef_type(field) == UPB_TYPE_BYTES); } default: return Qnil; }