Skip to content

Commit

Permalink
Merge pull request #6502 from haberman/rubygcfix
Browse files Browse the repository at this point in the history
Optimized away the creation of empty string objects.
  • Loading branch information
haberman committed Aug 14, 2019
2 parents fa8f155 + 1e37a94 commit 402c28a
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 15 deletions.
31 changes: 31 additions & 0 deletions ruby/ext/google/protobuf_c/protobuf.c
Expand Up @@ -51,6 +51,32 @@ VALUE get_def_obj(const void* def) {
return rb_hash_aref(upb_def_to_ruby_obj_map, ULL2NUM((intptr_t)def));
}

static VALUE cached_empty_string = Qnil;
static VALUE cached_empty_bytes = Qnil;

static VALUE create_frozen_string(const char* str, size_t size, bool binary) {
VALUE str_rb = rb_str_new(str, size);

rb_enc_associate(str_rb,
binary ? kRubyString8bitEncoding : kRubyStringUtf8Encoding);
rb_obj_freeze(str_rb);
return str_rb;
}

VALUE get_frozen_string(const char* str, size_t size, bool binary) {
if (size == 0) {
return binary ? cached_empty_bytes : cached_empty_string;
} else {
// It is harder to memoize non-empty strings. The obvious approach would be
// to use a Ruby hash keyed by string as memo table, but looking up in such a table
// requires constructing a string (the very thing we're trying to avoid).
//
// Since few fields have defaults, we will just optimize the empty string
// case for now.
return create_frozen_string(str, size, binary);
}
}

// -----------------------------------------------------------------------------
// Utilities.
// -----------------------------------------------------------------------------
Expand Down Expand Up @@ -118,4 +144,9 @@ void Init_protobuf_c() {

rb_gc_register_address(&upb_def_to_ruby_obj_map);
upb_def_to_ruby_obj_map = rb_hash_new();

rb_gc_register_address(&cached_empty_string);
rb_gc_register_address(&cached_empty_bytes);
cached_empty_string = create_frozen_string("", 0, false);
cached_empty_bytes = create_frozen_string("", 0, true);
}
5 changes: 5 additions & 0 deletions ruby/ext/google/protobuf_c/protobuf.h
Expand Up @@ -591,6 +591,11 @@ const upb_pbdecodermethod *new_fillmsg_decodermethod(
// cycles.
#define ENCODE_MAX_NESTING 63

// -----------------------------------------------------------------------------
// A cache of frozen string objects to use as field defaults.
// -----------------------------------------------------------------------------
VALUE get_frozen_string(const char* data, size_t size, bool binary);

// -----------------------------------------------------------------------------
// Global map from upb {msg,enum}defs to wrapper Descriptor/EnumDescriptor
// instances.
Expand Down
28 changes: 13 additions & 15 deletions ruby/ext/google/protobuf_c/storage.c
Expand Up @@ -96,17 +96,19 @@ VALUE native_slot_encode_and_freeze_string(upb_fieldtype_t type, VALUE value) {
kRubyStringUtf8Encoding : kRubyString8bitEncoding;
VALUE desired_encoding_value = rb_enc_from_encoding(desired_encoding);

// Note: this will not duplicate underlying string data unless necessary.
value = rb_str_encode(value, desired_encoding_value, 0, Qnil);
if (rb_obj_encoding(value) != desired_encoding_value || !OBJ_FROZEN(value)) {
// Note: this will not duplicate underlying string data unless necessary.
value = rb_str_encode(value, desired_encoding_value, 0, Qnil);

if (type == UPB_TYPE_STRING &&
rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) {
rb_raise(rb_eEncodingError, "String is invalid UTF-8");
}
if (type == UPB_TYPE_STRING &&
rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) {
rb_raise(rb_eEncodingError, "String is invalid UTF-8");
}

// Ensure the data remains valid. Since we called #encode a moment ago,
// this does not freeze the string the user assigned.
rb_obj_freeze(value);
// Ensure the data remains valid. Since we called #encode a moment ago,
// this does not freeze the string the user assigned.
rb_obj_freeze(value);
}

return value;
}
Expand Down Expand Up @@ -729,12 +731,8 @@ VALUE layout_get_default(const upb_fielddef *field) {
case UPB_TYPE_BYTES: {
size_t size;
const char *str = upb_fielddef_defaultstr(field, &size);
VALUE str_rb = rb_str_new(str, size);

rb_enc_associate(str_rb, (upb_fielddef_type(field) == UPB_TYPE_BYTES) ?
kRubyString8bitEncoding : kRubyStringUtf8Encoding);
rb_obj_freeze(str_rb);
return str_rb;
return get_frozen_string(str, size,
upb_fielddef_type(field) == UPB_TYPE_BYTES);
}
default: return Qnil;
}
Expand Down

0 comments on commit 402c28a

Please sign in to comment.