Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimized away the creation of empty string objects. #6502

Merged
merged 1 commit into from Aug 14, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
31 changes: 31 additions & 0 deletions ruby/ext/google/protobuf_c/protobuf.c
Expand Up @@ -51,6 +51,32 @@ VALUE get_def_obj(const void* def) {
return rb_hash_aref(upb_def_to_ruby_obj_map, ULL2NUM((intptr_t)def));
}

static VALUE cached_empty_string = Qnil;
static VALUE cached_empty_bytes = Qnil;

static VALUE create_frozen_string(const char* str, size_t size, bool binary) {
VALUE str_rb = rb_str_new(str, size);

rb_enc_associate(str_rb,
binary ? kRubyString8bitEncoding : kRubyStringUtf8Encoding);
rb_obj_freeze(str_rb);
return str_rb;
}

VALUE get_frozen_string(const char* str, size_t size, bool binary) {
if (size == 0) {
return binary ? cached_empty_bytes : cached_empty_string;
} else {
// It is harder to memoize non-empty strings. The obvious approach would be
// to use a Ruby hash keyed by string as memo table, but looking up in such a table
// requires constructing a string (the very thing we're trying to avoid).
//
// Since few fields have defaults, we will just optimize the empty string
// case for now.
return create_frozen_string(str, size, binary);
}
}

// -----------------------------------------------------------------------------
// Utilities.
// -----------------------------------------------------------------------------
Expand Down Expand Up @@ -118,4 +144,9 @@ void Init_protobuf_c() {

rb_gc_register_address(&upb_def_to_ruby_obj_map);
upb_def_to_ruby_obj_map = rb_hash_new();

rb_gc_register_address(&cached_empty_string);
rb_gc_register_address(&cached_empty_bytes);
cached_empty_string = create_frozen_string("", 0, false);
cached_empty_bytes = create_frozen_string("", 0, true);
}
5 changes: 5 additions & 0 deletions ruby/ext/google/protobuf_c/protobuf.h
Expand Up @@ -591,6 +591,11 @@ const upb_pbdecodermethod *new_fillmsg_decodermethod(
// cycles.
#define ENCODE_MAX_NESTING 63

// -----------------------------------------------------------------------------
// A cache of frozen string objects to use as field defaults.
// -----------------------------------------------------------------------------
VALUE get_frozen_string(const char* data, size_t size, bool binary);

// -----------------------------------------------------------------------------
// Global map from upb {msg,enum}defs to wrapper Descriptor/EnumDescriptor
// instances.
Expand Down
28 changes: 13 additions & 15 deletions ruby/ext/google/protobuf_c/storage.c
Expand Up @@ -96,17 +96,19 @@ VALUE native_slot_encode_and_freeze_string(upb_fieldtype_t type, VALUE value) {
kRubyStringUtf8Encoding : kRubyString8bitEncoding;
VALUE desired_encoding_value = rb_enc_from_encoding(desired_encoding);

// Note: this will not duplicate underlying string data unless necessary.
value = rb_str_encode(value, desired_encoding_value, 0, Qnil);
if (rb_obj_encoding(value) != desired_encoding_value || !OBJ_FROZEN(value)) {
// Note: this will not duplicate underlying string data unless necessary.
value = rb_str_encode(value, desired_encoding_value, 0, Qnil);

if (type == UPB_TYPE_STRING &&
rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) {
rb_raise(rb_eEncodingError, "String is invalid UTF-8");
}
if (type == UPB_TYPE_STRING &&
rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) {
rb_raise(rb_eEncodingError, "String is invalid UTF-8");
}

// Ensure the data remains valid. Since we called #encode a moment ago,
// this does not freeze the string the user assigned.
rb_obj_freeze(value);
// Ensure the data remains valid. Since we called #encode a moment ago,
// this does not freeze the string the user assigned.
rb_obj_freeze(value);
}

return value;
}
Expand Down Expand Up @@ -729,12 +731,8 @@ VALUE layout_get_default(const upb_fielddef *field) {
case UPB_TYPE_BYTES: {
size_t size;
const char *str = upb_fielddef_defaultstr(field, &size);
VALUE str_rb = rb_str_new(str, size);

rb_enc_associate(str_rb, (upb_fielddef_type(field) == UPB_TYPE_BYTES) ?
kRubyString8bitEncoding : kRubyStringUtf8Encoding);
rb_obj_freeze(str_rb);
return str_rb;
return get_frozen_string(str, size,
upb_fielddef_type(field) == UPB_TYPE_BYTES);
}
default: return Qnil;
}
Expand Down