From 7bbb4c45d4148981321c14ed3d1fe389f59cb1aa Mon Sep 17 00:00:00 2001 From: Joshua Haberman Date: Thu, 25 Mar 2021 11:34:29 -0700 Subject: [PATCH] Added some more comments and refactored slightly. --- ruby/ext/google/protobuf_c/protobuf.c | 21 +++++++++++++++++++-- ruby/ext/google/protobuf_c/protobuf.h | 2 ++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/ruby/ext/google/protobuf_c/protobuf.c b/ruby/ext/google/protobuf_c/protobuf.c index 4f437d1cd9d3..6500602a6b40 100644 --- a/ruby/ext/google/protobuf_c/protobuf.c +++ b/ruby/ext/google/protobuf_c/protobuf.c @@ -260,6 +260,7 @@ VALUE secondary_map_mutex = Qnil; // Lambda that will GC entries from the secondary map that are no longer present // in the primary map. VALUE gc_secondary_map = Qnil; +ID length; extern VALUE weak_obj_cache; @@ -273,14 +274,30 @@ static void SecondaryMap_Init() { " secondary.delete_if { |k, v| !weak.key?(v) }\n" "}\n"); secondary_map_mutex = rb_mutex_new(); + length = rb_intern("length"); } +// The secondary map is a regular Hash, and will never shrink on its own. +// The main object cache is a WeakMap that will automatically remove entries +// when the target object is no longer reachable, but unless we manually +// remove the corresponding entries from the secondary map, it will grow +// without bound. +// +// To avoid this unbounded growth we periodically remove entries from the +// secondary map that are no longer present in the WeakMap. The logic of +// how often to perform this GC is an artbirary tuning parameter that +// represents a straightforward CPU/memory tradeoff. static void SecondaryMap_MaybeGC() { - size_t weak_len = NUM2ULL(rb_funcall(weak_obj_cache, rb_intern("length"), 0)); + size_t weak_len = NUM2ULL(rb_funcall(weak_obj_cache, length, 0)); size_t secondary_len = RHASH_SIZE(secondary_map); size_t waste = secondary_len - weak_len; PBRUBY_ASSERT(secondary_len >= weak_len); - if (waste > 1000 && waste > secondary_len * 0.2) { + // GC if we could remove at least 2000 entries or 20% of the table size + // (whichever is greater). Since the cost of the GC pass is O(N), we + // want to make sure that we condition this on overall table size, to + // avoid O(N^2) CPU costs. + size_t threshold = PBRUBY_MAX(secondary_len * 0.2, 2000); + if (waste > threshold) { rb_funcall(gc_secondary_map, rb_intern("call"), 2, secondary_map, weak_obj_cache); } } diff --git a/ruby/ext/google/protobuf_c/protobuf.h b/ruby/ext/google/protobuf_c/protobuf.h index e4873b34d2bd..01f18fb3a43b 100644 --- a/ruby/ext/google/protobuf_c/protobuf.h +++ b/ruby/ext/google/protobuf_c/protobuf.h @@ -106,6 +106,8 @@ extern VALUE cTypeError; #define PBRUBY_ASSERT(expr) assert(expr) #endif +#define PBRUBY_MAX(x, y) (((x) > (y)) ? (x) : (y)) + #define UPB_UNUSED(var) (void)var #endif // __GOOGLE_PROTOBUF_RUBY_PROTOBUF_H__