From d572b0a50d6003c2245cc82926f4824791ec249b Mon Sep 17 00:00:00 2001 From: Keyhan Vakil Date: Thu, 9 Mar 2023 23:41:11 +0000 Subject: [PATCH] src: register external references for source code Currently we use external strings for internalized builtin source code. However when a snapshot is taken, any external string whose resource is not registered is flattened into a SeqString (see ref). The result is that any module source code stored in the snapshot does not use external strings after deserialization. This patch registers an external string resource for each internalized builtin's source. The savings are substantial: ~1.9 MB of heap memory per isolate, or ~44% of an otherwise empty isolate's heap usage: ```bash $ node --expose-gc -p 'gc(),process.memoryUsage().heapUsed' 4190968 $ ./node --expose-gc -p 'gc(),process.memoryUsage().heapUsed' 2327536 ``` The savings can be even higher for user snapshots which may include more internal modules. Doing this with the existing UnionBytes abstraction was tricky, because UnionBytes only creates an external string resource when ToStringChecked is called. However we need to collate a list of external resources before isolate construction. UnionBytes can also be deallocated, which isn't ideal since registering an external string resource which is later deallocated would be very bad. Rather than further complicate UnionBytes, we introduce a new class called EternalBytes which assumes that the data has static lifetime and creates a single external string resource on construction. It reuses this original external string resource across V8 isolates by simply ignoring Dispose calls from V8. In order to distinguish between EternalBytes and UnionBytes, we bifurcate the sources map into two maps: the internalized builtins map (which is never modified) and the sources map (which can be changed through externalized builtins or by the embedder). Refs: https://github.com/v8/v8/blob/d2c8fbe9ccd1a6ce5591bb7dd319c3c00d6bf489/src/snapshot/serializer.cc#L633 --- node.gyp | 1 + src/node_builtins.cc | 49 ++++++++++--- src/node_builtins.h | 16 +++-- src/node_eternal_bytes.h | 122 ++++++++++++++++++++++++++++++++ src/node_external_reference.h | 3 +- test/cctest/test_per_process.cc | 24 ++++--- tools/js2c.py | 16 +++-- 7 files changed, 195 insertions(+), 36 deletions(-) create mode 100644 src/node_eternal_bytes.h diff --git a/node.gyp b/node.gyp index 72d90ec5f4ad14..25ec1dcb7017a8 100644 --- a/node.gyp +++ b/node.gyp @@ -654,6 +654,7 @@ 'src/node_sockaddr-inl.h', 'src/node_stat_watcher.h', 'src/node_union_bytes.h', + 'src/node_eternal_bytes.h', 'src/node_url.h', 'src/node_util.h', 'src/node_version.h', diff --git a/src/node_builtins.cc b/src/node_builtins.cc index 0439fff5115bd9..00cd94f032fad7 100644 --- a/src/node_builtins.cc +++ b/src/node_builtins.cc @@ -34,8 +34,7 @@ using v8::Undefined; using v8::Value; BuiltinLoader::BuiltinLoader() - : config_(GetConfig()), code_cache_(std::make_shared()) { - LoadJavaScriptSource(); + : code_cache_(std::make_shared()) { #ifdef NODE_SHARED_BUILTIN_CJS_MODULE_LEXER_LEXER_PATH AddExternalizedBuiltin( "internal/deps/cjs-module-lexer/lexer", @@ -55,18 +54,33 @@ BuiltinLoader::BuiltinLoader() } bool BuiltinLoader::Exists(const char* id) { + auto internalized_builtins = GetInternalizedBuiltinSourceMap(); + if (internalized_builtins->find(id) != internalized_builtins->end()) { + return true; + } auto source = source_.read(); return source->find(id) != source->end(); } bool BuiltinLoader::Add(const char* id, const UnionBytes& source) { + auto internalized_builtins = GetInternalizedBuiltinSourceMap(); + if (internalized_builtins->find(id) != internalized_builtins->end()) { + // Cannot add this builtin as it would conflict with an + // existing internalized builtin. + return false; + } auto result = source_.write()->emplace(id, source); return result.second; } Local BuiltinLoader::GetSourceObject(Local context) { + auto internalized_builtins = GetInternalizedBuiltinSourceMap(); Isolate* isolate = context->GetIsolate(); Local out = Object::New(isolate); + for (auto const& x : *internalized_builtins) { + Local key = OneByteString(isolate, x.first.c_str(), x.first.size()); + out->Set(context, key, x.second.ToStringChecked(isolate)).FromJust(); + } auto source = source_.read(); for (auto const& x : *source) { Local key = OneByteString(isolate, x.first.c_str(), x.first.size()); @@ -76,13 +90,17 @@ Local BuiltinLoader::GetSourceObject(Local context) { } Local BuiltinLoader::GetConfigString(Isolate* isolate) { - return config_.ToStringChecked(isolate); + return GetConfig()->ToStringChecked(isolate); } std::vector BuiltinLoader::GetBuiltinIds() const { std::vector ids; + auto internalized_builtins = GetInternalizedBuiltinSourceMap(); auto source = source_.read(); - ids.reserve(source->size()); + ids.reserve(internalized_builtins->size() + source->size()); + for (auto const& x : *internalized_builtins) { + ids.emplace_back(x.first); + } for (auto const& x : *source) { ids.emplace_back(x.first); } @@ -184,14 +202,20 @@ static std::string OnDiskFileName(const char* id) { MaybeLocal BuiltinLoader::LoadBuiltinSource(Isolate* isolate, const char* id) const { - auto source = source_.read(); #ifndef NODE_BUILTIN_MODULES_PATH - const auto source_it = source->find(id); - if (UNLIKELY(source_it == source->end())) { - fprintf(stderr, "Cannot find native builtin: \"%s\".\n", id); - ABORT(); + auto internalized_builtins = GetInternalizedBuiltinSourceMap(); + const auto internalized_builtins_it = internalized_builtins->find(id); + if (LIKELY(internalized_builtins_it != internalized_builtins->end())) { + return internalized_builtins_it->second.ToStringChecked(isolate); + } + + auto source = source_.read(); + auto source_it = source->find(id); + if (LIKELY(source_it != source->end())) { + return source_it->second.ToStringChecked(isolate); } - return source_it->second.ToStringChecked(isolate); + fprintf(stderr, "Cannot find native builtin: \"%s\".\n", id); + ABORT(); #else // !NODE_BUILTIN_MODULES_PATH std::string filename = OnDiskFileName(id); @@ -710,6 +734,11 @@ void BuiltinLoader::RegisterExternalReferences( registry->Register(GetCacheUsage); registry->Register(CompileFunction); registry->Register(HasCachedBuiltins); + auto internalized_builtins = GetInternalizedBuiltinSourceMap(); + for (auto const& x : *internalized_builtins) { + auto resource = x.second.AsResource(); + registry->Register(resource); + } } } // namespace builtins diff --git a/src/node_builtins.h b/src/node_builtins.h index 81d6d1cca279c2..25096e2521f54a 100644 --- a/src/node_builtins.h +++ b/src/node_builtins.h @@ -10,6 +10,7 @@ #include #include #include +#include "node_eternal_bytes.h" #include "node_mutex.h" #include "node_threadsafe_cow.h" #include "node_union_bytes.h" @@ -25,11 +26,15 @@ class Realm; namespace builtins { -using BuiltinSourceMap = std::map; +using InternalizedBuiltinSourceMap = std::map; +using NonInternalizedBuiltinSourceMap = std::map; using BuiltinCodeCacheMap = std::unordered_map>; +const InternalizedBuiltinSourceMap* GetInternalizedBuiltinSourceMap(); +const EternalBytes* GetConfig(); + struct CodeCacheInfo { std::string id; std::vector data; @@ -84,10 +89,6 @@ class NODE_EXTERN_PRIVATE BuiltinLoader { // Only allow access from friends. friend class CodeCacheBuilder; - // Generated by tools/js2c.py as node_javascript.cc - void LoadJavaScriptSource(); // Loads data into source_ - UnionBytes GetConfig(); // Return data for config.gypi - std::vector GetBuiltinIds() const; struct BuiltinCategories { @@ -133,9 +134,10 @@ class NODE_EXTERN_PRIVATE BuiltinLoader { void AddExternalizedBuiltin(const char* id, const char* filename); - ThreadsafeCopyOnWrite source_; + ThreadsafeCopyOnWrite source_; - const UnionBytes config_; + static void RegisterSourcesAsExternalReferences( + ExternalReferenceRegistry* registry); struct BuiltinCodeCache { RwLock mutex; diff --git a/src/node_eternal_bytes.h b/src/node_eternal_bytes.h new file mode 100644 index 00000000000000..047194d4e9f933 --- /dev/null +++ b/src/node_eternal_bytes.h @@ -0,0 +1,122 @@ +#ifndef SRC_NODE_ETERNAL_BYTES_H_ +#define SRC_NODE_ETERNAL_BYTES_H_ + +#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS + +// A pointer to const uint8_t* or const uint16_t* data that can be turned into +// external v8::String when given an isolate. Unlike UnionBytes, this assumes +// that the underlying buffer is eternal (will not be garbage collected) and +// reuses the same v8::String::ExternalStringResourceBase for all materialized +// strings. This allows the resource to be registered as an external reference +// for snapshotting. + +#include +#include "v8.h" + +namespace node { + +class EternalBytes; + +template +class EternalExternalByteResource : public Base { + static_assert(sizeof(IChar) == sizeof(Char), + "incompatible interface and internal pointers"); + + public: + explicit EternalExternalByteResource(const Char* data, size_t length) + : data_(data), length_(length) {} + + const IChar* data() const override { + return reinterpret_cast(data_); + } + size_t length() const override { return length_; } + + void Dispose() override { + // Do nothing. This class is owned by the EternalBytes instance and so + // should not be destroyed. It may also be in use by other external strings + // besides the one which was collected. + } + + EternalExternalByteResource(const EternalExternalByteResource&) = delete; + EternalExternalByteResource& operator=(const EternalExternalByteResource&) = + delete; + + friend class EternalBytes; + + private: + const Char* data_; + const size_t length_; +}; + +using EternalExternalOneByteResource = + EternalExternalByteResource; +using EternalExternalTwoByteResource = + EternalExternalByteResource; + +namespace { +template +struct overloaded : Ts... { + using Ts::operator()...; +}; +template +overloaded(Ts...) -> overloaded; +} // namespace + +class EternalBytes { + public: + EternalBytes(const uint8_t* data, size_t length) + : resource_(new EternalExternalOneByteResource(data, length)){}; + EternalBytes(const uint16_t* data, size_t length) + : resource_(new EternalExternalTwoByteResource(data, length)){}; + + EternalBytes(const EternalBytes&) = default; + EternalBytes& operator=(const EternalBytes&) = default; + EternalBytes(EternalBytes&&) = default; + EternalBytes& operator=(EternalBytes&&) = default; + + bool IsOneByte() const { + return std::holds_alternative(resource_); + } + + const v8::String::ExternalStringResourceBase* AsResource() const { + return std::visit( + overloaded{ + [](EternalExternalOneByteResource* resource) { + return static_cast( + resource); + }, + [](EternalExternalTwoByteResource* resource) { + return static_cast( + resource); + }}, + resource_); + } + + v8::Local ToStringChecked(v8::Isolate* isolate) const { + return std::visit( + overloaded{[=](EternalExternalOneByteResource* resource) { + return v8::String::NewExternalOneByte(isolate, resource) + .ToLocalChecked(); + }, + [=](EternalExternalTwoByteResource* resource) { + return v8::String::NewExternalTwoByte(isolate, resource) + .ToLocalChecked(); + }}, + resource_); + } + + private: + const std::variant + resource_; +}; + +} // namespace node + +#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS + +#endif // SRC_NODE_ETERNAL_BYTES_H_ diff --git a/src/node_external_reference.h b/src/node_external_reference.h index b2a90ba5194316..2b3b2e0841a85a 100644 --- a/src/node_external_reference.h +++ b/src/node_external_reference.h @@ -47,7 +47,8 @@ class ExternalReferenceRegistry { V(v8::IndexedPropertyDefinerCallback) \ V(v8::IndexedPropertyDeleterCallback) \ V(v8::IndexedPropertyQueryCallback) \ - V(v8::IndexedPropertyDescriptorCallback) + V(v8::IndexedPropertyDescriptorCallback) \ + V(const v8::String::ExternalStringResourceBase*) #define V(ExternalReferenceType) \ void Register(ExternalReferenceType addr) { RegisterT(addr); } diff --git a/test/cctest/test_per_process.cc b/test/cctest/test_per_process.cc index 34cf163add7904..822bfb3a6cf2f5 100644 --- a/test/cctest/test_per_process.cc +++ b/test/cctest/test_per_process.cc @@ -7,26 +7,28 @@ #include using node::builtins::BuiltinLoader; -using node::builtins::BuiltinSourceMap; +using node::builtins::InternalizedBuiltinSourceMap; class PerProcessTest : public ::testing::Test { protected: - static const BuiltinSourceMap get_sources_for_test() { - return *BuiltinLoader().source_.read(); + static const InternalizedBuiltinSourceMap* + get_internalized_builtin_source_map_for_test() { + return node::builtins::GetInternalizedBuiltinSourceMap(); } }; namespace { TEST_F(PerProcessTest, EmbeddedSources) { - const auto& sources = PerProcessTest::get_sources_for_test(); - ASSERT_TRUE(std::any_of(sources.cbegin(), sources.cend(), [](auto p) { - return p.second.is_one_byte(); - })) << "BuiltinLoader::source_ should have some 8bit items"; - - ASSERT_TRUE(std::any_of(sources.cbegin(), sources.cend(), [](auto p) { - return !p.second.is_one_byte(); - })) << "BuiltinLoader::source_ should have some 16bit items"; + const auto& sources = + PerProcessTest::get_internalized_builtin_source_map_for_test(); + ASSERT_TRUE(std::any_of(sources->cbegin(), sources->cend(), [](auto p) { + return p.second.IsOneByte(); + })) << "internalized_builtin_source_map should have some 8bit items"; + + ASSERT_TRUE(std::any_of(sources->cbegin(), sources->cend(), [](auto p) { + return !p.second.IsOneByte(); + })) << "internalized_builtin_source_map should have some 16bit items"; } } // end namespace diff --git a/tools/js2c.py b/tools/js2c.py index 504345e7894a85..08ed033c8a6e6e 100755 --- a/tools/js2c.py +++ b/tools/js2c.py @@ -58,17 +58,19 @@ def ReadFile(filename): {0} namespace {{ -const ThreadsafeCopyOnWrite global_source_map {{ - BuiltinSourceMap{{ {1} }} +const InternalizedBuiltinSourceMap internalized_builtin_source_map {{ + InternalizedBuiltinSourceMap{{ {1} }} }}; + +const EternalBytes config(config_raw, {2}); }} -void BuiltinLoader::LoadJavaScriptSource() {{ - source_ = global_source_map; +const InternalizedBuiltinSourceMap* GetInternalizedBuiltinSourceMap() {{ + return &internalized_builtin_source_map; }} -UnionBytes BuiltinLoader::GetConfig() {{ - return UnionBytes(config_raw, {2}); // config.gypi +const EternalBytes* GetConfig() {{ + return &config; }} }} // namespace builtins @@ -88,7 +90,7 @@ def ReadFile(filename): }}; """ -INITIALIZER = '{{"{0}", UnionBytes{{{1}, {2}}} }},' +INITIALIZER = '{{"{0}", {{{1}, {2}}} }},' CONFIG_GYPI_ID = 'config_raw'