Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bootstrap: make snapshot reproducible #50983

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/api/environment.cc
Expand Up @@ -458,7 +458,13 @@ Environment* CreateEnvironment(
if (use_snapshot) {
context = Context::FromSnapshot(isolate,
SnapshotData::kNodeMainContextIndex,
{DeserializeNodeInternalFields, env})
v8::DeserializeInternalFieldsCallback(
DeserializeNodeInternalFields, env),
nullptr,
MaybeLocal<Value>(),
nullptr,
v8::DeserializeContextDataCallback(
DeserializeNodeContextData, env))
.ToLocalChecked();

CHECK(!context.IsEmpty());
Expand Down
24 changes: 19 additions & 5 deletions src/blob_serializer_deserializer-inl.h
Expand Up @@ -238,7 +238,8 @@ size_t BlobSerializer<Impl>::WriteVector(const std::vector<T>& data) {
if (is_debug) {
std::string str = std::is_arithmetic_v<T> ? "" : ToStr(data);
std::string name = GetName<T>();
Debug("\nWriteVector<%s>() (%d-byte), count=%d: %s\n",
Debug("\nAt 0x%x: WriteVector<%s>() (%d-byte), count=%d: %s\n",
sink.size(),
name.c_str(),
sizeof(T),
data.size(),
Expand Down Expand Up @@ -270,7 +271,10 @@ size_t BlobSerializer<Impl>::WriteVector(const std::vector<T>& data) {
template <typename Impl>
size_t BlobSerializer<Impl>::WriteStringView(std::string_view data,
StringLogMode mode) {
Debug("WriteStringView(), length=%zu: %p\n", data.size(), data.data());
Debug("At 0x%x: WriteStringView(), length=%zu: %p\n",
sink.size(),
data.size(),
data.data());
size_t written_total = WriteArithmetic<size_t>(data.size());

size_t length = data.size();
Expand All @@ -294,17 +298,27 @@ size_t BlobSerializer<Impl>::WriteString(const std::string& data) {
return WriteStringView(data, StringLogMode::kAddressAndContent);
}

// Maximum number of leading elements echoed in the debug preview when an
// arithmetic array is written. It is a read-only limit, so it is a
// compile-time constant rather than a mutable per-translation-unit global.
static constexpr size_t kPreviewCount = 16;

// Helper for writing an array of numeric types.
template <typename Impl>
template <typename T>
size_t BlobSerializer<Impl>::WriteArithmetic(const T* data, size_t count) {
static_assert(std::is_arithmetic_v<T>, "Arithmetic type");
DCHECK_GT(count, 0); // Should not write contents for vectors of size 0.
if (is_debug) {
std::string str =
"{ " + std::to_string(data[0]) + (count > 1 ? ", ... }" : " }");
size_t preview_count = count < kPreviewCount ? count : kPreviewCount;
std::string str = "{ ";
for (size_t i = 0; i < preview_count; ++i) {
str += (std::to_string(data[i]) + ",");
}
if (count > preview_count) {
str += "...";
}
str += "}";
std::string name = GetName<T>();
Debug("Write<%s>() (%zu-byte), count=%zu: %s",
Debug("At 0x%x: Write<%s>() (%zu-byte), count=%zu: %s",
sink.size(),
name.c_str(),
sizeof(T),
count,
Expand Down
24 changes: 15 additions & 9 deletions src/node.cc
Expand Up @@ -1279,18 +1279,24 @@ ExitCode GenerateAndWriteSnapshotData(const SnapshotData** snapshot_data_ptr,
return exit_code;
}
} else {
std::optional<std::string> builder_script_content;
// Otherwise, load and run the specified builder script.
std::unique_ptr<SnapshotData> generated_data =
std::make_unique<SnapshotData>();
std::string builder_script_content;
int r = ReadFileSync(&builder_script_content, builder_script.c_str());
if (r != 0) {
FPrintF(stderr,
"Cannot read builder script %s for building snapshot. %s: %s",
builder_script,
uv_err_name(r),
uv_strerror(r));
return ExitCode::kGenericUserError;
if (builder_script != "node:generate_default_snapshot") {
builder_script_content = std::string();
int r = ReadFileSync(&(builder_script_content.value()),
builder_script.c_str());
if (r != 0) {
FPrintF(stderr,
"Cannot read builder script %s for building snapshot. %s: %s\n",
builder_script,
uv_err_name(r),
uv_strerror(r));
return ExitCode::kGenericUserError;
}
} else {
snapshot_config.builder_script_path = std::nullopt;
}

exit_code = node::SnapshotBuilder::Generate(generated_data.get(),
Expand Down
58 changes: 55 additions & 3 deletions src/node_snapshotable.cc
Expand Up @@ -603,7 +603,7 @@ std::vector<char> SnapshotData::ToBlob() const {
written_total += w.WriteArithmetic<uint32_t>(kMagic);
w.Debug("Write metadata\n");
written_total += w.Write<SnapshotMetadata>(metadata);

w.Debug("Write snapshot blob\n");
written_total += w.Write<v8::StartupData>(v8_snapshot_blob_data);
w.Debug("Write isolate_data_indices\n");
written_total += w.Write<IsolateDataSerializeInfo>(isolate_data_info);
Expand Down Expand Up @@ -1155,8 +1155,11 @@ ExitCode SnapshotBuilder::CreateSnapshot(SnapshotData* out,
CHECK_EQ(index, SnapshotData::kNodeVMContextIndex);
index = creator->AddContext(base_context);
CHECK_EQ(index, SnapshotData::kNodeBaseContextIndex);
index = creator->AddContext(main_context,
{SerializeNodeContextInternalFields, env});
index = creator->AddContext(
main_context,
v8::SerializeInternalFieldsCallback(SerializeNodeContextInternalFields,
env),
v8::SerializeContextDataCallback(SerializeNodeContextData, env));
CHECK_EQ(index, SnapshotData::kNodeMainContextIndex);
}

Expand Down Expand Up @@ -1255,6 +1258,17 @@ std::string SnapshotableObject::GetTypeName() const {
}
}

// Callback wrapped in v8::DeserializeContextDataCallback when the main
// context is created from the snapshot.
//
// holder:        the context whose embedder data slot is being restored.
// index:         the embedder data slot; in debug builds it is checked to be
//                kEnvironment, kRealm or kContextTag.
// payload:       the serialized data for the slot; not read here.
// callback_data: opaque pointer supplied at registration; not read here.
void DeserializeNodeContextData(Local<Context> holder,
int index,
StartupData payload,
void* callback_data) {
// Debug-only sanity check on the slot index; nothing else happens here.
DCHECK(index == ContextEmbedderIndex::kEnvironment ||
index == ContextEmbedderIndex::kRealm ||
index == ContextEmbedderIndex::kContextTag);
// This is a no-op for now. We will reset all the pointers in
// Environment::AssignToContext() via the realm constructor.
}

void DeserializeNodeInternalFields(Local<Object> holder,
int index,
StartupData payload,
Expand Down Expand Up @@ -1320,6 +1334,44 @@ void DeserializeNodeInternalFields(Local<Object> holder,
}
}

// Callback wrapped in v8::SerializeContextDataCallback when the main context
// is added to the snapshot creator. Produces the payload stored for one
// embedder data slot of |holder|.
//
// holder:        the context being serialized.
// index:         the embedder data slot; expected to be kEnvironment,
//                kContextifyContext, kRealm or kContextTag.
// callback_data: opaque pointer supplied at registration; not read here.
//
// Returns an empty payload ({nullptr, 0}) for kContextifyContext. For the
// other expected slots it returns a new[]-allocated buffer that holds the
// slot index as a ContextEmbedderIndex. Any other index hits UNREACHABLE().
StartupData SerializeNodeContextData(Local<Context> holder,
                                     int index,
                                     void* callback_data) {
  DCHECK(index == ContextEmbedderIndex::kEnvironment ||
         index == ContextEmbedderIndex::kContextifyContext ||
         index == ContextEmbedderIndex::kRealm ||
         index == ContextEmbedderIndex::kContextTag);
  void* data = holder->GetAlignedPointerFromEmbedderData(index);
  per_process::Debug(DebugCategory::MKSNAPSHOT,
                     "Serialize context data, index=%d, holder=%p, ptr=%p\n",
                     static_cast<int>(index),
                     *holder,
                     data);
  // Serialization of contextify context is not yet supported.
  if (index == ContextEmbedderIndex::kContextifyContext) {
    DCHECK_NULL(data);
    return {nullptr, 0};
  }

  // Validate the slot before allocating. kContextifyContext was already
  // handled by the early return above, so it is not listed here.
  switch (index) {
    case ContextEmbedderIndex::kEnvironment:
    case ContextEmbedderIndex::kRealm:
    case ContextEmbedderIndex::kContextTag:
      break;
    default:
      UNREACHABLE();
  }

  // We need to use new[] because V8 calls delete[] on the returned data.
  int size = sizeof(ContextEmbedderIndex);
  char* result = new char[size];
  ContextEmbedderIndex* index_data =
      reinterpret_cast<ContextEmbedderIndex*>(result);
  *index_data = static_cast<ContextEmbedderIndex>(index);

  // For now we just reset all of them in Environment::AssignToContext()
  return StartupData{result, size};
}

StartupData SerializeNodeContextInternalFields(Local<Object> holder,
int index,
void* callback_data) {
Expand Down
7 changes: 7 additions & 0 deletions src/node_snapshotable.h
Expand Up @@ -126,10 +126,17 @@ class SnapshotableObject : public BaseObject {
v8::StartupData SerializeNodeContextInternalFields(v8::Local<v8::Object> holder,
int index,
void* env);
v8::StartupData SerializeNodeContextData(v8::Local<v8::Context> holder,
int index,
void* env);
void DeserializeNodeInternalFields(v8::Local<v8::Object> holder,
int index,
v8::StartupData payload,
void* env);
void DeserializeNodeContextData(v8::Local<v8::Context> holder,
int index,
v8::StartupData payload,
void* env);
void SerializeSnapshotableObjects(Realm* realm,
v8::SnapshotCreator* creator,
RealmSerializeInfo* info);
Expand Down
53 changes: 53 additions & 0 deletions test/parallel/test-snapshot-reproducible.js
@@ -0,0 +1,53 @@
'use strict';

require('../common');
const { spawnSyncAndExitWithoutError } = require('../common/child_process');
const tmpdir = require('../common/tmpdir');
const fs = require('fs');
const assert = require('assert');
const fixtures = require('../common/fixtures');

Check failure on line 8 in test/parallel/test-snapshot-reproducible.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

'fixtures' is assigned a value but never used

/**
 * Refreshes the test temporary directory, runs the current Node.js binary
 * there with `--build-snapshot node:generate_default_snapshot` (plus a fixed
 * random seed and `--predictable`), and returns the contents of the
 * `snapshot.blob` file resolved against that directory.
 *
 * @returns {Buffer} The bytes read from `snapshot.blob`.
 */
function generateSnapshot() {
  tmpdir.refresh();

  // Fixed seed and --predictable are passed on every run so that two
  // invocations are started with identical flags.
  const buildArgs = [
    '--random_seed=42',
    '--predictable',
    '--build-snapshot',
    'node:generate_default_snapshot',
  ];
  const spawnOptions = { cwd: tmpdir.path };
  spawnSyncAndExitWithoutError(process.execPath, buildArgs, spawnOptions);

  return fs.readFileSync(tmpdir.resolve('snapshot.blob'));
}

const buf1 = generateSnapshot();
const buf2 = generateSnapshot();
const diff = [];
let offset = 0;
const step = 16;
do {
const length = Math.min(buf1.length - offset, step);
const slice1 = buf1.slice(offset, offset + length).toString('hex');
const slice2 = buf2.slice(offset, offset + length).toString('hex');
if (slice1 != slice2) {

Check failure on line 38 in test/parallel/test-snapshot-reproducible.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

Expected '!==' and instead saw '!='
diff.push({offset, slice1, slice2});

Check failure on line 39 in test/parallel/test-snapshot-reproducible.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

A space is required after '{'

Check failure on line 39 in test/parallel/test-snapshot-reproducible.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

A space is required before '}'
}
offset += length;
} while (offset < buf1.length);

assert.strictEqual(offset, buf1.length);
if (offset < buf2.length) {
const length = Math.min(buf2.length - offset, step);
const slice2 = buf2.slice(offset, offset + length).toString('hex');
diff.push({offset, slice1: '', slice2});

Check failure on line 48 in test/parallel/test-snapshot-reproducible.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

A space is required after '{'

Check failure on line 48 in test/parallel/test-snapshot-reproducible.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

A space is required before '}'
offset += length;
} while (offset < buf2.length);

assert.deepStrictEqual(diff, [], 'Built-in snapshot should not change in different builds.');

Check failure on line 52 in test/parallel/test-snapshot-reproducible.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

Do not use a literal for the third argument of assert.deepStrictEqual()

Check failure on line 52 in test/parallel/test-snapshot-reproducible.js

View workflow job for this annotation

GitHub Actions / test-linux

--- stderr --- node:assert:126 throw new AssertionError(obj); ^ AssertionError [ERR_ASSERTION]: Built-in snapshot should not change in different builds. at Object.<anonymous> (/home/runner/work/node/node/test/parallel/test-snapshot-reproducible.js:52:8) at Module._compile (node:internal/modules/cjs/loader:1434:14) at Module._extensions..js (node:internal/modules/cjs/loader:1518:10) at Module.load (node:internal/modules/cjs/loader:1249:32) at Module._load (node:internal/modules/cjs/loader:1065:12) at Function.executeUserEntryPoint [as runMain] (node:internal/modules/run_main:158:12) at node:internal/main/run_main_module:30:49 { generatedMessage: false, code: 'ERR_ASSERTION', actual: [ { offset: 64, slice1: '000000701bb020821f137731322e342e', slice2: '000000a21d43d8821f137731322e342e' }, { offset: 1500160, slice1: '6005ac61e0c87f000018000000000000', slice2: '6005ac416cdb7f000018000000000000' }, { offset: 1500192, slice1: '00dd0b04400631545605000000100000', slice2: '00dd0b044006984f5c05000000100000' }, { offset: 1500224, slice1: 'c0013154560500000030000000000000', slice2: 'c001984f5c0500000030000000000000' }, { offset: 1500272, slice1: '0000392a0008080000392a0440083154', slice2: '0000392a0008080000392a044008984f' }, { offset: 1500288, slice1: '56050000001000000000000000001d4b', slice2: '5c050000001000000000000000001d4b' }, { offset: 1500336, slice1: '01650460003154560500000018000000', slice2: '0165046000984f5c0500000018000000' }, { offset: 1500368, slice1: '080200001165046002ac61e0c87f0000', slice2: '080200001165046002ac416cdb7f0000' }, { offset: 1500400, slice1: '00196500080400001965044004797515', slice2: '0019650008040000196504400403c4a5' }, { offset: 1500416, slice1: '60550000100000000000000000216500', slice2: 'c1550000100000000000000000216500' }, { offset: 1500432, slice1: '08070000216504400779751560550000', slice2: '08070000216504400703c4a5c1550000' } ], expected: [], operator: 'deepStrictEqual' } Node.js v23.0.0-pre Command: out/Release/node --test-reporter=spec 
--test-reporter-destination=stdout --test-reporter=./tools/github_reporter/index.js --test-reporter-destination=stdout /home/runner/work/node/node/test/parallel/test-snapshot-reproducible.js --- TIMEOUT ---
assert.strictEqual(buf1.length, buf2.length);