Add large blob storage to Cache #7198

Merged

24 commits merged on Nov 23, 2021
Changes from 14 commits
Commits (24)
f99e407
Add large blob methods to cache
lettertwo Oct 26, 2021
f6c22d7
Cache request graph as large blob
lettertwo Oct 26, 2021
8f0a103
Fix LMDBCache serialization
lettertwo Oct 27, 2021
aec63a8
Prepare LMDBCache fs for serialization
lettertwo Oct 27, 2021
2189909
Merge branch 'v2' into lettertwo/cache-large-blob
lettertwo Oct 29, 2021
6681eef
Cache graph request results as large blobs
lettertwo Nov 2, 2021
9fa5e09
Merge branch 'v2' into lettertwo/cache-large-blob
lettertwo Nov 2, 2021
6c88b5d
Revert configurable LMDBCache FS
lettertwo Nov 2, 2021
74b4dd4
Fall back to FS for large blobs in LMDBCache
lettertwo Nov 3, 2021
28c5b66
Merge branch 'v2' into lettertwo/cache-large-blob
lettertwo Nov 3, 2021
5734312
Remove stale value when auto storing a large blob
lettertwo Nov 3, 2021
da0a359
Stream large blobs out of fs cache
lettertwo Nov 3, 2021
847f8be
Merge branch 'v2' into lettertwo/cache-large-blob
lettertwo Nov 15, 2021
e13e8d0
Merge branch 'v2' into lettertwo/cache-large-blob
devongovett Nov 16, 2021
a53b39b
Revert fall back to FS for large blobs in LMDBCache
lettertwo Nov 16, 2021
ce8fe8e
Prevent large blobs from being stored in cache
lettertwo Nov 16, 2021
6d9f276
Set/get streams to/from FS in LMDBCache
lettertwo Nov 16, 2021
63f6f2c
Only stream large assets from cache
lettertwo Nov 17, 2021
50dfd3b
Merge branch 'v2' into lettertwo/cache-large-blob
lettertwo Nov 18, 2021
0776a9e
Remove default empty stream from `getStream`
lettertwo Nov 18, 2021
d9fc18f
Differentiate large blobs in FSCache
lettertwo Nov 18, 2021
ed1b602
Fix type error
lettertwo Nov 18, 2021
88b1e22
Merge branch 'v2' into lettertwo/cache-large-blob
lettertwo Nov 22, 2021
3f3c831
Remove vestigial isLargeBlob check
lettertwo Nov 22, 2021
12 changes: 12 additions & 0 deletions packages/core/cache/src/FSCache.js
@@ -77,6 +77,18 @@ export class FSCache implements Cache {
}
}

hasLargeBlob(key: string): Promise<boolean> {
return this.has(key);
}

getLargeBlob(key: string): Promise<Buffer> {
return this.getBlob(key);
}

setLargeBlob(key: string, contents: Buffer | string): Promise<void> {
return this.setBlob(key, contents);
}

async get<T>(key: string): Promise<?T> {
try {
let data = await this.fs.readFile(this._getCachePath(key));
76 changes: 56 additions & 20 deletions packages/core/cache/src/LMDBCache.js
@@ -3,19 +3,23 @@ import type {Readable} from 'stream';
import type {FilePath} from '@parcel/types';
import type {Cache} from './types';

import path from 'path';
import {serialize, deserialize, registerSerializableClass} from '@parcel/core';
import {NodeFS} from '@parcel/fs';
import {blobToStream, bufferStream} from '@parcel/utils';
// flowlint-next-line untyped-import:off
import packageJson from '../package.json';
// $FlowFixMe
import lmdb from 'lmdb-store';

export class LMDBCache implements Cache {
fs: NodeFS;
dir: FilePath;
// $FlowFixMe
store: any;

constructor(cacheDir: FilePath) {
this.fs = new NodeFS();
this.dir = cacheDir;

this.store = lmdb.open(cacheDir, {
@@ -40,45 +44,77 @@
}

has(key: string): Promise<boolean> {
return Promise.resolve(this.store.get(key) != null);
if (this.store.get(key) != null) return Promise.resolve(true);
return this.hasLargeBlob(key);
}

get<T>(key: string): Promise<?T> {
let data = this.store.get(key);
if (data == null) {
return Promise.resolve(null);
}

return Promise.resolve(deserialize(data));
async get<T>(key: string): Promise<?T> {
let data = await this.getBuffer(key);
return data == null ? null : deserialize(data);
}

async set(key: string, value: mixed): Promise<void> {
await this.store.put(key, serialize(value));
await this.setBlob(key, serialize(value));
}

getStream(key: string): Readable {
return blobToStream(this.store.get(key));
let buf = this.store.get(key);
if (buf != null) return blobToStream(buf);
return this.fs.createReadStream(path.join(this.dir, key));
}

async setStream(key: string, stream: Readable): Promise<void> {
let buf = await bufferStream(stream);
await this.store.put(key, buf);
await this.setBlob(key, await bufferStream(stream));
}

getBlob(key: string): Promise<Buffer> {
let buffer = this.store.get(key);
return buffer != null
? Promise.resolve(buffer)
: Promise.reject(new Error(`Key ${key} not found in cache`));
async getBlob(key: string): Promise<Buffer> {
let buffer = await this.getBuffer(key);
if (buffer == null) throw new Error(`Key ${key} not found in cache`);
return buffer;
}

async setBlob(key: string, contents: Buffer | string): Promise<void> {
await this.store.put(key, contents);
if (isLargeBlob(contents)) {
// Remove the old blob if it has been 'upgraded' to large blob storage.
if (this.store.get(key) != null) await this.store.remove(key);
await this.setLargeBlob(key, contents);
} else {
await this.store.put(key, contents);
}
}

async getBuffer(key: string): Promise<?Buffer> {
let buffer = this.store.get(key);
if (buffer == null && (await this.hasLargeBlob(key))) {
buffer = await this.getLargeBlob(key);
}
return buffer;
}

hasLargeBlob(key: string): Promise<boolean> {
return this.fs.exists(path.join(this.dir, key));
}

getBuffer(key: string): Promise<?Buffer> {
return Promise.resolve(this.store.get(key));
getLargeBlob(key: string): Promise<Buffer> {
return this.fs.readFile(path.join(this.dir, key));
}

async setLargeBlob(key: string, contents: Buffer | string): Promise<void> {
await this.fs.writeFile(path.join(this.dir, key), contents);
}
}

// lmdb-store decodes cached binary data into a Node Buffer
// via `Nan::NewBuffer`, which enforces a max size of ~1GB.
// We subtract 9 bytes to account for any compression header
// added by lmdb-store when encoding the data.
// See: https://github.com/nodejs/nan/issues/883
const MAX_BUFFER_SIZE = 0x3fffffff - 9;

function isLargeBlob(contents: Buffer | string): boolean {
return typeof contents === 'string'
? Buffer.byteLength(contents) > MAX_BUFFER_SIZE
: contents.length > MAX_BUFFER_SIZE;
}

registerSerializableClass(`${packageJson.version}:LMDBCache`, LMDBCache);
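
Taken together, the LMDBCache changes above make the large-blob path transparent to callers: setBlob routes anything over MAX_BUFFER_SIZE to a file in the cache directory (removing any stale LMDB entry under the same key), and getBuffer/getStream fall back to that file when the key is missing from the LMDB store. A minimal usage sketch, assuming LMDBCache is exported from @parcel/cache and using a hypothetical cache directory and keys:

import {LMDBCache} from '@parcel/cache'; // assumption: exported from the package root

async function roundTrip() {
  let cache = new LMDBCache('/tmp/parcel-cache'); // hypothetical cache directory

  // Small values go straight into the LMDB store.
  await cache.setBlob('small-key', Buffer.from('hello'));

  // Callers can also target the file-backed path explicitly; setBlob would do
  // the same automatically for any value larger than MAX_BUFFER_SIZE.
  await cache.setLargeBlob('graph-key', Buffer.from('pretend this is ~1GB'));

  // Reads check the LMDB store first, then fall back to the file on disk.
  let small = await cache.getBlob('small-key');
  let large = await cache.getLargeBlob('graph-key');
  return [small, large];
}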
3 changes: 3 additions & 0 deletions packages/core/cache/src/types.js
Expand Up @@ -10,5 +10,8 @@ export interface Cache {
setStream(key: string, stream: Readable): Promise<void>;
getBlob(key: string): Promise<Buffer>;
setBlob(key: string, contents: Buffer | string): Promise<void>;
hasLargeBlob(key: string): Promise<boolean>;
getLargeBlob(key: string): Promise<Buffer>;
setLargeBlob(key: string, contents: Buffer | string): Promise<void>;
getBuffer(key: string): Promise<?Buffer>;
}
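
The interface additions let consumers opt into the file-backed path without knowing which cache backend is in use. As a hypothetical illustration (not part of this PR), a consumer reading a serialized value through the large-blob methods might look like the following, assuming the Cache type is re-exported from @parcel/cache:

import type {Cache} from '@parcel/cache'; // assumption: type re-exported from the package root
import {deserialize} from '@parcel/core';

// Hypothetical helper mirroring what RequestTracker does in the diff below.
async function readLargeValue<T>(cache: Cache, key: string): Promise<?T> {
  if (!(await cache.hasLargeBlob(key))) {
    return null;
  }
  let buffer = await cache.getLargeBlob(key);
  return (deserialize(buffer): T);
}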
26 changes: 17 additions & 9 deletions packages/core/core/src/RequestTracker.js
@@ -29,6 +29,7 @@ import {
} from '@parcel/utils';
import {hashString} from '@parcel/hash';
import {ContentGraph} from '@parcel/graph';
import {deserialize, serialize} from './serializer';
import {assertSignalNotAborted, hashFromOption} from './utils';
import {
type ProjectPath,
@@ -856,10 +857,11 @@
let result: T = (node.value.result: any);
return result;
} else if (node.value.resultCacheKey != null && ifMatch == null) {
let cachedResult: T = (nullthrows(
await this.options.cache.get(node.value.resultCacheKey),
// $FlowFixMe
): any);
let key = node.value.resultCacheKey;
invariant(this.options.cache.hasLargeBlob(key));
let cachedResult: T = deserialize(
await this.options.cache.getLargeBlob(key),
);
node.value.result = cachedResult;
return cachedResult;
}
@@ -1050,13 +1052,18 @@
let resultCacheKey = node.value.resultCacheKey;
if (resultCacheKey != null && node.value.result != null) {
promises.push(
this.options.cache.set(resultCacheKey, node.value.result),
this.options.cache.setLargeBlob(
resultCacheKey,
serialize(node.value.result),
),
);
delete node.value.result;
}
}

promises.push(this.options.cache.set(requestGraphKey, this.graph));
promises.push(
this.options.cache.setLargeBlob(requestGraphKey, serialize(this.graph)),
);

let opts = getWatcherOptions(this.options);
let snapshotPath = path.join(this.options.cacheDir, snapshotKey + '.txt');
@@ -1100,9 +1107,10 @@ async function loadRequestGraph(options): Async<RequestGraph> {

let cacheKey = getCacheKey(options);
let requestGraphKey = hashString(`${cacheKey}:requestGraph`);
let requestGraph = await options.cache.get<RequestGraph>(requestGraphKey);

if (requestGraph) {
if (await options.cache.hasLargeBlob(requestGraphKey)) {
let requestGraph: RequestGraph = deserialize(
await options.cache.getLargeBlob(requestGraphKey),
);
let opts = getWatcherOptions(options);
let snapshotKey = hashString(`${cacheKey}:snapshot`);
let snapshotPath = path.join(options.cacheDir, snapshotKey + '.txt');