Skip to content

Commit

Permalink
fs: improve promise based readFile performance for big files
Browse files Browse the repository at this point in the history
This significantly reduces the peak memory for the promise
based readFile operation by reusing a single memory chunk after
each read and strinigifying that chunk immediately.

Signed-off-by: Ruben Bridgewater <ruben@bridgewater.de>
PR-URL: #44295
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
  • Loading branch information
BridgeAR authored and danielleadams committed Oct 11, 2022
1 parent 9c7e664 commit 67fb765
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 37 deletions.
9 changes: 8 additions & 1 deletion benchmark/fs/readfile-promises.js
Expand Up @@ -16,7 +16,14 @@ const filename = path.resolve(tmpdir.path,
const bench = common.createBenchmark(main, {
duration: [5],
encoding: ['', 'utf-8'],
len: [1024, 16 * 1024 * 1024],
len: [
1024,
512 * 1024,
4 * 1024 ** 2,
8 * 1024 ** 2,
16 * 1024 ** 2,
32 * 1024 ** 2,
],
concurrent: [1, 10]
});

Expand Down
5 changes: 5 additions & 0 deletions lib/fs.js
Expand Up @@ -339,6 +339,9 @@ function readFileAfterStat(err, stats) {
if (err)
return context.close(err);

// TODO(BridgeAR): Check if allocating a smaller chunk is better performance
// wise, similar to the promise based version (less peak memory and chunked
// stringify operations vs multiple C++/JS boundary crossings).
const size = context.size = isFileType(stats, S_IFREG) ? stats[8] : 0;

if (size > kIoMaxLength) {
Expand All @@ -348,6 +351,8 @@ function readFileAfterStat(err, stats) {

try {
if (size === 0) {
// TODO(BridgeAR): If an encoding is set, use the StringDecoder to concat
// the result and reuse the buffer instead of allocating a new one.
context.buffers = [];
} else {
context.buffer = Buffer.allocUnsafeSlow(size);
Expand Down
87 changes: 54 additions & 33 deletions lib/internal/fs/promises.js
Expand Up @@ -86,6 +86,7 @@ const {
promisify,
} = require('internal/util');
const { EventEmitterMixin } = require('internal/event_target');
const { StringDecoder } = require('string_decoder');
const { watch } = require('internal/fs/watchers');
const { isIterable } = require('internal/streams/utils');
const assert = require('internal/assert');
Expand Down Expand Up @@ -416,63 +417,83 @@ async function writeFileHandle(filehandle, data, signal, encoding) {

async function readFileHandle(filehandle, options) {
const signal = options?.signal;
const encoding = options?.encoding;
const decoder = encoding && new StringDecoder(encoding);

checkAborted(signal);

const statFields = await binding.fstat(filehandle.fd, false, kUsePromises);

checkAborted(signal);

let size;
let size = 0;
let length = 0;
if ((statFields[1/* mode */] & S_IFMT) === S_IFREG) {
size = statFields[8/* size */];
} else {
size = 0;
length = encoding ? MathMin(size, kReadFileBufferLength) : size;
}
if (length === 0) {
length = kReadFileUnknownBufferLength;
}

if (size > kIoMaxLength)
throw new ERR_FS_FILE_TOO_LARGE(size);

let endOfFile = false;
let totalRead = 0;
const noSize = size === 0;
const buffers = [];
const fullBuffer = noSize ? undefined : Buffer.allocUnsafeSlow(size);
do {
let buffer = Buffer.allocUnsafeSlow(length);
let result = '';
let offset = 0;
let buffers;
const chunkedRead = length > kReadFileBufferLength;

while (true) {
checkAborted(signal);
let buffer;
let offset;
let length;
if (noSize) {
buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
offset = 0;
length = kReadFileUnknownBufferLength;
} else {
buffer = fullBuffer;
offset = totalRead;

if (chunkedRead) {
length = MathMin(size - totalRead, kReadFileBufferLength);
}

This comment was marked as spam.

Copy link
@Waazer

Waazer Nov 21, 2022

This comment was marked as resolved.

Copy link
@Trott

Trott Nov 21, 2022

Member

User blocked.

const bytesRead = (await binding.read(filehandle.fd, buffer, offset,
length, -1, kUsePromises)) || 0;
length, -1, kUsePromises)) ?? 0;
totalRead += bytesRead;
endOfFile = bytesRead === 0 || totalRead === size;
if (noSize && bytesRead > 0) {
const isBufferFull = bytesRead === kReadFileUnknownBufferLength;
const chunkBuffer = isBufferFull ? buffer : buffer.slice(0, bytesRead);
ArrayPrototypePush(buffers, chunkBuffer);

if (bytesRead === 0 ||
totalRead === size ||
(bytesRead !== buffer.length && !chunkedRead)) {
const singleRead = bytesRead === totalRead;

const bytesToCheck = chunkedRead ? totalRead : bytesRead;

if (bytesToCheck !== buffer.length) {
buffer = buffer.subarray(0, bytesToCheck);
}

if (!encoding) {
if (size === 0 && !singleRead) {
ArrayPrototypePush(buffers, buffer);
return Buffer.concat(buffers, totalRead);
}
return buffer;
}

if (singleRead) {
return buffer.toString(encoding);
}
result += decoder.end(buffer);
return result;
}
} while (!endOfFile);

let result;
if (size > 0) {
result = totalRead === size ? fullBuffer : fullBuffer.slice(0, totalRead);
} else {
result = buffers.length === 1 ? buffers[0] : Buffer.concat(buffers,
totalRead);
if (encoding) {
result += decoder.write(buffer);
} else if (size !== 0) {
offset = totalRead;
} else {
buffers ??= [];
// Unknown file size requires chunks.
ArrayPrototypePush(buffers, buffer);
buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
}
}

return options.encoding ? result.toString(options.encoding) : result;
}

// All of the functions are defined as async in order to ensure that errors
Expand Down
18 changes: 15 additions & 3 deletions test/parallel/test-fs-promises-readfile.js
@@ -1,3 +1,4 @@
// Flags: --expose-internals
'use strict';

const common = require('../common');
Expand All @@ -6,15 +7,15 @@ const assert = require('assert');
const path = require('path');
const { writeFile, readFile } = require('fs').promises;
const tmpdir = require('../common/tmpdir');
const { internalBinding } = require('internal/test/binding');
const fsBinding = internalBinding('fs');
tmpdir.refresh();

const fn = path.join(tmpdir.path, 'large-file');

// Creating large buffer with random content
const largeBuffer = Buffer.from(
Array.apply(null, { length: 16834 * 2 })
.map(Math.random)
.map((number) => (number * (1 << 8)))
Array.from({ length: 1024 ** 2 + 19 }, (_, index) => index)
);

async function createLargeFile() {
Expand Down Expand Up @@ -69,11 +70,22 @@ async function validateWrongSignalParam() {

}

async function validateZeroByteLiar() {
const originalFStat = fsBinding.fstat;
fsBinding.fstat = common.mustCall(
() => (/* stat fields */ [0, 1, 2, 3, 4, 5, 6, 7, 0 /* size */])
);
const readBuffer = await readFile(fn);
assert.strictEqual(readBuffer.toString(), largeBuffer.toString());
fsBinding.fstat = originalFStat;
}

(async () => {
await createLargeFile();
await validateReadFile();
await validateReadFileProc();
await validateReadFileAbortLogicBefore();
await validateReadFileAbortLogicDuring();
await validateWrongSignalParam();
await validateZeroByteLiar();
})().then(common.mustCall());

0 comments on commit 67fb765

Please sign in to comment.