Skip to content

Commit

Permalink
fs: improve promise based readFile performance for big files
Browse files Browse the repository at this point in the history
This significantly reduces the peak memory for the promise
based readFile operation by reusing a single memory chunk after
each read and stringifying that chunk immediately.

Signed-off-by: Ruben Bridgewater <ruben@bridgewater.de>
  • Loading branch information
BridgeAR committed Aug 19, 2022
1 parent 5e57d24 commit 744bbd9
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 35 deletions.
14 changes: 11 additions & 3 deletions benchmark/fs/readfile-promises.js
Expand Up @@ -15,11 +15,19 @@ const filename = path.resolve(tmpdir.path,

const bench = common.createBenchmark(main, {
duration: [5],
len: [1024, 16 * 1024 * 1024],
len: [
1024,
512 * 1024,
4 * 1024 ** 2,
8 * 1024 ** 2,
16 * 1024 ** 2,
32 * 1024 ** 2
],
encoding: ['', 'utf8'],
concurrent: [1, 10]
});

function main({ len, duration, concurrent }) {
function main({ len, duration, encoding, concurrent }) {
try {
fs.unlinkSync(filename);
} catch {
Expand All @@ -44,7 +52,7 @@ function main({ len, duration, concurrent }) {
}, duration * 1000);

function read() {
fs.promises.readFile(filename)
fs.promises.readFile(filename, { encoding })
.then((res) => afterRead(undefined, res))
.catch((err) => afterRead(err));
}
Expand Down
6 changes: 6 additions & 0 deletions lib/fs.js
Expand Up @@ -343,6 +343,9 @@ function readFileAfterStat(err, stats) {
if (err)
return context.close(err);

// TODO(BridgeAR): Check if allocating a smaller chunk is better performance
// wise, similar to the promise based version (less peak memory and chunked
// stringify operations vs multiple C++/JS boundary crossings).
const size = context.size = isFileType(stats, S_IFREG) ? stats[8] : 0;

if (size > kIoMaxLength) {
Expand All @@ -352,6 +355,9 @@ function readFileAfterStat(err, stats) {

try {
if (size === 0) {
// TODO(BridgeAR): We are able to optimize this in case an encoding is used. If
// that's the case, let's use the StringDecoder and directly concat the
// result and reuse the former chunk instead of allocating a new one.
context.buffers = [];
} else {
context.buffer = Buffer.allocUnsafeSlow(size);
Expand Down
77 changes: 45 additions & 32 deletions lib/internal/fs/promises.js
Expand Up @@ -86,6 +86,7 @@ const {
promisify,
} = require('internal/util');
const { EventEmitterMixin } = require('internal/event_target');
const { StringDecoder } = require('string_decoder');
const { watch } = require('internal/fs/watchers');
const { isIterable } = require('internal/streams/utils');
const assert = require('internal/assert');
Expand Down Expand Up @@ -416,63 +417,75 @@ async function writeFileHandle(filehandle, data, signal, encoding) {

async function readFileHandle(filehandle, options) {
const signal = options?.signal;
const encoding = options?.encoding;
const decoder = encoding && new StringDecoder(encoding);

checkAborted(signal);

const statFields = await binding.fstat(filehandle.fd, false, kUsePromises);

checkAborted(signal);

let size;
let size = 0;
let length = 0;
if ((statFields[1/* mode */] & S_IFMT) === S_IFREG) {
size = statFields[8/* size */];
length = encoding ? MathMin(size, kReadFileBufferLength) : size;
} else {
size = 0;
length = kReadFileUnknownBufferLength;
}

if (size > kIoMaxLength)
throw new ERR_FS_FILE_TOO_LARGE(size);

let endOfFile = false;
let totalRead = 0;
const noSize = size === 0;
const buffers = [];
const fullBuffer = noSize ? undefined : Buffer.allocUnsafeSlow(size);
do {
let buffer = Buffer.allocUnsafeSlow(length);
let result = '';
let isBufferFull = true;
let offset = 0;
let buffers;

while (true) {
checkAborted(signal);
let buffer;
let offset;
let length;
if (noSize) {
buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
offset = 0;
length = kReadFileUnknownBufferLength;
} else {
buffer = fullBuffer;
offset = totalRead;
if (size === 0) {
length = MathMin(size - totalRead, kReadFileBufferLength);
}

const bytesRead = (await binding.read(filehandle.fd, buffer, offset,
length, -1, kUsePromises)) || 0;
length, -1, kUsePromises)) ?? 0;
totalRead += bytesRead;
endOfFile = bytesRead === 0 || totalRead === size;
if (noSize && bytesRead > 0) {
const isBufferFull = bytesRead === kReadFileUnknownBufferLength;
const chunkBuffer = isBufferFull ? buffer : buffer.slice(0, bytesRead);
ArrayPrototypePush(buffers, chunkBuffer);

if (bytesRead === 0 || totalRead === size) {
const singleRead = bytesRead === totalRead;
if (!encoding) {
if (size === 0 && !singleRead) {
return Buffer.concat(buffers, totalRead);
}
return buffer
}

if (singleRead) {
return buffer.toString(encoding);
}
result += decoder.end(buffer.slice(0, bytesRead));
return result;
}
} while (!endOfFile);

let result;
if (size > 0) {
result = totalRead === size ? fullBuffer : fullBuffer.slice(0, totalRead);
} else {
result = buffers.length === 1 ? buffers[0] : Buffer.concat(buffers,
totalRead);
if (size === 0) {
isBufferFull = bytesRead === kReadFileUnknownBufferLength;
// Unknown file size requires chunks.
if (!encoding) {
buffers ??= [];
ArrayPrototypePush(buffers, buffer);
buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength)
}
}
if (encoding) {
result += decoder.write(isBufferFull ? buffer : buffer.slice(0, bytesRead));
} else if(size !== 0) {
offset += bytesRead;
}
}

return options.encoding ? result.toString(options.encoding) : result;
}

// All of the functions are defined as async in order to ensure that errors
Expand Down

0 comments on commit 744bbd9

Please sign in to comment.