Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fs: improve promise based readFile performance for big files #44295

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 8 additions & 1 deletion benchmark/fs/readfile-promises.js
Expand Up @@ -16,7 +16,14 @@ const filename = path.resolve(tmpdir.path,
// Benchmark configuration handed to common.createBenchmark(). `len` is the
// file size in bytes and ranges from 1 KiB up to 32 MiB. Each key is listed
// exactly once: a duplicate `len` entry would be silently overwritten by the
// later one.
const bench = common.createBenchmark(main, {
  duration: [5],
  encoding: ['', 'utf-8'],
  len: [
    1024,
    512 * 1024,
    4 * 1024 ** 2,
    8 * 1024 ** 2,
    16 * 1024 ** 2,
    32 * 1024 ** 2,
  ],
  concurrent: [1, 10]
});

Expand Down
5 changes: 5 additions & 0 deletions lib/fs.js
Expand Up @@ -331,6 +331,9 @@ function readFileAfterStat(err, stats) {
if (err)
return context.close(err);

// TODO(BridgeAR): Check whether allocating a smaller chunk performs better,
// as in the promise-based version (lower peak memory and chunked stringify
// operations vs. multiple C++/JS boundary crossings).
const size = context.size = isFileType(stats, S_IFREG) ? stats[8] : 0;

if (size > kIoMaxLength) {
Expand All @@ -340,6 +343,8 @@ function readFileAfterStat(err, stats) {

try {
if (size === 0) {
// TODO(BridgeAR): If an encoding is set, use the StringDecoder to concat
// the result and reuse the buffer instead of allocating a new one.
context.buffers = [];
} else {
context.buffer = Buffer.allocUnsafeSlow(size);
Expand Down
87 changes: 54 additions & 33 deletions lib/internal/fs/promises.js
Expand Up @@ -87,6 +87,7 @@ const {
promisify,
} = require('internal/util');
const { EventEmitterMixin } = require('internal/event_target');
const { StringDecoder } = require('string_decoder');
const { watch } = require('internal/fs/watchers');
const { isIterable } = require('internal/streams/utils');
const assert = require('internal/assert');
Expand Down Expand Up @@ -419,63 +420,83 @@ async function writeFileHandle(filehandle, data, signal, encoding) {

/**
 * Reads the remaining content of an open file handle.
 *
 * Strategy, depending on what fstat() reports:
 * - Regular file with a known size and no encoding: one buffer of exactly
 *   `size` bytes is allocated and filled, in slices of at most
 *   `kReadFileBufferLength` bytes when the file is larger than that.
 * - Regular file with a known size and an encoding: a single buffer of at
 *   most `kReadFileBufferLength` bytes is reused and every chunk is fed
 *   through a StringDecoder, so the whole file never has to be held as a
 *   Buffer.
 * - Unknown size (not a regular file, or fstat() reports 0): chunks of
 *   `kReadFileUnknownBufferLength` bytes are read until a read returns 0.
 *
 * @param {{ fd: number }} filehandle Handle whose `fd` is passed to the binding.
 * @param {{ signal?: AbortSignal, encoding?: string }} [options]
 * @returns {Promise<Buffer | string>} A string when `options.encoding` is
 *   set, otherwise a Buffer.
 * @throws {ERR_FS_FILE_TOO_LARGE} When the reported size exceeds kIoMaxLength.
 */
async function readFileHandle(filehandle, options) {
  const signal = options?.signal;
  const encoding = options?.encoding;
  // Only needed when the result is a string; keeps multi-byte characters that
  // straddle two chunks intact.
  const decoder = encoding && new StringDecoder(encoding);

  checkAborted(signal);

  const statFields = await binding.fstat(filehandle.fd, false, kUsePromises);

  checkAborted(signal);

  let size = 0;
  let length = 0;
  if ((statFields[1/* mode */] & S_IFMT) === S_IFREG) {
    size = statFields[8/* size */];
    length = encoding ? MathMin(size, kReadFileBufferLength) : size;
  }
  if (length === 0) {
    length = kReadFileUnknownBufferLength;
  }

  if (size > kIoMaxLength)
    throw new ERR_FS_FILE_TOO_LARGE(size);

  // A size of 0 means "unknown": the real length is only discovered by
  // reading until the binding reports 0 bytes.
  const noSize = size === 0;
  // True only for the "one big buffer, no encoding" case with a large file.
  const chunkedRead = length > kReadFileBufferLength;
  let totalRead = 0;
  let buffer = Buffer.allocUnsafeSlow(length);
  let result = '';
  let offset = 0;
  let buffers;

  while (true) {
    checkAborted(signal);

    if (chunkedRead) {
      length = MathMin(size - totalRead, kReadFileBufferLength);
    }

    const bytesRead = (await binding.read(filehandle.fd, buffer, offset,
                                          length, -1, kUsePromises)) ?? 0;
    totalRead += bytesRead;

    // With an unknown size a short read is NOT end-of-file (pipes and similar
    // sources routinely return fewer bytes than requested), so in that case
    // only a 0-byte read terminates the loop.
    if (bytesRead === 0 ||
        totalRead === size ||
        (bytesRead !== buffer.length && !chunkedRead && !noSize)) {
      const singleRead = bytesRead === totalRead;

      // In chunked mode `buffer` holds everything read so far; otherwise it
      // only holds the last chunk.
      const bytesToCheck = chunkedRead ? totalRead : bytesRead;

      if (bytesToCheck !== buffer.length) {
        buffer = buffer.subarray(0, bytesToCheck);
      }

      if (!encoding) {
        if (noSize && !singleRead) {
          ArrayPrototypePush(buffers, buffer);
          return Buffer.concat(buffers, totalRead);
        }
        return buffer;
      }

      if (singleRead) {
        return buffer.toString(encoding);
      }
      result += decoder.end(buffer);
      return result;
    }

    if (!encoding && !noSize) {
      // Known size, raw result: keep filling the same buffer.
      offset = totalRead;
      continue;
    }

    // Never hand out the uninitialized tail of a partially filled buffer.
    const chunk = bytesRead === buffer.length ?
      buffer :
      buffer.subarray(0, bytesRead);

    if (encoding) {
      result += decoder.write(chunk);
    } else {
      buffers ??= [];
      // Unknown file size requires chunks.
      ArrayPrototypePush(buffers, chunk);
      buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
    }
  }
}

// All of the functions are defined as async in order to ensure that errors
Expand Down
18 changes: 15 additions & 3 deletions test/parallel/test-fs-promises-readfile.js
@@ -1,3 +1,4 @@
// Flags: --expose-internals
'use strict';

const common = require('../common');
Expand All @@ -6,15 +7,15 @@ const assert = require('assert');
const path = require('path');
const { writeFile, readFile } = require('fs').promises;
const tmpdir = require('../common/tmpdir');
const { internalBinding } = require('internal/test/binding');
const fsBinding = internalBinding('fs');
tmpdir.refresh();

const fn = path.join(tmpdir.path, 'large-file');

// Deterministic payload of 1 MiB + 19 bytes. Buffer.from() keeps only the low
// byte of each index, so the content cycles through 0..255. The extra 19
// bytes keep the length from being a round power-of-two multiple
// (presumably so that the final read chunk is a partial one).
const largeBuffer = Buffer.from(
  Array.from({ length: 1024 ** 2 + 19 }, (_, index) => index)
);

async function createLargeFile() {
Expand Down Expand Up @@ -69,11 +70,22 @@ async function validateWrongSignalParam() {

}

// Verifies that readFile() still returns the complete content when fstat()
// reports a size of 0 for a file that actually has data. The binding's fstat
// is temporarily replaced by a stub returning fixed stat fields.
async function validateZeroByteLiar() {
  const originalFStat = fsBinding.fstat;
  fsBinding.fstat = common.mustCall(
    () => (/* stat fields */ [0, 1, 2, 3, 4, 5, 6, 7, 0 /* size */])
  );
  try {
    const readBuffer = await readFile(fn);
    assert.strictEqual(readBuffer.toString(), largeBuffer.toString());
  } finally {
    // Always undo the monkey patch, even when the read or the assertion
    // fails, so the stub cannot leak into whatever runs afterwards.
    fsBinding.fstat = originalFStat;
  }
}

// Test driver: runs each scenario strictly one after another, in the listed
// order. createLargeFile goes first (presumably it writes the file the later
// scenarios read). The resulting promise is chained to common.mustCall().
(async () => {
  const scenarios = [
    createLargeFile,
    validateReadFile,
    validateReadFileProc,
    validateReadFileAbortLogicBefore,
    validateReadFileAbortLogicDuring,
    validateWrongSignalParam,
    validateZeroByteLiar,
  ];

  for (const scenario of scenarios) {
    await scenario();
  }
})().then(common.mustCall());