From e72802bdaad5175d09893dd6a3a8685b8586bf02 Mon Sep 17 00:00:00 2001
From: Ruben Bridgewater
Date: Fri, 19 Aug 2022 17:07:19 +0200
Subject: [PATCH] fs: improve promise based readFile performance for big files

This significantly reduces the peak memory for the promise-based
readFile operation by reusing a single memory chunk after each read and
stringifying that chunk immediately.

Signed-off-by: Ruben Bridgewater
---
 benchmark/fs/readfile-promises.js | 14 ++++--
 lib/fs.js                         |  6 +++
 lib/internal/fs/promises.js       | 84 ++++++++++++++++++-------------
 3 files changed, 66 insertions(+), 38 deletions(-)

diff --git a/benchmark/fs/readfile-promises.js b/benchmark/fs/readfile-promises.js
index 5cfa5b4cc02465..6676aa9b0c8d1c 100644
--- a/benchmark/fs/readfile-promises.js
+++ b/benchmark/fs/readfile-promises.js
@@ -15,11 +15,19 @@ const filename = path.resolve(tmpdir.path,
 
 const bench = common.createBenchmark(main, {
   duration: [5],
-  len: [1024, 16 * 1024 * 1024],
+  len: [
+    1024,
+    512 * 1024,
+    4 * 1024 ** 2,
+    8 * 1024 ** 2,
+    16 * 1024 ** 2,
+    32 * 1024 ** 2,
+  ],
+  encoding: ['', 'utf8'],
   concurrent: [1, 10]
 });
 
-function main({ len, duration, concurrent }) {
+function main({ len, duration, encoding, concurrent }) {
   try {
     fs.unlinkSync(filename);
   } catch {
@@ -44,7 +52,7 @@ function main({ len, duration, concurrent }) {
   }, duration * 1000);
 
   function read() {
-    fs.promises.readFile(filename)
+    fs.promises.readFile(filename, { encoding })
       .then((res) => afterRead(undefined, res))
       .catch((err) => afterRead(err));
   }
diff --git a/lib/fs.js b/lib/fs.js
index ffa216f35388e0..1f132838113a2f 100644
--- a/lib/fs.js
+++ b/lib/fs.js
@@ -343,6 +343,9 @@ function readFileAfterStat(err, stats) {
   if (err)
     return context.close(err);
 
+  // TODO(BridgeAR): Check if allocating a smaller chunk is better
+  // performance-wise, similar to the promise based version (less peak memory
+  // and chunked stringify operations vs multiple C++/JS boundary crossings).
   const size = context.size = isFileType(stats, S_IFREG) ? stats[8] : 0;
 
   if (size > kIoMaxLength) {
@@ -352,6 +355,9 @@
 
   try {
     if (size === 0) {
+      // TODO(BridgeAR): We are able to optimize this in case an encoding is
+      // used. If that's the case, let's use the StringDecoder, directly concat
+      // the result and reuse the former chunk instead of allocating a new one.
       context.buffers = [];
     } else {
       context.buffer = Buffer.allocUnsafeSlow(size);
diff --git a/lib/internal/fs/promises.js b/lib/internal/fs/promises.js
index 34fd0f586766dd..a0ae36d46606bd 100644
--- a/lib/internal/fs/promises.js
+++ b/lib/internal/fs/promises.js
@@ -86,6 +86,7 @@ const {
   promisify,
 } = require('internal/util');
 const { EventEmitterMixin } = require('internal/event_target');
+const { StringDecoder } = require('string_decoder');
 const { watch } = require('internal/fs/watchers');
 const { isIterable } = require('internal/streams/utils');
 const assert = require('internal/assert');
@@ -416,6 +417,8 @@ async function writeFileHandle(filehandle, data, signal, encoding) {
 
 async function readFileHandle(filehandle, options) {
   const signal = options?.signal;
+  const encoding = options?.encoding;
+  const decoder = encoding && new StringDecoder(encoding);
 
   checkAborted(signal);
 
@@ -423,56 +426,67 @@
 
   checkAborted(signal);
 
-  let size;
+  let size = 0;
+  let length = 0;
   if ((statFields[1/* mode */] & S_IFMT) === S_IFREG) {
     size = statFields[8/* size */];
-  } else {
-    size = 0;
+    length = encoding ? MathMin(size, kReadFileBufferLength) : size;
+  }
+  if (length === 0) {
+    length = kReadFileUnknownBufferLength;
   }
 
   if (size > kIoMaxLength)
     throw new ERR_FS_FILE_TOO_LARGE(size);
 
-  let endOfFile = false;
   let totalRead = 0;
-  const noSize = size === 0;
-  const buffers = [];
-  const fullBuffer = noSize ? undefined : Buffer.allocUnsafeSlow(size);
-  do {
+  let buffer = Buffer.allocUnsafeSlow(length);
+  let result = '';
+  let offset = 0;
+  let buffers;
+
+  while (true) {
     checkAborted(signal);
-    let buffer;
-    let offset;
-    let length;
-    if (noSize) {
-      buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
-      offset = 0;
-      length = kReadFileUnknownBufferLength;
-    } else {
-      buffer = fullBuffer;
-      offset = totalRead;
-      length = MathMin(size - totalRead, kReadFileBufferLength);
-    }
     const bytesRead = (await binding.read(filehandle.fd, buffer, offset,
-                                          length, -1, kUsePromises)) || 0;
+                                          length, -1, kUsePromises)) ?? 0;
     totalRead += bytesRead;
-    endOfFile = bytesRead === 0 || totalRead === size;
-    if (noSize && bytesRead > 0) {
-      const isBufferFull = bytesRead === kReadFileUnknownBufferLength;
-      const chunkBuffer = isBufferFull ? buffer : buffer.slice(0, bytesRead);
-      ArrayPrototypePush(buffers, chunkBuffer);
+
+    if (bytesRead === 0 || totalRead === size || bytesRead !== buffer.length) {
+      const singleRead = bytesRead === totalRead;
+
+      if (bytesRead !== buffer.length) {
+        buffer = buffer.slice(0, bytesRead);
+      }
+
+      if (!encoding) {
+        if (size === 0 && !singleRead) {
+          ArrayPrototypePush(buffers, buffer);
+          return Buffer.concat(buffers, totalRead);
+        }
+        return buffer;
+      }
+
+      if (singleRead) {
+        return buffer.toString(encoding);
+      }
+      result += decoder.end(buffer);
+      return result;
     }
-  } while (!endOfFile);
 
-  let result;
-  if (size > 0) {
-    result = totalRead === size ? fullBuffer : fullBuffer.slice(0, totalRead);
-  } else {
-    result = buffers.length === 1 ? buffers[0] : Buffer.concat(buffers,
-                                                               totalRead);
+    if (encoding) {
+      result += decoder.write(buffer);
+    } else if (size !== 0) {
+      // TODO(BridgeAR): This condition needs a test. A test file should be
+      // written that is read in chunks without an encoding.
+      offset += bytesRead;
+    } else {
+      buffers ??= [];
+      // Unknown file size requires chunks.
+      ArrayPrototypePush(buffers, buffer);
+      buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
+    }
   }
-
-  return options.encoding ? result.toString(options.encoding) : result;
 }
 
 // All of the functions are defined as async in order to ensure that errors
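
For readers who want to try the same strategy outside of Node.js core, the
following user-land sketch mirrors what this patch does inside
readFileHandle(): it reuses one chunk-sized buffer for every read and
stringifies each chunk immediately with a StringDecoder. This is an
illustration only, not the patch itself; the helper name readFileChunked and
the 512 KiB kChunkLength are assumptions (the internal kReadFileBufferLength
constant may use a different size).

'use strict';
// Sketch of the buffer-reuse + incremental-decode technique applied by the
// patch. readFileChunked and kChunkLength are hypothetical names.
const { open } = require('fs/promises');
const { StringDecoder } = require('string_decoder');

const kChunkLength = 512 * 1024; // assumed 512 KiB chunk size

async function readFileChunked(path, encoding = 'utf8') {
  const decoder = new StringDecoder(encoding);
  const handle = await open(path, 'r');
  // One buffer is allocated up front and reused for every read(), so peak
  // memory stays near one chunk instead of the full file size.
  const buffer = Buffer.allocUnsafeSlow(kChunkLength);
  let result = '';
  try {
    while (true) {
      const { bytesRead } = await handle.read(buffer, 0, kChunkLength, -1);
      if (bytesRead === 0) {
        // EOF: flush any buffered bytes of an incomplete character.
        return result + decoder.end();
      }
      // Stringify right away; StringDecoder carries partial multi-byte
      // sequences across chunk boundaries, so UTF-8 output stays intact.
      result += decoder.write(bytesRead === kChunkLength ?
        buffer : buffer.subarray(0, bytesRead));
    }
  } finally {
    await handle.close();
  }
}

// Example: readFileChunked('/tmp/big.txt').then((text) => console.log(text.length));

Compared with the pre-patch code above, which allocated a buffer for the whole
file and called result.toString(encoding) once at the end, this trades one
large allocation and one large stringify for several chunk-sized ones, which
is where the peak-memory reduction comes from.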