Skip to content

Commit

Permalink
Move file hashing to a worker thread, add async method (#21)
Browse files Browse the repository at this point in the history
Co-authored-by: Sindre Sorhus <sindresorhus@gmail.com>
  • Loading branch information
Yanis Benson and sindresorhus committed Sep 22, 2019
1 parent 92885d0 commit 7e6a0fd
Show file tree
Hide file tree
Showing 7 changed files with 218 additions and 2 deletions.
32 changes: 32 additions & 0 deletions index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,36 @@ declare const hasha: {
): string;
(input: hasha.HashaInput, options: hasha.Options<'buffer'>): Buffer;

/**
Asynchronously calculate the hash for a `string`, `Buffer`, or an array thereof.
In Node.js 12 or later, the operation is executed using `worker_threads`. A thread is lazily spawned on the first operation and lives until the end of the program execution. It's unrefed, so it won't keep the process alive.
@param input - Data you want to hash.
While strings are supported, you should prefer buffers as they're faster to hash. However, if you already have a string, you should not convert it to a buffer.
Pass an array instead of concatenating strings and/or buffers. The output is the same, but arrays do not incur the overhead of concatenation.
@returns A hash.
@example
```
import hasha = require('hasha');
(async () => {
console.log(await hasha.async('unicorn'));
//=> 'e233b19aabc7d5e53826fb734d1222f1f0444c3a3fc67ff4af370a66e7cadd2cb24009f1bc86f0bed12ca5fcb226145ad10fc5f650f6ef0959f8aadc5a594b27'
})();
```
*/
async(input: hasha.HashaInput): Promise<string>;
async(
input: hasha.HashaInput,
options: hasha.Options<hasha.ToStringEncoding>
): Promise<string>;
async(input: hasha.HashaInput, options: hasha.Options<'buffer'>): Promise<Buffer>;

/**
Create a [hash transform stream](https://nodejs.org/api/crypto.html#crypto_class_hash).
Expand Down Expand Up @@ -92,6 +122,8 @@ declare const hasha: {
/**
Calculate the hash for a file.
In Node.js 12 or later, the operation is executed using `worker_threads`. A thread is lazily spawned on the first operation and lives until the end of the program execution. It's unrefed, so it won't keep the process alive.
@param filePath - Path to a file you want to hash.
@returns The calculated file hash.
Expand Down
89 changes: 88 additions & 1 deletion index.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,68 @@
'use strict';
const fs = require('fs');
const path = require('path');
const crypto = require('crypto');
const isStream = require('is-stream');

// Try to `require` the module called `name`.
// Returns the loaded module, or `defaultValue` when `require` throws for any reason.
const requireOptional = (name, defaultValue) => {
	let resolved = defaultValue;

	try {
		resolved = require(name);
	} catch (_) {
		// Loading failed; keep the fallback value.
	}

	return resolved;
};

const {Worker} = requireOptional('worker_threads', {});

let worker; // Lazy
let taskIdCounter = 0;
const tasks = new Map();

const recreateWorkerError = sourceError => {
const error = new Error(sourceError.message);

for (const [key, value] of Object.entries(sourceError)) {
if (key !== 'message') {
error[key] = value;
}
}

return error;
};

// Spawn the shared worker thread running `thread.js` and wire up its events.
// The worker is stored in the module-level `worker` variable.
const createWorker = () => {
	worker = new Worker(path.join(__dirname, 'thread.js'));

	// Each reply carries the `id` of the task it settles.
	worker.on('message', message => {
		const {id, error: failure} = message;
		const pending = tasks.get(id);
		tasks.delete(id);

		// With nothing left in flight, the worker must not keep the process alive.
		if (tasks.size === 0) {
			worker.unref();
		}

		if (failure !== undefined) {
			pending.reject(recreateWorkerError(failure));
			return;
		}

		pending.resolve(message.value);
	});

	worker.on('error', error => {
		// An error here is effectively the equivalent of a segfault and is not tied to any task, so we just rethrow it at the callback level.
		throw error;
	});
};

// Send a call to `method` with `args` to the shared worker thread and return a promise for its reply.
// - `method`: name sent to the worker in the message.
// - `args`: arguments sent along with it.
// - `transferList`: optional list passed straight through to `worker.postMessage`.
// The promise is settled by the worker's reply, or rejected right away if the task could not be sent.
const taskWorker = (method, args, transferList) => new Promise((resolve, reject) => {
	const id = taskIdCounter++;
	tasks.set(id, {resolve, reject});

	try {
		if (worker === undefined) {
			createWorker();
		}

		// Keep the process alive while this task is in flight.
		worker.ref();
		worker.postMessage({id, method, args}, transferList);
	} catch (error) {
		// Nothing was sent (for example, `args` could not be cloned), so no reply will ever arrive.
		// Drop the bookkeeping; otherwise the entry leaks and the ref'ed worker keeps the process alive.
		tasks.delete(id);

		if (tasks.size === 0 && worker !== undefined) {
			worker.unref();
		}

		reject(error);
	}
});

const hasha = (input, options = {}) => {
let outputEncoding = options.encoding || 'hex';

Expand Down Expand Up @@ -55,7 +115,34 @@ hasha.fromStream = async (stream, options = {}) => {
});
};

hasha.fromFile = async (filePath, options) => hasha.fromStream(fs.createReadStream(filePath), options);
if (Worker === undefined) {
	// `worker_threads` is unavailable: do the work on the calling thread.
	hasha.fromFile = async (filePath, options) => hasha.fromStream(fs.createReadStream(filePath), options);
	hasha.async = async (input, options) => hasha(input, options);
} else {
	// Turn the raw digest received from the worker into the requested output:
	// a `Buffer` for the `'buffer'` encoding, otherwise a string in that encoding.
	const decodeDigest = (digest, encoding) => {
		const bytes = Buffer.from(digest);
		return encoding === 'buffer' ? bytes : bytes.toString(encoding);
	};

	// Hash the file at `filePath` in the worker thread.
	hasha.fromFile = async (filePath, {algorithm = 'sha512', encoding = 'hex'} = {}) =>
		decodeDigest(await taskWorker('hashFile', [algorithm, filePath]), encoding);

	// Hash `input` in the worker thread.
	hasha.async = async (input, {algorithm = 'sha512', encoding = 'hex'} = {}) =>
		decodeDigest(await taskWorker('hash', [algorithm, input]), encoding);
}

// Synchronously read the whole file at `filePath` and hash its contents with `hasha`.
hasha.fromFileSync = (filePath, options) => {
	const contents = fs.readFileSync(filePath);
	return hasha(contents, options);
};

Expand Down
9 changes: 9 additions & 0 deletions index.test-d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,15 @@ expectType<string>(hasha(['unicorn']));
expectType<string>(hasha([Buffer.from('unicorn', 'utf8')]));
expectType<string>(hasha(['unicorn', Buffer.from('unicorn', 'utf8')]));

// `hasha.async()` resolves to a `string` by default, with a custom algorithm, and for a string encoding…
expectType<Promise<string>>(hasha.async('unicorn'));
expectType<Promise<string>>(hasha.async('unicorn', {algorithm: 'md5'}));
expectType<Promise<string>>(hasha.async('unicorn', {encoding: 'latin1'}));
// …and to a `Buffer` when `encoding` is `'buffer'`.
expectType<Promise<Buffer>>(hasha.async('unicorn', {encoding: 'buffer'}));

// Arrays of strings and/or buffers are accepted as input.
expectType<Promise<string>>(hasha.async(['unicorn']));
expectType<Promise<string>>(hasha.async([Buffer.from('unicorn', 'utf8')]));
expectType<Promise<string>>(hasha.async(['unicorn', Buffer.from('unicorn', 'utf8')]));

process.stdin.pipe(hasha.stream()).pipe(process.stdout);

expectType<Promise<string | null>>(hasha.fromStream(process.stdin));
Expand Down
8 changes: 7 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
},
"files": [
"index.js",
"index.d.ts"
"index.d.ts",
"thread.js"
],
"keywords": [
"hash",
Expand Down Expand Up @@ -51,5 +52,10 @@
"proxyquire": "^2.1.0",
"tsd": "^0.7.2",
"xo": "^0.24.0"
},
"xo": {
"rules": {
"import/no-unresolved": "off"
}
}
}
17 changes: 17 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@ hasha('unicorn');
```js
const hasha = require('hasha');

(async () => {
console.log(await hasha.async('unicorn'));
//=> 'e233b19aabc7d5e53826fb734d1222f1f0444c3a3fc67ff4af370a66e7cadd2cb24009f1bc86f0bed12ca5fcb226145ad10fc5f650f6ef0959f8aadc5a594b27'
})();
```

```js
const hasha = require('hasha');

// Hash the process input and output the hash sum
process.stdin.pipe(hasha.stream()).pipe(process.stdout);
```
Expand Down Expand Up @@ -91,6 +100,12 @@ Values: `md5` `sha1` `sha256` `sha512` *([Platform dependent](https://nodejs.org

*The `md5` algorithm is good for [file revving](https://github.com/sindresorhus/rev-hash), but you should never use `md5` or `sha1` for anything sensitive. [They're insecure.](http://googleonlinesecurity.blogspot.no/2014/09/gradually-sunsetting-sha-1.html)*

### hasha.async(input, [options])

In Node.js 12 or later, the operation is executed using `worker_threads`. A thread is lazily spawned on the first operation and lives until the end of the program execution. It's unrefed, so it won't keep the process alive.

Returns a `Promise` for the calculated hash.

### hasha.stream([options])

Returns a [hash transform stream](https://nodejs.org/api/crypto.html#crypto_class_hash).
Expand All @@ -101,6 +116,8 @@ Returns a `Promise` for the calculated hash.

### hasha.fromFile(filepath, [options])

In Node.js 12 or later, the operation is executed using `worker_threads`. A thread is lazily spawned on the first operation and lives until the end of the program execution. It's unrefed, so it won't keep the process alive.

Returns a `Promise` for the calculated file hash.

### hasha.fromFileSync(filepath, [options])
Expand Down
9 changes: 9 additions & 0 deletions test.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,15 @@ test('hasha()', t => {
t.is(hasha(fixture, {algorithm: 'md5'}).length, 32);
});

test('hasha.async()', async t => {
	// Default algorithm: the hex digest is 128 characters long for buffer, string, and array input.
	const fromBuffer = await hasha.async(Buffer.from('unicorn'));
	t.is(fromBuffer.length, 128);

	const fromString = await hasha.async('unicorn');
	t.is(fromString.length, 128);

	const fromArray = await hasha.async(['foo', 'bar']);
	t.is(fromArray.length, 128);

	// Array parts hash the same as their concatenation.
	const fromMixedArray = await hasha.async(['foo', Buffer.from('bar')]);
	t.is(fromMixedArray, hasha('foobar'));

	// The `buffer` encoding yields a `Buffer`.
	const asBuffer = await hasha.async(Buffer.from('unicorn'), {encoding: 'buffer'});
	t.true(Buffer.isBuffer(asBuffer));

	// The md5 hex digest is 32 characters long.
	const md5Digest = await hasha.async(Buffer.from('unicorn'), {algorithm: 'md5'});
	t.is(md5Digest.length, 32);
});

test('hasha.stream()', t => {
t.true(isStream(hasha.stream()));
});
Expand Down
56 changes: 56 additions & 0 deletions thread.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
'use strict';
const fs = require('fs');
const crypto = require('crypto');
const {parentPort} = require('worker_threads');

const handlers = {
hashFile: (algorithm, filePath) => new Promise((resolve, reject) => {
const hasher = crypto.createHash(algorithm);
fs.createReadStream(filePath)
.on('error', reject)
.pipe(hasher)
.on('error', reject)
.on('finish', () => {
const {buffer} = hasher.read();
resolve({value: buffer, transferList: [buffer]});
});
}),
hash: async (algorithm, input) => {
const hasher = crypto.createHash(algorithm);

if (Array.isArray(input)) {
for (const part of input) {
hasher.update(part);
}
} else {
hasher.update(input);
}

const hash = hasher.digest().buffer;
return {value: hash, transferList: [hash]};
}
};

// Run the handler named by each incoming message and post the outcome back,
// tagged with the message's `id`: `{id, value}` on success, `{id, error}` on failure.
parentPort.on('message', async message => {
	try {
		const {method: methodName, args: handlerArgs} = message;
		const handler = handlers[methodName];

		if (handler === undefined) {
			throw new Error(`Unknown method '${methodName}'`);
		}

		const {value: result, transferList: transferables} = await handler(...handlerArgs);
		parentPort.postMessage({id: message.id, value: result}, transferables);
	} catch (error) {
		// Post a plain-object copy of the error: its message and stack, plus every
		// own enumerable property whose value is not an object.
		const serialized = {message: error.message, stack: error.stack};

		for (const [key, value] of Object.entries(error)) {
			if (typeof value === 'object') {
				continue;
			}

			serialized[key] = value;
		}

		parentPort.postMessage({id: message.id, error: serialized});
	}
});

0 comments on commit 7e6a0fd

Please sign in to comment.