Skip to content

Commit 7e6a0fd

Browse files
Yanis Bensonsindresorhus
Yanis Benson
and committed Sep 22, 2019
Move file hashing to a worker thread, add async method (#21)
Co-authored-by: Sindre Sorhus <sindresorhus@gmail.com>
1 parent 92885d0 commit 7e6a0fd

7 files changed

+218
-2
lines changed
 

‎index.d.ts

+32
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,36 @@ declare const hasha: {
5858
): string;
5959
(input: hasha.HashaInput, options: hasha.Options<'buffer'>): Buffer;
6060

61+
/**
62+
Asynchronously calculate the hash for a `string`, `Buffer`, or an array thereof.
63+
64+
In Node.js 12 or later, the operation is executed using `worker_threads`. A thread is lazily spawned on the first operation and lives until the end of the program execution. It's unrefed, so it won't keep the process alive.
65+
66+
@param input - Data you want to hash.
67+
68+
While strings are supported, you should prefer buffers as they're faster to hash. However, if you already have a string, you should not convert it to a buffer.
69+
70+
Pass an array instead of concatenating strings and/or buffers. The output is the same, but arrays do not incur the overhead of concatenation.
71+
72+
@returns A hash.
73+
74+
@example
75+
```
76+
import hasha = require('hasha');
77+
78+
(async () => {
79+
console.log(await hasha.async('unicorn'));
80+
//=> 'e233b19aabc7d5e53826fb734d1222f1f0444c3a3fc67ff4af370a66e7cadd2cb24009f1bc86f0bed12ca5fcb226145ad10fc5f650f6ef0959f8aadc5a594b27'
81+
})();
82+
```
83+
*/
84+
async(input: hasha.HashaInput): Promise<string>;
85+
async(
86+
input: hasha.HashaInput,
87+
options: hasha.Options<hasha.ToStringEncoding>
88+
): Promise<string>;
89+
async(input: hasha.HashaInput, options: hasha.Options<'buffer'>): Promise<Buffer>;
90+
6191
/**
6292
Create a [hash transform stream](https://nodejs.org/api/crypto.html#crypto_class_hash).
6393
@@ -92,6 +122,8 @@ declare const hasha: {
92122
/**
93123
Calculate the hash for a file.
94124
125+
In Node.js 12 or later, the operation is executed using `worker_threads`. A thread is lazily spawned on the first operation and lives until the end of the program execution. It's unrefed, so it won't keep the process alive.
126+
95127
@param filePath - Path to a file you want to hash.
96128
@returns The calculated file hash.
97129

‎index.js

+88-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,68 @@
11
'use strict';
22
const fs = require('fs');
3+
const path = require('path');
34
const crypto = require('crypto');
45
const isStream = require('is-stream');
56

7+
/**
Load a module that may not be available in the current runtime.

@param name - Module identifier handed to `require`.
@param defaultValue - Value returned when the module cannot be loaded.
@returns The loaded module, or `defaultValue` if `require` throws.
*/
const requireOptional = (name, defaultValue) => {
	let loaded = defaultValue;

	try {
		loaded = require(name);
	} catch (_) {
		// The module is unavailable, keep the default
	}

	return loaded;
};
14+
15+
// `Worker` is `undefined` when `worker_threads` cannot be required
const {Worker} = requireOptional('worker_threads', {});
16+
17+
// Shared worker instance, created on first use
let worker; // Lazy
18+
// Source of the ids used to match worker replies to their tasks
let taskIdCounter = 0;
19+
// Pending tasks keyed by id, each holding the `resolve`/`reject` of its promise
const tasks = new Map();
20+
21+
const recreateWorkerError = sourceError => {
22+
const error = new Error(sourceError.message);
23+
24+
for (const [key, value] of Object.entries(sourceError)) {
25+
if (key !== 'message') {
26+
error[key] = value;
27+
}
28+
}
29+
30+
return error;
31+
};
32+
33+
/**
Spawn the worker running `thread.js` (located next to this file), store it in the module-level `worker` variable, and attach its event handlers.
*/
const createWorker = () => {
	// Settle the task a reply belongs to
	const onMessage = ({id, value, error: workerError}) => {
		const pending = tasks.get(id);
		tasks.delete(id);

		// No task is waiting anymore, so unref the worker
		if (tasks.size === 0) {
			worker.unref();
		}

		if (workerError !== undefined) {
			pending.reject(recreateWorkerError(workerError));
			return;
		}

		pending.resolve(value);
	};

	const onError = error => {
		// An error at this level is comparable to a segfault and has no scope to report it to, so it is rethrown from the event callback
		throw error;
	};

	worker = new Worker(path.join(__dirname, 'thread.js'));
	worker.on('message', onMessage);
	worker.on('error', onError);
};
53+
54+
const taskWorker = (method, args, transferList) => new Promise((resolve, reject) => {
55+
const id = taskIdCounter++;
56+
tasks.set(id, {resolve, reject});
57+
58+
if (worker === undefined) {
59+
createWorker();
60+
}
61+
62+
worker.ref();
63+
worker.postMessage({id, method, args}, transferList);
64+
});
65+
666
const hasha = (input, options = {}) => {
767
let outputEncoding = options.encoding || 'hex';
868

@@ -55,7 +115,34 @@ hasha.fromStream = async (stream, options = {}) => {
55115
});
56116
};
57117

58-
hasha.fromFile = async (filePath, options) => hasha.fromStream(fs.createReadStream(filePath), options);
118+
if (Worker === undefined) {
	// `worker_threads` is unavailable, so hash on the main thread
	hasha.fromFile = async (filePath, options) => hasha.fromStream(fs.createReadStream(filePath), options);
	hasha.async = async (input, options) => hasha(input, options);
} else {
	// Turn the digest received from the worker into a `Buffer`, or into a string in the requested encoding
	const decodeDigest = (digest, encoding) => {
		const bytes = Buffer.from(digest);
		return encoding === 'buffer' ? bytes : bytes.toString(encoding);
	};

	/**
	Hash a file on the worker thread.

	@param filePath - Path to the file to hash.
	@param options - `algorithm` (default `sha512`) and `encoding` (default `hex`, or `buffer` for a `Buffer`).
	@returns A promise for the file hash.
	*/
	hasha.fromFile = async (filePath, {algorithm = 'sha512', encoding = 'hex'} = {}) => {
		const digest = await taskWorker('hashFile', [algorithm, filePath]);
		return decodeDigest(digest, encoding);
	};

	/**
	Hash the input on the worker thread.

	@param input - Data to hash.
	@param options - `algorithm` (default `sha512`) and `encoding` (default `hex`, or `buffer` for a `Buffer`).
	@returns A promise for the hash.
	*/
	hasha.async = async (input, {algorithm = 'sha512', encoding = 'hex'} = {}) => {
		const digest = await taskWorker('hash', [algorithm, input]);
		return decodeDigest(digest, encoding);
	};
}
59146

60147
hasha.fromFileSync = (filePath, options) => hasha(fs.readFileSync(filePath), options);
61148

‎index.test-d.ts

+9
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,15 @@ expectType<string>(hasha(['unicorn']));
1010
expectType<string>(hasha([Buffer.from('unicorn', 'utf8')]));
1111
expectType<string>(hasha(['unicorn', Buffer.from('unicorn', 'utf8')]));
1212

13+
// `hasha.async()` resolves to a string by default and to a `Buffer` only with `encoding: 'buffer'`
expectType<Promise<string>>(hasha.async('unicorn'));
14+
expectType<Promise<string>>(hasha.async('unicorn', {algorithm: 'md5'}));
15+
expectType<Promise<string>>(hasha.async('unicorn', {encoding: 'latin1'}));
16+
expectType<Promise<Buffer>>(hasha.async('unicorn', {encoding: 'buffer'}));
17+
18+
// Arrays of strings and/or buffers are accepted as input too
expectType<Promise<string>>(hasha.async(['unicorn']));
19+
expectType<Promise<string>>(hasha.async([Buffer.from('unicorn', 'utf8')]));
20+
expectType<Promise<string>>(hasha.async(['unicorn', Buffer.from('unicorn', 'utf8')]));
21+
1322
process.stdin.pipe(hasha.stream()).pipe(process.stdout);
1423

1524
expectType<Promise<string | null>>(hasha.fromStream(process.stdin));

‎package.json

+7-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717
},
1818
"files": [
1919
"index.js",
20-
"index.d.ts"
20+
"index.d.ts",
21+
"thread.js"
2122
],
2223
"keywords": [
2324
"hash",
@@ -51,5 +52,10 @@
5152
"proxyquire": "^2.1.0",
5253
"tsd": "^0.7.2",
5354
"xo": "^0.24.0"
55+
},
56+
"xo": {
57+
"rules": {
58+
"import/no-unresolved": "off"
59+
}
5460
}
5561
}

‎readme.md

+17
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,15 @@ hasha('unicorn');
3636
```js
3737
const hasha = require('hasha');
3838

39+
(async () => {
40+
console.log(await hasha.async('unicorn'));
41+
//=> 'e233b19aabc7d5e53826fb734d1222f1f0444c3a3fc67ff4af370a66e7cadd2cb24009f1bc86f0bed12ca5fcb226145ad10fc5f650f6ef0959f8aadc5a594b27'
42+
})();
43+
```
44+
45+
```js
46+
const hasha = require('hasha');
47+
3948
// Hash the process input and output the hash sum
4049
process.stdin.pipe(hasha.stream()).pipe(process.stdout);
4150
```
@@ -91,6 +100,12 @@ Values: `md5` `sha1` `sha256` `sha512` *([Platform dependent](https://nodejs.org
91100

92101
*The `md5` algorithm is good for [file revving](https://github.com/sindresorhus/rev-hash), but you should never use `md5` or `sha1` for anything sensitive. [They're insecure.](http://googleonlinesecurity.blogspot.no/2014/09/gradually-sunsetting-sha-1.html)*
93102

103+
### hasha.async(input, [options])
104+
105+
In Node.js 12 or later, the operation is executed using `worker_threads`. A thread is lazily spawned on the first operation and lives until the end of the program execution. It's unrefed, so it won't keep the process alive.
106+
107+
Returns a hash asynchronously.
108+
94109
### hasha.stream([options])
95110

96111
Returns a [hash transform stream](https://nodejs.org/api/crypto.html#crypto_class_hash).
@@ -101,6 +116,8 @@ Returns a `Promise` for the calculated hash.
101116

102117
### hasha.fromFile(filepath, [options])
103118

119+
In Node.js 12 or later, the operation is executed using `worker_threads`. A thread is lazily spawned on the first operation and lives until the end of the program execution. It's unrefed, so it won't keep the process alive.
120+
104121
Returns a `Promise` for the calculated file hash.
105122

106123
### hasha.fromFileSync(filepath, [options])

‎test.js

+9
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,15 @@ test('hasha()', t => {
1515
t.is(hasha(fixture, {algorithm: 'md5'}).length, 32);
1616
});
1717

18+
// Covers buffer, string and array inputs as well as the `encoding` and `algorithm` options of `hasha.async()`
test('hasha.async()', async t => {
	const sha512HexLength = 128;
	const md5HexLength = 32;

	const fromBuffer = await hasha.async(Buffer.from('unicorn'));
	t.is(fromBuffer.length, sha512HexLength);

	const fromString = await hasha.async('unicorn');
	t.is(fromString.length, sha512HexLength);

	const fromStrings = await hasha.async(['foo', 'bar']);
	t.is(fromStrings.length, sha512HexLength);

	// An array of parts must hash the same as the concatenated input
	const fromMixedParts = await hasha.async(['foo', Buffer.from('bar')]);
	t.is(fromMixedParts, hasha('foobar'));

	const asBuffer = await hasha.async(Buffer.from('unicorn'), {encoding: 'buffer'});
	t.true(Buffer.isBuffer(asBuffer));

	const md5 = await hasha.async(Buffer.from('unicorn'), {algorithm: 'md5'});
	t.is(md5.length, md5HexLength);
});
26+
1827
test('hasha.stream()', t => {
1928
t.true(isStream(hasha.stream()));
2029
});

‎thread.js

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
'use strict';
2+
const fs = require('fs');
3+
const crypto = require('crypto');
4+
const {parentPort} = require('worker_threads');
5+
6+
const handlers = {
7+
hashFile: (algorithm, filePath) => new Promise((resolve, reject) => {
8+
const hasher = crypto.createHash(algorithm);
9+
fs.createReadStream(filePath)
10+
.on('error', reject)
11+
.pipe(hasher)
12+
.on('error', reject)
13+
.on('finish', () => {
14+
const {buffer} = hasher.read();
15+
resolve({value: buffer, transferList: [buffer]});
16+
});
17+
}),
18+
hash: async (algorithm, input) => {
19+
const hasher = crypto.createHash(algorithm);
20+
21+
if (Array.isArray(input)) {
22+
for (const part of input) {
23+
hasher.update(part);
24+
}
25+
} else {
26+
hasher.update(input);
27+
}
28+
29+
const hash = hasher.digest().buffer;
30+
return {value: hash, transferList: [hash]};
31+
}
32+
};
33+
34+
// Run each task received on `parentPort` and reply with either its `value` or a serialized `error`, tagged with the task `id`
parentPort.on('message', async message => {
	try {
		const {method, args} = message;
		const handler = handlers[method];

		if (handler === undefined) {
			throw new Error(`Unknown method '${method}'`);
		}

		const {value, transferList} = await handler(...args);
		parentPort.postMessage({id: message.id, value}, transferList);
	} catch (error) {
		// Flatten the error into a plain object before posting it
		const newError = {message: error.message, stack: error.stack};

		for (const [key, value] of Object.entries(error)) {
			const type = typeof value;

			// Copy primitives only. Functions and symbols cannot be cloned by `postMessage()`,
			// and letting it throw here would leave the task without a reply.
			if (type !== 'object' && type !== 'function' && type !== 'symbol') {
				newError[key] = value;
			}
		}

		parentPort.postMessage({id: message.id, error: newError});
	}
});

0 commit comments

Comments
 (0)
Please sign in to comment.