Skip to content

Commit 448d671

Browse files
joyeecheung authored and richardlau committed Mar 25, 2024
crypto: implement crypto.hash()
This patch introduces a helper crypto.hash() that computes a digest from the input in one shot. This can be 1.2-1.6x faster than the object-based createHash() for smaller inputs (<= 5MB) that are readily available (not streamed) and incurs less memory overhead since no intermediate objects are created.

PR-URL: #51044
Refs: nodejs/performance#136
Reviewed-By: Vinícius Lourenço Claro Cardoso <contact@viniciusl.com.br>
Reviewed-By: Yagiz Nizipli <yagiz.nizipli@sentry.io>
1 parent 7894989 commit 448d671

File tree

9 files changed

+272
-8
lines changed

9 files changed

+272
-8
lines changed
 

‎benchmark/crypto/oneshot-hash.js

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
'use strict';
2+
3+
const common = require('../common.js');
4+
const { createHash, hash } = require('crypto');
5+
const path = require('path');
6+
const filepath = path.resolve(__dirname, '../../test/fixtures/snapshot/typescript.js');
7+
const fs = require('fs');
8+
const assert = require('assert');
9+
10+
// Benchmark matrix handed to common.createBenchmark(): every combination of
// input `length`, digest `method`, input `type` and iteration count `n`
// is a candidate configuration for main().
const bench = common.createBenchmark(main, {
11+
length: [1000, 100_000],
12+
method: ['md5', 'sha1', 'sha256'],
13+
type: ['string', 'buffer'],
14+
n: [100_000, 1000],
15+
}, {
16+
// NOTE(review): presumably createBenchmark() drops the combinations for
// which this predicate returns false - confirm against ../common.js.
combinationFilter: ({ length, n }) => {
17+
// Caps the total amount of hashed input per run at 100_000 * 1000 units.
// Of the values listed above, only length = 100_000 with n = 100_000
// exceeds the cap.
return length * n <= 100_000 * 1000;
18+
},
19+
});
20+
21+
/**
 * Benchmark body for a single configuration: hashes a prefix of the fixture
 * file `n` times and reports `n` operations to the benchmark harness.
 *
 * @param {object} conf
 * @param {number} conf.length Size of the prefix to hash: string characters
 *   when `type` is 'string', bytes otherwise.
 * @param {string} conf.type 'string' hashes a string input; any other value
 *   hashes a byte-array slice of the file contents.
 * @param {string} conf.method Digest algorithm name passed to the hasher.
 * @param {number} conf.n Number of digests computed inside the timed region.
 */
function main({ length, type, method, n }) {
22+
let data = fs.readFileSync(filepath);
23+
if (type === 'string') {
24+
// Decode the whole file first, then keep the first `length` characters.
data = data.toString().slice(0, length);
25+
} else {
26+
// Keep the first `length` bytes, using the Uint8Array slice implementation.
data = Uint8Array.prototype.slice.call(data, 0, length);
27+
}
28+
29+
// When `hash` is not exported by the crypto module under test (falsy), fall
// back to the object-based createHash() path so the same benchmark still runs.
const oneshotHash = hash ?
30+
(method, input) => hash(method, input, 'hex') :
31+
(method, input) => createHash(method).update(input).digest('hex');
32+
// Fill the result array before the timer starts, so the timed loop below
// only assigns into existing slots.
const array = [];
33+
for (let i = 0; i < n; i++) {
34+
array.push(null);
35+
}
36+
bench.start();
37+
for (let i = 0; i < n; i++) {
38+
array[i] = oneshotHash(method, data);
39+
}
40+
bench.end(n);
41+
// Sanity check on the last digest produced: both code paths request 'hex'
// output, so a string is expected.
assert.strictEqual(typeof array[n - 1], 'string');
42+
}

‎doc/api/crypto.md

+61
Original file line numberDiff line numberDiff line change
@@ -3630,6 +3630,67 @@ Computes the Diffie-Hellman secret based on a `privateKey` and a `publicKey`.
36303630
Both keys must have the same `asymmetricKeyType`, which must be one of `'dh'`
36313631
(for Diffie-Hellman), `'ec'` (for ECDH), `'x448'`, or `'x25519'` (for ECDH-ES).
36323632

3633+
### `crypto.hash(algorithm, data[, outputEncoding])`
3634+
3635+
<!-- YAML
3636+
added:
3637+
- REPLACEME
3638+
-->
3639+
3640+
* `algorithm` {string}
3641+
* `data` {string|Buffer|TypedArray|DataView} When `data` is a
3642+
string, it will be encoded as UTF-8 before being hashed. If a different
3643+
input encoding is desired for a string input, users can encode the string
3644+
into a `TypedArray` using either `TextEncoder` or `Buffer.from()` and pass
3645+
the encoded `TypedArray` into this API instead.
3646+
* `outputEncoding` {string|undefined} [Encoding][encoding] used to encode the
3647+
returned digest. **Default:** `'hex'`.
3648+
* Returns: {string|Buffer}
3649+
3650+
A utility for creating one-shot hash digests of data. It can be faster than
3651+
the object-based `crypto.createHash()` when hashing a smaller amount of data
3652+
(<= 5MB) that's readily available. If the data can be big or if it is streamed,
3653+
it's still recommended to use `crypto.createHash()` instead.
3654+
3655+
The `algorithm` is dependent on the available algorithms supported by the
3656+
version of OpenSSL on the platform. Examples are `'sha256'`, `'sha512'`, etc.
3657+
On recent releases of OpenSSL, `openssl list -digest-algorithms` will
3658+
display the available digest algorithms.
3659+
3660+
Example:
3661+
3662+
```cjs
3663+
const crypto = require('node:crypto');
3664+
const { Buffer } = require('node:buffer');
3665+
3666+
// Hash a string and return the result as a hex-encoded string.
3667+
const string = 'Node.js';
3668+
// 10b3493287f831e81a438811a1ffba01f8cec4b7
3669+
console.log(crypto.hash('sha1', string));
3670+
3671+
// Encode a base64-encoded string into a Buffer, hash it and return
3672+
// the result as a buffer.
3673+
const base64 = 'Tm9kZS5qcw==';
3674+
// <Buffer 10 b3 49 32 87 f8 31 e8 1a 43 88 11 a1 ff ba 01 f8 ce c4 b7>
3675+
console.log(crypto.hash('sha1', Buffer.from(base64, 'base64'), 'buffer'));
3676+
```
3677+
3678+
```mjs
3679+
import crypto from 'node:crypto';
3680+
import { Buffer } from 'node:buffer';
3681+
3682+
// Hash a string and return the result as a hex-encoded string.
3683+
const string = 'Node.js';
3684+
// 10b3493287f831e81a438811a1ffba01f8cec4b7
3685+
console.log(crypto.hash('sha1', string));
3686+
3687+
// Encode a base64-encoded string into a Buffer, hash it and return
3688+
// the result as a buffer.
3689+
const base64 = 'Tm9kZS5qcw==';
3690+
// <Buffer 10 b3 49 32 87 f8 31 e8 1a 43 88 11 a1 ff ba 01 f8 ce c4 b7>
3691+
console.log(crypto.hash('sha1', Buffer.from(base64, 'base64'), 'buffer'));
3692+
```
3693+
36333694
### `crypto.generateKey(type, options, callback)`
36343695

36353696
<!-- YAML

‎lib/crypto.js

+2
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ const {
107107
const {
108108
Hash,
109109
Hmac,
110+
hash,
110111
} = require('internal/crypto/hash');
111112
const {
112113
X509Certificate,
@@ -227,6 +228,7 @@ module.exports = {
227228
getFips,
228229
setFips,
229230
verify: verifyOneShot,
231+
hash,
230232

231233
// Classes
232234
Certificate,

‎lib/internal/crypto/hash.js

+31
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
const {
44
ObjectSetPrototypeOf,
55
ReflectApply,
6+
StringPrototypeToLowerCase,
67
Symbol,
78
} = primordials;
89

@@ -11,6 +12,7 @@ const {
1112
HashJob,
1213
Hmac: _Hmac,
1314
kCryptoJobAsync,
15+
oneShotDigest,
1416
} = internalBinding('crypto');
1517

1618
const {
@@ -29,6 +31,8 @@ const {
2931

3032
const {
3133
lazyDOMException,
34+
normalizeEncoding,
35+
encodingsMap,
3236
} = require('internal/util');
3337

3438
const {
@@ -40,13 +44,15 @@ const {
4044
ERR_CRYPTO_HASH_FINALIZED,
4145
ERR_CRYPTO_HASH_UPDATE_FAILED,
4246
ERR_INVALID_ARG_TYPE,
47+
ERR_INVALID_ARG_VALUE,
4348
},
4449
} = require('internal/errors');
4550

4651
const {
4752
validateEncoding,
4853
validateString,
4954
validateUint32,
55+
validateBuffer,
5056
} = require('internal/validators');
5157

5258
const {
@@ -188,8 +194,33 @@ async function asyncDigest(algorithm, data) {
188194
throw lazyDOMException('Unrecognized algorithm name', 'NotSupportedError');
189195
}
190196

197+
/**
 * One-shot digest helper: validates the arguments, normalizes the output
 * encoding and forwards everything to the native `oneShotDigest` binding.
 *
 * @param {string} algorithm Digest algorithm name; checked with
 *   validateString().
 * @param {string|Buffer|TypedArray|DataView} input Data to hash. Strings are
 *   passed through as-is; anything else must pass validateBuffer().
 * @param {string} [outputEncoding] Encoding of the returned digest, or
 *   'buffer' (matched case-insensitively). Defaults to 'hex'.
 * @returns {string|Buffer} Whatever the native binding returns for the
 *   requested encoding.
 * @throws {ERR_INVALID_ARG_VALUE} When `outputEncoding` is a string that is
 *   neither recognized by normalizeEncoding() nor equal to 'buffer'.
 */
function hash(algorithm, input, outputEncoding = 'hex') {
198+
validateString(algorithm, 'algorithm');
199+
if (typeof input !== 'string') {
200+
validateBuffer(input, 'input');
201+
}
202+
let normalized = outputEncoding;
203+
// Fast case: if it's 'hex', we don't need to validate it further.
204+
if (outputEncoding !== 'hex') {
205+
validateString(outputEncoding, 'outputEncoding');
206+
normalized = normalizeEncoding(outputEncoding);
207+
// If the encoding is invalid, normalizeEncoding() returns undefined.
208+
if (normalized === undefined) {
209+
// normalizeEncoding() doesn't handle 'buffer'.
210+
if (StringPrototypeToLowerCase(outputEncoding) === 'buffer') {
211+
normalized = 'buffer';
212+
} else {
213+
throw new ERR_INVALID_ARG_VALUE('outputEncoding', outputEncoding);
214+
}
215+
}
216+
}
217+
// The encoding is passed twice: as its normalized name and as the id looked
// up in encodingsMap.
// NOTE(review): for 'buffer' the lookup presumably yields undefined, which
// the native side accepts for the id argument - confirm against encodingsMap.
return oneShotDigest(algorithm, getCachedHashId(algorithm), getHashCache(),
218+
input, normalized, encodingsMap[normalized]);
219+
}
220+
191221
module.exports = {
192222
Hash,
193223
Hmac,
194224
asyncDigest,
225+
hash,
195226
};

‎src/api/encoding.cc

+10
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,16 @@ enum encoding ParseEncoding(const char* encoding,
109109
return default_encoding;
110110
}
111111

112+
// Overload of ParseEncoding() that also accepts a numeric encoding id
// (`encoding_id`). When the id is a Uint32 it is used directly; otherwise the
// call falls back to the (isolate, encoding_v, default_encoding) overload.
enum encoding ParseEncoding(Isolate* isolate,
113+
Local<Value> encoding_v,
114+
Local<Value> encoding_id,
115+
enum encoding default_encoding) {
116+
if (encoding_id->IsUint32()) {
117+
// The numeric value is cast to the enum without a range check, so the
// caller is responsible for passing a valid `enum encoding` value.
return static_cast<enum encoding>(encoding_id.As<v8::Uint32>()->Value());
118+
}
119+
120+
return ParseEncoding(isolate, encoding_v, default_encoding);
121+
}
112122

113123
enum encoding ParseEncoding(Isolate* isolate,
114124
Local<Value> encoding_v,

‎src/crypto/crypto_hash.cc

+78-8
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,71 @@ const EVP_MD* GetDigestImplementation(Environment* env,
202202
#endif
203203
}
204204

205+
// Binding: oneShotDigest(algorithm, algorithmId, algorithmCache,
206+
// input, outputEncoding, outputEncodingId)
// Computes the digest of `input` with a single EVP_Digest() call and returns
// it encoded per the requested output encoding. Throws a crypto error when
// the digest method is not supported or when EVP_Digest() fails.
207+
void Hash::OneShotDigest(const FunctionCallbackInfo<Value>& args) {
208+
Environment* env = Environment::GetCurrent(args);
209+
Isolate* isolate = env->isolate();
210+
// The JS caller is expected to have validated and normalized every
// argument, so violations are treated as internal errors (CHECK).
CHECK_EQ(args.Length(), 6);
211+
CHECK(args[0]->IsString()); // algorithm
212+
CHECK(args[1]->IsInt32()); // algorithmId
213+
CHECK(args[2]->IsObject()); // algorithmCache
214+
CHECK(args[3]->IsString() || args[3]->IsArrayBufferView()); // input
215+
CHECK(args[4]->IsString()); // outputEncoding
216+
CHECK(args[5]->IsUint32() || args[5]->IsUndefined()); // outputEncodingId
217+
218+
const EVP_MD* md = GetDigestImplementation(env, args[0], args[1], args[2]);
219+
if (md == nullptr) {
220+
Utf8Value method(isolate, args[0]);
221+
std::string message =
222+
"Digest method " + method.ToString() + " is not supported";
223+
return ThrowCryptoError(env, ERR_get_error(), message.c_str());
224+
}
225+
226+
// Uses the numeric encoding id when it is a Uint32; otherwise parses the
// encoding name, with HEX as the default.
enum encoding output_enc = ParseEncoding(isolate, args[4], args[5], HEX);
227+
228+
int md_len = EVP_MD_size(md);
229+
unsigned int result_size;
230+
ByteSource::Builder output(md_len);
231+
int success;
232+
// On smaller inputs, EVP_Digest() can be slower than the
233+
// deprecated helpers e.g SHA256_XXX. The speedup may not
234+
// be worth using deprecated APIs, however, so we use
235+
// EVP_Digest(), unless there's a better alternative
236+
// in the future.
237+
// https://github.com/openssl/openssl/issues/19612
238+
if (args[3]->IsString()) {
239+
// String input: hash the bytes produced by Utf8Value.
Utf8Value utf8(isolate, args[3]);
240+
success = EVP_Digest(utf8.out(),
241+
utf8.length(),
242+
output.data<unsigned char>(),
243+
&result_size,
244+
md,
245+
nullptr);
246+
} else {
247+
// ArrayBufferView input: hash the view's contents directly.
ArrayBufferViewContents<unsigned char> input(args[3]);
248+
success = EVP_Digest(input.data(),
249+
input.length(),
250+
output.data<unsigned char>(),
251+
&result_size,
252+
md,
253+
nullptr);
254+
}
255+
if (!success) {
256+
return ThrowCryptoError(env, ERR_get_error());
257+
}
258+
259+
// The digest is encoded using md_len (from EVP_MD_size()) as its length;
// the result_size written by EVP_Digest() is not consulted.
Local<Value> error;
260+
MaybeLocal<Value> rc = StringBytes::Encode(
261+
env->isolate(), output.data<char>(), md_len, output_enc, &error);
262+
if (rc.IsEmpty()) {
263+
CHECK(!error.IsEmpty());
264+
env->isolate()->ThrowException(error);
265+
return;
266+
}
267+
args.GetReturnValue().Set(rc.FromMaybe(Local<Value>()));
268+
}
269+
205270
void Hash::Initialize(Environment* env, Local<Object> target) {
206271
Isolate* isolate = env->isolate();
207272
Local<Context> context = env->context();
@@ -216,6 +281,7 @@ void Hash::Initialize(Environment* env, Local<Object> target) {
216281

217282
SetMethodNoSideEffect(context, target, "getHashes", GetHashes);
218283
SetMethodNoSideEffect(context, target, "getCachedAliases", GetCachedAliases);
284+
SetMethodNoSideEffect(context, target, "oneShotDigest", OneShotDigest);
219285

220286
HashJob::Initialize(env, target);
221287

@@ -229,6 +295,7 @@ void Hash::RegisterExternalReferences(ExternalReferenceRegistry* registry) {
229295
registry->Register(HashDigest);
230296
registry->Register(GetHashes);
231297
registry->Register(GetCachedAliases);
298+
registry->Register(OneShotDigest);
232299

233300
HashJob::RegisterExternalReferences(registry);
234301

@@ -294,14 +361,17 @@ bool Hash::HashUpdate(const char* data, size_t len) {
294361
}
295362

296363
void Hash::HashUpdate(const FunctionCallbackInfo<Value>& args) {
297-
Decode<Hash>(args, [](Hash* hash, const FunctionCallbackInfo<Value>& args,
298-
const char* data, size_t size) {
299-
Environment* env = Environment::GetCurrent(args);
300-
if (UNLIKELY(size > INT_MAX))
301-
return THROW_ERR_OUT_OF_RANGE(env, "data is too long");
302-
bool r = hash->HashUpdate(data, size);
303-
args.GetReturnValue().Set(r);
304-
});
364+
Decode<Hash>(args,
365+
[](Hash* hash,
366+
const FunctionCallbackInfo<Value>& args,
367+
const char* data,
368+
size_t size) {
369+
Environment* env = Environment::GetCurrent(args);
370+
if (UNLIKELY(size > INT_MAX))
371+
return THROW_ERR_OUT_OF_RANGE(env, "data is too long");
372+
bool r = hash->HashUpdate(data, size);
373+
args.GetReturnValue().Set(r);
374+
});
305375
}
306376

307377
void Hash::HashDigest(const FunctionCallbackInfo<Value>& args) {

‎src/crypto/crypto_hash.h

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class Hash final : public BaseObject {
2626

2727
static void GetHashes(const v8::FunctionCallbackInfo<v8::Value>& args);
2828
static void GetCachedAliases(const v8::FunctionCallbackInfo<v8::Value>& args);
29+
static void OneShotDigest(const v8::FunctionCallbackInfo<v8::Value>& args);
2930

3031
protected:
3132
static void New(const v8::FunctionCallbackInfo<v8::Value>& args);

‎src/node_internals.h

+4
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,10 @@ v8::HeapProfiler::HeapSnapshotOptions GetHeapSnapshotOptions(
446446
v8::Local<v8::Value> options);
447447
} // namespace heap
448448

449+
enum encoding ParseEncoding(v8::Isolate* isolate,
450+
v8::Local<v8::Value> encoding_v,
451+
v8::Local<v8::Value> encoding_id,
452+
enum encoding default_encoding);
449453
} // namespace node
450454

451455
#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
'use strict';
2+
// This tests crypto.hash() works.
3+
const common = require('../common');
4+
5+
if (!common.hasCrypto)
6+
common.skip('missing crypto');
7+
8+
const assert = require('assert');
9+
const crypto = require('crypto');
10+
const fixtures = require('../common/fixtures');
11+
const fs = require('fs');
12+
13+
// Test errors for invalid arguments.
14+
// A non-string `algorithm` (including undefined) must be rejected.
[undefined, null, true, 1, () => {}, {}].forEach((invalid) => {
15+
assert.throws(() => { crypto.hash(invalid, 'test'); }, { code: 'ERR_INVALID_ARG_TYPE' });
16+
});
17+
18+
// Input that is neither a string nor an accepted buffer type must be rejected.
[undefined, null, true, 1, () => {}, {}].forEach((invalid) => {
19+
assert.throws(() => { crypto.hash('sha1', invalid); }, { code: 'ERR_INVALID_ARG_TYPE' });
20+
});
21+
22+
// A non-string `outputEncoding` must be rejected. `undefined` is left out of
// this list because it selects the default encoding (exercised further below).
[null, true, 1, () => {}, {}].forEach((invalid) => {
23+
assert.throws(() => { crypto.hash('sha1', 'test', invalid); }, { code: 'ERR_INVALID_ARG_TYPE' });
24+
});
25+
26+
// A string that is not a known encoding is a value error, not a type error.
assert.throws(() => { crypto.hash('sha1', 'test', 'not an encoding'); }, { code: 'ERR_INVALID_ARG_VALUE' });
27+
28+
// Test that the output of crypto.hash() is the same as crypto.createHash().
29+
const methods = crypto.getHashes();
30+
31+
const input = fs.readFileSync(fixtures.path('utf8_test_text.txt'));
32+
33+
for (const method of methods) {
34+
for (const outputEncoding of ['buffer', 'hex', 'base64', undefined]) {
35+
// The reference digest comes from the object-based API; an undefined
// encoding is compared against 'hex'.
const oldDigest = crypto.createHash(method).update(input).digest(outputEncoding || 'hex');
36+
const digestFromBuffer = crypto.hash(method, input, outputEncoding);
37+
assert.deepStrictEqual(digestFromBuffer, oldDigest,
38+
`different result from ${method} with encoding ${outputEncoding}`);
39+
// The same content passed as a string must produce the same digest as the
// raw bytes.
const digestFromString = crypto.hash(method, input.toString(), outputEncoding);
40+
assert.deepStrictEqual(digestFromString, oldDigest,
41+
`different result from ${method} with encoding ${outputEncoding}`);
42+
}
43+
}

0 commit comments

Comments
 (0)
Please sign in to comment.