Skip to content

Commit

Permalink
buffer: add buffer.isUtf8 for utf8 validation
Browse files Browse the repository at this point in the history
  • Loading branch information
anonrig committed Dec 22, 2022
1 parent 09f33c9 commit ef35908
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 1 deletion.
11 changes: 11 additions & 0 deletions doc/api/buffer.md
Expand Up @@ -5130,6 +5130,17 @@ For code running using Node.js APIs, converting between base64-encoded strings
and binary data should be performed using `Buffer.from(str, 'base64')` and
`buf.toString('base64')`.**

### `buffer.isUtf8(input)`

<!-- YAML
added: REPLACEME
-->

* input {Buffer | ArrayBuffer | TypedArray} The input to validate.
* Returns: {boolean} `true` if and only if the input is valid UTF-8.

This function returns `true` if `input` contains only valid UTF-8-encoded data,
and `false` otherwise, including when `input` is not one of the supported types.

### `buffer.INSPECT_MAX_BYTES`

<!-- YAML
Expand Down
17 changes: 16 additions & 1 deletion lib/buffer.js
Expand Up @@ -57,6 +57,7 @@ const {
compareOffset,
createFromString,
fill: bindingFill,
isUtf8: bindingIsUtf8,
indexOfBuffer,
indexOfNumber,
indexOfString,
Expand Down Expand Up @@ -84,7 +85,8 @@ const {
const {
isAnyArrayBuffer,
isArrayBufferView,
isUint8Array
isUint8Array,
isTypedArray,
} = require('internal/util/types');
const {
inspect: utilInspect
Expand Down Expand Up @@ -1314,10 +1316,23 @@ function atob(input) {
return Buffer.from(input, 'base64').toString('latin1');
}

/**
 * Reports whether `input` consists entirely of valid UTF-8 encoded bytes.
 *
 * @param {Buffer | ArrayBuffer | TypedArray} input The bytes to validate.
 * @returns {boolean} `true` when the bytes are valid UTF-8; `false` when
 *   they are not, or when `input` is not one of the supported types.
 */
function isUtf8(input) {
  if (isTypedArray(input) || Buffer.isBuffer(input)) {
    const { buffer, byteOffset, byteLength } = input;

    // The view spans its entire backing store: pass it through without a copy.
    if (byteOffset === 0 && byteLength === buffer.byteLength) {
      return bindingIsUtf8(buffer);
    }

    // The binding validates every byte of the ArrayBuffer it is given, so a
    // view over only part of a larger ArrayBuffer (for example a pooled
    // Buffer or the result of `subarray()`) has to be narrowed to its own
    // byte range first. Otherwise unrelated bytes outside the view would
    // decide the result.
    return bindingIsUtf8(buffer.slice(byteOffset, byteOffset + byteLength));
  }

  if (isAnyArrayBuffer(input)) {
    return bindingIsUtf8(input);
  }

  // Anything that is not binary data is reported as "not UTF-8".
  return false;
}

module.exports = {
Buffer,
SlowBuffer,
transcode,
isUtf8,

// Legacy
kMaxLength,
Expand Down
11 changes: 11 additions & 0 deletions src/node_buffer.cc
Expand Up @@ -1223,6 +1223,13 @@ static void EncodeInto(const FunctionCallbackInfo<Value>& args) {
results[1] = written;
}

static void IsUtf8(const FunctionCallbackInfo<Value>& args) {
CHECK_GE(args.Length(), 1);
CHECK(args[0]->IsArrayBuffer());
Local<ArrayBuffer> input = args[0].As<ArrayBuffer>();
auto external = static_cast<const char*>(input->Data());
args.GetReturnValue().Set(simdutf::validate_utf8(external, input->ByteLength()));
}

void SetBufferPrototype(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
Expand Down Expand Up @@ -1358,6 +1365,8 @@ void Initialize(Local<Object> target,
SetMethod(context, target, "encodeInto", EncodeInto);
SetMethodNoSideEffect(context, target, "encodeUtf8String", EncodeUtf8String);

SetMethodNoSideEffect(context, target, "isUtf8", IsUtf8);

target
->Set(context,
FIXED_ONE_BYTE_STRING(isolate, "kMaxLength"),
Expand Down Expand Up @@ -1413,6 +1422,8 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
registry->Register(EncodeInto);
registry->Register(EncodeUtf8String);

registry->Register(IsUtf8);

registry->Register(StringSlice<ASCII>);
registry->Register(StringSlice<BASE64>);
registry->Register(StringSlice<BASE64URL>);
Expand Down
15 changes: 15 additions & 0 deletions test/parallel/test-buffer-isutf8.js
@@ -0,0 +1,15 @@
'use strict';

// Smoke test for `buffer.isUtf8()`: encoded strings are accepted, a lone
// 0xf8 byte is rejected, and non-binary inputs yield `false`.
require('../common');
const assert = require('assert');
const { isUtf8 } = require('buffer');
const { TextEncoder } = require('util');

const encoder = new TextEncoder();

// Output of TextEncoder is expected to validate as UTF-8.
for (const text of ['hello', 'ğ']) {
  assert.strictEqual(isUtf8(encoder.encode(text)), true);
}

// A single 0xf8 byte is expected to be reported as invalid.
assert.strictEqual(isUtf8(Buffer.from([0xf8])), false);

// Inputs that are not binary data are reported as `false`.
for (const notBinary of [null, undefined]) {
  assert.strictEqual(isUtf8(notBinary), false);
}

0 comments on commit ef35908

Please sign in to comment.