nodejs · nodejs-github-bot · Dec 25, 2022 · Dec 22, 2022 · Dec 23, 2022 · Dec 23, 2022
diff --git a/doc/api/buffer.md b/doc/api/buffer.md
@@ -5130,6 +5130,17 @@ For code running using Node.js APIs, converting between base64-encoded strings
 and binary data should be performed using `Buffer.from(str, 'base64')` and
 `buf.toString('base64')`.**
 
+### `buffer.isUtf8(input)`
+
+<!-- YAML
+added: REPLACEME
+-->
+
+* input {Buffer | ArrayBuffer | TypedArray} The input to validate.
+* Returns: {boolean} Returns `true` if and only if the input is valid UTF-8.
+
+This function checks whether `input` consists entirely of valid UTF-8 data.
+
 ### `buffer.INSPECT_MAX_BYTES`
 
 <!-- YAML

diff --git a/lib/buffer.js b/lib/buffer.js
@@ -57,6 +57,7 @@ const {
   compareOffset,
   createFromString,
   fill: bindingFill,
+  isUtf8: bindingIsUtf8,
   indexOfBuffer,
   indexOfNumber,
   indexOfString,
@@ -84,7 +85,8 @@ const {
 const {
   isAnyArrayBuffer,
   isArrayBufferView,
-  isUint8Array
+  isUint8Array,
+  isTypedArray,
 } = require('internal/util/types');
 const {
   inspect: utilInspect
@@ -1314,10 +1316,19 @@ function atob(input) {
   return Buffer.from(input, 'base64').toString('latin1');
 }
 
+function isUtf8(input) {
+  if (isTypedArray(input) || isAnyArrayBuffer(input)) {
+    return bindingIsUtf8(input);
+  }
+
+  throw new ERR_INVALID_ARG_TYPE('input', ['TypedArray', 'Buffer'], input);
+}
+
 module.exports = {
   Buffer,
   SlowBuffer,
   transcode,
+  isUtf8,
 
   // Legacy
   kMaxLength,

diff --git a/src/node_buffer.cc b/src/node_buffer.cc
@@ -1223,6 +1223,34 @@ static void EncodeInto(const FunctionCallbackInfo<Value>& args) {
   results[1] = written;
 }
 
+// Binding for buffer.isUtf8(): returns whether args[0] holds valid UTF-8.
+static void IsUtf8(const FunctionCallbackInfo<Value>& args) {
+  CHECK_EQ(args.Length(), 1);
+  if (args[0]->IsSharedArrayBuffer()) {  // Passes isAnyArrayBuffer() in JS.
+    Local<v8::SharedArrayBuffer> sab = args[0].As<v8::SharedArrayBuffer>();
+    return args.GetReturnValue().Set(simdutf::validate_utf8(
+        static_cast<const char*>(sab->Data()), sab->ByteLength()));
+  }
+  CHECK(args[0]->IsTypedArray() || args[0]->IsArrayBuffer());
+
+  Local<ArrayBuffer> buf;
+  size_t offset = 0;  // Where the bytes to validate start within buf.
+  size_t length = 0;
+  if (args[0]->IsTypedArray()) {
+    Local<v8::TypedArray> input = args[0].As<v8::TypedArray>();
+    buf = input->Buffer();
+    offset = input->ByteOffset();
+    length = input->ByteLength();
+  } else {
+    buf = args[0].As<ArrayBuffer>();
+    length = buf->ByteLength();
+  }
+  if (buf->WasDetached()) {
+    return node::THROW_ERR_BUFFER_CONTEXT_NOT_AVAILABLE(args.GetIsolate());
+  }
+  const char* data = static_cast<const char*>(buf->Data()) + offset;
+  args.GetReturnValue().Set(simdutf::validate_utf8(data, length));
+}
 
 void SetBufferPrototype(const FunctionCallbackInfo<Value>& args) {
   Environment* env = Environment::GetCurrent(args);
@@ -1358,6 +1386,8 @@ void Initialize(Local<Object> target,
   SetMethod(context, target, "encodeInto", EncodeInto);
   SetMethodNoSideEffect(context, target, "encodeUtf8String", EncodeUtf8String);
 
+  SetMethodNoSideEffect(context, target, "isUtf8", IsUtf8);
+
   target
       ->Set(context,
             FIXED_ONE_BYTE_STRING(isolate, "kMaxLength"),
@@ -1413,6 +1443,8 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
   registry->Register(EncodeInto);
   registry->Register(EncodeUtf8String);
 
+  registry->Register(IsUtf8);
+
   registry->Register(StringSlice<ASCII>);
   registry->Register(StringSlice<BASE64>);
   registry->Register(StringSlice<BASE64URL>);

diff --git a/test/parallel/test-buffer-isutf8.js b/test/parallel/test-buffer-isutf8.js
@@ -0,0 +1,40 @@
+'use strict';
+
+require('../common');
+const assert = require('assert');
+const { isUtf8, Buffer } = require('buffer');
+const { TextEncoder } = require('util');
+
+const encoder = new TextEncoder();
+
+assert.strictEqual(isUtf8(encoder.encode('hello')), true);
+assert.strictEqual(isUtf8(encoder.encode('ğ')), true);
+assert.strictEqual(isUtf8(Buffer.from([0xf8])), false);
+assert.strictEqual(isUtf8(encoder.encode('aé日')), true);
+
+[
+  null,
+  undefined,
+  'hello',
+  true,
+  false,
+].forEach((input) => {
+  assert.throws(
+    () => { isUtf8(input); },
+    {
+      code: 'ERR_INVALID_ARG_TYPE',
+    },
+  );
+});
+
+{
+  // Test with detached array buffers
+  const arrayBuffer = new ArrayBuffer(1024);
+  structuredClone(arrayBuffer, { transfer: [arrayBuffer] });
+  assert.throws(
+    () => { isUtf8(arrayBuffer); },
+    {
+      code: 'ERR_BUFFER_CONTEXT_NOT_AVAILABLE'
+    }
+  );
+}