Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

util: add fast path for utf8 encoding #45412

Merged
merged 1 commit into from Nov 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
35 changes: 30 additions & 5 deletions lib/internal/encoding.js
Expand Up @@ -4,6 +4,7 @@
// https://encoding.spec.whatwg.org

const {
Boolean,
ObjectCreate,
ObjectDefineProperties,
ObjectGetOwnPropertyDescriptors,
Expand All @@ -28,6 +29,8 @@ const kFlags = Symbol('flags');
const kEncoding = Symbol('encoding');
const kDecoder = Symbol('decoder');
const kEncoder = Symbol('encoder');
const kUTF8FastPath = Symbol('kUTF8FastPath');
const kIgnoreBOM = Symbol('kIgnoreBOM');

const {
getConstructorOf,
Expand All @@ -49,7 +52,8 @@ const {

const {
encodeInto,
encodeUtf8String
encodeUtf8String,
decodeUTF8,
} = internalBinding('buffer');

let Buffer;
Expand Down Expand Up @@ -397,19 +401,40 @@ function makeTextDecoderICU() {
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
}

const handle = getConverter(enc, flags);
if (handle === undefined)
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
// Only support fast path for UTF-8 without FATAL flag
const fastPathAvailable = enc === 'utf-8' && !(options?.fatal);
BridgeAR marked this conversation as resolved.
Show resolved Hide resolved

this[kDecoder] = true;
this[kHandle] = handle;
this[kFlags] = flags;
this[kEncoding] = enc;
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
this[kUTF8FastPath] = fastPathAvailable;
this[kHandle] = undefined;

if (!fastPathAvailable) {
this.#prepareConverter();
}
}

#prepareConverter() {
if (this[kHandle] !== undefined) return;
const handle = getConverter(this[kEncoding], this[kFlags]);
if (handle === undefined)
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
this[kHandle] = handle;
}

decode(input = empty, options = kEmptyObject) {
validateDecoder(this);

this[kUTF8FastPath] &&= !(options?.stream);

if (this[kUTF8FastPath]) {
return decodeUTF8(input, this[kIgnoreBOM]);
}

this.#prepareConverter();

validateObject(options, 'options', {
nullable: true,
allowArray: true,
Expand Down
45 changes: 45 additions & 0 deletions src/node_buffer.cc
Expand Up @@ -24,6 +24,7 @@
#include "node_blob.h"
#include "node_errors.h"
#include "node_external_reference.h"
#include "node_i18n.h"
#include "node_internals.h"

#include "env-inl.h"
Expand Down Expand Up @@ -565,6 +566,48 @@ void StringSlice(const FunctionCallbackInfo<Value>& args) {
args.GetReturnValue().Set(ret);
}

// Decodes a buffer of UTF-8 bytes into a JavaScript string.
//
// args[0]: the bytes to decode; must be an ArrayBuffer, SharedArrayBuffer or
//          ArrayBufferView, otherwise ERR_INVALID_ARG_TYPE is thrown.
// args[1]: boolean "ignore BOM" flag (CHECKed to be a boolean). When false, a
//          leading UTF-8 byte order mark (EF BB BF) is skipped before
//          decoding; when true the bytes are handed to the encoder unchanged.
//
// Sets the return value to the decoded string, or to the empty string when no
// bytes remain after BOM handling. If StringBytes::Encode fails, the error it
// reports is thrown on the isolate and no return value is set.
void DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
  Environment* env = Environment::GetCurrent(args);  // input, ignoreBOM

  if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
        args[0]->IsArrayBufferView())) {
    return node::THROW_ERR_INVALID_ARG_TYPE(
        env->isolate(),
        "The \"list\" argument must be an instance of SharedArrayBuffer, "
        "ArrayBuffer or ArrayBufferView.");
  }

  ArrayBufferViewContents<char> buffer(args[0]);

  CHECK(args[1]->IsBoolean());
  bool ignore_bom = args[1]->IsTrue();

  const char* data = buffer.data();
  size_t length = buffer.length();

  // Skip the three-byte UTF-8 BOM unless the caller asked to keep it.
  if (!ignore_bom && length >= 3 && memcmp(data, "\xEF\xBB\xBF", 3) == 0) {
    data += 3;
    length -= 3;
  }

  // Nothing left to decode: short-circuit with the empty string.
  if (length == 0) return args.GetReturnValue().SetEmptyString();

  Local<Value> error;
  MaybeLocal<Value> maybe_ret =
      StringBytes::Encode(env->isolate(), data, length, UTF8, &error);
  Local<Value> ret;

  if (!maybe_ret.ToLocal(&ret)) {
    // Encode() is expected to have filled in `error` when it returns empty.
    CHECK(!error.IsEmpty());
    env->isolate()->ThrowException(error);
    return;
  }

  args.GetReturnValue().Set(ret);
}

// bytesCopied = copy(buffer, target[, targetStart][, sourceStart][, sourceEnd])
void Copy(const FunctionCallbackInfo<Value> &args) {
Expand Down Expand Up @@ -1282,6 +1325,7 @@ void Initialize(Local<Object> target,

SetMethod(context, target, "setBufferPrototype", SetBufferPrototype);
SetMethodNoSideEffect(context, target, "createFromString", CreateFromString);
SetMethodNoSideEffect(context, target, "decodeUTF8", DecodeUTF8);

SetMethodNoSideEffect(context, target, "byteLengthUtf8", ByteLengthUtf8);
SetMethod(context, target, "copy", Copy);
Expand Down Expand Up @@ -1339,6 +1383,7 @@ void Initialize(Local<Object> target,
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
registry->Register(SetBufferPrototype);
registry->Register(CreateFromString);
registry->Register(DecodeUTF8);

registry->Register(ByteLengthUtf8);
registry->Register(Copy);
Expand Down
2 changes: 1 addition & 1 deletion test/parallel/test-whatwg-encoding-custom-textdecoder.js
Expand Up @@ -113,7 +113,7 @@ if (common.hasIntl) {
' fatal: false,\n' +
' ignoreBOM: true,\n' +
' [Symbol(flags)]: 4,\n' +
' [Symbol(handle)]: Converter {}\n' +
' [Symbol(handle)]: undefined\n' +
'}'
);
} else {
Expand Down