From 50c25d3ff5341f5fe2f2c1fae3f2fe418a9eb40b Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Wed, 2 Nov 2022 21:34:01 -0400 Subject: [PATCH 1/3] util: improve textdecoder decode performance --- benchmark/util/text-decoder.js | 19 ++++++++++++++++++ lib/internal/encoding.js | 2 +- src/node_i18n.cc | 35 ++++++++++++++++++++++++---------- 3 files changed, 45 insertions(+), 11 deletions(-) create mode 100644 benchmark/util/text-decoder.js diff --git a/benchmark/util/text-decoder.js b/benchmark/util/text-decoder.js new file mode 100644 index 00000000000000..ad845f7c92d0c7 --- /dev/null +++ b/benchmark/util/text-decoder.js @@ -0,0 +1,19 @@ +'use strict'; + +const common = require('../common.js'); + +const bench = common.createBenchmark(main, { + encoding: ['utf-8', 'latin1', 'iso-8859-3'], + ignoreBOM: [0, 1], + len: [256, 1024 * 16, 1024 * 512], + n: [1e6] +}); + +function main({ encoding, len, n, ignoreBOM }) { + const buf = Buffer.allocUnsafe(len); + const decoder = new TextDecoder(encoding, { ignoreBOM }); + + bench.start(); + decoder.decode(buf); + bench.end(n); +} diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js index 3a3d558361e118..2ab85d9d9acb06 100644 --- a/lib/internal/encoding.js +++ b/lib/internal/encoding.js @@ -438,7 +438,7 @@ function makeTextDecoderICU() { if (typeof ret === 'number') { throw new ERR_ENCODING_INVALID_ENCODED_DATA(this.encoding, ret); } - return ret.toString('ucs2'); + return ret; } } diff --git a/src/node_i18n.cc b/src/node_i18n.cc index 581d52a7d05738..1fc2e0c9cbbe40 100644 --- a/src/node_i18n.cc +++ b/src/node_i18n.cc @@ -50,6 +50,7 @@ #include "node_buffer.h" #include "node_errors.h" #include "node_internals.h" +#include "string_bytes.h" #include "util-inl.h" #include "v8.h" @@ -502,18 +503,32 @@ void ConverterObject::Decode(const FunctionCallbackInfo& args) { } } ret = ToBufferEndian(env, &result); - if (omit_initial_bom && !ret.IsEmpty()) { - // Perform `ret = ret.slice(2)`. + + if (!ret.IsEmpty()) { CHECK(ret.ToLocalChecked()->IsUint8Array()); - Local orig_ret = ret.ToLocalChecked().As(); - ret = Buffer::New(env, - orig_ret->Buffer(), - orig_ret->ByteOffset() + 2, - orig_ret->ByteLength() - 2) - .FromMaybe(Local()); + + if (omit_initial_bom) { + // Perform `ret = ret.slice(2)`. + Local orig_ret = ret.ToLocalChecked().As(); + ret = Buffer::New(env, + orig_ret->Buffer(), + orig_ret->ByteOffset() + 2, + orig_ret->ByteLength() - 2) + .FromMaybe(Local()); + } + + Local error; + ArrayBufferViewContents buf(ret.ToLocalChecked()); + MaybeLocal encoded = StringBytes::Encode( + env->isolate(), buf.data(), buf.length(), encoding::UCS2, &error); + + if (!encoded.IsEmpty()) { + args.GetReturnValue().Set(encoded.ToLocalChecked()); + } else { + args.GetReturnValue().Set(error); + } } - if (!ret.IsEmpty()) - args.GetReturnValue().Set(ret.ToLocalChecked()); + return; } From 03be017117b3f107169928d7db8842f1e2baf913 Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Thu, 3 Nov 2022 16:06:57 -0400 Subject: [PATCH 2/3] util: change implementation to not use buffers --- src/node_i18n.cc | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/src/node_i18n.cc b/src/node_i18n.cc index 1fc2e0c9cbbe40..b21d9f8d81e561 100644 --- a/src/node_i18n.cc +++ b/src/node_i18n.cc @@ -97,7 +97,6 @@ using v8::NewStringType; using v8::Object; using v8::ObjectTemplate; using v8::String; -using v8::Uint8Array; using v8::Value; namespace i18n { @@ -446,7 +445,6 @@ void ConverterObject::Decode(const FunctionCallbackInfo& args) { UErrorCode status = U_ZERO_ERROR; MaybeStackBuffer result; - MaybeLocal ret; UBool flush = (flags & CONVERTER_FLAGS_FLUSH) == CONVERTER_FLAGS_FLUSH; @@ -502,31 +500,27 @@ void ConverterObject::Decode(const FunctionCallbackInfo& args) { converter->set_bom_seen(true); } } - ret = ToBufferEndian(env, &result); - if (!ret.IsEmpty()) { - CHECK(ret.ToLocalChecked()->IsUint8Array()); - - if (omit_initial_bom) { - // Perform `ret = ret.slice(2)`. - Local orig_ret = ret.ToLocalChecked().As(); - ret = Buffer::New(env, - orig_ret->Buffer(), - orig_ret->ByteOffset() + 2, - orig_ret->ByteLength() - 2) - .FromMaybe(Local()); - } + Local error; + const UChar* output = result.out(); + size_t beginning = 0; + size_t length = result.length() * sizeof(UChar); - Local error; - ArrayBufferViewContents buf(ret.ToLocalChecked()); - MaybeLocal encoded = StringBytes::Encode( - env->isolate(), buf.data(), buf.length(), encoding::UCS2, &error); + if (omit_initial_bom) { + // Perform `ret = ret.slice(2)`. + beginning += 2; + length -= 2; + } - if (!encoded.IsEmpty()) { - args.GetReturnValue().Set(encoded.ToLocalChecked()); - } else { - args.GetReturnValue().Set(error); - } + const char* value = reinterpret_cast(output) + beginning; + MaybeLocal encoded = + StringBytes::Encode(env->isolate(), value, length, UCS2, &error); + + Local ret; + if (encoded.ToLocal(&ret)) { + args.GetReturnValue().Set(ret); + } else { + args.GetReturnValue().Set(error); } return; From d13d595619d240b980be669ae164be75a70400de Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Sat, 5 Nov 2022 18:41:55 -0400 Subject: [PATCH 3/3] util: try to add big endian support --- src/node_i18n.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/node_i18n.cc b/src/node_i18n.cc index b21d9f8d81e561..ed7b72c31f975e 100644 --- a/src/node_i18n.cc +++ b/src/node_i18n.cc @@ -502,7 +502,7 @@ void ConverterObject::Decode(const FunctionCallbackInfo& args) { } Local error; - const UChar* output = result.out(); + UChar* output = result.out(); size_t beginning = 0; size_t length = result.length() * sizeof(UChar); @@ -512,7 +512,12 @@ void ConverterObject::Decode(const FunctionCallbackInfo& args) { length -= 2; } - const char* value = reinterpret_cast(output) + beginning; + char* value = reinterpret_cast(output) + beginning; + + if (IsBigEndian()) { + SwapBytes16(value, length); + } + MaybeLocal encoded = StringBytes::Encode(env->isolate(), value, length, UCS2, &error);