denoland · littledivy · Nov 11, 2022 · Nov 11, 2022 · Nov 11, 2022 · Nov 11, 2022
diff --git a/ext/web/08_text_encoding.js b/ext/web/08_text_encoding.js
@@ -16,14 +16,14 @@
   const ops = core.ops;
   const webidl = window.__bootstrap.webidl;
   const {
-    ArrayBufferIsView,
-    ObjectPrototypeIsPrototypeOf,
     PromiseReject,
     PromiseResolve,
     StringPrototypeCharCodeAt,
     StringPrototypeSlice,
     TypedArrayPrototypeSubarray,
     Uint8Array,
+    ObjectPrototypeIsPrototypeOf,
+    ArrayBufferIsView,
     Uint32Array,
   } = window.__bootstrap.primordials;
 
@@ -34,6 +34,8 @@
     #fatal;
     /** @type {boolean} */
     #ignoreBOM;
+    /** @type {boolean} */
+    #utf8SinglePass;
 
     /** @type {number | null} */
     #rid = null;
@@ -56,6 +58,7 @@
       this.#encoding = encoding;
       this.#fatal = options.fatal;
       this.#ignoreBOM = options.ignoreBOM;
+      this.#utf8SinglePass = encoding === "utf-8" && !options.fatal;
       this[webidl.brand] = webidl.brand;
     }
 
@@ -81,7 +84,7 @@
      * @param {BufferSource} [input]
      * @param {TextDecodeOptions} options
      */
-    decode(input = new Uint8Array(), options = {}) {
+    decode(input = new Uint8Array(), options = undefined) {
       webidl.assertBranded(this, TextDecoderPrototype);
       const prefix = "Failed to execute 'decode' on 'TextDecoder'";
       if (input !== undefined) {
@@ -91,40 +94,46 @@
           allowShared: true,
         });
       }
-      options = webidl.converters.TextDecodeOptions(options, {
-        prefix,
-        context: "Argument 2",
-      });
+      let stream = false;
+      if (options !== undefined) {
+        options = webidl.converters.TextDecodeOptions(options, {
+          prefix,
+          context: "Argument 2",
+        });
+        stream = options.stream;
+      }
 
       try {
-        try {
-          if (ArrayBufferIsView(input)) {
-            input = new Uint8Array(
-              input.buffer,
-              input.byteOffset,
-              input.byteLength,
-            );
-          } else {
-            input = new Uint8Array(input);
-          }
-        } catch {
-          // If the buffer is detached, just create a new empty Uint8Array.
-          input = new Uint8Array();
-        }
+        // Note from spec: implementations are strongly encouraged to use an implementation strategy that avoids this copy.
+        // When doing so they will have to make sure that changes to input do not affect future calls to decode().
         if (
           ObjectPrototypeIsPrototypeOf(
             SharedArrayBuffer.prototype,
-          ObjectPrototypeIsPrototypeOf(
-            SharedArrayBuffer.prototype,
+          ObjectPrototypeIsPrototypeOf(
+            SharedArrayBufferPrototype,
-          ObjectPrototypeIsPrototypeOf(
-            SharedArrayBuffer.prototype,
+          ObjectPrototypeIsPrototypeOf(
+            SharedArrayBufferPrototype,
-            input.buffer,
+            input || input.buffer,
           )
         ) {
           // We clone the data into a non-shared ArrayBuffer so we can pass it
           // to Rust.
           // `input` is now a Uint8Array, and calling the TypedArray constructor
           // with a TypedArray argument copies the data.
-          input = new Uint8Array(input);
+          if (ArrayBufferIsView(input)) {
+            input = new Uint8Array(
+              input.buffer,
+              input.byteOffset,
+              input.byteLength,
+            );
+          } else {
+            input = new Uint8Array(input);
+          }
         }
 
-        if (!options.stream && this.#rid === null) {
+        // Fast path for single pass decoding.
+        if (!stream && this.#rid === null) {
+          // Fast path for utf8 single pass decoding.
+          if (this.#utf8SinglePass) {
+            return ops.op_encoding_decode_utf8(input, this.#ignoreBOM);
+          }
+
           return ops.op_encoding_decode_single(
             input,
             this.#encoding,
@@ -140,9 +149,9 @@
             this.#ignoreBOM,
           );
         }
-        return ops.op_encoding_decode(input, this.#rid, options.stream);
+        return ops.op_encoding_decode(input, this.#rid, stream);
       } finally {
-        if (!options.stream && this.#rid !== null) {
+        if (!stream && this.#rid !== null) {
           core.close(this.#rid);
           this.#rid = null;
         }

diff --git a/ext/web/lib.rs b/ext/web/lib.rs
@@ -91,6 +91,7 @@ pub fn init<P: TimersPermission + 'static>(
       op_base64_btoa::decl(),
       op_encoding_normalize_label::decl(),
       op_encoding_decode_single::decl(),
+      op_encoding_decode_utf8::decl(),
       op_encoding_new_decoder::decl(),
       op_encoding_decode::decl(),
       op_encoding_encode_into::decl(),
@@ -179,6 +180,39 @@ fn op_encoding_normalize_label(label: String) -> Result<String, AnyError> {
   Ok(encoding.name().to_lowercase())
 }
 
+/// Decodes `zero_copy` as UTF-8 into a V8 string, dropping a leading BOM
+/// (EF BB BF) unless `ignore_bom` is set.
+#[op(v8)]
+fn op_encoding_decode_utf8<'a>(
+  scope: &mut v8::HandleScope<'a>,
+  zero_copy: &[u8],
+  ignore_bom: bool,
+) -> Result<serde_v8::Value<'a>, AnyError> {
+  let buf = &zero_copy;
+
+  let buf = if !ignore_bom
+    && buf.len() >= 3
+    && buf[0] == 0xef
+    && buf[1] == 0xbb
+    && buf[2] == 0xbf
+  {
+    &buf[3..]
+  } else {
+    buf
+  };
+
+  // `String::new_from_utf8()` returns `None` when the decoded string would be
+  // longer than V8 can handle; that is reported through `type_error` below.
+  // See:
+  // - https://encoding.spec.whatwg.org/#dom-textdecoder-decode
+  // - https://github.com/denoland/deno/issues/6649
+  // - https://github.com/v8/v8/blob/d68fb4733e39525f9ff0a9222107c02c28096e2a/include/v8.h#L3277-L3278
+  match v8::String::new_from_utf8(scope, buf, v8::NewStringType::Normal) {
+    Some(text) => Ok(serde_v8::from_v8(scope, text.into())?),
+    None => Err(type_error("buffer exceeds maximum length")),
+  }
+}
+
 #[op]
 fn op_encoding_decode_single(
   data: &[u8],

diff --git a/ops/lib.rs b/ops/lib.rs
@@ -910,22 +910,33 @@ fn codegen_u8_slice(core: &TokenStream2, idx: usize) -> TokenStream2 {
     let value = args.get(#idx as i32);
     match #core::v8::Local::<#core::v8::ArrayBuffer>::try_from(value) {
       Ok(b) => {
-        let store = b.data() as *mut u8;
-        // SAFETY: rust guarantees that lifetime of slice is no longer than the call.
-        unsafe { ::std::slice::from_raw_parts_mut(store, b.byte_length()) }
+        // Handles detached buffers.
+        let byte_length = b.byte_length();
+        if byte_length == 0 {
+          &mut []
+        } else {
+          let store = b.data() as *mut u8;
+          // SAFETY: rust guarantees that lifetime of slice is no longer than the call.
+          unsafe { ::std::slice::from_raw_parts_mut(store, byte_length) }
+        }
       },
       Err(_) => {
         if let Ok(view) = #core::v8::Local::<#core::v8::ArrayBufferView>::try_from(value) {
-          let (offset, len) = (view.byte_offset(), view.byte_length());
-          let buffer = match view.buffer(scope) {
-              Some(v) => v,
-              None => {
-                return #core::_ops::throw_type_error(scope, format!("Expected ArrayBufferView at position {}", #idx));
-              }
-          };
-          let store = buffer.data() as *mut u8;
-          // SAFETY: rust guarantees that lifetime of slice is no longer than the call.
-          unsafe { ::std::slice::from_raw_parts_mut(store.add(offset), len) }
+          let len = view.byte_length();
+          if len == 0 {
+            &mut []
+          } else {
+            let offset = view.byte_offset();
+            let buffer = match view.buffer(scope) {
+                Some(v) => v,
+                None => {
+                  return #core::_ops::throw_type_error(scope, format!("Expected ArrayBufferView at position {}", #idx));
+                }
+            };
+            let store = buffer.data() as *mut u8;
+            // SAFETY: rust guarantees that lifetime of slice is no longer than the call.
+            unsafe { ::std::slice::from_raw_parts_mut(store.add(offset), len) }
+          }
         } else {
           return #core::_ops::throw_type_error(scope, format!("Expected ArrayBufferView at position {}", #idx));
         }