Skip to content

Commit

Permalink
refactor(compiler): use TypedArrays and DataViews to calculate i18n m…
Browse files Browse the repository at this point in the history
…essage IDs (#48256)

The native UTF-8 encoder (`TextEncoder`) provides its output as a `Uint8Array`
which is a specialized `TypedArray` for storing byte values. To remove the need
to copy and transform this data multiple times, a `DataView` is used to allow
for low-level access to the data in a variety of bit-widths and endianness.
The `DataView` class is natively available and supported by all browsers and
Node.js versions supported by the framework (https://caniuse.com/mdn-javascript_builtins_dataview).

PR Close #48256
  • Loading branch information
clydin authored and AndrewKushnir committed Dec 1, 2022
1 parent 0827091 commit 7f36221
Showing 1 changed file with 66 additions and 30 deletions.
96 changes: 66 additions & 30 deletions packages/compiler/src/i18n/digest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -181,10 +181,11 @@ function fk(index: number, b: number, c: number, d: number): [number, number] {
*/
export function fingerprint(str: string): [number, number] {
textEncoder ??= new TextEncoder();
const utf8 = [...textEncoder.encode(str)];
const utf8 = textEncoder.encode(str);
const view = new DataView(utf8.buffer, utf8.byteOffset, utf8.byteLength);

let hi = hash32(utf8, 0);
let lo = hash32(utf8, 102072);
let hi = hash32(view, utf8.length, 0);
let lo = hash32(view, utf8.length, 102072);

if (hi == 0 && (lo == 0 || lo == 1)) {
hi = hi ^ 0x130f9bef;
Expand All @@ -208,40 +209,81 @@ export function computeMsgId(msg: string, meaning: string = ''): string {
return wordsToDecimalString(hi & 0x7fffffff, lo);
}

// Hashes the bytes of a message into a single 32-bit value, starting from the seed `c`.
// The input is consumed 12 bytes at a time as three little-endian 32-bit words that are
// added to `a`, `b` and `c` and then scrambled by `mix`; the leftover 0-11 bytes and the
// total length are folded in before the final `mix`, whose third word is returned.
// NOTE(review): this span is a rendered diff without +/- markers, so the replaced
// `Byte[]`-based lines and their `DataView`-based successors appear interleaved.
function hash32(bytes: Byte[], c: number): number {
function hash32(view: DataView, length: number, c: number): number {
let a = 0x9e3779b9, b = 0x9e3779b9;
let i: number;

const len = bytes.length;
let index = 0;

for (i = 0; i + 12 <= len; i += 12) {
a = add32(a, wordAt(bytes, i, Endian.Little));
b = add32(b, wordAt(bytes, i + 4, Endian.Little));
c = add32(c, wordAt(bytes, i + 8, Endian.Little));
// Largest starting index at which a full 12-byte group still fits in the input.
const end = length - 12;
for (; index <= end; index += 12) {
// `true` selects little-endian byte order for each 32-bit read.
a += view.getUint32(index, true);
b += view.getUint32(index + 4, true);
c += view.getUint32(index + 8, true);
const res = mix(a, b, c);
a = res[0], b = res[1], c = res[2];
}

a = add32(a, wordAt(bytes, i, Endian.Little));
b = add32(b, wordAt(bytes, i + 4, Endian.Little));
// Number of trailing bytes (0 to 11) that the 12-byte loop did not consume.
const remainder = length - index;

// the first byte of c is reserved for the length
c = add32(c, len);
c = add32(c, wordAt(bytes, i + 8, Endian.Little) << 8);
c += length;

// Fold the trailing bytes into a, then b, then c: whole 32-bit words where available,
// otherwise byte by byte at increasing shift positions.
if (remainder >= 4) {
a += view.getUint32(index, true);
index += 4;

if (remainder >= 8) {
b += view.getUint32(index, true);
index += 4;

// Partial 32-bit word for c
// Shifts start at 8 (not 0) because the low byte of c already holds the length.
if (remainder >= 9) {
c += view.getUint8(index++) << 8;
}
if (remainder >= 10) {
c += view.getUint8(index++) << 16;
}
if (remainder === 11) {
c += view.getUint8(index++) << 24;
}
} else {
// Partial 32-bit word for b
if (remainder >= 5) {
b += view.getUint8(index++);
}
if (remainder >= 6) {
b += view.getUint8(index++) << 8;
}
if (remainder === 7) {
b += view.getUint8(index++) << 16;
}
}
} else {
// Partial 32-bit word for a
if (remainder >= 1) {
a += view.getUint8(index++);
}
if (remainder >= 2) {
a += view.getUint8(index++) << 8;
}
if (remainder === 3) {
a += view.getUint8(index++) << 16;
}
}

// Only the third word of the final mix is the hash result.
return mix(a, b, c)[2];
}

// clang-format off
// Scrambles the three hash state words with a fixed sequence of subtract, shift and XOR
// steps and returns the updated `[a, b, c]` triple.
// NOTE(review): rendered diff without +/- markers - the nine `sub32(...)` lines look like
// the replaced version and the nine `-=` lines like their successors; confirm against the
// commit before treating this span as a single function body.
function mix(a: number, b: number, c: number): [number, number, number] {
a = sub32(a, b); a = sub32(a, c); a ^= c >>> 13;
b = sub32(b, c); b = sub32(b, a); b ^= a << 8;
c = sub32(c, a); c = sub32(c, b); c ^= b >>> 13;
a = sub32(a, b); a = sub32(a, c); a ^= c >>> 12;
b = sub32(b, c); b = sub32(b, a); b ^= a << 16;
c = sub32(c, a); c = sub32(c, b); c ^= b >>> 5;
a = sub32(a, b); a = sub32(a, c); a ^= c >>> 3;
b = sub32(b, c); b = sub32(b, a); b ^= a << 10;
c = sub32(c, a); c = sub32(c, b); c ^= b >>> 15;
// In the `-=` form the subtraction itself does not wrap; the `^=` that ends each line
// coerces the running value back to a 32-bit integer.
a -= b; a -= c; a ^= c >>> 13;
b -= c; b -= a; b ^= a << 8;
c -= a; c -= b; c ^= b >>> 13;
a -= b; a -= c; a ^= c >>> 12;
b -= c; b -= a; b ^= a << 16;
c -= a; c -= b; c ^= b >>> 5;
a -= b; a -= c; a ^= c >>> 3;
b -= c; b -= a; b ^= a << 10;
c -= a; c -= b; c ^= b >>> 15;
return [a, b, c];
}
// clang-format on
Expand Down Expand Up @@ -273,12 +315,6 @@ function add64(a: [number, number], b: [number, number]): [number, number] {
return [h, l];
}

/**
 * Subtracts `b` from `a` with 32-bit wrap-around semantics.
 *
 * Both operands are coerced to signed 32-bit integers before subtracting, and
 * the difference is coerced again, so the result always lies in the int32 range.
 */
function sub32(a: number, b: number): number {
  const minuend = a | 0;
  const subtrahend = b | 0;
  // The difference of two int32 values is exact in a double; `| 0` wraps it modulo 2^32.
  return (minuend - subtrahend) | 0;
}

// Rotate a 32b number left `count` position
function rol32(a: number, count: number): number {
return (a << count) | (a >>> (32 - count));
Expand Down

0 comments on commit 7f36221

Please sign in to comment.