Skip to content

Commit

Permalink
feat: hash uniformity for base digests
Browse files Browse the repository at this point in the history
  • Loading branch information
subzey committed Nov 11, 2021
1 parent f7dbfe1 commit 451858b
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 30 deletions.
47 changes: 28 additions & 19 deletions lib/getHashDigest.js
Expand Up @@ -11,34 +11,46 @@ const baseEncodeTables = {
64: "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_",
};

function encodeBufferToBase(buffer, base) {
/**
 * Divides a multi-word unsigned integer by a small divisor, in place.
 *
 * The words are walked from the last index down to the first, carrying the
 * remainder of each word into the next one, i.e. the last element is the most
 * significant word (little endian word order).
 *
 * @param {Uint32Array} uint32Array Words of a base-0x100000000 number, little endian; overwritten with the quotient
 * @param {number} divisor The divisor
 * @return {number} Modulo (remainder) of the division
 */
function divmod32(uint32Array, divisor) {
  let remainder = 0;
  let index = uint32Array.length;

  while (index-- > 0) {
    // Prepend the remainder left over from the more significant word.
    const dividend = remainder * 0x100000000 + uint32Array[index];
    uint32Array[index] = Math.floor(dividend / divisor);
    remainder = dividend % divisor;
  }

  return remainder;
}

/**
 * Encodes a digest buffer as a string using the alphabet registered for
 * `base` in `baseEncodeTables`.
 *
 * NOTE(review): this span is taken from a commit diff view, so lines of the
 * removed big.js implementation and lines of the added Uint32Array/divmod32
 * implementation appear interleaved without +/- markers — confirm the final
 * statement order against the repository.
 *
 * @param {Buffer} buffer Digest bytes; copied before any math so the caller's buffer is not mutated
 * @param {number|string} base Key into baseEncodeTables, also used as the numeric radix
 * @param {number} length Requested number of output characters; capped at what the input bits can produce
 * @return {string} The encoded string
 * @throws {Error} If there is no encode table for `base`
 */
function encodeBufferToBase(buffer, base, length) {
const encodeTable = baseEncodeTables[base];

if (!encodeTable) {
throw new Error("Unknown encoding base" + base);
}

const readLength = buffer.length;
const Big = require("big.js");
// The input bits are only enough to generate this many characters.
const limit = Math.ceil((buffer.length * 8) / Math.log2(base));
length = Math.min(length, limit);

Big.RM = Big.DP = 0;
// Most (if not all) crypto digests have a length that is a multiple of 4 bytes.
// Fewer numbers in the array means faster math.
const uint32Array = new Uint32Array(Math.ceil(buffer.length / 4));

let b = new Big(0);

for (let i = readLength - 1; i >= 0; i--) {
b = b.times(256).plus(buffer[i]);
}
// Make sure the input buffer data is copied and is not mutated by reference.
// divmod32() would corrupt the BulkUpdateDecorator cache otherwise.
// Buffer.from(arrayBuffer) wraps the Uint32Array's memory instead of copying it,
// so buffer.copy() writes the digest bytes straight into uint32Array.
buffer.copy(Buffer.from(uint32Array.buffer));

let output = "";

while (b.gt(0)) {
output = encodeTable[b.mod(base)] + output;
b = b.div(base);
for (let i = 0; i < length; i++) {
output = encodeTable[divmod32(uint32Array, base)] + output;
}

Big.DP = 20;
Big.RM = 1;

return output;
}

Expand Down Expand Up @@ -110,10 +122,7 @@ function getHashDigest(buffer, algorithm, digestType, maxLength) {
digestType === "base58" ||
digestType === "base62"
) {
return encodeBufferToBase(hash.digest(), digestType.substr(4)).substr(
0,
maxLength
);
return encodeBufferToBase(hash.digest(), digestType.substr(4), maxLength);
} else {
return hash.digest(digestType || "hex").substr(0, maxLength);
}
Expand Down
4 changes: 1 addition & 3 deletions package.json
Expand Up @@ -3,9 +3,7 @@
"version": "3.1.3",
"author": "Tobias Koppers @sokra",
"description": "utils for webpack loaders",
"dependencies": {
"big.js": "^6.1.1"
},
"dependencies": {},
"scripts": {
"lint": "prettier --list-different . && eslint .",
"pretest": "yarn lint",
Expand Down
50 changes: 47 additions & 3 deletions test/getHashDigest.test.js
Expand Up @@ -11,8 +11,8 @@ describe("getHashDigest()", () => {
["abc\\0💩", "xxhash64", "hex", undefined, "86733ec125b93904"],
["abc\\0💩", "xxhash64", "base64", undefined, "hnM+wSW5OQQ="],
["abc\\0♥", "xxhash64", "base64", undefined, "S5o0KX3APSA="],
["abc\\0💩", "xxhash64", "base52", undefined, "cfByjQcJZIU"],
["abc\\0♥", "xxhash64", "base52", undefined, "qdLyAQjLlod"],
["abc\\0💩", "xxhash64", "base52", undefined, "acfByjQcJZIU"],
["abc\\0♥", "xxhash64", "base52", undefined, "aqdLyAQjLlod"],

["test string", "md4", "hex", 4, "2e06"],
["test string", "md4", "base64", undefined, "Lgbt1PFiMmjFpRcw2KCyrw=="],
Expand All @@ -34,7 +34,8 @@ describe("getHashDigest()", () => {
],
["test string", "md5", "base52", undefined, "dJnldHSAutqUacjgfBQGLQx"],
["test string", "md5", "base64", undefined, "b421md6Yb6t6IWJbeRZYnA=="],
["test string", "md5", "base26", 6, "bhtsgu"],
["test string", "md5", "base26", undefined, "bhtsgujtzvmjtgtzlqvubqggbvgx"],
["test string", "md5", "base26", 6, "ggbvgx"],
["abc\\0♥", "md5", "hex", undefined, "2e897b64f8050e66aff98d38f7a012c5"],
["abc\\0💩", "md5", "hex", undefined, "63ad5b3d675c5890e0c01ed339ba0187"],
["abc\\0💩", "md5", "base64", undefined, "Y61bPWdcWJDgwB7TOboBhw=="],
Expand Down Expand Up @@ -79,3 +80,46 @@ describe("getHashDigest()", () => {
);
});
});

/**
 * Registers tests asserting that each character position of generated digests
 * uses every character of the alphabet, and that each character occurs
 * between half and double the expected number of times.
 *
 * Digests are generated for `tableSize * iterations` distinct inputs and the
 * characters seen at each of the first `length` positions are tallied.
 *
 * @param {string} digestType Digest type passed to loaderUtils.getHashDigest
 * @param {number} length Digest length to request and number of positions to check
 * @param {number} tableSize Expected count of distinct characters per position
 * @param {number} iterations Expected occurrences of each character per position
 */
function testDistribution(digestType, length, tableSize, iterations) {
  const minCount = Math.round(iterations / 2);
  const maxCount = Math.round(iterations * 2);
  const countsByPosition = [];

  let sample = tableSize * iterations;
  while (sample-- > 0) {
    const digest = loaderUtils.getHashDigest(
      `Some input #${sample}`,
      undefined,
      digestType,
      length
    );

    for (let position = 0; position < length; position += 1) {
      const symbol = digest[position];
      // Tally objects are created lazily, the first time a position is seen.
      if (!countsByPosition[position]) {
        countsByPosition[position] = {};
      }
      const tally = countsByPosition[position];
      tally[symbol] = (tally[symbol] || 0) + 1;
    }
  }

  for (let position = 0; position < length; position += 1) {
    const tally = countsByPosition[position];
    const symbols = Object.keys(tally).sort();

    test(`distinct chars at position ${position}`, () => {
      expect(symbols.length).toBe(tableSize);
    });

    symbols.forEach((symbol) => {
      test(`occurences of char "${symbol}" at position ${position} should be around ${iterations}`, () => {
        const seen = tally[symbol];
        expect(seen).toBeLessThanOrEqual(maxCount);
        expect(seen).toBeGreaterThanOrEqual(minCount);
      });
    });
  }
}

// Distribution suites: one nested suite per digest type, each checking the
// first 8 characters against the alphabet size with 100 expected occurrences.
describe("getHashDigest() char distribution", () => {
  const cases = [
    ["base62", 62],
    ["base26", 26],
  ];

  cases.forEach(([digestType, tableSize]) => {
    describe(`should be uniform for ${digestType}`, () => {
      testDistribution(digestType, 8, tableSize, 100);
    });
  });
});
5 changes: 0 additions & 5 deletions yarn.lock
Expand Up @@ -865,11 +865,6 @@ bcrypt-pbkdf@^1.0.0:
dependencies:
tweetnacl "^0.14.3"

big.js@^6.1.1:
version "6.1.1"
resolved "https://registry.yarnpkg.com/big.js/-/big.js-6.1.1.tgz#63b35b19dc9775c94991ee5db7694880655d5537"
integrity sha512-1vObw81a8ylZO5ePrtMay0n018TcftpTA5HFKDaSuiUDBo8biRBtjIobw60OpwuvrGk+FsxKamqN4cnmj/eXdg==

brace-expansion@^1.1.7:
version "1.1.11"
resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd"
Expand Down

0 comments on commit 451858b

Please sign in to comment.