diff --git a/hash/rabin-gf2-polynomial.js b/hash/rabin-gf2-polynomial.js index 8579edf..6ae87b2 100644 --- a/hash/rabin-gf2-polynomial.js +++ b/hash/rabin-gf2-polynomial.js @@ -20,12 +20,12 @@ import * as buffer from '../buffer.js' const _degreeToMinByteLength = degree => math.floor(degree / 8) + 1 /** - * This is a GC2 Polynomial abstraction that is not meant for production! + * This is a GF2 Polynomial abstraction that is not meant for production! * * It is easy to understand and it's correctness is as obvious as possible. It can be used to verify - * efficient implementations of algorithms on GC2. + * efficient implementations of algorithms on GF2. */ -export class GC2Polynomial { +export class GF2Polynomial { constructor () { /** * @type {Set} @@ -40,7 +40,7 @@ export class GC2Polynomial { * @param {Uint8Array} bytes */ export const createFromBytes = bytes => { - const p = new GC2Polynomial() + const p = new GF2Polynomial() for (let bsi = bytes.length - 1, currDegree = 0; bsi >= 0; bsi--) { const currByte = bytes[bsi] for (let i = 0; i < 8; i++) { @@ -56,7 +56,7 @@ export const createFromBytes = bytes => { /** * Transform to Uint8Array (MSB). * - * @param {GC2Polynomial} p + * @param {GF2Polynomial} p * @param {number} byteLength */ export const toUint8Array = (p, byteLength = _degreeToMinByteLength(getHighestDegree(p))) => { @@ -103,7 +103,7 @@ export const createRandom = degree => { } /** - * @param {GC2Polynomial} p + * @param {GF2Polynomial} p * @return number */ export const getHighestDegree = p => array.fold(array.from(p.degrees), 0, math.max) @@ -113,8 +113,8 @@ export const getHighestDegree = p => array.fold(array.from(p.degrees), 0, math.m * * Addition is defined as xor in F2. Substraction is equivalent to addition in F2. * - * @param {GC2Polynomial} p1 - * @param {GC2Polynomial} p2 + * @param {GF2Polynomial} p1 + * @param {GF2Polynomial} p2 */ export const addInto = (p1, p2) => { p2.degrees.forEach(degree => { @@ -131,8 +131,8 @@ export const addInto = (p1, p2) => { * * Addition is defined as xor in F2. Substraction is equivalent to addition in F2. * - * @param {GC2Polynomial} p1 - * @param {GC2Polynomial} p2 + * @param {GF2Polynomial} p1 + * @param {GF2Polynomial} p2 */ export const orInto = (p1, p2) => { p2.degrees.forEach(degree => { @@ -145,11 +145,11 @@ export const orInto = (p1, p2) => { * * Addition is defined as xor in F2. Substraction is equivalent to addition in F2. * - * @param {GC2Polynomial} p1 - * @param {GC2Polynomial} p2 + * @param {GF2Polynomial} p1 + * @param {GF2Polynomial} p2 */ export const add = (p1, p2) => { - const result = new GC2Polynomial() + const result = new GF2Polynomial() p2.degrees.forEach(degree => { if (!p1.degrees.has(degree)) { result.degrees.add(degree) @@ -168,10 +168,10 @@ export const add = (p1, p2) => { * * Addition is defined as xor in F2. Substraction is equivalent to addition in F2. * - * @param {GC2Polynomial} p + * @param {GF2Polynomial} p */ export const clone = (p) => { - const result = new GC2Polynomial() + const result = new GF2Polynomial() p.degrees.forEach(d => result.degrees.add(d)) return result } @@ -181,7 +181,7 @@ export const clone = (p) => { * * Addition is defined as xor in F2. Substraction is equivalent to addition in F2. * - * @param {GC2Polynomial} p + * @param {GF2Polynomial} p * @param {number} degree */ export const addDegreeInto = (p, degree) => { @@ -195,11 +195,11 @@ export const addDegreeInto = (p, degree) => { /** * Multiply (•) p1 with p2 and store the result in p1. * - * @param {GC2Polynomial} p1 - * @param {GC2Polynomial} p2 + * @param {GF2Polynomial} p1 + * @param {GF2Polynomial} p2 */ export const multiply = (p1, p2) => { - const result = new GC2Polynomial() + const result = new GF2Polynomial() p1.degrees.forEach(degree1 => { p2.degrees.forEach(degree2 => { addDegreeInto(result, degree1 + degree2) @@ -211,11 +211,11 @@ export const multiply = (p1, p2) => { /** * Multiply (•) p1 with p2 and store the result in p1. * - * @param {GC2Polynomial} p + * @param {GF2Polynomial} p * @param {number} shift */ export const shiftLeft = (p, shift) => { - const result = new GC2Polynomial() + const result = new GF2Polynomial() p.degrees.forEach(degree => { const r = degree + shift r >= 0 && result.degrees.add(r) @@ -226,8 +226,8 @@ export const shiftLeft = (p, shift) => { /** * Computes p1 % p2. I.e. the remainder of p1/p2. * - * @param {GC2Polynomial} p1 - * @param {GC2Polynomial} p2 + * @param {GF2Polynomial} p1 + * @param {GF2Polynomial} p2 */ export const mod = (p1, p2) => { const maxDeg1 = getHighestDegree(p1) @@ -247,9 +247,9 @@ export const mod = (p1, p2) => { * * http://en.wikipedia.org/wiki/Modular_exponentiation * - * @param {GC2Polynomial} p + * @param {GF2Polynomial} p * @param {number} e - * @param {GC2Polynomial} m + * @param {GF2Polynomial} m */ export const modPow = (p, e, m) => { let result = ONE @@ -268,8 +268,8 @@ export const modPow = (p, e, m) => { /** * Find the greatest common divisor using Euclid's Algorithm. * - * @param {GC2Polynomial} p1 - * @param {GC2Polynomial} p2 + * @param {GF2Polynomial} p1 + * @param {GF2Polynomial} p2 */ export const gcd = (p1, p2) => { while (p2.degrees.size > 0) { @@ -283,8 +283,8 @@ export const gcd = (p1, p2) => { /** * true iff p1 equals p2 * - * @param {GC2Polynomial} p1 - * @param {GC2Polynomial} p2 + * @param {GF2Polynomial} p1 + * @param {GF2Polynomial} p2 */ export const equals = (p1, p2) => { if (p1.degrees.size !== p2.degrees.size) return false @@ -303,7 +303,7 @@ const ONE = createFromBytes(new Uint8Array([1])) * (shamelessly copied from * https://github.com/opendedup/rabinfingerprint/blob/master/src/org/rabinfingerprint/polynomial/Polynomial.java) * - * @param {GC2Polynomial} f + * @param {GF2Polynomial} f * @param {number} p */ const reduceExponent = (f, p) => { @@ -322,7 +322,7 @@ const reduceExponent = (f, p) => { * * http://citeseer.ist.psu.edu/cache/papers/cs/27167/http:zSzzSzwww.math.clemson.eduzSzfacultyzSzGaozSzpaperszSzGP97a.pdf/gao97tests.pdf * - * @param {GC2Polynomial} p + * @param {GF2Polynomial} p */ export const isIrreducibleBenOr = p => { const degree = getHighestDegree(p) @@ -350,16 +350,16 @@ export const createIrreducible = degree => { * Create a fingerprint of buf using the irreducible polynomial m. * * @param {Uint8Array} buf - * @param {GC2Polynomial} m + * @param {GF2Polynomial} m */ export const fingerprint = (buf, m) => toUint8Array(mod(createFromBytes(buf), m), _degreeToMinByteLength(getHighestDegree(m) - 1)) export class RabinPolynomialEncoder { /** - * @param {GC2Polynomial} m The irreducible polynomial + * @param {GF2Polynomial} m The irreducible polynomial */ constructor (m) { - this.fingerprint = new GC2Polynomial() + this.fingerprint = new GF2Polynomial() this.m = m } diff --git a/hash/rabin.js b/hash/rabin.js index 9526fd3..90f0a28 100644 --- a/hash/rabin.js +++ b/hash/rabin.js @@ -8,7 +8,7 @@ import * as buffer from '../buffer.js' import * as map from '../map.js' -export const StandardIrreducible8 = new Uint8Array([1, 189]) +export const StandardIrreducible8 = new Uint8Array([1, 221]) export const StandardIrreducible16 = new Uint8Array([1, 244, 157]) export const StandardIrreducible32 = new Uint8Array([1, 149, 183, 205, 191]) export const StandardIrreducible64 = new Uint8Array([1, 133, 250, 114, 193, 250, 28, 193, 231]) @@ -85,3 +85,17 @@ export class RabinEncoder { return result } } + +/** + * Basically an exact copy of the Encoder, but inlined. + * + * @param {Uint8Array} irreducible + * @param {Uint8Array} data + */ +export const fingerprint = (irreducible, data) => { + const encoder = new RabinEncoder(irreducible) + for (let i = 0; i < data.length; i++) { + encoder.write(data[i]) + } + return encoder.getFingerprint() +} diff --git a/hash/rabin.test.js b/hash/rabin.test.js index d802b9a..37a82ff 100644 --- a/hash/rabin.test.js +++ b/hash/rabin.test.js @@ -6,6 +6,7 @@ import * as math from '../math.js' import * as array from '../array.js' import * as prng from '../prng.js' import * as buffer from '../buffer.js' +import * as map from '../map.js' /** * @param {t.TestCase} _tc @@ -40,7 +41,7 @@ export const testIrreducibleInput = _tc => { */ export const testIrreducibleSpread = _tc => { const degree = 32 - const N = 400 + const N = 1000 const avgSpread = getSpreadAverage(degree, N) const diffSpread = math.abs(avgSpread - degree) t.info(`Average spread for degree ${degree} at ${N} repetitions: ${avgSpread}`) @@ -98,7 +99,7 @@ export const testGenerateIrreducibles = _tc => { * @param {t.TestCase} tc * @param {number} K */ -const _testFingerprintK = (tc, K) => { +const _testFingerprintCompatiblityK = (tc, K) => { /** * @type {Array} */ @@ -107,7 +108,7 @@ const _testFingerprintK = (tc, K) => { const MSIZE = 130 t.info(`N=${N} K=${K} MSIZE=${MSIZE}`) /** - * @type {gf2.GC2Polynomial} + * @type {gf2.GF2Polynomial} */ let irreducible /** @@ -158,11 +159,15 @@ const _testFingerprintK = (tc, K) => { }) }) t.compare(fingerprints1, fingerprints3) + // ensuring that the cache is already populated + // @ts-ignore + // eslint-disable-next-line + new rabin.RabinEncoder(irreducibleBuffer) /** * @type {Array} */ let fingerprints4 = [] - t.measureTime('polynomial incremental (efficent & cached))', () => { + t.measureTime('polynomial incremental (efficent & cached)) using encoder', () => { fingerprints4 = dataObjects.map((o, _index) => { const encoder = new rabin.RabinEncoder(irreducibleBuffer) for (let i = 0; i < o.byteLength; i++) { @@ -172,15 +177,56 @@ const _testFingerprintK = (tc, K) => { }) }) t.compare(fingerprints1, fingerprints4) + /** + * @type {Array} + */ + let fingerprints5 = [] + t.measureTime('polynomial incremental (efficent & cached))', () => { + fingerprints5 = dataObjects.map((o, _index) => { + return rabin.fingerprint(irreducibleBuffer, o) + }) + }) + t.compare(fingerprints1, fingerprints5) } /** * @param {t.TestCase} tc */ -export const testFingerprint = tc => { - _testFingerprintK(tc, 8) - _testFingerprintK(tc, 16) - _testFingerprintK(tc, 32) - _testFingerprintK(tc, 64) - _testFingerprintK(tc, 128) +export const testFingerprintCompatiblity = tc => { + _testFingerprintCompatiblityK(tc, 8) + _testFingerprintCompatiblityK(tc, 16) + _testFingerprintCompatiblityK(tc, 32) + _testFingerprintCompatiblityK(tc, 64) + _testFingerprintCompatiblityK(tc, 128) +} + +/** + * @param {t.TestCase} tc + */ +export const testConflicts = tc => { + /** + * @type {Array} + */ + const data = [] + const N = 100 + const Irr = rabin.StandardIrreducible8 + t.measureTime(`generate ${N} items`, () => { + for (let i = 0; i < N; i++) { + data.push(prng.uint8Array(tc.prng, prng.uint32(tc.prng, 5, 50))) + } + }) + /** + * @type {Map>} + */ + const results = new Map() + t.measureTime(`fingerprint ${N} items`, () => { + data.forEach(d => { + const f = buffer.toBase64(rabin.fingerprint(Irr, d)) + map.setIfUndefined(results, f, () => new Set()).add(buffer.toBase64(d)) + }) + }) + const conflicts = array.fold(map.map(results, (ds) => ds.size - 1), 0, math.add) + const usedFields = results.size + const unusedFieds = math.pow(2, (Irr.length - 1) * 8) - results.size + console.log({ conflicts, usedFields, unusedFieds }) }