Skip to content

Commit

Permalink
[rabin] more tests & documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
dmonad committed Jun 22, 2023
1 parent 542e539 commit b7915f3
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 45 deletions.
68 changes: 34 additions & 34 deletions hash/rabin-gf2-polynomial.js
Expand Up @@ -20,12 +20,12 @@ import * as buffer from '../buffer.js'
const _degreeToMinByteLength = degree => {
  // A polynomial of degree d needs d + 1 bits, i.e. floor(d / 8) + 1 bytes.
  const wholeBytes = math.floor(degree / 8)
  return wholeBytes + 1
}

/**
* This is a GC2 Polynomial abstraction that is not meant for production!
* This is a GF2 Polynomial abstraction that is not meant for production!
*
* It is easy to understand and its correctness is as obvious as possible. It can be used to verify
* efficient implementations of algorithms on GC2.
* efficient implementations of algorithms on GF2.
*/
export class GC2Polynomial {
export class GF2Polynomial {
constructor () {
/**
* @type {Set<number>}
Expand All @@ -40,7 +40,7 @@ export class GC2Polynomial {
* @param {Uint8Array} bytes
*/
export const createFromBytes = bytes => {
const p = new GC2Polynomial()
const p = new GF2Polynomial()
for (let bsi = bytes.length - 1, currDegree = 0; bsi >= 0; bsi--) {
const currByte = bytes[bsi]
for (let i = 0; i < 8; i++) {
Expand All @@ -56,7 +56,7 @@ export const createFromBytes = bytes => {
/**
* Transform to Uint8Array (MSB).
*
* @param {GC2Polynomial} p
* @param {GF2Polynomial} p
* @param {number} byteLength
*/
export const toUint8Array = (p, byteLength = _degreeToMinByteLength(getHighestDegree(p))) => {
Expand Down Expand Up @@ -103,7 +103,7 @@ export const createRandom = degree => {
}

/**
* @param {GC2Polynomial} p
* @param {GF2Polynomial} p
* @return {number}
*/
export const getHighestDegree = p => {
  // Snapshot the degree set as an array, then fold it with math.max starting from the seed 0.
  const degrees = array.from(p.degrees)
  return array.fold(degrees, 0, math.max)
}
Expand All @@ -113,8 +113,8 @@ export const getHighestDegree = p => array.fold(array.from(p.degrees), 0, math.m
*
* Addition is defined as xor in F2. Subtraction is equivalent to addition in F2.
*
* @param {GC2Polynomial} p1
* @param {GC2Polynomial} p2
* @param {GF2Polynomial} p1
* @param {GF2Polynomial} p2
*/
export const addInto = (p1, p2) => {
p2.degrees.forEach(degree => {
Expand All @@ -131,8 +131,8 @@ export const addInto = (p1, p2) => {
*
* Addition is defined as xor in F2. Subtraction is equivalent to addition in F2.
*
* @param {GC2Polynomial} p1
* @param {GC2Polynomial} p2
* @param {GF2Polynomial} p1
* @param {GF2Polynomial} p2
*/
export const orInto = (p1, p2) => {
p2.degrees.forEach(degree => {
Expand All @@ -145,11 +145,11 @@ export const orInto = (p1, p2) => {
*
* Addition is defined as xor in F2. Subtraction is equivalent to addition in F2.
*
* @param {GC2Polynomial} p1
* @param {GC2Polynomial} p2
* @param {GF2Polynomial} p1
* @param {GF2Polynomial} p2
*/
export const add = (p1, p2) => {
const result = new GC2Polynomial()
const result = new GF2Polynomial()
p2.degrees.forEach(degree => {
if (!p1.degrees.has(degree)) {
result.degrees.add(degree)
Expand All @@ -168,10 +168,10 @@ export const add = (p1, p2) => {
*
* Addition is defined as xor in F2. Subtraction is equivalent to addition in F2.
*
* @param {GC2Polynomial} p
* @param {GF2Polynomial} p
*/
export const clone = (p) => {
  // Build an independent polynomial: the degrees are copied into a fresh set, so later
  // mutation of the returned value does not affect `p`.
  const result = new GF2Polynomial()
  p.degrees.forEach(d => result.degrees.add(d))
  return result
}
Expand All @@ -181,7 +181,7 @@ export const clone = (p) => {
*
* Addition is defined as xor in F2. Subtraction is equivalent to addition in F2.
*
* @param {GC2Polynomial} p
* @param {GF2Polynomial} p
* @param {number} degree
*/
export const addDegreeInto = (p, degree) => {
Expand All @@ -195,11 +195,11 @@ export const addDegreeInto = (p, degree) => {
/**
* Multiply (•) p1 with p2 and store the result in p1.
*
* @param {GC2Polynomial} p1
* @param {GC2Polynomial} p2
* @param {GF2Polynomial} p1
* @param {GF2Polynomial} p2
*/
export const multiply = (p1, p2) => {
const result = new GC2Polynomial()
const result = new GF2Polynomial()
p1.degrees.forEach(degree1 => {
p2.degrees.forEach(degree2 => {
addDegreeInto(result, degree1 + degree2)
Expand All @@ -211,11 +211,11 @@ export const multiply = (p1, p2) => {
/**
* Shift the degrees of p by `shift`; terms whose resulting degree would be negative are dropped.
*
* @param {GC2Polynomial} p
* @param {GF2Polynomial} p
* @param {number} shift
*/
export const shiftLeft = (p, shift) => {
const result = new GC2Polynomial()
const result = new GF2Polynomial()
p.degrees.forEach(degree => {
const r = degree + shift
r >= 0 && result.degrees.add(r)
Expand All @@ -226,8 +226,8 @@ export const shiftLeft = (p, shift) => {
/**
* Computes p1 % p2. I.e. the remainder of p1/p2.
*
* @param {GC2Polynomial} p1
* @param {GC2Polynomial} p2
* @param {GF2Polynomial} p1
* @param {GF2Polynomial} p2
*/
export const mod = (p1, p2) => {
const maxDeg1 = getHighestDegree(p1)
Expand All @@ -247,9 +247,9 @@ export const mod = (p1, p2) => {
*
* http://en.wikipedia.org/wiki/Modular_exponentiation
*
* @param {GC2Polynomial} p
* @param {GF2Polynomial} p
* @param {number} e
* @param {GC2Polynomial} m
* @param {GF2Polynomial} m
*/
export const modPow = (p, e, m) => {
let result = ONE
Expand All @@ -268,8 +268,8 @@ export const modPow = (p, e, m) => {
/**
* Find the greatest common divisor using Euclid's Algorithm.
*
* @param {GC2Polynomial} p1
* @param {GC2Polynomial} p2
* @param {GF2Polynomial} p1
* @param {GF2Polynomial} p2
*/
export const gcd = (p1, p2) => {
while (p2.degrees.size > 0) {
Expand All @@ -283,8 +283,8 @@ export const gcd = (p1, p2) => {
/**
* true iff p1 equals p2
*
* @param {GC2Polynomial} p1
* @param {GC2Polynomial} p2
* @param {GF2Polynomial} p1
* @param {GF2Polynomial} p2
*/
export const equals = (p1, p2) => {
if (p1.degrees.size !== p2.degrees.size) return false
Expand All @@ -303,7 +303,7 @@ const ONE = createFromBytes(new Uint8Array([1]))
* (shamelessly copied from
* https://github.com/opendedup/rabinfingerprint/blob/master/src/org/rabinfingerprint/polynomial/Polynomial.java)
*
* @param {GC2Polynomial} f
* @param {GF2Polynomial} f
* @param {number} p
*/
const reduceExponent = (f, p) => {
Expand All @@ -322,7 +322,7 @@ const reduceExponent = (f, p) => {
*
* http://citeseer.ist.psu.edu/cache/papers/cs/27167/http:zSzzSzwww.math.clemson.eduzSzfacultyzSzGaozSzpaperszSzGP97a.pdf/gao97tests.pdf
*
* @param {GC2Polynomial} p
* @param {GF2Polynomial} p
*/
export const isIrreducibleBenOr = p => {
const degree = getHighestDegree(p)
Expand Down Expand Up @@ -350,16 +350,16 @@ export const createIrreducible = degree => {
* Create a fingerprint of buf using the irreducible polynomial m.
*
* @param {Uint8Array} buf
* @param {GC2Polynomial} m
* @param {GF2Polynomial} m
*/
export const fingerprint = (buf, m) => {
  // Interpret the buffer as a polynomial and reduce it modulo m.
  const remainder = mod(createFromBytes(buf), m)
  // The output length is derived from degree(m) - 1.
  const byteLength = _degreeToMinByteLength(getHighestDegree(m) - 1)
  return toUint8Array(remainder, byteLength)
}

export class RabinPolynomialEncoder {
/**
* @param {GC2Polynomial} m The irreducible polynomial
* @param {GF2Polynomial} m The irreducible polynomial
*/
constructor (m) {
this.fingerprint = new GC2Polynomial()
this.fingerprint = new GF2Polynomial()
this.m = m
}

Expand Down
16 changes: 15 additions & 1 deletion hash/rabin.js
Expand Up @@ -8,7 +8,7 @@
import * as buffer from '../buffer.js'
import * as map from '../map.js'

export const StandardIrreducible8 = new Uint8Array([1, 189])
export const StandardIrreducible8 = new Uint8Array([1, 221])
export const StandardIrreducible16 = new Uint8Array([1, 244, 157])
export const StandardIrreducible32 = new Uint8Array([1, 149, 183, 205, 191])
export const StandardIrreducible64 = new Uint8Array([1, 133, 250, 114, 193, 250, 28, 193, 231])
Expand Down Expand Up @@ -85,3 +85,17 @@ export class RabinEncoder {
return result
}
}

/**
 * Compute the fingerprint of `data` in a single call.
 *
 * Convenience wrapper: creates a RabinEncoder for `irreducible`, writes every byte of `data`
 * to it in order, and returns the encoder's fingerprint.
 *
 * @param {Uint8Array} irreducible
 * @param {Uint8Array} data
 */
export const fingerprint = (irreducible, data) => {
  const encoder = new RabinEncoder(irreducible)
  for (const byte of data) {
    encoder.write(byte)
  }
  return encoder.getFingerprint()
}
66 changes: 56 additions & 10 deletions hash/rabin.test.js
Expand Up @@ -6,6 +6,7 @@ import * as math from '../math.js'
import * as array from '../array.js'
import * as prng from '../prng.js'
import * as buffer from '../buffer.js'
import * as map from '../map.js'

/**
* @param {t.TestCase} _tc
Expand Down Expand Up @@ -40,7 +41,7 @@ export const testIrreducibleInput = _tc => {
*/
export const testIrreducibleSpread = _tc => {
const degree = 32
const N = 400
const N = 1000
const avgSpread = getSpreadAverage(degree, N)
const diffSpread = math.abs(avgSpread - degree)
t.info(`Average spread for degree ${degree} at ${N} repetitions: ${avgSpread}`)
Expand Down Expand Up @@ -98,7 +99,7 @@ export const testGenerateIrreducibles = _tc => {
* @param {t.TestCase} tc
* @param {number} K
*/
const _testFingerprintK = (tc, K) => {
const _testFingerprintCompatiblityK = (tc, K) => {
/**
* @type {Array<Uint8Array>}
*/
Expand All @@ -107,7 +108,7 @@ const _testFingerprintK = (tc, K) => {
const MSIZE = 130
t.info(`N=${N} K=${K} MSIZE=${MSIZE}`)
/**
* @type {gf2.GC2Polynomial}
* @type {gf2.GF2Polynomial}
*/
let irreducible
/**
Expand Down Expand Up @@ -158,11 +159,15 @@ const _testFingerprintK = (tc, K) => {
})
})
t.compare(fingerprints1, fingerprints3)
// ensuring that the cache is already populated
// @ts-ignore
// eslint-disable-next-line
new rabin.RabinEncoder(irreducibleBuffer)
/**
* @type {Array<Uint8Array>}
*/
let fingerprints4 = []
t.measureTime('polynomial incremental (efficent & cached))', () => {
t.measureTime('polynomial incremental (efficent & cached)) using encoder', () => {
fingerprints4 = dataObjects.map((o, _index) => {
const encoder = new rabin.RabinEncoder(irreducibleBuffer)
for (let i = 0; i < o.byteLength; i++) {
Expand All @@ -172,15 +177,56 @@ const _testFingerprintK = (tc, K) => {
})
})
t.compare(fingerprints1, fingerprints4)
/**
* @type {Array<Uint8Array>}
*/
let fingerprints5 = []
t.measureTime('polynomial incremental (efficent & cached))', () => {
fingerprints5 = dataObjects.map((o, _index) => {
return rabin.fingerprint(irreducibleBuffer, o)
})
})
t.compare(fingerprints1, fingerprints5)
}

/**
* @param {t.TestCase} tc
*/
export const testFingerprint = tc => {
_testFingerprintK(tc, 8)
_testFingerprintK(tc, 16)
_testFingerprintK(tc, 32)
_testFingerprintK(tc, 64)
_testFingerprintK(tc, 128)
export const testFingerprintCompatiblity = tc => {
_testFingerprintCompatiblityK(tc, 8)
_testFingerprintCompatiblityK(tc, 16)
_testFingerprintCompatiblityK(tc, 32)
_testFingerprintCompatiblityK(tc, 64)
_testFingerprintCompatiblityK(tc, 128)
}

/**
 * Fingerprint N random buffers with `rabin.StandardIrreducible8` and report how many of them
 * share a fingerprint. Informational only - nothing is asserted.
 *
 * @param {t.TestCase} tc
 */
export const testConflicts = tc => {
  /**
   * @type {Array<Uint8Array>}
   */
  const data = []
  const N = 100
  const Irr = rabin.StandardIrreducible8
  t.measureTime(`generate ${N} items`, () => {
    for (let i = 0; i < N; i++) {
      data.push(prng.uint8Array(tc.prng, prng.uint32(tc.prng, 5, 50)))
    }
  })
  /**
   * Maps each fingerprint (base64) to the set of distinct inputs (base64) that produced it.
   *
   * @type {Map<string, Set<string>>}
   */
  const results = new Map()
  t.measureTime(`fingerprint ${N} items`, () => {
    data.forEach(d => {
      const f = buffer.toBase64(rabin.fingerprint(Irr, d))
      map.setIfUndefined(results, f, () => new Set()).add(buffer.toBase64(d))
    })
  })
  // Every distinct input beyond the first that maps to the same fingerprint is one conflict.
  const conflicts = array.fold(map.map(results, (ds) => ds.size - 1), 0, math.add)
  const usedFields = results.size
  // NOTE(review): assumes the fingerprint spans (Irr.length - 1) bytes - confirm against the encoder.
  const unusedFields = math.pow(2, (Irr.length - 1) * 8) - results.size
  // Report through the test framework, like the other tests in this file, not raw console output.
  t.info(`conflicts=${conflicts} usedFields=${usedFields} unusedFields=${unusedFields}`)
}

0 comments on commit b7915f3

Please sign in to comment.