diff --git a/phf/src/map.rs b/phf/src/map.rs index bd743516..30fc2648 100644 --- a/phf/src/map.rs +++ b/phf/src/map.rs @@ -4,7 +4,7 @@ use core::ops::Index; use core::slice; use core::fmt; use core::iter::IntoIterator; -use phf_shared::{self, PhfHash}; +use phf_shared::{self, PhfHash, HashKey}; use crate::Slice; /// An immutable map constructed at compile time. @@ -16,7 +16,7 @@ use crate::Slice; /// time and should never be accessed directly. pub struct Map { #[doc(hidden)] - pub key: u64, + pub key: HashKey, #[doc(hidden)] pub disps: Slice<(u32, u32)>, #[doc(hidden)] @@ -81,8 +81,8 @@ impl Map { K: Borrow { if self.disps.len() == 0 { return None; } //Prevent panic on empty map - let hash = phf_shared::hash(key, self.key); - let index = phf_shared::get_index(hash, &*self.disps, self.entries.len()); + let hashes = phf_shared::hash(key, &self.key); + let index = phf_shared::get_index(&hashes, &*self.disps, self.entries.len()); let entry = &self.entries[index as usize]; let b: &T = entry.0.borrow(); if b == key { diff --git a/phf/src/ordered_map.rs b/phf/src/ordered_map.rs index 239ce95f..f91b36cc 100644 --- a/phf/src/ordered_map.rs +++ b/phf/src/ordered_map.rs @@ -4,7 +4,7 @@ use core::iter::IntoIterator; use core::ops::Index; use core::fmt; use core::slice; -use phf_shared::{self, PhfHash}; +use phf_shared::{self, PhfHash, HashKey}; use crate::Slice; @@ -20,7 +20,7 @@ use crate::Slice; /// any time and should never be accessed directly. pub struct OrderedMap { #[doc(hidden)] - pub key: u64, + pub key: HashKey, #[doc(hidden)] pub disps: Slice<(u32, u32)>, #[doc(hidden)] @@ -109,8 +109,8 @@ impl OrderedMap { K: Borrow { if self.disps.len() == 0 { return None; } //Prevent panic on empty map - let hash = phf_shared::hash(key, self.key); - let idx_index = phf_shared::get_index(hash, &*self.disps, self.idxs.len()); + let hashes = phf_shared::hash(key, &self.key); + let idx_index = phf_shared::get_index(&hashes, &*self.disps, self.idxs.len()); let idx = self.idxs[idx_index as usize]; let entry = &self.entries[idx]; diff --git a/phf_codegen/src/lib.rs b/phf_codegen/src/lib.rs index 8217fbb5..38d3a766 100644 --- a/phf_codegen/src/lib.rs +++ b/phf_codegen/src/lib.rs @@ -157,7 +157,7 @@ impl Map { try!(write!(w, "{}::Map {{ - key: {}, + key: {:?}, disps: {}::Slice::Static(&[", self.path, state.key, self.path)); for &(d1, d2) in &state.disps { @@ -272,7 +272,7 @@ impl OrderedMap { try!(write!(w, "{}::OrderedMap {{ - key: {}, + key: {:?}, disps: {}::Slice::Static(&[", self.path, state.key, self.path)); for &(d1, d2) in &state.disps { diff --git a/phf_generator/Cargo.toml b/phf_generator/Cargo.toml index 35dc3512..decc4ce2 100644 --- a/phf_generator/Cargo.toml +++ b/phf_generator/Cargo.toml @@ -13,6 +13,13 @@ phf_shared = { version = "0.7.24", path = "../phf_shared" } # for stable black_box() criterion = { version = "0.2", optional = true } +[dev-dependencies] +criterion = "0.2" + +[[bench]] +name = "benches" +harness = false + [[bin]] name = "gen_hash_test" required-features = ["criterion"] diff --git a/phf_generator/benches/benches.rs b/phf_generator/benches/benches.rs new file mode 100644 index 00000000..4e2d4db5 --- /dev/null +++ b/phf_generator/benches/benches.rs @@ -0,0 +1,80 @@ +use criterion::*; + +use rand::distributions::Standard; +use rand::rngs::SmallRng; +use rand::{Rng, SeedableRng}; + +use phf_generator::generate_hash; + +fn gen_vec(len: usize) -> Vec { + SmallRng::seed_from_u64(0xAAAAAAAAAAAAAAAA).sample_iter(Standard).take(len).collect() +} + +fn bench_hash(b: &mut Bencher, len: &usize) { + let vec = gen_vec(*len); + b.iter(|| generate_hash(&vec)) +} + +fn gen_hash_small(c: &mut Criterion) { + let sizes = vec![0, 1, 2, 5, 10, 25, 50, 75]; + c.bench_function_over_inputs("gen_hash_small", bench_hash, sizes); +} + +fn gen_hash_med(c: &mut Criterion) { + let sizes = vec![100, 250, 500, 1000, 2500, 5000, 7500]; + c.bench_function_over_inputs("gen_hash_medium", bench_hash, sizes); +} + +fn gen_hash_large(c: &mut Criterion) { + let sizes = vec![10_000, 25_000, 50_000, 75_000]; + c.bench_function_over_inputs("gen_hash_large", bench_hash, sizes); +} + +fn gen_hash_xlarge(c: &mut Criterion) { + let sizes = vec![100_000, 250_000, 500_000, 750_000, 1_000_000]; + c.bench_function_over_inputs("gen_hash_xlarge", bench_hash, sizes); +} + +criterion_group!(benches, gen_hash_small, gen_hash_med, gen_hash_large, gen_hash_xlarge); + +#[cfg(not(feature = "rayon"))] +criterion_main!(benches); + +#[cfg(feature = "rayon")] +criterion_main!(benches, rayon::benches); + +#[cfg(feature = "rayon")] +mod rayon { + use criterion::*; + + use phf_generator::generate_hash_rayon; + + use super::gen_vec; + + fn bench_hash(b: &mut Bencher, len: &usize) { + let vec = gen_vec(*len); + b.iter(|| generate_hash_rayon(&vec)) + } + + fn gen_hash_small(c: &mut Criterion) { + let sizes = vec![0, 1, 2, 5, 10, 25, 50, 75]; + c.bench_function_over_inputs("gen_hash_small_rayon", bench_hash, sizes); + } + + fn gen_hash_med(c: &mut Criterion) { + let sizes = vec![100, 250, 500, 1000, 2500, 5000, 7500]; + c.bench_function_over_inputs("gen_hash_medium_rayon", bench_hash, sizes); + } + + fn gen_hash_large(c: &mut Criterion) { + let sizes = vec![10_000, 25_000, 50_000, 75_000]; + c.bench_function_over_inputs("gen_hash_large_rayon", bench_hash, sizes); + } + + fn gen_hash_xlarge(c: &mut Criterion) { + let sizes = vec![100_000, 250_000, 500_000, 750_000, 1_000_000]; + c.bench_function_over_inputs("gen_hash_xlarge_rayon", bench_hash, sizes); + } + + criterion_group!(benches, gen_hash_small, gen_hash_med, gen_hash_large, gen_hash_xlarge); +} \ No newline at end of file diff --git a/phf_generator/src/lib.rs b/phf_generator/src/lib.rs index 6a83340e..6d0950e1 100644 --- a/phf_generator/src/lib.rs +++ b/phf_generator/src/lib.rs @@ -1,9 +1,7 @@ #![doc(html_root_url="https://docs.rs/phf_generator/0.7")] -extern crate phf_shared; -extern crate rand; - -use phf_shared::PhfHash; +use phf_shared::{PhfHash, HashKey}; use rand::{SeedableRng, Rng}; +use rand::distributions::Standard; use rand::rngs::SmallRng; const DEFAULT_LAMBDA: usize = 5; @@ -11,47 +9,27 @@ const DEFAULT_LAMBDA: usize = 5; const FIXED_SEED: u64 = 1234567890; pub struct HashState { - pub key: u64, + pub key: HashKey, pub disps: Vec<(u32, u32)>, pub map: Vec, } pub fn generate_hash(entries: &[H]) -> HashState { - let mut rng = SmallRng::seed_from_u64(FIXED_SEED); - loop { - if let Some(s) = try_generate_hash(entries, &mut rng) { - return s; - } - } + SmallRng::seed_from_u64(FIXED_SEED) + .sample_iter(Standard) + .find_map(|key| try_generate_hash(entries, key)) + .expect("failed to solve PHF") } -fn try_generate_hash(entries: &[H], rng: &mut SmallRng) -> Option { +fn try_generate_hash(entries: &[H], key: HashKey) -> Option { struct Bucket { idx: usize, keys: Vec, } - struct Hashes { - g: u32, - f1: u32, - f2: u32, - } + let hashes: Vec<_> = entries.iter().map(|entry| phf_shared::hash(entry, &key)).collect(); - let key = rng.gen(); - - let hashes: Vec<_> = entries.iter() - .map(|entry| { - let hash = phf_shared::hash(entry, key); - let (g, f1, f2) = phf_shared::split(hash); - Hashes { - g: g, - f1: f1, - f2: f2, - } - }) - .collect(); - - let buckets_len = (entries.len() + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA; + let buckets_len = (hashes.len() + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA; let mut buckets = (0..buckets_len) .map(|i| { Bucket { @@ -68,7 +46,7 @@ fn try_generate_hash(entries: &[H], rng: &mut SmallRng) -> Option(entries: &[H], rng: &mut SmallRng) -> Option u32 { - d2 + f1 * d1 + f2 + (Wrapping(d2) + Wrapping(f1) * Wrapping(d1) + Wrapping(f2)).0 } +/// `key` is from `phf_generator::HashState`. #[inline] -pub fn split(hash: u64) -> (u32, u32, u32) { - const BITS: u32 = 21; - const MASK: u64 = (1 << BITS) - 1; +pub fn hash(x: &T, key: &HashKey) -> Hashes { + let lower = { + // large 64-bit primes as initial keys + let mut hasher = SipHasher13::new_with_keys(14_130_675_974_360_801_221, + key[0]); + x.phf_hash(&mut hasher); + hasher.finish() + }; - ((hash & MASK) as u32, - ((hash >> BITS) & MASK) as u32, - ((hash >> (2 * BITS)) & MASK) as u32) -} + let upper = { + let mut hasher = SipHasher13::new_with_keys(11_542_695_197_553_437_579, + key[1]); + x.phf_hash(&mut hasher); + hasher.finish() + }; -/// `key` is from `phf_generator::HashState::key`. -#[inline] -pub fn hash(x: &T, key: u64) -> u64 { - let mut hasher = SipHasher13::new_with_keys(0, key); - x.phf_hash(&mut hasher); - hasher.finish() + Hashes { + g: (lower >> 32) as u32, + f1: lower as u32, + f2: upper as u32, + _priv: (), + } } /// Return an index into `phf_generator::HashState::map`. @@ -42,10 +61,9 @@ pub fn hash(x: &T, key: u64) -> u64 { /// * `disps` is from `phf_generator::HashState::disps`. /// * `len` is the length of `phf_generator::HashState::map`. #[inline] -pub fn get_index(hash: u64, disps: &[(u32, u32)], len: usize) -> u32 { - let (g, f1, f2) = split(hash); - let (d1, d2) = disps[(g % (disps.len() as u32)) as usize]; - displace(f1, f2, d1, d2) % (len as u32) +pub fn get_index(hashes: &Hashes, disps: &[(u32, u32)], len: usize) -> u32 { + let (d1, d2) = disps[(hashes.g % (disps.len() as u32)) as usize]; + displace(hashes.f1, hashes.f2, d1, d2) % (len as u32) } /// A trait implemented by types which can be used in PHF data structures. @@ -77,7 +95,7 @@ pub trait FmtConst { /// /// Ideally with specialization this could be just one default impl and then specialized where /// it doesn't apply. -macro_rules! delegate_debug( +macro_rules! delegate_debug ( ($ty:ty) => { impl FmtConst for $ty { fn fmt_const(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -153,7 +171,7 @@ impl FmtConst for [u8] { #[cfg(feature = "unicase")] impl PhfHash for unicase::UniCase -where unicase::UniCase: Hash { + where unicase::UniCase: Hash { #[inline] fn phf_hash(&self, state: &mut H) { self.hash(state) @@ -174,7 +192,7 @@ impl FmtConst for unicase::UniCase where S: AsRef { } } -macro_rules! sip_impl( +macro_rules! sip_impl ( (le $t:ty) => ( impl PhfHash for $t { #[inline] @@ -217,7 +235,7 @@ fn fmt_array(array: &[u8], f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{:?}", array) } -macro_rules! array_impl( +macro_rules! array_impl ( ($t:ty, $n:expr) => ( impl PhfHash for [$t; $n] { #[inline]