Skip to content

Commit

Permalink
use two separate hashes and full 32-bit displacements
Browse files Browse the repository at this point in the history
add benchmark
  • Loading branch information
abonander committed Jul 12, 2019
1 parent ecb9fd5 commit 9b70bd9
Show file tree
Hide file tree
Showing 7 changed files with 151 additions and 68 deletions.
8 changes: 4 additions & 4 deletions phf/src/map.rs
Expand Up @@ -4,7 +4,7 @@ use core::ops::Index;
use core::slice;
use core::fmt;
use core::iter::IntoIterator;
use phf_shared::{self, PhfHash};
use phf_shared::{self, PhfHash, HashKey};
use crate::Slice;

/// An immutable map constructed at compile time.
Expand All @@ -16,7 +16,7 @@ use crate::Slice;
/// time and should never be accessed directly.
pub struct Map<K: 'static, V: 'static> {
#[doc(hidden)]
pub key: u64,
pub key: HashKey,
#[doc(hidden)]
pub disps: Slice<(u32, u32)>,
#[doc(hidden)]
Expand Down Expand Up @@ -81,8 +81,8 @@ impl<K, V> Map<K, V> {
K: Borrow<T>
{
if self.disps.len() == 0 { return None; } //Prevent panic on empty map
let hash = phf_shared::hash(key, self.key);
let index = phf_shared::get_index(hash, &*self.disps, self.entries.len());
let hashes = phf_shared::hash(key, &self.key);
let index = phf_shared::get_index(&hashes, &*self.disps, self.entries.len());
let entry = &self.entries[index as usize];
let b: &T = entry.0.borrow();
if b == key {
Expand Down
8 changes: 4 additions & 4 deletions phf/src/ordered_map.rs
Expand Up @@ -4,7 +4,7 @@ use core::iter::IntoIterator;
use core::ops::Index;
use core::fmt;
use core::slice;
use phf_shared::{self, PhfHash};
use phf_shared::{self, PhfHash, HashKey};

use crate::Slice;

Expand All @@ -20,7 +20,7 @@ use crate::Slice;
/// any time and should never be accessed directly.
pub struct OrderedMap<K: 'static, V: 'static> {
#[doc(hidden)]
pub key: u64,
pub key: HashKey,
#[doc(hidden)]
pub disps: Slice<(u32, u32)>,
#[doc(hidden)]
Expand Down Expand Up @@ -109,8 +109,8 @@ impl<K, V> OrderedMap<K, V> {
K: Borrow<T>
{
if self.disps.len() == 0 { return None; } //Prevent panic on empty map
let hash = phf_shared::hash(key, self.key);
let idx_index = phf_shared::get_index(hash, &*self.disps, self.idxs.len());
let hashes = phf_shared::hash(key, &self.key);
let idx_index = phf_shared::get_index(&hashes, &*self.disps, self.idxs.len());
let idx = self.idxs[idx_index as usize];
let entry = &self.entries[idx];

Expand Down
4 changes: 2 additions & 2 deletions phf_codegen/src/lib.rs
Expand Up @@ -157,7 +157,7 @@ impl<K: Hash+PhfHash+Eq+FmtConst> Map<K> {

try!(write!(w,
"{}::Map {{
key: {},
key: {:?},
disps: {}::Slice::Static(&[",
self.path, state.key, self.path));
for &(d1, d2) in &state.disps {
Expand Down Expand Up @@ -272,7 +272,7 @@ impl<K: Hash+PhfHash+Eq+FmtConst> OrderedMap<K> {

try!(write!(w,
"{}::OrderedMap {{
key: {},
key: {:?},
disps: {}::Slice::Static(&[",
self.path, state.key, self.path));
for &(d1, d2) in &state.disps {
Expand Down
7 changes: 7 additions & 0 deletions phf_generator/Cargo.toml
Expand Up @@ -13,6 +13,13 @@ phf_shared = { version = "0.7.24", path = "../phf_shared" }
# for stable black_box()
criterion = { version = "0.2", optional = true }

[dev-dependencies]
criterion = "0.2"

[[bench]]
name = "benches"
harness = false

[[bin]]
name = "gen_hash_test"
required-features = ["criterion"]
80 changes: 80 additions & 0 deletions phf_generator/benches/benches.rs
@@ -0,0 +1,80 @@
use criterion::*;

use rand::distributions::Standard;
use rand::rngs::SmallRng;
use rand::{Rng, SeedableRng};

use phf_generator::generate_hash;

/// Builds a vector of `len` pseudo-random `u64` values.
///
/// The generator is seeded with the same fixed constant on every call.
fn gen_vec(len: usize) -> Vec<u64> {
    const SEED: u64 = 0xAAAA_AAAA_AAAA_AAAA;

    SmallRng::seed_from_u64(SEED)
        .sample_iter(Standard)
        .take(len)
        .collect()
}

/// Times `generate_hash` over a key set of `*len` values produced by `gen_vec`.
///
/// The key set is built once, outside the timed closure.
fn bench_hash(b: &mut Bencher, len: &usize) {
    let keys = gen_vec(*len);
    b.iter(|| generate_hash(&keys))
}

/// Registers the `gen_hash_small` benchmark, covering key counts from 0 to 75.
fn gen_hash_small(c: &mut Criterion) {
    let key_counts: Vec<usize> = vec![0, 1, 2, 5, 10, 25, 50, 75];
    c.bench_function_over_inputs("gen_hash_small", bench_hash, key_counts);
}

/// Registers the `gen_hash_medium` benchmark, covering key counts from 100 to 7500.
fn gen_hash_med(c: &mut Criterion) {
    let key_counts: Vec<usize> = vec![100, 250, 500, 1000, 2500, 5000, 7500];
    c.bench_function_over_inputs("gen_hash_medium", bench_hash, key_counts);
}

/// Registers the `gen_hash_large` benchmark, covering key counts from 10,000 to 75,000.
fn gen_hash_large(c: &mut Criterion) {
    let key_counts: Vec<usize> = vec![10_000, 25_000, 50_000, 75_000];
    c.bench_function_over_inputs("gen_hash_large", bench_hash, key_counts);
}

/// Registers the `gen_hash_xlarge` benchmark, covering key counts from 100,000 to 1,000,000.
fn gen_hash_xlarge(c: &mut Criterion) {
    let key_counts: Vec<usize> = vec![100_000, 250_000, 500_000, 750_000, 1_000_000];
    c.bench_function_over_inputs("gen_hash_xlarge", bench_hash, key_counts);
}

// Collect the four size-tiered benchmark functions above into one group named `benches`.
criterion_group!(benches, gen_hash_small, gen_hash_med, gen_hash_large, gen_hash_xlarge);

// Without the `rayon` feature, only the `benches` group is passed to the benchmark entry point.
#[cfg(not(feature = "rayon"))]
criterion_main!(benches);

// With the `rayon` feature, the `benches` group from the local `rayon` module is passed as well.
#[cfg(feature = "rayon")]
criterion_main!(benches, rayon::benches);

/// Benchmarks for `generate_hash_rayon`, compiled only when the `rayon` feature is enabled.
///
/// Uses the same key counts as the benchmarks in the parent module, with each
/// benchmark name suffixed by `_rayon`.
#[cfg(feature = "rayon")]
mod rayon {
    use criterion::*;

    use phf_generator::generate_hash_rayon;

    use super::gen_vec;

    /// Times `generate_hash_rayon` over a key set of `*len` values produced by `gen_vec`.
    fn bench_hash(b: &mut Bencher, len: &usize) {
        let keys = gen_vec(*len);
        b.iter(|| generate_hash_rayon(&keys))
    }

    /// Registers `gen_hash_small_rayon`, covering key counts from 0 to 75.
    fn gen_hash_small(c: &mut Criterion) {
        let key_counts: Vec<usize> = vec![0, 1, 2, 5, 10, 25, 50, 75];
        c.bench_function_over_inputs("gen_hash_small_rayon", bench_hash, key_counts);
    }

    /// Registers `gen_hash_medium_rayon`, covering key counts from 100 to 7500.
    fn gen_hash_med(c: &mut Criterion) {
        let key_counts: Vec<usize> = vec![100, 250, 500, 1000, 2500, 5000, 7500];
        c.bench_function_over_inputs("gen_hash_medium_rayon", bench_hash, key_counts);
    }

    /// Registers `gen_hash_large_rayon`, covering key counts from 10,000 to 75,000.
    fn gen_hash_large(c: &mut Criterion) {
        let key_counts: Vec<usize> = vec![10_000, 25_000, 50_000, 75_000];
        c.bench_function_over_inputs("gen_hash_large_rayon", bench_hash, key_counts);
    }

    /// Registers `gen_hash_xlarge_rayon`, covering key counts from 100,000 to 1,000,000.
    fn gen_hash_xlarge(c: &mut Criterion) {
        let key_counts: Vec<usize> = vec![100_000, 250_000, 500_000, 750_000, 1_000_000];
        c.bench_function_over_inputs("gen_hash_xlarge_rayon", bench_hash, key_counts);
    }

    // Group the four benchmarks so the parent module can refer to them as `rayon::benches`.
    criterion_group!(benches, gen_hash_small, gen_hash_med, gen_hash_large, gen_hash_xlarge);
}
48 changes: 13 additions & 35 deletions phf_generator/src/lib.rs
@@ -1,57 +1,35 @@
#![doc(html_root_url="https://docs.rs/phf_generator/0.7")]
extern crate phf_shared;
extern crate rand;

use phf_shared::PhfHash;
use phf_shared::{PhfHash, HashKey};
use rand::{SeedableRng, Rng};
use rand::distributions::Standard;
use rand::rngs::SmallRng;

const DEFAULT_LAMBDA: usize = 5;

const FIXED_SEED: u64 = 1234567890;

pub struct HashState {
pub key: u64,
pub key: HashKey,
pub disps: Vec<(u32, u32)>,
pub map: Vec<usize>,
}

pub fn generate_hash<H: PhfHash>(entries: &[H]) -> HashState {
let mut rng = SmallRng::seed_from_u64(FIXED_SEED);
loop {
if let Some(s) = try_generate_hash(entries, &mut rng) {
return s;
}
}
SmallRng::seed_from_u64(FIXED_SEED)
.sample_iter(Standard)
.find_map(|key| try_generate_hash(entries, key))
.expect("failed to solve PHF")
}

fn try_generate_hash<H: PhfHash>(entries: &[H], rng: &mut SmallRng) -> Option<HashState> {
fn try_generate_hash<H: PhfHash>(entries: &[H], key: HashKey) -> Option<HashState> {
struct Bucket {
idx: usize,
keys: Vec<usize>,
}

struct Hashes {
g: u32,
f1: u32,
f2: u32,
}
let hashes: Vec<_> = entries.iter().map(|entry| phf_shared::hash(entry, &key)).collect();

let key = rng.gen();

let hashes: Vec<_> = entries.iter()
.map(|entry| {
let hash = phf_shared::hash(entry, key);
let (g, f1, f2) = phf_shared::split(hash);
Hashes {
g: g,
f1: f1,
f2: f2,
}
})
.collect();

let buckets_len = (entries.len() + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA;
let buckets_len = (hashes.len() + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA;
let mut buckets = (0..buckets_len)
.map(|i| {
Bucket {
Expand All @@ -68,7 +46,7 @@ fn try_generate_hash<H: PhfHash>(entries: &[H], rng: &mut SmallRng) -> Option<Ha
// Sort descending
buckets.sort_by(|a, b| a.keys.len().cmp(&b.keys.len()).reverse());

let table_len = entries.len();
let table_len = hashes.len();
let mut map = vec![None; table_len];
let mut disps = vec![(0u32, 0u32); buckets_len];

Expand Down Expand Up @@ -117,8 +95,8 @@ fn try_generate_hash<H: PhfHash>(entries: &[H], rng: &mut SmallRng) -> Option<Ha
}

Some(HashState {
key: key,
disps: disps,
key,
disps,
map: map.into_iter().map(|i| i.unwrap()).collect(),
})
}
64 changes: 41 additions & 23 deletions phf_shared/src/lib.rs
@@ -1,4 +1,4 @@
#![doc(html_root_url="https://docs.rs/phf_shared/0.7")]
#![doc(html_root_url = "https://docs.rs/phf_shared/0.7")]
#![cfg_attr(feature = "core", no_std)]

#[cfg(not(feature = "core"))]
Expand All @@ -11,29 +11,48 @@ extern crate unicase;

use core::fmt;
use core::hash::{Hasher, Hash};
use core::num::Wrapping;
use siphasher::sip::SipHasher13;

/// The three 32-bit values derived from hashing a single key.
///
/// The private `_priv` field prevents code outside this module from building a
/// `Hashes` with a struct literal.
pub struct Hashes {
/// Reduced modulo the displacement-table length to select a `(d1, d2)` pair.
pub g: u32,
/// Multiplied by `d1` when the displaced index is computed.
pub f1: u32,
/// Added, together with `d2`, to `f1 * d1` when the displaced index is computed.
pub f2: u32,
// Zero-sized private marker; carries no data.
_priv: (),
}

/// A central typedef for hash keys
pub type HashKey = [u64; 2];

#[inline]
pub fn displace(f1: u32, f2: u32, d1: u32, d2: u32) -> u32 {
d2 + f1 * d1 + f2
(Wrapping(d2) + Wrapping(f1) * Wrapping(d1) + Wrapping(f2)).0
}

/// `key` is from `phf_generator::HashState`.
#[inline]
pub fn split(hash: u64) -> (u32, u32, u32) {
const BITS: u32 = 21;
const MASK: u64 = (1 << BITS) - 1;
pub fn hash<T: ?Sized + PhfHash>(x: &T, key: &HashKey) -> Hashes {
let lower = {
// large 64-bit primes as initial keys
let mut hasher = SipHasher13::new_with_keys(14_130_675_974_360_801_221,
key[0]);
x.phf_hash(&mut hasher);
hasher.finish()
};

((hash & MASK) as u32,
((hash >> BITS) & MASK) as u32,
((hash >> (2 * BITS)) & MASK) as u32)
}
let upper = {
let mut hasher = SipHasher13::new_with_keys(11_542_695_197_553_437_579,
key[1]);
x.phf_hash(&mut hasher);
hasher.finish()
};

/// `key` is from `phf_generator::HashState::key`.
#[inline]
pub fn hash<T: ?Sized + PhfHash>(x: &T, key: u64) -> u64 {
let mut hasher = SipHasher13::new_with_keys(0, key);
x.phf_hash(&mut hasher);
hasher.finish()
Hashes {
g: (lower >> 32) as u32,
f1: lower as u32,
f2: upper as u32,
_priv: (),
}
}

/// Return an index into `phf_generator::HashState::map`.
Expand All @@ -42,10 +61,9 @@ pub fn hash<T: ?Sized + PhfHash>(x: &T, key: u64) -> u64 {
/// * `disps` is from `phf_generator::HashState::disps`.
/// * `len` is the length of `phf_generator::HashState::map`.
#[inline]
pub fn get_index(hash: u64, disps: &[(u32, u32)], len: usize) -> u32 {
let (g, f1, f2) = split(hash);
let (d1, d2) = disps[(g % (disps.len() as u32)) as usize];
displace(f1, f2, d1, d2) % (len as u32)
pub fn get_index(hashes: &Hashes, disps: &[(u32, u32)], len: usize) -> u32 {
let (d1, d2) = disps[(hashes.g % (disps.len() as u32)) as usize];
displace(hashes.f1, hashes.f2, d1, d2) % (len as u32)
}

/// A trait implemented by types which can be used in PHF data structures.
Expand Down Expand Up @@ -77,7 +95,7 @@ pub trait FmtConst {
///
/// Ideally with specialization this could be just one default impl and then specialized where
/// it doesn't apply.
macro_rules! delegate_debug(
macro_rules! delegate_debug (
($ty:ty) => {
impl FmtConst for $ty {
fn fmt_const(&self, f: &mut fmt::Formatter) -> fmt::Result {
Expand Down Expand Up @@ -153,7 +171,7 @@ impl FmtConst for [u8] {

#[cfg(feature = "unicase")]
impl<S> PhfHash for unicase::UniCase<S>
where unicase::UniCase<S>: Hash {
where unicase::UniCase<S>: Hash {
#[inline]
fn phf_hash<H: Hasher>(&self, state: &mut H) {
self.hash(state)
Expand All @@ -174,7 +192,7 @@ impl<S> FmtConst for unicase::UniCase<S> where S: AsRef<str> {
}
}

macro_rules! sip_impl(
macro_rules! sip_impl (
(le $t:ty) => (
impl PhfHash for $t {
#[inline]
Expand Down Expand Up @@ -217,7 +235,7 @@ fn fmt_array(array: &[u8], f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:?}", array)
}

macro_rules! array_impl(
macro_rules! array_impl (
($t:ty, $n:expr) => (
impl PhfHash for [$t; $n] {
#[inline]
Expand Down

0 comments on commit 9b70bd9

Please sign in to comment.