diff --git a/phf/src/lib.rs b/phf/src/lib.rs index ddad4fa6..13e8133c 100644 --- a/phf/src/lib.rs +++ b/phf/src/lib.rs @@ -7,13 +7,15 @@ #![crate_type="rlib"] #![crate_type="dylib"] #![warn(missing_doc)] +#![feature(macro_rules)] use std::fmt; -use std::hash::Hash; use std::iter; use std::slice; use std::collections::Collection; +pub use shared::PhfHash; + #[path="../../shared/mod.rs"] mod shared; @@ -44,11 +46,9 @@ mod shared; /// be accessed directly. pub struct PhfMap { #[doc(hidden)] - pub k1: u64, - #[doc(hidden)] - pub k2: u64, + pub key: u64, #[doc(hidden)] - pub disps: &'static [(uint, uint)], + pub disps: &'static [(u32, u32)], #[doc(hidden)] pub entries: &'static [(K, V)], } @@ -59,7 +59,7 @@ impl Collection for PhfMap { } } -impl<'a, K: Hash+Eq, V> Map for PhfMap { +impl<'a, K: PhfHash+Eq, V> Map for PhfMap { fn find(&self, key: &K) -> Option<&V> { self.get_entry(key, |k| key == k).map(|e| { let &(_, ref v) = e; @@ -83,13 +83,13 @@ impl fmt::Show for PhfMap { } } -impl Index for PhfMap { +impl Index for PhfMap { fn index(&self, k: &K) -> &V { self.find(k).expect("invalid key") } } -impl PhfMap { +impl PhfMap { /// Returns a reference to the map's internal static instance of the given /// key. /// @@ -103,12 +103,11 @@ impl PhfMap { } impl PhfMap { - fn get_entry(&self, key: &T, check: |&K| -> bool) - -> Option<&(K, V)> { - let (g, f1, f2) = shared::hash(key, self.k1, self.k2); - let (d1, d2) = self.disps[g % self.disps.len()]; - let entry = &self.entries[shared::displace(f1, f2, d1, d2) % - self.entries.len()]; + fn get_entry(&self, key: &T, check: |&K| -> bool) -> Option<&(K, V)> { + let (g, f1, f2) = key.phf_hash(self.key); + let (d1, d2) = self.disps[(g % (self.disps.len() as u32)) as uint]; + let entry = &self.entries[(shared::displace(f1, f2, d1, d2) % (self.entries.len() as u32)) + as uint]; let &(ref s, _) = entry; if check(s) { Some(entry) @@ -118,7 +117,7 @@ impl PhfMap { } /// Like `find`, but can operate on any type that is equivalent to a key. - pub fn find_equiv>(&self, key: &T) -> Option<&V> { + pub fn find_equiv>(&self, key: &T) -> Option<&V> { self.get_entry(key, |k| key.equiv(k)).map(|e| { let &(_, ref v) = e; v @@ -127,7 +126,7 @@ impl PhfMap { /// Like `find_key`, but can operate on any type that is equivalent to a /// key. - pub fn find_key_equiv>(&self, key: &T) -> Option<&K> { + pub fn find_key_equiv>(&self, key: &T) -> Option<&K> { self.get_entry(key, |k| key.equiv(k)).map(|e| { let &(ref k, _) = e; k @@ -279,7 +278,7 @@ impl Collection for PhfSet { } } -impl<'a, T: Hash+Eq> Set for PhfSet { +impl<'a, T: PhfHash+Eq> Set for PhfSet { #[inline] fn contains(&self, value: &T) -> bool { self.map.contains_key(value) @@ -296,7 +295,7 @@ impl<'a, T: Hash+Eq> Set for PhfSet { } } -impl PhfSet { +impl PhfSet { /// Returns a reference to the set's internal static instance of the given /// key. /// @@ -311,14 +310,14 @@ impl PhfSet { /// Like `contains`, but can operate on any type that is equivalent to a /// value #[inline] - pub fn contains_equiv>(&self, key: &U) -> bool { + pub fn contains_equiv>(&self, key: &U) -> bool { self.map.find_equiv(key).is_some() } /// Like `find_key`, but can operate on any type that is equivalent to a /// value #[inline] - pub fn find_key_equiv>(&self, key: &U) -> Option<&T> { + pub fn find_key_equiv>(&self, key: &U) -> Option<&T> { self.map.find_key_equiv(key) } } @@ -386,11 +385,9 @@ impl<'a, T> ExactSize<&'a T> for PhfSetValues<'a, T> {} /// never be accessed directly. pub struct PhfOrderedMap { #[doc(hidden)] - pub k1: u64, - #[doc(hidden)] - pub k2: u64, + pub key: u64, #[doc(hidden)] - pub disps: &'static [(uint, uint)], + pub disps: &'static [(u32, u32)], #[doc(hidden)] pub idxs: &'static [uint], #[doc(hidden)] @@ -418,7 +415,7 @@ impl Collection for PhfOrderedMap { } } -impl Map for PhfOrderedMap { +impl Map for PhfOrderedMap { fn find(&self, key: &K) -> Option<&V> { self.find_entry(key, |k| k == key).map(|e| { let &(_, ref v) = e; @@ -427,13 +424,13 @@ impl Map for PhfOrderedMap { } } -impl Index for PhfOrderedMap { +impl Index for PhfOrderedMap { fn index(&self, k: &K) -> &V { self.find(k).expect("invalid key") } } -impl PhfOrderedMap { +impl PhfOrderedMap { /// Returns a reference to the map's internal static instance of the given /// key. /// @@ -447,11 +444,10 @@ impl PhfOrderedMap { } impl PhfOrderedMap { - fn find_entry(&self, key: &T, check: |&K| -> bool) - -> Option<&(K, V)> { - let (g, f1, f2) = shared::hash(key, self.k1, self.k2); - let (d1, d2) = self.disps[g % self.disps.len()]; - let idx = self.idxs[shared::displace(f1, f2, d1, d2) % self.idxs.len()]; + fn find_entry(&self, key: &T, check: |&K| -> bool) -> Option<&(K, V)> { + let (g, f1, f2) = key.phf_hash(self.key); + let (d1, d2) = self.disps[(g % (self.disps.len() as u32)) as uint]; + let idx = self.idxs[(shared::displace(f1, f2, d1, d2) % (self.idxs.len() as u32)) as uint]; let entry = &self.entries[idx]; let &(ref s, _) = entry; @@ -463,7 +459,7 @@ impl PhfOrderedMap { } /// Like `find`, but can operate on any type that is equivalent to a key. - pub fn find_equiv>(&self, key: &T) -> Option<&V> { + pub fn find_equiv>(&self, key: &T) -> Option<&V> { self.find_entry(key, |k| key.equiv(k)).map(|e| { let &(_, ref v) = e; v @@ -472,7 +468,7 @@ impl PhfOrderedMap { /// Like `find_key`, but can operate on any type that is equivalent to a /// key. - pub fn find_key_equiv>(&self, key: &T) -> Option<&K> { + pub fn find_key_equiv>(&self, key: &T) -> Option<&K> { self.find_entry(key, |k| key.equiv(k)).map(|e| { let &(ref k, _) = e; k @@ -659,7 +655,7 @@ impl Collection for PhfOrderedSet { } } -impl Set for PhfOrderedSet { +impl Set for PhfOrderedSet { #[inline] fn contains(&self, value: &T) -> bool { self.map.contains_key(value) @@ -676,7 +672,7 @@ impl Set for PhfOrderedSet { } } -impl PhfOrderedSet { +impl PhfOrderedSet { /// Returns a reference to the set's internal static instance of the given /// key. /// @@ -691,14 +687,14 @@ impl PhfOrderedSet { /// Like `contains`, but can operate on any type that is equivalent to a /// value #[inline] - pub fn contains_equiv>(&self, key: &U) -> bool { + pub fn contains_equiv>(&self, key: &U) -> bool { self.map.find_equiv(key).is_some() } /// Like `find_key`, but can operate on any type that is equivalent to a /// value #[inline] - pub fn find_key_equiv>(&self, key: &U) -> Option<&T> { + pub fn find_key_equiv>(&self, key: &U) -> Option<&T> { self.map.find_key_equiv(key) } diff --git a/phf_mac/src/lib.rs b/phf_mac/src/lib.rs index 7597f042..72a40c69 100644 --- a/phf_mac/src/lib.rs +++ b/phf_mac/src/lib.rs @@ -4,7 +4,7 @@ #![crate_name="phf_mac"] #![crate_type="dylib"] #![doc(html_root_url="http://sfackler.github.io/rust-phf/doc")] -#![feature(plugin_registrar, quote, default_type_params)] +#![feature(plugin_registrar, quote, default_type_params, macro_rules)] extern crate rand; extern crate syntax; @@ -13,10 +13,10 @@ extern crate rustc; use std::collections::HashMap; use std::gc::{Gc, GC}; -use std::hash; -use std::hash::Hash; use std::os; use std::rc::Rc; +use std::hash; +use std::hash::Hash; use syntax::ast; use syntax::ast::{TokenTree, LitStr, LitBinary, LitByte, LitChar, Expr, ExprVec, ExprLit}; use syntax::codemap::Span; @@ -31,6 +31,8 @@ use syntax::print::pprust; use rand::{Rng, SeedableRng, XorShiftRng}; use rustc::plugin::Registry; +use shared::PhfHash; + #[path="../../shared/mod.rs"] mod shared; @@ -82,6 +84,25 @@ impl Hash for Key { } } +impl PhfHash for Key { + fn phf_hash(&self, key: u64) -> (u32, u32, u32) { + match *self { + KeyStr(ref s) => s.get().phf_hash(key), + KeyBinary(ref b) => b.as_slice().phf_hash(key), + KeyChar(c) => c.phf_hash(key), + KeyU8(b) => b.phf_hash(key), + KeyI8(b) => b.phf_hash(key), + KeyU16(b) => b.phf_hash(key), + KeyI16(b) => b.phf_hash(key), + KeyU32(b) => b.phf_hash(key), + KeyI32(b) => b.phf_hash(key), + KeyU64(b) => b.phf_hash(key), + KeyI64(b) => b.phf_hash(key), + KeyBool(b) => b.phf_hash(key), + } + } +} + struct Entry { key_contents: Key, key: Gc, @@ -89,9 +110,8 @@ struct Entry { } struct HashState { - k1: u64, - k2: u64, - disps: Vec<(uint, uint)>, + key: u64, + disps: Vec<(u32, u32)>, map: Vec, } @@ -334,16 +354,15 @@ fn try_generate_hash(entries: &[Entry], rng: &mut XorShiftRng) } struct Hashes { - g: uint, - f1: uint, - f2: uint, + g: u32, + f1: u32, + f2: u32, } - let k1 = rng.gen(); - let k2 = rng.gen(); + let key = rng.gen(); let hashes: Vec = entries.iter().map(|entry| { - let (g, f1, f2) = shared::hash(&entry.key_contents, k1, k2); + let (g, f1, f2) = entry.key_contents.phf_hash(key); Hashes { g: g, f1: f1, @@ -356,7 +375,7 @@ fn try_generate_hash(entries: &[Entry], rng: &mut XorShiftRng) |i| Bucket { idx: i, keys: Vec::new() }); for (i, hash) in hashes.iter().enumerate() { - buckets.get_mut(hash.g % buckets_len).keys.push(i); + buckets.get_mut((hash.g % (buckets_len as u32)) as uint).keys.push(i); } // Sort descending @@ -364,17 +383,15 @@ fn try_generate_hash(entries: &[Entry], rng: &mut XorShiftRng) let table_len = entries.len(); let mut map = Vec::from_elem(table_len, None); - let mut disps = Vec::from_elem(buckets_len, (0u, 0u)); + let mut disps = Vec::from_elem(buckets_len, (0u32, 0u32)); let mut try_map = HashMap::new(); 'buckets: for bucket in buckets.iter() { - for d1 in range(0, table_len) { - 'disps: for d2 in range(0, table_len) { + for d1 in range(0, table_len as u32) { + 'disps: for d2 in range(0, table_len as u32) { try_map.clear(); for &key in bucket.keys.iter() { - let idx = shared::displace(hashes[key].f1, - hashes[key].f2, - d1, - d2) % table_len; + let idx = (shared::displace(hashes[key].f1, hashes[key].f2, d1, d2) + % (table_len as u32)) as uint; if map[idx].is_some() || try_map.find(&idx).is_some() { continue 'disps; } @@ -395,8 +412,7 @@ fn try_generate_hash(entries: &[Entry], rng: &mut XorShiftRng) } Some(HashState { - k1: k1, - k2: k2, + key: key, disps: disps, map: map.move_iter().map(|i| i.unwrap()).collect(), }) @@ -415,11 +431,9 @@ fn create_map(cx: &mut ExtCtxt, sp: Span, entries: Vec, state: HashState) }).collect(); let entries = create_slice_expr(entries, sp); - let k1 = state.k1; - let k2 = state.k2; + let key = state.key; MacExpr::new(quote_expr!(cx, ::phf::PhfMap { - k1: $k1, - k2: $k2, + key: $key, disps: &$disps, entries: &$entries, })) @@ -446,11 +460,9 @@ fn create_ordered_map(cx: &mut ExtCtxt, sp: Span, entries: Vec, }).collect(); let entries = create_slice_expr(entries, sp); - let k1 = state.k1; - let k2 = state.k2; + let key = state.key; MacExpr::new(quote_expr!(cx, ::phf::PhfOrderedMap { - k1: $k1, - k2: $k2, + key: $key, disps: &$disps, idxs: &$idxs, entries: &$entries, diff --git a/shared/mod.rs b/shared/mod.rs index be451e06..aaf5760a 100644 --- a/shared/mod.rs +++ b/shared/mod.rs @@ -1,19 +1,57 @@ -use std::hash::{Hash, Hasher}; -use std::hash::sip::SipHasher; +use std::hash::{Hash, Hasher, Writer}; +use std::hash::sip::{SipHasher, SipState}; static LOG_MAX_SIZE: uint = 21; pub static MAX_SIZE: uint = 1 << LOG_MAX_SIZE; -pub fn hash(s: &T, k1: u64, k2: u64) -> (uint, uint, uint) { - let hash = SipHasher::new_with_keys(k1, k2).hash(s); +pub fn displace(f1: u32, f2: u32, d1: u32, d2: u32) -> u32 { + d2 + f1 * d1 + f2 +} + +fn split(hash: u64) -> (u32, u32, u32) { let mask = (MAX_SIZE - 1) as u64; - ((hash & mask) as uint, - ((hash >> LOG_MAX_SIZE) & mask) as uint, - ((hash >> (2 * LOG_MAX_SIZE)) & mask) as uint) + ((hash & mask) as u32, + ((hash >> LOG_MAX_SIZE) & mask) as u32, + ((hash >> (2 * LOG_MAX_SIZE)) & mask) as u32) } -pub fn displace(f1: uint, f2: uint, d1: uint, d2: uint) -> uint { - d2 + f1 * d1 + f2 +pub trait PhfHash { + fn phf_hash(&self, seed: u64) -> (u32, u32, u32); +} + +impl<'a> PhfHash for &'a str { + fn phf_hash(&self, seed: u64) -> (u32, u32, u32) { + split(SipHasher::new_with_keys(0, seed).hash(self)) + } +} + +impl<'a> PhfHash for &'a [u8] { + fn phf_hash(&self, seed: u64) -> (u32, u32, u32) { + let mut state = SipState::new_with_keys(0, seed); + state.write(*self); + split(state.result()) + } } + +macro_rules! sip_impl( + ($t:ty) => ( + impl PhfHash for $t { + fn phf_hash(&self, seed: u64) -> (u32, u32, u32) { + split(SipHasher::new_with_keys(0, seed).hash(self)) + } + } + ) +) + +sip_impl!(u8) +sip_impl!(i8) +sip_impl!(u16) +sip_impl!(i16) +sip_impl!(u32) +sip_impl!(i32) +sip_impl!(u64) +sip_impl!(i64) +sip_impl!(char) +sip_impl!(bool)