From b2220d9a428049fb9c52b51c16d8f6b15cd02487 Mon Sep 17 00:00:00 2001 From: Steven Fackler Date: Fri, 8 Aug 2014 23:27:53 -0700 Subject: [PATCH] Properly support cross compiled builds The default Hash implementations aren't always invariant between architectures, so we're defining our own trait. Indexing operations are also carried out in u32 instead of uint. Closes #9 --- phf/src/lib.rs | 72 ++++++++++++++++++++++------------------------ phf_mac/src/lib.rs | 72 +++++++++++++++++++++++++++------------------- shared/mod.rs | 56 ++++++++++++++++++++++++++++++------ 3 files changed, 123 insertions(+), 77 deletions(-) diff --git a/phf/src/lib.rs b/phf/src/lib.rs index ddad4fa6..13e8133c 100644 --- a/phf/src/lib.rs +++ b/phf/src/lib.rs @@ -7,13 +7,15 @@ #![crate_type="rlib"] #![crate_type="dylib"] #![warn(missing_doc)] +#![feature(macro_rules)] use std::fmt; -use std::hash::Hash; use std::iter; use std::slice; use std::collections::Collection; +pub use shared::PhfHash; + #[path="../../shared/mod.rs"] mod shared; @@ -44,11 +46,9 @@ mod shared; /// be accessed directly. pub struct PhfMap { #[doc(hidden)] - pub k1: u64, - #[doc(hidden)] - pub k2: u64, + pub key: u64, #[doc(hidden)] - pub disps: &'static [(uint, uint)], + pub disps: &'static [(u32, u32)], #[doc(hidden)] pub entries: &'static [(K, V)], } @@ -59,7 +59,7 @@ impl Collection for PhfMap { } } -impl<'a, K: Hash+Eq, V> Map for PhfMap { +impl<'a, K: PhfHash+Eq, V> Map for PhfMap { fn find(&self, key: &K) -> Option<&V> { self.get_entry(key, |k| key == k).map(|e| { let &(_, ref v) = e; @@ -83,13 +83,13 @@ impl fmt::Show for PhfMap { } } -impl Index for PhfMap { +impl Index for PhfMap { fn index(&self, k: &K) -> &V { self.find(k).expect("invalid key") } } -impl PhfMap { +impl PhfMap { /// Returns a reference to the map's internal static instance of the given /// key. 
/// @@ -103,12 +103,11 @@ impl PhfMap { } impl PhfMap { - fn get_entry(&self, key: &T, check: |&K| -> bool) - -> Option<&(K, V)> { - let (g, f1, f2) = shared::hash(key, self.k1, self.k2); - let (d1, d2) = self.disps[g % self.disps.len()]; - let entry = &self.entries[shared::displace(f1, f2, d1, d2) % - self.entries.len()]; + fn get_entry(&self, key: &T, check: |&K| -> bool) -> Option<&(K, V)> { + let (g, f1, f2) = key.phf_hash(self.key); + let (d1, d2) = self.disps[(g % (self.disps.len() as u32)) as uint]; + let entry = &self.entries[(shared::displace(f1, f2, d1, d2) % (self.entries.len() as u32)) + as uint]; let &(ref s, _) = entry; if check(s) { Some(entry) @@ -118,7 +117,7 @@ impl PhfMap { } /// Like `find`, but can operate on any type that is equivalent to a key. - pub fn find_equiv>(&self, key: &T) -> Option<&V> { + pub fn find_equiv>(&self, key: &T) -> Option<&V> { self.get_entry(key, |k| key.equiv(k)).map(|e| { let &(_, ref v) = e; v @@ -127,7 +126,7 @@ impl PhfMap { /// Like `find_key`, but can operate on any type that is equivalent to a /// key. - pub fn find_key_equiv>(&self, key: &T) -> Option<&K> { + pub fn find_key_equiv>(&self, key: &T) -> Option<&K> { self.get_entry(key, |k| key.equiv(k)).map(|e| { let &(ref k, _) = e; k @@ -279,7 +278,7 @@ impl Collection for PhfSet { } } -impl<'a, T: Hash+Eq> Set for PhfSet { +impl<'a, T: PhfHash+Eq> Set for PhfSet { #[inline] fn contains(&self, value: &T) -> bool { self.map.contains_key(value) @@ -296,7 +295,7 @@ impl<'a, T: Hash+Eq> Set for PhfSet { } } -impl PhfSet { +impl PhfSet { /// Returns a reference to the set's internal static instance of the given /// key. 
/// @@ -311,14 +310,14 @@ impl PhfSet { /// Like `contains`, but can operate on any type that is equivalent to a /// value #[inline] - pub fn contains_equiv>(&self, key: &U) -> bool { + pub fn contains_equiv>(&self, key: &U) -> bool { self.map.find_equiv(key).is_some() } /// Like `find_key`, but can operate on any type that is equivalent to a /// value #[inline] - pub fn find_key_equiv>(&self, key: &U) -> Option<&T> { + pub fn find_key_equiv>(&self, key: &U) -> Option<&T> { self.map.find_key_equiv(key) } } @@ -386,11 +385,9 @@ impl<'a, T> ExactSize<&'a T> for PhfSetValues<'a, T> {} /// never be accessed directly. pub struct PhfOrderedMap { #[doc(hidden)] - pub k1: u64, - #[doc(hidden)] - pub k2: u64, + pub key: u64, #[doc(hidden)] - pub disps: &'static [(uint, uint)], + pub disps: &'static [(u32, u32)], #[doc(hidden)] pub idxs: &'static [uint], #[doc(hidden)] @@ -418,7 +415,7 @@ impl Collection for PhfOrderedMap { } } -impl Map for PhfOrderedMap { +impl Map for PhfOrderedMap { fn find(&self, key: &K) -> Option<&V> { self.find_entry(key, |k| k == key).map(|e| { let &(_, ref v) = e; @@ -427,13 +424,13 @@ impl Map for PhfOrderedMap { } } -impl Index for PhfOrderedMap { +impl Index for PhfOrderedMap { fn index(&self, k: &K) -> &V { self.find(k).expect("invalid key") } } -impl PhfOrderedMap { +impl PhfOrderedMap { /// Returns a reference to the map's internal static instance of the given /// key. 
/// @@ -447,11 +444,10 @@ impl PhfOrderedMap { } impl PhfOrderedMap { - fn find_entry(&self, key: &T, check: |&K| -> bool) - -> Option<&(K, V)> { - let (g, f1, f2) = shared::hash(key, self.k1, self.k2); - let (d1, d2) = self.disps[g % self.disps.len()]; - let idx = self.idxs[shared::displace(f1, f2, d1, d2) % self.idxs.len()]; + fn find_entry(&self, key: &T, check: |&K| -> bool) -> Option<&(K, V)> { + let (g, f1, f2) = key.phf_hash(self.key); + let (d1, d2) = self.disps[(g % (self.disps.len() as u32)) as uint]; + let idx = self.idxs[(shared::displace(f1, f2, d1, d2) % (self.idxs.len() as u32)) as uint]; let entry = &self.entries[idx]; let &(ref s, _) = entry; @@ -463,7 +459,7 @@ impl PhfOrderedMap { } /// Like `find`, but can operate on any type that is equivalent to a key. - pub fn find_equiv>(&self, key: &T) -> Option<&V> { + pub fn find_equiv>(&self, key: &T) -> Option<&V> { self.find_entry(key, |k| key.equiv(k)).map(|e| { let &(_, ref v) = e; v @@ -472,7 +468,7 @@ impl PhfOrderedMap { /// Like `find_key`, but can operate on any type that is equivalent to a /// key. - pub fn find_key_equiv>(&self, key: &T) -> Option<&K> { + pub fn find_key_equiv>(&self, key: &T) -> Option<&K> { self.find_entry(key, |k| key.equiv(k)).map(|e| { let &(ref k, _) = e; k @@ -659,7 +655,7 @@ impl Collection for PhfOrderedSet { } } -impl Set for PhfOrderedSet { +impl Set for PhfOrderedSet { #[inline] fn contains(&self, value: &T) -> bool { self.map.contains_key(value) @@ -676,7 +672,7 @@ impl Set for PhfOrderedSet { } } -impl PhfOrderedSet { +impl PhfOrderedSet { /// Returns a reference to the set's internal static instance of the given /// key. 
/// @@ -691,14 +687,14 @@ impl PhfOrderedSet { /// Like `contains`, but can operate on any type that is equivalent to a /// value #[inline] - pub fn contains_equiv>(&self, key: &U) -> bool { + pub fn contains_equiv>(&self, key: &U) -> bool { self.map.find_equiv(key).is_some() } /// Like `find_key`, but can operate on any type that is equivalent to a /// value #[inline] - pub fn find_key_equiv>(&self, key: &U) -> Option<&T> { + pub fn find_key_equiv>(&self, key: &U) -> Option<&T> { self.map.find_key_equiv(key) } diff --git a/phf_mac/src/lib.rs b/phf_mac/src/lib.rs index 7597f042..72a40c69 100644 --- a/phf_mac/src/lib.rs +++ b/phf_mac/src/lib.rs @@ -4,7 +4,7 @@ #![crate_name="phf_mac"] #![crate_type="dylib"] #![doc(html_root_url="http://sfackler.github.io/rust-phf/doc")] -#![feature(plugin_registrar, quote, default_type_params)] +#![feature(plugin_registrar, quote, default_type_params, macro_rules)] extern crate rand; extern crate syntax; @@ -13,10 +13,10 @@ extern crate rustc; use std::collections::HashMap; use std::gc::{Gc, GC}; -use std::hash; -use std::hash::Hash; use std::os; use std::rc::Rc; +use std::hash; +use std::hash::Hash; use syntax::ast; use syntax::ast::{TokenTree, LitStr, LitBinary, LitByte, LitChar, Expr, ExprVec, ExprLit}; use syntax::codemap::Span; @@ -31,6 +31,8 @@ use syntax::print::pprust; use rand::{Rng, SeedableRng, XorShiftRng}; use rustc::plugin::Registry; +use shared::PhfHash; + #[path="../../shared/mod.rs"] mod shared; @@ -82,6 +84,25 @@ impl Hash for Key { } } +impl PhfHash for Key { + fn phf_hash(&self, key: u64) -> (u32, u32, u32) { + match *self { + KeyStr(ref s) => s.get().phf_hash(key), + KeyBinary(ref b) => b.as_slice().phf_hash(key), + KeyChar(c) => c.phf_hash(key), + KeyU8(b) => b.phf_hash(key), + KeyI8(b) => b.phf_hash(key), + KeyU16(b) => b.phf_hash(key), + KeyI16(b) => b.phf_hash(key), + KeyU32(b) => b.phf_hash(key), + KeyI32(b) => b.phf_hash(key), + KeyU64(b) => b.phf_hash(key), + KeyI64(b) => b.phf_hash(key), + KeyBool(b) => 
b.phf_hash(key), + } + } +} + struct Entry { key_contents: Key, key: Gc, @@ -89,9 +110,8 @@ struct Entry { } struct HashState { - k1: u64, - k2: u64, - disps: Vec<(uint, uint)>, + key: u64, + disps: Vec<(u32, u32)>, map: Vec, } @@ -334,16 +354,15 @@ fn try_generate_hash(entries: &[Entry], rng: &mut XorShiftRng) } struct Hashes { - g: uint, - f1: uint, - f2: uint, + g: u32, + f1: u32, + f2: u32, } - let k1 = rng.gen(); - let k2 = rng.gen(); + let key = rng.gen(); let hashes: Vec = entries.iter().map(|entry| { - let (g, f1, f2) = shared::hash(&entry.key_contents, k1, k2); + let (g, f1, f2) = entry.key_contents.phf_hash(key); Hashes { g: g, f1: f1, @@ -356,7 +375,7 @@ fn try_generate_hash(entries: &[Entry], rng: &mut XorShiftRng) |i| Bucket { idx: i, keys: Vec::new() }); for (i, hash) in hashes.iter().enumerate() { - buckets.get_mut(hash.g % buckets_len).keys.push(i); + buckets.get_mut((hash.g % (buckets_len as u32)) as uint).keys.push(i); } // Sort descending @@ -364,17 +383,15 @@ fn try_generate_hash(entries: &[Entry], rng: &mut XorShiftRng) let table_len = entries.len(); let mut map = Vec::from_elem(table_len, None); - let mut disps = Vec::from_elem(buckets_len, (0u, 0u)); + let mut disps = Vec::from_elem(buckets_len, (0u32, 0u32)); let mut try_map = HashMap::new(); 'buckets: for bucket in buckets.iter() { - for d1 in range(0, table_len) { - 'disps: for d2 in range(0, table_len) { + for d1 in range(0, table_len as u32) { + 'disps: for d2 in range(0, table_len as u32) { try_map.clear(); for &key in bucket.keys.iter() { - let idx = shared::displace(hashes[key].f1, - hashes[key].f2, - d1, - d2) % table_len; + let idx = (shared::displace(hashes[key].f1, hashes[key].f2, d1, d2) + % (table_len as u32)) as uint; if map[idx].is_some() || try_map.find(&idx).is_some() { continue 'disps; } @@ -395,8 +412,7 @@ fn try_generate_hash(entries: &[Entry], rng: &mut XorShiftRng) } Some(HashState { - k1: k1, - k2: k2, + key: key, disps: disps, map: map.move_iter().map(|i| 
i.unwrap()).collect(), }) @@ -415,11 +431,9 @@ fn create_map(cx: &mut ExtCtxt, sp: Span, entries: Vec, state: HashState) }).collect(); let entries = create_slice_expr(entries, sp); - let k1 = state.k1; - let k2 = state.k2; + let key = state.key; MacExpr::new(quote_expr!(cx, ::phf::PhfMap { - k1: $k1, - k2: $k2, + key: $key, disps: &$disps, entries: &$entries, })) @@ -446,11 +460,9 @@ fn create_ordered_map(cx: &mut ExtCtxt, sp: Span, entries: Vec, }).collect(); let entries = create_slice_expr(entries, sp); - let k1 = state.k1; - let k2 = state.k2; + let key = state.key; MacExpr::new(quote_expr!(cx, ::phf::PhfOrderedMap { - k1: $k1, - k2: $k2, + key: $key, disps: &$disps, idxs: &$idxs, entries: &$entries, diff --git a/shared/mod.rs b/shared/mod.rs index be451e06..aaf5760a 100644 --- a/shared/mod.rs +++ b/shared/mod.rs @@ -1,19 +1,57 @@ -use std::hash::{Hash, Hasher}; -use std::hash::sip::SipHasher; +use std::hash::{Hash, Hasher, Writer}; +use std::hash::sip::{SipHasher, SipState}; static LOG_MAX_SIZE: uint = 21; pub static MAX_SIZE: uint = 1 << LOG_MAX_SIZE; -pub fn hash(s: &T, k1: u64, k2: u64) -> (uint, uint, uint) { - let hash = SipHasher::new_with_keys(k1, k2).hash(s); +pub fn displace(f1: u32, f2: u32, d1: u32, d2: u32) -> u32 { + d2 + f1 * d1 + f2 +} + +fn split(hash: u64) -> (u32, u32, u32) { let mask = (MAX_SIZE - 1) as u64; - ((hash & mask) as uint, - ((hash >> LOG_MAX_SIZE) & mask) as uint, - ((hash >> (2 * LOG_MAX_SIZE)) & mask) as uint) + ((hash & mask) as u32, + ((hash >> LOG_MAX_SIZE) & mask) as u32, + ((hash >> (2 * LOG_MAX_SIZE)) & mask) as u32) } -pub fn displace(f1: uint, f2: uint, d1: uint, d2: uint) -> uint { - d2 + f1 * d1 + f2 +pub trait PhfHash { + fn phf_hash(&self, seed: u64) -> (u32, u32, u32); +} + +impl<'a> PhfHash for &'a str { + fn phf_hash(&self, seed: u64) -> (u32, u32, u32) { + split(SipHasher::new_with_keys(0, seed).hash(self)) + } +} + +impl<'a> PhfHash for &'a [u8] { + fn phf_hash(&self, seed: u64) -> (u32, u32, u32) { + let mut 
state = SipState::new_with_keys(0, seed); + state.write(*self); + split(state.result()) + } } + +macro_rules! sip_impl( + ($t:ty) => ( + impl PhfHash for $t { + fn phf_hash(&self, seed: u64) -> (u32, u32, u32) { + split(SipHasher::new_with_keys(0, seed).hash(self)) + } + } + ) +) + +sip_impl!(u8) +sip_impl!(i8) +sip_impl!(u16) +sip_impl!(i16) +sip_impl!(u32) +sip_impl!(i32) +sip_impl!(u64) +sip_impl!(i64) +sip_impl!(char) +sip_impl!(bool)