Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Add hash() and get_index() to phf_shared.
In https://github.com/servo/string-cache, we currently use a `phf::OrderedSet`
with its `get_index` method to get an identifier stored in an `Atom`,
and `index` to get a string back from that identifier.

However, the extra indirection of `OrderedSet` over `Set` is not necessary.
We don’t care about the order, only about getting numeric identifiers.

Additionally, when `get_index` returns `None`,
we hash the input string again to find it in the table of dynamic atoms.
With this change, we can reuse the phf hash instead:

servo/string-cache#103

At first I tried adding hash and index access to `phf::Map`,
but the API got messy quickly.
  • Loading branch information
SimonSapin committed Aug 3, 2015
1 parent 96ef156 commit d3b2ea0
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 21 deletions.
11 changes: 3 additions & 8 deletions phf/src/map.rs
Expand Up @@ -2,7 +2,6 @@
use debug_builders::DebugMap;
use std::borrow::Borrow;
use std::ops::Index;
use std::hash::{Hasher, SipHasher};
use std::slice;
use std::fmt;
use std::iter::IntoIterator;
Expand Down Expand Up @@ -71,13 +70,9 @@ impl<K, V> Map<K, V> {
/// Like `get`, but returns both the key and the value.
pub fn get_entry<T: ?Sized>(&self, key: &T) -> Option<(&K, &V)>
where T: Eq + PhfHash, K: Borrow<T> {
let mut hasher = SipHasher::new_with_keys(0, self.key);
key.phf_hash(&mut hasher);
let (g, f1, f2) = phf_shared::split(hasher.finish());

let (d1, d2) = self.disps[(g % (self.disps.len() as u32)) as usize];
let entry = &self.entries[(phf_shared::displace(f1, f2, d1, d2) % (self.entries.len() as u32))
as usize];
let hash = phf_shared::hash(key, self.key);
let index = phf_shared::get_index(hash, self.disps, self.entries.len());
let entry = &self.entries[index as usize];
let b: &T = entry.0.borrow();
if b == key {
Some((&entry.0, &entry.1))
Expand Down
10 changes: 3 additions & 7 deletions phf/src/ordered_map.rs
@@ -1,7 +1,6 @@
//! An order-preserving immutable map constructed at compile time.
use debug_builders::DebugMap;
use std::borrow::Borrow;
use std::hash::{Hasher, SipHasher};
use std::iter::IntoIterator;
use std::ops::Index;
use std::fmt;
Expand Down Expand Up @@ -95,12 +94,9 @@ impl<K, V> OrderedMap<K, V> {

fn get_internal<T: ?Sized>(&self, key: &T) -> Option<(usize, (&K, &V))>
where T: Eq + PhfHash, K: Borrow<T> {
let mut hasher = SipHasher::new_with_keys(0, self.key);
key.phf_hash(&mut hasher);
let (g, f1, f2) = phf_shared::split(hasher.finish());

let (d1, d2) = self.disps[(g % (self.disps.len() as u32)) as usize];
let idx = self.idxs[(phf_shared::displace(f1, f2, d1, d2) % (self.idxs.len() as u32)) as usize];
let hash = phf_shared::hash(key, self.key);
let idx_index = phf_shared::get_index(hash, self.disps, self.idxs.len());
let idx = self.idxs[idx_index as usize];
let entry = &self.entries[idx];

let b: &T = entry.0.borrow();
Expand Down
7 changes: 2 additions & 5 deletions phf_generator/src/lib.rs
Expand Up @@ -4,7 +4,6 @@ extern crate rand;

use phf_shared::PhfHash;
use rand::{SeedableRng, XorShiftRng, Rng};
use std::hash::{Hasher, SipHasher};

const DEFAULT_LAMBDA: usize = 5;

Expand Down Expand Up @@ -38,12 +37,10 @@ fn try_generate_hash<H: PhfHash>(entries: &[H], rng: &mut XorShiftRng) -> Option
}

let key = rng.gen();
let hasher = SipHasher::new_with_keys(0, key);

let hashes: Vec<_> = entries.iter().map(|entry| {
let mut hasher = hasher.clone();
entry.phf_hash(&mut hasher);
let (g, f1, f2) = phf_shared::split(hasher.finish());
let hash = phf_shared::hash(entry, key);
let (g, f1, f2) = phf_shared::split(hash);
Hashes {
g: g,
f1: f1,
Expand Down
22 changes: 21 additions & 1 deletion phf_shared/src/lib.rs
@@ -1,6 +1,6 @@
#![doc(html_root_url="http://sfackler.github.io/rust-phf/doc")]

use std::hash::{Hasher, Hash};
use std::hash::{Hasher, Hash, SipHasher};

#[inline]
pub fn displace(f1: u32, f2: u32, d1: u32, d2: u32) -> u32 {
Expand All @@ -17,6 +17,26 @@ pub fn split(hash: u64) -> (u32, u32, u32) {
((hash >> (2 * BITS)) & MASK) as u32)
}

/// Compute the 64-bit PHF hash of `x`.
///
/// A `SipHasher` is seeded with the key pair `(0, key)`, `x` is fed into it
/// through `PhfHash::phf_hash`, and the finished digest is returned.
///
/// * `key` is from `phf_generator::HashState::key`.
#[inline]
pub fn hash<T: ?Sized + PhfHash>(x: &T, key: u64) -> u64 {
    let mut state = SipHasher::new_with_keys(0, key);
    x.phf_hash(&mut state);
    let digest = state.finish();
    digest
}

/// Map a hash to an index into `phf_generator::HashState::map`.
///
/// The hash is broken into three parts with `split()`. The first part,
/// reduced modulo `disps.len()`, selects a displacement pair; the other two
/// parts are combined with that pair by `displace()`, and the result is
/// reduced modulo `len`.
///
/// * `hash` is from `hash()` in this crate.
/// * `disps` is from `phf_generator::HashState::disps`.
/// * `len` is the length of `phf_generator::HashState::map`.
///
/// Both lengths are narrowed to `u32` with `as` before being used as moduli.
///
/// # Panics
///
/// Panics with a remainder-by-zero error if `disps` is empty, or if `len`
/// is zero after the cast to `u32`.
#[inline]
pub fn get_index(hash: u64, disps: &[(u32, u32)], len: usize) -> u32 {
    let (g, f1, f2) = split(hash);

    // Pick the displacement pair for this hash.
    let disps_len = disps.len() as u32;
    let disp_slot = (g % disps_len) as usize;
    let (d1, d2) = disps[disp_slot];

    // Apply the displacement, then wrap into the table's index range.
    let table_len = len as u32;
    let displaced = displace(f1, f2, d1, d2);
    displaced % table_len
}

/// A trait implemented by types which can be used in PHF data structures.
///
/// This differs from the standard library's `Hash` trait in that `PhfHash`'s
Expand Down

0 comments on commit d3b2ea0

Please sign in to comment.