diff --git a/Cargo.toml b/Cargo.toml index 13b416d0..e1a10585 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,8 +1,11 @@ [workspace] members = [ "phf", - "phf_codegen", - "phf_codegen/test", + # TODO: Re-enable when a possibility is found to build all of phf_codegen's + # dependencies *WITHOUT* the `const-api` cargo feature, but everything else + # either with or without, depending on feature opt-ins. + #"phf_codegen", + #"phf_codegen/test", "phf_generator", "phf_macros", "phf_macros_tests", diff --git a/README.md b/README.md index f01286d3..7f0e62f9 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,8 @@ a compiler note about how long it took. MSRV (minimum supported rust version) is Rust 1.46. +TODO: Clarify MSRV when building with the const feature. + ## Usage PHF data structures can be constructed via either the procedural diff --git a/phf/Cargo.toml b/phf/Cargo.toml index 5c394543..19b3050b 100644 --- a/phf/Cargo.toml +++ b/phf/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "phf" authors = ["Steven Fackler "] -version = "0.10.0" +version = "0.11.0" license = "MIT" description = "Runtime support for perfect hash function data structures" repository = "https://github.com/sfackler/rust-phf" @@ -18,15 +18,12 @@ default = ["std"] std = ["phf_shared/std"] uncased = ["phf_shared/uncased"] unicase = ["phf_shared/unicase"] -macros = [ - "phf_macros", - "proc-macro-hack", -] +macros = ["phf_generator", "phf_macros"] [dependencies] -proc-macro-hack = { version = "0.5.4", optional = true } -phf_macros = { version = "0.10.0", optional = true } -phf_shared = { version = "0.10.0", default-features = false } +phf_generator = { version = "0.11.0", features = ["const-api"], optional = true } +phf_macros = { version = "0.11.0", optional = true } +phf_shared = { version = "0.11.0", default-features = false, features = ["const-api"] } [package.metadata.docs.rs] features = ["macros"] diff --git a/phf/src/lib.rs b/phf/src/lib.rs index 627d3e81..a90c91a0 100644 --- a/phf/src/lib.rs +++ b/phf/src/lib.rs @@ -36,7 +36,7 @@ //! ```rust //! use phf::phf_map; //! -//! #[derive(Clone)] +//! #[derive(Clone, Copy)] //! pub enum Keyword { //! Loop, //! Continue, @@ -71,13 +71,52 @@ //! [#183]: https://github.com/rust-phf/rust-phf/issues/183 //! [#196]: https://github.com/rust-phf/rust-phf/issues/196 -#![doc(html_root_url = "https://docs.rs/phf/0.10")] +// XXX: Remove on stabilization. +#![allow(incomplete_features)] +#![feature(generic_const_exprs, const_trait_impl)] +#![doc(html_root_url = "https://docs.rs/phf/0.11")] #![warn(missing_docs)] #![cfg_attr(not(feature = "std"), no_std)] #[cfg(feature = "std")] extern crate std as core; +// Not part of the public API. Used by the macro facade.
+#[cfg(feature = "macros")] +#[doc(hidden)] +pub extern crate phf_macros as __phf_macros; + +#[cfg(feature = "macros")] +#[doc(hidden)] +pub const fn build_map( + state: &'static ([(Key, Value); N], phf_generator::HashState), +) -> Map +where + [(); (N + phf_generator::DEFAULT_LAMBDA - 1) / phf_generator::DEFAULT_LAMBDA]: Sized, +{ + Map { + key: state.1.key, + disps: &*state.1.disps, + entries: &state.0, + } +} + +#[cfg(feature = "macros")] +#[doc(hidden)] +pub const fn build_ordered_map( + state: &'static ([(Key, Value); N], phf_generator::HashState), +) -> OrderedMap +where + [(); (N + phf_generator::DEFAULT_LAMBDA - 1) / phf_generator::DEFAULT_LAMBDA]: Sized, +{ + OrderedMap { + key: state.1.key, + disps: &*state.1.disps, + idxs: &*state.1.map, + entries: &state.0, + } +} + #[cfg(feature = "macros")] /// Macro to create a `static` (compile-time) [`Map`]. /// @@ -97,15 +136,25 @@ extern crate std as core; /// assert_eq!(MY_MAP["hello"], 1); /// } /// ``` -#[proc_macro_hack::proc_macro_hack] -pub use phf_macros::phf_map; +#[macro_export] +macro_rules! phf_map { + ($($key:expr => $value:expr),* $(,)*) => { + $crate::build_map(&$crate::__phf_macros::phf_map(&[$(($key, $value)),*])) + }; +} #[cfg(feature = "macros")] /// Macro to create a `static` (compile-time) [`OrderedMap`]. /// /// Requires the `macros` feature. Same usage as [`phf_map`]. -#[proc_macro_hack::proc_macro_hack] -pub use phf_macros::phf_ordered_map; +#[macro_export] +macro_rules! phf_ordered_map { + ($($key:expr => $value:expr),* $(,)*) => { + $crate::build_ordered_map( + &$crate::__phf_macros::phf_ordered_map(&[$(($key, $value)),*]), + ) + }; +} #[cfg(feature = "macros")] /// Macro to create a `static` (compile-time) [`Set`]. @@ -126,15 +175,29 @@ pub use phf_macros::phf_ordered_map; /// assert!(MY_SET.contains("hello world")); /// } /// ``` -#[proc_macro_hack::proc_macro_hack] -pub use phf_macros::phf_set; +#[macro_export] +macro_rules! phf_set { + ($($key:expr),* $(,)*) => { + $crate::Set { + map: $crate::build_map(&$crate::__phf_macros::phf_set(&[$($key),*])), + } + }; +} #[cfg(feature = "macros")] /// Macro to create a `static` (compile-time) [`OrderedSet`]. /// /// Requires the `macros` feature. Same usage as [`phf_set`]. -#[proc_macro_hack::proc_macro_hack] -pub use phf_macros::phf_ordered_set; +#[macro_export] +macro_rules! phf_ordered_set { + ($($key:expr),* $(,)*) => { + $crate::OrderedSet { + map: $crate::build_ordered_map( + &$crate::__phf_macros::phf_ordered_set(&[$($key),*]), + ), + } + }; +} #[doc(inline)] pub use self::map::Map; diff --git a/phf/src/map.rs b/phf/src/map.rs index 4d558359..fb687292 100644 --- a/phf/src/map.rs +++ b/phf/src/map.rs @@ -13,6 +13,7 @@ use phf_shared::{self, HashKey, PhfBorrow, PhfHash}; /// The fields of this struct are public so that they may be initialized by the /// `phf_map!` macro and code generation. They are subject to change at any /// time and should never be accessed directly. +#[derive(Clone, Copy)] pub struct Map { #[doc(hidden)] pub key: HashKey, diff --git a/phf/src/ordered_map.rs b/phf/src/ordered_map.rs index c8d5ac59..0af5681b 100644 --- a/phf/src/ordered_map.rs +++ b/phf/src/ordered_map.rs @@ -16,6 +16,7 @@ use phf_shared::{self, HashKey, PhfBorrow, PhfHash}; /// The fields of this struct are public so that they may be initialized by the /// `phf_ordered_map!` macro and code generation. They are subject to change at /// any time and should never be accessed directly. 
+#[derive(Clone, Copy)] pub struct OrderedMap { #[doc(hidden)] pub key: HashKey, diff --git a/phf/src/ordered_set.rs b/phf/src/ordered_set.rs index e85d4571..33fd5c92 100644 --- a/phf/src/ordered_set.rs +++ b/phf/src/ordered_set.rs @@ -15,6 +15,7 @@ use phf_shared::PhfBorrow; /// The fields of this struct are public so that they may be initialized by the /// `phf_ordered_set!` macro and code generation. They are subject to change at /// any time and should never be accessed directly. +#[derive(Clone, Copy)] pub struct OrderedSet { #[doc(hidden)] pub map: OrderedMap, diff --git a/phf/src/set.rs b/phf/src/set.rs index d9fdd5bb..641b1729 100644 --- a/phf/src/set.rs +++ b/phf/src/set.rs @@ -14,6 +14,7 @@ use crate::{map, Map}; /// The fields of this struct are public so that they may be initialized by the /// `phf_set!` macro and code generation. They are subject to change at any /// time and should never be accessed directly. +#[derive(Clone, Copy)] pub struct Set { #[doc(hidden)] pub map: Map, diff --git a/phf_codegen/Cargo.toml b/phf_codegen/Cargo.toml index 6e6c90d9..8a0736c0 100644 --- a/phf_codegen/Cargo.toml +++ b/phf_codegen/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "phf_codegen" authors = ["Steven Fackler "] -version = "0.10.0" +version = "0.11.0" license = "MIT" description = "Codegen library for PHF types" repository = "https://github.com/sfackler/rust-phf" @@ -9,5 +9,5 @@ edition = "2018" readme = "../README.md" [dependencies] -phf_generator = "0.10.0" -phf_shared = "0.10.0" +phf_generator = "0.11.0" +phf_shared = "0.11.0" diff --git a/phf_codegen/src/lib.rs b/phf_codegen/src/lib.rs index 2bfb9722..6907f695 100644 --- a/phf_codegen/src/lib.rs +++ b/phf_codegen/src/lib.rs @@ -128,7 +128,7 @@ //! // ... //! ``` -#![doc(html_root_url = "https://docs.rs/phf_codegen/0.10")] +#![doc(html_root_url = "https://docs.rs/phf_codegen/0.11")] use phf_shared::{FmtConst, PhfHash}; use std::collections::HashSet; diff --git a/phf_codegen/test/Cargo.toml b/phf_codegen/test/Cargo.toml index e9d65264..86212791 100644 --- a/phf_codegen/test/Cargo.toml +++ b/phf_codegen/test/Cargo.toml @@ -6,11 +6,11 @@ build = "build.rs" edition = "2018" [dependencies] -phf = { version = "0.10.0", features = ["uncased", "unicase"] } +phf = { version = "0.11.0", features = ["uncased", "unicase"] } uncased = { version = "0.9.6", default-features = false } unicase = "2.4.0" [build-dependencies] -phf_codegen = { version = "0.10.0", path = ".." } +phf_codegen = { version = "0.11.0", path = ".." 
} unicase = "2.4.0" uncased = { version = "0.9.6", default-features = false } diff --git a/phf_generator/Cargo.toml b/phf_generator/Cargo.toml index 806b4412..7dd14bdf 100644 --- a/phf_generator/Cargo.toml +++ b/phf_generator/Cargo.toml @@ -1,15 +1,14 @@ [package] name = "phf_generator" authors = ["Steven Fackler "] -version = "0.10.0" +version = "0.11.0" license = "MIT" description = "PHF generation logic" repository = "https://github.com/sfackler/rust-phf" edition = "2018" [dependencies] -rand = { version = "0.8", features = ["small_rng"] } -phf_shared = { version = "0.10.0", default-features = false } +phf_shared = { version = "0.11.0", default-features = false } # for stable black_box() criterion = { version = "=0.3.4", optional = true } @@ -23,3 +22,6 @@ harness = false [[bin]] name = "gen_hash_test" required-features = ["criterion"] + +[features] +const-api = ["phf_shared/const-api"] diff --git a/phf_generator/src/bin/gen_hash_test.rs b/phf_generator/src/bin/gen_hash_test.rs index 2e1fbec4..ad14fa45 100644 --- a/phf_generator/src/bin/gen_hash_test.rs +++ b/phf_generator/src/bin/gen_hash_test.rs @@ -1,16 +1,16 @@ use criterion::*; -use rand::distributions::Alphanumeric; -use rand::rngs::SmallRng; -use rand::{Rng, SeedableRng}; - -use phf_generator::generate_hash; +use phf_generator::{generate_hash, rng::Rng}; fn gen_vec(len: usize) -> Vec { - let mut rng = SmallRng::seed_from_u64(0xAAAAAAAAAAAAAAAA).sample_iter(Alphanumeric); + let mut rng = Rng::new(0xAAAAAAAAAAAAAAAA); (0..len) - .map(move |_| rng.by_ref().take(64).collect::()) + .map(move |_| { + let mut str = String::with_capacity(64); + (0..64).for_each(|_| str.push(rng.generate_alphanumeric())); + str + }) .collect() } diff --git a/phf_generator/src/lib.rs b/phf_generator/src/lib.rs index 8b75a36e..e24f9933 100644 --- a/phf_generator/src/lib.rs +++ b/phf_generator/src/lib.rs @@ -2,29 +2,230 @@ //! //! [phf]: https://docs.rs/phf -#![doc(html_root_url = "https://docs.rs/phf_generator/0.10")] +// XXX: Temporary until stabilization. +#![allow(incomplete_features)] +#![feature( + const_fn_trait_bound, + const_option, + const_panic, + const_trait_impl, + const_mut_refs, + generic_const_exprs +)] +#![doc(html_root_url = "https://docs.rs/phf_generator/0.11")] + +pub mod rng; +#[cfg(feature = "const-api")] +mod utils; + use phf_shared::{HashKey, PhfHash}; -use rand::distributions::Standard; -use rand::rngs::SmallRng; -use rand::{Rng, SeedableRng}; +use rng::Rng; -const DEFAULT_LAMBDA: usize = 5; +// We need `DEFAULT_LAMBDA` as part of the stable public API to formalize +// where clauses for the const API on map and set generation methods. 
+#[doc(hidden)] +pub const DEFAULT_LAMBDA: usize = 5; const FIXED_SEED: u64 = 1234567890; +#[cfg(feature = "const-api")] +pub struct HashState +where + [(); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA]: Sized, +{ + pub key: HashKey, + pub disps: utils::ArrayVec<(u32, u32), { (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA }>, + pub map: utils::ArrayVec, +} + +#[cfg(not(feature = "const-api"))] pub struct HashState { pub key: HashKey, pub disps: Vec<(u32, u32)>, pub map: Vec, } +#[cfg(feature = "const-api")] +pub const fn generate_hash(entries: &[H; N]) -> HashState +where + [(); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA]: Sized, +{ + let mut rng = Rng::new(FIXED_SEED); + loop { + match try_generate_hash(entries, rng.generate()) { + Some(state) => break state, + None => continue, + } + } +} + +#[cfg(not(feature = "const-api"))] pub fn generate_hash(entries: &[H]) -> HashState { - SmallRng::seed_from_u64(FIXED_SEED) - .sample_iter(Standard) - .find_map(|key| try_generate_hash(entries, key)) - .expect("failed to solve PHF") + let mut rng = Rng::new(FIXED_SEED); + loop { + match try_generate_hash(entries, rng.generate()) { + Some(state) => break state, + None => continue, + } + } +} + +#[cfg(feature = "const-api")] +const fn try_generate_hash( + entries: &[H; N], + key: HashKey, +) -> Option> +where + [(); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA]: Sized, +{ + use utils::ArrayVec; + + #[derive(Clone, Copy)] + struct Bucket { + idx: usize, + keys: ArrayVec, + } + + impl const Default for Bucket { + fn default() -> Self { + Bucket { + idx: 0, + keys: ArrayVec::new_empty(0), + } + } + } + + let hashes = { + let mut hashes = [phf_shared::Hashes::default(); N]; + let mut i = 0; + while i < N { + hashes[i] = phf_shared::hash(&entries[i], &key); + i += 1; + } + hashes + }; + + let mut buckets = { + let mut buckets = [Bucket::::default(); { (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA }]; + let mut i = 0; + while i < buckets.len() { + buckets[i].idx = i; + i += 1; + } + buckets + }; + + let mut i = 0; + while i < hashes.len() { + buckets[(hashes[i].g % (buckets.len() as u32)) as usize] + .keys + .push(i); + i += 1; + } + + // Sort descending + { + // This is a bubble sort. Given that it is executed at compile-time + // without any runtime overhead over relatively few entries from + // hand-written macro literals, its minimal and robust implementation + // is good enough for us and the const evaluation engine. + let mut swapped = true; + while swapped { + swapped = false; + let mut i = 1; + while i < buckets.len() { + if buckets[i - 1].keys.len() < buckets[i].keys.len() { + // Swap elements + let temp = buckets[i - 1]; + buckets[i - 1] = buckets[i]; + buckets[i] = temp; + + swapped = true; + } + i += 1; + } + } + } + + let mut map: ArrayVec, N> = ArrayVec::new_full(None); + let mut disps: ArrayVec<(u32, u32), { (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA }> = + ArrayVec::new_full((0, 0)); + + // store whether an element from the bucket being placed is + // located at a certain position, to allow for efficient overlap + // checks. It works by storing the generation in each cell and + // each new placement-attempt is a new generation, so you can tell + // if this is legitimately full by checking that the generations + // are equal. (A u64 is far too large to overflow in a reasonable + // time for current hardware.) 
+ let mut try_map = [0u64; N]; + let mut generation = 0u64; + + // the actual values corresponding to the markers above, as + // (index, key) pairs, for adding to the main map, once we've + // chosen the right disps. + let mut values_to_add: ArrayVec<(usize, usize), N> = ArrayVec::new_empty((0, 0)); + + let mut i = 0; + 'buckets: while i < buckets.len() { + let bucket = &buckets[i]; + i += 1; + + let mut d1 = 0; + while d1 < N { + let mut d2 = 0; + 'disps: while d2 < N { + values_to_add.clear(); + generation += 1; + + let mut j = 0; + while j < bucket.keys.len() { + let key = bucket.keys[j]; + let idx = + (phf_shared::displace(hashes[key].f1, hashes[key].f2, d1 as u32, d2 as u32) + % (N as u32)) as usize; + if map.get_ref(idx).is_some() || try_map[idx] == generation { + d2 += 1; + continue 'disps; + } + try_map[idx] = generation; + values_to_add.push((idx, key)); + j += 1; + } + + // We've picked a good set of disps + disps.set(bucket.idx, (d1 as u32, d2 as u32)); + j = 0; + while j < values_to_add.len() { + let (idx, key) = values_to_add.get(j); + map.set(idx, Some(key)); + j += 1; + } + continue 'buckets; + } + d1 += 1; + } + + // Unable to find displacements for a bucket + return None; + } + + Some(HashState { + key, + disps, + map: { + let mut result: ArrayVec = ArrayVec::new_full(0); + let mut i = 0; + while i < map.len() { + result.set(i, map.get(i).unwrap()); + i += 1; + } + result + }, + }) } +#[cfg(not(feature = "const-api"))] fn try_generate_hash(entries: &[H], key: HashKey) -> Option { struct Bucket { idx: usize, diff --git a/phf_generator/src/rng.rs b/phf_generator/src/rng.rs new file mode 100644 index 00000000..470f6168 --- /dev/null +++ b/phf_generator/src/rng.rs @@ -0,0 +1,66 @@ +//! A fixed-seed PRNG based on the wyrand algorithm. +//! +//! The focus is to provide a fast implementation that is usable in const +//! context, but not to be cryptographically secure by any means. + +/// A tiny and fast pseudo-random number generator based on wyrand. +/// +/// This must be initialized to a fixed seed which will be the +/// base for random number generation. +#[derive(Clone, Debug)] +#[repr(transparent)] +pub struct Rng { + seed: u64, +} + +impl Rng { + /// Creates a new RNG given an initial seed. + pub const fn new(seed: u64) -> Self { + Self { seed } + } + + /// Generates a pseudo-random [`u64`] value and alters the + /// internal state. + /// + /// This method may be called repeatedly on the same [`Rng`] + /// instance to produce several random numbers. + #[inline] + pub const fn generate(&mut self) -> u64 { + self.seed = self.seed.wrapping_add(0xa0761d6478bd642f); + + let t: u128 = (self.seed as u128).wrapping_mul((self.seed ^ 0xe7037ed1a0b428db) as u128); + (t.wrapping_shr(64) ^ t) as u64 + } + + /// Generates a pseudo-random [`char`] value and alters the + /// internal state. + /// + /// This method may be called repeatedly on the same [`Rng`] + /// to produce a random string. 
+ #[inline] + pub const fn generate_alphanumeric(&mut self) -> char { + const CHARS: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; + const NCHARS: u32 = CHARS.len() as u32; + + // Adapted from https://lemire.me/blog/2016/06/30/fast-random-shuffling/ + let mut r = self.generate() as u32; + let mut hi = (((r as u64) * (NCHARS as u64)) >> 32) as u32; + let mut lo = r.wrapping_mul(NCHARS); + if lo < NCHARS { + let t = NCHARS.wrapping_neg() % NCHARS; + while lo < t { + r = self.generate() as u32; + hi = (((r as u64) * (NCHARS as u64)) >> 32) as u32; + lo = r.wrapping_mul(NCHARS); + } + } + + CHARS[hi as usize] as char + } +} + +// TODO: Implement the `Iterator` trait for `Rng` once all its provided methods +// are decorated with `#[method_body_is_const]`. Before that, we'd have to +// implement *all* Iterator methods by hand which would become very verbose +// for mostly unneeded features. Thereby we will wait until we get away with +// just providing a `next` implementation on our part. diff --git a/phf_generator/src/utils.rs b/phf_generator/src/utils.rs new file mode 100644 index 00000000..f2f910fc --- /dev/null +++ b/phf_generator/src/utils.rs @@ -0,0 +1,106 @@ +use core::ops::Deref; + +#[derive(Clone, Copy)] +pub struct ArrayVec { + arr: [T; N], + len: usize, +} + +impl ArrayVec { + #[inline(always)] + pub const fn new_empty(marker: T) -> Self { + Self { + arr: [marker; N], + len: 0, + } + } + + #[inline(always)] + pub const fn new_full(marker: T) -> Self { + Self { + arr: [marker; N], + len: N, + } + } + + #[inline] + pub const fn len(&self) -> usize { + self.len + } + + #[inline] + pub const fn capacity(&self) -> usize { + self.arr.len() + } + + #[inline] + pub const fn push(&mut self, value: T) { + assert!(self.len() < self.capacity()); + self.arr[self.len] = value; + self.len += 1; + } + + #[inline] + pub const fn clear(&mut self) { + self.len = 0; + } + + #[inline] + pub const fn get(&self, i: usize) -> T { + assert!(i < self.len()); + self.arr[i] + } + + #[inline] + pub const fn get_ref(&self, i: usize) -> &T { + assert!(i < self.len()); + &self.arr[i] + } + + #[inline] + pub const fn set(&mut self, i: usize, value: T) { + if i == self.len() { + self.push(value); + } else { + assert!(i < self.len()); + self.arr[i] = value; + } + } +} + +impl const Deref for ArrayVec { + type Target = [T; N]; + + #[inline(always)] + fn deref(&self) -> &Self::Target { + &self.arr + } +} + +#[cfg(test)] +mod tests { + use super::ArrayVec; + + #[test] + fn test_api() { + let mut arr = ArrayVec::::new_empty(0); + assert_eq!(arr.len(), 0); + assert_eq!(arr.capacity(), 10); + + arr.push(1); + arr.push(2); + arr.push(4); + assert_eq!(arr.len(), 3); + assert_eq!(arr.capacity(), 10); + assert_eq!(arr.get(2), 4); + assert_eq!(arr.get(0), 1); + + arr.push(4); + arr.set(2, 3); + assert_eq!(arr.get(2), 3); + assert_eq!(arr.get(arr.len() - 1), 4); + + arr.clear(); + assert_eq!(arr.len(), 0); + } +} diff --git a/phf_macros/Cargo.toml b/phf_macros/Cargo.toml index a3454a36..74d53719 100644 --- a/phf_macros/Cargo.toml +++ b/phf_macros/Cargo.toml @@ -1,26 +1,17 @@ [package] name = "phf_macros" -version = "0.10.0" +version = "0.11.0" authors = ["Steven Fackler "] edition = "2018" license = "MIT" -description = "Macros to generate types in the phf crate" +description = "Macros utils to generate types in the phf crate; don't use directly" repository = "https://github.com/sfackler/rust-phf" readme = "../README.md" include = ["src/lib.rs"] -[lib] -proc-macro = true - [features] -unicase = 
["unicase_", "phf_shared/unicase"] +unicase = ["phf_shared/unicase"] [dependencies] -syn = { version = "1", features = ["full"] } -quote = "1" -proc-macro2 = "1" -proc-macro-hack = "0.5.4" -unicase_ = { package = "unicase", version = "2.4.0", optional = true } - -phf_generator = "0.10.0" -phf_shared = { version = "0.10.0", default-features = false } +phf_generator = { version = "0.11.0", features = ["const-api"] } +phf_shared = { version = "0.11.0", default-features = false, features = ["const-api"] } diff --git a/phf_macros/src/lib.rs b/phf_macros/src/lib.rs index 8161b05f..8c37e9bb 100644 --- a/phf_macros/src/lib.rs +++ b/phf_macros/src/lib.rs @@ -1,320 +1,115 @@ -//! A set of macros to generate Rust source for PHF data structures at compile time. +//! Helper functions for macros to generate PHF data structures at compile time. //! See [the `phf` crate's documentation][phf] for details. //! //! [phf]: https://docs.rs/phf -use phf_generator::HashState; +// XXX: Remove on stabilization. +#![allow(incomplete_features)] +#![feature( + const_fn_trait_bound, + const_maybe_uninit_write, + const_mut_refs, + const_ptr_read, + const_panic, + const_refs_to_cell, + const_trait_impl, + const_transmute_copy, + generic_const_exprs, + maybe_uninit_uninit_array +)] + +use core::mem::{transmute_copy, MaybeUninit}; + +use phf_generator::{HashState, DEFAULT_LAMBDA}; use phf_shared::PhfHash; -use proc_macro::TokenStream; -use quote::quote; -use std::collections::HashSet; -use std::hash::Hasher; -use syn::parse::{self, Parse, ParseStream}; -use syn::punctuated::Punctuated; -#[cfg(feature = "unicase")] -use syn::ExprLit; -use syn::{parse_macro_input, Error, Expr, Lit, Token, UnOp}; -#[cfg(feature = "unicase")] -use unicase_::UniCase; -#[derive(Hash, PartialEq, Eq, Clone)] -enum ParsedKey { - Str(String), - Binary(Vec), - Char(char), - I8(i8), - I16(i16), - I32(i32), - I64(i64), - I128(i128), - U8(u8), - U16(u16), - U32(u32), - U64(u64), - U128(u128), - Bool(bool), - #[cfg(feature = "unicase")] - UniCase(UniCase), +const unsafe fn const_array_assume_init(array: &[MaybeUninit; N]) -> [T; N] { + transmute_copy(array) } -impl PhfHash for ParsedKey { - fn phf_hash(&self, state: &mut H) - where - H: Hasher, - { - match self { - ParsedKey::Str(s) => s.phf_hash(state), - ParsedKey::Binary(s) => s.phf_hash(state), - ParsedKey::Char(s) => s.phf_hash(state), - ParsedKey::I8(s) => s.phf_hash(state), - ParsedKey::I16(s) => s.phf_hash(state), - ParsedKey::I32(s) => s.phf_hash(state), - ParsedKey::I64(s) => s.phf_hash(state), - ParsedKey::I128(s) => s.phf_hash(state), - ParsedKey::U8(s) => s.phf_hash(state), - ParsedKey::U16(s) => s.phf_hash(state), - ParsedKey::U32(s) => s.phf_hash(state), - ParsedKey::U64(s) => s.phf_hash(state), - ParsedKey::U128(s) => s.phf_hash(state), - ParsedKey::Bool(s) => s.phf_hash(state), - #[cfg(feature = "unicase")] - ParsedKey::UniCase(s) => s.phf_hash(state), - } - } -} - -impl ParsedKey { - fn from_expr(expr: &Expr) -> Option { - match expr { - Expr::Lit(lit) => match &lit.lit { - Lit::Str(s) => Some(ParsedKey::Str(s.value())), - Lit::ByteStr(s) => Some(ParsedKey::Binary(s.value())), - Lit::Byte(s) => Some(ParsedKey::U8(s.value())), - Lit::Char(s) => Some(ParsedKey::Char(s.value())), - Lit::Int(s) => match s.suffix() { - // we've lost the sign at this point, so `-128i8` looks like `128i8`, - // which doesn't fit in an `i8`; parse it as a `u8` and cast (to `0i8`), - // which is handled below, by `Unary` - "i8" => Some(ParsedKey::I8(s.base10_parse::().unwrap() as i8)), - "i16" => 
Some(ParsedKey::I16(s.base10_parse::().unwrap() as i16)), - "i32" => Some(ParsedKey::I32(s.base10_parse::().unwrap() as i32)), - "i64" => Some(ParsedKey::I64(s.base10_parse::().unwrap() as i64)), - "i128" => Some(ParsedKey::I128(s.base10_parse::().unwrap() as i128)), - "u8" => Some(ParsedKey::U8(s.base10_parse::().unwrap())), - "u16" => Some(ParsedKey::U16(s.base10_parse::().unwrap())), - "u32" => Some(ParsedKey::U32(s.base10_parse::().unwrap())), - "u64" => Some(ParsedKey::U64(s.base10_parse::().unwrap())), - "u128" => Some(ParsedKey::U128(s.base10_parse::().unwrap())), - _ => None, - }, - Lit::Bool(s) => Some(ParsedKey::Bool(s.value)), - _ => None, - }, - Expr::Array(array) => { - let mut buf = vec![]; - for expr in &array.elems { - match expr { - Expr::Lit(lit) => match &lit.lit { - Lit::Int(s) => match s.suffix() { - "u8" | "" => buf.push(s.base10_parse::().unwrap()), - _ => return None, - }, - _ => return None, - }, - _ => return None, - } - } - Some(ParsedKey::Binary(buf)) - } - Expr::Unary(unary) => { - // if we received an integer literal (always unsigned) greater than i__::max_value() - // then casting it to a signed integer type of the same width will negate it to - // the same absolute value so we don't need to negate it here - macro_rules! try_negate ( - ($val:expr) => {if $val < 0 { $val } else { -$val }} - ); - - match unary.op { - UnOp::Neg(_) => match ParsedKey::from_expr(&unary.expr)? { - ParsedKey::I8(v) => Some(ParsedKey::I8(try_negate!(v))), - ParsedKey::I16(v) => Some(ParsedKey::I16(try_negate!(v))), - ParsedKey::I32(v) => Some(ParsedKey::I32(try_negate!(v))), - ParsedKey::I64(v) => Some(ParsedKey::I64(try_negate!(v))), - ParsedKey::I128(v) => Some(ParsedKey::I128(try_negate!(v))), - _ => None, - }, - _ => None, - } - } - Expr::Group(group) => ParsedKey::from_expr(&group.expr), - #[cfg(feature = "unicase")] - Expr::Call(call) => { - if let Expr::Path(ep) = call.func.as_ref() { - let segments = &mut ep.path.segments.iter().rev(); - let last = &segments.next()?.ident; - let last_ahead = &segments.next()?.ident; - let is_unicode = last_ahead == "UniCase" && last == "unicode"; - let is_ascii = last_ahead == "UniCase" && last == "ascii"; - if call.args.len() == 1 && (is_unicode || is_ascii) { - if let Some(Expr::Lit(ExprLit { - attrs: _, - lit: Lit::Str(s), - })) = call.args.first() - { - let v = if is_unicode { - UniCase::unicode(s.value()) - } else { - UniCase::ascii(s.value()) - }; - Some(ParsedKey::UniCase(v)) - } else { - None - } - } else { - None - } - } else { - None - } - } - _ => None, - } - } -} - -struct Key { - parsed: ParsedKey, - expr: Expr, -} - -impl PhfHash for Key { - fn phf_hash(&self, state: &mut H) - where - H: Hasher, - { - self.parsed.phf_hash(state) - } +const fn check_duplicates(_entries: &[(Key, Value); N]) { + // TODO: Implement once we can compare keys in const fn and produce + // a formatted panic message that points out the duplicate key. 
} -impl Parse for Key { - fn parse(input: ParseStream<'_>) -> parse::Result { - let expr = input.parse()?; - let parsed = ParsedKey::from_expr(&expr) - .ok_or_else(|| Error::new_spanned(&expr, "unsupported key expression"))?; +pub const fn phf_map( + entries: &[(Key, Value); N], +) -> ([(Key, Value); N], HashState) +where + (Key, Value): Copy, + [(); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA]: Sized, +{ + check_duplicates(entries); - Ok(Key { parsed, expr }) + let mut keys = MaybeUninit::uninit_array::(); + let mut i = 0; + while i < entries.len() { + keys[i].write(&entries[i].0); + i += 1; } -} - -struct Entry { - key: Key, - value: Expr, -} - -impl PhfHash for Entry { - fn phf_hash(&self, state: &mut H) - where - H: Hasher, - { - self.key.phf_hash(state) + let state = phf_generator::generate_hash(unsafe { &const_array_assume_init(&keys) }); + + let mut ordered_entries = MaybeUninit::uninit_array::(); + i = 0; + while i < state.map.len() { + let idx = state.map[i]; + ordered_entries[i].write(entries[idx]); + i += 1; } -} -impl Parse for Entry { - fn parse(input: ParseStream<'_>) -> parse::Result { - let key = input.parse()?; - input.parse::]>()?; - let value = input.parse()?; - Ok(Entry { key, value }) - } + (unsafe { const_array_assume_init(&ordered_entries) }, state) } -struct Map(Vec); +pub const fn phf_ordered_map( + entries: &[(Key, Value); N], +) -> ([(Key, Value); N], HashState) +where + (Key, Value): Copy, + [(); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA]: Sized, +{ + check_duplicates(entries); -impl Parse for Map { - fn parse(input: ParseStream<'_>) -> parse::Result { - let parsed = Punctuated::::parse_terminated(input)?; - let map = parsed.into_iter().collect::>(); - check_duplicates(&map)?; - Ok(Map(map)) + let mut keys = MaybeUninit::uninit_array::(); + let mut i = 0; + while i < entries.len() { + keys[i].write(&entries[i].0); + i += 1; } -} - -struct Set(Vec); - -impl Parse for Set { - fn parse(input: ParseStream<'_>) -> parse::Result { - let parsed = Punctuated::::parse_terminated(input)?; - let set = parsed - .into_iter() - .map(|key| Entry { - key, - value: syn::parse_str("()").unwrap(), - }) - .collect::>(); - check_duplicates(&set)?; - Ok(Set(set)) + let state = phf_generator::generate_hash(unsafe { &const_array_assume_init(&keys) }); + + (*entries, state) +} + +pub const fn phf_set( + entries: &[Key; N], +) -> ([(Key, ()); N], HashState) +where + Key: Copy, + [(); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA]: Sized, +{ + let mut map_entries = MaybeUninit::uninit_array::(); + let mut i = 0; + while i < map_entries.len() { + map_entries[i].write((entries[i], ())); + i += 1; } -} -fn check_duplicates(entries: &[Entry]) -> parse::Result<()> { - let mut keys = HashSet::new(); - for entry in entries { - if !keys.insert(&entry.key.parsed) { - return Err(Error::new_spanned(&entry.key.expr, "duplicate key")); - } + phf_map(unsafe { &const_array_assume_init(&map_entries) }) +} + +pub const fn phf_ordered_set( + entries: &[Key; N], +) -> ([(Key, ()); N], HashState) +where + Key: Copy, + [(); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA]: Sized, +{ + let mut map_entries = MaybeUninit::uninit_array::(); + let mut i = 0; + while i < map_entries.len() { + map_entries[i].write((entries[i], ())); + i += 1; } - Ok(()) -} - -fn build_map(entries: &[Entry], state: HashState) -> proc_macro2::TokenStream { - let key = state.key; - let disps = state.disps.iter().map(|&(d1, d2)| quote!((#d1, #d2))); - let entries = state.map.iter().map(|&idx| { - let key = &entries[idx].key.expr; - let value = 
&entries[idx].value; - quote!((#key, #value)) - }); - - quote! { - phf::Map { - key: #key, - disps: &[#(#disps),*], - entries: &[#(#entries),*], - } - } -} - -fn build_ordered_map(entries: &[Entry], state: HashState) -> proc_macro2::TokenStream { - let key = state.key; - let disps = state.disps.iter().map(|&(d1, d2)| quote!((#d1, #d2))); - let idxs = state.map.iter().map(|idx| quote!(#idx)); - let entries = entries.iter().map(|entry| { - let key = &entry.key.expr; - let value = &entry.value; - quote!((#key, #value)) - }); - - quote! { - phf::OrderedMap { - key: #key, - disps: &[#(#disps),*], - idxs: &[#(#idxs),*], - entries: &[#(#entries),*], - } - } -} - -#[proc_macro_hack::proc_macro_hack] -pub fn phf_map(input: TokenStream) -> TokenStream { - let map = parse_macro_input!(input as Map); - let state = phf_generator::generate_hash(&map.0); - - build_map(&map.0, state).into() -} - -#[proc_macro_hack::proc_macro_hack] -pub fn phf_set(input: TokenStream) -> TokenStream { - let set = parse_macro_input!(input as Set); - let state = phf_generator::generate_hash(&set.0); - - let map = build_map(&set.0, state); - quote!(phf::Set { map: #map }).into() -} - -#[proc_macro_hack::proc_macro_hack] -pub fn phf_ordered_map(input: TokenStream) -> TokenStream { - let map = parse_macro_input!(input as Map); - let state = phf_generator::generate_hash(&map.0); - - build_ordered_map(&map.0, state).into() -} - -#[proc_macro_hack::proc_macro_hack] -pub fn phf_ordered_set(input: TokenStream) -> TokenStream { - let set = parse_macro_input!(input as Set); - let state = phf_generator::generate_hash(&set.0); - let map = build_ordered_map(&set.0, state); - quote!(phf::OrderedSet { map: #map }).into() + phf_ordered_map(unsafe { &const_array_assume_init(&map_entries) }) } diff --git a/phf_macros_tests/Cargo.toml b/phf_macros_tests/Cargo.toml index bc2478c7..7b818220 100644 --- a/phf_macros_tests/Cargo.toml +++ b/phf_macros_tests/Cargo.toml @@ -10,6 +10,6 @@ edition = "2018" [dev-dependencies] trybuild = "1.0" -phf = { version = "0.10", features = ["macros"] } -phf_macros = { version = "0.10", features = ["unicase"] } +phf = { version = "0.11", features = ["macros"] } +phf_macros = { version = "0.11", features = ["unicase"] } unicase = "2.4.0" diff --git a/phf_macros_tests/src/lib.rs b/phf_macros_tests/src/lib.rs index 8b137891..e69de29b 100644 --- a/phf_macros_tests/src/lib.rs +++ b/phf_macros_tests/src/lib.rs @@ -1 +0,0 @@ - diff --git a/phf_macros_tests/tests/test.rs b/phf_macros_tests/tests/test.rs index a10e1a8c..60440b01 100644 --- a/phf_macros_tests/tests/test.rs +++ b/phf_macros_tests/tests/test.rs @@ -241,6 +241,33 @@ mod map { } #[test] + fn test_constexpr_keys() { + static MAP: phf::Map = phf_map! { + stringify!(abc).len() as u8 => 0, + 5 + 4 + 3 => 1, + }; + + assert_eq!(MAP.get(&3), Some(&0)); + assert_eq!(MAP.get(&12), Some(&1)); + assert_eq!(MAP.get(&4), None); + } + + #[test] + fn test_nested_map() { + static MAP: phf::Map<&'static str, phf::Map<&'static str, u16>> = phf_map! { + "nested" => phf_map! { + "map" => 1337, + }, + }; + + assert_eq!( + MAP.get(&"nested").and_then(|m| m.get(&"map")), + Some(&1337) + ); + } + + // FIXME: Re-enable when UniCase is hashable as const fn. 
+ /*#[test] fn test_unicase() { use unicase::UniCase; static MAP: phf::Map, isize> = phf_map!( @@ -250,7 +277,7 @@ mod map { assert!(Some(&10) == MAP.get(&UniCase::new("FOo"))); assert!(Some(&11) == MAP.get(&UniCase::new("bar"))); assert_eq!(None, MAP.get(&UniCase::new("asdf"))); - } + }*/ } mod set { diff --git a/phf_shared/Cargo.toml b/phf_shared/Cargo.toml index 73f0594d..aa515a43 100644 --- a/phf_shared/Cargo.toml +++ b/phf_shared/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "phf_shared" authors = ["Steven Fackler "] -version = "0.10.0" +version = "0.11.0" license = "MIT" description = "Support code shared by PHF libraries" repository = "https://github.com/sfackler/rust-phf" @@ -16,7 +16,8 @@ test = false default = ["std"] std = [] +const-api = [] + [dependencies] -siphasher = "0.3" unicase = { version = "2.4.0", optional = true } uncased = { version = "0.9.6", optional = true, default-features = false } diff --git a/phf_shared/src/lib.rs b/phf_shared/src/lib.rs index 31e37020..729a947b 100644 --- a/phf_shared/src/lib.rs +++ b/phf_shared/src/lib.rs @@ -2,17 +2,21 @@ //! //! [phf]: https://docs.rs/phf -#![doc(html_root_url = "https://docs.rs/phf_shared/0.10")] +// XXX: Temporary until stabilization. +#![feature(const_fn_trait_bound, const_mut_refs, const_panic, const_trait_impl)] +#![doc(html_root_url = "https://docs.rs/phf_shared/0.11")] #![cfg_attr(not(feature = "std"), no_std)] #[cfg(feature = "std")] extern crate std as core; +mod siphasher; + use core::fmt; -use core::hash::{Hash, Hasher}; -use core::num::Wrapping; -use siphasher::sip128::{Hash128, Hasher128, SipHasher13}; +use core::hash::Hasher; +use siphasher::{Hash128, SipHasher13}; +#[derive(Clone, Copy, Debug)] #[non_exhaustive] pub struct Hashes { pub g: u32, @@ -20,17 +24,25 @@ pub struct Hashes { pub f2: u32, } +impl const Default for Hashes { + #[inline(always)] + fn default() -> Self { + Self { g: 0, f1: 0, f2: 0 } + } +} + /// A central typedef for hash keys /// /// Makes experimentation easier by only needing to be updated here. pub type HashKey = u64; #[inline] -pub fn displace(f1: u32, f2: u32, d1: u32, d2: u32) -> u32 { - (Wrapping(d2) + Wrapping(f1) * Wrapping(d1) + Wrapping(f2)).0 +pub const fn displace(f1: u32, f2: u32, d1: u32, d2: u32) -> u32 { + d2.wrapping_add(f1.wrapping_mul(d1)).wrapping_add(f2) } /// `key` is from `phf_generator::HashState`. +#[cfg(not(feature = "const-api"))] #[inline] pub fn hash(x: &T, key: &HashKey) -> Hashes { let mut hasher = SipHasher13::new_with_keys(0, *key); @@ -48,13 +60,32 @@ pub fn hash(x: &T, key: &HashKey) -> Hashes { } } +/// `key` is from `phf_generator::HashState`. +#[cfg(feature = "const-api")] +#[inline] +pub const fn hash(x: &T, key: &HashKey) -> Hashes { + let mut hasher = SipHasher13::new_with_keys(0, *key); + x.phf_hash(&mut hasher); + + let Hash128 { + h1: lower, + h2: upper, + } = hasher.finish128(); + + Hashes { + g: (lower >> 32) as u32, + f1: lower as u32, + f2: upper as u32, + } +} + /// Return an index into `phf_generator::HashState::map`. /// /// * `hash` is from `hash()` in this crate. /// * `disps` is from `phf_generator::HashState::disps`. /// * `len` is the length of `phf_generator::HashState::map`. 
#[inline] -pub fn get_index(hashes: &Hashes, disps: &[(u32, u32)], len: usize) -> u32 { +pub const fn get_index(hashes: &Hashes, disps: &[(u32, u32)], len: usize) -> u32 { let (d1, d2) = disps[(hashes.g % (disps.len() as u32)) as usize]; displace(hashes.f1, hashes.f2, d1, d2) % (len as u32) } @@ -66,9 +97,15 @@ pub fn get_index(hashes: &Hashes, disps: &[(u32, u32)], len: usize) -> u32 { /// between the host and target when cross compiling. pub trait PhfHash { /// Feeds the value into the state given, updating the hasher as necessary. + #[cfg(not(feature = "const-api"))] fn phf_hash(&self, state: &mut H); + /// Feeds the value into the state given, updating the hasher as necessary. + #[cfg(feature = "const-api")] + fn phf_hash(&self, state: &mut H); + /// Feeds a slice of this type into the state provided. + #[cfg(not(feature = "const-api"))] fn phf_hash_slice(data: &[Self], state: &mut H) where Self: Sized, @@ -77,9 +114,24 @@ pub trait PhfHash { piece.phf_hash(state); } } + + /// Feeds a slice of this type into the state provided. + #[cfg(feature = "const-api")] + #[default_method_body_is_const] + fn phf_hash_slice(data: &[Self], state: &mut H) + where + Self: Sized, + { + let mut i = 0; + while i < data.len() { + data[i].phf_hash(state); + i += 1; + } + } } /// Trait for printing types with `const` constructors, used by `phf_codegen` and `phf_macros`. +// TODO: Is a const variant of this trait needed? pub trait FmtConst { /// Print a `const` expression representing this value. fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result; @@ -159,11 +211,21 @@ delegate_debug!(bool); /// `impl PhfBorrow for T` macro_rules! impl_reflexive( ($($t:ty),*) => ( - $(impl PhfBorrow<$t> for $t { - fn borrow(&self) -> &$t { - self + $( + #[cfg(not(feature = "const-api"))] + impl PhfBorrow<$t> for $t { + fn borrow(&self) -> &$t { + self + } + } + + #[cfg(feature = "const-api")] + impl const PhfBorrow<$t> for $t { + fn borrow(&self) -> &$t { + self + } } - })* + )* ) ); @@ -217,30 +279,55 @@ impl PhfHash for Vec { } } +#[cfg(not(feature = "const-api"))] impl<'a, T: 'a + PhfHash + ?Sized> PhfHash for &'a T { fn phf_hash(&self, state: &mut H) { (*self).phf_hash(state) } } +#[cfg(feature = "const-api")] +impl<'a, T: 'a + ~const PhfHash + ?Sized> const PhfHash for &'a T { + fn phf_hash(&self, state: &mut H) { + (*self).phf_hash(state) + } +} + impl<'a, T: 'a + FmtConst + ?Sized> FmtConst for &'a T { fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { (*self).fmt_const(f) } } +#[cfg(not(feature = "const-api"))] impl<'a> PhfBorrow for &'a str { fn borrow(&self) -> &str { self } } +#[cfg(feature = "const-api")] +impl<'a> const PhfBorrow for &'a str { + fn borrow(&self) -> &str { + self + } +} + +#[cfg(not(feature = "const-api"))] +impl<'a> PhfBorrow<[u8]> for &'a [u8] { + fn borrow(&self) -> &[u8] { + self + } +} + +#[cfg(feature = "const-api")] impl<'a> PhfBorrow<[u8]> for &'a [u8] { fn borrow(&self) -> &[u8] { self } } +#[cfg(not(feature = "const-api"))] impl PhfHash for str { #[inline] fn phf_hash(&self, state: &mut H) { @@ -248,6 +335,15 @@ impl PhfHash for str { } } +#[cfg(feature = "const-api")] +impl const PhfHash for str { + #[inline] + fn phf_hash(&self, state: &mut H) { + self.as_bytes().phf_hash(state) + } +} + +#[cfg(not(feature = "const-api"))] impl PhfHash for [u8] { #[inline] fn phf_hash(&self, state: &mut H) { @@ -255,6 +351,14 @@ impl PhfHash for [u8] { } } +#[cfg(feature = "const-api")] +impl const PhfHash for [u8] { + #[inline] + fn phf_hash(&self, state: &mut H) { + 
state.write(self); + } +} + impl FmtConst for [u8] { #[inline] fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -263,14 +367,14 @@ impl FmtConst for [u8] { } } -#[cfg(feature = "unicase")] +#[cfg(all(feature = "unicase", not(feature = "const-api")))] impl PhfHash for unicase::UniCase where - unicase::UniCase: Hash, + unicase::UniCase: core::hash::Hash, { #[inline] fn phf_hash(&self, state: &mut H) { - self.hash(state) + ::hash(self, state) } } @@ -298,7 +402,7 @@ impl<'b, 'a: 'b, S: ?Sized + 'a> PhfBorrow> for unicase: } } -#[cfg(feature = "uncased")] +#[cfg(all(feature = "uncased", not(feature = "const-api")))] impl PhfHash for uncased::UncasedStr { #[inline] fn phf_hash(&self, state: &mut H) { @@ -318,48 +422,99 @@ impl FmtConst for uncased::UncasedStr { } } -#[cfg(feature = "uncased")] +#[cfg(all(feature = "uncased", not(feature = "const-api")))] impl PhfBorrow for &uncased::UncasedStr { fn borrow(&self) -> &uncased::UncasedStr { self } } -macro_rules! sip_impl ( - (le $t:ty) => ( +#[cfg(all(feature = "uncased", feature = "const-api"))] +impl const PhfBorrow for &uncased::UncasedStr { + fn borrow(&self) -> &uncased::UncasedStr { + self + } +} + +// XXX: Macro can be simplified once const Hash trait impls +// landed in upstream Rust. +macro_rules! sip_impl { + (le $t:ty, $meth:ident) => { + #[cfg(not(feature = "const-api"))] impl PhfHash for $t { #[inline] fn phf_hash(&self, state: &mut H) { - self.to_le().hash(state); + state.$meth(self.to_le()); + } + } + + #[cfg(feature = "const-api")] + impl const PhfHash for $t { + #[inline] + fn phf_hash(&self, state: &mut H) { + state.$meth(self.to_le()); } } - ); - ($t:ty) => ( + }; + ($t:ty, $meth:ident) => { + #[cfg(not(feature = "const-api"))] impl PhfHash for $t { #[inline] fn phf_hash(&self, state: &mut H) { - self.hash(state); + state.$meth(*self); } } - ) -); -sip_impl!(u8); -sip_impl!(i8); -sip_impl!(le u16); -sip_impl!(le i16); -sip_impl!(le u32); -sip_impl!(le i32); -sip_impl!(le u64); -sip_impl!(le i64); -sip_impl!(le u128); -sip_impl!(le i128); -sip_impl!(bool); + #[cfg(feature = "const-api")] + impl const PhfHash for $t { + #[inline] + fn phf_hash(&self, state: &mut H) { + state.$meth(*self); + } + } + }; +} + +sip_impl!(u8, write_u8); +sip_impl!(i8, write_i8); +sip_impl!(le u16, write_u16); +sip_impl!(le i16, write_i16); +sip_impl!(le u32, write_u32); +sip_impl!(le i32, write_i32); +sip_impl!(le u64, write_u64); +sip_impl!(le i64, write_i64); +sip_impl!(le u128, write_u128); +sip_impl!(le i128, write_i128); + +#[cfg(not(feature = "const-api"))] +impl PhfHash for bool { + #[inline] + fn phf_hash(&self, state: &mut H) { + state.write_u8(*self as u8); + } +} +#[cfg(feature = "const-api")] +impl const PhfHash for bool { + #[inline] + fn phf_hash(&self, state: &mut H) { + state.write_u8(*self as u8); + } +} + +#[cfg(not(feature = "const-api"))] impl PhfHash for char { #[inline] fn phf_hash(&self, state: &mut H) { - (*self as u32).phf_hash(state) + state.write_u32(*self as u32); + } +} + +#[cfg(feature = "const-api")] +impl const PhfHash for char { + #[inline] + fn phf_hash(&self, state: &mut H) { + state.write_u32(*self as u32); } } @@ -370,6 +525,7 @@ fn fmt_array(array: &[u8], f: &mut fmt::Formatter<'_>) -> fmt::Result { macro_rules! array_impl ( ($t:ty, $n:expr) => ( + #[cfg(not(feature = "const-api"))] impl PhfHash for [$t; $n] { #[inline] fn phf_hash(&self, state: &mut H) { @@ -377,17 +533,33 @@ macro_rules! 
array_impl ( } } + #[cfg(feature = "const-api")] + impl const PhfHash for [$t; $n] { + #[inline] + fn phf_hash(&self, state: &mut H) { + state.write(self); + } + } + impl FmtConst for [$t; $n] { fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt_array(self, f) } } + #[cfg(not(feature = "const-api"))] impl PhfBorrow<[$t]> for [$t; $n] { fn borrow(&self) -> &[$t] { self } } + + #[cfg(feature = "const-api")] + impl const PhfBorrow<[$t]> for [$t; $n] { + fn borrow(&self) -> &[$t] { + self + } + } ) ); diff --git a/phf_shared/src/siphasher.rs b/phf_shared/src/siphasher.rs new file mode 100644 index 00000000..02b00306 --- /dev/null +++ b/phf_shared/src/siphasher.rs @@ -0,0 +1,394 @@ +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! A const-compatible implementation of SipHash with a 128-bit output. + +use core::{hash, mem, ptr}; + +#[derive(Debug, Clone, Copy, Default)] +pub struct Hash128 { + pub h1: u64, + pub h2: u64, +} + +impl const From for Hash128 { + fn from(v: u128) -> Self { + Self { + h1: v as u64, + h2: (v >> 64) as u64, + } + } +} + +impl const From for u128 { + fn from(v: Hash128) -> Self { + (v.h1 as u128) | ((v.h2 as u128) << 64) + } +} + +/// An implementation of SipHash128 1-3. +#[derive(Debug, Clone, Copy)] +pub struct SipHasher13 { + k0: u64, + k1: u64, + length: usize, // how many bytes we've processed + state: State, // hash State + tail: u64, // unprocessed bytes le + ntail: usize, // how many bytes in tail are valid +} + +#[derive(Debug, Clone, Copy)] +struct State { + // v0, v2 and v1, v3 show up in pairs in the algorithm, + // and simd implementations of SipHash will use vectors + // of v02 and v13. By placing them in this order in the struct, + // the compiler can pick up on just a few simd optimizations by itself. + v0: u64, + v2: u64, + v1: u64, + v3: u64, +} + +macro_rules! compress { + ($state:expr) => {{ + compress!($state.v0, $state.v1, $state.v2, $state.v3) + }}; + ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{ + $v0 = $v0.wrapping_add($v1); + $v1 = $v1.rotate_left(13); + $v1 ^= $v0; + $v0 = $v0.rotate_left(32); + $v2 = $v2.wrapping_add($v3); + $v3 = $v3.rotate_left(16); + $v3 ^= $v2; + $v0 = $v0.wrapping_add($v3); + $v3 = $v3.rotate_left(21); + $v3 ^= $v0; + $v2 = $v2.wrapping_add($v1); + $v1 = $v1.rotate_left(17); + $v1 ^= $v2; + $v2 = $v2.rotate_left(32); + }}; +} + +impl State { + #[inline] + const fn c_rounds(&mut self) { + compress!(self); + } + + #[inline] + const fn d_rounds(&mut self) { + compress!(self); + compress!(self); + compress!(self); + } +} + +#[inline] +const fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 { + debug_assert!(len < 8); + let mut i = 0; // current byte index (from LSB) in the output u64 + let mut out = 0; + if i + 3 < len { + out = u32::from_le_bytes([ + buf[start + i], + buf[start + i + 1], + buf[start + i + 2], + buf[start + i + 3], + ]) as u64; + i += 4; + } + if i + 1 < len { + out |= (u16::from_le_bytes([buf[start + i], buf[start + i + 1]]) as u64) << (i * 8); + i += 2; + } + if i < len { + out |= (buf[start + i] as u64) << (i * 8); + i += 1; + } + debug_assert!(i == len); + out +} + +impl SipHasher13 { + /// Creates a new `SipHasher13` that is keyed off the provided keys. 
+ #[inline(always)] + pub const fn new_with_keys(key0: u64, key1: u64) -> Self { + let mut state = SipHasher13 { + k0: key0, + k1: key1, + length: 0, + state: State { + v0: 0, + v1: 0xee, + v2: 0, + v3: 0, + }, + tail: 0, + ntail: 0, + }; + state.reset(); + state + } + + #[inline] + const fn reset(&mut self) { + self.length = 0; + self.state.v0 = self.k0 ^ 0x736f6d6570736575; + self.state.v1 = self.k1 ^ 0x646f72616e646f83; + self.state.v2 = self.k0 ^ 0x6c7967656e657261; + self.state.v3 = self.k1 ^ 0x7465646279746573; + self.ntail = 0; + } + + // A specialized write function for values with size <= 8. + // + // The hashing of multi-byte integers depends on endianness. E.g.: + // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])` + // - big-endian: `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])` + // + // This function does the right thing for little-endian hardware. On + // big-endian hardware `x` must be byte-swapped first to give the right + // behaviour. After any byte-swapping, the input must be zero-extended to + // 64-bits. The caller is responsible for the byte-swapping and + // zero-extension. + #[inline] + const fn short_write(&mut self, x: u64) { + let size = mem::size_of::(); + self.length += size; + + // The original number must be zero-extended, not sign-extended. + debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true }); + + // The number of bytes needed to fill `self.tail`. + let needed = 8 - self.ntail; + + self.tail |= x << (8 * self.ntail); + if size < needed { + self.ntail += size; + return; + } + + // `self.tail` is full, process it. + self.state.v3 ^= self.tail; + self.state.c_rounds(); + self.state.v0 ^= self.tail; + + self.ntail = size - needed; + self.tail = if needed < 8 { x >> (8 * needed) } else { 0 }; + } + + /// Return a 128-bit hash + #[inline] + pub const fn finish128(&self) -> Hash128 { + let mut state = self.state; + + let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail; + + state.v3 ^= b; + state.c_rounds(); + state.v0 ^= b; + + state.v2 ^= 0xee; + state.d_rounds(); + let h1 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3; + + state.v1 ^= 0xdd; + state.d_rounds(); + let h2 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3; + + Hash128 { h1, h2 } + } +} + +impl const hash::Hasher for SipHasher13 { + #[inline] + fn finish(&self) -> u64 { + self.finish128().h2 + } + + #[inline] + fn write(&mut self, msg: &[u8]) { + let length = msg.len(); + self.length += length; + + let mut needed = 0; + + if self.ntail != 0 { + needed = 8 - self.ntail; + if length < needed { + self.tail |= u8to64_le(msg, 0, length) << (8 * self.ntail); + self.ntail += length; + return; + } else { + self.tail |= u8to64_le(msg, 0, needed) << (8 * self.ntail); + self.state.v3 ^= self.tail; + self.state.c_rounds(); + self.state.v0 ^= self.tail; + self.ntail = 0; + } + } + + // Buffered tail is now flushed, process new input. 
+ let len = length - needed; + let left = len & 0x7; + + let mut i = needed; + while i < len - left { + let mi = u64::from_le_bytes([ + msg[i], + msg[i + 1], + msg[i + 2], + msg[i + 3], + msg[i + 4], + msg[i + 5], + msg[i + 6], + msg[i + 7], + ]); + + self.state.v3 ^= mi; + self.state.c_rounds(); + self.state.v0 ^= mi; + + i += 8; + } + + self.tail = u8to64_le(msg, i, left); + self.ntail = left; + } + + #[inline] + fn write_u8(&mut self, i: u8) { + self.short_write::(i as u64); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + self.short_write::(i.to_le() as u64); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.short_write::(i.to_le() as u64); + } + + #[inline] + fn write_u64(&mut self, i: u64) { + self.short_write::(i.to_le()); + } + + #[inline] + fn write_u128(&mut self, i: u128) { + self.write(&i.to_ne_bytes()) + } + + #[inline] + fn write_usize(&mut self, i: usize) { + self.short_write::(i.to_le() as u64); + } + + #[inline] + fn write_i8(&mut self, i: i8) { + self.write_u8(i as u8) + } + + #[inline] + fn write_i16(&mut self, i: i16) { + self.write_u16(i as u16) + } + + #[inline] + fn write_i32(&mut self, i: i32) { + self.write_u32(i as u32) + } + + #[inline] + fn write_i64(&mut self, i: i64) { + self.write_u64(i as u64) + } + + #[inline] + fn write_i128(&mut self, i: i128) { + self.write_u128(i as u128) + } + + #[inline] + fn write_isize(&mut self, i: isize) { + self.write_usize(i as usize) + } +} + +impl Hash128 { + /// Convert into a 16-bytes vector + pub fn as_bytes(&self) -> [u8; 16] { + let mut bytes = [0u8; 16]; + let h1 = self.h1.to_le(); + let h2 = self.h2.to_le(); + unsafe { + ptr::copy_nonoverlapping(&h1 as *const _ as *const u8, bytes.get_unchecked_mut(0), 8); + ptr::copy_nonoverlapping(&h2 as *const _ as *const u8, bytes.get_unchecked_mut(8), 8); + } + bytes + } +} + +#[cfg(test)] +mod tests { + use super::SipHasher13; + use std::hash::{Hash, Hasher}; + + // Hash just the bytes of the slice, without length prefix + struct Bytes<'a>(&'a [u8]); + + impl<'a> Hash for Bytes<'a> { + #[allow(unused_must_use)] + fn hash(&self, state: &mut H) { + let Bytes(v) = *self; + state.write(v); + } + } + + fn hash_with(mut st: SipHasher13, x: &T) -> [u8; 16] { + x.hash(&mut st); + st.finish128().as_bytes() + } + + #[test] + #[allow(unused_must_use)] + fn test_siphash128_1_3() { + let vecs: [[u8; 16]; 1] = [[ + 231, 126, 188, 178, 39, 136, 165, 190, 253, 98, 219, 106, 221, 48, 48, 1, + ]]; + + let k0 = 0x_07_06_05_04_03_02_01_00; + let k1 = 0x_0f_0e_0d_0c_0b_0a_09_08; + let mut buf = Vec::new(); + let mut t = 0; + let mut state_inc = SipHasher13::new_with_keys(k0, k1); + + while t < 1 { + let vec = vecs[t]; + let out = hash_with(SipHasher13::new_with_keys(k0, k1), &Bytes(&buf)); + assert_eq!(vec, out[..]); + + let full = hash_with(SipHasher13::new_with_keys(k0, k1), &Bytes(&buf)); + let i = state_inc.finish128().as_bytes(); + + assert_eq!(full, i); + assert_eq!(full, vec); + + buf.push(t as u8); + Hasher::write(&mut state_inc, &[t as u8]); + + t += 1; + } + } +} diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 00000000..f588b5ff --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,3 @@ +[toolchain] +channel = "nightly-2021-09-29" +components = ["rustfmt", "clippy", "rust-src"]
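
Usage sketch (illustrative only, not part of the patch): with the `macros` feature of `phf` and the nightly toolchain pinned in rust-toolchain.toml above, the rewritten declarative `phf_map!` should let a map be built entirely inside a `static` initializer, going through the `const fn` pipeline introduced here (`__phf_macros::phf_map` computes the hash state at compile time, then `phf::build_map` assembles the `Map`). The names `phf_map`, `build_map`, and `__phf_macros` come from this diff; the `KEYWORDS` map and `main` below are hypothetical examples, not code taken from the patch, and are a sketch of expected usage rather than a verified build:

use phf::{phf_map, Map};

// Hypothetical example map; the macro invocation is evaluated at compile time
// via the const API, so no proc macro or build script is involved.
static KEYWORDS: Map<&'static str, u32> = phf_map! {
    "loop" => 0,
    "continue" => 1,
    "break" => 2,
};

fn main() {
    // Runtime lookups are unchanged from the proc-macro version.
    assert_eq!(KEYWORDS.get("loop"), Some(&0));
    assert_eq!(KEYWORDS.get("match"), None);
}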