From 41e78575ae4861835060e9bca706d99956eee2c6 Mon Sep 17 00:00:00 2001 From: Valentin B Date: Thu, 21 Oct 2021 14:23:07 +0200 Subject: [PATCH 01/21] phf_shared: Start constifying PhfBorrow impls --- README.md | 2 ++ phf_shared/Cargo.toml | 4 ++- phf_shared/src/lib.rs | 71 +++++++++++++++++++++++++++++++++++-------- rust-toolchain.toml | 3 ++ 4 files changed, 66 insertions(+), 14 deletions(-) create mode 100644 rust-toolchain.toml diff --git a/README.md b/README.md index f01286d3..7f0e62f9 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,8 @@ a compiler note about how long it took. MSRV (minimum supported rust version) is Rust 1.46. +TODO: Clarify MSRV when building with const feature. + ## Usage PHF data structures can be constructed via either the procedural diff --git a/phf_shared/Cargo.toml b/phf_shared/Cargo.toml index 73f0594d..1736611d 100644 --- a/phf_shared/Cargo.toml +++ b/phf_shared/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "phf_shared" authors = ["Steven Fackler "] -version = "0.10.0" +version = "0.11.0" license = "MIT" description = "Support code shared by PHF libraries" repository = "https://github.com/sfackler/rust-phf" @@ -16,6 +16,8 @@ test = false default = ["std"] std = [] +const-api = [] + [dependencies] siphasher = "0.3" unicase = { version = "2.4.0", optional = true } diff --git a/phf_shared/src/lib.rs b/phf_shared/src/lib.rs index 31e37020..495ab21f 100644 --- a/phf_shared/src/lib.rs +++ b/phf_shared/src/lib.rs @@ -2,6 +2,7 @@ //! //! [phf]: https://docs.rs/phf +#![feature(const_trait_impl)] // XXX: Temporary until stabilization. 
#![doc(html_root_url = "https://docs.rs/phf_shared/0.10")] #![cfg_attr(not(feature = "std"), no_std)] @@ -10,7 +11,6 @@ extern crate std as core; use core::fmt; use core::hash::{Hash, Hasher}; -use core::num::Wrapping; use siphasher::sip128::{Hash128, Hasher128, SipHasher13}; #[non_exhaustive] @@ -26,8 +26,8 @@ pub struct Hashes { pub type HashKey = u64; #[inline] -pub fn displace(f1: u32, f2: u32, d1: u32, d2: u32) -> u32 { - (Wrapping(d2) + Wrapping(f1) * Wrapping(d1) + Wrapping(f2)).0 +pub const fn displace(f1: u32, f2: u32, d1: u32, d2: u32) -> u32 { + d2.wrapping_add(f1).wrapping_mul(d1).wrapping_add(f2) } /// `key` is from `phf_generator::HashState`. @@ -54,7 +54,7 @@ pub fn hash(x: &T, key: &HashKey) -> Hashes { /// * `disps` is from `phf_generator::HashState::disps`. /// * `len` is the length of `phf_generator::HashState::map`. #[inline] -pub fn get_index(hashes: &Hashes, disps: &[(u32, u32)], len: usize) -> u32 { +pub const fn get_index(hashes: &Hashes, disps: &[(u32, u32)], len: usize) -> u32 { let (d1, d2) = disps[(hashes.g % (disps.len() as u32)) as usize]; displace(hashes.f1, hashes.f2, d1, d2) % (len as u32) } @@ -69,17 +69,21 @@ pub trait PhfHash { fn phf_hash(&self, state: &mut H); /// Feeds a slice of this type into the state provided. + //#[default_method_body_is_const] fn phf_hash_slice(data: &[Self], state: &mut H) where Self: Sized, { - for piece in data { - piece.phf_hash(state); + let mut i = 0; + while i < data.len() { + data[i].phf_hash(state); + i += 1; } } } /// Trait for printing types with `const` constructors, used by `phf_codegen` and `phf_macros`. +// TODO: Is a const variant of this trait needed? pub trait FmtConst { /// Print a `const` expression representing this value. fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result; @@ -159,11 +163,21 @@ delegate_debug!(bool); /// `impl PhfBorrow for T` macro_rules! 
impl_reflexive( ($($t:ty),*) => ( - $(impl PhfBorrow<$t> for $t { - fn borrow(&self) -> &$t { - self + $( + #[cfg(not(feature = "const-api"))] + impl PhfBorrow<$t> for $t { + fn borrow(&self) -> &$t { + self + } + } + + #[cfg(feature = "const-api")] + impl const PhfBorrow<$t> for $t { + fn borrow(&self) -> &$t { + self + } } - })* + )* ) ); @@ -184,14 +198,14 @@ impl_reflexive!( [u8] ); -#[cfg(feature = "std")] +#[cfg(all(feature = "std", not(feature = "const-api")))] impl PhfBorrow for String { fn borrow(&self) -> &str { self } } -#[cfg(feature = "std")] +#[cfg(all(feature = "std", not(feature = "const-api")))] impl PhfBorrow<[u8]> for Vec { fn borrow(&self) -> &[u8] { self @@ -229,12 +243,28 @@ impl<'a, T: 'a + FmtConst + ?Sized> FmtConst for &'a T { } } +#[cfg(not(feature = "const-api"))] impl<'a> PhfBorrow for &'a str { fn borrow(&self) -> &str { self } } +#[cfg(feature = "const-api")] +impl<'a> const PhfBorrow for &'a str { + fn borrow(&self) -> &str { + self + } +} + +#[cfg(not(feature = "const-api"))] +impl<'a> PhfBorrow<[u8]> for &'a [u8] { + fn borrow(&self) -> &[u8] { + self + } +} + +#[cfg(feature = "const-api")] impl<'a> PhfBorrow<[u8]> for &'a [u8] { fn borrow(&self) -> &[u8] { self @@ -318,13 +348,20 @@ impl FmtConst for uncased::UncasedStr { } } -#[cfg(feature = "uncased")] +#[cfg(all(feature = "uncased", not(feature = "const-api")))] impl PhfBorrow for &uncased::UncasedStr { fn borrow(&self) -> &uncased::UncasedStr { self } } +#[cfg(all(feature = "uncased", feature = "const-api"))] +impl const PhfBorrow for &uncased::UncasedStr { + fn borrow(&self) -> &uncased::UncasedStr { + self + } +} + macro_rules! sip_impl ( (le $t:ty) => ( impl PhfHash for $t { @@ -383,11 +420,19 @@ macro_rules! 
array_impl ( } } + #[cfg(not(feature = "const-api"))] impl PhfBorrow<[$t]> for [$t; $n] { fn borrow(&self) -> &[$t] { self } } + + #[cfg(feature = "const-api")] + impl const PhfBorrow<[$t]> for [$t; $n] { + fn borrow(&self) -> &[$t] { + self + } + } ) ); diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 00000000..f588b5ff --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,3 @@ +[toolchain] +channel = "nightly-2021-09-29" +components = ["rustfmt", "clippy", "rust-src"] From 9bc0631d8997029aa8798a12a0f87618e79ed5f9 Mon Sep 17 00:00:00 2001 From: Valentin B Date: Thu, 21 Oct 2021 14:31:58 +0200 Subject: [PATCH 02/21] phf_shared: Make PhfHash usable in const context --- phf_shared/src/lib.rs | 147 +++++++++++++++++++++++++++++++++++------- 1 file changed, 122 insertions(+), 25 deletions(-) diff --git a/phf_shared/src/lib.rs b/phf_shared/src/lib.rs index 495ab21f..98051d20 100644 --- a/phf_shared/src/lib.rs +++ b/phf_shared/src/lib.rs @@ -2,7 +2,8 @@ //! //! [phf]: https://docs.rs/phf -#![feature(const_trait_impl)] // XXX: Temporary until stabilization. +// XXX: Temporary until stabilization. +#![feature(const_fn_trait_bound, const_mut_refs, const_trait_impl)] #![doc(html_root_url = "https://docs.rs/phf_shared/0.10")] #![cfg_attr(not(feature = "std"), no_std)] @@ -10,7 +11,7 @@ extern crate std as core; use core::fmt; -use core::hash::{Hash, Hasher}; +use core::hash::Hasher; use siphasher::sip128::{Hash128, Hasher128, SipHasher13}; #[non_exhaustive] @@ -66,11 +67,28 @@ pub const fn get_index(hashes: &Hashes, disps: &[(u32, u32)], len: usize) -> u32 /// between the host and target when cross compiling. pub trait PhfHash { /// Feeds the value into the state given, updating the hasher as necessary. + #[cfg(not(feature = "const-api"))] fn phf_hash(&self, state: &mut H); + /// Feeds the value into the state given, updating the hasher as necessary. 
+ #[cfg(feature = "const-api")] + fn phf_hash(&self, state: &mut H); + /// Feeds a slice of this type into the state provided. - //#[default_method_body_is_const] + #[cfg(not(feature = "const-api"))] fn phf_hash_slice(data: &[Self], state: &mut H) + where + Self: Sized, + { + for piece in data { + piece.phf_hash(state); + } + } + + /// Feeds a slice of this type into the state provided. + #[cfg(feature = "const-api")] + #[default_method_body_is_const] + fn phf_hash_slice(data: &[Self], state: &mut H) where Self: Sized, { @@ -198,14 +216,14 @@ impl_reflexive!( [u8] ); -#[cfg(all(feature = "std", not(feature = "const-api")))] +#[cfg(feature = "std")] impl PhfBorrow for String { fn borrow(&self) -> &str { self } } -#[cfg(all(feature = "std", not(feature = "const-api")))] +#[cfg(feature = "std")] impl PhfBorrow<[u8]> for Vec { fn borrow(&self) -> &[u8] { self @@ -231,12 +249,20 @@ impl PhfHash for Vec { } } +#[cfg(not(feature = "const-api"))] impl<'a, T: 'a + PhfHash + ?Sized> PhfHash for &'a T { fn phf_hash(&self, state: &mut H) { (*self).phf_hash(state) } } +#[cfg(feature = "const-api")] +impl<'a, T: 'a + ~const PhfHash + ?Sized> const PhfHash for &'a T { + fn phf_hash(&self, state: &mut H) { + (*self).phf_hash(state) + } +} + impl<'a, T: 'a + FmtConst + ?Sized> FmtConst for &'a T { fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { (*self).fmt_const(f) @@ -271,6 +297,7 @@ impl<'a> PhfBorrow<[u8]> for &'a [u8] { } } +#[cfg(not(feature = "const-api"))] impl PhfHash for str { #[inline] fn phf_hash(&self, state: &mut H) { @@ -278,6 +305,15 @@ impl PhfHash for str { } } +#[cfg(feature = "const-api")] +impl const PhfHash for str { + #[inline] + fn phf_hash(&self, state: &mut H) { + self.as_bytes().phf_hash(state) + } +} + +#[cfg(not(feature = "const-api"))] impl PhfHash for [u8] { #[inline] fn phf_hash(&self, state: &mut H) { @@ -285,6 +321,14 @@ impl PhfHash for [u8] { } } +#[cfg(feature = "const-api")] +impl const PhfHash for [u8] { + #[inline] + fn 
phf_hash(&self, state: &mut H) { + state.write(self); + } +} + impl FmtConst for [u8] { #[inline] fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -362,41 +406,85 @@ impl const PhfBorrow for &uncased::UncasedStr { } } -macro_rules! sip_impl ( - (le $t:ty) => ( +// XXX: Macro can be simplified once const Hash trait impls +// landed in upstream Rust. +macro_rules! sip_impl { + (le $t:ty, $meth:ident) => { + #[cfg(not(feature = "const-api"))] impl PhfHash for $t { #[inline] fn phf_hash(&self, state: &mut H) { - self.to_le().hash(state); + state.$meth(self.to_le()); } } - ); - ($t:ty) => ( + + #[cfg(feature = "const-api")] + impl const PhfHash for $t { + #[inline] + fn phf_hash(&self, state: &mut H) { + state.$meth(self.to_le()); + } + } + }; + ($t:ty, $meth:ident) => { + #[cfg(not(feature = "const-api"))] impl PhfHash for $t { #[inline] fn phf_hash(&self, state: &mut H) { - self.hash(state); + state.$meth(*self); } } - ) -); -sip_impl!(u8); -sip_impl!(i8); -sip_impl!(le u16); -sip_impl!(le i16); -sip_impl!(le u32); -sip_impl!(le i32); -sip_impl!(le u64); -sip_impl!(le i64); -sip_impl!(le u128); -sip_impl!(le i128); -sip_impl!(bool); + #[cfg(feature = "const-api")] + impl const PhfHash for $t { + #[inline] + fn phf_hash(&self, state: &mut H) { + state.$meth(*self); + } + } + }; +} + +sip_impl!(u8, write_u8); +sip_impl!(i8, write_i8); +sip_impl!(le u16, write_u16); +sip_impl!(le i16, write_i16); +sip_impl!(le u32, write_u32); +sip_impl!(le i32, write_i32); +sip_impl!(le u64, write_u64); +sip_impl!(le i64, write_i64); +sip_impl!(le u128, write_u128); +sip_impl!(le i128, write_i128); +#[cfg(not(feature = "const-api"))] +impl PhfHash for bool { + #[inline] + fn phf_hash(&self, state: &mut H) { + state.write_u8(*self as u8); + } +} + +#[cfg(feature = "const-api")] +impl const PhfHash for bool { + #[inline] + fn phf_hash(&self, state: &mut H) { + state.write_u8(*self as u8); + } +} + +#[cfg(not(feature = "const-api"))] impl PhfHash for char { #[inline] fn 
phf_hash(&self, state: &mut H) { - (*self as u32).phf_hash(state) + state.write_u32(*self as u32); + } +} + +#[cfg(feature = "const-api")] +impl const PhfHash for char { + #[inline] + fn phf_hash(&self, state: &mut H) { + state.write_u32(*self as u32); } } @@ -407,6 +495,7 @@ fn fmt_array(array: &[u8], f: &mut fmt::Formatter<'_>) -> fmt::Result { macro_rules! array_impl ( ($t:ty, $n:expr) => ( + #[cfg(not(feature = "const-api"))] impl PhfHash for [$t; $n] { #[inline] fn phf_hash(&self, state: &mut H) { @@ -414,6 +503,14 @@ macro_rules! array_impl ( } } + #[cfg(feature = "const-api")] + impl const PhfHash for [$t; $n] { + #[inline] + fn phf_hash(&self, state: &mut H) { + state.write(self); + } + } + impl FmtConst for [$t; $n] { fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt_array(self, f) From d091ca1c7a7a3f30e575b841db4eae39367b249d Mon Sep 17 00:00:00 2001 From: Valentin B Date: Thu, 21 Oct 2021 17:58:11 +0200 Subject: [PATCH 03/21] phf_shared: Implement SipHasher13 to make hash a const fn --- phf_shared/Cargo.toml | 1 - phf_shared/src/lib.rs | 26 ++- phf_shared/src/siphasher.rs | 423 ++++++++++++++++++++++++++++++++++++ 3 files changed, 447 insertions(+), 3 deletions(-) create mode 100644 phf_shared/src/siphasher.rs diff --git a/phf_shared/Cargo.toml b/phf_shared/Cargo.toml index 1736611d..aa515a43 100644 --- a/phf_shared/Cargo.toml +++ b/phf_shared/Cargo.toml @@ -19,6 +19,5 @@ std = [] const-api = [] [dependencies] -siphasher = "0.3" unicase = { version = "2.4.0", optional = true } uncased = { version = "0.9.6", optional = true, default-features = false } diff --git a/phf_shared/src/lib.rs b/phf_shared/src/lib.rs index 98051d20..14e57018 100644 --- a/phf_shared/src/lib.rs +++ b/phf_shared/src/lib.rs @@ -3,16 +3,18 @@ //! [phf]: https://docs.rs/phf // XXX: Temporary until stabilization. 
-#![feature(const_fn_trait_bound, const_mut_refs, const_trait_impl)] +#![feature(const_fn_trait_bound, const_mut_refs, const_panic, const_trait_impl)] #![doc(html_root_url = "https://docs.rs/phf_shared/0.10")] #![cfg_attr(not(feature = "std"), no_std)] #[cfg(feature = "std")] extern crate std as core; +mod siphasher; + use core::fmt; use core::hash::Hasher; -use siphasher::sip128::{Hash128, Hasher128, SipHasher13}; +use siphasher::{Hash128, Hasher128, SipHasher13}; #[non_exhaustive] pub struct Hashes { @@ -32,6 +34,7 @@ pub const fn displace(f1: u32, f2: u32, d1: u32, d2: u32) -> u32 { } /// `key` is from `phf_generator::HashState`. +#[cfg(not(feature = "const-api"))] #[inline] pub fn hash(x: &T, key: &HashKey) -> Hashes { let mut hasher = SipHasher13::new_with_keys(0, *key); @@ -49,6 +52,25 @@ pub fn hash(x: &T, key: &HashKey) -> Hashes { } } +/// `key` is from `phf_generator::HashState`. +#[cfg(feature = "const-api")] +#[inline] +pub const fn hash(x: &T, key: &HashKey) -> Hashes { + let mut hasher = SipHasher13::new_with_keys(0, *key); + x.phf_hash(&mut hasher); + + let Hash128 { + h1: lower, + h2: upper, + } = hasher.finish128(); + + Hashes { + g: (lower >> 32) as u32, + f1: lower as u32, + f2: upper as u32, + } +} + /// Return an index into `phf_generator::HashState::map`. /// /// * `hash` is from `hash()` in this crate. diff --git a/phf_shared/src/siphasher.rs b/phf_shared/src/siphasher.rs new file mode 100644 index 00000000..50586945 --- /dev/null +++ b/phf_shared/src/siphasher.rs @@ -0,0 +1,423 @@ +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! A const-compatible implementation of SipHash with a 128-bit output. 
+ +use std::{hash, mem, ptr}; + +/// A 128-bit (2x64) hash output. +#[derive(Debug, Clone, Copy, Default)] +pub struct Hash128 { + pub h1: u64, + pub h2: u64, +} + +impl const From for Hash128 { + fn from(v: u128) -> Self { + Hash128 { + h1: v as u64, + h2: (v >> 64) as u64, + } + } +} + +impl const From for u128 { + fn from(h: Hash128) -> u128 { + (h.h1 as u128) | ((h.h2 as u128) << 64) + } +} + +/// An implementation of SipHash128 1-3. +#[derive(Debug, Clone, Copy)] +pub struct SipHasher13 { + k0: u64, + k1: u64, + length: usize, // how many bytes we've processed + state: State, // hash State + tail: u64, // uncompressed bytes le + ntail: usize, // how many bytes in tail are valid +} + +#[derive(Debug, Clone, Copy)] +struct State { + // v0, v2 and v1, v3 show up in pairs in the algorithm, + // and simd implementations of SipHash will use vectors + // of v02 and v13. By placing them in this order in the struct, + // the compiler can pick up on just a few simd optimizations by itself. + v0: u64, + v2: u64, + v1: u64, + v3: u64, +} + +macro_rules! compress { + ($state:expr) => {{ + compress!($state.v0, $state.v1, $state.v2, $state.v3) + }}; + ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{ + $v0 = $v0.wrapping_add($v1); + $v1 = $v1.rotate_left(13); + $v1 ^= $v0; + $v0 = $v0.rotate_left(32); + $v2 = $v2.wrapping_add($v3); + $v3 = $v3.rotate_left(16); + $v3 ^= $v2; + $v0 = $v0.wrapping_add($v3); + $v3 = $v3.rotate_left(21); + $v3 ^= $v0; + $v2 = $v2.wrapping_add($v1); + $v1 = $v1.rotate_left(17); + $v1 ^= $v2; + $v2 = $v2.rotate_left(32); + }}; +} + +impl State { + #[inline] + const fn c_rounds(&mut self) { + compress!(self); + } + + #[inline] + const fn d_rounds(&mut self) { + compress!(self); + compress!(self); + compress!(self); + } +} + +#[inline] +const fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 { + debug_assert!(len < 8); + let mut i = 0; // current byte index (from LSB) in the output u64. 
+ let mut out = 0; + if i + 3 < len { + out = u32::from_le_bytes([ + buf[start + i + 0], + buf[start + i + 1], + buf[start + i + 2], + buf[start + i + 3], + ]) as u64; + i += 4; + } + if i + 1 < len { + out |= (u16::from_le_bytes([buf[start + i + 0], buf[start + i + 1]]) as u64) << (i * 8); + i += 2; + } + if i < len { + out |= (buf[start + i] as u64) << (i * 8); + i += 1; + } + debug_assert!(i == len); + out +} + +pub trait Hasher128 { + /// Returns a 128-bit hash + fn finish128(&self) -> Hash128; +} + +impl SipHasher13 { + /// Creates a `SipHasher13` that is keyed off the provided keys. + #[inline] + pub const fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 { + let mut state = Self { + k0: key0, + k1: key1, + length: 0, + state: State { + v0: 0, + v1: 0xee, + v2: 0, + v3: 0, + }, + tail: 0, + ntail: 0, + }; + state.reset(); + state + } + + #[inline(always)] + const fn reset(&mut self) { + self.length = 0; + self.state.v0 = self.k0 ^ 0x736f6d6570736575; + self.state.v1 = self.k1 ^ 0x646f72616e646f83; + self.state.v2 = self.k0 ^ 0x6c7967656e657261; + self.state.v3 = self.k1 ^ 0x7465646279746573; + self.ntail = 0; + } + + // A specialized write function for values with size <= 8. + // + // The hashing of multi-byte integers depends on endianness. E.g.: + // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])` + // - big-endian: `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])` + // + // This function does the right thing for little-endian hardware. On + // big-endian hardware `x` must be byte-swapped first to give the right + // behaviour. After any byte-swapping, the input must be zero-extended to + // 64-bits. The caller is responsible for the byte-swapping and + // zero-extension. + #[inline] + const fn short_write(&mut self, x: u64) { + let size = mem::size_of::(); + self.length += size; + + // The original number must be zero-extended, not sign-extended. 
+ debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true }); + + // The number of bytes needed to fill `self.tail`. + let needed = 8 - self.ntail; + + self.tail |= x << (8 * self.ntail); + if size < needed { + self.ntail += size; + return; + } + + // `self.tail` is full, process it. + self.state.v3 ^= self.tail; + self.state.c_rounds(); + self.state.v0 ^= self.tail; + + self.ntail = size - needed; + self.tail = if needed < 8 { x >> (8 * needed) } else { 0 }; + } + + const fn finish128(&self) -> Hash128 { + let mut state = self.state; + + let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail; + + state.v3 ^= b; + state.c_rounds(); + state.v0 ^= b; + + state.v2 ^= 0xee; + state.d_rounds(); + let h1 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3; + + state.v1 ^= 0xdd; + state.d_rounds(); + let h2 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3; + + Hash128 { h1, h2 } + } +} + +impl const Hasher128 for SipHasher13 { + /// Return a 128-bit hash + #[inline] + fn finish128(&self) -> Hash128 { + Self::finish128(self) + } +} + +impl const hash::Hasher for SipHasher13 { + #[inline] + fn write_usize(&mut self, i: usize) { + self.short_write::(i.to_le() as u64); + } + + #[inline] + fn write_u8(&mut self, i: u8) { + self.short_write::(i as u64); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + // TODO: Is this correct? 
+ self.short_write::(i.to_le() as u64); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.short_write::(i.to_le() as u64); + } + + #[inline] + fn write_u64(&mut self, i: u64) { + self.short_write::(i.to_le()); + } + + #[inline] + fn write_u128(&mut self, i: u128) { + self.write(&i.to_ne_bytes()) + } + + #[inline] + fn write_isize(&mut self, i: isize) { + self.write_usize(i as usize); + } + + #[inline] + fn write_i8(&mut self, i: i8) { + self.write_u8(i as u8); + } + + #[inline] + fn write_i16(&mut self, i: i16) { + self.write_u16(i as u16); + } + + #[inline] + fn write_i32(&mut self, i: i32) { + self.write_u32(i as u32); + } + + #[inline] + fn write_i64(&mut self, i: i64) { + self.write_u64(i as u64); + } + + #[inline] + fn write_i128(&mut self, i: i128) { + self.write_u128(i as u128); + } + + #[inline] + fn write(&mut self, bytes: &[u8]) { + let length = bytes.len(); + self.length += length; + + let mut needed = 0; + + if self.ntail != 0 { + needed = 8 - self.ntail; + if length < needed { + self.tail |= u8to64_le(bytes, 0, length) << (8 * self.ntail); + self.ntail += length; + return; + } else { + self.tail |= u8to64_le(bytes, 0, needed) << (8 * self.ntail); + self.state.v3 ^= self.tail; + self.state.c_rounds(); + self.state.v0 ^= self.tail; + self.ntail = 0; + } + } + + // Buffered tail is now flushed, process new input. 
+ let len = length - needed; + let left = len & 0x7; + + let mut i = needed; + while i < len - left { + let mi = u64::from_le_bytes([ + bytes[i + 0], + bytes[i + 1], + bytes[i + 2], + bytes[i + 3], + bytes[i + 4], + bytes[i + 5], + bytes[i + 6], + bytes[i + 7], + ]); + + self.state.v3 ^= mi; + self.state.c_rounds(); + self.state.v0 ^= mi; + + i += 8; + } + + self.tail = u8to64_le(bytes, i, left); + self.ntail = left; + } + + #[inline] + fn finish(&self) -> u64 { + self.finish128().h2 + } +} + +impl Hash128 { + /// Convert into a 16-bytes vector + pub fn as_bytes(&self) -> [u8; 16] { + let mut bytes = [0u8; 16]; + let h1 = self.h1.to_le(); + let h2 = self.h2.to_le(); + unsafe { + ptr::copy_nonoverlapping(&h1 as *const _ as *const u8, bytes.get_unchecked_mut(0), 8); + ptr::copy_nonoverlapping(&h2 as *const _ as *const u8, bytes.get_unchecked_mut(8), 8); + } + bytes + } + + /// Convert into a `u128` + #[inline] + pub const fn as_u128(&self) -> u128 { + let h1 = self.h1.to_le(); + let h2 = self.h2.to_le(); + h1 as u128 | ((h2 as u128) << 64) + } + + /// Convert into `(u64, u64)` + #[inline] + pub const fn as_u64(&self) -> (u64, u64) { + let h1 = self.h1.to_le(); + let h2 = self.h2.to_le(); + (h1, h2) + } +} + +#[cfg(test)] +mod tests { + use super::{Hasher128, SipHasher13}; + use std::hash::{Hash, Hasher}; + + // Hash just the bytes of the slice, without length prefix + struct Bytes<'a>(&'a [u8]); + + impl<'a> Hash for Bytes<'a> { + #[allow(unused_must_use)] + fn hash(&self, state: &mut H) { + let Bytes(v) = *self; + state.write(v); + } + } + + fn hash_with(mut st: H, x: &T) -> [u8; 16] { + x.hash(&mut st); + st.finish128().as_bytes() + } + + #[test] + #[allow(unused_must_use)] + fn test_siphash128_1_3() { + let vecs: [[u8; 16]; 1] = [[ + 231, 126, 188, 178, 39, 136, 165, 190, 253, 98, 219, 106, 221, 48, 48, 1, + ]]; + + let k0 = 0x_07_06_05_04_03_02_01_00; + let k1 = 0x_0f_0e_0d_0c_0b_0a_09_08; + let mut buf = Vec::new(); + let mut t = 0; + let mut state_inc = 
SipHasher13::new_with_keys(k0, k1); + + while t < 1 { + let vec = vecs[t]; + let out = hash_with(SipHasher13::new_with_keys(k0, k1), &Bytes(&buf)); + assert_eq!(vec, out[..]); + + let full = hash_with(SipHasher13::new_with_keys(k0, k1), &Bytes(&buf)); + let i = state_inc.finish128().as_bytes(); + + assert_eq!(full, i); + assert_eq!(full, vec); + + buf.push(t as u8); + Hasher::write(&mut state_inc, &[t as u8]); + + t += 1; + } + } +} From 0376ed4dfe5c8870b23d489e290e57d1c753ed5b Mon Sep 17 00:00:00 2001 From: Valentin B Date: Thu, 21 Oct 2021 19:25:03 +0200 Subject: [PATCH 04/21] Bump all crate versions to 0.11.0 --- phf/Cargo.toml | 7 ++++--- phf_codegen/Cargo.toml | 9 ++++++--- phf_codegen/test/Cargo.toml | 4 ++-- phf_generator/Cargo.toml | 7 +++++-- phf_macros/Cargo.toml | 7 ++++--- phf_macros_tests/Cargo.toml | 4 ++-- phf_shared/src/lib.rs | 2 +- 7 files changed, 24 insertions(+), 16 deletions(-) diff --git a/phf/Cargo.toml b/phf/Cargo.toml index 5c394543..b3b3b99c 100644 --- a/phf/Cargo.toml +++ b/phf/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "phf" authors = ["Steven Fackler "] -version = "0.10.0" +version = "0.11.0" license = "MIT" description = "Runtime support for perfect hash function data structures" repository = "https://github.com/sfackler/rust-phf" @@ -16,6 +16,7 @@ test = false [features] default = ["std"] std = ["phf_shared/std"] +const-api = ["phf_shared/const-api"] uncased = ["phf_shared/uncased"] unicase = ["phf_shared/unicase"] macros = [ @@ -25,8 +26,8 @@ macros = [ [dependencies] proc-macro-hack = { version = "0.5.4", optional = true } -phf_macros = { version = "0.10.0", optional = true } -phf_shared = { version = "0.10.0", default-features = false } +phf_macros = { version = "0.11.0", optional = true } +phf_shared = { version = "0.11.0", default-features = false } [package.metadata.docs.rs] features = ["macros"] diff --git a/phf_codegen/Cargo.toml b/phf_codegen/Cargo.toml index 6e6c90d9..9094d9ac 100644 --- a/phf_codegen/Cargo.toml +++ 
b/phf_codegen/Cargo.toml @@ -1,13 +1,16 @@ [package] name = "phf_codegen" authors = ["Steven Fackler "] -version = "0.10.0" +version = "0.11.0" license = "MIT" description = "Codegen library for PHF types" repository = "https://github.com/sfackler/rust-phf" edition = "2018" readme = "../README.md" +[features] +const-api = ["phf_generator/const-api", "phf_shared/const-api"] + [dependencies] -phf_generator = "0.10.0" -phf_shared = "0.10.0" +phf_generator = "0.11.0" +phf_shared = "0.11.0" diff --git a/phf_codegen/test/Cargo.toml b/phf_codegen/test/Cargo.toml index e9d65264..86212791 100644 --- a/phf_codegen/test/Cargo.toml +++ b/phf_codegen/test/Cargo.toml @@ -6,11 +6,11 @@ build = "build.rs" edition = "2018" [dependencies] -phf = { version = "0.10.0", features = ["uncased", "unicase"] } +phf = { version = "0.11.0", features = ["uncased", "unicase"] } uncased = { version = "0.9.6", default-features = false } unicase = "2.4.0" [build-dependencies] -phf_codegen = { version = "0.10.0", path = ".." } +phf_codegen = { version = "0.11.0", path = ".." 
} unicase = "2.4.0" uncased = { version = "0.9.6", default-features = false } diff --git a/phf_generator/Cargo.toml b/phf_generator/Cargo.toml index 806b4412..e804cfdb 100644 --- a/phf_generator/Cargo.toml +++ b/phf_generator/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "phf_generator" authors = ["Steven Fackler "] -version = "0.10.0" +version = "0.11.0" license = "MIT" description = "PHF generation logic" repository = "https://github.com/sfackler/rust-phf" @@ -9,7 +9,7 @@ edition = "2018" [dependencies] rand = { version = "0.8", features = ["small_rng"] } -phf_shared = { version = "0.10.0", default-features = false } +phf_shared = { version = "0.11.0", default-features = false } # for stable black_box() criterion = { version = "=0.3.4", optional = true } @@ -23,3 +23,6 @@ harness = false [[bin]] name = "gen_hash_test" required-features = ["criterion"] + +[features] +const-api = ["phf_shared/const-api"] diff --git a/phf_macros/Cargo.toml b/phf_macros/Cargo.toml index a3454a36..4f9683f2 100644 --- a/phf_macros/Cargo.toml +++ b/phf_macros/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "phf_macros" -version = "0.10.0" +version = "0.11.0" authors = ["Steven Fackler "] edition = "2018" license = "MIT" @@ -13,6 +13,7 @@ include = ["src/lib.rs"] proc-macro = true [features] +const-api = ["phf_shared/const-api"] unicase = ["unicase_", "phf_shared/unicase"] [dependencies] @@ -22,5 +23,5 @@ proc-macro2 = "1" proc-macro-hack = "0.5.4" unicase_ = { package = "unicase", version = "2.4.0", optional = true } -phf_generator = "0.10.0" -phf_shared = { version = "0.10.0", default-features = false } +phf_generator = "0.11.0" +phf_shared = { version = "0.11.0", default-features = false } diff --git a/phf_macros_tests/Cargo.toml b/phf_macros_tests/Cargo.toml index bc2478c7..7b818220 100644 --- a/phf_macros_tests/Cargo.toml +++ b/phf_macros_tests/Cargo.toml @@ -10,6 +10,6 @@ edition = "2018" [dev-dependencies] trybuild = "1.0" -phf = { version = "0.10", features = ["macros"] } -phf_macros = 
{ version = "0.10", features = ["unicase"] } +phf = { version = "0.11", features = ["macros"] } +phf_macros = { version = "0.11", features = ["unicase"] } unicase = "2.4.0" diff --git a/phf_shared/src/lib.rs b/phf_shared/src/lib.rs index 14e57018..0fa8cf0d 100644 --- a/phf_shared/src/lib.rs +++ b/phf_shared/src/lib.rs @@ -13,7 +13,7 @@ extern crate std as core; mod siphasher; use core::fmt; -use core::hash::Hasher; +use core::hash::{Hash, Hasher}; use siphasher::{Hash128, Hasher128, SipHasher13}; #[non_exhaustive] From d1dd1544ae718b472a26ac08cb30ce0c86fe3ccb Mon Sep 17 00:00:00 2001 From: Valentin B Date: Thu, 21 Oct 2021 23:42:51 +0200 Subject: [PATCH 05/21] phf_generator: Constify most of the functionality --- phf_generator/Cargo.toml | 1 - phf_generator/src/lib.rs | 190 +++++++++++++++++++++++++++++++++++++-- phf_generator/src/rng.rs | 40 +++++++++ phf_shared/src/lib.rs | 7 ++ 4 files changed, 230 insertions(+), 8 deletions(-) create mode 100644 phf_generator/src/rng.rs diff --git a/phf_generator/Cargo.toml b/phf_generator/Cargo.toml index e804cfdb..7dd14bdf 100644 --- a/phf_generator/Cargo.toml +++ b/phf_generator/Cargo.toml @@ -8,7 +8,6 @@ repository = "https://github.com/sfackler/rust-phf" edition = "2018" [dependencies] -rand = { version = "0.8", features = ["small_rng"] } phf_shared = { version = "0.11.0", default-features = false } # for stable black_box() criterion = { version = "=0.3.4", optional = true } diff --git a/phf_generator/src/lib.rs b/phf_generator/src/lib.rs index 8b75a36e..1468c731 100644 --- a/phf_generator/src/lib.rs +++ b/phf_generator/src/lib.rs @@ -2,29 +2,205 @@ //! //! [phf]: https://docs.rs/phf +// XXX: Temporary until stabilization. 
+#![allow(incomplete_features)] +#![feature( + const_fn_trait_bound, + const_option, + const_trait_impl, + const_mut_refs, + generic_const_exprs +)] #![doc(html_root_url = "https://docs.rs/phf_generator/0.10")] + +pub mod rng; + use phf_shared::{HashKey, PhfHash}; -use rand::distributions::Standard; -use rand::rngs::SmallRng; -use rand::{Rng, SeedableRng}; +use rng::Rng; const DEFAULT_LAMBDA: usize = 5; const FIXED_SEED: u64 = 1234567890; +#[cfg(feature = "const-api")] +pub struct HashState +where + [(); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA]: Sized, +{ + pub key: HashKey, + pub disps: [(u32, u32); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA], + pub map: [usize; N], +} + +#[cfg(not(feature = "const-api"))] pub struct HashState { pub key: HashKey, pub disps: Vec<(u32, u32)>, pub map: Vec, } +#[cfg(feature = "const-api")] +pub const fn generate_hash(entries: &[H; N]) -> HashState +where + [(); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA]: Sized, +{ + let mut rng = Rng::new(FIXED_SEED); + loop { + match try_generate_hash(entries, rng.generate()) { + Some(state) => break state, + None => continue, + } + } +} + +#[cfg(not(feature = "const-api"))] pub fn generate_hash(entries: &[H]) -> HashState { - SmallRng::seed_from_u64(FIXED_SEED) - .sample_iter(Standard) - .find_map(|key| try_generate_hash(entries, key)) - .expect("failed to solve PHF") + let mut rng = Rng::new(FIXED_SEED); + loop { + match try_generate_hash(entries, rng.generate()) { + Some(state) => break state, + None => continue, + } + } +} + +#[cfg(feature = "const-api")] +const fn try_generate_hash( + entries: &[H; N], + key: HashKey, +) -> Option> +where + [(); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA]: Sized, +{ + assert_ne!(N, usize::MAX); + + struct Bucket { + idx: usize, + keys: [usize; N], + } + + impl const Default for Bucket { + #[inline(always)] + fn default() -> Self { + Self { + idx: 0, + // We use usize::MAX as a marker to distinguish what is an actual + // key and what is not due to fixed 
allocation sizes. We previously + // assert that `N` is not `usize::MAX` to avoid ambiguity. + keys: [usize::MAX; N], + } + } + } + + let mut hashes: [_; N] = [phf_shared::Hashes::default(); N]; + let mut i = 0; + while i < N { + hashes[i] = phf_shared::hash(&entries[i], &key); + i += 1; + } + + const BUCKETS_LEN: usize = (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA; + let mut buckets: [Bucket; BUCKETS_LEN] = [Bucket::default(); BUCKETS_LEN]; + i = 0; + while i < BUCKETS_LEN { + buckets[i].idx = i; + i += 1; + } + + i = 0; + let mut key_lens: [usize; N] = [0; N]; + while i < N { + let bucket = (hashes[i].g % (BUCKETS_LEN as u32)) as usize; + buckets[bucket].keys[key_lens[bucket]] = i; + key_lens[bucket] += 1; + } + + // Sort descending + // buckets.sort_by(|a, b| a.keys.len().cmp(&b.keys.len()).reverse()); + // TODO + + let mut map: [Option; N] = [None; N]; + let mut disps: [(u32, u32); BUCKETS_LEN] = [(0, 0); BUCKETS_LEN]; + + // store whether an element from the bucket being placed is + // located at a certain position, to allow for efficient overlap + // checks. It works by storing the generation in each cell and + // each new placement-attempt is a new generation, so you can tell + // if this is legitimately full by checking that the generations + // are equal. (A u64 is far too large to overflow in a reasonable + // time for current hardware.) + let mut try_map: [u64; N] = [0; N]; + let mut generation = 0u64; + + // the actual values corresponding to the markers above, as + // (index, key) pairs, for adding to the main map once we've + // chosen the right disps. 
+ let mut values_to_add_len: usize = 0; + let mut values_to_add: [(usize, usize); N] = [(0, 0); N]; + + i = 0; + 'buckets: while i < buckets.len() { + let bucket = &buckets[i]; + let mut d1 = 0; + while d1 < N { + let mut d2 = 0; + 'disps: while d2 < N { + let mut j = 0; + while j < N { + values_to_add[j] = (0, 0); + j += 1; + } + generation += 1; + + j = 0; + while j < N { + let key = bucket.keys[j]; + let idx = + (phf_shared::displace(hashes[key].f1, hashes[key].f2, d1 as u32, d2 as u32) + % (N as u32)) as usize; + if map[idx].is_some() || try_map[idx] == generation { + d2 += 1; + continue 'disps; + } + try_map[idx] = generation; + values_to_add[values_to_add_len] = (idx, key); + values_to_add_len += 1; + j += 1; + } + + // We've picked a good set of disps. + disps[bucket.idx] = (d1 as u32, d2 as u32); + j = 0; + while j < N { + let &(idx, key) = &values_to_add[j]; + map[idx] = Some(key); + j += 1; + } + continue 'buckets; + } + d1 += 1; + } + + // Unable to find displacements for a bucket + return None; + } + + Some(HashState { + key, + disps, + map: { + let mut result = [0; N]; + i = 0; + while i < N { + result[i] = map[i].unwrap(); + i += 1; + } + result + }, + }) } +#[cfg(not(feature = "const-api"))] fn try_generate_hash(entries: &[H], key: HashKey) -> Option { struct Bucket { idx: usize, diff --git a/phf_generator/src/rng.rs b/phf_generator/src/rng.rs new file mode 100644 index 00000000..87ba3881 --- /dev/null +++ b/phf_generator/src/rng.rs @@ -0,0 +1,40 @@ +//! A fixed-seed PRNG based on the wyrand algorithm. +//! +//! The focus is to provide a fast implementation that is usable in const +//! context, but not to be cryptographically secure by any means. + +/// A tiny and fast pseudo-random number generator based on wyrand. +/// +/// This must be initialized to a fixed seed which will be the +/// base for random number generation. 
+#[derive(Clone, Debug)] +#[repr(transparent)] +pub struct Rng { + seed: u64, +} + +impl Rng { + /// Creates a new RNG given an initial seed. + pub const fn new(seed: u64) -> Self { + Self { seed } + } + + /// Generates a pseudo-random [`u64`] value and alters the + /// internal state. + /// + /// This method may be called repeatedly on the same [`Rng`] + /// instance to produce several random numbers. + #[inline] + pub const fn generate(&mut self) -> u64 { + self.seed = self.seed.wrapping_add(0xa0761d6478bd642f); + + let t: u128 = (self.seed as u128).wrapping_mul((self.seed ^ 0xe7037ed1a0b428db) as u128); + (t.wrapping_shr(64) ^ t) as u64 + } +} + +// TODO: Implement the `Iterator` trait for `Rng` once all its provided methods +// are decorated with `#[method_body_is_const]`. Before that, we'd have to +// implement *all* Iterator methods by hand which would become very verbose +// for mostly unneeded features. Thereby we will wait until we get away with +// just providing a `next` implementation on our part. diff --git a/phf_shared/src/lib.rs b/phf_shared/src/lib.rs index 0fa8cf0d..a3e5576f 100644 --- a/phf_shared/src/lib.rs +++ b/phf_shared/src/lib.rs @@ -23,6 +23,13 @@ pub struct Hashes { pub f2: u32, } +impl const Default for Hashes { + #[inline(always)] + fn default() -> Self { + Self { g: 0, f1: 0, f2: 0 } + } +} + /// A central typedef for hash keys /// /// Makes experimentation easier by only needing to be updated here. From b759a569e94d2d9a9299fd86dfb3ddaa1e0432a4 Mon Sep 17 00:00:00 2001 From: Valentin B Date: Thu, 21 Oct 2021 23:48:05 +0200 Subject: [PATCH 06/21] Update html_root_urls for new versions --- phf/src/lib.rs | 2 +- phf_codegen/src/lib.rs | 2 +- phf_generator/src/lib.rs | 2 +- phf_shared/src/lib.rs | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/phf/src/lib.rs b/phf/src/lib.rs index 627d3e81..036b6f38 100644 --- a/phf/src/lib.rs +++ b/phf/src/lib.rs @@ -71,7 +71,7 @@ //! 
[#183]: https://github.com/rust-phf/rust-phf/issues/183 //! [#196]: https://github.com/rust-phf/rust-phf/issues/196 -#![doc(html_root_url = "https://docs.rs/phf/0.10")] +#![doc(html_root_url = "https://docs.rs/phf/0.11")] #![warn(missing_docs)] #![cfg_attr(not(feature = "std"), no_std)] diff --git a/phf_codegen/src/lib.rs b/phf_codegen/src/lib.rs index 2bfb9722..6907f695 100644 --- a/phf_codegen/src/lib.rs +++ b/phf_codegen/src/lib.rs @@ -128,7 +128,7 @@ //! // ... //! ``` -#![doc(html_root_url = "https://docs.rs/phf_codegen/0.10")] +#![doc(html_root_url = "https://docs.rs/phf_codegen/0.11")] use phf_shared::{FmtConst, PhfHash}; use std::collections::HashSet; diff --git a/phf_generator/src/lib.rs b/phf_generator/src/lib.rs index 1468c731..7042429b 100644 --- a/phf_generator/src/lib.rs +++ b/phf_generator/src/lib.rs @@ -11,7 +11,7 @@ const_mut_refs, generic_const_exprs )] -#![doc(html_root_url = "https://docs.rs/phf_generator/0.10")] +#![doc(html_root_url = "https://docs.rs/phf_generator/0.11")] pub mod rng; diff --git a/phf_shared/src/lib.rs b/phf_shared/src/lib.rs index a3e5576f..c584ec1f 100644 --- a/phf_shared/src/lib.rs +++ b/phf_shared/src/lib.rs @@ -4,7 +4,7 @@ // XXX: Temporary until stabilization. 
#![feature(const_fn_trait_bound, const_mut_refs, const_panic, const_trait_impl)] -#![doc(html_root_url = "https://docs.rs/phf_shared/0.10")] +#![doc(html_root_url = "https://docs.rs/phf_shared/0.11")] #![cfg_attr(not(feature = "std"), no_std)] #[cfg(feature = "std")] From e69d5d7de9ee1a949713110c3d4aeb97f367d151 Mon Sep 17 00:00:00 2001 From: Valentin B Date: Fri, 22 Oct 2021 14:33:14 +0200 Subject: [PATCH 07/21] phf_generator: Fix build with const-api feature --- phf_codegen/Cargo.toml | 3 - phf_generator/src/lib.rs | 131 +++++++++++++++++++------------------ phf_generator/src/utils.rs | 55 ++++++++++++++++ phf_shared/src/lib.rs | 1 + 4 files changed, 123 insertions(+), 67 deletions(-) create mode 100644 phf_generator/src/utils.rs diff --git a/phf_codegen/Cargo.toml b/phf_codegen/Cargo.toml index 9094d9ac..8a0736c0 100644 --- a/phf_codegen/Cargo.toml +++ b/phf_codegen/Cargo.toml @@ -8,9 +8,6 @@ repository = "https://github.com/sfackler/rust-phf" edition = "2018" readme = "../README.md" -[features] -const-api = ["phf_generator/const-api", "phf_shared/const-api"] - [dependencies] phf_generator = "0.11.0" phf_shared = "0.11.0" diff --git a/phf_generator/src/lib.rs b/phf_generator/src/lib.rs index 7042429b..0b462969 100644 --- a/phf_generator/src/lib.rs +++ b/phf_generator/src/lib.rs @@ -7,6 +7,7 @@ #![feature( const_fn_trait_bound, const_option, + const_panic, const_trait_impl, const_mut_refs, generic_const_exprs @@ -14,6 +15,8 @@ #![doc(html_root_url = "https://docs.rs/phf_generator/0.11")] pub mod rng; +#[cfg(feature = "const-api")] +mod utils; use phf_shared::{HashKey, PhfHash}; use rng::Rng; @@ -25,11 +28,11 @@ const FIXED_SEED: u64 = 1234567890; #[cfg(feature = "const-api")] pub struct HashState where - [(); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA]: Sized, + utils::ArrayVec<(), { (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA }>: Sized, { pub key: HashKey, - pub disps: [(u32, u32); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA], - pub map: [usize; N], + pub disps: 
utils::ArrayVec<(u32, u32), { (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA }>, + pub map: utils::ArrayVec, } #[cfg(not(feature = "const-api"))] @@ -42,7 +45,7 @@ pub struct HashState { #[cfg(feature = "const-api")] pub const fn generate_hash(entries: &[H; N]) -> HashState where - [(); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA]: Sized, + utils::ArrayVec<(), { (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA }>: Sized, { let mut rng = Rng::new(FIXED_SEED); loop { @@ -70,57 +73,61 @@ const fn try_generate_hash( key: HashKey, ) -> Option> where - [(); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA]: Sized, + utils::ArrayVec<(), { (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA }>: Sized, { - assert_ne!(N, usize::MAX); + use utils::ArrayVec; + #[derive(Clone, Copy)] struct Bucket { idx: usize, - keys: [usize; N], + keys: ArrayVec, } impl const Default for Bucket { - #[inline(always)] fn default() -> Self { - Self { + Bucket { idx: 0, - // We use usize::MAX as a marker to distinguish what is an actual - // key and what is not due to fixed allocation sizes. We previously - // assert that `N` is not `usize::MAX` to avoid ambiguity. 
- keys: [usize::MAX; N], + keys: ArrayVec::new(0), } } } - let mut hashes: [_; N] = [phf_shared::Hashes::default(); N]; - let mut i = 0; - while i < N { - hashes[i] = phf_shared::hash(&entries[i], &key); - i += 1; - } + let hashes = { + let mut hashes = [phf_shared::Hashes::default(); N]; + let mut i = 0; + while i < N { + hashes[i] = phf_shared::hash(&entries[i], &key); + i += 1; + } + hashes + }; + + let mut buckets = { + let mut buckets = [Bucket::::default(); { (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA }]; + let mut i = 0; + while i < buckets.len() { + buckets[i].idx = i; + i += 1; + } + buckets + }; - const BUCKETS_LEN: usize = (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA; - let mut buckets: [Bucket; BUCKETS_LEN] = [Bucket::default(); BUCKETS_LEN]; - i = 0; - while i < BUCKETS_LEN { - buckets[i].idx = i; + let mut i = 0; + while i < hashes.len() { + buckets[(hashes[i].g % (buckets.len() as u32)) as usize] + .keys + .push(i); i += 1; } - i = 0; - let mut key_lens: [usize; N] = [0; N]; - while i < N { - let bucket = (hashes[i].g % (BUCKETS_LEN as u32)) as usize; - buckets[bucket].keys[key_lens[bucket]] = i; - key_lens[bucket] += 1; - } - // Sort descending // buckets.sort_by(|a, b| a.keys.len().cmp(&b.keys.len()).reverse()); // TODO - let mut map: [Option; N] = [None; N]; - let mut disps: [(u32, u32); BUCKETS_LEN] = [(0, 0); BUCKETS_LEN]; + // table_len = N + let mut map: ArrayVec, N> = ArrayVec::new(None); + let mut disps: ArrayVec<(u32, u32), { (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA }> = + ArrayVec::new((0, 0)); // store whether an element from the bucket being placed is // located at a certain position, to allow for efficient overlap @@ -129,52 +136,48 @@ where // if this is legitimately full by checking that the generations // are equal. (A u64 is far too large to overflow in a reasonable // time for current hardware.) 
- let mut try_map: [u64; N] = [0; N]; + let mut try_map = [0u64; N]; let mut generation = 0u64; // the actual values corresponding to the markers above, as - // (index, key) pairs, for adding to the main map once we've + // (index, key) pairs, for adding to the main map, once we've // chosen the right disps. - let mut values_to_add_len: usize = 0; - let mut values_to_add: [(usize, usize); N] = [(0, 0); N]; + let mut values_to_add: ArrayVec<(usize, usize), N> = ArrayVec::new((0, 0)); + + let mut i1 = 0; + 'buckets: while i1 < buckets.len() { + let bucket = &buckets[i1]; + i1 += 1; - i = 0; - 'buckets: while i < buckets.len() { - let bucket = &buckets[i]; let mut d1 = 0; while d1 < N { let mut d2 = 0; 'disps: while d2 < N { - let mut j = 0; - while j < N { - values_to_add[j] = (0, 0); - j += 1; - } + values_to_add.clear(); generation += 1; - j = 0; - while j < N { - let key = bucket.keys[j]; + let mut i2 = 0; + while i2 < bucket.keys.len() { + let key = bucket.keys.get(i2); let idx = (phf_shared::displace(hashes[key].f1, hashes[key].f2, d1 as u32, d2 as u32) % (N as u32)) as usize; - if map[idx].is_some() || try_map[idx] == generation { + if map.get_ref(idx).is_some() || try_map[idx] == generation { d2 += 1; continue 'disps; } try_map[idx] = generation; - values_to_add[values_to_add_len] = (idx, key); - values_to_add_len += 1; - j += 1; + values_to_add.push((idx, key)); + i2 += 1; } - // We've picked a good set of disps. 
- disps[bucket.idx] = (d1 as u32, d2 as u32); - j = 0; - while j < N { - let &(idx, key) = &values_to_add[j]; - map[idx] = Some(key); - j += 1; + // We've picked a good set of disps + disps.set(bucket.idx, (d1 as u32, d2 as u32)); + i2 = 0; + while i2 < values_to_add.len() { + let &(idx, key) = values_to_add.get_ref(i2); + map.set(idx, Some(key)); + i2 += 1; } continue 'buckets; } @@ -189,10 +192,10 @@ where key, disps, map: { - let mut result = [0; N]; - i = 0; - while i < N { - result[i] = map[i].unwrap(); + let mut result: ArrayVec = ArrayVec::new(0); + let mut i = 0; + while i < map.len() { + result.set(i, map.get(i).unwrap()); i += 1; } result diff --git a/phf_generator/src/utils.rs b/phf_generator/src/utils.rs new file mode 100644 index 00000000..e9fa2459 --- /dev/null +++ b/phf_generator/src/utils.rs @@ -0,0 +1,55 @@ +#[derive(Clone, Copy)] +pub struct ArrayVec { + arr: [T; N], + len: usize, +} + +impl ArrayVec { + #[inline(always)] + pub const fn new(marker: T) -> Self { + Self { + arr: [marker; N], + len: 0, + } + } + + #[inline] + pub const fn len(&self) -> usize { + self.len + 1 + } + + #[inline] + pub const fn push(&mut self, value: T) { + self.arr[self.len] = value; + self.len += 1; + } + + #[inline] + pub const fn pop(&mut self) -> T { + self.len -= 1; + self.arr[self.len] + } + + #[inline] + pub const fn clear(&mut self) { + self.len = 0; + } + + #[inline] + pub const fn get(&self, i: usize) -> T { + assert!(i < self.len); + self.arr[i] + } + + #[inline] + pub const fn get_ref(&self, i: usize) -> &T { + assert!(i < self.len); + &self.arr[i] + } + + #[inline] + pub const fn set(&mut self, i: usize, value: T) { + assert!(i <= self.len); + self.arr[i] = value; + } +} diff --git a/phf_shared/src/lib.rs b/phf_shared/src/lib.rs index c584ec1f..ded55cb9 100644 --- a/phf_shared/src/lib.rs +++ b/phf_shared/src/lib.rs @@ -16,6 +16,7 @@ use core::fmt; use core::hash::{Hash, Hasher}; use siphasher::{Hash128, Hasher128, SipHasher13}; +#[derive(Clone, Copy)] 
#[non_exhaustive] pub struct Hashes { pub g: u32, From d744a73f4e8a7e7eb02dee70dc0fb3203af84901 Mon Sep 17 00:00:00 2001 From: Valentin B Date: Fri, 22 Oct 2021 15:00:40 +0200 Subject: [PATCH 08/21] phf_generator: Add RNG support for alphanumeric chars and fix gen_hash_test --- phf_generator/src/bin/gen_hash_test.rs | 14 ++++++------- phf_generator/src/rng.rs | 28 +++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/phf_generator/src/bin/gen_hash_test.rs b/phf_generator/src/bin/gen_hash_test.rs index 2e1fbec4..ad14fa45 100644 --- a/phf_generator/src/bin/gen_hash_test.rs +++ b/phf_generator/src/bin/gen_hash_test.rs @@ -1,16 +1,16 @@ use criterion::*; -use rand::distributions::Alphanumeric; -use rand::rngs::SmallRng; -use rand::{Rng, SeedableRng}; - -use phf_generator::generate_hash; +use phf_generator::{generate_hash, rng::Rng}; fn gen_vec(len: usize) -> Vec { - let mut rng = SmallRng::seed_from_u64(0xAAAAAAAAAAAAAAAA).sample_iter(Alphanumeric); + let mut rng = Rng::new(0xAAAAAAAAAAAAAAAA); (0..len) - .map(move |_| rng.by_ref().take(64).collect::()) + .map(move |_| { + let mut str = String::with_capacity(64); + (0..64).for_each(|_| str.push(rng.generate_alphanumeric())); + str + }) .collect() } diff --git a/phf_generator/src/rng.rs b/phf_generator/src/rng.rs index 87ba3881..470f6168 100644 --- a/phf_generator/src/rng.rs +++ b/phf_generator/src/rng.rs @@ -21,7 +21,7 @@ impl Rng { /// Generates a pseudo-random [`u64`] value and alters the /// internal state. - /// + /// /// This method may be called repeatedly on the same [`Rng`] /// instance to produce several random numbers. #[inline] @@ -31,6 +31,32 @@ impl Rng { let t: u128 = (self.seed as u128).wrapping_mul((self.seed ^ 0xe7037ed1a0b428db) as u128); (t.wrapping_shr(64) ^ t) as u64 } + + /// Generates a pseudo-random [`char`] value and alters the + /// internal state. + /// + /// This method may be called repeatedly on the same [`Rng`] + /// to produce a random string. 
+ #[inline] + pub const fn generate_alphanumeric(&mut self) -> char { + const CHARS: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; + const NCHARS: u32 = CHARS.len() as u32; + + // Adapted from https://lemire.me/blog/2016/06/30/fast-random-shuffling/ + let mut r = self.generate() as u32; + let mut hi = (((r as u64) * (NCHARS as u64)) >> 32) as u32; + let mut lo = r.wrapping_mul(NCHARS); + if lo < NCHARS { + let t = NCHARS.wrapping_neg() % NCHARS; + while lo < t { + r = self.generate() as u32; + hi = (((r as u64) * (NCHARS as u64)) >> 32) as u32; + lo = r.wrapping_mul(NCHARS); + } + } + + CHARS[hi as usize] as char + } } // TODO: Implement the `Iterator` trait for `Rng` once all its provided methods From c4720353e8644564db6bfec4d6c66d0a24c9dbfc Mon Sep 17 00:00:00 2001 From: Valentin B Date: Fri, 22 Oct 2021 15:05:52 +0200 Subject: [PATCH 09/21] phf_shared: std => core --- phf_shared/src/siphasher.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phf_shared/src/siphasher.rs b/phf_shared/src/siphasher.rs index 50586945..06e5d13b 100644 --- a/phf_shared/src/siphasher.rs +++ b/phf_shared/src/siphasher.rs @@ -10,7 +10,7 @@ //! A const-compatible implementation of SipHash with a 128-bit output. -use std::{hash, mem, ptr}; +use core::{hash, mem, ptr}; /// A 128-bit (2x64) hash output. 
#[derive(Debug, Clone, Copy, Default)] From 55fcace11b4b538c696fad62026821eff62d33f1 Mon Sep 17 00:00:00 2001 From: Valentin B Date: Fri, 22 Oct 2021 15:15:51 +0200 Subject: [PATCH 10/21] Exclude phf_codegen and phf_macros from const-api feature --- phf_macros/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/phf_macros/Cargo.toml b/phf_macros/Cargo.toml index 4f9683f2..807a433c 100644 --- a/phf_macros/Cargo.toml +++ b/phf_macros/Cargo.toml @@ -13,7 +13,6 @@ include = ["src/lib.rs"] proc-macro = true [features] -const-api = ["phf_shared/const-api"] unicase = ["unicase_", "phf_shared/unicase"] [dependencies] From bad9d6b4c89801f4ef834f8ccdc58dbbc6023aa6 Mon Sep 17 00:00:00 2001 From: Valentin B Date: Fri, 22 Oct 2021 19:48:16 +0200 Subject: [PATCH 11/21] phf_macros: Repurpose as utility crate --- Cargo.toml | 7 +- phf/Cargo.toml | 9 +- phf/src/lib.rs | 9 +- phf_generator/src/lib.rs | 9 +- phf_macros/Cargo.toml | 13 +- phf_macros/src/lib.rs | 401 +++++++++++---------------------------- 6 files changed, 132 insertions(+), 316 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 13b416d0..e1a10585 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,8 +1,11 @@ [workspace] members = [ "phf", - "phf_codegen", - "phf_codegen/test", + # TODO: Re-enable when a possibility is found to build all of phf_codegen's + # dependencies *WITHOUT* the `const-api` cargo feature, but everything else + # either with or without, depending on feature opt-ins. 
+ #"phf_codegen", + #"phf_codegen/test", "phf_generator", "phf_macros", "phf_macros_tests", diff --git a/phf/Cargo.toml b/phf/Cargo.toml index b3b3b99c..3aa98602 100644 --- a/phf/Cargo.toml +++ b/phf/Cargo.toml @@ -16,18 +16,13 @@ test = false [features] default = ["std"] std = ["phf_shared/std"] -const-api = ["phf_shared/const-api"] uncased = ["phf_shared/uncased"] unicase = ["phf_shared/unicase"] -macros = [ - "phf_macros", - "proc-macro-hack", -] +macros = ["phf_macros"] [dependencies] -proc-macro-hack = { version = "0.5.4", optional = true } phf_macros = { version = "0.11.0", optional = true } -phf_shared = { version = "0.11.0", default-features = false } +phf_shared = { version = "0.11.0", default-features = false, features = ["const-api"] } [package.metadata.docs.rs] features = ["macros"] diff --git a/phf/src/lib.rs b/phf/src/lib.rs index 036b6f38..31930ee2 100644 --- a/phf/src/lib.rs +++ b/phf/src/lib.rs @@ -78,6 +78,11 @@ #[cfg(feature = "std")] extern crate std as core; +// Not part of the public API. Used by the macro facade. +#[cfg(feature = "macros")] +#[doc(hidden)] +pub extern crate phf_macros as __phf_macros; + #[cfg(feature = "macros")] /// Macro to create a `static` (compile-time) [`Map`]. /// @@ -97,14 +102,12 @@ extern crate std as core; /// assert_eq!(MY_MAP["hello"], 1); /// } /// ``` -#[proc_macro_hack::proc_macro_hack] pub use phf_macros::phf_map; #[cfg(feature = "macros")] /// Macro to create a `static` (compile-time) [`OrderedMap`]. /// /// Requires the `macros` feature. Same usage as [`phf_map`]. -#[proc_macro_hack::proc_macro_hack] pub use phf_macros::phf_ordered_map; #[cfg(feature = "macros")] @@ -126,14 +129,12 @@ pub use phf_macros::phf_ordered_map; /// assert!(MY_SET.contains("hello world")); /// } /// ``` -#[proc_macro_hack::proc_macro_hack] pub use phf_macros::phf_set; #[cfg(feature = "macros")] /// Macro to create a `static` (compile-time) [`OrderedSet`]. /// /// Requires the `macros` feature. Same usage as [`phf_set`]. 
-#[proc_macro_hack::proc_macro_hack] pub use phf_macros::phf_ordered_set; #[doc(inline)] diff --git a/phf_generator/src/lib.rs b/phf_generator/src/lib.rs index 0b462969..12c9f3d5 100644 --- a/phf_generator/src/lib.rs +++ b/phf_generator/src/lib.rs @@ -21,14 +21,15 @@ mod utils; use phf_shared::{HashKey, PhfHash}; use rng::Rng; -const DEFAULT_LAMBDA: usize = 5; +#[doc(hidden)] +pub const DEFAULT_LAMBDA: usize = 5; const FIXED_SEED: u64 = 1234567890; #[cfg(feature = "const-api")] pub struct HashState where - utils::ArrayVec<(), { (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA }>: Sized, + [(); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA]: Sized, { pub key: HashKey, pub disps: utils::ArrayVec<(u32, u32), { (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA }>, @@ -45,7 +46,7 @@ pub struct HashState { #[cfg(feature = "const-api")] pub const fn generate_hash(entries: &[H; N]) -> HashState where - utils::ArrayVec<(), { (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA }>: Sized, + [(); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA]: Sized, { let mut rng = Rng::new(FIXED_SEED); loop { @@ -73,7 +74,7 @@ const fn try_generate_hash( key: HashKey, ) -> Option> where - utils::ArrayVec<(), { (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA }>: Sized, + [(); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA]: Sized, { use utils::ArrayVec; diff --git a/phf_macros/Cargo.toml b/phf_macros/Cargo.toml index 807a433c..7caca752 100644 --- a/phf_macros/Cargo.toml +++ b/phf_macros/Cargo.toml @@ -4,23 +4,16 @@ version = "0.11.0" authors = ["Steven Fackler "] edition = "2018" license = "MIT" -description = "Macros to generate types in the phf crate" +description = "Macros utils to generate types in the phf crate; don't use directly" repository = "https://github.com/sfackler/rust-phf" readme = "../README.md" include = ["src/lib.rs"] -[lib] -proc-macro = true - [features] unicase = ["unicase_", "phf_shared/unicase"] [dependencies] -syn = { version = "1", features = ["full"] } -quote = "1" -proc-macro2 = "1" -proc-macro-hack = "0.5.4" 
unicase_ = { package = "unicase", version = "2.4.0", optional = true } -phf_generator = "0.11.0" -phf_shared = { version = "0.11.0", default-features = false } +phf_generator = { version = "0.11.0", features = ["const-api"] } +phf_shared = { version = "0.11.0", default-features = false, features = ["const-api"] } diff --git a/phf_macros/src/lib.rs b/phf_macros/src/lib.rs index 8161b05f..1ee46cf7 100644 --- a/phf_macros/src/lib.rs +++ b/phf_macros/src/lib.rs @@ -1,243 +1,44 @@ -//! A set of macros to generate Rust source for PHF data structures at compile time. +//! Helper functions for macros to generate PHF data structures at compile time. //! See [the `phf` crate's documentation][phf] for details. //! //! [phf]: https://docs.rs/phf -use phf_generator::HashState; +// XXX: Remove on stabilization. +#![allow(incomplete_features)] +#![feature( + const_fn_trait_bound, + const_maybe_uninit_write, + const_mut_refs, + const_ptr_read, + const_refs_to_cell, + const_trait_impl, + const_transmute_copy, + generic_const_exprs, + maybe_uninit_uninit_array +)] + +use std::{ + hash::{Hash, Hasher}, + mem::{transmute_copy, MaybeUninit}, +}; + +use phf_generator::{HashState, DEFAULT_LAMBDA}; use phf_shared::PhfHash; -use proc_macro::TokenStream; -use quote::quote; -use std::collections::HashSet; -use std::hash::Hasher; -use syn::parse::{self, Parse, ParseStream}; -use syn::punctuated::Punctuated; -#[cfg(feature = "unicase")] -use syn::ExprLit; -use syn::{parse_macro_input, Error, Expr, Lit, Token, UnOp}; -#[cfg(feature = "unicase")] -use unicase_::UniCase; -#[derive(Hash, PartialEq, Eq, Clone)] -enum ParsedKey { - Str(String), - Binary(Vec), - Char(char), - I8(i8), - I16(i16), - I32(i32), - I64(i64), - I128(i128), - U8(u8), - U16(u16), - U32(u32), - U64(u64), - U128(u128), - Bool(bool), - #[cfg(feature = "unicase")] - UniCase(UniCase), +const unsafe fn const_array_assume_init(array: &[MaybeUninit; N]) -> [T; N] { + transmute_copy(array) } -impl PhfHash for ParsedKey { - fn 
phf_hash(&self, state: &mut H) - where - H: Hasher, - { - match self { - ParsedKey::Str(s) => s.phf_hash(state), - ParsedKey::Binary(s) => s.phf_hash(state), - ParsedKey::Char(s) => s.phf_hash(state), - ParsedKey::I8(s) => s.phf_hash(state), - ParsedKey::I16(s) => s.phf_hash(state), - ParsedKey::I32(s) => s.phf_hash(state), - ParsedKey::I64(s) => s.phf_hash(state), - ParsedKey::I128(s) => s.phf_hash(state), - ParsedKey::U8(s) => s.phf_hash(state), - ParsedKey::U16(s) => s.phf_hash(state), - ParsedKey::U32(s) => s.phf_hash(state), - ParsedKey::U64(s) => s.phf_hash(state), - ParsedKey::U128(s) => s.phf_hash(state), - ParsedKey::Bool(s) => s.phf_hash(state), - #[cfg(feature = "unicase")] - ParsedKey::UniCase(s) => s.phf_hash(state), - } - } -} - -impl ParsedKey { - fn from_expr(expr: &Expr) -> Option { - match expr { - Expr::Lit(lit) => match &lit.lit { - Lit::Str(s) => Some(ParsedKey::Str(s.value())), - Lit::ByteStr(s) => Some(ParsedKey::Binary(s.value())), - Lit::Byte(s) => Some(ParsedKey::U8(s.value())), - Lit::Char(s) => Some(ParsedKey::Char(s.value())), - Lit::Int(s) => match s.suffix() { - // we've lost the sign at this point, so `-128i8` looks like `128i8`, - // which doesn't fit in an `i8`; parse it as a `u8` and cast (to `0i8`), - // which is handled below, by `Unary` - "i8" => Some(ParsedKey::I8(s.base10_parse::().unwrap() as i8)), - "i16" => Some(ParsedKey::I16(s.base10_parse::().unwrap() as i16)), - "i32" => Some(ParsedKey::I32(s.base10_parse::().unwrap() as i32)), - "i64" => Some(ParsedKey::I64(s.base10_parse::().unwrap() as i64)), - "i128" => Some(ParsedKey::I128(s.base10_parse::().unwrap() as i128)), - "u8" => Some(ParsedKey::U8(s.base10_parse::().unwrap())), - "u16" => Some(ParsedKey::U16(s.base10_parse::().unwrap())), - "u32" => Some(ParsedKey::U32(s.base10_parse::().unwrap())), - "u64" => Some(ParsedKey::U64(s.base10_parse::().unwrap())), - "u128" => Some(ParsedKey::U128(s.base10_parse::().unwrap())), - _ => None, - }, - Lit::Bool(s) => 
Some(ParsedKey::Bool(s.value)), - _ => None, - }, - Expr::Array(array) => { - let mut buf = vec![]; - for expr in &array.elems { - match expr { - Expr::Lit(lit) => match &lit.lit { - Lit::Int(s) => match s.suffix() { - "u8" | "" => buf.push(s.base10_parse::().unwrap()), - _ => return None, - }, - _ => return None, - }, - _ => return None, - } - } - Some(ParsedKey::Binary(buf)) - } - Expr::Unary(unary) => { - // if we received an integer literal (always unsigned) greater than i__::max_value() - // then casting it to a signed integer type of the same width will negate it to - // the same absolute value so we don't need to negate it here - macro_rules! try_negate ( - ($val:expr) => {if $val < 0 { $val } else { -$val }} - ); - - match unary.op { - UnOp::Neg(_) => match ParsedKey::from_expr(&unary.expr)? { - ParsedKey::I8(v) => Some(ParsedKey::I8(try_negate!(v))), - ParsedKey::I16(v) => Some(ParsedKey::I16(try_negate!(v))), - ParsedKey::I32(v) => Some(ParsedKey::I32(try_negate!(v))), - ParsedKey::I64(v) => Some(ParsedKey::I64(try_negate!(v))), - ParsedKey::I128(v) => Some(ParsedKey::I128(try_negate!(v))), - _ => None, - }, - _ => None, - } - } - Expr::Group(group) => ParsedKey::from_expr(&group.expr), - #[cfg(feature = "unicase")] - Expr::Call(call) => { - if let Expr::Path(ep) = call.func.as_ref() { - let segments = &mut ep.path.segments.iter().rev(); - let last = &segments.next()?.ident; - let last_ahead = &segments.next()?.ident; - let is_unicode = last_ahead == "UniCase" && last == "unicode"; - let is_ascii = last_ahead == "UniCase" && last == "ascii"; - if call.args.len() == 1 && (is_unicode || is_ascii) { - if let Some(Expr::Lit(ExprLit { - attrs: _, - lit: Lit::Str(s), - })) = call.args.first() - { - let v = if is_unicode { - UniCase::unicode(s.value()) - } else { - UniCase::ascii(s.value()) - }; - Some(ParsedKey::UniCase(v)) - } else { - None - } - } else { - None - } - } else { - None - } - } - _ => None, - } - } -} - -struct Key { - parsed: ParsedKey, - expr: 
Expr, -} +// `Key` struct previously; arbitrary hashable expression now. +// `Entry` struct previously; tuple of `Key` and an arbitrary expression as value; hashable by key +// `Map` struct previously; duplicates-checked Vec of `Entry`s. +// `Set` struct previously; duplicates-checked Vec of `Entry`s with real key and hacked `()` as value. -impl PhfHash for Key { - fn phf_hash(&self, state: &mut H) - where - H: Hasher, - { - self.parsed.phf_hash(state) - } -} - -impl Parse for Key { - fn parse(input: ParseStream<'_>) -> parse::Result { - let expr = input.parse()?; - let parsed = ParsedKey::from_expr(&expr) - .ok_or_else(|| Error::new_spanned(&expr, "unsupported key expression"))?; - - Ok(Key { parsed, expr }) - } -} - -struct Entry { - key: Key, - value: Expr, -} - -impl PhfHash for Entry { - fn phf_hash(&self, state: &mut H) - where - H: Hasher, - { - self.key.phf_hash(state) - } -} - -impl Parse for Entry { - fn parse(input: ParseStream<'_>) -> parse::Result { - let key = input.parse()?; - input.parse::]>()?; - let value = input.parse()?; - Ok(Entry { key, value }) - } +const fn check_duplicates(_entries: &[(Key, Value); N]) { + // TODO: Implement this and enable `const_panic` feature. 
} -struct Map(Vec); - -impl Parse for Map { - fn parse(input: ParseStream<'_>) -> parse::Result { - let parsed = Punctuated::::parse_terminated(input)?; - let map = parsed.into_iter().collect::>(); - check_duplicates(&map)?; - Ok(Map(map)) - } -} - -struct Set(Vec); - -impl Parse for Set { - fn parse(input: ParseStream<'_>) -> parse::Result { - let parsed = Punctuated::::parse_terminated(input)?; - let set = parsed - .into_iter() - .map(|key| Entry { - key, - value: syn::parse_str("()").unwrap(), - }) - .collect::>(); - check_duplicates(&set)?; - Ok(Set(set)) - } -} - -fn check_duplicates(entries: &[Entry]) -> parse::Result<()> { +/*fn check_duplicates(entries: &[Entry]) -> parse::Result<()> { let mut keys = HashSet::new(); for entry in entries { if !keys.insert(&entry.key.parsed) { @@ -245,76 +46,98 @@ fn check_duplicates(entries: &[Entry]) -> parse::Result<()> { } } Ok(()) -} +}*/ -fn build_map(entries: &[Entry], state: HashState) -> proc_macro2::TokenStream { - let key = state.key; - let disps = state.disps.iter().map(|&(d1, d2)| quote!((#d1, #d2))); - let entries = state.map.iter().map(|&idx| { - let key = &entries[idx].key.expr; - let value = &entries[idx].value; - quote!((#key, #value)) - }); +pub struct Entry<'a, Key, Value>(&'a (Key, Value)); - quote! { - phf::Map { - key: #key, - disps: &[#(#disps),*], - entries: &[#(#entries),*], - } +impl<'a, Key: ~const Hash, Value> const PhfHash for Entry<'a, Key, Value> { + #[inline] + fn phf_hash(&self, state: &mut H) { + self.0 .0.hash(state) } } -fn build_ordered_map(entries: &[Entry], state: HashState) -> proc_macro2::TokenStream { - let key = state.key; - let disps = state.disps.iter().map(|&(d1, d2)| quote!((#d1, #d2))); - let idxs = state.map.iter().map(|idx| quote!(#idx)); - let entries = entries.iter().map(|entry| { - let key = &entry.key.expr; - let value = &entry.value; - quote!((#key, #value)) - }); - - quote! 
{ - phf::OrderedMap { - key: #key, - disps: &[#(#disps),*], - idxs: &[#(#idxs),*], - entries: &[#(#entries),*], - } +pub const fn phf_map( + entries: &[(Key, Value); N], +) -> ([(Key, Value); N], HashState) +where + (Key, Value): Copy, + [(); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA]: Sized, +{ + check_duplicates(entries); + + // Produce a hash state over all the keys in our map. + let mut keys = MaybeUninit::uninit_array::(); + let mut i = 0; + while i < entries.len() { + keys[i].write(Entry(&entries[i])); + i += 1; + } + let state = phf_generator::generate_hash(unsafe { &const_array_assume_init(&keys) }); + + // Reorder all the entries as per state's map. + let mut ordered_entries = MaybeUninit::uninit_array::(); + i = 0; + while i < state.map.len() { + ordered_entries[i].write(entries[i]); + i += 1; } -} - -#[proc_macro_hack::proc_macro_hack] -pub fn phf_map(input: TokenStream) -> TokenStream { - let map = parse_macro_input!(input as Map); - let state = phf_generator::generate_hash(&map.0); - - build_map(&map.0, state).into() -} - -#[proc_macro_hack::proc_macro_hack] -pub fn phf_set(input: TokenStream) -> TokenStream { - let set = parse_macro_input!(input as Set); - let state = phf_generator::generate_hash(&set.0); - let map = build_map(&set.0, state); - quote!(phf::Set { map: #map }).into() + (unsafe { const_array_assume_init(&ordered_entries) }, state) } -#[proc_macro_hack::proc_macro_hack] -pub fn phf_ordered_map(input: TokenStream) -> TokenStream { - let map = parse_macro_input!(input as Map); - let state = phf_generator::generate_hash(&map.0); +pub const fn phf_ordered_map( + entries: &[(Key, Value); N], +) -> ([(Key, Value); N], HashState) +where + (Key, Value): Copy, + [(); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA]: Sized, +{ + check_duplicates(entries); - build_ordered_map(&map.0, state).into() -} + // Produce a hash state over all the keys in our map. 
+ let mut keys = MaybeUninit::uninit_array::(); + let mut i = 0; + while i < entries.len() { + keys[i].write(Entry(&entries[i])); + i += 1; + } + let state = phf_generator::generate_hash(unsafe { &const_array_assume_init(&keys) }); + + // We don't need to do any sorting here. + (*entries, state) +} + +pub const fn phf_set( + entries: &[Key; N], +) -> ([(Key, ()); N], HashState) +where + Key: Copy, + [(); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA]: Sized, +{ + let mut map_entries = MaybeUninit::uninit_array::(); + let mut i = 0; + while i < map_entries.len() { + map_entries[i].write((entries[i], ())); + i += 1; + } -#[proc_macro_hack::proc_macro_hack] -pub fn phf_ordered_set(input: TokenStream) -> TokenStream { - let set = parse_macro_input!(input as Set); - let state = phf_generator::generate_hash(&set.0); + phf_map(unsafe { &const_array_assume_init(&map_entries) }) +} + +pub const fn phf_ordered_set( + entries: &[Key; N], +) -> ([(Key, ()); N], HashState) +where + Key: Copy, + [(); (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA]: Sized, +{ + let mut map_entries = MaybeUninit::uninit_array::(); + let mut i = 0; + while i < map_entries.len() { + map_entries[i].write((entries[i], ())); + i += 1; + } - let map = build_ordered_map(&set.0, state); - quote!(phf::OrderedSet { map: #map }).into() + phf_ordered_map(unsafe { &const_array_assume_init(&map_entries) }) } From 20c7b2741386a2ba5389d0469f4af3be0f384775 Mon Sep 17 00:00:00 2001 From: Valentin B Date: Fri, 22 Oct 2021 20:37:55 +0200 Subject: [PATCH 12/21] phf: Turn macros into constant expressions --- phf/Cargo.toml | 3 +- phf/src/lib.rs | 68 +++++++++++++++++++++++++++++++++++--- phf_generator/src/utils.rs | 11 ++++++ 3 files changed, 77 insertions(+), 5 deletions(-) diff --git a/phf/Cargo.toml b/phf/Cargo.toml index 3aa98602..19b3050b 100644 --- a/phf/Cargo.toml +++ b/phf/Cargo.toml @@ -18,9 +18,10 @@ default = ["std"] std = ["phf_shared/std"] uncased = ["phf_shared/uncased"] unicase = ["phf_shared/unicase"] -macros = 
["phf_macros"] +macros = ["phf_generator", "phf_macros"] [dependencies] +phf_generator = { version = "0.11.0", features = ["const-api"], optional = true } phf_macros = { version = "0.11.0", optional = true } phf_shared = { version = "0.11.0", default-features = false, features = ["const-api"] } diff --git a/phf/src/lib.rs b/phf/src/lib.rs index 31930ee2..e3bb451c 100644 --- a/phf/src/lib.rs +++ b/phf/src/lib.rs @@ -71,6 +71,9 @@ //! [#183]: https://github.com/rust-phf/rust-phf/issues/183 //! [#196]: https://github.com/rust-phf/rust-phf/issues/196 +// XXX: Remove on stabilization. +#![allow(incomplete_features)] +#![feature(generic_const_exprs, const_trait_impl)] #![doc(html_root_url = "https://docs.rs/phf/0.11")] #![warn(missing_docs)] #![cfg_attr(not(feature = "std"), no_std)] @@ -83,6 +86,37 @@ extern crate std as core; #[doc(hidden)] pub extern crate phf_macros as __phf_macros; +#[cfg(feature = "macros")] +impl const + From<&'static ([(Key, Value); N], phf_generator::HashState)> for Map +where + [(); (N + phf_generator::DEFAULT_LAMBDA - 1) / phf_generator::DEFAULT_LAMBDA]: Sized, +{ + fn from(v: &'static ([(Key, Value); N], phf_generator::HashState)) -> Self { + Self { + key: v.1.key, + disps: &*v.1.disps, + entries: &v.0, + } + } +} + +#[cfg(feature = "macros")] +impl const + From<&'static ([(Key, Value); N], phf_generator::HashState)> for OrderedMap +where + [(); (N + phf_generator::DEFAULT_LAMBDA - 1) / phf_generator::DEFAULT_LAMBDA]: Sized, +{ + fn from(v: &'static ([(Key, Value); N], phf_generator::HashState)) -> Self { + Self { + key: v.1.key, + disps: &*v.1.disps, + idxs: &*v.1.map, + entries: &v.0, + } + } +} + #[cfg(feature = "macros")] /// Macro to create a `static` (compile-time) [`Map`]. /// @@ -102,13 +136,25 @@ pub extern crate phf_macros as __phf_macros; /// assert_eq!(MY_MAP["hello"], 1); /// } /// ``` -pub use phf_macros::phf_map; +#[macro_export] +macro_rules! 
phf_map { + ($($($key:tt)* => $($value:tt)*),* $(,)*) => { + Map::from(&$crate::__phf_macros::phf_map(&[$(($($key)*, $($value)*)),*])) + }; +} #[cfg(feature = "macros")] /// Macro to create a `static` (compile-time) [`OrderedMap`]. /// /// Requires the `macros` feature. Same usage as [`phf_map`]. -pub use phf_macros::phf_ordered_map; +#[macro_export] +macro_rules! phf_ordered_map { + ($($($key:tt)* => $($value:tt)*),* $(,)*) => { + OrderedMap::from( + &$crate::__phf_macros::phf_ordered_map(&[$(($($key)*, $($value)*)),*]), + ) + }; +} #[cfg(feature = "macros")] /// Macro to create a `static` (compile-time) [`Set`]. @@ -129,13 +175,27 @@ pub use phf_macros::phf_ordered_map; /// assert!(MY_SET.contains("hello world")); /// } /// ``` -pub use phf_macros::phf_set; +#[macro_export] +macro_rules! phf_set { + ($($($key:tt)*),* $(,)*) => { + Set { + map: Map::from($crate::__phf_macros::phf_set(&[$($($key)*),*])), + } + }; +} #[cfg(feature = "macros")] /// Macro to create a `static` (compile-time) [`OrderedSet`]. /// /// Requires the `macros` feature. Same usage as [`phf_set`]. -pub use phf_macros::phf_ordered_set; +#[macro_export] +macro_rules! 
phf_ordered_set { + ($($($key:tt)*),* $(,)*) => { + OrderedSet { + map: OrderedMap::from($crate::__phf_macros::phf_ordered_set(&[$($($key)*),*])), + } + }; +} #[doc(inline)] pub use self::map::Map; diff --git a/phf_generator/src/utils.rs b/phf_generator/src/utils.rs index e9fa2459..ebcacbb5 100644 --- a/phf_generator/src/utils.rs +++ b/phf_generator/src/utils.rs @@ -1,3 +1,5 @@ +use core::ops::Deref; + #[derive(Clone, Copy)] pub struct ArrayVec { arr: [T; N], @@ -53,3 +55,12 @@ impl ArrayVec { self.arr[i] = value; } } + +impl const Deref for ArrayVec { + type Target = [T; N]; + + #[inline(always)] + fn deref(&self) -> &Self::Target { + &self.arr + } +} From a263190232814c3701243c06a0c5ba286047ad9c Mon Sep 17 00:00:00 2001 From: Valentin B Date: Fri, 22 Oct 2021 21:09:45 +0200 Subject: [PATCH 13/21] phf: Properly reference crate types in macros --- phf/src/lib.rs | 14 ++++++++------ phf_generator/src/lib.rs | 1 - 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/phf/src/lib.rs b/phf/src/lib.rs index e3bb451c..ea15560f 100644 --- a/phf/src/lib.rs +++ b/phf/src/lib.rs @@ -139,7 +139,7 @@ where #[macro_export] macro_rules! phf_map { ($($($key:tt)* => $($value:tt)*),* $(,)*) => { - Map::from(&$crate::__phf_macros::phf_map(&[$(($($key)*, $($value)*)),*])) + $crate::Map::from(&$crate::__phf_macros::phf_map(&[$(($($key)*, $($value)*)),*])) }; } @@ -150,7 +150,7 @@ macro_rules! phf_map { #[macro_export] macro_rules! phf_ordered_map { ($($($key:tt)* => $($value:tt)*),* $(,)*) => { - OrderedMap::from( + $crate::OrderedMap::from( &$crate::__phf_macros::phf_ordered_map(&[$(($($key)*, $($value)*)),*]), ) }; @@ -178,8 +178,8 @@ macro_rules! phf_ordered_map { #[macro_export] macro_rules! phf_set { ($($($key:tt)*),* $(,)*) => { - Set { - map: Map::from($crate::__phf_macros::phf_set(&[$($($key)*),*])), + $crate::Set { + map: $crate::Map::from($crate::__phf_macros::phf_set(&[$($($key)*),*])), } }; } @@ -191,8 +191,10 @@ macro_rules! phf_set { #[macro_export] macro_rules! 
phf_ordered_set { ($($($key:tt)*),* $(,)*) => { - OrderedSet { - map: OrderedMap::from($crate::__phf_macros::phf_ordered_set(&[$($($key)*),*])), + $crate::OrderedSet { + map: $crate::OrderedMap::from( + $crate::__phf_macros::phf_ordered_set(&[$($($key)*),*]), + ), } }; } diff --git a/phf_generator/src/lib.rs b/phf_generator/src/lib.rs index 12c9f3d5..3e7dbea3 100644 --- a/phf_generator/src/lib.rs +++ b/phf_generator/src/lib.rs @@ -125,7 +125,6 @@ where // buckets.sort_by(|a, b| a.keys.len().cmp(&b.keys.len()).reverse()); // TODO - // table_len = N let mut map: ArrayVec, N> = ArrayVec::new(None); let mut disps: ArrayVec<(u32, u32), { (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA }> = ArrayVec::new((0, 0)); From 43f883f91f4fe86a0b725b408daea48e6cd290bd Mon Sep 17 00:00:00 2001 From: Valentin B Date: Fri, 22 Oct 2021 22:19:08 +0200 Subject: [PATCH 14/21] phf_generator: Implement const quicksort for buckets --- phf_generator/src/lib.rs | 50 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/phf_generator/src/lib.rs b/phf_generator/src/lib.rs index 3e7dbea3..224637dd 100644 --- a/phf_generator/src/lib.rs +++ b/phf_generator/src/lib.rs @@ -122,8 +122,54 @@ where } // Sort descending - // buckets.sort_by(|a, b| a.keys.len().cmp(&b.keys.len()).reverse()); - // TODO + { + const fn partition( + buckets: &mut [Bucket], + mut start: usize, + mut end: usize, + ) -> usize { + let pivot_idx = start; + let pivot = buckets[start]; + + while start < end { + // Increment start until an element smaller than pivot is found. + while start < buckets.len() && pivot.keys.len() <= buckets[start].keys.len() { + start += 1; + } + + // Decrement end until an element greater than pivot is found. + while pivot.keys.len() > buckets[end].keys.len() { + end -= 1; + } + + // If start and end have not crossed each other, swap them. 
+ if start < end { + let temp = buckets[start]; + buckets[start] = buckets[end]; + buckets[end] = temp; + } + } + + // Swap pivot element and end to put pivot in its correct place. + let temp = buckets[end]; + buckets[end] = buckets[pivot_idx]; + buckets[pivot_idx] = temp; + + end + } + + const fn quick_sort(start: usize, end: usize, buckets: &mut [Bucket]) { + if start < end { + let part = partition(buckets, start, end); + + // Sort elements before and after partition. + quick_sort(start, part - 1, buckets); + quick_sort(part + 1, end, buckets); + } + } + + quick_sort(0, buckets.len(), &mut buckets) + } let mut map: ArrayVec, N> = ArrayVec::new(None); let mut disps: ArrayVec<(u32, u32), { (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA }> = From 69a0db717ccb744cf41262a4db76b34d4635ffb1 Mon Sep 17 00:00:00 2001 From: Valentin B Date: Sat, 23 Oct 2021 15:10:25 +0200 Subject: [PATCH 15/21] phf: Fix build and most of the tests --- phf/src/lib.rs | 54 +++++++++---------- phf_generator/src/lib.rs | 96 +++++++++++++--------------------- phf_generator/src/utils.rs | 64 ++++++++++++++++++----- phf_macros/Cargo.toml | 4 +- phf_macros/src/lib.rs | 31 +++-------- phf_macros_tests/src/lib.rs | 1 - phf_macros_tests/tests/test.rs | 5 +- phf_shared/src/lib.rs | 12 ++--- 8 files changed, 132 insertions(+), 135 deletions(-) diff --git a/phf/src/lib.rs b/phf/src/lib.rs index ea15560f..abe9290c 100644 --- a/phf/src/lib.rs +++ b/phf/src/lib.rs @@ -87,33 +87,33 @@ extern crate std as core; pub extern crate phf_macros as __phf_macros; #[cfg(feature = "macros")] -impl const - From<&'static ([(Key, Value); N], phf_generator::HashState)> for Map +#[doc(hidden)] +pub const fn build_map( + state: &'static ([(Key, Value); N], phf_generator::HashState), +) -> Map where [(); (N + phf_generator::DEFAULT_LAMBDA - 1) / phf_generator::DEFAULT_LAMBDA]: Sized, { - fn from(v: &'static ([(Key, Value); N], phf_generator::HashState)) -> Self { - Self { - key: v.1.key, - disps: &*v.1.disps, - entries: &v.0, - } 
+ Map { + key: state.1.key, + disps: &*state.1.disps, + entries: &state.0, } } #[cfg(feature = "macros")] -impl const - From<&'static ([(Key, Value); N], phf_generator::HashState)> for OrderedMap +#[doc(hidden)] +pub const fn build_ordered_map( + state: &'static ([(Key, Value); N], phf_generator::HashState), +) -> OrderedMap where [(); (N + phf_generator::DEFAULT_LAMBDA - 1) / phf_generator::DEFAULT_LAMBDA]: Sized, { - fn from(v: &'static ([(Key, Value); N], phf_generator::HashState)) -> Self { - Self { - key: v.1.key, - disps: &*v.1.disps, - idxs: &*v.1.map, - entries: &v.0, - } + OrderedMap { + key: state.1.key, + disps: &*state.1.disps, + idxs: &*state.1.map, + entries: &state.0, } } @@ -138,8 +138,8 @@ where /// ``` #[macro_export] macro_rules! phf_map { - ($($($key:tt)* => $($value:tt)*),* $(,)*) => { - $crate::Map::from(&$crate::__phf_macros::phf_map(&[$(($($key)*, $($value)*)),*])) + ($($key:expr => $value:expr),* $(,)*) => { + $crate::build_map(&$crate::__phf_macros::phf_map(&[$(($key, $value)),*])) }; } @@ -149,9 +149,9 @@ macro_rules! phf_map { /// Requires the `macros` feature. Same usage as [`phf_map`]. #[macro_export] macro_rules! phf_ordered_map { - ($($($key:tt)* => $($value:tt)*),* $(,)*) => { - $crate::OrderedMap::from( - &$crate::__phf_macros::phf_ordered_map(&[$(($($key)*, $($value)*)),*]), + ($($key:expr => $value:expr),* $(,)*) => { + $crate::build_ordered_map( + &$crate::__phf_macros::phf_ordered_map(&[$(($key, $value)),*]), ) }; } @@ -177,9 +177,9 @@ macro_rules! phf_ordered_map { /// ``` #[macro_export] macro_rules! phf_set { - ($($($key:tt)*),* $(,)*) => { + ($($key:expr),* $(,)*) => { $crate::Set { - map: $crate::Map::from($crate::__phf_macros::phf_set(&[$($($key)*),*])), + map: $crate::build_map(&$crate::__phf_macros::phf_set(&[$($key),*])), } }; } @@ -190,10 +190,10 @@ macro_rules! phf_set { /// Requires the `macros` feature. Same usage as [`phf_set`]. #[macro_export] macro_rules! 
phf_ordered_set { - ($($($key:tt)*),* $(,)*) => { + ($($key:expr),* $(,)*) => { $crate::OrderedSet { - map: $crate::OrderedMap::from( - $crate::__phf_macros::phf_ordered_set(&[$($($key)*),*]), + map: $crate::build_ordered_map( + &$crate::__phf_macros::phf_ordered_set(&[$($key),*]), ), } }; diff --git a/phf_generator/src/lib.rs b/phf_generator/src/lib.rs index 224637dd..e24f9933 100644 --- a/phf_generator/src/lib.rs +++ b/phf_generator/src/lib.rs @@ -21,6 +21,8 @@ mod utils; use phf_shared::{HashKey, PhfHash}; use rng::Rng; +// We need `DEFAULT_LAMBDA` as part of the stable public API to formalize +// where clauses for the const API on map and set generation methods. #[doc(hidden)] pub const DEFAULT_LAMBDA: usize = 5; @@ -88,7 +90,7 @@ where fn default() -> Self { Bucket { idx: 0, - keys: ArrayVec::new(0), + keys: ArrayVec::new_empty(0), } } } @@ -123,57 +125,31 @@ where // Sort descending { - const fn partition( - buckets: &mut [Bucket], - mut start: usize, - mut end: usize, - ) -> usize { - let pivot_idx = start; - let pivot = buckets[start]; - - while start < end { - // Increment start until an element smaller than pivot is found. - while start < buckets.len() && pivot.keys.len() <= buckets[start].keys.len() { - start += 1; + // This is a bubble sort. Given that it is executed at compile-time + // without any runtime overhead over relatively few entries from + // hand-written macro literals, its minimal and robust implementation + // is good enough for us and the const evaluation engine. + let mut swapped = true; + while swapped { + swapped = false; + let mut i = 1; + while i < buckets.len() { + if buckets[i - 1].keys.len() < buckets[i].keys.len() { + // Swap elements + let temp = buckets[i - 1]; + buckets[i - 1] = buckets[i]; + buckets[i] = temp; + + swapped = true; } - - // Decrement end until an element greater than pivot is found. 
- while pivot.keys.len() > buckets[end].keys.len() { - end -= 1; - } - - // If start and end have not crossed each other, swap them. - if start < end { - let temp = buckets[start]; - buckets[start] = buckets[end]; - buckets[end] = temp; - } - } - - // Swap pivot element and end to put pivot in its correct place. - let temp = buckets[end]; - buckets[end] = buckets[pivot_idx]; - buckets[pivot_idx] = temp; - - end - } - - const fn quick_sort(start: usize, end: usize, buckets: &mut [Bucket]) { - if start < end { - let part = partition(buckets, start, end); - - // Sort elements before and after partition. - quick_sort(start, part - 1, buckets); - quick_sort(part + 1, end, buckets); + i += 1; } } - - quick_sort(0, buckets.len(), &mut buckets) } - let mut map: ArrayVec, N> = ArrayVec::new(None); + let mut map: ArrayVec, N> = ArrayVec::new_full(None); let mut disps: ArrayVec<(u32, u32), { (N + DEFAULT_LAMBDA - 1) / DEFAULT_LAMBDA }> = - ArrayVec::new((0, 0)); + ArrayVec::new_full((0, 0)); // store whether an element from the bucket being placed is // located at a certain position, to allow for efficient overlap @@ -188,12 +164,12 @@ where // the actual values corresponding to the markers above, as // (index, key) pairs, for adding to the main map, once we've // chosen the right disps. 
- let mut values_to_add: ArrayVec<(usize, usize), N> = ArrayVec::new((0, 0)); + let mut values_to_add: ArrayVec<(usize, usize), N> = ArrayVec::new_empty((0, 0)); - let mut i1 = 0; - 'buckets: while i1 < buckets.len() { - let bucket = &buckets[i1]; - i1 += 1; + let mut i = 0; + 'buckets: while i < buckets.len() { + let bucket = &buckets[i]; + i += 1; let mut d1 = 0; while d1 < N { @@ -202,9 +178,9 @@ where values_to_add.clear(); generation += 1; - let mut i2 = 0; - while i2 < bucket.keys.len() { - let key = bucket.keys.get(i2); + let mut j = 0; + while j < bucket.keys.len() { + let key = bucket.keys[j]; let idx = (phf_shared::displace(hashes[key].f1, hashes[key].f2, d1 as u32, d2 as u32) % (N as u32)) as usize; @@ -214,16 +190,16 @@ where } try_map[idx] = generation; values_to_add.push((idx, key)); - i2 += 1; + j += 1; } // We've picked a good set of disps disps.set(bucket.idx, (d1 as u32, d2 as u32)); - i2 = 0; - while i2 < values_to_add.len() { - let &(idx, key) = values_to_add.get_ref(i2); + j = 0; + while j < values_to_add.len() { + let (idx, key) = values_to_add.get(j); map.set(idx, Some(key)); - i2 += 1; + j += 1; } continue 'buckets; } @@ -238,7 +214,7 @@ where key, disps, map: { - let mut result: ArrayVec = ArrayVec::new(0); + let mut result: ArrayVec = ArrayVec::new_full(0); let mut i = 0; while i < map.len() { result.set(i, map.get(i).unwrap()); diff --git a/phf_generator/src/utils.rs b/phf_generator/src/utils.rs index ebcacbb5..f2f910fc 100644 --- a/phf_generator/src/utils.rs +++ b/phf_generator/src/utils.rs @@ -8,28 +8,36 @@ pub struct ArrayVec { impl ArrayVec { #[inline(always)] - pub const fn new(marker: T) -> Self { + pub const fn new_empty(marker: T) -> Self { Self { arr: [marker; N], len: 0, } } + #[inline(always)] + pub const fn new_full(marker: T) -> Self { + Self { + arr: [marker; N], + len: N, + } + } + #[inline] pub const fn len(&self) -> usize { - self.len + 1 + self.len } #[inline] - pub const fn push(&mut self, value: T) { - 
self.arr[self.len] = value; - self.len += 1; + pub const fn capacity(&self) -> usize { + self.arr.len() } #[inline] - pub const fn pop(&mut self) -> T { - self.len -= 1; - self.arr[self.len] + pub const fn push(&mut self, value: T) { + assert!(self.len() < self.capacity()); + self.arr[self.len] = value; + self.len += 1; } #[inline] @@ -39,20 +47,24 @@ impl ArrayVec { #[inline] pub const fn get(&self, i: usize) -> T { - assert!(i < self.len); + assert!(i < self.len()); self.arr[i] } #[inline] pub const fn get_ref(&self, i: usize) -> &T { - assert!(i < self.len); + assert!(i < self.len()); &self.arr[i] } #[inline] pub const fn set(&mut self, i: usize, value: T) { - assert!(i <= self.len); - self.arr[i] = value; + if i == self.len() { + self.push(value); + } else { + assert!(i < self.len()); + self.arr[i] = value; + } } } @@ -64,3 +76,31 @@ impl const Deref for ArrayVec { &self.arr } } + +#[cfg(test)] +mod tests { + use super::ArrayVec; + + #[test] + fn test_api() { + let mut arr = ArrayVec::::new_empty(0); + assert_eq!(arr.len(), 0); + assert_eq!(arr.capacity(), 10); + + arr.push(1); + arr.push(2); + arr.push(4); + assert_eq!(arr.len(), 3); + assert_eq!(arr.capacity(), 10); + assert_eq!(arr.get(2), 4); + assert_eq!(arr.get(0), 1); + + arr.push(4); + arr.set(2, 3); + assert_eq!(arr.get(2), 3); + assert_eq!(arr.get(arr.len() - 1), 4); + + arr.clear(); + assert_eq!(arr.len(), 0); + } +} diff --git a/phf_macros/Cargo.toml b/phf_macros/Cargo.toml index 7caca752..74d53719 100644 --- a/phf_macros/Cargo.toml +++ b/phf_macros/Cargo.toml @@ -10,10 +10,8 @@ readme = "../README.md" include = ["src/lib.rs"] [features] -unicase = ["unicase_", "phf_shared/unicase"] +unicase = ["phf_shared/unicase"] [dependencies] -unicase_ = { package = "unicase", version = "2.4.0", optional = true } - phf_generator = { version = "0.11.0", features = ["const-api"] } phf_shared = { version = "0.11.0", default-features = false, features = ["const-api"] } diff --git a/phf_macros/src/lib.rs 
b/phf_macros/src/lib.rs index 1ee46cf7..9dd39c66 100644 --- a/phf_macros/src/lib.rs +++ b/phf_macros/src/lib.rs @@ -17,10 +17,7 @@ maybe_uninit_uninit_array )] -use std::{ - hash::{Hash, Hasher}, - mem::{transmute_copy, MaybeUninit}, -}; +use std::mem::{transmute_copy, MaybeUninit}; use phf_generator::{HashState, DEFAULT_LAMBDA}; use phf_shared::PhfHash; @@ -29,11 +26,6 @@ const unsafe fn const_array_assume_init(array: &[MaybeUninit< transmute_copy(array) } -// `Key` struct previously; arbitrary hashable expression now. -// `Entry` struct previously; tuple of `Key` and an arbitrary expression as value; hashable by key -// `Map` struct previously; duplicates-checked Vec of `Entry`s. -// `Set` struct previously; duplicates-checked Vec of `Entry`s with real key and hacked `()` as value. - const fn check_duplicates(_entries: &[(Key, Value); N]) { // TODO: Implement this and enable `const_panic` feature. } @@ -48,16 +40,7 @@ const fn check_duplicates(_entries: &[(Key, Value); Ok(()) }*/ -pub struct Entry<'a, Key, Value>(&'a (Key, Value)); - -impl<'a, Key: ~const Hash, Value> const PhfHash for Entry<'a, Key, Value> { - #[inline] - fn phf_hash(&self, state: &mut H) { - self.0 .0.hash(state) - } -} - -pub const fn phf_map( +pub const fn phf_map( entries: &[(Key, Value); N], ) -> ([(Key, Value); N], HashState) where @@ -70,7 +53,7 @@ where let mut keys = MaybeUninit::uninit_array::(); let mut i = 0; while i < entries.len() { - keys[i].write(Entry(&entries[i])); + keys[i].write(&entries[i].0); i += 1; } let state = phf_generator::generate_hash(unsafe { &const_array_assume_init(&keys) }); @@ -86,7 +69,7 @@ where (unsafe { const_array_assume_init(&ordered_entries) }, state) } -pub const fn phf_ordered_map( +pub const fn phf_ordered_map( entries: &[(Key, Value); N], ) -> ([(Key, Value); N], HashState) where @@ -99,7 +82,7 @@ where let mut keys = MaybeUninit::uninit_array::(); let mut i = 0; while i < entries.len() { - keys[i].write(Entry(&entries[i])); + 
keys[i].write(&entries[i].0); i += 1; } let state = phf_generator::generate_hash(unsafe { &const_array_assume_init(&keys) }); @@ -108,7 +91,7 @@ where (*entries, state) } -pub const fn phf_set( +pub const fn phf_set( entries: &[Key; N], ) -> ([(Key, ()); N], HashState) where @@ -125,7 +108,7 @@ where phf_map(unsafe { &const_array_assume_init(&map_entries) }) } -pub const fn phf_ordered_set( +pub const fn phf_ordered_set( entries: &[Key; N], ) -> ([(Key, ()); N], HashState) where diff --git a/phf_macros_tests/src/lib.rs b/phf_macros_tests/src/lib.rs index 8b137891..e69de29b 100644 --- a/phf_macros_tests/src/lib.rs +++ b/phf_macros_tests/src/lib.rs @@ -1 +0,0 @@ - diff --git a/phf_macros_tests/tests/test.rs b/phf_macros_tests/tests/test.rs index a10e1a8c..c1da641f 100644 --- a/phf_macros_tests/tests/test.rs +++ b/phf_macros_tests/tests/test.rs @@ -240,7 +240,8 @@ mod map { } } - #[test] + // FIXME: Re-enable when UniCase is hashable as const fn. + /*#[test] fn test_unicase() { use unicase::UniCase; static MAP: phf::Map, isize> = phf_map!( @@ -250,7 +251,7 @@ mod map { assert!(Some(&10) == MAP.get(&UniCase::new("FOo"))); assert!(Some(&11) == MAP.get(&UniCase::new("bar"))); assert_eq!(None, MAP.get(&UniCase::new("asdf"))); - } + }*/ } mod set { diff --git a/phf_shared/src/lib.rs b/phf_shared/src/lib.rs index ded55cb9..d2ff15fb 100644 --- a/phf_shared/src/lib.rs +++ b/phf_shared/src/lib.rs @@ -13,7 +13,7 @@ extern crate std as core; mod siphasher; use core::fmt; -use core::hash::{Hash, Hasher}; +use core::hash::Hasher; use siphasher::{Hash128, Hasher128, SipHasher13}; #[derive(Clone, Copy)] @@ -38,7 +38,7 @@ pub type HashKey = u64; #[inline] pub const fn displace(f1: u32, f2: u32, d1: u32, d2: u32) -> u32 { - d2.wrapping_add(f1).wrapping_mul(d1).wrapping_add(f2) + d2.wrapping_add(f1.wrapping_mul(d1)).wrapping_add(f2) } /// `key` is from `phf_generator::HashState`. 
@@ -367,14 +367,14 @@ impl FmtConst for [u8] { } } -#[cfg(feature = "unicase")] +#[cfg(all(feature = "unicase", not(feature = "const-api")))] impl PhfHash for unicase::UniCase where - unicase::UniCase: Hash, + unicase::UniCase: core::hash::Hash, { #[inline] fn phf_hash(&self, state: &mut H) { - self.hash(state) + ::hash(self, state) } } @@ -402,7 +402,7 @@ impl<'b, 'a: 'b, S: ?Sized + 'a> PhfBorrow> for unicase: } } -#[cfg(feature = "uncased")] +#[cfg(all(feature = "uncased", not(feature = "const-api")))] impl PhfHash for uncased::UncasedStr { #[inline] fn phf_hash(&self, state: &mut H) { From f303e1360c18e77179977699a5b625ea927690e2 Mon Sep 17 00:00:00 2001 From: Valentin B Date: Sat, 23 Oct 2021 15:33:21 +0200 Subject: [PATCH 16/21] phf_macros: std => core --- phf_macros/src/lib.rs | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/phf_macros/src/lib.rs b/phf_macros/src/lib.rs index 9dd39c66..a1e588d2 100644 --- a/phf_macros/src/lib.rs +++ b/phf_macros/src/lib.rs @@ -17,7 +17,7 @@ maybe_uninit_uninit_array )] -use std::mem::{transmute_copy, MaybeUninit}; +use core::mem::{transmute_copy, MaybeUninit}; use phf_generator::{HashState, DEFAULT_LAMBDA}; use phf_shared::PhfHash; @@ -27,19 +27,10 @@ const unsafe fn const_array_assume_init(array: &[MaybeUninit< } const fn check_duplicates(_entries: &[(Key, Value); N]) { - // TODO: Implement this and enable `const_panic` feature. + // TODO: Implement once we can compare keys in const fn and produce + // a formatted panic message that points out the duplicate key. 
} -/*fn check_duplicates(entries: &[Entry]) -> parse::Result<()> { - let mut keys = HashSet::new(); - for entry in entries { - if !keys.insert(&entry.key.parsed) { - return Err(Error::new_spanned(&entry.key.expr, "duplicate key")); - } - } - Ok(()) -}*/ - pub const fn phf_map( entries: &[(Key, Value); N], ) -> ([(Key, Value); N], HashState) From 4c2d169e762ee522b8b9a2bab5ddd7d8960e3477 Mon Sep 17 00:00:00 2001 From: Valentin B Date: Sat, 23 Oct 2021 16:48:46 +0200 Subject: [PATCH 17/21] phf_macro_tests: Add more test cases --- phf_macros/src/lib.rs | 4 ---- phf_macros_tests/tests/test.rs | 26 ++++++++++++++++++++++++++ phf_shared/src/lib.rs | 2 +- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/phf_macros/src/lib.rs b/phf_macros/src/lib.rs index a1e588d2..eb536cd9 100644 --- a/phf_macros/src/lib.rs +++ b/phf_macros/src/lib.rs @@ -40,7 +40,6 @@ where { check_duplicates(entries); - // Produce a hash state over all the keys in our map. let mut keys = MaybeUninit::uninit_array::(); let mut i = 0; while i < entries.len() { @@ -49,7 +48,6 @@ where } let state = phf_generator::generate_hash(unsafe { &const_array_assume_init(&keys) }); - // Reorder all the entries as per state's map. let mut ordered_entries = MaybeUninit::uninit_array::(); i = 0; while i < state.map.len() { @@ -69,7 +67,6 @@ where { check_duplicates(entries); - // Produce a hash state over all the keys in our map. let mut keys = MaybeUninit::uninit_array::(); let mut i = 0; while i < entries.len() { @@ -78,7 +75,6 @@ where } let state = phf_generator::generate_hash(unsafe { &const_array_assume_init(&keys) }); - // We don't need to do any sorting here. (*entries, state) } diff --git a/phf_macros_tests/tests/test.rs b/phf_macros_tests/tests/test.rs index c1da641f..cb13300b 100644 --- a/phf_macros_tests/tests/test.rs +++ b/phf_macros_tests/tests/test.rs @@ -240,6 +240,32 @@ mod map { } } + #[test] + fn test_constexpr_keys() { + static MAP: phf::Map = phf_map! 
{ + stringify!(abc).len() as u8 => 0, + 5 + 4 + 3 => 1, + }; + + assert_eq!(MAP.get(&3), Some(&0)); + assert_eq!(MAP.get(&12), Some(&1)); + assert_eq!(MAP.get(&4), None); + } + + #[test] + fn test_nested_map() { + static MAP: phf::Map<&'static str, &'static phf::Map<&'static str, u16>> = phf_map! { + "nested" => &phf_map! { + "map" => 1337, + }, + }; + + assert_eq!( + MAP.get(&"nested").and_then(|m| m.get(&"map")), + Some(&1337) + ); + } + // FIXME: Re-enable when UniCase is hashable as const fn. /*#[test] fn test_unicase() { diff --git a/phf_shared/src/lib.rs b/phf_shared/src/lib.rs index d2ff15fb..4ded4616 100644 --- a/phf_shared/src/lib.rs +++ b/phf_shared/src/lib.rs @@ -16,7 +16,7 @@ use core::fmt; use core::hash::Hasher; use siphasher::{Hash128, Hasher128, SipHasher13}; -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] #[non_exhaustive] pub struct Hashes { pub g: u32, From d4c3ae6f555bdaf4c270d8de3d2d4e3939e746b9 Mon Sep 17 00:00:00 2001 From: Valentin B Date: Sat, 23 Oct 2021 18:19:46 +0200 Subject: [PATCH 18/21] phf_shared: Clean up sip hasher implementation --- phf_shared/src/lib.rs | 2 +- phf_shared/src/siphasher.rs | 181 +++++++++++++++--------------------- 2 files changed, 77 insertions(+), 106 deletions(-) diff --git a/phf_shared/src/lib.rs b/phf_shared/src/lib.rs index 4ded4616..729a947b 100644 --- a/phf_shared/src/lib.rs +++ b/phf_shared/src/lib.rs @@ -14,7 +14,7 @@ mod siphasher; use core::fmt; use core::hash::Hasher; -use siphasher::{Hash128, Hasher128, SipHasher13}; +use siphasher::{Hash128, SipHasher13}; #[derive(Clone, Copy, Debug)] #[non_exhaustive] diff --git a/phf_shared/src/siphasher.rs b/phf_shared/src/siphasher.rs index 06e5d13b..02b00306 100644 --- a/phf_shared/src/siphasher.rs +++ b/phf_shared/src/siphasher.rs @@ -12,7 +12,6 @@ use core::{hash, mem, ptr}; -/// A 128-bit (2x64) hash output. 
#[derive(Debug, Clone, Copy, Default)] pub struct Hash128 { pub h1: u64, @@ -21,7 +20,7 @@ pub struct Hash128 { impl const From for Hash128 { fn from(v: u128) -> Self { - Hash128 { + Self { h1: v as u64, h2: (v >> 64) as u64, } @@ -29,8 +28,8 @@ impl const From for Hash128 { } impl const From for u128 { - fn from(h: Hash128) -> u128 { - (h.h1 as u128) | ((h.h2 as u128) << 64) + fn from(v: Hash128) -> Self { + (v.h1 as u128) | ((v.h2 as u128) << 64) } } @@ -41,7 +40,7 @@ pub struct SipHasher13 { k1: u64, length: usize, // how many bytes we've processed state: State, // hash State - tail: u64, // uncompressed bytes le + tail: u64, // unprocessed bytes le ntail: usize, // how many bytes in tail are valid } @@ -96,11 +95,11 @@ impl State { #[inline] const fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 { debug_assert!(len < 8); - let mut i = 0; // current byte index (from LSB) in the output u64. + let mut i = 0; // current byte index (from LSB) in the output u64 let mut out = 0; if i + 3 < len { out = u32::from_le_bytes([ - buf[start + i + 0], + buf[start + i], buf[start + i + 1], buf[start + i + 2], buf[start + i + 3], @@ -108,7 +107,7 @@ const fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 { i += 4; } if i + 1 < len { - out |= (u16::from_le_bytes([buf[start + i + 0], buf[start + i + 1]]) as u64) << (i * 8); + out |= (u16::from_le_bytes([buf[start + i], buf[start + i + 1]]) as u64) << (i * 8); i += 2; } if i < len { @@ -119,16 +118,11 @@ const fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 { out } -pub trait Hasher128 { - /// Returns a 128-bit hash - fn finish128(&self) -> Hash128; -} - impl SipHasher13 { - /// Creates a `SipHasher13` that is keyed off the provided keys. - #[inline] - pub const fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 { - let mut state = Self { + /// Creates a new `SipHasher13` that is keyed off the provided keys. 
+ #[inline(always)] + pub const fn new_with_keys(key0: u64, key1: u64) -> Self { + let mut state = SipHasher13 { k0: key0, k1: key1, length: 0, @@ -145,7 +139,7 @@ impl SipHasher13 { state } - #[inline(always)] + #[inline] const fn reset(&mut self) { self.length = 0; self.state.v0 = self.k0 ^ 0x736f6d6570736575; @@ -192,7 +186,9 @@ impl SipHasher13 { self.tail = if needed < 8 { x >> (8 * needed) } else { 0 }; } - const fn finish128(&self) -> Hash128 { + /// Return a 128-bit hash + #[inline] + pub const fn finish128(&self) -> Hash128 { let mut state = self.state; let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail; @@ -213,18 +209,60 @@ impl SipHasher13 { } } -impl const Hasher128 for SipHasher13 { - /// Return a 128-bit hash +impl const hash::Hasher for SipHasher13 { #[inline] - fn finish128(&self) -> Hash128 { - Self::finish128(self) + fn finish(&self) -> u64 { + self.finish128().h2 } -} -impl const hash::Hasher for SipHasher13 { #[inline] - fn write_usize(&mut self, i: usize) { - self.short_write::(i.to_le() as u64); + fn write(&mut self, msg: &[u8]) { + let length = msg.len(); + self.length += length; + + let mut needed = 0; + + if self.ntail != 0 { + needed = 8 - self.ntail; + if length < needed { + self.tail |= u8to64_le(msg, 0, length) << (8 * self.ntail); + self.ntail += length; + return; + } else { + self.tail |= u8to64_le(msg, 0, needed) << (8 * self.ntail); + self.state.v3 ^= self.tail; + self.state.c_rounds(); + self.state.v0 ^= self.tail; + self.ntail = 0; + } + } + + // Buffered tail is now flushed, process new input. 
+ let len = length - needed; + let left = len & 0x7; + + let mut i = needed; + while i < len - left { + let mi = u64::from_le_bytes([ + msg[i], + msg[i + 1], + msg[i + 2], + msg[i + 3], + msg[i + 4], + msg[i + 5], + msg[i + 6], + msg[i + 7], + ]); + + self.state.v3 ^= mi; + self.state.c_rounds(); + self.state.v0 ^= mi; + + i += 8; + } + + self.tail = u8to64_le(msg, i, left); + self.ntail = left; } #[inline] @@ -234,7 +272,6 @@ impl const hash::Hasher for SipHasher13 { #[inline] fn write_u16(&mut self, i: u16) { - // TODO: Is this correct? self.short_write::(i.to_le() as u64); } @@ -254,88 +291,38 @@ impl const hash::Hasher for SipHasher13 { } #[inline] - fn write_isize(&mut self, i: isize) { - self.write_usize(i as usize); + fn write_usize(&mut self, i: usize) { + self.short_write::(i.to_le() as u64); } #[inline] fn write_i8(&mut self, i: i8) { - self.write_u8(i as u8); + self.write_u8(i as u8) } #[inline] fn write_i16(&mut self, i: i16) { - self.write_u16(i as u16); + self.write_u16(i as u16) } #[inline] fn write_i32(&mut self, i: i32) { - self.write_u32(i as u32); + self.write_u32(i as u32) } #[inline] fn write_i64(&mut self, i: i64) { - self.write_u64(i as u64); + self.write_u64(i as u64) } #[inline] fn write_i128(&mut self, i: i128) { - self.write_u128(i as u128); + self.write_u128(i as u128) } #[inline] - fn write(&mut self, bytes: &[u8]) { - let length = bytes.len(); - self.length += length; - - let mut needed = 0; - - if self.ntail != 0 { - needed = 8 - self.ntail; - if length < needed { - self.tail |= u8to64_le(bytes, 0, length) << (8 * self.ntail); - self.ntail += length; - return; - } else { - self.tail |= u8to64_le(bytes, 0, needed) << (8 * self.ntail); - self.state.v3 ^= self.tail; - self.state.c_rounds(); - self.state.v0 ^= self.tail; - self.ntail = 0; - } - } - - // Buffered tail is now flushed, process new input. 
- let len = length - needed; - let left = len & 0x7; - - let mut i = needed; - while i < len - left { - let mi = u64::from_le_bytes([ - bytes[i + 0], - bytes[i + 1], - bytes[i + 2], - bytes[i + 3], - bytes[i + 4], - bytes[i + 5], - bytes[i + 6], - bytes[i + 7], - ]); - - self.state.v3 ^= mi; - self.state.c_rounds(); - self.state.v0 ^= mi; - - i += 8; - } - - self.tail = u8to64_le(bytes, i, left); - self.ntail = left; - } - - #[inline] - fn finish(&self) -> u64 { - self.finish128().h2 + fn write_isize(&mut self, i: isize) { + self.write_usize(i as usize) } } @@ -351,27 +338,11 @@ impl Hash128 { } bytes } - - /// Convert into a `u128` - #[inline] - pub const fn as_u128(&self) -> u128 { - let h1 = self.h1.to_le(); - let h2 = self.h2.to_le(); - h1 as u128 | ((h2 as u128) << 64) - } - - /// Convert into `(u64, u64)` - #[inline] - pub const fn as_u64(&self) -> (u64, u64) { - let h1 = self.h1.to_le(); - let h2 = self.h2.to_le(); - (h1, h2) - } } #[cfg(test)] mod tests { - use super::{Hasher128, SipHasher13}; + use super::SipHasher13; use std::hash::{Hash, Hasher}; // Hash just the bytes of the slice, without length prefix @@ -385,7 +356,7 @@ mod tests { } } - fn hash_with(mut st: H, x: &T) -> [u8; 16] { + fn hash_with(mut st: SipHasher13, x: &T) -> [u8; 16] { x.hash(&mut st); st.finish128().as_bytes() } From 4a2304ae25d227644f43c27d832725207fabe6af Mon Sep 17 00:00:00 2001 From: Valentin B Date: Sat, 23 Oct 2021 19:17:16 +0200 Subject: [PATCH 19/21] phf_macros: Sort correctly and fix tests --- phf_macros/src/lib.rs | 4 +++- phf_macros_tests/tests/test.rs | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/phf_macros/src/lib.rs b/phf_macros/src/lib.rs index eb536cd9..8c37e9bb 100644 --- a/phf_macros/src/lib.rs +++ b/phf_macros/src/lib.rs @@ -10,6 +10,7 @@ const_maybe_uninit_write, const_mut_refs, const_ptr_read, + const_panic, const_refs_to_cell, const_trait_impl, const_transmute_copy, @@ -51,7 +52,8 @@ where let mut ordered_entries = 
MaybeUninit::uninit_array::(); i = 0; while i < state.map.len() { - ordered_entries[i].write(entries[i]); + let idx = state.map[i]; + ordered_entries[i].write(entries[idx]); i += 1; } diff --git a/phf_macros_tests/tests/test.rs b/phf_macros_tests/tests/test.rs index cb13300b..a51b4807 100644 --- a/phf_macros_tests/tests/test.rs +++ b/phf_macros_tests/tests/test.rs @@ -247,6 +247,10 @@ mod map { 5 + 4 + 3 => 1, }; + for (k, v) in &MAP { + println!("{} {}", k, v); + } + assert_eq!(MAP.get(&3), Some(&0)); assert_eq!(MAP.get(&12), Some(&1)); assert_eq!(MAP.get(&4), None); From c16e10e344e590dd2cfa2d3498ae9d6496d6b398 Mon Sep 17 00:00:00 2001 From: Valentin B Date: Sat, 23 Oct 2021 19:20:58 +0200 Subject: [PATCH 20/21] phf: Fix doctest --- phf/src/lib.rs | 2 +- phf_macros_tests/tests/test.rs | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/phf/src/lib.rs b/phf/src/lib.rs index abe9290c..a90c91a0 100644 --- a/phf/src/lib.rs +++ b/phf/src/lib.rs @@ -36,7 +36,7 @@ //! ```rust //! use phf::phf_map; //! -//! #[derive(Clone)] +//! #[derive(Clone, Copy)] //! pub enum Keyword { //! Loop, //! 
Continue, diff --git a/phf_macros_tests/tests/test.rs b/phf_macros_tests/tests/test.rs index a51b4807..cb13300b 100644 --- a/phf_macros_tests/tests/test.rs +++ b/phf_macros_tests/tests/test.rs @@ -247,10 +247,6 @@ mod map { 5 + 4 + 3 => 1, }; - for (k, v) in &MAP { - println!("{} {}", k, v); - } - assert_eq!(MAP.get(&3), Some(&0)); assert_eq!(MAP.get(&12), Some(&1)); assert_eq!(MAP.get(&4), None); From 2021fa009f22522d3dc27058023118f31a9c9923 Mon Sep 17 00:00:00 2001 From: Valentin B Date: Sat, 23 Oct 2021 21:42:39 +0200 Subject: [PATCH 21/21] phf: Make truly nested structures possible --- phf/src/map.rs | 1 + phf/src/ordered_map.rs | 1 + phf/src/ordered_set.rs | 1 + phf/src/set.rs | 1 + phf_macros_tests/tests/test.rs | 4 ++-- 5 files changed, 6 insertions(+), 2 deletions(-) diff --git a/phf/src/map.rs b/phf/src/map.rs index 4d558359..fb687292 100644 --- a/phf/src/map.rs +++ b/phf/src/map.rs @@ -13,6 +13,7 @@ use phf_shared::{self, HashKey, PhfBorrow, PhfHash}; /// The fields of this struct are public so that they may be initialized by the /// `phf_map!` macro and code generation. They are subject to change at any /// time and should never be accessed directly. +#[derive(Clone, Copy)] pub struct Map { #[doc(hidden)] pub key: HashKey, diff --git a/phf/src/ordered_map.rs b/phf/src/ordered_map.rs index c8d5ac59..0af5681b 100644 --- a/phf/src/ordered_map.rs +++ b/phf/src/ordered_map.rs @@ -16,6 +16,7 @@ use phf_shared::{self, HashKey, PhfBorrow, PhfHash}; /// The fields of this struct are public so that they may be initialized by the /// `phf_ordered_map!` macro and code generation. They are subject to change at /// any time and should never be accessed directly. 
+#[derive(Clone, Copy)] pub struct OrderedMap { #[doc(hidden)] pub key: HashKey, diff --git a/phf/src/ordered_set.rs b/phf/src/ordered_set.rs index e85d4571..33fd5c92 100644 --- a/phf/src/ordered_set.rs +++ b/phf/src/ordered_set.rs @@ -15,6 +15,7 @@ use phf_shared::PhfBorrow; /// The fields of this struct are public so that they may be initialized by the /// `phf_ordered_set!` macro and code generation. They are subject to change at /// any time and should never be accessed directly. +#[derive(Clone, Copy)] pub struct OrderedSet { #[doc(hidden)] pub map: OrderedMap, diff --git a/phf/src/set.rs b/phf/src/set.rs index d9fdd5bb..641b1729 100644 --- a/phf/src/set.rs +++ b/phf/src/set.rs @@ -14,6 +14,7 @@ use crate::{map, Map}; /// The fields of this struct are public so that they may be initialized by the /// `phf_set!` macro and code generation. They are subject to change at any /// time and should never be accessed directly. +#[derive(Clone, Copy)] pub struct Set { #[doc(hidden)] pub map: Map, diff --git a/phf_macros_tests/tests/test.rs b/phf_macros_tests/tests/test.rs index cb13300b..60440b01 100644 --- a/phf_macros_tests/tests/test.rs +++ b/phf_macros_tests/tests/test.rs @@ -254,8 +254,8 @@ mod map { #[test] fn test_nested_map() { - static MAP: phf::Map<&'static str, &'static phf::Map<&'static str, u16>> = phf_map! { - "nested" => &phf_map! { + static MAP: phf::Map<&'static str, phf::Map<&'static str, u16>> = phf_map! { + "nested" => phf_map! { "map" => 1337, }, };