Skip to content

Commit

Permalink
search the Vec for small LazyIndexMaps (#55)
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelcolvin committed Jan 17, 2024
1 parent 1ce40b3 commit b6a645c
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 13 deletions.
34 changes: 32 additions & 2 deletions benches/main.rs
Expand Up @@ -4,7 +4,7 @@ use std::hint::black_box;
use std::fs::File;
use std::io::Read;

use jiter::{Jiter, JsonValue, Peek};
use jiter::{Jiter, JsonValue, LazyIndexMap, Peek};
use serde_json::Value;

fn read_file(path: &str) -> String {
Expand Down Expand Up @@ -215,6 +215,33 @@ test_cases!(floats_array);
// src/github.com/json-iterator/go-benchmark/benchmark.go#L30C17-L30C29
test_cases!(medium_response);

/// Benchmark body shared by the `lazy_map_lookup_*` cases.
///
/// Every iteration builds a fresh `LazyIndexMap` holding `length` entries keyed by the decimal
/// string of their index, then fetches each key once in insertion order.
fn lazy_map_lookup(length: i64, bench: &mut Bencher) {
    bench.iter(|| {
        let mut map: LazyIndexMap<String, JsonValue> = LazyIndexMap::new();
        (0..length).for_each(|n| {
            map.insert(n.to_string(), JsonValue::Int(n));
        });

        // keys are requested in the order they were inserted, so each lookup targets the entry
        // right after the previous one (intended as the best case for the lookup)
        (0..length).for_each(|n| {
            let key = n.to_string();
            let found = map.get(&key).unwrap();
            black_box(found);
        });
    })
}

/// Runs `lazy_map_lookup` with 10 entries (not above `HASHMAP_THRESHOLD` of 16, so `get` scans the vec).
fn lazy_map_lookup_1_10(bench: &mut Bencher) {
lazy_map_lookup(10, bench);
}

/// Runs `lazy_map_lookup` with 20 entries (above `HASHMAP_THRESHOLD` of 16, so `get` uses the hashmap).
fn lazy_map_lookup_2_20(bench: &mut Bencher) {
lazy_map_lookup(20, bench);
}

/// Runs `lazy_map_lookup` with 50 entries (above `HASHMAP_THRESHOLD` of 16, so `get` uses the hashmap).
fn lazy_map_lookup_3_50(bench: &mut Bencher) {
lazy_map_lookup(50, bench);
}

benchmark_group!(
benches,
big_jiter_iter,
Expand Down Expand Up @@ -246,6 +273,9 @@ benchmark_group!(
true_array_serde_value,
true_object_jiter_iter,
true_object_jiter_value,
true_object_serde_value
true_object_serde_value,
lazy_map_lookup_1_10,
lazy_map_lookup_2_20,
lazy_map_lookup_3_50,
);
benchmark_main!(benches);
55 changes: 45 additions & 10 deletions src/lazy_index_map.rs
Expand Up @@ -3,16 +3,37 @@ use std::cmp::{Eq, PartialEq};
use std::fmt;
use std::hash::Hash;
use std::slice::Iter as SliceIter;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::OnceLock;

use ahash::AHashMap;
use smallvec::SmallVec;

/// Like [IndexMap](https://docs.rs/indexmap/latest/indexmap/) but only builds the lookup map when it's needed.
#[derive(Clone, Default)]
pub struct LazyIndexMap<K, V> {
/// The key/value pairs themselves; every lookup resolves to a position in this vec.
vec: SmallVec<[(K, V); 8]>,
/// Key -> position in `vec`; left unset until a lookup needs it.
map: OnceLock<AHashMap<K, usize>>,
/// Position in `vec` of the most recent successful linear-scan lookup; the next scan starts
/// right after it. Atomic so it can be updated through `&self`.
last_find: AtomicUsize,
}

// `Default` simply delegates to `LazyIndexMap::new()`, so the default value is an empty map.
// NOTE(review): the bounds here presumably mirror those of the impl block that defines `new` — confirm.
impl<K, V> Default for LazyIndexMap<K, V>
where
K: Clone + fmt::Debug + Eq + Hash,
V: fmt::Debug,
{
fn default() -> Self {
Self::new()
}
}

impl<K, V> Clone for LazyIndexMap<K, V>
where
    K: Clone,
    V: Clone,
{
    /// Copies the entries only: the clone starts with an unset lookup map and a `last_find`
    /// hint of zero rather than inheriting them from `self`.
    fn clone(&self) -> Self {
        let vec = self.vec.clone();
        let map = OnceLock::new();
        let last_find = AtomicUsize::new(0);
        Self { vec, map, last_find }
    }
}

impl<K, V> fmt::Debug for LazyIndexMap<K, V>
Expand All @@ -25,6 +46,9 @@ where
}
}

// Largest number of entries for which `get` searches the vec directly; maps with more entries
// than this are looked up through the hashmap instead.
// Picked to be a good tradeoff after experimenting with the `lazy_map_lookup` benchmark, should cover most models.
const HASHMAP_THRESHOLD: usize = 16;

/// Like [IndexMap](https://docs.rs/indexmap/latest/indexmap/) but only builds the lookup map when it's needed.
impl<K, V> LazyIndexMap<K, V>
where
Expand All @@ -35,6 +59,7 @@ where
Self {
vec: SmallVec::new(),
map: OnceLock::new(),
last_find: AtomicUsize::new(0),
}
}

Expand All @@ -50,22 +75,32 @@ where
}

/// Returns `true` when the map holds no entries.
pub fn is_empty(&self) -> bool {
self.get_map().is_empty()
self.vec.is_empty()
}

/// Look up the value stored for `key`, returning `None` when no entry matches.
///
/// Maps with more than `HASHMAP_THRESHOLD` entries are searched via `get_map()`; smaller maps are
/// searched by scanning the vec, starting just after the position of the previous successful scan.
pub fn get<Q: ?Sized>(&self, key: &Q) -> Option<&V>
where
K: Borrow<Q> + PartialEq<Q>,
Q: Hash + Eq,
{
let map = self.map.get_or_init(|| {
self.vec
.iter()
.enumerate()
.map(|(index, (key, _))| (key.clone(), index))
.collect()
});
map.get(key).map(|&i| &self.vec[i].1)
let vec_len = self.vec.len();
// if the vec is longer than the threshold, we use the hashmap for lookups
if vec_len > HASHMAP_THRESHOLD {
self.get_map().get(key).map(|&i| &self.vec[i].1)
} else {
// otherwise we find the value in the vec
// we assume the most likely position for the match is at `last_find + 1`
let first_try = self.last_find.load(Ordering::Relaxed) + 1;
// visit `vec_len` consecutive positions, wrapping with `%`, so every index is checked once;
// for an empty vec the range is empty and `% vec_len` is never evaluated
for i in first_try..first_try + vec_len {
let index = i % vec_len;
let (k, v) = &self.vec[index];
if k == key {
// remember the hit so the next scan starts right after it
self.last_find.store(index, Ordering::Relaxed);
return Some(v);
}
}
None
}
}

pub fn keys(&self) -> impl Iterator<Item = &K> {
Expand Down
55 changes: 54 additions & 1 deletion tests/main.rs
Expand Up @@ -873,11 +873,64 @@ fn test_4302_int_err() {
}

// Checks `is_empty` before and after the first insert, the `Debug` rendering of the map,
// and that `keys()` yields the keys in the order they were inserted.
#[test]
fn lazy_index_map_prety() {
fn lazy_index_map_pretty() {
let mut map = LazyIndexMap::new();
assert!(map.is_empty());
map.insert("foo".to_string(), JsonValue::Str("bar".to_string()));
assert!(!map.is_empty());
map.insert("spam".to_string(), JsonValue::Null);
assert_eq!(format!("{map:?}"), r#"{"foo": Str("bar"), "spam": Null}"#);
let keys = map.keys().collect::<Vec<_>>();
assert_eq!(keys, vec!["foo", "spam"]);
}

// Two entries keep the map at or below `HASHMAP_THRESHOLD`, so these lookups exercise the
// vec-scan path, including repeated and out-of-order keys and a missing key.
#[test]
fn lazy_index_map_small_get() {
    let mut small = LazyIndexMap::new();
    small.insert("foo".to_string(), JsonValue::Str("bar".to_string()));
    small.insert("spam".to_string(), JsonValue::Null);

    let bar = JsonValue::Str("bar".to_string());
    let null = JsonValue::Null;

    // the order of the lookups matters: each hit moves the starting point of the next scan
    let lookups = [
        ("foo", Some(&bar)),
        ("spam", Some(&null)),
        ("spam", Some(&null)),
        ("foo", Some(&bar)),
        ("other", None),
    ];
    for (key, expected) in lookups {
        assert_eq!(small.get(key), expected);
    }
}

// 25 entries exceed `HASHMAP_THRESHOLD`, so these lookups go through the hashmap path.
#[test]
fn lazy_index_map_big_get() {
    let mut big = LazyIndexMap::new();
    for n in 0..25 {
        big.insert(n.to_string(), JsonValue::Int(n));
    }

    let present = [("0", 0), ("10", 10), ("22", 22)];
    for (key, number) in present {
        assert_eq!(big.get(key), Some(&JsonValue::Int(number)));
    }
    assert_eq!(big.get("other"), None);
}

// A clone must answer the same lookups as the map it was cloned from, even after the source
// map has already served lookups.
#[test]
fn lazy_index_map_clone() {
    // same sequence of lookups for both maps; order matters because hits update the scan hint
    let check_lookups = |candidate: &LazyIndexMap<String, JsonValue>| {
        let bar = JsonValue::Str("bar".to_string());
        assert_eq!(candidate.get("foo"), Some(&bar));
        assert_eq!(candidate.get("spam"), Some(&JsonValue::Null));
        assert_eq!(candidate.get("spam"), Some(&JsonValue::Null));
        assert_eq!(candidate.get("foo"), Some(&bar));
        assert_eq!(candidate.get("other"), None);
    };

    let mut source = LazyIndexMap::default();
    source.insert("foo".to_string(), JsonValue::Str("bar".to_string()));
    source.insert("spam".to_string(), JsonValue::Null);
    check_lookups(&source);

    let copy = source.clone();
    check_lookups(&copy);
}

#[test]
Expand Down

0 comments on commit b6a645c

Please sign in to comment.