1 //! A hashmap with "external hashing": nodes are hashed or compared for
2 //! equality only with some external context provided on lookup/insert.
3 //! This allows very memory-efficient data structures where
4 //! node-internal data references some other storage (e.g., offsets into
5 //! an array or pool of shared data).
6 
7 use core::hash::{Hash, Hasher};
8 use hashbrown::hash_table::HashTable;
9 
10 /// Trait that allows for equality comparison given some external
11 /// context.
12 ///
13 /// Note that this trait is implemented by the *context*, rather than
14 /// the item type, for somewhat complex lifetime reasons (lack of GATs
15 /// to allow `for<'ctx> Ctx<'ctx>`-like associated types in traits on
16 /// the value type).
17 pub trait CtxEq<V1: ?Sized, V2: ?Sized> {
18     /// Determine whether `a` and `b` are equal, given the context in
19     /// `self` and the union-find data structure `uf`.
ctx_eq(&self, a: &V1, b: &V2) -> bool20     fn ctx_eq(&self, a: &V1, b: &V2) -> bool;
21 }
22 
23 /// Trait that allows for hashing given some external context.
24 pub trait CtxHash<Value: ?Sized>: CtxEq<Value, Value> {
25     /// Compute the hash of `value`, given the context in `self` and
26     /// the union-find data structure `uf`.
ctx_hash<H: Hasher>(&self, state: &mut H, value: &Value)27     fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &Value);
28 }
29 
30 /// A null-comparator context type for underlying value types that
31 /// already have `Eq` and `Hash`.
32 #[derive(Default)]
33 pub struct NullCtx;
34 
35 impl<V: Eq + Hash> CtxEq<V, V> for NullCtx {
ctx_eq(&self, a: &V, b: &V) -> bool36     fn ctx_eq(&self, a: &V, b: &V) -> bool {
37         a.eq(b)
38     }
39 }
40 impl<V: Eq + Hash> CtxHash<V> for NullCtx {
ctx_hash<H: Hasher>(&self, state: &mut H, value: &V)41     fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &V) {
42         value.hash(state);
43     }
44 }
45 
46 /// A bucket in the hash table.
47 ///
48 /// Some performance-related design notes: we cache the hashcode for
49 /// speed, as this often buys a few percent speed in
50 /// interning-table-heavy workloads. We only keep the low 32 bits of
51 /// the hashcode, for memory efficiency: in common use, `K` and `V`
52 /// are often 32 bits also, and a 12-byte bucket is measurably better
53 /// than a 16-byte bucket.
54 struct BucketData<K, V> {
55     hash: u32,
56     k: K,
57     v: V,
58 }
59 
60 /// A HashMap that takes external context for all operations.
61 pub struct CtxHashMap<K, V> {
62     raw: HashTable<BucketData<K, V>>,
63 }
64 
65 impl<K, V> CtxHashMap<K, V> {
66     /// Create an empty hashmap with pre-allocated space for the given
67     /// capacity.
with_capacity(capacity: usize) -> Self68     pub fn with_capacity(capacity: usize) -> Self {
69         Self {
70             raw: HashTable::with_capacity(capacity),
71         }
72     }
73 }
74 
compute_hash<Ctx, K>(ctx: &Ctx, k: &K) -> u32 where Ctx: CtxHash<K>,75 fn compute_hash<Ctx, K>(ctx: &Ctx, k: &K) -> u32
76 where
77     Ctx: CtxHash<K>,
78 {
79     let mut hasher = rustc_hash::FxHasher::default();
80     ctx.ctx_hash(&mut hasher, k);
81     hasher.finish() as u32
82 }
83 
84 impl<K, V> CtxHashMap<K, V> {
85     /// Insert a new key-value pair, returning the old value associated
86     /// with this key (if any).
insert<Ctx>(&mut self, k: K, v: V, ctx: &Ctx) -> Option<V> where Ctx: CtxEq<K, K> + CtxHash<K>,87     pub fn insert<Ctx>(&mut self, k: K, v: V, ctx: &Ctx) -> Option<V>
88     where
89         Ctx: CtxEq<K, K> + CtxHash<K>,
90     {
91         let hash = compute_hash(ctx, &k);
92         match self.raw.find_mut(hash as u64, |bucket| {
93             hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k)
94         }) {
95             Some(bucket) => Some(core::mem::replace(&mut bucket.v, v)),
96             None => {
97                 let data = BucketData { hash, k, v };
98                 self.raw
99                     .insert_unique(hash as u64, data, |bucket| bucket.hash as u64);
100                 None
101             }
102         }
103     }
104 
105     /// Look up a key, returning a borrow of the value if present.
get<'a, Q, Ctx>(&'a self, k: &Q, ctx: &Ctx) -> Option<&'a V> where Ctx: CtxEq<K, Q> + CtxHash<Q> + CtxHash<K>,106     pub fn get<'a, Q, Ctx>(&'a self, k: &Q, ctx: &Ctx) -> Option<&'a V>
107     where
108         Ctx: CtxEq<K, Q> + CtxHash<Q> + CtxHash<K>,
109     {
110         let hash = compute_hash(ctx, k);
111         self.raw
112             .find(hash as u64, |bucket| {
113                 hash == bucket.hash && ctx.ctx_eq(&bucket.k, k)
114             })
115             .map(|bucket| &bucket.v)
116     }
117 
118     /// Look up a key, returning an `Entry` that refers to an existing
119     /// value or allows inserting a new one.
entry<'a, Ctx>(&'a mut self, k: K, ctx: &Ctx) -> Entry<'a, K, V> where Ctx: CtxEq<K, K> + CtxHash<K>,120     pub fn entry<'a, Ctx>(&'a mut self, k: K, ctx: &Ctx) -> Entry<'a, K, V>
121     where
122         Ctx: CtxEq<K, K> + CtxHash<K>,
123     {
124         let hash = compute_hash(ctx, &k);
125         let raw = self.raw.entry(
126             hash as u64,
127             |bucket| hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k),
128             |bucket| compute_hash(ctx, &bucket.k) as u64,
129         );
130         match raw {
131             hashbrown::hash_table::Entry::Occupied(o) => Entry::Occupied(OccupiedEntry { raw: o }),
132             hashbrown::hash_table::Entry::Vacant(v) => Entry::Vacant(VacantEntry {
133                 hash,
134                 key: k,
135                 raw: v,
136             }),
137         }
138     }
139 }
140 
141 /// A reference to an existing or vacant entry in the hash table.
142 pub enum Entry<'a, K, V> {
143     Occupied(OccupiedEntry<'a, K, V>),
144     Vacant(VacantEntry<'a, K, V>),
145 }
146 
147 /// A reference to an occupied entry in the hash table.
148 pub struct OccupiedEntry<'a, K, V> {
149     raw: hashbrown::hash_table::OccupiedEntry<'a, BucketData<K, V>>,
150 }
151 
152 /// A reference to a vacant entry in the hash table.
153 pub struct VacantEntry<'a, K, V> {
154     hash: u32,
155     key: K,
156     raw: hashbrown::hash_table::VacantEntry<'a, BucketData<K, V>>,
157 }
158 
159 impl<'a, K, V> OccupiedEntry<'a, K, V> {
160     /// Get the existing value.
get(&self) -> &V161     pub fn get(&self) -> &V {
162         &self.raw.get().v
163     }
164 
165     /// Get the existing value, mutably.
get_mut(&mut self) -> &mut V166     pub fn get_mut(&mut self) -> &mut V {
167         &mut self.raw.get_mut().v
168     }
169 }
170 
171 impl<'a, K, V> VacantEntry<'a, K, V> {
172     /// Insert a new value.
insert(self, v: V)173     pub fn insert(self, v: V) {
174         self.raw.insert(BucketData {
175             hash: self.hash,
176             k: self.key,
177             v,
178         });
179     }
180 }
181 
182 #[cfg(test)]
183 mod test {
184     use super::*;
185 
186     #[derive(Clone, Copy, Debug)]
187     struct Key {
188         index: u32,
189     }
190     struct Ctx {
191         vals: &'static [&'static str],
192     }
193     impl CtxEq<Key, Key> for Ctx {
ctx_eq(&self, a: &Key, b: &Key) -> bool194         fn ctx_eq(&self, a: &Key, b: &Key) -> bool {
195             self.vals[a.index as usize].eq(self.vals[b.index as usize])
196         }
197     }
198     impl CtxHash<Key> for Ctx {
ctx_hash<H: Hasher>(&self, state: &mut H, value: &Key)199         fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &Key) {
200             self.vals[value.index as usize].hash(state);
201         }
202     }
203 
204     #[test]
test_basic()205     fn test_basic() {
206         let ctx = Ctx {
207             vals: &["a", "b", "a"],
208         };
209 
210         let k0 = Key { index: 0 };
211         let k1 = Key { index: 1 };
212         let k2 = Key { index: 2 };
213 
214         assert!(ctx.ctx_eq(&k0, &k2));
215         assert!(!ctx.ctx_eq(&k0, &k1));
216         assert!(!ctx.ctx_eq(&k2, &k1));
217 
218         let mut map: CtxHashMap<Key, u64> = CtxHashMap::with_capacity(4);
219         assert_eq!(map.insert(k0, 42, &ctx), None);
220         assert_eq!(map.insert(k2, 84, &ctx), Some(42));
221         assert_eq!(map.get(&k1, &ctx), None);
222         assert_eq!(*map.get(&k0, &ctx).unwrap(), 84);
223     }
224 }
225