1 //! A hashmap with "external hashing": nodes are hashed or compared for
2 //! equality only with some external context provided on lookup/insert.
3 //! This allows very memory-efficient data structures where
4 //! node-internal data references some other storage (e.g., offsets into
5 //! an array or pool of shared data).
6
7 use core::hash::{Hash, Hasher};
8 use hashbrown::hash_table::HashTable;
9
10 /// Trait that allows for equality comparison given some external
11 /// context.
12 ///
13 /// Note that this trait is implemented by the *context*, rather than
14 /// the item type, for somewhat complex lifetime reasons (lack of GATs
15 /// to allow `for<'ctx> Ctx<'ctx>`-like associated types in traits on
16 /// the value type).
17 pub trait CtxEq<V1: ?Sized, V2: ?Sized> {
18 /// Determine whether `a` and `b` are equal, given the context in
19 /// `self` and the union-find data structure `uf`.
ctx_eq(&self, a: &V1, b: &V2) -> bool20 fn ctx_eq(&self, a: &V1, b: &V2) -> bool;
21 }
22
23 /// Trait that allows for hashing given some external context.
24 pub trait CtxHash<Value: ?Sized>: CtxEq<Value, Value> {
25 /// Compute the hash of `value`, given the context in `self` and
26 /// the union-find data structure `uf`.
ctx_hash<H: Hasher>(&self, state: &mut H, value: &Value)27 fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &Value);
28 }
29
30 /// A null-comparator context type for underlying value types that
31 /// already have `Eq` and `Hash`.
32 #[derive(Default)]
33 pub struct NullCtx;
34
35 impl<V: Eq + Hash> CtxEq<V, V> for NullCtx {
ctx_eq(&self, a: &V, b: &V) -> bool36 fn ctx_eq(&self, a: &V, b: &V) -> bool {
37 a.eq(b)
38 }
39 }
40 impl<V: Eq + Hash> CtxHash<V> for NullCtx {
ctx_hash<H: Hasher>(&self, state: &mut H, value: &V)41 fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &V) {
42 value.hash(state);
43 }
44 }
45
46 /// A bucket in the hash table.
47 ///
48 /// Some performance-related design notes: we cache the hashcode for
49 /// speed, as this often buys a few percent speed in
50 /// interning-table-heavy workloads. We only keep the low 32 bits of
51 /// the hashcode, for memory efficiency: in common use, `K` and `V`
52 /// are often 32 bits also, and a 12-byte bucket is measurably better
53 /// than a 16-byte bucket.
54 struct BucketData<K, V> {
55 hash: u32,
56 k: K,
57 v: V,
58 }
59
60 /// A HashMap that takes external context for all operations.
61 pub struct CtxHashMap<K, V> {
62 raw: HashTable<BucketData<K, V>>,
63 }
64
65 impl<K, V> CtxHashMap<K, V> {
66 /// Create an empty hashmap with pre-allocated space for the given
67 /// capacity.
with_capacity(capacity: usize) -> Self68 pub fn with_capacity(capacity: usize) -> Self {
69 Self {
70 raw: HashTable::with_capacity(capacity),
71 }
72 }
73 }
74
compute_hash<Ctx, K>(ctx: &Ctx, k: &K) -> u32 where Ctx: CtxHash<K>,75 fn compute_hash<Ctx, K>(ctx: &Ctx, k: &K) -> u32
76 where
77 Ctx: CtxHash<K>,
78 {
79 let mut hasher = rustc_hash::FxHasher::default();
80 ctx.ctx_hash(&mut hasher, k);
81 hasher.finish() as u32
82 }
83
84 impl<K, V> CtxHashMap<K, V> {
85 /// Insert a new key-value pair, returning the old value associated
86 /// with this key (if any).
insert<Ctx>(&mut self, k: K, v: V, ctx: &Ctx) -> Option<V> where Ctx: CtxEq<K, K> + CtxHash<K>,87 pub fn insert<Ctx>(&mut self, k: K, v: V, ctx: &Ctx) -> Option<V>
88 where
89 Ctx: CtxEq<K, K> + CtxHash<K>,
90 {
91 let hash = compute_hash(ctx, &k);
92 match self.raw.find_mut(hash as u64, |bucket| {
93 hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k)
94 }) {
95 Some(bucket) => Some(core::mem::replace(&mut bucket.v, v)),
96 None => {
97 let data = BucketData { hash, k, v };
98 self.raw
99 .insert_unique(hash as u64, data, |bucket| bucket.hash as u64);
100 None
101 }
102 }
103 }
104
105 /// Look up a key, returning a borrow of the value if present.
get<'a, Q, Ctx>(&'a self, k: &Q, ctx: &Ctx) -> Option<&'a V> where Ctx: CtxEq<K, Q> + CtxHash<Q> + CtxHash<K>,106 pub fn get<'a, Q, Ctx>(&'a self, k: &Q, ctx: &Ctx) -> Option<&'a V>
107 where
108 Ctx: CtxEq<K, Q> + CtxHash<Q> + CtxHash<K>,
109 {
110 let hash = compute_hash(ctx, k);
111 self.raw
112 .find(hash as u64, |bucket| {
113 hash == bucket.hash && ctx.ctx_eq(&bucket.k, k)
114 })
115 .map(|bucket| &bucket.v)
116 }
117
118 /// Look up a key, returning an `Entry` that refers to an existing
119 /// value or allows inserting a new one.
entry<'a, Ctx>(&'a mut self, k: K, ctx: &Ctx) -> Entry<'a, K, V> where Ctx: CtxEq<K, K> + CtxHash<K>,120 pub fn entry<'a, Ctx>(&'a mut self, k: K, ctx: &Ctx) -> Entry<'a, K, V>
121 where
122 Ctx: CtxEq<K, K> + CtxHash<K>,
123 {
124 let hash = compute_hash(ctx, &k);
125 let raw = self.raw.entry(
126 hash as u64,
127 |bucket| hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k),
128 |bucket| compute_hash(ctx, &bucket.k) as u64,
129 );
130 match raw {
131 hashbrown::hash_table::Entry::Occupied(o) => Entry::Occupied(OccupiedEntry { raw: o }),
132 hashbrown::hash_table::Entry::Vacant(v) => Entry::Vacant(VacantEntry {
133 hash,
134 key: k,
135 raw: v,
136 }),
137 }
138 }
139 }
140
141 /// A reference to an existing or vacant entry in the hash table.
142 pub enum Entry<'a, K, V> {
143 Occupied(OccupiedEntry<'a, K, V>),
144 Vacant(VacantEntry<'a, K, V>),
145 }
146
147 /// A reference to an occupied entry in the hash table.
148 pub struct OccupiedEntry<'a, K, V> {
149 raw: hashbrown::hash_table::OccupiedEntry<'a, BucketData<K, V>>,
150 }
151
152 /// A reference to a vacant entry in the hash table.
153 pub struct VacantEntry<'a, K, V> {
154 hash: u32,
155 key: K,
156 raw: hashbrown::hash_table::VacantEntry<'a, BucketData<K, V>>,
157 }
158
159 impl<'a, K, V> OccupiedEntry<'a, K, V> {
160 /// Get the existing value.
get(&self) -> &V161 pub fn get(&self) -> &V {
162 &self.raw.get().v
163 }
164
165 /// Get the existing value, mutably.
get_mut(&mut self) -> &mut V166 pub fn get_mut(&mut self) -> &mut V {
167 &mut self.raw.get_mut().v
168 }
169 }
170
171 impl<'a, K, V> VacantEntry<'a, K, V> {
172 /// Insert a new value.
insert(self, v: V)173 pub fn insert(self, v: V) {
174 self.raw.insert(BucketData {
175 hash: self.hash,
176 k: self.key,
177 v,
178 });
179 }
180 }
181
182 #[cfg(test)]
183 mod test {
184 use super::*;
185
186 #[derive(Clone, Copy, Debug)]
187 struct Key {
188 index: u32,
189 }
190 struct Ctx {
191 vals: &'static [&'static str],
192 }
193 impl CtxEq<Key, Key> for Ctx {
ctx_eq(&self, a: &Key, b: &Key) -> bool194 fn ctx_eq(&self, a: &Key, b: &Key) -> bool {
195 self.vals[a.index as usize].eq(self.vals[b.index as usize])
196 }
197 }
198 impl CtxHash<Key> for Ctx {
ctx_hash<H: Hasher>(&self, state: &mut H, value: &Key)199 fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &Key) {
200 self.vals[value.index as usize].hash(state);
201 }
202 }
203
204 #[test]
test_basic()205 fn test_basic() {
206 let ctx = Ctx {
207 vals: &["a", "b", "a"],
208 };
209
210 let k0 = Key { index: 0 };
211 let k1 = Key { index: 1 };
212 let k2 = Key { index: 2 };
213
214 assert!(ctx.ctx_eq(&k0, &k2));
215 assert!(!ctx.ctx_eq(&k0, &k1));
216 assert!(!ctx.ctx_eq(&k2, &k1));
217
218 let mut map: CtxHashMap<Key, u64> = CtxHashMap::with_capacity(4);
219 assert_eq!(map.insert(k0, 42, &ctx), None);
220 assert_eq!(map.insert(k2, 84, &ctx), Some(42));
221 assert_eq!(map.get(&k1, &ctx), None);
222 assert_eq!(*map.get(&k0, &ctx).unwrap(), 84);
223 }
224 }
225