1 //! A hashmap with "external hashing": nodes are hashed or compared for 2 //! equality only with some external context provided on lookup/insert. 3 //! This allows very memory-efficient data structures where 4 //! node-internal data references some other storage (e.g., offsets into 5 //! an array or pool of shared data). 6 7 use core::hash::{Hash, Hasher}; 8 use hashbrown::hash_table::HashTable; 9 10 /// Trait that allows for equality comparison given some external 11 /// context. 12 /// 13 /// Note that this trait is implemented by the *context*, rather than 14 /// the item type, for somewhat complex lifetime reasons (lack of GATs 15 /// to allow `for<'ctx> Ctx<'ctx>`-like associated types in traits on 16 /// the value type). 17 pub trait CtxEq<V1: ?Sized, V2: ?Sized> { 18 /// Determine whether `a` and `b` are equal, given the context in 19 /// `self` and the union-find data structure `uf`. 20 fn ctx_eq(&self, a: &V1, b: &V2) -> bool; 21 } 22 23 /// Trait that allows for hashing given some external context. 24 pub trait CtxHash<Value: ?Sized>: CtxEq<Value, Value> { 25 /// Compute the hash of `value`, given the context in `self` and 26 /// the union-find data structure `uf`. 27 fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &Value); 28 } 29 30 /// A null-comparator context type for underlying value types that 31 /// already have `Eq` and `Hash`. 32 #[derive(Default)] 33 pub struct NullCtx; 34 35 impl<V: Eq + Hash> CtxEq<V, V> for NullCtx { 36 fn ctx_eq(&self, a: &V, b: &V) -> bool { 37 a.eq(b) 38 } 39 } 40 impl<V: Eq + Hash> CtxHash<V> for NullCtx { 41 fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &V) { 42 value.hash(state); 43 } 44 } 45 46 /// A bucket in the hash table. 47 /// 48 /// Some performance-related design notes: we cache the hashcode for 49 /// speed, as this often buys a few percent speed in 50 /// interning-table-heavy workloads. We only keep the low 32 bits of 51 /// the hashcode, for memory efficiency: in common use, `K` and `V` 52 /// are often 32 bits also, and a 12-byte bucket is measurably better 53 /// than a 16-byte bucket. 54 struct BucketData<K, V> { 55 hash: u32, 56 k: K, 57 v: V, 58 } 59 60 /// A HashMap that takes external context for all operations. 61 pub struct CtxHashMap<K, V> { 62 raw: HashTable<BucketData<K, V>>, 63 } 64 65 impl<K, V> CtxHashMap<K, V> { 66 /// Create an empty hashmap with pre-allocated space for the given 67 /// capacity. 68 pub fn with_capacity(capacity: usize) -> Self { 69 Self { 70 raw: HashTable::with_capacity(capacity), 71 } 72 } 73 } 74 75 fn compute_hash<Ctx, K>(ctx: &Ctx, k: &K) -> u32 76 where 77 Ctx: CtxHash<K>, 78 { 79 let mut hasher = rustc_hash::FxHasher::default(); 80 ctx.ctx_hash(&mut hasher, k); 81 hasher.finish() as u32 82 } 83 84 impl<K, V> CtxHashMap<K, V> { 85 /// Insert a new key-value pair, returning the old value associated 86 /// with this key (if any). 87 pub fn insert<Ctx>(&mut self, k: K, v: V, ctx: &Ctx) -> Option<V> 88 where 89 Ctx: CtxEq<K, K> + CtxHash<K>, 90 { 91 let hash = compute_hash(ctx, &k); 92 match self.raw.find_mut(hash as u64, |bucket| { 93 hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k) 94 }) { 95 Some(bucket) => Some(core::mem::replace(&mut bucket.v, v)), 96 None => { 97 let data = BucketData { hash, k, v }; 98 self.raw 99 .insert_unique(hash as u64, data, |bucket| bucket.hash as u64); 100 None 101 } 102 } 103 } 104 105 /// Look up a key, returning a borrow of the value if present. 106 pub fn get<'a, Q, Ctx>(&'a self, k: &Q, ctx: &Ctx) -> Option<&'a V> 107 where 108 Ctx: CtxEq<K, Q> + CtxHash<Q> + CtxHash<K>, 109 { 110 let hash = compute_hash(ctx, k); 111 self.raw 112 .find(hash as u64, |bucket| { 113 hash == bucket.hash && ctx.ctx_eq(&bucket.k, k) 114 }) 115 .map(|bucket| &bucket.v) 116 } 117 118 /// Look up a key, returning an `Entry` that refers to an existing 119 /// value or allows inserting a new one. 120 pub fn entry<'a, Ctx>(&'a mut self, k: K, ctx: &Ctx) -> Entry<'a, K, V> 121 where 122 Ctx: CtxEq<K, K> + CtxHash<K>, 123 { 124 let hash = compute_hash(ctx, &k); 125 let raw = self.raw.entry( 126 hash as u64, 127 |bucket| hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k), 128 |bucket| compute_hash(ctx, &bucket.k) as u64, 129 ); 130 match raw { 131 hashbrown::hash_table::Entry::Occupied(o) => Entry::Occupied(OccupiedEntry { raw: o }), 132 hashbrown::hash_table::Entry::Vacant(v) => Entry::Vacant(VacantEntry { 133 hash, 134 key: k, 135 raw: v, 136 }), 137 } 138 } 139 } 140 141 /// A reference to an existing or vacant entry in the hash table. 142 pub enum Entry<'a, K, V> { 143 Occupied(OccupiedEntry<'a, K, V>), 144 Vacant(VacantEntry<'a, K, V>), 145 } 146 147 /// A reference to an occupied entry in the hash table. 148 pub struct OccupiedEntry<'a, K, V> { 149 raw: hashbrown::hash_table::OccupiedEntry<'a, BucketData<K, V>>, 150 } 151 152 /// A reference to a vacant entry in the hash table. 153 pub struct VacantEntry<'a, K, V> { 154 hash: u32, 155 key: K, 156 raw: hashbrown::hash_table::VacantEntry<'a, BucketData<K, V>>, 157 } 158 159 impl<'a, K, V> OccupiedEntry<'a, K, V> { 160 /// Get the existing value. 161 pub fn get(&self) -> &V { 162 &self.raw.get().v 163 } 164 165 /// Get the existing value, mutably. 166 pub fn get_mut(&mut self) -> &mut V { 167 &mut self.raw.get_mut().v 168 } 169 } 170 171 impl<'a, K, V> VacantEntry<'a, K, V> { 172 /// Insert a new value. 173 pub fn insert(self, v: V) { 174 self.raw.insert(BucketData { 175 hash: self.hash, 176 k: self.key, 177 v, 178 }); 179 } 180 } 181 182 #[cfg(test)] 183 mod test { 184 use super::*; 185 186 #[derive(Clone, Copy, Debug)] 187 struct Key { 188 index: u32, 189 } 190 struct Ctx { 191 vals: &'static [&'static str], 192 } 193 impl CtxEq<Key, Key> for Ctx { 194 fn ctx_eq(&self, a: &Key, b: &Key) -> bool { 195 self.vals[a.index as usize].eq(self.vals[b.index as usize]) 196 } 197 } 198 impl CtxHash<Key> for Ctx { 199 fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &Key) { 200 self.vals[value.index as usize].hash(state); 201 } 202 } 203 204 #[test] 205 fn test_basic() { 206 let ctx = Ctx { 207 vals: &["a", "b", "a"], 208 }; 209 210 let k0 = Key { index: 0 }; 211 let k1 = Key { index: 1 }; 212 let k2 = Key { index: 2 }; 213 214 assert!(ctx.ctx_eq(&k0, &k2)); 215 assert!(!ctx.ctx_eq(&k0, &k1)); 216 assert!(!ctx.ctx_eq(&k2, &k1)); 217 218 let mut map: CtxHashMap<Key, u64> = CtxHashMap::with_capacity(4); 219 assert_eq!(map.insert(k0, 42, &ctx), None); 220 assert_eq!(map.insert(k2, 84, &ctx), Some(42)); 221 assert_eq!(map.get(&k1, &ctx), None); 222 assert_eq!(*map.get(&k0, &ctx).unwrap(), 84); 223 } 224 } 225