1 //! A hashmap with "external hashing": nodes are hashed or compared for 2 //! equality only with some external context provided on lookup/insert. 3 //! This allows very memory-efficient data structures where 4 //! node-internal data references some other storage (e.g., offsets into 5 //! an array or pool of shared data). 6 7 use hashbrown::hash_table::HashTable; 8 use std::hash::{Hash, Hasher}; 9 10 /// Trait that allows for equality comparison given some external 11 /// context. 12 /// 13 /// Note that this trait is implemented by the *context*, rather than 14 /// the item type, for somewhat complex lifetime reasons (lack of GATs 15 /// to allow `for<'ctx> Ctx<'ctx>`-like associated types in traits on 16 /// the value type). 17 pub trait CtxEq<V1: ?Sized, V2: ?Sized> { 18 /// Determine whether `a` and `b` are equal, given the context in 19 /// `self` and the union-find data structure `uf`. 20 fn ctx_eq(&self, a: &V1, b: &V2) -> bool; 21 } 22 23 /// Trait that allows for hashing given some external context. 24 pub trait CtxHash<Value: ?Sized>: CtxEq<Value, Value> { 25 /// Compute the hash of `value`, given the context in `self` and 26 /// the union-find data structure `uf`. 27 fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &Value); 28 } 29 30 /// A null-comparator context type for underlying value types that 31 /// already have `Eq` and `Hash`. 32 #[derive(Default)] 33 pub struct NullCtx; 34 35 impl<V: Eq + Hash> CtxEq<V, V> for NullCtx { 36 fn ctx_eq(&self, a: &V, b: &V) -> bool { 37 a.eq(b) 38 } 39 } 40 impl<V: Eq + Hash> CtxHash<V> for NullCtx { 41 fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &V) { 42 value.hash(state); 43 } 44 } 45 46 /// A bucket in the hash table. 47 /// 48 /// Some performance-related design notes: we cache the hashcode for 49 /// speed, as this often buys a few percent speed in 50 /// interning-table-heavy workloads. We only keep the low 32 bits of 51 /// the hashcode, for memory efficiency: in common use, `K` and `V` 52 /// are often 32 bits also, and a 12-byte bucket is measurably better 53 /// than a 16-byte bucket. 54 struct BucketData<K, V> { 55 hash: u32, 56 k: K, 57 v: V, 58 } 59 60 /// A HashMap that takes external context for all operations. 61 pub struct CtxHashMap<K, V> { 62 raw: HashTable<BucketData<K, V>>, 63 } 64 65 impl<K, V> CtxHashMap<K, V> { 66 /// Create an empty hashmap with pre-allocated space for the given 67 /// capacity. 68 pub fn with_capacity(capacity: usize) -> Self { 69 Self { 70 raw: HashTable::with_capacity(capacity), 71 } 72 } 73 } 74 75 fn compute_hash<Ctx, K>(ctx: &Ctx, k: &K) -> u32 76 where 77 Ctx: CtxHash<K>, 78 { 79 let mut hasher = rustc_hash::FxHasher::default(); 80 ctx.ctx_hash(&mut hasher, k); 81 hasher.finish() as u32 82 } 83 84 impl<K, V> CtxHashMap<K, V> { 85 /// Insert a new key-value pair, returning the old value associated 86 /// with this key (if any). 87 pub fn insert<Ctx>(&mut self, k: K, v: V, ctx: &Ctx) -> Option<V> 88 where 89 Ctx: CtxEq<K, K> + CtxHash<K>, 90 { 91 let hash = compute_hash(ctx, &k); 92 match self.raw.find_mut(hash as u64, |bucket| { 93 hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k) 94 }) { 95 Some(bucket) => Some(std::mem::replace(&mut bucket.v, v)), 96 None => { 97 let data = BucketData { hash, k, v }; 98 self.raw 99 .insert_unique(hash as u64, data, |bucket| bucket.hash as u64); 100 None 101 } 102 } 103 } 104 105 /// Look up a key, returning a borrow of the value if present. 106 pub fn get<'a, Q, Ctx>(&'a self, k: &Q, ctx: &Ctx) -> Option<&'a V> 107 where 108 Ctx: CtxEq<K, Q> + CtxHash<Q> + CtxHash<K>, 109 { 110 let hash = compute_hash(ctx, k); 111 self.raw 112 .find(hash as u64, |bucket| { 113 hash == bucket.hash && ctx.ctx_eq(&bucket.k, k) 114 }) 115 .map(|bucket| &bucket.v) 116 } 117 } 118 119 #[cfg(test)] 120 mod test { 121 use super::*; 122 123 #[derive(Clone, Copy, Debug)] 124 struct Key { 125 index: u32, 126 } 127 struct Ctx { 128 vals: &'static [&'static str], 129 } 130 impl CtxEq<Key, Key> for Ctx { 131 fn ctx_eq(&self, a: &Key, b: &Key) -> bool { 132 self.vals[a.index as usize].eq(self.vals[b.index as usize]) 133 } 134 } 135 impl CtxHash<Key> for Ctx { 136 fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &Key) { 137 self.vals[value.index as usize].hash(state); 138 } 139 } 140 141 #[test] 142 fn test_basic() { 143 let ctx = Ctx { 144 vals: &["a", "b", "a"], 145 }; 146 147 let k0 = Key { index: 0 }; 148 let k1 = Key { index: 1 }; 149 let k2 = Key { index: 2 }; 150 151 assert!(ctx.ctx_eq(&k0, &k2)); 152 assert!(!ctx.ctx_eq(&k0, &k1)); 153 assert!(!ctx.ctx_eq(&k2, &k1)); 154 155 let mut map: CtxHashMap<Key, u64> = CtxHashMap::with_capacity(4); 156 assert_eq!(map.insert(k0, 42, &ctx), None); 157 assert_eq!(map.insert(k2, 84, &ctx), Some(42)); 158 assert_eq!(map.get(&k1, &ctx), None); 159 assert_eq!(*map.get(&k0, &ctx).unwrap(), 84); 160 } 161 } 162