1 //! A hashmap with "external hashing": nodes are hashed or compared for
2 //! equality only with some external context provided on lookup/insert.
3 //! This allows very memory-efficient data structures where
4 //! node-internal data references some other storage (e.g., offsets into
5 //! an array or pool of shared data).
6 
7 use hashbrown::hash_table::HashTable;
8 use std::hash::{Hash, Hasher};
9 
10 /// Trait that allows for equality comparison given some external
11 /// context.
12 ///
13 /// Note that this trait is implemented by the *context*, rather than
14 /// the item type, for somewhat complex lifetime reasons (lack of GATs
15 /// to allow `for<'ctx> Ctx<'ctx>`-like associated types in traits on
16 /// the value type).
17 pub trait CtxEq<V1: ?Sized, V2: ?Sized> {
18     /// Determine whether `a` and `b` are equal, given the context in
19     /// `self` and the union-find data structure `uf`.
20     fn ctx_eq(&self, a: &V1, b: &V2) -> bool;
21 }
22 
23 /// Trait that allows for hashing given some external context.
24 pub trait CtxHash<Value: ?Sized>: CtxEq<Value, Value> {
25     /// Compute the hash of `value`, given the context in `self` and
26     /// the union-find data structure `uf`.
27     fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &Value);
28 }
29 
30 /// A null-comparator context type for underlying value types that
31 /// already have `Eq` and `Hash`.
32 #[derive(Default)]
33 pub struct NullCtx;
34 
35 impl<V: Eq + Hash> CtxEq<V, V> for NullCtx {
36     fn ctx_eq(&self, a: &V, b: &V) -> bool {
37         a.eq(b)
38     }
39 }
40 impl<V: Eq + Hash> CtxHash<V> for NullCtx {
41     fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &V) {
42         value.hash(state);
43     }
44 }
45 
46 /// A bucket in the hash table.
47 ///
48 /// Some performance-related design notes: we cache the hashcode for
49 /// speed, as this often buys a few percent speed in
50 /// interning-table-heavy workloads. We only keep the low 32 bits of
51 /// the hashcode, for memory efficiency: in common use, `K` and `V`
52 /// are often 32 bits also, and a 12-byte bucket is measurably better
53 /// than a 16-byte bucket.
54 struct BucketData<K, V> {
55     hash: u32,
56     k: K,
57     v: V,
58 }
59 
60 /// A HashMap that takes external context for all operations.
61 pub struct CtxHashMap<K, V> {
62     raw: HashTable<BucketData<K, V>>,
63 }
64 
65 impl<K, V> CtxHashMap<K, V> {
66     /// Create an empty hashmap with pre-allocated space for the given
67     /// capacity.
68     pub fn with_capacity(capacity: usize) -> Self {
69         Self {
70             raw: HashTable::with_capacity(capacity),
71         }
72     }
73 }
74 
75 fn compute_hash<Ctx, K>(ctx: &Ctx, k: &K) -> u32
76 where
77     Ctx: CtxHash<K>,
78 {
79     let mut hasher = rustc_hash::FxHasher::default();
80     ctx.ctx_hash(&mut hasher, k);
81     hasher.finish() as u32
82 }
83 
84 impl<K, V> CtxHashMap<K, V> {
85     /// Insert a new key-value pair, returning the old value associated
86     /// with this key (if any).
87     pub fn insert<Ctx>(&mut self, k: K, v: V, ctx: &Ctx) -> Option<V>
88     where
89         Ctx: CtxEq<K, K> + CtxHash<K>,
90     {
91         let hash = compute_hash(ctx, &k);
92         match self.raw.find_mut(hash as u64, |bucket| {
93             hash == bucket.hash && ctx.ctx_eq(&bucket.k, &k)
94         }) {
95             Some(bucket) => Some(std::mem::replace(&mut bucket.v, v)),
96             None => {
97                 let data = BucketData { hash, k, v };
98                 self.raw
99                     .insert_unique(hash as u64, data, |bucket| bucket.hash as u64);
100                 None
101             }
102         }
103     }
104 
105     /// Look up a key, returning a borrow of the value if present.
106     pub fn get<'a, Q, Ctx>(&'a self, k: &Q, ctx: &Ctx) -> Option<&'a V>
107     where
108         Ctx: CtxEq<K, Q> + CtxHash<Q> + CtxHash<K>,
109     {
110         let hash = compute_hash(ctx, k);
111         self.raw
112             .find(hash as u64, |bucket| {
113                 hash == bucket.hash && ctx.ctx_eq(&bucket.k, k)
114             })
115             .map(|bucket| &bucket.v)
116     }
117 }
118 
119 #[cfg(test)]
120 mod test {
121     use super::*;
122 
123     #[derive(Clone, Copy, Debug)]
124     struct Key {
125         index: u32,
126     }
127     struct Ctx {
128         vals: &'static [&'static str],
129     }
130     impl CtxEq<Key, Key> for Ctx {
131         fn ctx_eq(&self, a: &Key, b: &Key) -> bool {
132             self.vals[a.index as usize].eq(self.vals[b.index as usize])
133         }
134     }
135     impl CtxHash<Key> for Ctx {
136         fn ctx_hash<H: Hasher>(&self, state: &mut H, value: &Key) {
137             self.vals[value.index as usize].hash(state);
138         }
139     }
140 
141     #[test]
142     fn test_basic() {
143         let ctx = Ctx {
144             vals: &["a", "b", "a"],
145         };
146 
147         let k0 = Key { index: 0 };
148         let k1 = Key { index: 1 };
149         let k2 = Key { index: 2 };
150 
151         assert!(ctx.ctx_eq(&k0, &k2));
152         assert!(!ctx.ctx_eq(&k0, &k1));
153         assert!(!ctx.ctx_eq(&k2, &k1));
154 
155         let mut map: CtxHashMap<Key, u64> = CtxHashMap::with_capacity(4);
156         assert_eq!(map.insert(k0, 42, &ctx), None);
157         assert_eq!(map.insert(k2, 84, &ctx), Some(42));
158         assert_eq!(map.get(&k1, &ctx), None);
159         assert_eq!(*map.get(&k0, &ctx).unwrap(), 84);
160     }
161 }
162