1d2912cb1SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
27e1e7763SThomas Graf /*
37e1e7763SThomas Graf * Resizable, Scalable, Concurrent Hash Table
47e1e7763SThomas Graf *
502fd97c3SHerbert Xu * Copyright (c) 2015 Herbert Xu <[email protected]>
6a5ec68e3SThomas Graf * Copyright (c) 2014-2015 Thomas Graf <[email protected]>
77e1e7763SThomas Graf * Copyright (c) 2008-2014 Patrick McHardy <[email protected]>
87e1e7763SThomas Graf *
97e1e7763SThomas Graf * Code partially derived from nft_hash
1002fd97c3SHerbert Xu * Rewritten with rehash code from br_multicast plus single list
1102fd97c3SHerbert Xu * pointer as suggested by Josh Triplett
127e1e7763SThomas Graf */
137e1e7763SThomas Graf
1407ee0722SHerbert Xu #include <linux/atomic.h>
157e1e7763SThomas Graf #include <linux/kernel.h>
167e1e7763SThomas Graf #include <linux/init.h>
177e1e7763SThomas Graf #include <linux/log2.h>
185beb5c90SEric Dumazet #include <linux/sched.h>
19b2d09103SIngo Molnar #include <linux/rculist.h>
207e1e7763SThomas Graf #include <linux/slab.h>
217e1e7763SThomas Graf #include <linux/vmalloc.h>
227e1e7763SThomas Graf #include <linux/mm.h>
2387545899SDaniel Borkmann #include <linux/jhash.h>
247e1e7763SThomas Graf #include <linux/random.h>
257e1e7763SThomas Graf #include <linux/rhashtable.h>
2661d7b097SStephen Rothwell #include <linux/err.h>
276d795413SHauke Mehrtens #include <linux/export.h>
287e1e7763SThomas Graf
297e1e7763SThomas Graf #define HASH_DEFAULT_SIZE 64UL
30c2e213cfSHerbert Xu #define HASH_MIN_SIZE 4U
3197defe1eSThomas Graf
/*
 * One slot of a multi-level ("nested") bucket array, used when a flat
 * bucket array cannot be allocated (see bucket_table_alloc() fallback).
 * An interior slot points at the next level; a leaf slot is a real bucket.
 */
union nested_table {
	union nested_table __rcu *table;
	struct rhash_lock_head __rcu *bucket;
};
36da20420fSHerbert Xu
head_hashfn(struct rhashtable * ht,const struct bucket_table * tbl,const struct rhash_head * he)37988dfbd7SHerbert Xu static u32 head_hashfn(struct rhashtable *ht,
388d24c0b4SThomas Graf const struct bucket_table *tbl,
398d24c0b4SThomas Graf const struct rhash_head *he)
407e1e7763SThomas Graf {
4102fd97c3SHerbert Xu return rht_head_hashfn(ht, tbl, he, ht->p);
427e1e7763SThomas Graf }
437e1e7763SThomas Graf
#ifdef CONFIG_PROVE_LOCKING
#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))

/* Lockdep helper: is ht->mutex held?  Always 1 once debug_locks is off. */
int lockdep_rht_mutex_is_held(struct rhashtable *ht)
{
	return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
}
EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);

/* Lockdep helper: is the bit lock of bucket @hash in @tbl held? */
int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
{
	if (!debug_locks)
		return 1;
	/* Nested tables are not checked here; assume the lock is held. */
	if (unlikely(tbl->nest))
		return 1;
	return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]);
}
EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
#else
#define ASSERT_RHT_MUTEX(HT)
#endif
65a03eaec0SThomas Graf
/* Return the top-level page of a nested bucket array (tbl->nest != 0). */
static inline union nested_table *nested_table_top(
	const struct bucket_table *tbl)
{
	/* The top-level bucket entry does not need RCU protection
	 * because it's set at the same time as tbl->nest.
	 */
	return (void *)rcu_dereference_protected(tbl->buckets[0], 1);
}
744a3084aaSHerbert Xu
/*
 * Recursively free the subtree rooted at @ntbl.  @size is the number of
 * buckets this slot still covers; while it exceeds one page's worth of
 * pointers (@len) the slot is interior and its children are freed first.
 */
static void nested_table_free(union nested_table *ntbl, unsigned int size)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	const unsigned int len = 1 << shift;
	unsigned int i;

	ntbl = rcu_dereference_protected(ntbl->table, 1);
	if (!ntbl)
		return;

	if (size > len) {
		size >>= shift;
		for (i = 0; i < len; i++)
			nested_table_free(ntbl + i, size);
	}

	kfree(ntbl);
}
93da20420fSHerbert Xu
/* Free all nested pages hanging off a bucket table with tbl->nest set. */
static void nested_bucket_table_free(const struct bucket_table *tbl)
{
	unsigned int size = tbl->size >> tbl->nest;
	unsigned int len = 1 << tbl->nest;
	union nested_table *ntbl;
	unsigned int i;

	ntbl = nested_table_top(tbl);

	for (i = 0; i < len; i++)
		nested_table_free(ntbl + i, size);

	kfree(ntbl);
}
108da20420fSHerbert Xu
/* Free a bucket table, including any nested bucket pages it owns. */
static void bucket_table_free(const struct bucket_table *tbl)
{
	if (tbl->nest)
		nested_bucket_table_free(tbl);

	kvfree(tbl);
}
11697defe1eSThomas Graf
/* RCU callback: free the retired table once all readers have drained. */
static void bucket_table_free_rcu(struct rcu_head *head)
{
	bucket_table_free(container_of(head, struct bucket_table, rcu));
}
1219d901bc0SHerbert Xu
/*
 * Populate one nested-table slot (*@prev), allocating a page-sized child
 * array if the slot is still empty.  @leaf says the children are buckets,
 * which must be initialised to the NULLS marker.  Concurrent callers race
 * via cmpxchg(); the loser frees its page and uses the winner's.
 * Returns the child array, or NULL on allocation failure.
 */
static union nested_table *nested_table_alloc(struct rhashtable *ht,
					      union nested_table __rcu **prev,
					      bool leaf)
{
	union nested_table *ntbl;
	int i;

	ntbl = rcu_dereference(*prev);
	if (ntbl)
		return ntbl;

	ntbl = alloc_hooks_tag(ht->alloc_tag,
			kmalloc_noprof(PAGE_SIZE, GFP_ATOMIC|__GFP_ZERO));

	if (ntbl && leaf) {
		for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0]); i++)
			INIT_RHT_NULLS_HEAD(ntbl[i].bucket);
	}

	if (cmpxchg((union nested_table **)prev, NULL, ntbl) == NULL)
		return ntbl;
	/* Raced with another thread. */
	kfree(ntbl);
	return rcu_dereference(*prev);
}
147da20420fSHerbert Xu
/*
 * Allocate a bucket table whose buckets live in a multi-level page tree
 * instead of one flat array.  Only the root slot is allocated here;
 * deeper levels are filled in lazily by rht_bucket_insert() callers.
 * Returns NULL if @nbuckets is too small to need nesting or on OOM.
 */
static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
						      size_t nbuckets,
						      gfp_t gfp)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	struct bucket_table *tbl;
	size_t size;

	/* Nesting only pays off beyond two pages' worth of buckets. */
	if (nbuckets < (1 << (shift + 1)))
		return NULL;

	/* Header plus a single root slot. */
	size = sizeof(*tbl) + sizeof(tbl->buckets[0]);

	tbl = alloc_hooks_tag(ht->alloc_tag,
			kmalloc_noprof(size, gfp|__GFP_ZERO));
	if (!tbl)
		return NULL;

	if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
				false)) {
		kfree(tbl);
		return NULL;
	}

	/* Number of index bits resolved by the top level (1..shift). */
	tbl->nest = (ilog2(nbuckets) - 1) % shift + 1;

	return tbl;
}
176da20420fSHerbert Xu
/*
 * Allocate and initialise a bucket table of @nbuckets buckets.  Tries a
 * flat (kv)allocation first; if that fails in atomic context it falls
 * back to a nested table.  Returns NULL on failure.
 */
static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
					       size_t nbuckets,
					       gfp_t gfp)
{
	struct bucket_table *tbl = NULL;
	size_t size;
	int i;
	static struct lock_class_key __key;

	tbl = alloc_hooks_tag(ht->alloc_tag,
			kvmalloc_node_noprof(struct_size(tbl, buckets, nbuckets),
					     gfp|__GFP_ZERO, NUMA_NO_NODE));

	/* Remember the logical size before the fallback clobbers nbuckets. */
	size = nbuckets;

	if (tbl == NULL && !gfpflags_allow_blocking(gfp)) {
		tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
		/* Nested buckets are initialised lazily; skip the loop below. */
		nbuckets = 0;
	}

	if (tbl == NULL)
		return NULL;

	lockdep_init_map(&tbl->dep_map, "rhashtable_bucket", &__key, 0);

	tbl->size = size;

	rcu_head_init(&tbl->rcu);
	INIT_LIST_HEAD(&tbl->walkers);

	/* Per-table hash seed. */
	tbl->hash_rnd = get_random_u32();

	for (i = 0; i < nbuckets; i++)
		INIT_RHT_NULLS_HEAD(tbl->buckets[i]);

	return tbl;
}
2147e1e7763SThomas Graf
rhashtable_last_table(struct rhashtable * ht,struct bucket_table * tbl)215b824478bSHerbert Xu static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
216b824478bSHerbert Xu struct bucket_table *tbl)
217b824478bSHerbert Xu {
218b824478bSHerbert Xu struct bucket_table *new_tbl;
219b824478bSHerbert Xu
220b824478bSHerbert Xu do {
221b824478bSHerbert Xu new_tbl = tbl;
222b824478bSHerbert Xu tbl = rht_dereference_rcu(tbl->future_tbl, ht);
223b824478bSHerbert Xu } while (tbl);
224b824478bSHerbert Xu
225b824478bSHerbert Xu return new_tbl;
226b824478bSHerbert Xu }
227b824478bSHerbert Xu
/*
 * Move the last entry of old bucket @old_hash (locked head at @bkt) into
 * the newest table.  Returns 0 if an entry was moved, -ENOENT when the
 * bucket is empty, or -EAGAIN if the target table is nested (the caller
 * must then retry via the deferred worker).
 */
static int rhashtable_rehash_one(struct rhashtable *ht,
				 struct rhash_lock_head __rcu **bkt,
				 unsigned int old_hash)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl);
	int err = -EAGAIN;
	struct rhash_head *head, *next, *entry;
	struct rhash_head __rcu **pprev = NULL;
	unsigned int new_hash;
	unsigned long flags;

	if (new_tbl->nest)
		goto out;

	err = -ENOENT;

	/* Walk to the last entry in the chain; pprev ends up pointing at
	 * the link that will be cut.
	 */
	rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash),
			  old_tbl, old_hash) {
		err = 0;
		next = rht_dereference_bucket(entry->next, old_tbl, old_hash);

		if (rht_is_a_nulls(next))
			break;

		pprev = &entry->next;
	}

	if (err)
		goto out;

	new_hash = head_hashfn(ht, new_tbl, entry);

	/* Nested lock class: the old bucket lock is already held. */
	flags = rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash],
				SINGLE_DEPTH_NESTING);

	head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash);

	/* Link the entry at the head of the new bucket first ... */
	RCU_INIT_POINTER(entry->next, head);

	rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry, flags);

	/* ... then unlink it from the old chain. */
	if (pprev)
		rcu_assign_pointer(*pprev, next);
	else
		/* Need to preserve the bit lock. */
		rht_assign_locked(bkt, next);

out:
	return err;
}
27997defe1eSThomas Graf
/*
 * Rehash every entry of old bucket @old_hash into the new table, one at
 * a time under the bucket's bit lock.  Returns 0 on success (including an
 * absent/empty bucket) or -EAGAIN if the move must be retried.
 */
static int rhashtable_rehash_chain(struct rhashtable *ht,
				   unsigned int old_hash)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash);
	unsigned long flags;
	int err;

	if (!bkt)
		return 0;
	flags = rht_lock(old_tbl, bkt);

	/* Drain the chain; each call moves exactly one entry. */
	while (!(err = rhashtable_rehash_one(ht, bkt, old_hash)))
		;

	/* -ENOENT just means the bucket is now empty. */
	if (err == -ENOENT)
		err = 0;
	rht_unlock(old_tbl, bkt, flags);

	return err;
}
301aa34a6cbSHerbert Xu
/*
 * Publish @new_tbl as the successor of @old_tbl.  Returns -EEXIST if
 * another rehash already attached a future table.
 */
static int rhashtable_rehash_attach(struct rhashtable *ht,
				    struct bucket_table *old_tbl,
				    struct bucket_table *new_tbl)
{
	/* Make insertions go into the new, empty table right away. Deletions
	 * and lookups will be attempted in both tables until we synchronize.
	 * As cmpxchg() provides strong barriers, we do not need
	 * rcu_assign_pointer().
	 */

	if (cmpxchg((struct bucket_table **)&old_tbl->future_tbl, NULL,
		    new_tbl) != NULL)
		return -EEXIST;

	return 0;
}
318b824478bSHerbert Xu
/*
 * Perform one attached rehash: move every bucket from ht->tbl into its
 * future table, publish the new table, detach any walkers and retire the
 * old table via RCU.  Returns -EAGAIN if yet another future table is
 * already queued behind the new one, 0 otherwise.
 */
static int rhashtable_rehash_table(struct rhashtable *ht)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct bucket_table *new_tbl;
	struct rhashtable_walker *walker;
	unsigned int old_hash;
	int err;

	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
	if (!new_tbl)
		return 0;

	for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
		err = rhashtable_rehash_chain(ht, old_hash);
		if (err)
			return err;
		cond_resched();
	}

	/* Publish the new table pointer. */
	rcu_assign_pointer(ht->tbl, new_tbl);

	spin_lock(&ht->lock);
	list_for_each_entry(walker, &old_tbl->walkers, list)
		walker->tbl = NULL;

	/* Wait for readers. All new readers will see the new
	 * table, and thus no references to the old table will
	 * remain.
	 * We do this inside the locked region so that
	 * rhashtable_walk_stop() can use rcu_head_after_call_rcu()
	 * to check if it should not re-link the table.
	 */
	call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
	spin_unlock(&ht->lock);

	return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
}
3577e1e7763SThomas Graf
/*
 * Allocate a table of @size buckets and attach it as the rehash target
 * of @old_tbl.  Caller must hold ht->mutex.  Returns 0 on success,
 * -ENOMEM on allocation failure, or -EEXIST if a rehash is already
 * attached (the new table is freed in that case).
 */
static int rhashtable_rehash_alloc(struct rhashtable *ht,
				   struct bucket_table *old_tbl,
				   unsigned int size)
{
	struct bucket_table *new_tbl;
	int err;

	ASSERT_RHT_MUTEX(ht);

	new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
	if (new_tbl == NULL)
		return -ENOMEM;

	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
	if (err)
		bucket_table_free(new_tbl);

	return err;
}
3777e1e7763SThomas Graf
/**
 * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
 * @ht: the hash table to shrink
 *
 * This function shrinks the hash table to fit, i.e., the smallest
 * size would not cause it to expand right away automatically.
 *
 * The caller must ensure that no concurrent resizing occurs by holding
 * ht->mutex.
 *
 * The caller must ensure that no concurrent table mutations take place.
 * It is however valid to have concurrent lookups if they are RCU protected.
 *
 * It is valid to have concurrent insertions and deletions protected by per
 * bucket locks or concurrent RCU protected lookups and traversals.
 *
 * Returns 0 if no shrink is needed or on success, -EEXIST if a rehash is
 * already pending, or a bucket_table_alloc() error.
 */
static int rhashtable_shrink(struct rhashtable *ht)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	unsigned int nelems = atomic_read(&ht->nelems);
	unsigned int size = 0;

	/* Target size: 150% of the element count, rounded up to a power
	 * of two, but never below the configured minimum.
	 */
	if (nelems)
		size = roundup_pow_of_two(nelems * 3 / 2);
	if (size < ht->p.min_size)
		size = ht->p.min_size;

	if (old_tbl->size <= size)
		return 0;

	if (rht_dereference(old_tbl->future_tbl, ht))
		return -EEXIST;

	return rhashtable_rehash_alloc(ht, old_tbl, size);
}
4137e1e7763SThomas Graf
rht_deferred_worker(struct work_struct * work)41497defe1eSThomas Graf static void rht_deferred_worker(struct work_struct *work)
41597defe1eSThomas Graf {
41697defe1eSThomas Graf struct rhashtable *ht;
41797defe1eSThomas Graf struct bucket_table *tbl;
418b824478bSHerbert Xu int err = 0;
41997defe1eSThomas Graf
42057699a40SYing Xue ht = container_of(work, struct rhashtable, run_work);
42197defe1eSThomas Graf mutex_lock(&ht->mutex);
42228134a53SHerbert Xu
42397defe1eSThomas Graf tbl = rht_dereference(ht->tbl, ht);
424b824478bSHerbert Xu tbl = rhashtable_last_table(ht, tbl);
42597defe1eSThomas Graf
426a5b6846fSDaniel Borkmann if (rht_grow_above_75(ht, tbl))
427da20420fSHerbert Xu err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2);
428b5e2c150SThomas Graf else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
429da20420fSHerbert Xu err = rhashtable_shrink(ht);
430da20420fSHerbert Xu else if (tbl->nest)
431da20420fSHerbert Xu err = rhashtable_rehash_alloc(ht, tbl, tbl->size);
432b824478bSHerbert Xu
433408f13efSHerbert Xu if (!err || err == -EEXIST) {
434408f13efSHerbert Xu int nerr;
435408f13efSHerbert Xu
436408f13efSHerbert Xu nerr = rhashtable_rehash_table(ht);
437408f13efSHerbert Xu err = err ?: nerr;
438408f13efSHerbert Xu }
439b824478bSHerbert Xu
44097defe1eSThomas Graf mutex_unlock(&ht->mutex);
441b824478bSHerbert Xu
442b824478bSHerbert Xu if (err)
443b824478bSHerbert Xu schedule_work(&ht->run_work);
44497defe1eSThomas Graf }
44597defe1eSThomas Graf
rhashtable_insert_rehash(struct rhashtable * ht,struct bucket_table * tbl)446ca26893fSHerbert Xu static int rhashtable_insert_rehash(struct rhashtable *ht,
4473cf92222SHerbert Xu struct bucket_table *tbl)
448ccd57b1bSHerbert Xu {
449ccd57b1bSHerbert Xu struct bucket_table *old_tbl;
450ccd57b1bSHerbert Xu struct bucket_table *new_tbl;
451ccd57b1bSHerbert Xu unsigned int size;
452ccd57b1bSHerbert Xu int err;
453ccd57b1bSHerbert Xu
454ccd57b1bSHerbert Xu old_tbl = rht_dereference_rcu(ht->tbl, ht);
455ccd57b1bSHerbert Xu
456ccd57b1bSHerbert Xu size = tbl->size;
457ccd57b1bSHerbert Xu
4583cf92222SHerbert Xu err = -EBUSY;
4593cf92222SHerbert Xu
460ccd57b1bSHerbert Xu if (rht_grow_above_75(ht, tbl))
461ccd57b1bSHerbert Xu size *= 2;
462a87b9ebfSThomas Graf /* Do not schedule more than one rehash */
463a87b9ebfSThomas Graf else if (old_tbl != tbl)
4643cf92222SHerbert Xu goto fail;
4653cf92222SHerbert Xu
4663cf92222SHerbert Xu err = -ENOMEM;
467ccd57b1bSHerbert Xu
46893f976b5SDavidlohr Bueso new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC | __GFP_NOWARN);
4693cf92222SHerbert Xu if (new_tbl == NULL)
4703cf92222SHerbert Xu goto fail;
471ccd57b1bSHerbert Xu
472ccd57b1bSHerbert Xu err = rhashtable_rehash_attach(ht, tbl, new_tbl);
473ccd57b1bSHerbert Xu if (err) {
474ccd57b1bSHerbert Xu bucket_table_free(new_tbl);
475ccd57b1bSHerbert Xu if (err == -EEXIST)
476ccd57b1bSHerbert Xu err = 0;
477ccd57b1bSHerbert Xu } else
478ccd57b1bSHerbert Xu schedule_work(&ht->run_work);
479ccd57b1bSHerbert Xu
480ccd57b1bSHerbert Xu return err;
4813cf92222SHerbert Xu
4823cf92222SHerbert Xu fail:
4833cf92222SHerbert Xu /* Do not fail the insert if someone else did a rehash. */
484c0690016SNeilBrown if (likely(rcu_access_pointer(tbl->future_tbl)))
4853cf92222SHerbert Xu return 0;
4863cf92222SHerbert Xu
4873cf92222SHerbert Xu /* Schedule async rehash to retry allocation in process context. */
4883cf92222SHerbert Xu if (err == -ENOMEM)
4893cf92222SHerbert Xu schedule_work(&ht->run_work);
4903cf92222SHerbert Xu
4913cf92222SHerbert Xu return err;
492ccd57b1bSHerbert Xu }
493ccd57b1bSHerbert Xu
rhashtable_lookup_one(struct rhashtable * ht,struct rhash_lock_head __rcu ** bkt,struct bucket_table * tbl,unsigned int hash,const void * key,struct rhash_head * obj)494ca26893fSHerbert Xu static void *rhashtable_lookup_one(struct rhashtable *ht,
495ce9b362bSHerbert Xu struct rhash_lock_head __rcu **bkt,
496ca26893fSHerbert Xu struct bucket_table *tbl, unsigned int hash,
497ca26893fSHerbert Xu const void *key, struct rhash_head *obj)
49802fd97c3SHerbert Xu {
499ca26893fSHerbert Xu struct rhashtable_compare_arg arg = {
500ca26893fSHerbert Xu .ht = ht,
501ca26893fSHerbert Xu .key = key,
502ca26893fSHerbert Xu };
503e4edbe3cSNeilBrown struct rhash_head __rcu **pprev = NULL;
50402fd97c3SHerbert Xu struct rhash_head *head;
505ca26893fSHerbert Xu int elasticity;
50602fd97c3SHerbert Xu
5075f8ddeabSFlorian Westphal elasticity = RHT_ELASTICITY;
508adc6a3abSNeilBrown rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
509ca26893fSHerbert Xu struct rhlist_head *list;
510ca26893fSHerbert Xu struct rhlist_head *plist;
51102fd97c3SHerbert Xu
512ca26893fSHerbert Xu elasticity--;
513ca26893fSHerbert Xu if (!key ||
514ca26893fSHerbert Xu (ht->p.obj_cmpfn ?
515ca26893fSHerbert Xu ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
516d3dcf8ebSPaul Blakey rhashtable_compare(&arg, rht_obj(ht, head)))) {
517d3dcf8ebSPaul Blakey pprev = &head->next;
518ca26893fSHerbert Xu continue;
519d3dcf8ebSPaul Blakey }
520ca26893fSHerbert Xu
521ca26893fSHerbert Xu if (!ht->rhlist)
522ca26893fSHerbert Xu return rht_obj(ht, head);
523ca26893fSHerbert Xu
524ca26893fSHerbert Xu list = container_of(obj, struct rhlist_head, rhead);
525ca26893fSHerbert Xu plist = container_of(head, struct rhlist_head, rhead);
526ca26893fSHerbert Xu
527ca26893fSHerbert Xu RCU_INIT_POINTER(list->next, plist);
528ca26893fSHerbert Xu head = rht_dereference_bucket(head->next, tbl, hash);
529ca26893fSHerbert Xu RCU_INIT_POINTER(list->rhead.next, head);
5308f0db018SNeilBrown if (pprev)
531ca26893fSHerbert Xu rcu_assign_pointer(*pprev, obj);
5328f0db018SNeilBrown else
5338f0db018SNeilBrown /* Need to preserve the bit lock */
534f4712b46SNeilBrown rht_assign_locked(bkt, obj);
535ca26893fSHerbert Xu
536ca26893fSHerbert Xu return NULL;
5375ca8cc5bSPablo Neira Ayuso }
53802fd97c3SHerbert Xu
539ca26893fSHerbert Xu if (elasticity <= 0)
540ca26893fSHerbert Xu return ERR_PTR(-EAGAIN);
541ca26893fSHerbert Xu
542ca26893fSHerbert Xu return ERR_PTR(-ENOENT);
543ca26893fSHerbert Xu }
544ca26893fSHerbert Xu
rhashtable_insert_one(struct rhashtable * ht,struct rhash_lock_head __rcu ** bkt,struct bucket_table * tbl,unsigned int hash,struct rhash_head * obj,void * data)545ce9b362bSHerbert Xu static struct bucket_table *rhashtable_insert_one(
546ce9b362bSHerbert Xu struct rhashtable *ht, struct rhash_lock_head __rcu **bkt,
547ce9b362bSHerbert Xu struct bucket_table *tbl, unsigned int hash, struct rhash_head *obj,
548ca26893fSHerbert Xu void *data)
549ca26893fSHerbert Xu {
550ca26893fSHerbert Xu struct bucket_table *new_tbl;
551ca26893fSHerbert Xu struct rhash_head *head;
552ca26893fSHerbert Xu
553ca26893fSHerbert Xu if (!IS_ERR_OR_NULL(data))
554ca26893fSHerbert Xu return ERR_PTR(-EEXIST);
555ca26893fSHerbert Xu
556ca26893fSHerbert Xu if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
557ca26893fSHerbert Xu return ERR_CAST(data);
558ca26893fSHerbert Xu
559c0690016SNeilBrown new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
560ca26893fSHerbert Xu if (new_tbl)
561ca26893fSHerbert Xu return new_tbl;
562ca26893fSHerbert Xu
563ca26893fSHerbert Xu if (PTR_ERR(data) != -ENOENT)
564ca26893fSHerbert Xu return ERR_CAST(data);
565ca26893fSHerbert Xu
56607ee0722SHerbert Xu if (unlikely(rht_grow_above_max(ht, tbl)))
567ca26893fSHerbert Xu return ERR_PTR(-E2BIG);
56807ee0722SHerbert Xu
569ca26893fSHerbert Xu if (unlikely(rht_grow_above_100(ht, tbl)))
570ca26893fSHerbert Xu return ERR_PTR(-EAGAIN);
57102fd97c3SHerbert Xu
572adc6a3abSNeilBrown head = rht_ptr(bkt, tbl, hash);
57302fd97c3SHerbert Xu
57402fd97c3SHerbert Xu RCU_INIT_POINTER(obj->next, head);
575ca26893fSHerbert Xu if (ht->rhlist) {
576ca26893fSHerbert Xu struct rhlist_head *list;
577ca26893fSHerbert Xu
578ca26893fSHerbert Xu list = container_of(obj, struct rhlist_head, rhead);
579ca26893fSHerbert Xu RCU_INIT_POINTER(list->next, NULL);
580ca26893fSHerbert Xu }
58102fd97c3SHerbert Xu
5828f0db018SNeilBrown /* bkt is always the head of the list, so it holds
5838f0db018SNeilBrown * the lock, which we need to preserve
5848f0db018SNeilBrown */
585f4712b46SNeilBrown rht_assign_locked(bkt, obj);
58602fd97c3SHerbert Xu
5873cf92222SHerbert Xu return NULL;
588ca26893fSHerbert Xu }
589ca26893fSHerbert Xu
/*
 * One RCU-protected insertion attempt: walk the table chain, lock the
 * target bucket, look up and insert.  Returns the old object on
 * duplicate, NULL on success, or an ERR_PTR (-EAGAIN means the caller
 * should retry after rhashtable_insert_rehash()).
 */
static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
				   struct rhash_head *obj)
{
	struct bucket_table *new_tbl;
	struct bucket_table *tbl;
	struct rhash_lock_head __rcu **bkt;
	unsigned long flags;
	unsigned int hash;
	void *data;

	new_tbl = rcu_dereference(ht->tbl);

	do {
		tbl = new_tbl;
		hash = rht_head_hashfn(ht, tbl, obj, ht->p);
		if (rcu_access_pointer(tbl->future_tbl))
			/* Failure is OK */
			bkt = rht_bucket_var(tbl, hash);
		else
			bkt = rht_bucket_insert(ht, tbl, hash);
		if (bkt == NULL) {
			new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
			data = ERR_PTR(-EAGAIN);
		} else {
			bool inserted;

			flags = rht_lock(tbl, bkt);
			data = rhashtable_lookup_one(ht, bkt, tbl,
						     hash, key, obj);
			new_tbl = rhashtable_insert_one(ht, bkt, tbl,
							hash, obj, data);
			/* Success: lookup missed and insert_one linked obj. */
			inserted = data && !new_tbl;
			if (inserted)
				atomic_inc(&ht->nelems);
			if (PTR_ERR(new_tbl) != -EEXIST)
				data = ERR_CAST(new_tbl);

			rht_unlock(tbl, bkt, flags);

			if (inserted && rht_grow_above_75(ht, tbl))
				schedule_work(&ht->run_work);
		}
	} while (!IS_ERR_OR_NULL(new_tbl));

	if (PTR_ERR(data) == -EAGAIN)
		data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
			       -EAGAIN);

	return data;
}
640ca26893fSHerbert Xu
/*
 * Slow path of insertion: retry rhashtable_try_insert() under RCU until
 * it either succeeds (NULL / existing object) or fails permanently.
 */
void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
			     struct rhash_head *obj)
{
	void *data;

	do {
		rcu_read_lock();
		data = rhashtable_try_insert(ht, key, obj);
		rcu_read_unlock();
	} while (PTR_ERR(data) == -EAGAIN);

	return data;
}
EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
65502fd97c3SHerbert Xu
/**
 * rhashtable_walk_enter - Initialise an iterator
 * @ht: Table to walk over
 * @iter: Hash table Iterator
 *
 * This function prepares a hash table walk.
 *
 * Note that if you restart a walk after rhashtable_walk_stop you
 * may see the same object twice. Also, you may miss objects if
 * there are removals in between rhashtable_walk_stop and the next
 * call to rhashtable_walk_start.
 *
 * For a completely stable walk you should construct your own data
 * structure outside the hash table.
 *
 * This function may be called from any process context, including
 * non-preemptible context, but cannot be called from softirq or
 * hardirq context.
 *
 * You must call rhashtable_walk_exit after this function returns.
 */
void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
{
	iter->ht = ht;
	iter->p = NULL;
	iter->slot = 0;
	iter->skip = 0;
	iter->end_of_table = 0;

	/* Register on the current table's walker list so a concurrent
	 * rehash can invalidate us (walker->tbl = NULL) safely.
	 */
	spin_lock(&ht->lock);
	iter->walker.tbl =
		rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
	list_add(&iter->walker.list, &iter->walker.tbl->walkers);
	spin_unlock(&ht->lock);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_enter);
692f2dba9c6SHerbert Xu
693f2dba9c6SHerbert Xu /**
694f2dba9c6SHerbert Xu * rhashtable_walk_exit - Free an iterator
695f2dba9c6SHerbert Xu * @iter: Hash table Iterator
696f2dba9c6SHerbert Xu *
6976c4128f6SHerbert Xu * This function frees resources allocated by rhashtable_walk_enter.
698f2dba9c6SHerbert Xu */
rhashtable_walk_exit(struct rhashtable_iter * iter)699f2dba9c6SHerbert Xu void rhashtable_walk_exit(struct rhashtable_iter *iter)
700f2dba9c6SHerbert Xu {
701c6ff5268SHerbert Xu spin_lock(&iter->ht->lock);
702246779ddSHerbert Xu if (iter->walker.tbl)
703246779ddSHerbert Xu list_del(&iter->walker.list);
704c6ff5268SHerbert Xu spin_unlock(&iter->ht->lock);
705f2dba9c6SHerbert Xu }
706f2dba9c6SHerbert Xu EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
707f2dba9c6SHerbert Xu
708f2dba9c6SHerbert Xu /**
70997a6ec4aSTom Herbert * rhashtable_walk_start_check - Start a hash table walk
710f2dba9c6SHerbert Xu * @iter: Hash table iterator
711f2dba9c6SHerbert Xu *
7120647169cSAndreas Gruenbacher * Start a hash table walk at the current iterator position. Note that we take
7130647169cSAndreas Gruenbacher * the RCU lock in all cases including when we return an error. So you must
7140647169cSAndreas Gruenbacher * always call rhashtable_walk_stop to clean up.
715f2dba9c6SHerbert Xu *
716f2dba9c6SHerbert Xu * Returns zero if successful.
717f2dba9c6SHerbert Xu *
7189dbbc3b9SZhen Lei * Returns -EAGAIN if resize event occurred. Note that the iterator
719f2dba9c6SHerbert Xu * will rewind back to the beginning and you may use it immediately
720f2dba9c6SHerbert Xu * by calling rhashtable_walk_next.
72197a6ec4aSTom Herbert *
72297a6ec4aSTom Herbert * rhashtable_walk_start is defined as an inline variant that returns
72397a6ec4aSTom Herbert * void. This is preferred in cases where the caller would ignore
72497a6ec4aSTom Herbert * resize events and always continue.
725f2dba9c6SHerbert Xu */
rhashtable_walk_start_check(struct rhashtable_iter * iter)72697a6ec4aSTom Herbert int rhashtable_walk_start_check(struct rhashtable_iter *iter)
727db4374f4SThomas Graf __acquires(RCU)
728f2dba9c6SHerbert Xu {
729eddee5baSHerbert Xu struct rhashtable *ht = iter->ht;
7305d240a89SNeilBrown bool rhlist = ht->rhlist;
731eddee5baSHerbert Xu
732f2dba9c6SHerbert Xu rcu_read_lock();
733f2dba9c6SHerbert Xu
734c6ff5268SHerbert Xu spin_lock(&ht->lock);
735246779ddSHerbert Xu if (iter->walker.tbl)
736246779ddSHerbert Xu list_del(&iter->walker.list);
737c6ff5268SHerbert Xu spin_unlock(&ht->lock);
738eddee5baSHerbert Xu
7395d240a89SNeilBrown if (iter->end_of_table)
7405d240a89SNeilBrown return 0;
7415d240a89SNeilBrown if (!iter->walker.tbl) {
742246779ddSHerbert Xu iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
743b41cc04bSNeilBrown iter->slot = 0;
744b41cc04bSNeilBrown iter->skip = 0;
745f2dba9c6SHerbert Xu return -EAGAIN;
746f2dba9c6SHerbert Xu }
747f2dba9c6SHerbert Xu
7485d240a89SNeilBrown if (iter->p && !rhlist) {
7495d240a89SNeilBrown /*
7505d240a89SNeilBrown * We need to validate that 'p' is still in the table, and
7515d240a89SNeilBrown * if so, update 'skip'
7525d240a89SNeilBrown */
7535d240a89SNeilBrown struct rhash_head *p;
7545d240a89SNeilBrown int skip = 0;
7555d240a89SNeilBrown rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
7565d240a89SNeilBrown skip++;
7575d240a89SNeilBrown if (p == iter->p) {
7585d240a89SNeilBrown iter->skip = skip;
7595d240a89SNeilBrown goto found;
7605d240a89SNeilBrown }
7615d240a89SNeilBrown }
7625d240a89SNeilBrown iter->p = NULL;
7635d240a89SNeilBrown } else if (iter->p && rhlist) {
7645d240a89SNeilBrown /* Need to validate that 'list' is still in the table, and
7655d240a89SNeilBrown * if so, update 'skip' and 'p'.
7665d240a89SNeilBrown */
7675d240a89SNeilBrown struct rhash_head *p;
7685d240a89SNeilBrown struct rhlist_head *list;
7695d240a89SNeilBrown int skip = 0;
7705d240a89SNeilBrown rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
7715d240a89SNeilBrown for (list = container_of(p, struct rhlist_head, rhead);
7725d240a89SNeilBrown list;
7735d240a89SNeilBrown list = rcu_dereference(list->next)) {
7745d240a89SNeilBrown skip++;
7755d240a89SNeilBrown if (list == iter->list) {
7765d240a89SNeilBrown iter->p = p;
777c643ecf3SRishabh Bhatnagar iter->skip = skip;
7785d240a89SNeilBrown goto found;
7795d240a89SNeilBrown }
7805d240a89SNeilBrown }
7815d240a89SNeilBrown }
7825d240a89SNeilBrown iter->p = NULL;
7835d240a89SNeilBrown }
7845d240a89SNeilBrown found:
785f2dba9c6SHerbert Xu return 0;
786f2dba9c6SHerbert Xu }
78797a6ec4aSTom Herbert EXPORT_SYMBOL_GPL(rhashtable_walk_start_check);
788f2dba9c6SHerbert Xu
789f2dba9c6SHerbert Xu /**
7902db54b47STom Herbert * __rhashtable_walk_find_next - Find the next element in a table (or the first
7912db54b47STom Herbert * one in case of a new walk).
7922db54b47STom Herbert *
793f2dba9c6SHerbert Xu * @iter: Hash table iterator
794f2dba9c6SHerbert Xu *
7952db54b47STom Herbert * Returns the found object or NULL when the end of the table is reached.
796f2dba9c6SHerbert Xu *
7972db54b47STom Herbert * Returns -EAGAIN if resize event occurred.
798f2dba9c6SHerbert Xu */
__rhashtable_walk_find_next(struct rhashtable_iter * iter)7992db54b47STom Herbert static void *__rhashtable_walk_find_next(struct rhashtable_iter *iter)
800f2dba9c6SHerbert Xu {
801246779ddSHerbert Xu struct bucket_table *tbl = iter->walker.tbl;
802ca26893fSHerbert Xu struct rhlist_head *list = iter->list;
803f2dba9c6SHerbert Xu struct rhashtable *ht = iter->ht;
804f2dba9c6SHerbert Xu struct rhash_head *p = iter->p;
805ca26893fSHerbert Xu bool rhlist = ht->rhlist;
806f2dba9c6SHerbert Xu
8072db54b47STom Herbert if (!tbl)
8082db54b47STom Herbert return NULL;
809f2dba9c6SHerbert Xu
810f2dba9c6SHerbert Xu for (; iter->slot < tbl->size; iter->slot++) {
811f2dba9c6SHerbert Xu int skip = iter->skip;
812f2dba9c6SHerbert Xu
813f2dba9c6SHerbert Xu rht_for_each_rcu(p, tbl, iter->slot) {
814ca26893fSHerbert Xu if (rhlist) {
815ca26893fSHerbert Xu list = container_of(p, struct rhlist_head,
816ca26893fSHerbert Xu rhead);
817ca26893fSHerbert Xu do {
818ca26893fSHerbert Xu if (!skip)
819ca26893fSHerbert Xu goto next;
820ca26893fSHerbert Xu skip--;
821ca26893fSHerbert Xu list = rcu_dereference(list->next);
822ca26893fSHerbert Xu } while (list);
823ca26893fSHerbert Xu
824ca26893fSHerbert Xu continue;
825ca26893fSHerbert Xu }
826f2dba9c6SHerbert Xu if (!skip)
827f2dba9c6SHerbert Xu break;
828f2dba9c6SHerbert Xu skip--;
829f2dba9c6SHerbert Xu }
830f2dba9c6SHerbert Xu
831f2dba9c6SHerbert Xu next:
832f2dba9c6SHerbert Xu if (!rht_is_a_nulls(p)) {
833f2dba9c6SHerbert Xu iter->skip++;
834f2dba9c6SHerbert Xu iter->p = p;
835ca26893fSHerbert Xu iter->list = list;
836ca26893fSHerbert Xu return rht_obj(ht, rhlist ? &list->rhead : p);
837f2dba9c6SHerbert Xu }
838f2dba9c6SHerbert Xu
839f2dba9c6SHerbert Xu iter->skip = 0;
840f2dba9c6SHerbert Xu }
841f2dba9c6SHerbert Xu
842142b942aSPhil Sutter iter->p = NULL;
843142b942aSPhil Sutter
844d88252f9SHerbert Xu /* Ensure we see any new tables. */
845d88252f9SHerbert Xu smp_rmb();
846d88252f9SHerbert Xu
847246779ddSHerbert Xu iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
848246779ddSHerbert Xu if (iter->walker.tbl) {
849eddee5baSHerbert Xu iter->slot = 0;
850eddee5baSHerbert Xu iter->skip = 0;
851eddee5baSHerbert Xu return ERR_PTR(-EAGAIN);
8522db54b47STom Herbert } else {
8532db54b47STom Herbert iter->end_of_table = true;
854eddee5baSHerbert Xu }
855eddee5baSHerbert Xu
856c936a79fSThomas Graf return NULL;
857f2dba9c6SHerbert Xu }
8582db54b47STom Herbert
8592db54b47STom Herbert /**
8602db54b47STom Herbert * rhashtable_walk_next - Return the next object and advance the iterator
8612db54b47STom Herbert * @iter: Hash table iterator
8622db54b47STom Herbert *
8632db54b47STom Herbert * Note that you must call rhashtable_walk_stop when you are finished
8642db54b47STom Herbert * with the walk.
8652db54b47STom Herbert *
8662db54b47STom Herbert * Returns the next object or NULL when the end of the table is reached.
8672db54b47STom Herbert *
8682db54b47STom Herbert * Returns -EAGAIN if resize event occurred. Note that the iterator
8692db54b47STom Herbert * will rewind back to the beginning and you may continue to use it.
8702db54b47STom Herbert */
rhashtable_walk_next(struct rhashtable_iter * iter)8712db54b47STom Herbert void *rhashtable_walk_next(struct rhashtable_iter *iter)
8722db54b47STom Herbert {
8732db54b47STom Herbert struct rhlist_head *list = iter->list;
8742db54b47STom Herbert struct rhashtable *ht = iter->ht;
8752db54b47STom Herbert struct rhash_head *p = iter->p;
8762db54b47STom Herbert bool rhlist = ht->rhlist;
8772db54b47STom Herbert
8782db54b47STom Herbert if (p) {
8792db54b47STom Herbert if (!rhlist || !(list = rcu_dereference(list->next))) {
8802db54b47STom Herbert p = rcu_dereference(p->next);
8812db54b47STom Herbert list = container_of(p, struct rhlist_head, rhead);
8822db54b47STom Herbert }
8832db54b47STom Herbert if (!rht_is_a_nulls(p)) {
8842db54b47STom Herbert iter->skip++;
8852db54b47STom Herbert iter->p = p;
8862db54b47STom Herbert iter->list = list;
8872db54b47STom Herbert return rht_obj(ht, rhlist ? &list->rhead : p);
8882db54b47STom Herbert }
8892db54b47STom Herbert
8902db54b47STom Herbert /* At the end of this slot, switch to next one and then find
8912db54b47STom Herbert * next entry from that point.
8922db54b47STom Herbert */
8932db54b47STom Herbert iter->skip = 0;
8942db54b47STom Herbert iter->slot++;
8952db54b47STom Herbert }
8962db54b47STom Herbert
8972db54b47STom Herbert return __rhashtable_walk_find_next(iter);
8982db54b47STom Herbert }
899f2dba9c6SHerbert Xu EXPORT_SYMBOL_GPL(rhashtable_walk_next);
900f2dba9c6SHerbert Xu
901f2dba9c6SHerbert Xu /**
9022db54b47STom Herbert * rhashtable_walk_peek - Return the next object but don't advance the iterator
9032db54b47STom Herbert * @iter: Hash table iterator
9042db54b47STom Herbert *
9052db54b47STom Herbert * Returns the next object or NULL when the end of the table is reached.
9062db54b47STom Herbert *
9072db54b47STom Herbert * Returns -EAGAIN if resize event occurred. Note that the iterator
9082db54b47STom Herbert * will rewind back to the beginning and you may continue to use it.
9092db54b47STom Herbert */
rhashtable_walk_peek(struct rhashtable_iter * iter)9102db54b47STom Herbert void *rhashtable_walk_peek(struct rhashtable_iter *iter)
9112db54b47STom Herbert {
9122db54b47STom Herbert struct rhlist_head *list = iter->list;
9132db54b47STom Herbert struct rhashtable *ht = iter->ht;
9142db54b47STom Herbert struct rhash_head *p = iter->p;
9152db54b47STom Herbert
9162db54b47STom Herbert if (p)
9172db54b47STom Herbert return rht_obj(ht, ht->rhlist ? &list->rhead : p);
9182db54b47STom Herbert
9192db54b47STom Herbert /* No object found in current iter, find next one in the table. */
9202db54b47STom Herbert
9212db54b47STom Herbert if (iter->skip) {
9222db54b47STom Herbert /* A nonzero skip value points to the next entry in the table
9232db54b47STom Herbert * beyond that last one that was found. Decrement skip so
9242db54b47STom Herbert * we find the current value. __rhashtable_walk_find_next
9252db54b47STom Herbert * will restore the original value of skip assuming that
9262db54b47STom Herbert * the table hasn't changed.
9272db54b47STom Herbert */
9282db54b47STom Herbert iter->skip--;
9292db54b47STom Herbert }
9302db54b47STom Herbert
9312db54b47STom Herbert return __rhashtable_walk_find_next(iter);
9322db54b47STom Herbert }
9332db54b47STom Herbert EXPORT_SYMBOL_GPL(rhashtable_walk_peek);
9342db54b47STom Herbert
9352db54b47STom Herbert /**
936f2dba9c6SHerbert Xu * rhashtable_walk_stop - Finish a hash table walk
937f2dba9c6SHerbert Xu * @iter: Hash table iterator
938f2dba9c6SHerbert Xu *
9390647169cSAndreas Gruenbacher * Finish a hash table walk. Does not reset the iterator to the start of the
9400647169cSAndreas Gruenbacher * hash table.
941f2dba9c6SHerbert Xu */
rhashtable_walk_stop(struct rhashtable_iter * iter)942f2dba9c6SHerbert Xu void rhashtable_walk_stop(struct rhashtable_iter *iter)
943db4374f4SThomas Graf __releases(RCU)
944f2dba9c6SHerbert Xu {
945eddee5baSHerbert Xu struct rhashtable *ht;
946246779ddSHerbert Xu struct bucket_table *tbl = iter->walker.tbl;
947eddee5baSHerbert Xu
948eddee5baSHerbert Xu if (!tbl)
949963ecbd4SHerbert Xu goto out;
950eddee5baSHerbert Xu
951eddee5baSHerbert Xu ht = iter->ht;
952eddee5baSHerbert Xu
953ba7c95eaSHerbert Xu spin_lock(&ht->lock);
9544feb7c7aSNeilBrown if (rcu_head_after_call_rcu(&tbl->rcu, bucket_table_free_rcu))
9554feb7c7aSNeilBrown /* This bucket table is being freed, don't re-link it. */
956246779ddSHerbert Xu iter->walker.tbl = NULL;
9574feb7c7aSNeilBrown else
9584feb7c7aSNeilBrown list_add(&iter->walker.list, &tbl->walkers);
959ba7c95eaSHerbert Xu spin_unlock(&ht->lock);
960eddee5baSHerbert Xu
961963ecbd4SHerbert Xu out:
962963ecbd4SHerbert Xu rcu_read_unlock();
963f2dba9c6SHerbert Xu }
964f2dba9c6SHerbert Xu EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
965f2dba9c6SHerbert Xu
rounded_hashtable_size(const struct rhashtable_params * params)966488fb86eSHerbert Xu static size_t rounded_hashtable_size(const struct rhashtable_params *params)
9677e1e7763SThomas Graf {
968107d01f5SDavidlohr Bueso size_t retsize;
969107d01f5SDavidlohr Bueso
970107d01f5SDavidlohr Bueso if (params->nelem_hint)
971107d01f5SDavidlohr Bueso retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
972e2e21c1cSHerbert Xu (unsigned long)params->min_size);
973107d01f5SDavidlohr Bueso else
974107d01f5SDavidlohr Bueso retsize = max(HASH_DEFAULT_SIZE,
975107d01f5SDavidlohr Bueso (unsigned long)params->min_size);
976107d01f5SDavidlohr Bueso
977107d01f5SDavidlohr Bueso return retsize;
9787e1e7763SThomas Graf }
9797e1e7763SThomas Graf
rhashtable_jhash2(const void * key,u32 length,u32 seed)98031ccde2dSHerbert Xu static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
98131ccde2dSHerbert Xu {
98231ccde2dSHerbert Xu return jhash2(key, length, seed);
98331ccde2dSHerbert Xu }
98431ccde2dSHerbert Xu
9857e1e7763SThomas Graf /**
9867e1e7763SThomas Graf * rhashtable_init - initialize a new hash table
9877e1e7763SThomas Graf * @ht: hash table to be initialized
9887e1e7763SThomas Graf * @params: configuration parameters
9897e1e7763SThomas Graf *
9907e1e7763SThomas Graf * Initializes a new hash table based on the provided configuration
9917e1e7763SThomas Graf * parameters. A table can be configured either with a variable or
9927e1e7763SThomas Graf * fixed length key:
9937e1e7763SThomas Graf *
9947e1e7763SThomas Graf * Configuration Example 1: Fixed length keys
9957e1e7763SThomas Graf * struct test_obj {
9967e1e7763SThomas Graf * int key;
9977e1e7763SThomas Graf * void * my_member;
9987e1e7763SThomas Graf * struct rhash_head node;
9997e1e7763SThomas Graf * };
10007e1e7763SThomas Graf *
10017e1e7763SThomas Graf * struct rhashtable_params params = {
10027e1e7763SThomas Graf * .head_offset = offsetof(struct test_obj, node),
10037e1e7763SThomas Graf * .key_offset = offsetof(struct test_obj, key),
10047e1e7763SThomas Graf * .key_len = sizeof(int),
100587545899SDaniel Borkmann * .hashfn = jhash,
10067e1e7763SThomas Graf * };
10077e1e7763SThomas Graf *
10087e1e7763SThomas Graf * Configuration Example 2: Variable length keys
10097e1e7763SThomas Graf * struct test_obj {
10107e1e7763SThomas Graf * [...]
10117e1e7763SThomas Graf * struct rhash_head node;
10127e1e7763SThomas Graf * };
10137e1e7763SThomas Graf *
101449f7b33eSPatrick McHardy * u32 my_hash_fn(const void *data, u32 len, u32 seed)
10157e1e7763SThomas Graf * {
10167e1e7763SThomas Graf * struct test_obj *obj = data;
10177e1e7763SThomas Graf *
10187e1e7763SThomas Graf * return [... hash ...];
10197e1e7763SThomas Graf * }
10207e1e7763SThomas Graf *
10217e1e7763SThomas Graf * struct rhashtable_params params = {
10227e1e7763SThomas Graf * .head_offset = offsetof(struct test_obj, node),
102387545899SDaniel Borkmann * .hashfn = jhash,
10247e1e7763SThomas Graf * .obj_hashfn = my_hash_fn,
10257e1e7763SThomas Graf * };
10267e1e7763SThomas Graf */
rhashtable_init_noprof(struct rhashtable * ht,const struct rhashtable_params * params)10279e54dd8bSKent Overstreet int rhashtable_init_noprof(struct rhashtable *ht,
1028488fb86eSHerbert Xu const struct rhashtable_params *params)
10297e1e7763SThomas Graf {
10307e1e7763SThomas Graf struct bucket_table *tbl;
10317e1e7763SThomas Graf size_t size;
10327e1e7763SThomas Graf
103331ccde2dSHerbert Xu if ((!params->key_len && !params->obj_hashfn) ||
103402fd97c3SHerbert Xu (params->obj_hashfn && !params->obj_cmpfn))
10357e1e7763SThomas Graf return -EINVAL;
10367e1e7763SThomas Graf
103797defe1eSThomas Graf memset(ht, 0, sizeof(*ht));
103897defe1eSThomas Graf mutex_init(&ht->mutex);
1039ba7c95eaSHerbert Xu spin_lock_init(&ht->lock);
104097defe1eSThomas Graf memcpy(&ht->p, params, sizeof(*params));
104197defe1eSThomas Graf
10429e54dd8bSKent Overstreet alloc_tag_record(ht->alloc_tag);
10439e54dd8bSKent Overstreet
1044a998f712SThomas Graf if (params->min_size)
1045a998f712SThomas Graf ht->p.min_size = roundup_pow_of_two(params->min_size);
1046a998f712SThomas Graf
10476d684e54SHerbert Xu /* Cap total entries at 2^31 to avoid nelems overflow. */
10486d684e54SHerbert Xu ht->max_elems = 1u << 31;
10492d2ab658SHerbert Xu
10502d2ab658SHerbert Xu if (params->max_size) {
10512d2ab658SHerbert Xu ht->p.max_size = rounddown_pow_of_two(params->max_size);
10526d684e54SHerbert Xu if (ht->p.max_size < ht->max_elems / 2)
10536d684e54SHerbert Xu ht->max_elems = ht->p.max_size * 2;
10542d2ab658SHerbert Xu }
10556d684e54SHerbert Xu
105648e75b43SFlorian Westphal ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
1057a998f712SThomas Graf
10583a324606SHerbert Xu size = rounded_hashtable_size(&ht->p);
10593a324606SHerbert Xu
106031ccde2dSHerbert Xu ht->key_len = ht->p.key_len;
106131ccde2dSHerbert Xu if (!params->hashfn) {
106231ccde2dSHerbert Xu ht->p.hashfn = jhash;
106331ccde2dSHerbert Xu
106431ccde2dSHerbert Xu if (!(ht->key_len & (sizeof(u32) - 1))) {
106531ccde2dSHerbert Xu ht->key_len /= sizeof(u32);
106631ccde2dSHerbert Xu ht->p.hashfn = rhashtable_jhash2;
106731ccde2dSHerbert Xu }
106831ccde2dSHerbert Xu }
106931ccde2dSHerbert Xu
10702d22ecf6SDavidlohr Bueso /*
10712d22ecf6SDavidlohr Bueso * This is api initialization and thus we need to guarantee the
10722d22ecf6SDavidlohr Bueso * initial rhashtable allocation. Upon failure, retry with the
10732d22ecf6SDavidlohr Bueso * smallest possible size with __GFP_NOFAIL semantics.
10742d22ecf6SDavidlohr Bueso */
1075b9ecfdaaSHerbert Xu tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
10762d22ecf6SDavidlohr Bueso if (unlikely(tbl == NULL)) {
10772d22ecf6SDavidlohr Bueso size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
10782d22ecf6SDavidlohr Bueso tbl = bucket_table_alloc(ht, size, GFP_KERNEL | __GFP_NOFAIL);
10792d22ecf6SDavidlohr Bueso }
10807e1e7763SThomas Graf
1081545a148eSYing Xue atomic_set(&ht->nelems, 0);
1082a5b6846fSDaniel Borkmann
10837e1e7763SThomas Graf RCU_INIT_POINTER(ht->tbl, tbl);
10847e1e7763SThomas Graf
108557699a40SYing Xue INIT_WORK(&ht->run_work, rht_deferred_worker);
108697defe1eSThomas Graf
10877e1e7763SThomas Graf return 0;
10887e1e7763SThomas Graf }
10899e54dd8bSKent Overstreet EXPORT_SYMBOL_GPL(rhashtable_init_noprof);
10907e1e7763SThomas Graf
10917e1e7763SThomas Graf /**
1092ca26893fSHerbert Xu * rhltable_init - initialize a new hash list table
1093ca26893fSHerbert Xu * @hlt: hash list table to be initialized
1094ca26893fSHerbert Xu * @params: configuration parameters
1095ca26893fSHerbert Xu *
1096ca26893fSHerbert Xu * Initializes a new hash list table.
1097ca26893fSHerbert Xu *
1098ca26893fSHerbert Xu * See documentation for rhashtable_init.
1099ca26893fSHerbert Xu */
rhltable_init_noprof(struct rhltable * hlt,const struct rhashtable_params * params)11009e54dd8bSKent Overstreet int rhltable_init_noprof(struct rhltable *hlt, const struct rhashtable_params *params)
1101ca26893fSHerbert Xu {
1102ca26893fSHerbert Xu int err;
1103ca26893fSHerbert Xu
11049e54dd8bSKent Overstreet err = rhashtable_init_noprof(&hlt->ht, params);
1105ca26893fSHerbert Xu hlt->ht.rhlist = true;
1106ca26893fSHerbert Xu return err;
1107ca26893fSHerbert Xu }
11089e54dd8bSKent Overstreet EXPORT_SYMBOL_GPL(rhltable_init_noprof);
1109ca26893fSHerbert Xu
rhashtable_free_one(struct rhashtable * ht,struct rhash_head * obj,void (* free_fn)(void * ptr,void * arg),void * arg)1110ca26893fSHerbert Xu static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
1111ca26893fSHerbert Xu void (*free_fn)(void *ptr, void *arg),
1112ca26893fSHerbert Xu void *arg)
1113ca26893fSHerbert Xu {
1114ca26893fSHerbert Xu struct rhlist_head *list;
1115ca26893fSHerbert Xu
1116ca26893fSHerbert Xu if (!ht->rhlist) {
1117ca26893fSHerbert Xu free_fn(rht_obj(ht, obj), arg);
1118ca26893fSHerbert Xu return;
1119ca26893fSHerbert Xu }
1120ca26893fSHerbert Xu
1121ca26893fSHerbert Xu list = container_of(obj, struct rhlist_head, rhead);
1122ca26893fSHerbert Xu do {
1123ca26893fSHerbert Xu obj = &list->rhead;
1124ca26893fSHerbert Xu list = rht_dereference(list->next, ht);
1125ca26893fSHerbert Xu free_fn(rht_obj(ht, obj), arg);
1126ca26893fSHerbert Xu } while (list);
1127ca26893fSHerbert Xu }
1128ca26893fSHerbert Xu
1129ca26893fSHerbert Xu /**
11306b6f302cSThomas Graf * rhashtable_free_and_destroy - free elements and destroy hash table
11317e1e7763SThomas Graf * @ht: the hash table to destroy
11326b6f302cSThomas Graf * @free_fn: callback to release resources of element
11336b6f302cSThomas Graf * @arg: pointer passed to free_fn
11347e1e7763SThomas Graf *
11356b6f302cSThomas Graf * Stops an eventual async resize. If defined, invokes free_fn for each
11366b6f302cSThomas Graf * element to releasal resources. Please note that RCU protected
11376b6f302cSThomas Graf * readers may still be accessing the elements. Releasing of resources
11386b6f302cSThomas Graf * must occur in a compatible manner. Then frees the bucket array.
11396b6f302cSThomas Graf *
11406b6f302cSThomas Graf * This function will eventually sleep to wait for an async resize
11416b6f302cSThomas Graf * to complete. The caller is responsible that no further write operations
11426b6f302cSThomas Graf * occurs in parallel.
11437e1e7763SThomas Graf */
rhashtable_free_and_destroy(struct rhashtable * ht,void (* free_fn)(void * ptr,void * arg),void * arg)11446b6f302cSThomas Graf void rhashtable_free_and_destroy(struct rhashtable *ht,
11456b6f302cSThomas Graf void (*free_fn)(void *ptr, void *arg),
11466b6f302cSThomas Graf void *arg)
11477e1e7763SThomas Graf {
11480026129cSTaehee Yoo struct bucket_table *tbl, *next_tbl;
11496b6f302cSThomas Graf unsigned int i;
115097defe1eSThomas Graf
115157699a40SYing Xue cancel_work_sync(&ht->run_work);
115257699a40SYing Xue
115397defe1eSThomas Graf mutex_lock(&ht->mutex);
11546b6f302cSThomas Graf tbl = rht_dereference(ht->tbl, ht);
11550026129cSTaehee Yoo restart:
11566b6f302cSThomas Graf if (free_fn) {
11576b6f302cSThomas Graf for (i = 0; i < tbl->size; i++) {
11586b6f302cSThomas Graf struct rhash_head *pos, *next;
11596b6f302cSThomas Graf
1160ae6da1f5SEric Dumazet cond_resched();
1161adc6a3abSNeilBrown for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)),
11626b6f302cSThomas Graf next = !rht_is_a_nulls(pos) ?
11636b6f302cSThomas Graf rht_dereference(pos->next, ht) : NULL;
11646b6f302cSThomas Graf !rht_is_a_nulls(pos);
11656b6f302cSThomas Graf pos = next,
11666b6f302cSThomas Graf next = !rht_is_a_nulls(pos) ?
11676b6f302cSThomas Graf rht_dereference(pos->next, ht) : NULL)
1168ca26893fSHerbert Xu rhashtable_free_one(ht, pos, free_fn, arg);
11696b6f302cSThomas Graf }
11706b6f302cSThomas Graf }
11716b6f302cSThomas Graf
11720026129cSTaehee Yoo next_tbl = rht_dereference(tbl->future_tbl, ht);
11736b6f302cSThomas Graf bucket_table_free(tbl);
11740026129cSTaehee Yoo if (next_tbl) {
11750026129cSTaehee Yoo tbl = next_tbl;
11760026129cSTaehee Yoo goto restart;
11770026129cSTaehee Yoo }
117897defe1eSThomas Graf mutex_unlock(&ht->mutex);
11797e1e7763SThomas Graf }
11806b6f302cSThomas Graf EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
11816b6f302cSThomas Graf
rhashtable_destroy(struct rhashtable * ht)11826b6f302cSThomas Graf void rhashtable_destroy(struct rhashtable *ht)
11836b6f302cSThomas Graf {
11846b6f302cSThomas Graf return rhashtable_free_and_destroy(ht, NULL, NULL);
11856b6f302cSThomas Graf }
11867e1e7763SThomas Graf EXPORT_SYMBOL_GPL(rhashtable_destroy);
1187da20420fSHerbert Xu
__rht_bucket_nested(const struct bucket_table * tbl,unsigned int hash)1188ce9b362bSHerbert Xu struct rhash_lock_head __rcu **__rht_bucket_nested(
1189ce9b362bSHerbert Xu const struct bucket_table *tbl, unsigned int hash)
1190da20420fSHerbert Xu {
1191da20420fSHerbert Xu const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
1192da20420fSHerbert Xu unsigned int index = hash & ((1 << tbl->nest) - 1);
1193da20420fSHerbert Xu unsigned int size = tbl->size >> tbl->nest;
1194da20420fSHerbert Xu unsigned int subhash = hash;
1195da20420fSHerbert Xu union nested_table *ntbl;
1196da20420fSHerbert Xu
11974a3084aaSHerbert Xu ntbl = nested_table_top(tbl);
1198c4d2603dSHerbert Xu ntbl = rht_dereference_bucket_rcu(ntbl[index].table, tbl, hash);
1199da20420fSHerbert Xu subhash >>= tbl->nest;
1200da20420fSHerbert Xu
1201da20420fSHerbert Xu while (ntbl && size > (1 << shift)) {
1202da20420fSHerbert Xu index = subhash & ((1 << shift) - 1);
1203c4d2603dSHerbert Xu ntbl = rht_dereference_bucket_rcu(ntbl[index].table,
1204c4d2603dSHerbert Xu tbl, hash);
1205da20420fSHerbert Xu size >>= shift;
1206da20420fSHerbert Xu subhash >>= shift;
1207da20420fSHerbert Xu }
1208da20420fSHerbert Xu
1209ff302db9SNeilBrown if (!ntbl)
1210ff302db9SNeilBrown return NULL;
1211da20420fSHerbert Xu
1212da20420fSHerbert Xu return &ntbl[subhash].bucket;
1213da20420fSHerbert Xu
1214da20420fSHerbert Xu }
1215ff302db9SNeilBrown EXPORT_SYMBOL_GPL(__rht_bucket_nested);
1216ff302db9SNeilBrown
rht_bucket_nested(const struct bucket_table * tbl,unsigned int hash)1217ce9b362bSHerbert Xu struct rhash_lock_head __rcu **rht_bucket_nested(
1218ce9b362bSHerbert Xu const struct bucket_table *tbl, unsigned int hash)
1219ff302db9SNeilBrown {
1220ce9b362bSHerbert Xu static struct rhash_lock_head __rcu *rhnull;
1221ff302db9SNeilBrown
1222ff302db9SNeilBrown if (!rhnull)
1223ff302db9SNeilBrown INIT_RHT_NULLS_HEAD(rhnull);
1224ff302db9SNeilBrown return __rht_bucket_nested(tbl, hash) ?: &rhnull;
1225ff302db9SNeilBrown }
1226da20420fSHerbert Xu EXPORT_SYMBOL_GPL(rht_bucket_nested);
1227da20420fSHerbert Xu
rht_bucket_nested_insert(struct rhashtable * ht,struct bucket_table * tbl,unsigned int hash)1228ce9b362bSHerbert Xu struct rhash_lock_head __rcu **rht_bucket_nested_insert(
1229ce9b362bSHerbert Xu struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
1230da20420fSHerbert Xu {
1231da20420fSHerbert Xu const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
1232da20420fSHerbert Xu unsigned int index = hash & ((1 << tbl->nest) - 1);
1233da20420fSHerbert Xu unsigned int size = tbl->size >> tbl->nest;
1234da20420fSHerbert Xu union nested_table *ntbl;
1235da20420fSHerbert Xu
12364a3084aaSHerbert Xu ntbl = nested_table_top(tbl);
1237da20420fSHerbert Xu hash >>= tbl->nest;
1238da20420fSHerbert Xu ntbl = nested_table_alloc(ht, &ntbl[index].table,
12395af68ef7SNeilBrown size <= (1 << shift));
1240da20420fSHerbert Xu
1241da20420fSHerbert Xu while (ntbl && size > (1 << shift)) {
1242da20420fSHerbert Xu index = hash & ((1 << shift) - 1);
1243da20420fSHerbert Xu size >>= shift;
1244da20420fSHerbert Xu hash >>= shift;
1245da20420fSHerbert Xu ntbl = nested_table_alloc(ht, &ntbl[index].table,
12465af68ef7SNeilBrown size <= (1 << shift));
1247da20420fSHerbert Xu }
1248da20420fSHerbert Xu
1249da20420fSHerbert Xu if (!ntbl)
1250da20420fSHerbert Xu return NULL;
1251da20420fSHerbert Xu
1252da20420fSHerbert Xu return &ntbl[hash].bucket;
1253da20420fSHerbert Xu
1254da20420fSHerbert Xu }
1255da20420fSHerbert Xu EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);
1256