/* xref: /linux-6.15/include/linux/sbitmap.h (revision 1fcbd5de) */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Fast and scalable bitmaps.
 *
 * Copyright (C) 2016 Facebook
 * Copyright (C) 2013-2014 Jens Axboe
 */

#ifndef __LINUX_SCALE_BITMAP_H
#define __LINUX_SCALE_BITMAP_H

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/cache.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <linux/minmax.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/types.h>
#include <linux/wait.h>

struct seq_file;

/**
 * struct sbitmap_word - Word in a &struct sbitmap.
 */
struct sbitmap_word {
	/**
	 * @depth: Number of bits being used in @word/@cleared
	 */
	unsigned long depth;

	/**
	 * @word: word holding free bits
	 */
	unsigned long word ____cacheline_aligned_in_smp;

	/**
	 * @cleared: word holding cleared bits
	 */
	unsigned long cleared ____cacheline_aligned_in_smp;
} ____cacheline_aligned_in_smp;

/**
 * struct sbitmap - Scalable bitmap.
 *
 * A &struct sbitmap is spread over multiple cachelines to avoid ping-pong. This
 * trades off higher memory usage for better scalability.
 */
struct sbitmap {
	/**
	 * @depth: Number of bits used in the whole bitmap.
	 */
	unsigned int depth;

	/**
	 * @shift: log2(number of bits used per word)
	 */
	unsigned int shift;

	/**
	 * @map_nr: Number of words (cachelines) being used for the bitmap.
	 */
	unsigned int map_nr;

	/**
	 * @round_robin: Allocate bits in strict round-robin order.
	 */
	bool round_robin;

	/**
	 * @map: Allocated bitmap.
	 */
	struct sbitmap_word *map;

	/**
	 * @alloc_hint: Cache of last successfully allocated or freed bit.
	 *
	 * This is per-cpu, which allows multiple users to stick to different
	 * cachelines until the map is exhausted.
	 */
	unsigned int __percpu *alloc_hint;
};

#define SBQ_WAIT_QUEUES 8
#define SBQ_WAKE_BATCH 8

/**
 * struct sbq_wait_state - Wait queue in a &struct sbitmap_queue.
 */
struct sbq_wait_state {
	/**
	 * @wait_cnt: Number of frees remaining before we wake up.
	 */
	atomic_t wait_cnt;

	/**
	 * @wait: Wait queue.
	 */
	wait_queue_head_t wait;
} ____cacheline_aligned_in_smp;

/**
 * struct sbitmap_queue - Scalable bitmap with the added ability to wait on free
 * bits.
 *
 * A &struct sbitmap_queue uses multiple wait queues and rolling wakeups to
 * avoid contention on the wait queue spinlock. This ensures that we don't hit a
 * scalability wall when we run out of free bits and have to start putting tasks
 * to sleep.
 */
struct sbitmap_queue {
	/**
	 * @sb: Scalable bitmap.
	 */
	struct sbitmap sb;

	/**
	 * @wake_batch: Number of bits which must be freed before we wake up any
	 * waiters.
	 */
	unsigned int wake_batch;

	/**
	 * @wake_index: Next wait queue in @ws to wake up.
	 */
	atomic_t wake_index;

	/**
	 * @ws: Wait queues.
	 */
	struct sbq_wait_state *ws;

	/**
	 * @ws_active: Count of currently active @ws waitqueues.
	 */
	atomic_t ws_active;

	/**
	 * @min_shallow_depth: The minimum shallow depth which may be passed to
	 * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow().
	 */
	unsigned int min_shallow_depth;
};

/**
 * sbitmap_init_node() - Initialize a &struct sbitmap on a specific memory node.
 * @sb: Bitmap to initialize.
 * @depth: Number of bits to allocate.
 * @shift: Use 2^@shift bits per word in the bitmap; if a negative number is
 *         given, a good default is chosen.
 * @flags: Allocation flags.
 * @node: Memory node to allocate on.
 * @round_robin: If true, be stricter about allocation order; always allocate
 *               starting from the last allocated bit. This is less efficient
 *               than the default behavior (false).
 * @alloc_hint: If true, apply percpu hint for where to start searching for
 *              a free bit.
 *
 * Return: Zero on success or negative errno on failure.
 */
int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
		      gfp_t flags, int node, bool round_robin, bool alloc_hint);

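/*
 * Usage sketch (illustrative only, not part of the kernel-doc above; error
 * handling is trimmed and the depth of 128 is an arbitrary example value):
 *
 *	struct sbitmap sb;
 *	int nr;
 *
 *	if (sbitmap_init_node(&sb, 128, -1, GFP_KERNEL, NUMA_NO_NODE,
 *			      false, true))
 *		return -ENOMEM;
 *	nr = sbitmap_get(&sb);
 *	if (nr >= 0) {
 *		... use bit 'nr' ...
 *		sbitmap_put(&sb, nr);
 *	}
 *	sbitmap_free(&sb);
 *
 * sbitmap_get() returns -1 when no bit is free; sbitmap_put() performs a
 * deferred clear and updates the percpu allocation hint.
 */
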
/**
 * sbitmap_free() - Free memory used by a &struct sbitmap.
 * @sb: Bitmap to free.
 */
static inline void sbitmap_free(struct sbitmap *sb)
{
	free_percpu(sb->alloc_hint);
	kfree(sb->map);
	sb->map = NULL;
}

/**
 * sbitmap_resize() - Resize a &struct sbitmap.
 * @sb: Bitmap to resize.
 * @depth: New number of bits to resize to.
 *
 * Doesn't reallocate anything. It's up to the caller to ensure that the new
 * depth doesn't exceed the depth that the sb was initialized with.
 */
void sbitmap_resize(struct sbitmap *sb, unsigned int depth);

/**
 * sbitmap_get() - Try to allocate a free bit from a &struct sbitmap.
 * @sb: Bitmap to allocate from.
 *
 * This operation provides acquire barrier semantics if it succeeds.
 *
 * Return: Non-negative allocated bit number if successful, -1 otherwise.
 */
int sbitmap_get(struct sbitmap *sb);

/**
 * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap,
 * limiting the depth used from each word.
 * @sb: Bitmap to allocate from.
 * @shallow_depth: The maximum number of bits to allocate from a single word.
 *
 * This rather specific operation allows for having multiple users with
 * different allocation limits. E.g., there can be a high-priority class that
 * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow()
 * with a @shallow_depth of (1 << (@sb->shift - 1)). Then, the low-priority
 * class can only allocate half of the total bits in the bitmap, preventing it
 * from starving out the high-priority class.
 *
 * Return: Non-negative allocated bit number if successful, -1 otherwise.
 */
int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth);

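/*
 * Worked example for the half-depth case above (illustrative; assumes the
 * default shift on a 64-bit build, i.e. @sb->shift == 6, so each word holds
 * 64 bits): a @shallow_depth of (1 << (6 - 1)) == 32 lets the low-priority
 * class take at most 32 bits from each word:
 *
 *	int nr = sbitmap_get_shallow(&sb, 1UL << (sb.shift - 1));
 */
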
/**
 * sbitmap_any_bit_set() - Check for a set bit in a &struct sbitmap.
 * @sb: Bitmap to check.
 *
 * Return: true if any bit in the bitmap is set, false otherwise.
 */
bool sbitmap_any_bit_set(const struct sbitmap *sb);

#define SB_NR_TO_INDEX(sb, bitnr) ((bitnr) >> (sb)->shift)
#define SB_NR_TO_BIT(sb, bitnr) ((bitnr) & ((1U << (sb)->shift) - 1U))

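/*
 * For example (values are illustrative): with @shift == 6 (64 bits per
 * word), bit number 70 lives in word 70 >> 6 == 1 at bit offset
 * 70 & 63 == 6.
 */
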
typedef bool (*sb_for_each_fn)(struct sbitmap *, unsigned int, void *);

/**
 * __sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap.
 * @start: Where to start the iteration.
 * @sb: Bitmap to iterate over.
 * @fn: Callback. Should return true to continue or false to break early.
 * @data: Pointer to pass to callback.
 *
 * This is inline even though it's non-trivial so that the function calls to the
 * callback will hopefully get optimized away.
 */
static inline void __sbitmap_for_each_set(struct sbitmap *sb,
					  unsigned int start,
					  sb_for_each_fn fn, void *data)
{
	unsigned int index;
	unsigned int nr;
	unsigned int scanned = 0;

	if (start >= sb->depth)
		start = 0;
	index = SB_NR_TO_INDEX(sb, start);
	nr = SB_NR_TO_BIT(sb, start);

	while (scanned < sb->depth) {
		unsigned long word;
		unsigned int depth = min_t(unsigned int,
					   sb->map[index].depth - nr,
					   sb->depth - scanned);

		scanned += depth;
		word = sb->map[index].word & ~sb->map[index].cleared;
		if (!word)
			goto next;

		/*
		 * On the first iteration of the outer loop, we need to add the
		 * bit offset back to the size of the word for find_next_bit().
		 * On all other iterations, nr is zero, so this is a noop.
		 */
		depth += nr;
		while (1) {
			nr = find_next_bit(&word, depth, nr);
			if (nr >= depth)
				break;
			if (!fn(sb, (index << sb->shift) + nr, data))
				return;

			nr++;
		}
next:
		nr = 0;
		if (++index >= sb->map_nr)
			index = 0;
	}
}

/**
 * sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap.
 * @sb: Bitmap to iterate over.
 * @fn: Callback. Should return true to continue or false to break early.
 * @data: Pointer to pass to callback.
 */
static inline void sbitmap_for_each_set(struct sbitmap *sb, sb_for_each_fn fn,
					void *data)
{
	__sbitmap_for_each_set(sb, 0, fn, data);
}

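/*
 * Callback sketch (illustrative; the function name and counter are made up).
 * The callback returns true to keep iterating:
 *
 *	static bool count_set(struct sbitmap *sb, unsigned int bitnr,
 *			      void *data)
 *	{
 *		unsigned int *count = data;
 *
 *		(*count)++;
 *		return true;
 *	}
 *
 *	unsigned int count = 0;
 *	sbitmap_for_each_set(&sb, count_set, &count);
 */
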
static inline unsigned long *__sbitmap_word(struct sbitmap *sb,
					    unsigned int bitnr)
{
	return &sb->map[SB_NR_TO_INDEX(sb, bitnr)].word;
}

/* Helpers equivalent to the operations in asm/bitops.h and linux/bitmap.h */

static inline void sbitmap_set_bit(struct sbitmap *sb, unsigned int bitnr)
{
	set_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
}

static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr)
{
	clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
}

/*
 * This one is special: rather than actually clearing the bit, it sets the
 * corresponding bit in the ->cleared mask. It pairs with the caller doing
 * sbitmap_deferred_clear() when a given index is full, which clears the
 * previously freed entries in the corresponding ->word.
 */
static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int bitnr)
{
	unsigned long *addr = &sb->map[SB_NR_TO_INDEX(sb, bitnr)].cleared;

	set_bit(SB_NR_TO_BIT(sb, bitnr), addr);
}

/*
 * Pairs with sbitmap_get(); this one applies both the deferred-cleared bit
 * and the allocation hint.
 */
static inline void sbitmap_put(struct sbitmap *sb, unsigned int bitnr)
{
	sbitmap_deferred_clear_bit(sb, bitnr);

	if (likely(sb->alloc_hint && !sb->round_robin && bitnr < sb->depth))
		*raw_cpu_ptr(sb->alloc_hint) = bitnr;
}

static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr)
{
	return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
}

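/*
 * Note on deferred clearing (illustrative): freeing a bit with sbitmap_put()
 * only marks it in ->cleared, so sbitmap_test_bit() may keep reporting it as
 * set until the allocation path finds the word full and folds ->cleared back
 * into ->word:
 *
 *	sbitmap_put(&sb, nr);
 *	sbitmap_test_bit(&sb, nr);	(may still return 1)
 */
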
static inline int sbitmap_calculate_shift(unsigned int depth)
{
	int shift = ilog2(BITS_PER_LONG);

	/*
	 * If the bitmap is small, shrink the number of bits per word so
	 * we spread over a few cachelines, at least. If less than 4
	 * bits, just forget about it, it's not going to work optimally
	 * anyway.
	 */
	if (depth >= 4) {
		while ((4U << shift) > depth)
			shift--;
	}

	return shift;
}

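/*
 * Worked example (illustrative, for BITS_PER_LONG == 64): shift starts at
 * ilog2(64) == 6. For depth == 64, (4U << 6) == 256 > 64 and
 * (4U << 5) == 128 > 64, so shift drops to 4, giving 16 bits per word
 * spread over four words/cachelines instead of one fully packed word.
 */
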
/**
 * sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file.
 * @sb: Bitmap to show.
 * @m: struct seq_file to write to.
 *
 * This is intended for debugging. The format may change at any time.
 */
void sbitmap_show(struct sbitmap *sb, struct seq_file *m);

/**
 * sbitmap_weight() - Return the number of bits which are set and not cleared
 * in a &struct sbitmap.
 * @sb: Bitmap to check.
 *
 * Return: Number of bits which are set and not cleared.
 */
unsigned int sbitmap_weight(const struct sbitmap *sb);

/**
 * sbitmap_bitmap_show() - Write a hex dump of a &struct sbitmap to a &struct
 * seq_file.
 * @sb: Bitmap to show.
 * @m: struct seq_file to write to.
 *
 * This is intended for debugging. The output isn't guaranteed to be internally
 * consistent.
 */
void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m);

/**
 * sbitmap_queue_init_node() - Initialize a &struct sbitmap_queue on a specific
 * memory node.
 * @sbq: Bitmap queue to initialize.
 * @depth: See sbitmap_init_node().
 * @shift: See sbitmap_init_node().
 * @round_robin: See sbitmap_init_node().
 * @flags: Allocation flags.
 * @node: Memory node to allocate on.
 *
 * Return: Zero on success or negative errno on failure.
 */
int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
			    int shift, bool round_robin, gfp_t flags, int node);

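/*
 * Usage sketch (illustrative only; the depth of 256 is an arbitrary example
 * value and error handling is trimmed):
 *
 *	struct sbitmap_queue sbq;
 *
 *	if (sbitmap_queue_init_node(&sbq, 256, -1, false, GFP_KERNEL,
 *				    NUMA_NO_NODE))
 *		return -ENOMEM;
 *	...
 *	sbitmap_queue_free(&sbq);
 */
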
/**
 * sbitmap_queue_free() - Free memory used by a &struct sbitmap_queue.
 * @sbq: Bitmap queue to free.
 */
static inline void sbitmap_queue_free(struct sbitmap_queue *sbq)
{
	kfree(sbq->ws);
	sbitmap_free(&sbq->sb);
}

/**
 * sbitmap_queue_resize() - Resize a &struct sbitmap_queue.
 * @sbq: Bitmap queue to resize.
 * @depth: New number of bits to resize to.
 *
 * Like sbitmap_resize(), this doesn't reallocate anything. It has to do
 * some extra work on the &struct sbitmap_queue, so it's not safe to just
 * resize the underlying &struct sbitmap.
 */
void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth);

/**
 * __sbitmap_queue_get() - Try to allocate a free bit from a &struct
 * sbitmap_queue with preemption already disabled.
 * @sbq: Bitmap queue to allocate from.
 *
 * Return: Non-negative allocated bit number if successful, -1 otherwise.
 */
int __sbitmap_queue_get(struct sbitmap_queue *sbq);

/**
 * __sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct
 * sbitmap_queue, limiting the depth used from each word, with preemption
 * already disabled.
 * @sbq: Bitmap queue to allocate from.
 * @shallow_depth: The maximum number of bits to allocate from a single word.
 * See sbitmap_get_shallow().
 *
 * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after
 * initializing @sbq.
 *
 * Return: Non-negative allocated bit number if successful, -1 otherwise.
 */
int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
				unsigned int shallow_depth);

/**
 * sbitmap_queue_get() - Try to allocate a free bit from a &struct
 * sbitmap_queue.
 * @sbq: Bitmap queue to allocate from.
 * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to
 *       sbitmap_queue_clear()).
 *
 * Return: Non-negative allocated bit number if successful, -1 otherwise.
 */
static inline int sbitmap_queue_get(struct sbitmap_queue *sbq,
				    unsigned int *cpu)
{
	int nr;

	*cpu = get_cpu();
	nr = __sbitmap_queue_get(sbq);
	put_cpu();
	return nr;
}

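/*
 * Typical allocate/free cycle (illustrative sketch):
 *
 *	unsigned int cpu;
 *	int nr;
 *
 *	nr = sbitmap_queue_get(&sbq, &cpu);
 *	if (nr >= 0) {
 *		... use bit 'nr' as a tag ...
 *		sbitmap_queue_clear(&sbq, nr, cpu);
 *	}
 *
 * sbitmap_queue_clear() frees the bit and wakes waiters once a full wake
 * batch has been freed.
 */
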
/**
 * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct
 * sbitmap_queue, limiting the depth used from each word.
 * @sbq: Bitmap queue to allocate from.
 * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to
 *       sbitmap_queue_clear()).
 * @shallow_depth: The maximum number of bits to allocate from a single word.
 * See sbitmap_get_shallow().
 *
 * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after
 * initializing @sbq.
 *
 * Return: Non-negative allocated bit number if successful, -1 otherwise.
 */
static inline int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
					    unsigned int *cpu,
					    unsigned int shallow_depth)
{
	int nr;

	*cpu = get_cpu();
	nr = __sbitmap_queue_get_shallow(sbq, shallow_depth);
	put_cpu();
	return nr;
}

/**
 * sbitmap_queue_min_shallow_depth() - Inform a &struct sbitmap_queue of the
 * minimum shallow depth that will be used.
 * @sbq: Bitmap queue in question.
 * @min_shallow_depth: The minimum shallow depth that will be passed to
 * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow().
 *
 * sbitmap_queue_clear() batches wakeups as an optimization. The batch size
 * depends on the depth of the bitmap. Since the shallow allocation functions
 * effectively operate with a different depth, the shallow depth must be taken
 * into account when calculating the batch size. This function must be called
 * with the minimum shallow depth that will be used. Failure to do so can result
 * in missed wakeups.
 */
void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
				     unsigned int min_shallow_depth);

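/*
 * Shallow-allocation sketch (illustrative; SHALLOW_DEPTH is a made-up
 * constant standing in for the smallest shallow depth this caller will use):
 *
 *	sbitmap_queue_init_node(&sbq, 256, -1, false, GFP_KERNEL,
 *				NUMA_NO_NODE);
 *	sbitmap_queue_min_shallow_depth(&sbq, SHALLOW_DEPTH);
 *	...
 *	nr = sbitmap_queue_get_shallow(&sbq, &cpu, SHALLOW_DEPTH);
 */
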
/**
 * sbitmap_queue_clear() - Free an allocated bit and wake up waiters on a
 * &struct sbitmap_queue.
 * @sbq: Bitmap to free from.
 * @nr: Bit number to free.
 * @cpu: CPU the bit was allocated on.
 */
void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
			 unsigned int cpu);

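/*
 * The index wraps using a mask rather than a modulo, which relies on
 * SBQ_WAIT_QUEUES being a power of two.
 */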
static inline int sbq_index_inc(int index)
{
	return (index + 1) & (SBQ_WAIT_QUEUES - 1);
}

static inline void sbq_index_atomic_inc(atomic_t *index)
{
	int old = atomic_read(index);
	int new = sbq_index_inc(old);
	atomic_cmpxchg(index, old, new);
}

/**
 * sbq_wait_ptr() - Get the next wait queue to use for a &struct
 * sbitmap_queue.
 * @sbq: Bitmap queue to wait on.
 * @wait_index: A counter per "user" of @sbq.
 */
static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq,
						  atomic_t *wait_index)
{
	struct sbq_wait_state *ws;

	ws = &sbq->ws[atomic_read(wait_index)];
	sbq_index_atomic_inc(wait_index);
	return ws;
}

/**
 * sbitmap_queue_wake_all() - Wake up everything waiting on a &struct
 * sbitmap_queue.
 * @sbq: Bitmap queue to wake up.
 */
void sbitmap_queue_wake_all(struct sbitmap_queue *sbq);

/**
 * sbitmap_queue_wake_up() - Wake up some of the waiters in one waitqueue
 * on a &struct sbitmap_queue.
 * @sbq: Bitmap queue to wake up.
 */
void sbitmap_queue_wake_up(struct sbitmap_queue *sbq);

/**
 * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct
 * seq_file.
 * @sbq: Bitmap queue to show.
 * @m: struct seq_file to write to.
 *
 * This is intended for debugging. The format may change at any time.
 */
void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m);

struct sbq_wait {
	struct sbitmap_queue *sbq;	/* if set, sbq_wait is accounted */
	struct wait_queue_entry wait;
};

#define DEFINE_SBQ_WAIT(name)							\
	struct sbq_wait name = {						\
		.sbq = NULL,							\
		.wait = {							\
			.private	= current,				\
			.func		= autoremove_wake_function,		\
			.entry		= LIST_HEAD_INIT((name).wait.entry),	\
		}								\
	}

/*
 * Wrapper around prepare_to_wait_exclusive(), which maintains some extra
 * internal state.
 */
void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq,
				struct sbq_wait_state *ws,
				struct sbq_wait *sbq_wait, int state);

/*
 * Must be paired with sbitmap_prepare_to_wait().
 */
void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws,
				struct sbq_wait *sbq_wait);

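/*
 * Wait-loop sketch (illustrative; @wait_index is a caller-owned atomic_t,
 * and a real user such as blk-mq wraps this in retry and teardown logic):
 *
 *	DEFINE_SBQ_WAIT(wait);
 *	struct sbq_wait_state *ws = sbq_wait_ptr(&sbq, &wait_index);
 *	int nr;
 *
 *	do {
 *		sbitmap_prepare_to_wait(&sbq, ws, &wait, TASK_UNINTERRUPTIBLE);
 *		nr = __sbitmap_queue_get(&sbq);
 *		if (nr >= 0)
 *			break;
 *		io_schedule();
 *	} while (1);
 *	sbitmap_finish_wait(&sbq, ws, &wait);
 */
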
/*
 * Wrapper around add_wait_queue(), which maintains some extra internal state.
 */
void sbitmap_add_wait_queue(struct sbitmap_queue *sbq,
			    struct sbq_wait_state *ws,
			    struct sbq_wait *sbq_wait);

/*
 * Must be paired with sbitmap_add_wait_queue().
 */
void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait);

#endif /* __LINUX_SCALE_BITMAP_H */