xref: /linux-6.15/include/linux/ptr_ring.h (revision f6bcbf2e)
1 /*
2  *	Definitions for the 'struct ptr_ring' datastructure.
3  *
4  *	Author:
5  *		Michael S. Tsirkin <[email protected]>
6  *
7  *	Copyright (C) 2016 Red Hat, Inc.
8  *
9  *	This program is free software; you can redistribute it and/or modify it
10  *	under the terms of the GNU General Public License as published by the
11  *	Free Software Foundation; either version 2 of the License, or (at your
12  *	option) any later version.
13  *
14  *	This is a limited-size FIFO maintaining pointers in FIFO order, with
15  *	one CPU producing entries and another consuming entries from a FIFO.
16  *
17  *	This implementation tries to minimize cache-contention when there is a
18  *	single producer and a single consumer CPU.
19  */
20 
21 #ifndef _LINUX_PTR_RING_H
22 #define _LINUX_PTR_RING_H 1
23 
24 #ifdef __KERNEL__
25 #include <linux/spinlock.h>
26 #include <linux/cache.h>
27 #include <linux/types.h>
28 #include <linux/compiler.h>
29 #include <linux/cache.h>
30 #include <linux/slab.h>
31 #include <asm/errno.h>
32 #endif
33 
/*
 * Ring state.  The producer-side fields, the consumer-side fields and the
 * fields read by both sides are each introduced by a member tagged
 * ____cacheline_aligned_in_smp.
 */
struct ptr_ring {
	/* Index of the slot __ptr_ring_produce() fills next. */
	int producer ____cacheline_aligned_in_smp;
	/* Taken by the ptr_ring_produce*() and ptr_ring_full*() wrappers. */
	spinlock_t producer_lock;
	int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */
	int consumer_tail; /* next entry to invalidate */
	/* Taken by the ptr_ring_consume*() and ptr_ring_empty*() wrappers. */
	spinlock_t consumer_lock;
	/* Shared consumer/producer data */
	/* Read-only by both the producer and the consumer */
	int size ____cacheline_aligned_in_smp; /* max entries in queue */
	int batch; /* number of entries to consume in a batch */
	/* Slot array: a NULL slot is free, a non-NULL slot holds an entry. */
	void **queue;
};
46 
47 /* Note: callers invoking this in a loop must use a compiler barrier,
48  * for example cpu_relax().
49  *
50  * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock:
51  * see e.g. ptr_ring_full.
52  */
53 static inline bool __ptr_ring_full(struct ptr_ring *r)
54 {
55 	return r->queue[r->producer];
56 }
57 
58 static inline bool ptr_ring_full(struct ptr_ring *r)
59 {
60 	bool ret;
61 
62 	spin_lock(&r->producer_lock);
63 	ret = __ptr_ring_full(r);
64 	spin_unlock(&r->producer_lock);
65 
66 	return ret;
67 }
68 
69 static inline bool ptr_ring_full_irq(struct ptr_ring *r)
70 {
71 	bool ret;
72 
73 	spin_lock_irq(&r->producer_lock);
74 	ret = __ptr_ring_full(r);
75 	spin_unlock_irq(&r->producer_lock);
76 
77 	return ret;
78 }
79 
80 static inline bool ptr_ring_full_any(struct ptr_ring *r)
81 {
82 	unsigned long flags;
83 	bool ret;
84 
85 	spin_lock_irqsave(&r->producer_lock, flags);
86 	ret = __ptr_ring_full(r);
87 	spin_unlock_irqrestore(&r->producer_lock, flags);
88 
89 	return ret;
90 }
91 
92 static inline bool ptr_ring_full_bh(struct ptr_ring *r)
93 {
94 	bool ret;
95 
96 	spin_lock_bh(&r->producer_lock);
97 	ret = __ptr_ring_full(r);
98 	spin_unlock_bh(&r->producer_lock);
99 
100 	return ret;
101 }
102 
103 /* Note: callers invoking this in a loop must use a compiler barrier,
104  * for example cpu_relax(). Callers must hold producer_lock.
105  * Callers are responsible for making sure pointer that is being queued
106  * points to a valid data.
107  */
108 static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
109 {
110 	if (unlikely(!r->size) || r->queue[r->producer])
111 		return -ENOSPC;
112 
113 	/* Make sure the pointer we are storing points to a valid data. */
114 	/* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */
115 	smp_wmb();
116 
117 	WRITE_ONCE(r->queue[r->producer++], ptr);
118 	if (unlikely(r->producer >= r->size))
119 		r->producer = 0;
120 	return 0;
121 }
122 
123 /*
124  * Note: resize (below) nests producer lock within consumer lock, so if you
125  * consume in interrupt or BH context, you must disable interrupts/BH when
126  * calling this.
127  */
128 static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr)
129 {
130 	int ret;
131 
132 	spin_lock(&r->producer_lock);
133 	ret = __ptr_ring_produce(r, ptr);
134 	spin_unlock(&r->producer_lock);
135 
136 	return ret;
137 }
138 
139 static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr)
140 {
141 	int ret;
142 
143 	spin_lock_irq(&r->producer_lock);
144 	ret = __ptr_ring_produce(r, ptr);
145 	spin_unlock_irq(&r->producer_lock);
146 
147 	return ret;
148 }
149 
150 static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr)
151 {
152 	unsigned long flags;
153 	int ret;
154 
155 	spin_lock_irqsave(&r->producer_lock, flags);
156 	ret = __ptr_ring_produce(r, ptr);
157 	spin_unlock_irqrestore(&r->producer_lock, flags);
158 
159 	return ret;
160 }
161 
162 static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
163 {
164 	int ret;
165 
166 	spin_lock_bh(&r->producer_lock);
167 	ret = __ptr_ring_produce(r, ptr);
168 	spin_unlock_bh(&r->producer_lock);
169 
170 	return ret;
171 }
172 
173 static inline void *__ptr_ring_peek(struct ptr_ring *r)
174 {
175 	if (likely(r->size))
176 		return READ_ONCE(r->queue[r->consumer_head]);
177 	return NULL;
178 }
179 
180 /*
181  * Test ring empty status without taking any locks.
182  *
183  * NB: This is only safe to call if ring is never resized.
184  *
185  * However, if some other CPU consumes ring entries at the same time, the value
186  * returned is not guaranteed to be correct.
187  *
188  * In this case - to avoid incorrectly detecting the ring
189  * as empty - the CPU consuming the ring entries is responsible
190  * for either consuming all ring entries until the ring is empty,
191  * or synchronizing with some other CPU and causing it to
192  * re-test __ptr_ring_empty and/or consume the ring enteries
193  * after the synchronization point.
194  *
195  * Note: callers invoking this in a loop must use a compiler barrier,
196  * for example cpu_relax().
197  */
198 static inline bool __ptr_ring_empty(struct ptr_ring *r)
199 {
200 	if (likely(r->size))
201 		return !r->queue[READ_ONCE(r->consumer_head)];
202 	return true;
203 }
204 
205 static inline bool ptr_ring_empty(struct ptr_ring *r)
206 {
207 	bool ret;
208 
209 	spin_lock(&r->consumer_lock);
210 	ret = __ptr_ring_empty(r);
211 	spin_unlock(&r->consumer_lock);
212 
213 	return ret;
214 }
215 
216 static inline bool ptr_ring_empty_irq(struct ptr_ring *r)
217 {
218 	bool ret;
219 
220 	spin_lock_irq(&r->consumer_lock);
221 	ret = __ptr_ring_empty(r);
222 	spin_unlock_irq(&r->consumer_lock);
223 
224 	return ret;
225 }
226 
227 static inline bool ptr_ring_empty_any(struct ptr_ring *r)
228 {
229 	unsigned long flags;
230 	bool ret;
231 
232 	spin_lock_irqsave(&r->consumer_lock, flags);
233 	ret = __ptr_ring_empty(r);
234 	spin_unlock_irqrestore(&r->consumer_lock, flags);
235 
236 	return ret;
237 }
238 
239 static inline bool ptr_ring_empty_bh(struct ptr_ring *r)
240 {
241 	bool ret;
242 
243 	spin_lock_bh(&r->consumer_lock);
244 	ret = __ptr_ring_empty(r);
245 	spin_unlock_bh(&r->consumer_lock);
246 
247 	return ret;
248 }
249 
250 /* Must only be called after __ptr_ring_peek returned !NULL */
251 static inline void __ptr_ring_discard_one(struct ptr_ring *r)
252 {
253 	/* Fundamentally, what we want to do is update consumer
254 	 * index and zero out the entry so producer can reuse it.
255 	 * Doing it naively at each consume would be as simple as:
256 	 *       consumer = r->consumer;
257 	 *       r->queue[consumer++] = NULL;
258 	 *       if (unlikely(consumer >= r->size))
259 	 *               consumer = 0;
260 	 *       r->consumer = consumer;
261 	 * but that is suboptimal when the ring is full as producer is writing
262 	 * out new entries in the same cache line.  Defer these updates until a
263 	 * batch of entries has been consumed.
264 	 */
265 	/* Note: we must keep consumer_head valid at all times for __ptr_ring_empty
266 	 * to work correctly.
267 	 */
268 	int consumer_head = r->consumer_head;
269 	int head = consumer_head++;
270 
271 	/* Once we have processed enough entries invalidate them in
272 	 * the ring all at once so producer can reuse their space in the ring.
273 	 * We also do this when we reach end of the ring - not mandatory
274 	 * but helps keep the implementation simple.
275 	 */
276 	if (unlikely(consumer_head - r->consumer_tail >= r->batch ||
277 		     consumer_head >= r->size)) {
278 		/* Zero out entries in the reverse order: this way we touch the
279 		 * cache line that producer might currently be reading the last;
280 		 * producer won't make progress and touch other cache lines
281 		 * besides the first one until we write out all entries.
282 		 */
283 		while (likely(head >= r->consumer_tail))
284 			r->queue[head--] = NULL;
285 		r->consumer_tail = consumer_head;
286 	}
287 	if (unlikely(consumer_head >= r->size)) {
288 		consumer_head = 0;
289 		r->consumer_tail = 0;
290 	}
291 	/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
292 	WRITE_ONCE(r->consumer_head, consumer_head);
293 }
294 
/*
 * Remove and return the entry at the head of the ring, or return NULL when
 * __ptr_ring_peek() finds none.  The ptr_ring_consume*() wrappers call this
 * with consumer_lock held.
 */
static inline void *__ptr_ring_consume(struct ptr_ring *r)
{
	void *ptr;

	/* The READ_ONCE in __ptr_ring_peek guarantees that anyone
	 * accessing data through the pointer is up to date. Pairs
	 * with smp_wmb in __ptr_ring_produce.  No explicit
	 * smp_read_barrier_depends() is needed (and newer kernels no
	 * longer provide one).
	 */
	ptr = __ptr_ring_peek(r);
	if (ptr)
		__ptr_ring_discard_one(r);

	return ptr;
}
308 
/*
 * Consume up to n entries into array, stopping early when the ring runs
 * dry.  Returns the number of entries stored.
 */
static inline int __ptr_ring_consume_batched(struct ptr_ring *r,
					     void **array, int n)
{
	int count = 0;

	while (count < n) {
		void *entry = __ptr_ring_consume(r);

		if (!entry)
			break;
		array[count++] = entry;
	}

	return count;
}
324 
325 /*
326  * Note: resize (below) nests producer lock within consumer lock, so if you
327  * call this in interrupt or BH context, you must disable interrupts/BH when
328  * producing.
329  */
330 static inline void *ptr_ring_consume(struct ptr_ring *r)
331 {
332 	void *ptr;
333 
334 	spin_lock(&r->consumer_lock);
335 	ptr = __ptr_ring_consume(r);
336 	spin_unlock(&r->consumer_lock);
337 
338 	return ptr;
339 }
340 
341 static inline void *ptr_ring_consume_irq(struct ptr_ring *r)
342 {
343 	void *ptr;
344 
345 	spin_lock_irq(&r->consumer_lock);
346 	ptr = __ptr_ring_consume(r);
347 	spin_unlock_irq(&r->consumer_lock);
348 
349 	return ptr;
350 }
351 
352 static inline void *ptr_ring_consume_any(struct ptr_ring *r)
353 {
354 	unsigned long flags;
355 	void *ptr;
356 
357 	spin_lock_irqsave(&r->consumer_lock, flags);
358 	ptr = __ptr_ring_consume(r);
359 	spin_unlock_irqrestore(&r->consumer_lock, flags);
360 
361 	return ptr;
362 }
363 
364 static inline void *ptr_ring_consume_bh(struct ptr_ring *r)
365 {
366 	void *ptr;
367 
368 	spin_lock_bh(&r->consumer_lock);
369 	ptr = __ptr_ring_consume(r);
370 	spin_unlock_bh(&r->consumer_lock);
371 
372 	return ptr;
373 }
374 
375 static inline int ptr_ring_consume_batched(struct ptr_ring *r,
376 					   void **array, int n)
377 {
378 	int ret;
379 
380 	spin_lock(&r->consumer_lock);
381 	ret = __ptr_ring_consume_batched(r, array, n);
382 	spin_unlock(&r->consumer_lock);
383 
384 	return ret;
385 }
386 
387 static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r,
388 					       void **array, int n)
389 {
390 	int ret;
391 
392 	spin_lock_irq(&r->consumer_lock);
393 	ret = __ptr_ring_consume_batched(r, array, n);
394 	spin_unlock_irq(&r->consumer_lock);
395 
396 	return ret;
397 }
398 
399 static inline int ptr_ring_consume_batched_any(struct ptr_ring *r,
400 					       void **array, int n)
401 {
402 	unsigned long flags;
403 	int ret;
404 
405 	spin_lock_irqsave(&r->consumer_lock, flags);
406 	ret = __ptr_ring_consume_batched(r, array, n);
407 	spin_unlock_irqrestore(&r->consumer_lock, flags);
408 
409 	return ret;
410 }
411 
412 static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
413 					      void **array, int n)
414 {
415 	int ret;
416 
417 	spin_lock_bh(&r->consumer_lock);
418 	ret = __ptr_ring_consume_batched(r, array, n);
419 	spin_unlock_bh(&r->consumer_lock);
420 
421 	return ret;
422 }
423 
/* Call f on the entry at the head of the ring without discarding it from
 * the FIFO; f receives NULL when __ptr_ring_peek() finds no entry.
 * The expression evaluates to f's return value, so f must return a value.
 * Callers must take consumer_lock.
 */
#define __PTR_RING_PEEK_CALL(r, f) ((f)(__ptr_ring_peek(r)))
429 
/*
 * Take consumer_lock with spin_lock(), apply f to the entry at the head of
 * the ring (NULL when there is none) without discarding it, and yield f's
 * return value.
 *
 * r is captured in a local so the argument is evaluated exactly once even
 * though the ring is used for locking, peeking and unlocking.
 */
#define PTR_RING_PEEK_CALL(r, f) ({ \
	struct ptr_ring *__PTR_RING_PEEK_CALL_r = (r); \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	\
	spin_lock(&__PTR_RING_PEEK_CALL_r->consumer_lock); \
	__PTR_RING_PEEK_CALL_v = \
		__PTR_RING_PEEK_CALL(__PTR_RING_PEEK_CALL_r, f); \
	spin_unlock(&__PTR_RING_PEEK_CALL_r->consumer_lock); \
	__PTR_RING_PEEK_CALL_v; \
})
438 
/*
 * Take consumer_lock with spin_lock_irq(), apply f to the entry at the head
 * of the ring (NULL when there is none) without discarding it, and yield
 * f's return value.
 *
 * r is captured in a local so the argument is evaluated exactly once even
 * though the ring is used for locking, peeking and unlocking.
 */
#define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \
	struct ptr_ring *__PTR_RING_PEEK_CALL_r = (r); \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	\
	spin_lock_irq(&__PTR_RING_PEEK_CALL_r->consumer_lock); \
	__PTR_RING_PEEK_CALL_v = \
		__PTR_RING_PEEK_CALL(__PTR_RING_PEEK_CALL_r, f); \
	spin_unlock_irq(&__PTR_RING_PEEK_CALL_r->consumer_lock); \
	__PTR_RING_PEEK_CALL_v; \
})
447 
/*
 * Take consumer_lock with spin_lock_bh(), apply f to the entry at the head
 * of the ring (NULL when there is none) without discarding it, and yield
 * f's return value.
 *
 * r is captured in a local so the argument is evaluated exactly once even
 * though the ring is used for locking, peeking and unlocking.
 */
#define PTR_RING_PEEK_CALL_BH(r, f) ({ \
	struct ptr_ring *__PTR_RING_PEEK_CALL_r = (r); \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	\
	spin_lock_bh(&__PTR_RING_PEEK_CALL_r->consumer_lock); \
	__PTR_RING_PEEK_CALL_v = \
		__PTR_RING_PEEK_CALL(__PTR_RING_PEEK_CALL_r, f); \
	spin_unlock_bh(&__PTR_RING_PEEK_CALL_r->consumer_lock); \
	__PTR_RING_PEEK_CALL_v; \
})
456 
/*
 * Take consumer_lock with spin_lock_irqsave(), apply f to the entry at the
 * head of the ring (NULL when there is none) without discarding it, restore
 * the saved interrupt state and yield f's return value.
 *
 * r is captured in a local so the argument is evaluated exactly once even
 * though the ring is used for locking, peeking and unlocking.
 */
#define PTR_RING_PEEK_CALL_ANY(r, f) ({ \
	struct ptr_ring *__PTR_RING_PEEK_CALL_r = (r); \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	unsigned long __PTR_RING_PEEK_CALL_f;\
	\
	spin_lock_irqsave(&__PTR_RING_PEEK_CALL_r->consumer_lock, \
			  __PTR_RING_PEEK_CALL_f); \
	__PTR_RING_PEEK_CALL_v = \
		__PTR_RING_PEEK_CALL(__PTR_RING_PEEK_CALL_r, f); \
	spin_unlock_irqrestore(&__PTR_RING_PEEK_CALL_r->consumer_lock, \
			       __PTR_RING_PEEK_CALL_f); \
	__PTR_RING_PEEK_CALL_v; \
})
466 
467 /* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See
468  * documentation for vmalloc for which of them are legal.
469  */
470 static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
471 {
472 	if (size > KMALLOC_MAX_SIZE / sizeof(void *))
473 		return NULL;
474 	return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO);
475 }
476 
477 static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
478 {
479 	r->size = size;
480 	r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue));
481 	/* We need to set batch at least to 1 to make logic
482 	 * in __ptr_ring_discard_one work correctly.
483 	 * Batching too much (because ring is small) would cause a lot of
484 	 * burstiness. Needs tuning, for now disable batching.
485 	 */
486 	if (r->batch > r->size / 2 || !r->batch)
487 		r->batch = 1;
488 }
489 
490 static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
491 {
492 	r->queue = __ptr_ring_init_queue_alloc(size, gfp);
493 	if (!r->queue)
494 		return -ENOMEM;
495 
496 	__ptr_ring_set_size(r, size);
497 	r->producer = r->consumer_head = r->consumer_tail = 0;
498 	spin_lock_init(&r->producer_lock);
499 	spin_lock_init(&r->consumer_lock);
500 
501 	return 0;
502 }
503 
504 /*
505  * Return entries into ring. Destroy entries that don't fit.
506  *
507  * Note: this is expected to be a rare slow path operation.
508  *
509  * Note: producer lock is nested within consumer lock, so if you
510  * resize you must make sure all uses nest correctly.
511  * In particular if you consume ring in interrupt or BH context, you must
512  * disable interrupts/BH when doing so.
513  */
514 static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
515 				      void (*destroy)(void *))
516 {
517 	unsigned long flags;
518 	int head;
519 
520 	spin_lock_irqsave(&r->consumer_lock, flags);
521 	spin_lock(&r->producer_lock);
522 
523 	if (!r->size)
524 		goto done;
525 
526 	/*
527 	 * Clean out buffered entries (for simplicity). This way following code
528 	 * can test entries for NULL and if not assume they are valid.
529 	 */
530 	head = r->consumer_head - 1;
531 	while (likely(head >= r->consumer_tail))
532 		r->queue[head--] = NULL;
533 	r->consumer_tail = r->consumer_head;
534 
535 	/*
536 	 * Go over entries in batch, start moving head back and copy entries.
537 	 * Stop when we run into previously unconsumed entries.
538 	 */
539 	while (n) {
540 		head = r->consumer_head - 1;
541 		if (head < 0)
542 			head = r->size - 1;
543 		if (r->queue[head]) {
544 			/* This batch entry will have to be destroyed. */
545 			goto done;
546 		}
547 		r->queue[head] = batch[--n];
548 		r->consumer_tail = head;
549 		/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
550 		WRITE_ONCE(r->consumer_head, head);
551 	}
552 
553 done:
554 	/* Destroy all entries left in the batch. */
555 	while (n)
556 		destroy(batch[--n]);
557 	spin_unlock(&r->producer_lock);
558 	spin_unlock_irqrestore(&r->consumer_lock, flags);
559 }
560 
561 static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
562 					   int size, gfp_t gfp,
563 					   void (*destroy)(void *))
564 {
565 	int producer = 0;
566 	void **old;
567 	void *ptr;
568 
569 	while ((ptr = __ptr_ring_consume(r)))
570 		if (producer < size)
571 			queue[producer++] = ptr;
572 		else if (destroy)
573 			destroy(ptr);
574 
575 	__ptr_ring_set_size(r, size);
576 	r->producer = producer;
577 	r->consumer_head = 0;
578 	r->consumer_tail = 0;
579 	old = r->queue;
580 	r->queue = queue;
581 
582 	return old;
583 }
584 
585 /*
586  * Note: producer lock is nested within consumer lock, so if you
587  * resize you must make sure all uses nest correctly.
588  * In particular if you consume ring in interrupt or BH context, you must
589  * disable interrupts/BH when doing so.
590  */
591 static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
592 				  void (*destroy)(void *))
593 {
594 	unsigned long flags;
595 	void **queue = __ptr_ring_init_queue_alloc(size, gfp);
596 	void **old;
597 
598 	if (!queue)
599 		return -ENOMEM;
600 
601 	spin_lock_irqsave(&(r)->consumer_lock, flags);
602 	spin_lock(&(r)->producer_lock);
603 
604 	old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy);
605 
606 	spin_unlock(&(r)->producer_lock);
607 	spin_unlock_irqrestore(&(r)->consumer_lock, flags);
608 
609 	kvfree(old);
610 
611 	return 0;
612 }
613 
614 /*
615  * Note: producer lock is nested within consumer lock, so if you
616  * resize you must make sure all uses nest correctly.
617  * In particular if you consume ring in interrupt or BH context, you must
618  * disable interrupts/BH when doing so.
619  */
620 static inline int ptr_ring_resize_multiple(struct ptr_ring **rings,
621 					   unsigned int nrings,
622 					   int size,
623 					   gfp_t gfp, void (*destroy)(void *))
624 {
625 	unsigned long flags;
626 	void ***queues;
627 	int i;
628 
629 	queues = kmalloc_array(nrings, sizeof(*queues), gfp);
630 	if (!queues)
631 		goto noqueues;
632 
633 	for (i = 0; i < nrings; ++i) {
634 		queues[i] = __ptr_ring_init_queue_alloc(size, gfp);
635 		if (!queues[i])
636 			goto nomem;
637 	}
638 
639 	for (i = 0; i < nrings; ++i) {
640 		spin_lock_irqsave(&(rings[i])->consumer_lock, flags);
641 		spin_lock(&(rings[i])->producer_lock);
642 		queues[i] = __ptr_ring_swap_queue(rings[i], queues[i],
643 						  size, gfp, destroy);
644 		spin_unlock(&(rings[i])->producer_lock);
645 		spin_unlock_irqrestore(&(rings[i])->consumer_lock, flags);
646 	}
647 
648 	for (i = 0; i < nrings; ++i)
649 		kvfree(queues[i]);
650 
651 	kfree(queues);
652 
653 	return 0;
654 
655 nomem:
656 	while (--i >= 0)
657 		kvfree(queues[i]);
658 
659 	kfree(queues);
660 
661 noqueues:
662 	return -ENOMEM;
663 }
664 
665 static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *))
666 {
667 	void *ptr;
668 
669 	if (destroy)
670 		while ((ptr = ptr_ring_consume(r)))
671 			destroy(ptr);
672 	kvfree(r->queue);
673 }
674 
675 #endif /* _LINUX_PTR_RING_H  */
676