xref: /linux-6.15/include/linux/swait.h (revision 6f63904c)
1b2441318SGreg Kroah-Hartman /* SPDX-License-Identifier: GPL-2.0 */
213b35686SPeter Zijlstra (Intel) #ifndef _LINUX_SWAIT_H
313b35686SPeter Zijlstra (Intel) #define _LINUX_SWAIT_H
413b35686SPeter Zijlstra (Intel) 
513b35686SPeter Zijlstra (Intel) #include <linux/list.h>
613b35686SPeter Zijlstra (Intel) #include <linux/stddef.h>
713b35686SPeter Zijlstra (Intel) #include <linux/spinlock.h>
8a59a68feSSebastian Andrzej Siewior #include <linux/wait.h>
913b35686SPeter Zijlstra (Intel) #include <asm/current.h>
1013b35686SPeter Zijlstra (Intel) 
/*
 * Simple waitqueues are semantically very different to regular wait queues
 * (wait.h). The most important difference is that the simple waitqueue allows
 * for deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold
 * times.
 *
 * Mainly, this is accomplished by two things. Firstly not allowing swake_up_all
 * from IRQ disabled, and dropping the lock upon every wakeup, giving a higher
 * priority task a chance to run.
 *
 * Secondly, we had to drop a fair number of features of the other waitqueue
 * code; notably:
 *
 *  - mixing INTERRUPTIBLE and UNINTERRUPTIBLE sleeps on the same waitqueue;
 *    all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right
 *    sleeper state.
 *
 *  - the !exclusive mode; because that leads to O(n) wakeups, everything is
 *    exclusive. As such swake_up_one will only ever wake _one_ waiter.
 *
 *  - custom wake callback functions; because you cannot give any guarantees
 *    about random code. This also allows swait to be used in RT, such that
 *    raw spinlock can be used for the swait queue head.
 *
 * As a side effect of these, the data structures are slimmer albeit more ad-hoc.
 * For all the above, note that simple wait queues should _only_ be used under
 * very specific realtime constraints -- it is best to stick with the regular
 * wait queues in most cases.
 */
4013b35686SPeter Zijlstra (Intel) 
/* Only a pointer to task_struct is stored below, so a forward declaration is enough. */
struct task_struct;
4213b35686SPeter Zijlstra (Intel) 
4313b35686SPeter Zijlstra (Intel) struct swait_queue_head {
4413b35686SPeter Zijlstra (Intel) 	raw_spinlock_t		lock;
4513b35686SPeter Zijlstra (Intel) 	struct list_head	task_list;
4613b35686SPeter Zijlstra (Intel) };
4713b35686SPeter Zijlstra (Intel) 
4813b35686SPeter Zijlstra (Intel) struct swait_queue {
4913b35686SPeter Zijlstra (Intel) 	struct task_struct	*task;
5013b35686SPeter Zijlstra (Intel) 	struct list_head	task_list;
5113b35686SPeter Zijlstra (Intel) };
5213b35686SPeter Zijlstra (Intel) 
/*
 * Initializer for a struct swait_queue called @name: records 'current' as
 * the waiting task and initialises the entry's own list linkage with
 * LIST_HEAD_INIT().
 */
#define __SWAITQUEUE_INITIALIZER(name) {				\
	.task		= current,					\
	.task_list	= LIST_HEAD_INIT((name).task_list),		\
}
5713b35686SPeter Zijlstra (Intel) 
/* Define a struct swait_queue called @name, initialised for 'current'. */
#define DECLARE_SWAITQUEUE(name)					\
	struct swait_queue name = __SWAITQUEUE_INITIALIZER(name)
6013b35686SPeter Zijlstra (Intel) 
/*
 * Static initializer for a struct swait_queue_head called @name: an unlocked
 * raw spinlock and a list head initialised with LIST_HEAD_INIT().
 *
 * @name must be a plain identifier; 'name.lock' is handed to
 * __RAW_SPIN_LOCK_UNLOCKED() as written.
 */
#define __SWAIT_QUEUE_HEAD_INITIALIZER(name) {				\
	.lock		= __RAW_SPIN_LOCK_UNLOCKED(name.lock),		\
	.task_list	= LIST_HEAD_INIT((name).task_list),		\
}
6513b35686SPeter Zijlstra (Intel) 
/* Define a statically initialised struct swait_queue_head called @name. */
#define DECLARE_SWAIT_QUEUE_HEAD(name)					\
	struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INITIALIZER(name)
6813b35686SPeter Zijlstra (Intel) 
/*
 * Out-of-line initialiser behind init_swait_queue_head().
 *
 * @q:    the queue head to initialise
 * @name: name string for the head (init_swait_queue_head() passes the
 *        stringified argument)
 * @key:  lock class key to associate with @q
 */
extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
				    struct lock_class_key *key);
7113b35686SPeter Zijlstra (Intel) 
/*
 * Initialise the swait queue head pointed to by @q at run time.
 *
 * Every expansion site gets its own static lock_class_key, and the
 * stringified argument expression is passed on as the name.
 */
#define init_swait_queue_head(q)				\
	do {							\
		static struct lock_class_key __key;		\
		__init_swait_queue_head((q), #q, &__key);	\
	} while (0)
7713b35686SPeter Zijlstra (Intel) 
/*
 * Declare a swait queue head called @name for on-stack use.
 *
 * With CONFIG_LOCKDEP the head is initialised through
 * init_swait_queue_head(), so the declaration site gets its own static
 * lock_class_key; otherwise this is identical to DECLARE_SWAIT_QUEUE_HEAD().
 */
#ifdef CONFIG_LOCKDEP
# define __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name)			\
	({ init_swait_queue_head(&name); name; })
# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name)			\
	struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name)
#else
# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name)			\
	DECLARE_SWAIT_QUEUE_HEAD(name)
#endif
8713b35686SPeter Zijlstra (Intel) 
888cd641e3SDavidlohr Bueso /**
898cd641e3SDavidlohr Bueso  * swait_active -- locklessly test for waiters on the queue
908cd641e3SDavidlohr Bueso  * @wq: the waitqueue to test for waiters
918cd641e3SDavidlohr Bueso  *
928cd641e3SDavidlohr Bueso  * returns true if the wait list is not empty
938cd641e3SDavidlohr Bueso  *
948cd641e3SDavidlohr Bueso  * NOTE: this function is lockless and requires care, incorrect usage _will_
958cd641e3SDavidlohr Bueso  * lead to sporadic and non-obvious failure.
968cd641e3SDavidlohr Bueso  *
978cd641e3SDavidlohr Bueso  * NOTE2: this function has the same above implications as regular waitqueues.
988cd641e3SDavidlohr Bueso  *
998cd641e3SDavidlohr Bueso  * Use either while holding swait_queue_head::lock or when used for wakeups
1008cd641e3SDavidlohr Bueso  * with an extra smp_mb() like:
1018cd641e3SDavidlohr Bueso  *
1028cd641e3SDavidlohr Bueso  *      CPU0 - waker                    CPU1 - waiter
1038cd641e3SDavidlohr Bueso  *
1048cd641e3SDavidlohr Bueso  *                                      for (;;) {
105b3dae109SPeter Zijlstra  *      @cond = true;                     prepare_to_swait_exclusive(&wq_head, &wait, state);
1068cd641e3SDavidlohr Bueso  *      smp_mb();                         // smp_mb() from set_current_state()
1078cd641e3SDavidlohr Bueso  *      if (swait_active(wq_head))        if (@cond)
1088cd641e3SDavidlohr Bueso  *        wake_up(wq_head);                      break;
1098cd641e3SDavidlohr Bueso  *                                        schedule();
1108cd641e3SDavidlohr Bueso  *                                      }
1118cd641e3SDavidlohr Bueso  *                                      finish_swait(&wq_head, &wait);
1128cd641e3SDavidlohr Bueso  *
1138cd641e3SDavidlohr Bueso  * Because without the explicit smp_mb() it's possible for the
1148cd641e3SDavidlohr Bueso  * swait_active() load to get hoisted over the @cond store such that we'll
1158cd641e3SDavidlohr Bueso  * observe an empty wait list while the waiter might not observe @cond.
1168cd641e3SDavidlohr Bueso  * This, in turn, can trigger missing wakeups.
1178cd641e3SDavidlohr Bueso  *
1188cd641e3SDavidlohr Bueso  * Also note that this 'optimization' trades a spin_lock() for an smp_mb(),
1198cd641e3SDavidlohr Bueso  * which (when the lock is uncontended) are of roughly equal cost.
1208cd641e3SDavidlohr Bueso  */
swait_active(struct swait_queue_head * wq)1218cd641e3SDavidlohr Bueso static inline int swait_active(struct swait_queue_head *wq)
12213b35686SPeter Zijlstra (Intel) {
1238cd641e3SDavidlohr Bueso 	return !list_empty(&wq->task_list);
1248cd641e3SDavidlohr Bueso }
1258cd641e3SDavidlohr Bueso 
1268cd641e3SDavidlohr Bueso /**
1278cd641e3SDavidlohr Bueso  * swq_has_sleeper - check if there are any waiting processes
1288cd641e3SDavidlohr Bueso  * @wq: the waitqueue to test for waiters
1298cd641e3SDavidlohr Bueso  *
1308cd641e3SDavidlohr Bueso  * Returns true if @wq has waiting processes
1318cd641e3SDavidlohr Bueso  *
1328cd641e3SDavidlohr Bueso  * Please refer to the comment for swait_active.
1338cd641e3SDavidlohr Bueso  */
swq_has_sleeper(struct swait_queue_head * wq)1348cd641e3SDavidlohr Bueso static inline bool swq_has_sleeper(struct swait_queue_head *wq)
1358cd641e3SDavidlohr Bueso {
1368cd641e3SDavidlohr Bueso 	/*
1378cd641e3SDavidlohr Bueso 	 * We need to be sure we are in sync with the list_add()
1388cd641e3SDavidlohr Bueso 	 * modifications to the wait queue (task_list).
1398cd641e3SDavidlohr Bueso 	 *
1408cd641e3SDavidlohr Bueso 	 * This memory barrier should be paired with one on the
1418cd641e3SDavidlohr Bueso 	 * waiting side.
1428cd641e3SDavidlohr Bueso 	 */
1438cd641e3SDavidlohr Bueso 	smp_mb();
1448cd641e3SDavidlohr Bueso 	return swait_active(wq);
14513b35686SPeter Zijlstra (Intel) }
14613b35686SPeter Zijlstra (Intel) 
/*
 * Wake-up entry points, implemented out of line.
 *
 * swake_up_one():    wakes at most one waiter on @q (all swait waiters are
 *                    exclusive, see the overview comment in this header).
 * swake_up_all():    wakes every waiter on @q; per the overview comment it
 *                    must not be used with IRQs disabled.
 * swake_up_locked(): NOTE(review): by its name this variant presumably
 *                    expects the caller to hold q->lock -- confirm against
 *                    the implementation. @wake_flags is passed through.
 */
extern void swake_up_one(struct swait_queue_head *q);
extern void swake_up_all(struct swait_queue_head *q);
extern void swake_up_locked(struct swait_queue_head *q, int wake_flags);
15013b35686SPeter Zijlstra (Intel) 
/*
 * Waiter-side preparation, implemented out of line. Both take the queue
 * head @q, the caller's entry @wait and the task @state to sleep in.
 *
 * prepare_to_swait_event() additionally returns a long; ___swait_event()
 * treats a non-zero return in an interruptible @state as "stop waiting and
 * return this value".
 */
extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state);
extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);
15313b35686SPeter Zijlstra (Intel) 
/*
 * Waiter-side teardown for the entry @wait on queue head @q, implemented
 * out of line. finish_swait() is what ___swait_event() calls once its wait
 * loop has ended.
 * NOTE(review): __finish_swait() is presumably the variant for callers that
 * already hold q->lock -- confirm against the implementation.
 */
extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
15613b35686SPeter Zijlstra (Intel) 
/*
 * As per ___wait_event() but for swait, therefore "exclusive == 1".
 *
 * @wq:        the swait queue head (an lvalue; its address is taken here)
 * @condition: expression re-evaluated after every prepare_to_swait_event()
 * @state:     task state handed to prepare_to_swait_event()
 * @ret:       initial value of the local result variable __ret
 * @cmd:       statement executed while @condition is still false; it is
 *             expanded inside this macro's scope, so it can read and assign
 *             the local __ret (the timeout variants rely on that)
 *
 * The statement expression evaluates to __ret.
 *
 * If @state is interruptible and prepare_to_swait_event() returned non-zero,
 * that value becomes the result and the loop is left via __out, skipping
 * finish_swait().
 * NOTE(review): this presumably relies on prepare_to_swait_event() having
 * already dequeued __wait in that case -- confirm against the implementation.
 */
#define ___swait_event(wq, condition, state, ret, cmd)			\
({									\
	__label__ __out;						\
	struct swait_queue __wait;					\
	long __ret = ret;						\
									\
	INIT_LIST_HEAD(&__wait.task_list);				\
	for (;;) {							\
		long __int = prepare_to_swait_event(&wq, &__wait, state);\
									\
		if (condition)						\
			break;						\
									\
		if (___wait_is_interruptible(state) && __int) {		\
			__ret = __int;					\
			goto __out;					\
		}							\
									\
		cmd;							\
	}								\
	finish_swait(&wq, &__wait);					\
__out:	__ret;								\
})
18113b35686SPeter Zijlstra (Intel) 
/*
 * Slow path of swait_event_exclusive(): wait in TASK_UNINTERRUPTIBLE,
 * calling schedule() between checks of @condition. The result is discarded.
 */
#define __swait_event(wq, condition)					\
	(void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0,	\
			    schedule())
18513b35686SPeter Zijlstra (Intel) 
/*
 * Sleep (TASK_UNINTERRUPTIBLE) on @wq until @condition is true.
 *
 * @condition is tested once up front so that nothing is queued when it
 * already holds; it is evaluated again on every pass of the wait loop.
 */
#define swait_event_exclusive(wq, condition)				\
do {									\
	if (condition)							\
		break;							\
	__swait_event(wq, condition);					\
} while (0)
19213b35686SPeter Zijlstra (Intel) 
/*
 * Slow path of swait_event_timeout_exclusive(): TASK_UNINTERRUPTIBLE wait
 * bounded by @timeout.
 *
 * The '__ret' in the command refers to the local of ___swait_event(): it
 * starts as @timeout and is replaced by schedule_timeout()'s return value
 * after each sleep.
 */
#define __swait_event_timeout(wq, condition, timeout)			\
	___swait_event(wq, ___wait_cond_timeout(condition),		\
		      TASK_UNINTERRUPTIBLE, timeout,			\
		      __ret = schedule_timeout(__ret))
19713b35686SPeter Zijlstra (Intel) 
/*
 * Sleep (TASK_UNINTERRUPTIBLE) on @wq until @condition is true or @timeout
 * has elapsed.
 *
 * Evaluates to a long that starts as @timeout and is updated by
 * ___wait_cond_timeout() and the slow path.
 * NOTE(review): the return convention is presumably the one documented for
 * swait_event_idle_timeout_exclusive() (0 / 1 / remaining jiffies) -- confirm
 * against ___wait_cond_timeout() in <linux/wait.h>.
 */
#define swait_event_timeout_exclusive(wq, condition, timeout)		\
({									\
	long __ret = timeout;						\
	if (!___wait_cond_timeout(condition))				\
		__ret = __swait_event_timeout(wq, condition, timeout);	\
	__ret;								\
})
20513b35686SPeter Zijlstra (Intel) 
/*
 * Slow path of swait_event_interruptible_exclusive(): wait in
 * TASK_INTERRUPTIBLE, calling schedule() between checks of @condition.
 * Evaluates to 0, or to the non-zero value returned by
 * prepare_to_swait_event() when the wait was cut short.
 */
#define __swait_event_interruptible(wq, condition)			\
	___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0,		\
		      schedule())
20913b35686SPeter Zijlstra (Intel) 
/*
 * Sleep (TASK_INTERRUPTIBLE) on @wq until @condition is true.
 *
 * Evaluates to 0 when @condition became true, or to the non-zero value that
 * prepare_to_swait_event() returned when the wait was interrupted.
 * NOTE(review): that value is presumably a negative errno such as
 * -ERESTARTSYS -- confirm against the implementation.
 */
#define swait_event_interruptible_exclusive(wq, condition)		\
({									\
	int __ret = 0;							\
	if (!(condition))						\
		__ret = __swait_event_interruptible(wq, condition);	\
	__ret;								\
})
21713b35686SPeter Zijlstra (Intel) 
/*
 * Slow path of swait_event_interruptible_timeout_exclusive():
 * TASK_INTERRUPTIBLE wait bounded by @timeout.
 *
 * The '__ret' in the command refers to the local of ___swait_event(): it
 * starts as @timeout and is replaced by schedule_timeout()'s return value
 * after each sleep.
 */
#define __swait_event_interruptible_timeout(wq, condition, timeout)	\
	___swait_event(wq, ___wait_cond_timeout(condition),		\
		      TASK_INTERRUPTIBLE, timeout,			\
		      __ret = schedule_timeout(__ret))
22213b35686SPeter Zijlstra (Intel) 
/*
 * Sleep (TASK_INTERRUPTIBLE) on @wq until @condition is true, @timeout has
 * elapsed, or the wait is interrupted.
 *
 * Evaluates to a long: the timeout-style result maintained by
 * ___wait_cond_timeout() and the slow path, or the non-zero value returned
 * by prepare_to_swait_event() when the wait was interrupted.
 * NOTE(review): the timeout part presumably follows the convention
 * documented for swait_event_idle_timeout_exclusive() -- confirm against
 * ___wait_cond_timeout() in <linux/wait.h>.
 */
#define swait_event_interruptible_timeout_exclusive(wq, condition, timeout)\
({									\
	long __ret = timeout;						\
	if (!___wait_cond_timeout(condition))				\
		__ret = __swait_event_interruptible_timeout(wq,		\
						condition, timeout);	\
	__ret;								\
})
23113b35686SPeter Zijlstra (Intel) 
/*
 * Slow path of swait_event_idle_exclusive(): wait in TASK_IDLE, calling
 * schedule() between checks of @condition. The result is discarded.
 */
#define __swait_event_idle(wq, condition)				\
	(void)___swait_event(wq, condition, TASK_IDLE, 0, schedule())
234352eee12SLuis R. Rodriguez 
/**
 * swait_event_idle_exclusive - wait without system load contribution
 * @wq: the waitqueue to wait on
 * @condition: a C expression for the event to wait for
 *
 * The process is put to sleep (TASK_IDLE) until the @condition evaluates to
 * true. The @condition is checked each time the waitqueue @wq is woken up.
 *
 * This function is mostly used when a kthread or workqueue waits for some
 * condition and doesn't want to contribute to system load. Signals are
 * ignored.
 */
#define swait_event_idle_exclusive(wq, condition)			\
do {									\
	if (condition)							\
		break;							\
	__swait_event_idle(wq, condition);				\
} while (0)
253352eee12SLuis R. Rodriguez 
/*
 * Slow path of swait_event_idle_timeout_exclusive(): TASK_IDLE wait bounded
 * by @timeout.
 *
 * The '__ret' in the command refers to the local of ___swait_event(): it
 * starts as @timeout and is replaced by schedule_timeout()'s return value
 * after each sleep.
 */
#define __swait_event_idle_timeout(wq, condition, timeout)		\
	___swait_event(wq, ___wait_cond_timeout(condition),		\
		       TASK_IDLE, timeout,				\
		       __ret = schedule_timeout(__ret))
258352eee12SLuis R. Rodriguez 
/**
 * swait_event_idle_timeout_exclusive - wait up to timeout without load contribution
 * @wq: the waitqueue to wait on
 * @condition: a C expression for the event to wait for
 * @timeout: timeout at which we'll give up in jiffies
 *
 * The process is put to sleep (TASK_IDLE) until the @condition evaluates to
 * true. The @condition is checked each time the waitqueue @wq is woken up.
 *
 * This function is mostly used when a kthread or workqueue waits for some
 * condition and doesn't want to contribute to system load. Signals are
 * ignored.
 *
 * Returns:
 * 0 if the @condition evaluated to %false after the @timeout elapsed,
 * 1 if the @condition evaluated to %true after the @timeout elapsed,
 * or the remaining jiffies (at least 1) if the @condition evaluated
 * to %true before the @timeout elapsed.
 */
#define swait_event_idle_timeout_exclusive(wq, condition, timeout)	\
({									\
	long __ret = timeout;						\
	if (!___wait_cond_timeout(condition))				\
		__ret = __swait_event_idle_timeout(wq,			\
						   condition, timeout);	\
	__ret;								\
})
286352eee12SLuis R. Rodriguez 
28713b35686SPeter Zijlstra (Intel) #endif /* _LINUX_SWAIT_H */
288