xref: /linux-6.15/kernel/sched/wait.c (revision 37acade0)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Generic waiting primitives.
 *
 * (C) 2004 Nadia Yvette Chambers, Oracle
 */

void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *key)
{
	spin_lock_init(&wq_head->lock);
	lockdep_set_class_and_name(&wq_head->lock, key, name);
	INIT_LIST_HEAD(&wq_head->head);
}

EXPORT_SYMBOL(__init_waitqueue_head);

void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&wq_head->lock, flags);
	__add_wait_queue(wq_head, wq_entry);
	spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue);

void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&wq_head->lock, flags);
	__add_wait_queue_entry_tail(wq_head, wq_entry);
	spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue_exclusive);

void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	wq_entry->flags |= WQ_FLAG_EXCLUSIVE | WQ_FLAG_PRIORITY;
	spin_lock_irqsave(&wq_head->lock, flags);
	__add_wait_queue(wq_head, wq_entry);
	spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL_GPL(add_wait_queue_priority);

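/*
 * Illustration only (not part of this file's API): a minimal sketch of how
 * the three helpers above place entries on the list, assuming an on-stack
 * head 'wqh' and entries a/b/c initialised with init_waitqueue_entry():
 *
 *	add_wait_queue(&wqh, &a);		// non-exclusive, near the head
 *	add_wait_queue_exclusive(&wqh, &b);	// exclusive, at the tail
 *	add_wait_queue_priority(&wqh, &c);	// exclusive + priority, at the head
 *
 * The resulting order is c, a, b, so __wake_up_common() below reaches the
 * priority entry first and an exclusive wakeup can be consumed by c alone.
 */
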
void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	spin_lock_irqsave(&wq_head->lock, flags);
	__remove_wait_queue(wq_head, wq_entry);
	spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(remove_wait_queue);

/*
 * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
 * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
 * number) then we wake that number of exclusive tasks, and potentially all
 * the non-exclusive tasks. Normally, exclusive tasks will be at the end of
 * the list and any non-exclusive tasks will be woken first. A priority task
 * may be at the head of the list, and can consume the event without any other
 * tasks being woken.
 *
 * There are circumstances in which we can try to wake a task which has already
 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
 * zero in this (rare) case, and we handle it by continuing to scan the queue.
 */
static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
			int nr_exclusive, int wake_flags, void *key)
{
	wait_queue_entry_t *curr, *next;

	lockdep_assert_held(&wq_head->lock);

	curr = list_first_entry(&wq_head->head, wait_queue_entry_t, entry);

	if (&curr->entry == &wq_head->head)
		return nr_exclusive;

	list_for_each_entry_safe_from(curr, next, &wq_head->head, entry) {
		unsigned flags = curr->flags;
		int ret;

		ret = curr->func(curr, mode, wake_flags, key);
		if (ret < 0)
			break;
		if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
			break;
	}

	return nr_exclusive;
}

static int __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode,
			int nr_exclusive, int wake_flags, void *key)
{
	unsigned long flags;
	int remaining;

	spin_lock_irqsave(&wq_head->lock, flags);
	remaining = __wake_up_common(wq_head, mode, nr_exclusive, wake_flags,
			key);
	spin_unlock_irqrestore(&wq_head->lock, flags);

	return nr_exclusive - remaining;
}

/**
 * __wake_up - wake up threads blocked on a waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @nr_exclusive: how many wake-one or wake-many threads to wake up
 * @key: is directly passed to the wakeup function
 *
 * If this function wakes up a task, it executes a full memory barrier
 * before accessing the task state.  Returns the number of exclusive
 * tasks that were woken up.
 */
int __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
	      int nr_exclusive, void *key)
{
	return __wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
}
EXPORT_SYMBOL(__wake_up);

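/*
 * For reference, a sketch of the common wrappers (defined in
 * include/linux/wait.h, shown here only to make @mode and @nr_exclusive
 * concrete; see that header for the authoritative definitions):
 *
 *	wake_up(x)		-> __wake_up(x, TASK_NORMAL, 1, NULL)
 *	wake_up_nr(x, nr)	-> __wake_up(x, TASK_NORMAL, nr, NULL)
 *	wake_up_all(x)		-> __wake_up(x, TASK_NORMAL, 0, NULL)
 *	wake_up_interruptible(x) -> __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
 *
 * i.e. nr_exclusive == 0 means "wake every waiter", nr_exclusive == 1 wakes
 * at most one exclusive waiter (plus any non-exclusive ones ahead of it).
 */
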
void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key)
{
	__wake_up_common_lock(wq_head, mode, 1, WF_CURRENT_CPU, key);
}

/*
 * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
 */
void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr)
{
	__wake_up_common(wq_head, mode, nr, 0, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_locked);

void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key)
{
	__wake_up_common(wq_head, mode, 1, 0, key);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_key);

/**
 * __wake_up_sync_key - wake up threads blocked on a waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @key: opaque value to be passed to wakeup targets
 *
 * The sync wakeup differs in that the waker knows that it will schedule
 * away soon, so while the target thread will be woken up, it will not
 * be migrated to another CPU - ie. the two threads are 'synchronized'
 * with each other. This can prevent needless bouncing between CPUs.
 *
 * On UP it can prevent extra preemption.
 *
 * If this function wakes up a task, it executes a full memory barrier before
 * accessing the task state.
 */
void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
			void *key)
{
	if (unlikely(!wq_head))
		return;

	__wake_up_common_lock(wq_head, mode, 1, WF_SYNC, key);
}
EXPORT_SYMBOL_GPL(__wake_up_sync_key);

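/*
 * Typical entry point (a sketch, see include/linux/wait.h for the real
 * macro): a producer that is about to sleep itself, e.g. a socket write
 * path, uses
 *
 *	wake_up_interruptible_sync_poll(&wq, EPOLLOUT);
 *
 * which expands to __wake_up_sync_key(&wq, TASK_INTERRUPTIBLE,
 * poll_to_key(EPOLLOUT)), so the woken reader is not pointlessly migrated
 * to another CPU while the writer is still running.
 */
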
/**
 * __wake_up_locked_sync_key - wake up a thread blocked on a locked waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @key: opaque value to be passed to wakeup targets
 *
 * The sync wakeup differs in that the waker knows that it will schedule
 * away soon, so while the target thread will be woken up, it will not
 * be migrated to another CPU - ie. the two threads are 'synchronized'
 * with each other. This can prevent needless bouncing between CPUs.
 *
 * On UP it can prevent extra preemption.
 *
 * If this function wakes up a task, it executes a full memory barrier before
 * accessing the task state.
 */
void __wake_up_locked_sync_key(struct wait_queue_head *wq_head,
			       unsigned int mode, void *key)
{
	__wake_up_common(wq_head, mode, 1, WF_SYNC, key);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_sync_key);

/*
 * __wake_up_sync - see __wake_up_sync_key()
 */
void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode)
{
	__wake_up_sync_key(wq_head, mode, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */

void __wake_up_pollfree(struct wait_queue_head *wq_head)
{
	__wake_up(wq_head, TASK_NORMAL, 0, poll_to_key(EPOLLHUP | POLLFREE));
	/* POLLFREE must have cleared the queue. */
	WARN_ON_ONCE(waitqueue_active(wq_head));
}

/*
 * Note: we use "set_current_state()" _after_ the wait-queue add,
 * because we need a memory barrier there on SMP, so that any
 * wake-function that tests for the wait-queue being active
 * will be guaranteed to see waitqueue addition _or_ subsequent
 * tests in this thread will see the wakeup having taken place.
 *
 * The spin_unlock() itself is semi-permeable and only protects
 * one way (it only protects stuff inside the critical region and
 * stops them from bleeding out - it would still allow subsequent
 * loads to move into the critical region).
 */
void
prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
	unsigned long flags;

	wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&wq_head->lock, flags);
	if (list_empty(&wq_entry->entry))
		__add_wait_queue(wq_head, wq_entry);
	set_current_state(state);
	spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait);

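/*
 * Illustrative only -- the classic open-coded wait loop built on the
 * helpers in this file (a sketch; 'wqh' and 'condition' are assumed to be
 * supplied by the caller):
 *
 *	DEFINE_WAIT(wait);
 *
 *	for (;;) {
 *		prepare_to_wait(&wqh, &wait, TASK_INTERRUPTIBLE);
 *		if (condition)
 *			break;
 *		if (signal_pending(current))
 *			break;
 *		schedule();
 *	}
 *	finish_wait(&wqh, &wait);
 *
 * prepare_to_wait() re-adds the entry only if a previous wakeup removed it,
 * and finish_wait() below restores TASK_RUNNING and delists the entry.
 */
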
/* Returns true if we are the first waiter in the queue, false otherwise. */
bool
prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
	unsigned long flags;
	bool was_empty = false;

	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&wq_head->lock, flags);
	if (list_empty(&wq_entry->entry)) {
		was_empty = list_empty(&wq_head->head);
		__add_wait_queue_entry_tail(wq_head, wq_entry);
	}
	set_current_state(state);
	spin_unlock_irqrestore(&wq_head->lock, flags);
	return was_empty;
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);

void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
{
	wq_entry->flags = flags;
	wq_entry->private = current;
	wq_entry->func = autoremove_wake_function;
	INIT_LIST_HEAD(&wq_entry->entry);
}
EXPORT_SYMBOL(init_wait_entry);

long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
	unsigned long flags;
	long ret = 0;

	spin_lock_irqsave(&wq_head->lock, flags);
	if (signal_pending_state(state, current)) {
		/*
		 * Exclusive waiter must not fail if it was selected by wakeup,
		 * it should "consume" the condition we were waiting for.
		 *
		 * The caller will recheck the condition and return success if
		 * we were already woken up, we can not miss the event because
		 * wakeup locks/unlocks the same wq_head->lock.
		 *
		 * But we need to ensure that set-condition + wakeup after that
		 * can't see us, it should wake up another exclusive waiter if
		 * we fail.
		 */
		list_del_init(&wq_entry->entry);
		ret = -ERESTARTSYS;
	} else {
		if (list_empty(&wq_entry->entry)) {
			if (wq_entry->flags & WQ_FLAG_EXCLUSIVE)
				__add_wait_queue_entry_tail(wq_head, wq_entry);
			else
				__add_wait_queue(wq_head, wq_entry);
		}
		set_current_state(state);
	}
	spin_unlock_irqrestore(&wq_head->lock, flags);

	return ret;
}
EXPORT_SYMBOL(prepare_to_wait_event);

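/*
 * Roughly how the wait_event*() family drives the function above -- a
 * simplified, non-verbatim sketch of ___wait_event() from
 * include/linux/wait.h (the exact helpers and error paths are elided):
 *
 *	init_wait_entry(&wq_entry, exclusive ? WQ_FLAG_EXCLUSIVE : 0);
 *	for (;;) {
 *		long intr = prepare_to_wait_event(&wq_head, &wq_entry, state);
 *
 *		if (condition)
 *			break;
 *		if (intr) {			// signal seen: -ERESTARTSYS
 *			ret = intr;
 *			goto out;
 *		}
 *		schedule();
 *	}
 *	finish_wait(&wq_head, &wq_entry);
 */
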
/*
 * Note! These two wait functions are entered with the
 * wait-queue lock held (and interrupts off in the _irq
 * case), so there is no race with testing the wakeup
 * condition in the caller before they add the wait
 * entry to the wake queue.
 */
int do_wait_intr(wait_queue_head_t *wq, wait_queue_entry_t *wait)
{
	if (likely(list_empty(&wait->entry)))
		__add_wait_queue_entry_tail(wq, wait);

	set_current_state(TASK_INTERRUPTIBLE);
	if (signal_pending(current))
		return -ERESTARTSYS;

	spin_unlock(&wq->lock);
	schedule();
	spin_lock(&wq->lock);

	return 0;
}
EXPORT_SYMBOL(do_wait_intr);

int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait)
{
	if (likely(list_empty(&wait->entry)))
		__add_wait_queue_entry_tail(wq, wait);

	set_current_state(TASK_INTERRUPTIBLE);
	if (signal_pending(current))
		return -ERESTARTSYS;

	spin_unlock_irq(&wq->lock);
	schedule();
	spin_lock_irq(&wq->lock);

	return 0;
}
EXPORT_SYMBOL(do_wait_intr_irq);

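/*
 * Caller-side shape, roughly (a sketch of how the locked wait_event
 * variants use the helpers above; see include/linux/wait.h for the real
 * macros):
 *
 *	DEFINE_WAIT(wait);
 *	int err = 0;
 *
 *	spin_lock(&wq.lock);
 *	while (!condition) {
 *		err = do_wait_intr(&wq, &wait);	// drops/retakes wq.lock
 *		if (err)
 *			break;
 *	}
 *	__remove_wait_queue(&wq, &wait);
 *	__set_current_state(TASK_RUNNING);
 *	spin_unlock(&wq.lock);
 */
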
/**
 * finish_wait - clean up after waiting in a queue
 * @wq_head: waitqueue waited on
 * @wq_entry: wait descriptor
 *
 * Sets current thread back to running state and removes
 * the wait descriptor from the given waitqueue if still
 * queued.
 */
void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	__set_current_state(TASK_RUNNING);
	/*
	 * We can check for list emptiness outside the lock
	 * IFF:
	 *  - we use the "careful" check that verifies both
	 *    the next and prev pointers, so that there cannot
	 *    be any half-pending updates in progress on other
	 *    CPUs that we haven't seen yet (and that might
	 *    still change the stack area).
	 * and
	 *  - all other users take the lock (ie we can only
	 *    have _one_ other CPU that looks at or modifies
	 *    the list).
	 */
	if (!list_empty_careful(&wq_entry->entry)) {
		spin_lock_irqsave(&wq_head->lock, flags);
		list_del_init(&wq_entry->entry);
		spin_unlock_irqrestore(&wq_head->lock, flags);
	}
}
EXPORT_SYMBOL(finish_wait);

int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
{
	int ret = default_wake_function(wq_entry, mode, sync, key);

	if (ret)
		list_del_init_careful(&wq_entry->entry);

	return ret;
}
EXPORT_SYMBOL(autoremove_wake_function);

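/*
 * For orientation (a sketch of DEFINE_WAIT() from include/linux/wait.h,
 * shown here because it is what installs autoremove_wake_function above):
 *
 *	#define DEFINE_WAIT_FUNC(name, function)			\
 *		struct wait_queue_entry name = {			\
 *			.private	= current,			\
 *			.func		= function,			\
 *			.entry		= LIST_HEAD_INIT((name).entry),	\
 *		}
 *
 *	#define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)
 *
 * A successful wakeup therefore delists the entry itself, and the later
 * finish_wait() usually finds it empty and can skip taking the lock.
 */
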
/*
 * DEFINE_WAIT_FUNC(wait, woken_wake_func);
 *
 * add_wait_queue(&wq_head, &wait);
 * for (;;) {
 *     if (condition)
 *         break;
 *
 *     // in wait_woken()			// in woken_wake_function()
 *
 *     p->state = mode;				wq_entry->flags |= WQ_FLAG_WOKEN;
 *     smp_mb(); // A				try_to_wake_up():
 *     if (!(wq_entry->flags & WQ_FLAG_WOKEN))	   <full barrier>
 *         schedule()				   if (p->state & mode)
 *     p->state = TASK_RUNNING;			      p->state = TASK_RUNNING;
 *     wq_entry->flags &= ~WQ_FLAG_WOKEN;	~~~~~~~~~~~~~~~~~~
 *     smp_mb(); // B				condition = true;
 * }						smp_mb(); // C
 * remove_wait_queue(&wq_head, &wait);		wq_entry->flags |= WQ_FLAG_WOKEN;
 */
long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout)
{
	/*
	 * The below executes an smp_mb(), which matches with the full barrier
	 * executed by the try_to_wake_up() in woken_wake_function() such that
	 * either we see the store to wq_entry->flags in woken_wake_function()
	 * or woken_wake_function() sees our store to current->state.
	 */
	set_current_state(mode); /* A */
	if (!(wq_entry->flags & WQ_FLAG_WOKEN) && !kthread_should_stop_or_park())
		timeout = schedule_timeout(timeout);
	__set_current_state(TASK_RUNNING);

	/*
	 * The below executes an smp_mb(), which matches with the smp_mb() (C)
	 * in woken_wake_function() such that either we see the wait condition
	 * being true or the store to wq_entry->flags in woken_wake_function()
	 * follows ours in the coherence order.
	 */
	smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); /* B */

	return timeout;
}
EXPORT_SYMBOL(wait_woken);

int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
{
	/* Pairs with the smp_store_mb() in wait_woken(). */
	smp_mb(); /* C */
	wq_entry->flags |= WQ_FLAG_WOKEN;

	return default_wake_function(wq_entry, mode, sync, key);
}
EXPORT_SYMBOL(woken_wake_function);