// SPDX-License-Identifier: GPL-2.0
/*
 *  Kernel internal schedule timeout and sleeping functions
 */

#include <linux/delay.h>
#include <linux/jiffies.h>
#include <linux/timer.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>

#include "tick-internal.h"

/*
 * Since schedule_timeout()'s timer is defined on the stack, it must store
 * the target task on the stack as well.
 */
struct process_timer {
	struct timer_list timer;
	struct task_struct *task;
};

static void process_timeout(struct timer_list *t)
{
	struct process_timer *timeout = from_timer(timeout, t, timer);

	wake_up_process(timeout->task);
}

/**
 * schedule_timeout - sleep until timeout
 * @timeout: timeout value in jiffies
 *
 * Make the current task sleep until @timeout jiffies have elapsed.
 * The function behavior depends on the current task state
 * (see also set_current_state() description):
 *
 * %TASK_RUNNING - the scheduler is called, but the task does not sleep
 * at all. That happens because sched_submit_work() does nothing for
 * tasks in %TASK_RUNNING state.
 *
 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
 * pass before the routine returns unless the current task is explicitly
 * woken up (e.g. by wake_up_process()).
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 * delivered to the current task or the current task is explicitly woken
 * up.
 *
 * The current task state is guaranteed to be %TASK_RUNNING when this
 * routine returns.
 *
 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
 * the CPU away without a bound on the timeout. In this case the return
 * value will be %MAX_SCHEDULE_TIMEOUT.
 *
 * Returns: 0 when the timer has expired, otherwise the remaining time in
 * jiffies. In all cases the return value is guaranteed to be non-negative.
 */
signed long __sched schedule_timeout(signed long timeout)
{
	struct process_timer timer;
	unsigned long expire;

	switch (timeout) {
	case MAX_SCHEDULE_TIMEOUT:
		/*
		 * These two special cases are for the caller's convenience.
		 * Nothing more. MAX_SCHEDULE_TIMEOUT could have been one
		 * of the negative values, but returning a valid offset
		 * (>= 0) lets the caller do whatever it wants with the
		 * retval.
		 */
		schedule();
		goto out;
	default:
		/*
		 * Another bit of paranoia. Note that the retval will be
		 * 0, since no piece of the kernel is supposed to check
		 * for a negative retval of schedule_timeout() (it should
		 * never happen anyway). The printk() below just tells you
		 * if something has gone wrong, and where.
		 */
		if (timeout < 0) {
			pr_err("%s: wrong timeout value %lx\n", __func__, timeout);
			dump_stack();
			__set_current_state(TASK_RUNNING);
			goto out;
		}
	}

	expire = timeout + jiffies;

	timer.task = current;
	timer_setup_on_stack(&timer.timer, process_timeout, 0);
	timer.timer.expires = expire;
	add_timer(&timer.timer);
	schedule();
	del_timer_sync(&timer.timer);

	/* Remove the timer from the object tracker */
	destroy_timer_on_stack(&timer.timer);

	timeout = expire - jiffies;

 out:
	return timeout < 0 ? 0 : timeout;
}
EXPORT_SYMBOL(schedule_timeout);
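
/*
 * Illustrative sketch, not part of the original file: the usual caller
 * pattern pairs set_current_state() with schedule_timeout() and re-checks
 * the wakeup condition, since an explicit wakeup may arrive before the
 * timeout. wait_for_done_example and the @done flag are hypothetical.
 */
static __maybe_unused signed long wait_for_done_example(bool *done,
							signed long timeout)
{
	while (timeout && !READ_ONCE(*done)) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		/* Re-check after setting the state to avoid a missed wakeup */
		if (READ_ONCE(*done)) {
			__set_current_state(TASK_RUNNING);
			break;
		}
		timeout = schedule_timeout(timeout);
	}
	/* Remaining jiffies; 0 means the full timeout elapsed */
	return timeout;
}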

/*
 * We can use __set_current_state() here because schedule_timeout() calls
 * schedule() unconditionally.
 */
signed long __sched schedule_timeout_interruptible(signed long timeout)
{
	__set_current_state(TASK_INTERRUPTIBLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_interruptible);

signed long __sched schedule_timeout_killable(signed long timeout)
{
	__set_current_state(TASK_KILLABLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_killable);

signed long __sched schedule_timeout_uninterruptible(signed long timeout)
{
	__set_current_state(TASK_UNINTERRUPTIBLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_uninterruptible);

/*
 * Like schedule_timeout_uninterruptible(), except this task will not contribute
 * to load average.
 */
signed long __sched schedule_timeout_idle(signed long timeout)
{
	__set_current_state(TASK_IDLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_idle);
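
/*
 * Illustrative sketch, not part of the original file: the killable variant
 * suits waits that should ignore ordinary signals but must not block a
 * fatal one. wait_killable_example and the one-second timeout are
 * hypothetical.
 */
static __maybe_unused int wait_killable_example(void)
{
	/* Sleep for about a second; only fatal signals end the wait early */
	if (schedule_timeout_killable(HZ))
		return -EINTR;	/* woken early: time was left over */
	return 0;		/* the full timeout elapsed */
}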

/**
 * schedule_hrtimeout_range_clock - sleep until timeout
 * @expires:	timeout value (ktime_t)
 * @delta:	slack in expires timeout (ktime_t)
 * @mode:	timer mode
 * @clock_id:	timer clock to be used
 *
 * Returns: 0 when the timer has expired, or -EINTR when it was
 * interrupted by a signal or an explicit wakeup.
 */
int __sched schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
					   const enum hrtimer_mode mode, clockid_t clock_id)
{
	struct hrtimer_sleeper t;

	/*
	 * Optimize when a zero timeout value is given. It does not
	 * matter whether this is an absolute or a relative time.
	 */
	if (expires && *expires == 0) {
		__set_current_state(TASK_RUNNING);
		return 0;
	}

	/*
	 * A NULL parameter means "infinite"
	 */
	if (!expires) {
		schedule();
		return -EINTR;
	}

	hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
	hrtimer_sleeper_start_expires(&t, mode);

	if (likely(t.task))
		schedule();

	hrtimer_cancel(&t.timer);
	destroy_hrtimer_on_stack(&t.timer);

	__set_current_state(TASK_RUNNING);

	return !t.task ? 0 : -EINTR;
}
EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock);
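
/*
 * Illustrative sketch, not part of the original file: an absolute
 * CLOCK_REALTIME sleep via schedule_hrtimeout_range_clock().
 * realtime_sleep_example is hypothetical; note the caller must set a
 * sleeping task state first, as the hrtimeout helpers do not set one.
 */
static __maybe_unused int realtime_sleep_example(ktime_t abs_expiry)
{
	set_current_state(TASK_INTERRUPTIBLE);
	/* Zero slack: wake as close to abs_expiry as the clock allows */
	return schedule_hrtimeout_range_clock(&abs_expiry, 0,
					      HRTIMER_MODE_ABS,
					      CLOCK_REALTIME);
}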

/**
 * schedule_hrtimeout_range - sleep until timeout
 * @expires:	timeout value (ktime_t)
 * @delta:	slack in expires timeout (ktime_t)
 * @mode:	timer mode
 *
 * Make the current task sleep until the given expiry time has
 * elapsed. The routine will return immediately unless
 * the current task state has been set (see set_current_state()).
 *
 * The @delta argument gives the kernel the freedom to schedule the
 * actual wakeup to a time that is both power and performance friendly
 * for regular (non RT/DL) tasks.
 * The kernel gives the normal best-effort behavior for "@expires + @delta",
 * but may decide to fire the timer earlier, though never before @expires.
 *
 * You can set the task state as follows -
 *
 * %TASK_UNINTERRUPTIBLE - at least the time specified by @expires is
 * guaranteed to pass before the routine returns unless the current task
 * is explicitly woken up (e.g. by wake_up_process()).
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 * delivered to the current task or the current task is explicitly woken
 * up.
 *
 * The current task state is guaranteed to be %TASK_RUNNING when this
 * routine returns.
 *
 * Returns: 0 when the timer has expired. If the task was woken before the
 * timer expired by a signal (only possible in state %TASK_INTERRUPTIBLE) or
 * by an explicit wakeup, it returns -EINTR.
 */
int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta,
				     const enum hrtimer_mode mode)
{
	return schedule_hrtimeout_range_clock(expires, delta, mode,
					      CLOCK_MONOTONIC);
}
EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
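
/*
 * Illustrative sketch, not part of the original file: a relative sleep
 * with slack, letting the wakeup coalesce with other timers anywhere in
 * [10ms, 10ms + 1ms]. coalesced_sleep_example and the chosen values are
 * hypothetical; @delta is given in nanoseconds.
 */
static __maybe_unused int coalesced_sleep_example(void)
{
	ktime_t rel = ms_to_ktime(10);

	set_current_state(TASK_UNINTERRUPTIBLE);
	return schedule_hrtimeout_range(&rel, NSEC_PER_MSEC,
					HRTIMER_MODE_REL);
}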

/**
 * schedule_hrtimeout - sleep until timeout
 * @expires:	timeout value (ktime_t)
 * @mode:	timer mode
 *
 * Make the current task sleep until the given expiry time has
 * elapsed. The routine will return immediately unless
 * the current task state has been set (see set_current_state()).
 *
 * You can set the task state as follows -
 *
 * %TASK_UNINTERRUPTIBLE - at least the time specified by @expires is
 * guaranteed to pass before the routine returns unless the current task
 * is explicitly woken up (e.g. by wake_up_process()).
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 * delivered to the current task or the current task is explicitly woken
 * up.
 *
 * The current task state is guaranteed to be %TASK_RUNNING when this
 * routine returns.
 *
 * Returns: 0 when the timer has expired. If the task was woken before the
 * timer expired by a signal (only possible in state %TASK_INTERRUPTIBLE) or
 * by an explicit wakeup, it returns -EINTR.
 */
int __sched schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode)
{
	return schedule_hrtimeout_range(expires, 0, mode);
}
EXPORT_SYMBOL_GPL(schedule_hrtimeout);

/**
 * msleep - sleep safely even with waitqueue interruptions
 * @msecs: Time in milliseconds to sleep for
 */
void msleep(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs);

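	/*
	 * schedule_timeout_uninterruptible() returns the time left, so
	 * looping guarantees at least the full delay even if the task
	 * is woken early.
	 */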
	while (timeout)
		timeout = schedule_timeout_uninterruptible(timeout);
}
EXPORT_SYMBOL(msleep);

/**
 * msleep_interruptible - sleep waiting for signals
 * @msecs: Time in milliseconds to sleep for
 *
 * Returns: The remaining time in milliseconds when the sleep is cut short
 * by a signal, 0 otherwise.
 */
unsigned long msleep_interruptible(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs);

	while (timeout && !signal_pending(current))
		timeout = schedule_timeout_interruptible(timeout);
	return jiffies_to_msecs(timeout);
}
EXPORT_SYMBOL(msleep_interruptible);
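
/*
 * Illustrative sketch, not part of the original file: a caller that
 * honors signals and reports whether the delay was cut short.
 * interruptible_delay_example is hypothetical.
 */
static __maybe_unused int interruptible_delay_example(unsigned int msecs)
{
	unsigned long left = msleep_interruptible(msecs);

	if (left)
		return -EINTR;	/* a signal cut the sleep short */
	return 0;		/* the full delay elapsed */
}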

/**
 * usleep_range_state - Sleep for an approximate time in a given state
 * @min:	Minimum time in usecs to sleep
 * @max:	Maximum time in usecs to sleep
 * @state:	State in which the current task will sleep
 *
 * In non-atomic context where the exact wakeup time is flexible, use
 * usleep_range_state() instead of udelay().  The sleep improves responsiveness
 * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces
 * power usage by allowing hrtimers to take advantage of an already-
 * scheduled interrupt instead of scheduling a new one just for this sleep.
 */
void __sched usleep_range_state(unsigned long min, unsigned long max, unsigned int state)
{
	ktime_t exp = ktime_add_us(ktime_get(), min);
	u64 delta = (u64)(max - min) * NSEC_PER_USEC;

	for (;;) {
		__set_current_state(state);
		/* Do not return before the requested sleep time has elapsed */
		if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))
			break;
	}
}
EXPORT_SYMBOL(usleep_range_state);
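
/*
 * Illustrative sketch, not part of the original file: the familiar
 * usleep_range() helper is, in effect, a thin wrapper that sleeps in
 * TASK_UNINTERRUPTIBLE state (in mainline it lives as an inline in
 * <linux/delay.h>); usleep_range_example is shown here only to relate
 * the two interfaces.
 */
static __maybe_unused void usleep_range_example(unsigned long min,
						unsigned long max)
{
	usleep_range_state(min, max, TASK_UNINTERRUPTIBLE);
}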