xref: /linux-6.15/kernel/context_tracking.c (revision c33ef43a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Context tracking: Probe on high-level context boundaries such as kernel
4  * and userspace. This includes syscall and exception entry/exit.
5  *
6  * This is used by RCU to remove its dependency on the timer tick while a CPU
7  * runs in userspace.
8  *
9  *  Started by Frederic Weisbecker:
10  *
11  * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <[email protected]>
12  *
13  * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
14  * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
15  *
16  */
17 
18 #include <linux/context_tracking.h>
19 #include <linux/rcupdate.h>
20 #include <linux/sched.h>
21 #include <linux/hardirq.h>
22 #include <linux/export.h>
23 #include <linux/kprobes.h>
24 #include <trace/events/rcu.h>
25 
26 
27 DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
28 #ifdef CONFIG_CONTEXT_TRACKING_IDLE
29 	.dynticks_nesting = 1,
30 	.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
31 	.dynticks = ATOMIC_INIT(1),
32 #endif
33 };
34 EXPORT_SYMBOL_GPL(context_tracking);
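
/*
 * Illustrative sketch (hypothetical helper, not part of the upstream file):
 * the initializers above mirror what rcu_eqs_exit() establishes, so a freshly
 * booted CPU starts out with RCU watching it (odd ->dynticks) at process-level
 * nesting depth 1. The functions below reach this per-CPU state through
 * this_cpu_ptr(), e.g.:
 */
static inline struct context_tracking *example_this_cpu_ct(void)
{
	return this_cpu_ptr(&context_tracking);
}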
35 
36 #ifdef CONFIG_CONTEXT_TRACKING_IDLE
37 #define TPS(x)  tracepoint_string(x)
38 
39 /* Record the current task on dyntick-idle entry. */
40 static __always_inline void rcu_dynticks_task_enter(void)
41 {
42 #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
43 	WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
44 #endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
45 }
46 
47 /* Record no current task on dyntick-idle exit. */
48 static __always_inline void rcu_dynticks_task_exit(void)
49 {
50 #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
51 	WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
52 #endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
53 }
54 
55 /* Turn on heavyweight RCU tasks trace readers on idle/user entry. */
56 static __always_inline void rcu_dynticks_task_trace_enter(void)
57 {
58 #ifdef CONFIG_TASKS_TRACE_RCU
59 	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
60 		current->trc_reader_special.b.need_mb = true;
61 #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
62 }
63 
64 /* Turn off heavyweight RCU tasks trace readers on idle/user exit. */
65 static __always_inline void rcu_dynticks_task_trace_exit(void)
66 {
67 #ifdef CONFIG_TASKS_TRACE_RCU
68 	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
69 		current->trc_reader_special.b.need_mb = false;
70 #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
71 }
72 
73 /*
74  * Record entry into an extended quiescent state.  This is only to be
75  * called when not already in an extended quiescent state, that is,
76  * RCU is watching prior to the call to this function and is no longer
77  * watching upon return.
78  */
79 static noinstr void rcu_dynticks_eqs_enter(void)
80 {
81 	int seq;
82 
83 	/*
84 	 * CPUs seeing atomic_add_return() must see prior RCU read-side
85 	 * critical sections, and we also must force ordering with the
86 	 * next idle sojourn.
87 	 */
88 	rcu_dynticks_task_trace_enter();  // Before ->dynticks update!
89 	seq = rcu_dynticks_inc(1);
90 	// RCU is no longer watching.  Better be in extended quiescent state!
91 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & 0x1));
92 }
93 
94 /*
95  * Record exit from an extended quiescent state.  This is only to be
96  * called from an extended quiescent state, that is, RCU is not watching
97  * prior to the call to this function and is watching upon return.
98  */
99 static noinstr void rcu_dynticks_eqs_exit(void)
100 {
101 	int seq;
102 
103 	/*
104 	 * CPUs seeing atomic_add_return() must see prior idle sojourns,
105 	 * and we also must force ordering with the next RCU read-side
106 	 * critical section.
107 	 */
108 	seq = rcu_dynticks_inc(1);
109 	// RCU is now watching.  Better not be in an extended quiescent state!
110 	rcu_dynticks_task_trace_exit();  // After ->dynticks update!
111 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & 0x1));
112 }
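
/*
 * Illustrative sketch (hypothetical helper, not part of the upstream file):
 * the WARN_ON_ONCE() checks above spell out the ->dynticks convention used
 * throughout this file: an odd counter value means RCU is watching this CPU,
 * an even value means the CPU is in an extended quiescent state. The real
 * predicate is rcu_dynticks_curr_cpu_in_eqs(), which tests the inverse
 * condition; a minimal sketch of the parity test is:
 */
static __always_inline bool example_rcu_watching_this_cpu(void)
{
	/* Odd ->dynticks: RCU is watching; even: extended quiescent state. */
	return ct_dynticks() & 0x1;
}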
113 
114 /*
115  * Enter an RCU extended quiescent state, which can be either the
116  * idle loop or adaptive-tickless usermode execution.
117  *
118  * We crowbar the ->dynticks_nmi_nesting field to zero to allow for
119  * the possibility of usermode upcalls having messed up our count
120  * of interrupt nesting level during the prior busy period.
121  */
122 static void noinstr rcu_eqs_enter(bool user)
123 {
124 	struct context_tracking *ct = this_cpu_ptr(&context_tracking);
125 
126 	WARN_ON_ONCE(ct_dynticks_nmi_nesting() != DYNTICK_IRQ_NONIDLE);
127 	WRITE_ONCE(ct->dynticks_nmi_nesting, 0);
128 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
129 		     ct_dynticks_nesting() == 0);
130 	if (ct_dynticks_nesting() != 1) {
131 		// RCU will still be watching, so just do accounting and leave.
132 		ct->dynticks_nesting--;
133 		return;
134 	}
135 
136 	instrumentation_begin();
137 	lockdep_assert_irqs_disabled();
138 	trace_rcu_dyntick(TPS("Start"), ct_dynticks_nesting(), 0, ct_dynticks());
139 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
140 	rcu_preempt_deferred_qs(current);
141 
142 	// instrumentation for the noinstr rcu_dynticks_eqs_enter()
143 	instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
144 
145 	instrumentation_end();
146 	WRITE_ONCE(ct->dynticks_nesting, 0); /* Avoid irq-access tearing. */
147 	// RCU is watching here ...
148 	rcu_dynticks_eqs_enter();
149 	// ... but is no longer watching here.
150 	rcu_dynticks_task_enter();
151 }
152 
153 /*
154  * Exit an RCU extended quiescent state, which can be either the
155  * idle loop or adaptive-tickless usermode execution.
156  *
157  * We crowbar the ->dynticks_nmi_nesting field to DYNTICK_IRQ_NONIDLE to
158  * allow for the possibility of usermode upcalls messing up our count of
159  * interrupt nesting level during the busy period that is just now starting.
160  */
161 static void noinstr rcu_eqs_exit(bool user)
162 {
163 	struct context_tracking *ct = this_cpu_ptr(&context_tracking);
164 	long oldval;
165 
166 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
167 	oldval = ct_dynticks_nesting();
168 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
169 	if (oldval) {
170 		// RCU was already watching, so just do accounting and leave.
171 		ct->dynticks_nesting++;
172 		return;
173 	}
174 	rcu_dynticks_task_exit();
175 	// RCU is not watching here ...
176 	rcu_dynticks_eqs_exit();
177 	// ... but is watching here.
178 	instrumentation_begin();
179 
180 	// instrumentation for the noinstr rcu_dynticks_eqs_exit()
181 	instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
182 
183 	trace_rcu_dyntick(TPS("End"), ct_dynticks_nesting(), 1, ct_dynticks());
184 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
185 	WRITE_ONCE(ct->dynticks_nesting, 1);
186 	WARN_ON_ONCE(ct_dynticks_nmi_nesting());
187 	WRITE_ONCE(ct->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
188 	instrumentation_end();
189 }
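
/*
 * Worked example (illustrative, added for clarity): only the outermost
 * enter/exit pair actually toggles RCU. Starting from the kernel with
 * ->dynticks_nesting == 1:
 *
 *	rcu_eqs_enter(false);	// nesting 1 -> 0, ->dynticks goes even,
 *				// RCU stops watching this CPU
 *	...			// idle or adaptive-tickless user mode
 *	rcu_eqs_exit(false);	// ->dynticks goes odd again, nesting 0 -> 1
 *
 * Nested calls (enter with nesting > 1, exit with nesting > 0) only adjust
 * ->dynticks_nesting and leave RCU watching.
 */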
190 
191 /**
192  * ct_nmi_exit - inform RCU of exit from NMI context
193  *
194  * If we are returning from the outermost NMI handler that interrupted an
195  * RCU-idle period, update ct->dynticks and ct->dynticks_nmi_nesting
196  * to let the RCU grace-period handling know that the CPU is back to
197  * being RCU-idle.
198  *
199  * If you add or remove a call to ct_nmi_exit(), be sure to test
200  * with CONFIG_RCU_EQS_DEBUG=y.
201  */
202 void noinstr ct_nmi_exit(void)
203 {
204 	struct context_tracking *ct = this_cpu_ptr(&context_tracking);
205 
206 	instrumentation_begin();
207 	/*
208 	 * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
209 	 * (We are exiting an NMI handler, so RCU better be paying attention
210 	 * to us!)
211 	 */
212 	WARN_ON_ONCE(ct_dynticks_nmi_nesting() <= 0);
213 	WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs());
214 
215 	/*
216 	 * If the nesting level is not 1, the CPU wasn't RCU-idle, so
217 	 * leave it in non-RCU-idle state.
218 	 */
219 	if (ct_dynticks_nmi_nesting() != 1) {
220 		trace_rcu_dyntick(TPS("--="), ct_dynticks_nmi_nesting(), ct_dynticks_nmi_nesting() - 2,
221 				  ct_dynticks());
222 		WRITE_ONCE(ct->dynticks_nmi_nesting, /* No store tearing. */
223 			   ct_dynticks_nmi_nesting() - 2);
224 		instrumentation_end();
225 		return;
226 	}
227 
228 	/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
229 	trace_rcu_dyntick(TPS("Startirq"), ct_dynticks_nmi_nesting(), 0, ct_dynticks());
230 	WRITE_ONCE(ct->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
231 
232 	// instrumentation for the noinstr rcu_dynticks_eqs_enter()
233 	instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
234 	instrumentation_end();
235 
236 	// RCU is watching here ...
237 	rcu_dynticks_eqs_enter();
238 	// ... but is no longer watching here.
239 
240 	if (!in_nmi())
241 		rcu_dynticks_task_enter();
242 }
243 
244 /**
245  * ct_nmi_enter - inform RCU of entry to NMI context
246  *
247  * If the CPU was idle from RCU's viewpoint, update ct->dynticks and
248  * ct->dynticks_nmi_nesting to let the RCU grace-period handling know
249  * that the CPU is active.  This implementation permits nested NMIs, as
250  * long as the nesting level does not overflow an int.  (You will probably
251  * run out of stack space first.)
252  *
253  * If you add or remove a call to ct_nmi_enter(), be sure to test
254  * with CONFIG_RCU_EQS_DEBUG=y.
255  */
256 void noinstr ct_nmi_enter(void)
257 {
258 	long incby = 2;
259 	struct context_tracking *ct = this_cpu_ptr(&context_tracking);
260 
261 	/* Complain about underflow. */
262 	WARN_ON_ONCE(ct_dynticks_nmi_nesting() < 0);
263 
264 	/*
265 	 * If idle from RCU viewpoint, atomically increment ->dynticks
266 	 * to mark non-idle and increment ->dynticks_nmi_nesting by one.
267 	 * Otherwise, increment ->dynticks_nmi_nesting by two.  This means
268 	 * if ->dynticks_nmi_nesting is equal to one, we are guaranteed
269 	 * to be in the outermost NMI handler that interrupted an RCU-idle
270 	 * period (observation due to Andy Lutomirski).
271 	 */
272 	if (rcu_dynticks_curr_cpu_in_eqs()) {
273 
274 		if (!in_nmi())
275 			rcu_dynticks_task_exit();
276 
277 		// RCU is not watching here ...
278 		rcu_dynticks_eqs_exit();
279 		// ... but is watching here.
280 
281 		instrumentation_begin();
282 		// instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()
283 		instrument_atomic_read(&ct->dynticks, sizeof(ct->dynticks));
284 		// instrumentation for the noinstr rcu_dynticks_eqs_exit()
285 		instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
286 
287 		incby = 1;
288 	} else if (!in_nmi()) {
289 		instrumentation_begin();
290 		rcu_irq_enter_check_tick();
291 	} else  {
292 		instrumentation_begin();
293 	}
294 
295 	trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
296 			  ct_dynticks_nmi_nesting(),
297 			  ct_dynticks_nmi_nesting() + incby, ct_dynticks());
298 	instrumentation_end();
299 	WRITE_ONCE(ct->dynticks_nmi_nesting, /* Prevent store tearing. */
300 		   ct_dynticks_nmi_nesting() + incby);
301 	barrier();
302 }
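
/*
 * Illustrative sketch (hypothetical handler body, not part of the upstream
 * file): NMI-level code brackets its work with the two functions above. The
 * +1/+2 increment scheme means a ->dynticks_nmi_nesting value of exactly 1
 * identifies the outermost NMI that interrupted an RCU-idle CPU, which is the
 * case ct_nmi_exit() must undo:
 */
static inline void example_nmi_body(void)
{
	ct_nmi_enter();		/* RCU is watching from here on		*/
	/* ... NMI work, RCU read-side critical sections are now legal ... */
	ct_nmi_exit();		/* may return the CPU to RCU-idle	*/
}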
303 
304 /**
305  * ct_idle_enter - inform RCU that current CPU is entering idle
306  *
307  * Enter idle mode, in other words, -leave- the mode in which RCU
308  * read-side critical sections can occur.  (Though RCU read-side
309  * critical sections can occur in irq handlers in idle, a possibility
310  * handled by irq_enter() and irq_exit().)
311  *
312  * If you add or remove a call to ct_idle_enter(), be sure to test with
313  * CONFIG_RCU_EQS_DEBUG=y.
314  */
315 void noinstr ct_idle_enter(void)
316 {
317 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
318 	rcu_eqs_enter(false);
319 }
320 EXPORT_SYMBOL_GPL(ct_idle_enter);
321 
322 /**
323  * ct_idle_exit - inform RCU that current CPU is leaving idle
324  *
325  * Exit idle mode, in other words, -enter- the mode in which RCU
326  * read-side critical sections can occur.
327  *
328  * If you add or remove a call to ct_idle_exit(), be sure to test with
329  * CONFIG_RCU_EQS_DEBUG=y.
330  */
331 void noinstr ct_idle_exit(void)
332 {
333 	unsigned long flags;
334 
335 	raw_local_irq_save(flags);
336 	rcu_eqs_exit(false);
337 	raw_local_irq_restore(flags);
338 }
339 EXPORT_SYMBOL_GPL(ct_idle_exit);
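
/*
 * Illustrative sketch (hypothetical idle-loop step, not part of the upstream
 * file): an idle path is expected to call ct_idle_enter() with interrupts
 * disabled, wait for an interrupt in an arch-specific way, and then call
 * ct_idle_exit() once it resumes running kernel code:
 */
static inline void example_idle_step(void)
{
	raw_local_irq_disable();
	ct_idle_enter();	/* RCU stops watching this CPU		*/
	/* arch-specific low-power wait for an interrupt goes here	*/
	ct_idle_exit();		/* RCU is watching again		*/
	raw_local_irq_enable();
}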
340 
341 /**
342  * ct_irq_enter - inform RCU that current CPU is entering irq away from idle
343  *
344  * Enter an interrupt handler, which might possibly result in exiting
345  * idle mode, in other words, entering the mode in which read-side critical
346  * sections can occur.  The caller must have disabled interrupts.
347  *
348  * Note that the Linux kernel is fully capable of entering an interrupt
349  * handler that it never exits, for example when doing upcalls to user mode!
350  * This code assumes that the idle loop never does upcalls to user mode.
351  * If your architecture's idle loop does do upcalls to user mode (or does
352  * anything else that results in unbalanced calls to the irq_enter() and
353  * irq_exit() functions), RCU will give you what you deserve, good and hard.
354  * But very infrequently and irreproducibly.
355  *
356  * Use things like work queues to work around this limitation.
357  *
358  * You have been warned.
359  *
360  * If you add or remove a call to ct_irq_enter(), be sure to test with
361  * CONFIG_RCU_EQS_DEBUG=y.
362  */
363 noinstr void ct_irq_enter(void)
364 {
365 	lockdep_assert_irqs_disabled();
366 	ct_nmi_enter();
367 }
368 
369 /**
370  * ct_irq_exit - inform RCU that current CPU is exiting irq towards idle
371  *
372  * Exit from an interrupt handler, which might possibly result in entering
373  * idle mode, in other words, leaving the mode in which read-side critical
374  * sections can occur.  The caller must have disabled interrupts.
375  *
376  * This code assumes that the idle loop never does anything that might
377  * result in unbalanced calls to irq_enter() and irq_exit().  If your
378  * architecture's idle loop violates this assumption, RCU will give you what
379  * you deserve, good and hard.  But very infrequently and irreproducibly.
380  *
381  * Use things like work queues to work around this limitation.
382  *
383  * You have been warned.
384  *
385  * If you add or remove a call to ct_irq_exit(), be sure to test with
386  * CONFIG_RCU_EQS_DEBUG=y.
387  */
388 noinstr void ct_irq_exit(void)
389 {
390 	lockdep_assert_irqs_disabled();
391 	ct_nmi_exit();
392 }
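
/*
 * Illustrative sketch (hypothetical arch irq path, not part of the upstream
 * file): on architectures that do not use the generic entry code, low-level
 * interrupt entry/exit brackets the handler with the two calls above, with
 * interrupts still disabled:
 */
static inline void example_arch_irq_path(void)
{
	lockdep_assert_irqs_disabled();
	ct_irq_enter();		/* may pull the CPU out of RCU-idle	*/
	/* ... dispatch and run the interrupt handler ... */
	ct_irq_exit();		/* may return the CPU to RCU-idle	*/
}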
393 
394 /*
395  * Wrapper for ct_irq_enter() where interrupts are enabled.
396  *
397  * If you add or remove a call to ct_irq_enter_irqson(), be sure to test
398  * with CONFIG_RCU_EQS_DEBUG=y.
399  */
400 void ct_irq_enter_irqson(void)
401 {
402 	unsigned long flags;
403 
404 	local_irq_save(flags);
405 	ct_irq_enter();
406 	local_irq_restore(flags);
407 }
408 
409 /*
410  * Wrapper for ct_irq_exit() where interrupts are enabled.
411  *
412  * If you add or remove a call to ct_irq_exit_irqson(), be sure to test
413  * with CONFIG_RCU_EQS_DEBUG=y.
414  */
415 void ct_irq_exit_irqson(void)
416 {
417 	unsigned long flags;
418 
419 	local_irq_save(flags);
420 	ct_irq_exit();
421 	local_irq_restore(flags);
422 }
423 #else
424 static __always_inline void rcu_eqs_enter(bool user) { }
425 static __always_inline void rcu_eqs_exit(bool user) { }
426 #endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */
427 
428 #ifdef CONFIG_CONTEXT_TRACKING_USER
429 
430 #define CREATE_TRACE_POINTS
431 #include <trace/events/context_tracking.h>
432 
433 DEFINE_STATIC_KEY_FALSE(context_tracking_key);
434 EXPORT_SYMBOL_GPL(context_tracking_key);
435 
436 static noinstr bool context_tracking_recursion_enter(void)
437 {
438 	int recursion;
439 
440 	recursion = __this_cpu_inc_return(context_tracking.recursion);
441 	if (recursion == 1)
442 		return true;
443 
444 	WARN_ONCE((recursion < 1), "Invalid context tracking recursion value %d\n", recursion);
445 	__this_cpu_dec(context_tracking.recursion);
446 
447 	return false;
448 }
449 
450 static __always_inline void context_tracking_recursion_exit(void)
451 {
452 	__this_cpu_dec(context_tracking.recursion);
453 }
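
/*
 * Illustrative sketch (pattern taken from the usage below): the pair above
 * forms a per-CPU re-entrancy guard. __ct_user_enter() and __ct_user_exit()
 * use it as follows:
 */
static inline void example_guarded_transition(void)
{
	if (!context_tracking_recursion_enter())
		return;
	/* ... handle the context transition ... */
	context_tracking_recursion_exit();
}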
454 
455 /**
456  * __ct_user_enter - Inform the context tracking that the CPU is going
457  *		     to enter user or guest space mode.
458  *
459  * This function must be called right before we switch from the kernel
460  * to user or guest space, when it is guaranteed that the remaining kernel
461  * instructions to execute won't use any RCU read-side critical section,
462  * because this function puts RCU into an extended quiescent state.
463  */
464 void noinstr __ct_user_enter(enum ctx_state state)
465 {
466 	lockdep_assert_irqs_disabled();
467 
468 	/* Kernel threads aren't supposed to go to userspace */
469 	WARN_ON_ONCE(!current->mm);
470 
471 	if (!context_tracking_recursion_enter())
472 		return;
473 
474 	if (__this_cpu_read(context_tracking.state) != state) {
475 		if (__this_cpu_read(context_tracking.active)) {
476 			/*
477 			 * At this stage, only low level arch entry code remains and
478 			 * then we'll run in userspace. We can assume there won't be
479 			 * any RCU read-side critical section until the next call to
480 			 * user_exit() or ct_irq_enter(). Let's remove RCU's dependency
481 			 * on the tick.
482 			 */
483 			if (state == CONTEXT_USER) {
484 				instrumentation_begin();
485 				trace_user_enter(0);
486 				vtime_user_enter(current);
487 				instrumentation_end();
488 			}
489 			/*
490 			 * Unless we run the generic entry implementation, we may be past the
491 			 * last rescheduling opportunity in the entry code. Trigger a self IPI
492 			 * that will fire and reschedule once we resume in user/guest mode.
493 			 */
494 			rcu_irq_work_resched();
495 			/*
496 			 * Enter RCU idle mode right before resuming userspace.  No use of RCU
497 			 * is permitted between this call and rcu_eqs_exit(). This way the
498 			 * CPU doesn't need to maintain the tick for RCU maintenance purposes
499 			 * when the CPU runs in userspace.
500 			 */
501 			rcu_eqs_enter(true);
502 		}
503 		/*
504 		 * Even if context tracking is disabled on this CPU, because it's outside
505 		 * the full dynticks mask for example, we still have to keep track of the
506 		 * context transitions and states to prevent inconsistency on those of
507 		 * other CPUs.
508 		 * If a task triggers an exception in userspace, sleeps in the exception
509 		 * handler and then migrates to another CPU, that new CPU must know which
510 		 * context the exception returns to by the time we call exception_exit().
511 		 * This information can only be provided by the previous CPU when it called
512 		 * exception_enter().
513 		 * OTOH we can spare the calls to vtime and RCU when context_tracking.active
514 		 * is false because we know that CPU is not tickless.
515 		 */
516 		__this_cpu_write(context_tracking.state, state);
517 	}
518 	context_tracking_recursion_exit();
519 }
520 EXPORT_SYMBOL_GPL(__ct_user_enter);
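
/*
 * Illustrative sketch (hypothetical return-to-user step, not part of the
 * upstream file): arch or generic entry code typically reaches this function
 * through the user_enter_irqoff() wrapper, as one of the very last steps
 * before returning to userspace and with interrupts disabled:
 */
static inline void example_prepare_return_to_user(void)
{
	lockdep_assert_irqs_disabled();
	/* No RCU read-side critical section may be used after this call. */
	user_enter_irqoff();	/* reaches __ct_user_enter(CONTEXT_USER) when enabled */
}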
521 
522 /*
523  * OBSOLETE:
524  * This function should be noinstr but the below local_irq_restore() is
525  * unsafe because it involves illegal RCU uses through tracing and lockdep.
526  * This is unlikely to be fixed as this function is obsolete. The preferred
527  * way is to call __ct_user_enter() through user_enter_irqoff()
528  * or context_tracking_guest_enter(). It should be the arch entry code
529  * responsibility to call into context tracking with IRQs disabled.
530  */
531 void ct_user_enter(enum ctx_state state)
532 {
533 	unsigned long flags;
534 
535 	/*
536 	 * Some contexts may involve an exception occurring in an irq,
537 	 * leading to that nesting:
538 	 * ct_irq_enter() rcu_eqs_exit(true) rcu_eqs_enter(true) ct_irq_exit()
539 	 * This would mess up the dynticks_nesting count though. And the ct_irq_*()
540 	 * helpers are enough to protect RCU uses inside the exception. So
541 	 * just return immediately if we detect we are in an IRQ.
542 	 */
543 	if (in_interrupt())
544 		return;
545 
546 	local_irq_save(flags);
547 	__ct_user_enter(state);
548 	local_irq_restore(flags);
549 }
550 NOKPROBE_SYMBOL(ct_user_enter);
551 EXPORT_SYMBOL_GPL(ct_user_enter);
552 
553 /**
554  * user_enter_callable() - Unfortunate ASM callable version of user_enter() for
555  *			   archs that didn't manage to check the context tracking
556  *			   static key from low level code.
557  *
558  * This OBSOLETE function should be noinstr but it unsafely calls
559  * local_irq_restore(), involving illegal RCU uses through tracing and lockdep.
560  * This is unlikely to be fixed as this function is obsolete. The preferred
561  * way is to call user_enter_irqoff(). It should be the arch entry code
562  * responsibility to call into context tracking with IRQs disabled.
563  */
564 void user_enter_callable(void)
565 {
566 	user_enter();
567 }
568 NOKPROBE_SYMBOL(user_enter_callable);
569 
570 /**
571  * __ct_user_exit - Inform the context tracking that the CPU is
572  *		    exiting user or guest mode and entering the kernel.
573  *
574  * This function must be called after we entered the kernel from user or
575  * guest space before any use of RCU read side critical section. This
576  * potentially include any high level kernel code like syscalls, exceptions,
577  * signal handling, etc...
578  *
579  * This call supports re-entrancy. This way it can be called from any exception
580  * handler without needing to know if we came from userspace or not.
581  */
582 void noinstr __ct_user_exit(enum ctx_state state)
583 {
584 	if (!context_tracking_recursion_enter())
585 		return;
586 
587 	if (__this_cpu_read(context_tracking.state) == state) {
588 		if (__this_cpu_read(context_tracking.active)) {
589 			/*
590 			 * Exit RCU idle mode while entering the kernel because it can
591 			 * run an RCU read-side critical section at any time.
592 			 */
593 			rcu_eqs_exit(true);
594 			if (state == CONTEXT_USER) {
595 				instrumentation_begin();
596 				vtime_user_exit(current);
597 				trace_user_exit(0);
598 				instrumentation_end();
599 			}
600 		}
601 		__this_cpu_write(context_tracking.state, CONTEXT_KERNEL);
602 	}
603 	context_tracking_recursion_exit();
604 }
605 EXPORT_SYMBOL_GPL(__ct_user_exit);
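
/*
 * Illustrative sketch (hypothetical kernel-entry step, not part of the
 * upstream file): the mirror image of the enter side, typically reached
 * through the user_exit_irqoff() wrapper before the kernel makes any use of
 * RCU. Because __ct_user_exit() is re-entrant, this is safe even when the
 * entry path cannot yet tell whether it interrupted userspace:
 */
static inline void example_enter_kernel_from_user(void)
{
	lockdep_assert_irqs_disabled();
	user_exit_irqoff();	/* reaches __ct_user_exit(CONTEXT_USER) when enabled */
	/* RCU read-side critical sections are legal from here on. */
}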
606 
607 /*
608  * OBSOLETE:
609  * This function should be noinstr but the below local_irq_save() is
610  * unsafe because it involves illegal RCU uses through tracing and lockdep.
611  * This is unlikely to be fixed as this function is obsolete. The preferred
612  * way is to call __ct_user_exit() through user_exit_irqoff()
613  * or context_tracking_guest_exit(). It should be the arch entry code
614  * responsibility to call into context tracking with IRQs disabled.
615  */
616 void ct_user_exit(enum ctx_state state)
617 {
618 	unsigned long flags;
619 
620 	if (in_interrupt())
621 		return;
622 
623 	local_irq_save(flags);
624 	__ct_user_exit(state);
625 	local_irq_restore(flags);
626 }
627 NOKPROBE_SYMBOL(ct_user_exit);
628 EXPORT_SYMBOL_GPL(ct_user_exit);
629 
630 /**
631  * user_exit_callable() - Unfortunate ASM callable version of user_exit() for
632  *			  archs that didn't manage to check the context tracking
633  *			  static key from low level code.
634  *
635  * This OBSOLETE function should be noinstr but it unsafely calls local_irq_save(),
636  * involving illegal RCU uses through tracing and lockdep. This is unlikely
637  * to be fixed as this function is obsolete. The preferred way is to call
638  * user_exit_irqoff(). It should be the arch entry code responsibility to
639  * call into context tracking with IRQs disabled.
640  */
641 void user_exit_callable(void)
642 {
643 	user_exit();
644 }
645 NOKPROBE_SYMBOL(user_exit_callable);
646 
647 void __init ct_cpu_track_user(int cpu)
648 {
649 	static __initdata bool initialized = false;
650 
651 	if (!per_cpu(context_tracking.active, cpu)) {
652 		per_cpu(context_tracking.active, cpu) = true;
653 		static_branch_inc(&context_tracking_key);
654 	}
655 
656 	if (initialized)
657 		return;
658 
659 #ifdef CONFIG_HAVE_TIF_NOHZ
660 	/*
661 	 * Set TIF_NOHZ to init/0 and let it propagate to all tasks through fork.
662 	 * This assumes that init is the only task at this early boot stage.
663 	 */
664 	set_tsk_thread_flag(&init_task, TIF_NOHZ);
665 #endif
666 	WARN_ON_ONCE(!tasklist_empty());
667 
668 	initialized = true;
669 }
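
/*
 * Illustrative note (hypothetical caller, not part of the upstream file):
 * apart from the CONFIG_CONTEXT_TRACKING_USER_FORCE path below, boot code
 * that wants user context tracking on a subset of CPUs (for example the
 * nohz_full set) is expected to call ct_cpu_track_user() once per such CPU:
 *
 *	for_each_cpu(cpu, some_boot_time_cpumask)
 *		ct_cpu_track_user(cpu);
 */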
670 
671 #ifdef CONFIG_CONTEXT_TRACKING_USER_FORCE
672 void __init context_tracking_init(void)
673 {
674 	int cpu;
675 
676 	for_each_possible_cpu(cpu)
677 		ct_cpu_track_user(cpu);
678 }
679 #endif
680 
681 #endif /* #ifdef CONFIG_CONTEXT_TRACKING_USER */
682