xref: /linux-6.15/kernel/context_tracking.c (revision 17211455)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Context tracking: Probe on high level context boundaries such as kernel
4  * and userspace. This includes syscall and exception entry/exit.
5  *
6  * This is used by RCU to remove its dependency on the timer tick while a CPU
7  * runs in userspace.
8  *
9  *  Started by Frederic Weisbecker:
10  *
11  * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <[email protected]>
12  *
13  * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
14  * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
15  *
16  */
17 
18 #include <linux/context_tracking.h>
19 #include <linux/rcupdate.h>
20 #include <linux/sched.h>
21 #include <linux/hardirq.h>
22 #include <linux/export.h>
23 #include <linux/kprobes.h>
24 #include <trace/events/rcu.h>
25 
26 
27 DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
28 #ifdef CONFIG_CONTEXT_TRACKING_IDLE
29 	.dynticks_nesting = 1,
30 	.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
31 	.dynticks = ATOMIC_INIT(1),
32 #endif
33 };
34 EXPORT_SYMBOL_GPL(context_tracking);
35 
36 #ifdef CONFIG_CONTEXT_TRACKING_IDLE
37 #define TPS(x)  tracepoint_string(x)
38 
39 /* Record the current task on dyntick-idle entry. */
40 static __always_inline void rcu_dynticks_task_enter(void)
41 {
42 #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
43 	WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
44 #endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
45 }
46 
47 /* Record no current task on dyntick-idle exit. */
48 static __always_inline void rcu_dynticks_task_exit(void)
49 {
50 #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
51 	WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
52 #endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
53 }
54 
55 /* Turn on heavyweight RCU tasks trace readers on idle/user entry. */
56 static __always_inline void rcu_dynticks_task_trace_enter(void)
57 {
58 #ifdef CONFIG_TASKS_TRACE_RCU
59 	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
60 		current->trc_reader_special.b.need_mb = true;
61 #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
62 }
63 
64 /* Turn off heavyweight RCU tasks trace readers on idle/user exit. */
65 static __always_inline void rcu_dynticks_task_trace_exit(void)
66 {
67 #ifdef CONFIG_TASKS_TRACE_RCU
68 	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
69 		current->trc_reader_special.b.need_mb = false;
70 #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
71 }
72 
73 /*
74  * Record entry into an extended quiescent state.  This is only to be
75  * called when not already in an extended quiescent state, that is,
76  * RCU is watching prior to the call to this function and is no longer
77  * watching upon return.
78  */
79 static noinstr void rcu_dynticks_eqs_enter(void)
80 {
81 	int seq;
82 
83 	/*
84 	 * CPUs seeing atomic_add_return() must see prior RCU read-side
85 	 * critical sections, and we also must force ordering with the
86 	 * next idle sojourn.
87 	 */
88 	rcu_dynticks_task_trace_enter();  // Before ->dynticks update!
89 	seq = rcu_dynticks_inc(1);
90 	// RCU is no longer watching.  Better be in extended quiescent state!
91 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & 0x1));
92 }
93 
94 /*
95  * Record exit from an extended quiescent state.  This is only to be
96  * called from an extended quiescent state, that is, RCU is not watching
97  * prior to the call to this function and is watching upon return.
98  */
99 static noinstr void rcu_dynticks_eqs_exit(void)
100 {
101 	int seq;
102 
103 	/*
104 	 * CPUs seeing atomic_add_return() must see prior idle sojourns,
105 	 * and we also must force ordering with the next RCU read-side
106 	 * critical section.
107 	 */
108 	seq = rcu_dynticks_inc(1);
109 	// RCU is now watching.  Better not be in an extended quiescent state!
110 	rcu_dynticks_task_trace_exit();  // After ->dynticks update!
111 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & 0x1));
112 }
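/*
 * Illustration only (not part of this file): the low-order bit of ->dynticks
 * is what the CONFIG_RCU_EQS_DEBUG checks above test.  The counter starts at
 * 1 (odd, RCU watching) and each EQS transition increments it once, so an
 * even value means the CPU is in an extended quiescent state.  A hypothetical
 * helper capturing that convention might look like:
 *
 *	static bool example_rcu_is_watching_this_cpu(struct context_tracking *ct)
 *	{
 *		return atomic_read(&ct->dynticks) & 0x1;	// odd => watching
 *	}
 *
 * The in-tree check used below, rcu_dynticks_curr_cpu_in_eqs(), tests the
 * same bit with the opposite sense.
 */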
113 
114 /*
115  * Enter an RCU extended quiescent state, which can be either the
116  * idle loop or adaptive-tickless usermode execution.
117  *
118  * We crowbar the ->dynticks_nmi_nesting field to zero to allow for
119  * the possibility of usermode upcalls having messed up our count
120  * of interrupt nesting level during the prior busy period.
121  */
122 static void noinstr rcu_eqs_enter(bool user)
123 {
124 	struct context_tracking *ct = this_cpu_ptr(&context_tracking);
125 
126 	WARN_ON_ONCE(ct_dynticks_nmi_nesting() != DYNTICK_IRQ_NONIDLE);
127 	WRITE_ONCE(ct->dynticks_nmi_nesting, 0);
128 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
129 		     ct_dynticks_nesting() == 0);
130 	if (ct_dynticks_nesting() != 1) {
131 		// RCU will still be watching, so just do accounting and leave.
132 		ct->dynticks_nesting--;
133 		return;
134 	}
135 
136 	instrumentation_begin();
137 	lockdep_assert_irqs_disabled();
138 	trace_rcu_dyntick(TPS("Start"), ct_dynticks_nesting(), 0, ct_dynticks());
139 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
140 	rcu_preempt_deferred_qs(current);
141 
142 	// instrumentation for the noinstr rcu_dynticks_eqs_enter()
143 	instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
144 
145 	instrumentation_end();
146 	WRITE_ONCE(ct->dynticks_nesting, 0); /* Avoid irq-access tearing. */
147 	// RCU is watching here ...
148 	rcu_dynticks_eqs_enter();
149 	// ... but is no longer watching here.
150 	rcu_dynticks_task_enter();
151 }
152 
153 /*
154  * Exit an RCU extended quiescent state, which can be either the
155  * idle loop or adaptive-tickless usermode execution.
156  *
157  * We crowbar the ->dynticks_nmi_nesting field to DYNTICK_IRQ_NONIDLE to
158  * allow for the possibility of usermode upcalls messing up our count of
159  * interrupt nesting level during the busy period that is just now starting.
160  */
161 static void noinstr rcu_eqs_exit(bool user)
162 {
163 	struct context_tracking *ct = this_cpu_ptr(&context_tracking);
164 	long oldval;
165 
166 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
167 	oldval = ct_dynticks_nesting();
168 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
169 	if (oldval) {
170 		// RCU was already watching, so just do accounting and leave.
171 		ct->dynticks_nesting++;
172 		return;
173 	}
174 	rcu_dynticks_task_exit();
175 	// RCU is not watching here ...
176 	rcu_dynticks_eqs_exit();
177 	// ... but is watching here.
178 	instrumentation_begin();
179 
180 	// instrumentation for the noinstr rcu_dynticks_eqs_exit()
181 	instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
182 
183 	trace_rcu_dyntick(TPS("End"), ct_dynticks_nesting(), 1, ct_dynticks());
184 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
185 	WRITE_ONCE(ct->dynticks_nesting, 1);
186 	WARN_ON_ONCE(ct_dynticks_nmi_nesting());
187 	WRITE_ONCE(ct->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
188 	instrumentation_end();
189 }
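/*
 * Worked example (illustration only) of the state managed by the two
 * functions above, for a CPU going idle from task context and coming back:
 *
 *	                           ->dynticks_nesting   ->dynticks_nmi_nesting
 *	running in the kernel              1              DYNTICK_IRQ_NONIDLE
 *	rcu_eqs_enter()                    0                       0
 *	  (idle or userspace, EQS)         0                       0
 *	rcu_eqs_exit()                     1              DYNTICK_IRQ_NONIDLE
 *
 * Nested calls (oldval != 0, or nesting != 1 on entry) only adjust
 * ->dynticks_nesting and never toggle the extended quiescent state itself.
 */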
190 
191 /**
192  * rcu_nmi_exit - inform RCU of exit from NMI context
193  *
194  * If we are returning from the outermost NMI handler that interrupted an
195  * RCU-idle period, update ct->dynticks and ct->dynticks_nmi_nesting
196  * to let the RCU grace-period handling know that the CPU is back to
197  * being RCU-idle.
198  *
199  * If you add or remove a call to rcu_nmi_exit(), be sure to test
200  * with CONFIG_RCU_EQS_DEBUG=y.
201  */
202 void noinstr rcu_nmi_exit(void)
203 {
204 	struct context_tracking *ct = this_cpu_ptr(&context_tracking);
205 
206 	instrumentation_begin();
207 	/*
208 	 * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
209 	 * (We are exiting an NMI handler, so RCU better be paying attention
210 	 * to us!)
211 	 */
212 	WARN_ON_ONCE(ct_dynticks_nmi_nesting() <= 0);
213 	WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs());
214 
215 	/*
216 	 * If the nesting level is not 1, the CPU wasn't RCU-idle, so
217 	 * leave it in non-RCU-idle state.
218 	 */
219 	if (ct_dynticks_nmi_nesting() != 1) {
220 		trace_rcu_dyntick(TPS("--="), ct_dynticks_nmi_nesting(), ct_dynticks_nmi_nesting() - 2,
221 				  ct_dynticks());
222 		WRITE_ONCE(ct->dynticks_nmi_nesting, /* No store tearing. */
223 			   ct_dynticks_nmi_nesting() - 2);
224 		instrumentation_end();
225 		return;
226 	}
227 
228 	/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
229 	trace_rcu_dyntick(TPS("Startirq"), ct_dynticks_nmi_nesting(), 0, ct_dynticks());
230 	WRITE_ONCE(ct->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
231 
232 	// instrumentation for the noinstr rcu_dynticks_eqs_enter()
233 	instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
234 	instrumentation_end();
235 
236 	// RCU is watching here ...
237 	rcu_dynticks_eqs_enter();
238 	// ... but is no longer watching here.
239 
240 	if (!in_nmi())
241 		rcu_dynticks_task_enter();
242 }
243 
244 /**
245  * rcu_nmi_enter - inform RCU of entry to NMI context
246  *
247  * If the CPU was idle from RCU's viewpoint, update ct->dynticks and
248  * ct->dynticks_nmi_nesting to let the RCU grace-period handling know
249  * that the CPU is active.  This implementation permits nested NMIs, as
250  * long as the nesting level does not overflow an int.  (You will probably
251  * run out of stack space first.)
252  *
253  * If you add or remove a call to rcu_nmi_enter(), be sure to test
254  * with CONFIG_RCU_EQS_DEBUG=y.
255  */
256 void noinstr rcu_nmi_enter(void)
257 {
258 	long incby = 2;
259 	struct context_tracking *ct = this_cpu_ptr(&context_tracking);
260 
261 	/* Complain about underflow. */
262 	WARN_ON_ONCE(ct_dynticks_nmi_nesting() < 0);
263 
264 	/*
265 	 * If idle from RCU viewpoint, atomically increment ->dynticks
266 	 * to mark non-idle and increment ->dynticks_nmi_nesting by one.
267 	 * Otherwise, increment ->dynticks_nmi_nesting by two.  This means
268 	 * if ->dynticks_nmi_nesting is equal to one, we are guaranteed
269 	 * to be in the outermost NMI handler that interrupted an RCU-idle
270 	 * period (observation due to Andy Lutomirski).
271 	 */
272 	if (rcu_dynticks_curr_cpu_in_eqs()) {
273 
274 		if (!in_nmi())
275 			rcu_dynticks_task_exit();
276 
277 		// RCU is not watching here ...
278 		rcu_dynticks_eqs_exit();
279 		// ... but is watching here.
280 
281 		instrumentation_begin();
282 		// instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()
283 		instrument_atomic_read(&ct->dynticks, sizeof(ct->dynticks));
284 		// instrumentation for the noinstr rcu_dynticks_eqs_exit()
285 		instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
286 
287 		incby = 1;
288 	} else if (!in_nmi()) {
289 		instrumentation_begin();
290 		rcu_irq_enter_check_tick();
291 	} else  {
292 		instrumentation_begin();
293 	}
294 
295 	trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
296 			  ct_dynticks_nmi_nesting(),
297 			  ct_dynticks_nmi_nesting() + incby, ct_dynticks());
298 	instrumentation_end();
299 	WRITE_ONCE(ct->dynticks_nmi_nesting, /* Prevent store tearing. */
300 		   ct_dynticks_nmi_nesting() + incby);
301 	barrier();
302 }
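/*
 * Worked example (illustration only): an interrupt taken from idle, with an
 * NMI nested inside it, drives ->dynticks_nmi_nesting as follows:
 *
 *	idle, in EQS                                 nmi_nesting == 0
 *	  ct_irq_enter()   -> EQS exit, incby = 1    nmi_nesting == 1
 *	    ct_nmi_enter() -> incby = 2              nmi_nesting == 3
 *	    ct_nmi_exit()  -> subtract 2             nmi_nesting == 1
 *	  ct_irq_exit()    -> reset to 0, EQS entry  nmi_nesting == 0
 *
 * This is why a nesting level of exactly 1 in rcu_nmi_exit() identifies the
 * outermost handler that interrupted an RCU-idle period.
 */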
303 
304 /**
305  * rcu_idle_enter - inform RCU that current CPU is entering idle
306  *
307  * Enter idle mode, in other words, -leave- the mode in which RCU
308  * read-side critical sections can occur.  (Though RCU read-side
309  * critical sections can occur in irq handlers in idle, a possibility
310  * handled by irq_enter() and irq_exit().)
311  *
312  * If you add or remove a call to rcu_idle_enter(), be sure to test with
313  * CONFIG_RCU_EQS_DEBUG=y.
314  */
315 void noinstr rcu_idle_enter(void)
316 {
317 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
318 	rcu_eqs_enter(false);
319 }
320 
321 /**
322  * rcu_idle_exit - inform RCU that current CPU is leaving idle
323  *
324  * Exit idle mode, in other words, -enter- the mode in which RCU
325  * read-side critical sections can occur.
326  *
327  * If you add or remove a call to rcu_idle_exit(), be sure to test with
328  * CONFIG_RCU_EQS_DEBUG=y.
329  */
330 void noinstr rcu_idle_exit(void)
331 {
332 	unsigned long flags;
333 
334 	raw_local_irq_save(flags);
335 	rcu_eqs_exit(false);
336 	raw_local_irq_restore(flags);
337 }
338 EXPORT_SYMBOL_GPL(rcu_idle_exit);
339 
340 noinstr void ct_idle_enter(void)
341 {
342 	rcu_idle_enter();
343 }
344 EXPORT_SYMBOL_GPL(ct_idle_enter);
345 
346 void ct_idle_exit(void)
347 {
348 	rcu_idle_exit();
349 }
350 EXPORT_SYMBOL_GPL(ct_idle_exit);
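/*
 * Usage sketch (hypothetical caller, illustration only): the idle loop is
 * expected to bracket its low-power wait with these hooks, entering with
 * interrupts disabled as rcu_idle_enter() checks above:
 *
 *	static void example_idle_step(void)		// hypothetical
 *	{
 *		raw_local_irq_disable();
 *		ct_idle_enter();		// RCU stops watching this CPU
 *		example_low_power_wait();	// hypothetical wfi/hlt wrapper
 *		ct_idle_exit();			// RCU is watching again
 *		raw_local_irq_enable();
 *	}
 */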
351 
352 /**
353  * ct_irq_enter - inform RCU that current CPU is entering irq away from idle
354  *
355  * Enter an interrupt handler, which might possibly result in exiting
356  * idle mode, in other words, entering the mode in which read-side critical
357  * sections can occur.  The caller must have disabled interrupts.
358  *
359  * Note that the Linux kernel is fully capable of entering an interrupt
360  * handler that it never exits, for example when doing upcalls to user mode!
361  * This code assumes that the idle loop never does upcalls to user mode.
362  * If your architecture's idle loop does do upcalls to user mode (or does
363  * anything else that results in unbalanced calls to the irq_enter() and
364  * irq_exit() functions), RCU will give you what you deserve, good and hard.
365  * But very infrequently and irreproducibly.
366  *
367  * Use things like work queues to work around this limitation.
368  *
369  * You have been warned.
370  *
371  * If you add or remove a call to ct_irq_enter(), be sure to test with
372  * CONFIG_RCU_EQS_DEBUG=y.
373  */
374 noinstr void ct_irq_enter(void)
375 {
376 	lockdep_assert_irqs_disabled();
377 	ct_nmi_enter();
378 }
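/*
 * Sketch of the work-queue workaround mentioned above (hypothetical names,
 * illustration only): instead of performing an upcall from the idle loop,
 * defer the work to process context, where irq_enter()/irq_exit() calls
 * stay balanced:
 *
 *	static void example_upcall_workfn(struct work_struct *work)
 *	{
 *		// runs in a kworker, where RCU is always watching
 *	}
 *	static DECLARE_WORK(example_upcall_work, example_upcall_workfn);
 *
 *	// from the context that would otherwise do the upcall:
 *	schedule_work(&example_upcall_work);
 */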
379 
380 /**
381  * ct_irq_exit - inform RCU that current CPU is exiting irq towards idle
382  *
383  * Exit from an interrupt handler, which might possibly result in entering
384  * idle mode, in other words, leaving the mode in which read-side critical
385  * sections can occur.  The caller must have disabled interrupts.
386  *
387  * This code assumes that the idle loop never does anything that might
388  * result in unbalanced calls to irq_enter() and irq_exit().  If your
389  * architecture's idle loop violates this assumption, RCU will give you what
390  * you deserve, good and hard.  But very infrequently and irreproducibly.
391  *
392  * Use things like work queues to work around this limitation.
393  *
394  * You have been warned.
395  *
396  * If you add or remove a call to ct_irq_exit(), be sure to test with
397  * CONFIG_RCU_EQS_DEBUG=y.
398  */
399 noinstr void ct_irq_exit(void)
400 {
401 	lockdep_assert_irqs_disabled();
402 	ct_nmi_exit();
403 }
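/*
 * Pairing sketch (hypothetical architecture code, illustration only): on
 * architectures that do not use the generic entry code, every non-NMI
 * hardware interrupt is expected to be bracketed like this, with interrupts
 * disabled:
 *
 *	void example_arch_handle_irq(struct pt_regs *regs)	// hypothetical
 *	{
 *		ct_irq_enter();			// may pull the CPU out of an EQS
 *		example_dispatch_irq(regs);	// hypothetical handler dispatch
 *		ct_irq_exit();			// may let the CPU fall back into EQS
 *	}
 */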
404 
405 /*
406  * Wrapper for ct_irq_enter() where interrupts are enabled.
407  *
408  * If you add or remove a call to ct_irq_enter_irqson(), be sure to test
409  * with CONFIG_RCU_EQS_DEBUG=y.
410  */
411 void ct_irq_enter_irqson(void)
412 {
413 	unsigned long flags;
414 
415 	local_irq_save(flags);
416 	ct_irq_enter();
417 	local_irq_restore(flags);
418 }
419 
420 /*
421  * Wrapper for ct_irq_exit() where interrupts are enabled.
422  *
423  * If you add or remove a call to ct_irq_exit_irqson(), be sure to test
424  * with CONFIG_RCU_EQS_DEBUG=y.
425  */
426 void ct_irq_exit_irqson(void)
427 {
428 	unsigned long flags;
429 
430 	local_irq_save(flags);
431 	ct_irq_exit();
432 	local_irq_restore(flags);
433 }
434 
435 noinstr void ct_nmi_enter(void)
436 {
437 	rcu_nmi_enter();
438 }
439 
440 noinstr void ct_nmi_exit(void)
441 {
442 	rcu_nmi_exit();
443 }
444 #endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */
445 
446 #ifdef CONFIG_NO_HZ_FULL
447 /**
448  * rcu_user_enter - inform RCU that we are resuming userspace.
449  *
450  * Enter RCU idle mode right before resuming userspace.  No use of RCU
451  * is permitted between this call and rcu_user_exit(). This way the
452  * CPU doesn't need to maintain the tick for RCU maintenance purposes
453  * when the CPU runs in userspace.
454  *
455  * If you add or remove a call to rcu_user_enter(), be sure to test with
456  * CONFIG_RCU_EQS_DEBUG=y.
457  */
458 noinstr void rcu_user_enter(void)
459 {
460 	rcu_eqs_enter(true);
461 }
462 
463 /**
464  * rcu_user_exit - inform RCU that we are exiting userspace.
465  *
466  * Exit RCU idle mode while entering the kernel, because the kernel can
467  * run an RCU read-side critical section at any time.
468  *
469  * If you add or remove a call to rcu_user_exit(), be sure to test with
470  * CONFIG_RCU_EQS_DEBUG=y.
471  */
472 void noinstr rcu_user_exit(void)
473 {
474 	rcu_eqs_exit(true);
475 }
476 #endif /* #ifdef CONFIG_NO_HZ_FULL */
477 
478 #ifdef CONFIG_CONTEXT_TRACKING_USER
479 
480 #define CREATE_TRACE_POINTS
481 #include <trace/events/context_tracking.h>
482 
483 DEFINE_STATIC_KEY_FALSE(context_tracking_key);
484 EXPORT_SYMBOL_GPL(context_tracking_key);
485 
486 static noinstr bool context_tracking_recursion_enter(void)
487 {
488 	int recursion;
489 
490 	recursion = __this_cpu_inc_return(context_tracking.recursion);
491 	if (recursion == 1)
492 		return true;
493 
494 	WARN_ONCE((recursion < 1), "Invalid context tracking recursion value %d\n", recursion);
495 	__this_cpu_dec(context_tracking.recursion);
496 
497 	return false;
498 }
499 
500 static __always_inline void context_tracking_recursion_exit(void)
501 {
502 	__this_cpu_dec(context_tracking.recursion);
503 }
504 
505 /**
506  * __ct_user_enter - Inform the context tracking that the CPU is going
507  *		     to enter user or guest space mode.
508  *
509  * This function must be called right before we switch from the kernel
510  * to user or guest space, when it is guaranteed that the remaining kernel
511  * instructions to be executed won't use any RCU read-side critical section,
512  * because this function puts RCU into an extended quiescent state.
513  */
514 void noinstr __ct_user_enter(enum ctx_state state)
515 {
516 	lockdep_assert_irqs_disabled();
517 
518 	/* Kernel threads aren't supposed to go to userspace */
519 	WARN_ON_ONCE(!current->mm);
520 
521 	if (!context_tracking_recursion_enter())
522 		return;
523 
524 	if (__this_cpu_read(context_tracking.state) != state) {
525 		if (__this_cpu_read(context_tracking.active)) {
526 			/*
527 			 * At this stage, only low level arch entry code remains and
528 			 * then we'll run in userspace. We can assume there won't be
529 			 * any RCU read-side critical section until the next call to
530 			 * user_exit() or ct_irq_enter(). Let's remove RCU's dependency
531 			 * on the tick.
532 			 */
533 			if (state == CONTEXT_USER) {
534 				instrumentation_begin();
535 				trace_user_enter(0);
536 				vtime_user_enter(current);
537 				instrumentation_end();
538 			}
539 			/*
540 			 * Unlike with the generic entry implementation, we may be past the
541 			 * last rescheduling opportunity in the entry code. Trigger a self-IPI
542 			 * that will fire and reschedule once we resume in user/guest mode.
543 			 */
544 			rcu_irq_work_resched();
545 			rcu_user_enter();
546 		}
547 		/*
548 		 * Even if context tracking is disabled on this CPU, because it's outside
549 		 * the full dynticks mask for example, we still have to keep track of the
550 		 * context transitions and states to prevent inconsistency on those of
551 		 * other CPUs.
552 		 * If a task triggers an exception in userspace, sleeps in the exception
553 		 * handler and then migrates to another CPU, the new CPU must know where
554 		 * the exception returns by the time we call exception_exit().
555 		 * This information can only be provided by the previous CPU when it called
556 		 * exception_enter().
557 		 * OTOH we can spare the calls to vtime and RCU when context_tracking.active
558 		 * is false because we know that CPU is not tickless.
559 		 */
560 		__this_cpu_write(context_tracking.state, state);
561 	}
562 	context_tracking_recursion_exit();
563 }
564 EXPORT_SYMBOL_GPL(__ct_user_enter);
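/*
 * Preferred-usage sketch (hypothetical arch entry code, illustration only):
 * the irqoff wrappers from <linux/context_tracking.h> funnel into
 * __ct_user_enter()/__ct_user_exit() with interrupts already disabled:
 *
 *	static void example_exit_to_user_mode(void)	// hypothetical
 *	{
 *		lockdep_assert_irqs_disabled();
 *		user_enter_irqoff();	// -> __ct_user_enter(CONTEXT_USER)
 *	}
 *
 *	static void example_enter_from_user_mode(void)	// hypothetical
 *	{
 *		lockdep_assert_irqs_disabled();
 *		user_exit_irqoff();	// -> __ct_user_exit(CONTEXT_USER)
 *	}
 */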
565 
566 /*
567  * OBSOLETE:
568  * This function should be noinstr but the below local_irq_restore() is
569  * unsafe because it involves illegal RCU uses through tracing and lockdep.
570  * This is unlikely to be fixed as this function is obsolete. The preferred
571  * way is to call __ct_user_enter() through user_enter_irqoff()
572  * or context_tracking_guest_enter(). It should be the arch entry code's
573  * responsibility to call into context tracking with IRQs disabled.
574  */
575 void ct_user_enter(enum ctx_state state)
576 {
577 	unsigned long flags;
578 
579 	/*
580 	 * Some contexts may involve an exception occurring in an irq,
581 	 * leading to that nesting:
582 	 * ct_irq_enter() rcu_user_exit() rcu_user_exit() ct_irq_exit()
583 	 * This would mess up the dynticks_nesting count though. And ct_irq_*()
584 	 * helpers are enough to protect RCU uses inside the exception. So
585 	 * just return immediately if we detect we are in an IRQ.
586 	 */
587 	if (in_interrupt())
588 		return;
589 
590 	local_irq_save(flags);
591 	__ct_user_enter(state);
592 	local_irq_restore(flags);
593 }
594 NOKPROBE_SYMBOL(ct_user_enter);
595 EXPORT_SYMBOL_GPL(ct_user_enter);
596 
597 /**
598  * user_enter_callable() - Unfortunate ASM callable version of user_enter() for
599  *			   archs that didn't manage to check the context tracking
600  *			   static key from low level code.
601  *
602  * This OBSOLETE function should be noinstr but it unsafely calls
603  * local_irq_restore(), involving illegal RCU uses through tracing and lockdep.
604  * This is unlikely to be fixed as this function is obsolete. The preferred
605  * way is to call user_enter_irqoff(). It should be the arch entry code's
606  * responsibility to call into context tracking with IRQs disabled.
607  */
608 void user_enter_callable(void)
609 {
610 	user_enter();
611 }
612 NOKPROBE_SYMBOL(user_enter_callable);
613 
614 /**
615  * __ct_user_exit - Inform the context tracking that the CPU is
616  *		    exiting user or guest mode and entering the kernel.
617  *
618  * This function must be called after we entered the kernel from user or
619  * guest space before any use of RCU read side critical section. This
620  * potentially include any high level kernel code like syscalls, exceptions,
621  * signal handling, etc...
622  *
623  * This call supports re-entrancy. This way it can be called from any exception
624  * handler without needing to know if we came from userspace or not.
625  */
626 void noinstr __ct_user_exit(enum ctx_state state)
627 {
628 	if (!context_tracking_recursion_enter())
629 		return;
630 
631 	if (__this_cpu_read(context_tracking.state) == state) {
632 		if (__this_cpu_read(context_tracking.active)) {
633 			/*
634 			 * We are going to run code that may use RCU. Inform
635 			 * RCU core about that (ie: we may need the tick again).
636 			 */
637 			rcu_user_exit();
638 			if (state == CONTEXT_USER) {
639 				instrumentation_begin();
640 				vtime_user_exit(current);
641 				trace_user_exit(0);
642 				instrumentation_end();
643 			}
644 		}
645 		__this_cpu_write(context_tracking.state, CONTEXT_KERNEL);
646 	}
647 	context_tracking_recursion_exit();
648 }
649 EXPORT_SYMBOL_GPL(__ct_user_exit);
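/*
 * Guest-mode sketch (hypothetical hypervisor run loop, illustration only):
 * the guest helpers referenced by the OBSOLETE-function comments in this
 * file bracket guest execution the same way, so the CPU can stay tickless
 * while the vCPU runs:
 *
 *	static void example_run_vcpu_once(void)		// hypothetical
 *	{
 *		local_irq_disable();
 *		context_tracking_guest_enter();	// CONTEXT_GUEST, like user entry
 *		example_hw_enter_guest();	// hypothetical VM entry/exit
 *		context_tracking_guest_exit();	// back to CONTEXT_KERNEL tracking
 *		local_irq_enable();
 *	}
 */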
650 
651 /*
652  * OBSOLETE:
653  * This function should be noinstr but the below local_irq_save() is
654  * unsafe because it involves illegal RCU uses through tracing and lockdep.
655  * This is unlikely to be fixed as this function is obsolete. The preferred
656  * way is to call __ct_user_exit() through user_exit_irqoff()
657  * or context_tracking_guest_exit(). It should be the arch entry code's
658  * responsibility to call into context tracking with IRQs disabled.
659  */
660 void ct_user_exit(enum ctx_state state)
661 {
662 	unsigned long flags;
663 
664 	if (in_interrupt())
665 		return;
666 
667 	local_irq_save(flags);
668 	__ct_user_exit(state);
669 	local_irq_restore(flags);
670 }
671 NOKPROBE_SYMBOL(ct_user_exit);
672 EXPORT_SYMBOL_GPL(ct_user_exit);
673 
674 /**
675  * user_exit_callable() - Unfortunate ASM callable version of user_exit() for
676  *			  archs that didn't manage to check the context tracking
677  *			  static key from low level code.
678  *
679  * This OBSOLETE function should be noinstr but it unsafely calls local_irq_save(),
680  * involving illegal RCU uses through tracing and lockdep. This is unlikely
681  * to be fixed as this function is obsolete. The preferred way is to call
682  * user_exit_irqoff(). It should be the arch entry code's responsibility to
683  * call into context tracking with IRQs disabled.
684  */
685 void user_exit_callable(void)
686 {
687 	user_exit();
688 }
689 NOKPROBE_SYMBOL(user_exit_callable);
690 
691 void __init ct_cpu_track_user(int cpu)
692 {
693 	static __initdata bool initialized = false;
694 
695 	if (!per_cpu(context_tracking.active, cpu)) {
696 		per_cpu(context_tracking.active, cpu) = true;
697 		static_branch_inc(&context_tracking_key);
698 	}
699 
700 	if (initialized)
701 		return;
702 
703 #ifdef CONFIG_HAVE_TIF_NOHZ
704 	/*
705 	 * Set TIF_NOHZ to init/0 and let it propagate to all tasks through fork.
706 	 * This assumes that init is the only task at this early boot stage.
707 	 */
708 	set_tsk_thread_flag(&init_task, TIF_NOHZ);
709 #endif
710 	WARN_ON_ONCE(!tasklist_empty());
711 
712 	initialized = true;
713 }
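/*
 * Illustration only: the static branch incremented above is what the
 * fast-path wrappers test before calling into this file, roughly along
 * these lines (assumed shape; the real helper lives in the context
 * tracking headers):
 *
 *	static inline bool example_context_tracking_enabled(void)
 *	{
 *		return static_branch_unlikely(&context_tracking_key);
 *	}
 */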
714 
715 #ifdef CONFIG_CONTEXT_TRACKING_USER_FORCE
716 void __init context_tracking_init(void)
717 {
718 	int cpu;
719 
720 	for_each_possible_cpu(cpu)
721 		ct_cpu_track_user(cpu);
722 }
723 #endif
724 
725 #endif /* #ifdef CONFIG_CONTEXT_TRACKING_USER */
726