xref: /linux-6.15/kernel/context_tracking.c (revision 493c1822)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Context tracking: Probe on high level context boundaries such as kernel
4  * and userspace. This includes syscalls and exceptions entry/exit.
5  *
6  * This is used by RCU to remove its dependency on the timer tick while a CPU
7  * runs in userspace.
8  *
9  *  Started by Frederic Weisbecker:
10  *
11  * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <[email protected]>
12  *
13  * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
14  * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
15  *
16  */
17 
18 #include <linux/context_tracking.h>
19 #include <linux/rcupdate.h>
20 #include <linux/sched.h>
21 #include <linux/hardirq.h>
22 #include <linux/export.h>
23 #include <linux/kprobes.h>
24 
25 
26 #ifdef CONFIG_CONTEXT_TRACKING_IDLE
/*
 * ct_idle_enter - Inform context tracking that the CPU is entering idle.
 *
 * Thin wrapper around rcu_idle_enter(): puts RCU in an extended quiescent
 * state so the tick can be stopped while this CPU idles. noinstr because
 * it runs on the idle entry path where instrumentation is forbidden.
 */
noinstr void ct_idle_enter(void)
{
	rcu_idle_enter();
}
EXPORT_SYMBOL_GPL(ct_idle_enter);
32 
/*
 * ct_idle_exit - Inform context tracking that the CPU left idle.
 *
 * Wrapper around rcu_idle_exit() so RCU resumes watching this CPU.
 * NOTE(review): unlike ct_idle_enter() this is not marked noinstr —
 * presumably instrumentation is legal again on the idle exit path; confirm
 * against the arch idle code.
 */
void ct_idle_exit(void)
{
	rcu_idle_exit();
}
EXPORT_SYMBOL_GPL(ct_idle_exit);
38 
/*
 * ct_irq_enter - Inform context tracking that the CPU is entering an IRQ.
 *
 * Wrapper around rcu_irq_enter(); must be called with IRQs disabled.
 * noinstr: runs on the low-level irq entry path before instrumentation
 * is allowed.
 */
noinstr void ct_irq_enter(void)
{
	rcu_irq_enter();
}
43 
/*
 * ct_irq_exit - Inform context tracking that the CPU is exiting an IRQ.
 *
 * Wrapper around rcu_irq_exit(); counterpart of ct_irq_enter(). noinstr
 * for the same low-level entry-code reasons.
 */
noinstr void ct_irq_exit(void)
{
	rcu_irq_exit();
}
48 
/*
 * ct_irq_enter_irqson - Variant of ct_irq_enter() callable where IRQs may
 * still be enabled; defers to rcu_irq_enter_irqson().
 */
void ct_irq_enter_irqson(void)
{
	rcu_irq_enter_irqson();
}
53 
/*
 * ct_irq_exit_irqson - Variant of ct_irq_exit() callable where IRQs may
 * still be enabled; defers to rcu_irq_exit_irqson().
 */
void ct_irq_exit_irqson(void)
{
	rcu_irq_exit_irqson();
}
58 
/*
 * ct_nmi_enter - Inform context tracking that the CPU is entering an NMI.
 *
 * Wrapper around rcu_nmi_enter(). noinstr: NMI entry code must not be
 * instrumented.
 */
noinstr void ct_nmi_enter(void)
{
	rcu_nmi_enter();
}
63 
/*
 * ct_nmi_exit - Inform context tracking that the CPU is exiting an NMI.
 *
 * Wrapper around rcu_nmi_exit(); counterpart of ct_nmi_enter().
 */
noinstr void ct_nmi_exit(void)
{
	rcu_nmi_exit();
}
68 #endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */
69 
70 #ifdef CONFIG_CONTEXT_TRACKING_USER
71 
72 #define CREATE_TRACE_POINTS
73 #include <trace/events/context_tracking.h>
74 
/*
 * Static key gating the user context tracking fast paths; incremented by
 * ct_cpu_track_user() once per CPU that gets tracking enabled.
 */
DEFINE_STATIC_KEY_FALSE(context_tracking_key);
EXPORT_SYMBOL_GPL(context_tracking_key);

/*
 * Per-CPU tracking state. The fields used in this file are .state (current
 * ctx_state), .active (tracking enabled on this CPU) and .recursion
 * (re-entrancy guard counter).
 */
DEFINE_PER_CPU(struct context_tracking, context_tracking);
EXPORT_SYMBOL_GPL(context_tracking);
80 
81 static noinstr bool context_tracking_recursion_enter(void)
82 {
83 	int recursion;
84 
85 	recursion = __this_cpu_inc_return(context_tracking.recursion);
86 	if (recursion == 1)
87 		return true;
88 
89 	WARN_ONCE((recursion < 1), "Invalid context tracking recursion value %d\n", recursion);
90 	__this_cpu_dec(context_tracking.recursion);
91 
92 	return false;
93 }
94 
/* Leave the section entered via context_tracking_recursion_enter(). */
static __always_inline void context_tracking_recursion_exit(void)
{
	__this_cpu_dec(context_tracking.recursion);
}
99 
100 /**
101  * __ct_user_enter - Inform the context tracking that the CPU is going
102  *		     to enter user or guest space mode.
103  *
104  * This function must be called right before we switch from the kernel
105  * to user or guest space, when it's guaranteed the remaining kernel
106  * instructions to execute won't use any RCU read side critical section
107  * because this function sets RCU in extended quiescent state.
108  */
109 void noinstr __ct_user_enter(enum ctx_state state)
110 {
111 	/* Kernel threads aren't supposed to go to userspace */
112 	WARN_ON_ONCE(!current->mm);
113 
114 	if (!context_tracking_recursion_enter())
115 		return;
116 
117 	if ( __this_cpu_read(context_tracking.state) != state) {
118 		if (__this_cpu_read(context_tracking.active)) {
119 			/*
120 			 * At this stage, only low level arch entry code remains and
121 			 * then we'll run in userspace. We can assume there won't be
122 			 * any RCU read-side critical section until the next call to
123 			 * user_exit() or ct_irq_enter(). Let's remove RCU's dependency
124 			 * on the tick.
125 			 */
126 			if (state == CONTEXT_USER) {
127 				instrumentation_begin();
128 				trace_user_enter(0);
129 				vtime_user_enter(current);
130 				instrumentation_end();
131 			}
132 			rcu_user_enter();
133 		}
134 		/*
135 		 * Even if context tracking is disabled on this CPU, because it's outside
136 		 * the full dynticks mask for example, we still have to keep track of the
137 		 * context transitions and states to prevent inconsistency on those of
138 		 * other CPUs.
139 		 * If a task triggers an exception in userspace, sleep on the exception
140 		 * handler and then migrate to another CPU, that new CPU must know where
141 		 * the exception returns by the time we call exception_exit().
142 		 * This information can only be provided by the previous CPU when it called
143 		 * exception_enter().
144 		 * OTOH we can spare the calls to vtime and RCU when context_tracking.active
145 		 * is false because we know that CPU is not tickless.
146 		 */
147 		__this_cpu_write(context_tracking.state, state);
148 	}
149 	context_tracking_recursion_exit();
150 }
151 EXPORT_SYMBOL_GPL(__ct_user_enter);
152 
/*
 * OBSOLETE:
 * This function should be noinstr but the below local_irq_restore() is
 * unsafe because it involves illegal RCU uses through tracing and lockdep.
 * This is unlikely to be fixed as this function is obsolete. The preferred
 * way is to call __ct_user_enter() through user_enter_irqoff()
 * or context_tracking_guest_enter(). It should be the arch entry code
 * responsibility to call into context tracking with IRQs disabled.
 */
void ct_user_enter(enum ctx_state state)
{
	unsigned long flags;

	/*
	 * Some contexts may involve an exception occurring in an irq,
	 * leading to that nesting:
	 * ct_irq_enter() rcu_eqs_exit(true) rcu_eqs_enter(true) ct_irq_exit()
	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
	 * helpers are enough to protect RCU uses inside the exception. So
	 * just return immediately if we detect we are in an IRQ.
	 */
	if (in_interrupt())
		return;

	/* IRQs must stay off across the state switch; see comment above. */
	local_irq_save(flags);
	__ct_user_enter(state);
	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(ct_user_enter);
EXPORT_SYMBOL_GPL(ct_user_enter);
183 
/**
 * user_enter_callable() - Unfortunate ASM callable version of user_enter() for
 *			   archs that didn't manage to check the context tracking
 *			   static key from low level code.
 *
 * This OBSOLETE function should be noinstr but it unsafely calls
 * local_irq_restore(), involving illegal RCU uses through tracing and lockdep.
 * This is unlikely to be fixed as this function is obsolete. The preferred
 * way is to call user_enter_irqoff(). It should be the arch entry code
 * responsibility to call into context tracking with IRQs disabled.
 */
void user_enter_callable(void)
{
	/* Plain trampoline so asm has a C symbol to call. */
	user_enter();
}
NOKPROBE_SYMBOL(user_enter_callable);
200 
/**
 * __ct_user_exit - Inform the context tracking that the CPU is
 *		    exiting user or guest mode and entering the kernel.
 * @state: the context being exited (CONTEXT_USER or CONTEXT_GUEST).
 *
 * This function must be called after we entered the kernel from user or
 * guest space before any use of RCU read side critical section. This
 * potentially include any high level kernel code like syscalls, exceptions,
 * signal handling, etc...
 *
 * This call supports re-entrancy. This way it can be called from any exception
 * handler without needing to know if we came from userspace or not.
 */
void noinstr __ct_user_exit(enum ctx_state state)
{
	if (!context_tracking_recursion_enter())
		return;

	if (__this_cpu_read(context_tracking.state) == state) {
		if (__this_cpu_read(context_tracking.active)) {
			/*
			 * We are going to run code that may use RCU. Inform
			 * RCU core about that (ie: we may need the tick again).
			 */
			/* rcu_user_exit() first: vtime/trace below may use RCU. */
			rcu_user_exit();
			if (state == CONTEXT_USER) {
				instrumentation_begin();
				vtime_user_exit(current);
				trace_user_exit(0);
				instrumentation_end();
			}
		}
		/* Always record the transition, even when .active is false. */
		__this_cpu_write(context_tracking.state, CONTEXT_KERNEL);
	}
	context_tracking_recursion_exit();
}
EXPORT_SYMBOL_GPL(__ct_user_exit);
237 
/*
 * OBSOLETE:
 * This function should be noinstr but the below local_irq_save() is
 * unsafe because it involves illegal RCU uses through tracing and lockdep.
 * This is unlikely to be fixed as this function is obsolete. The preferred
 * way is to call __ct_user_exit() through user_exit_irqoff()
 * or context_tracking_guest_exit(). It should be the arch entry code
 * responsibility to call into context tracking with IRQs disabled.
 */
void ct_user_exit(enum ctx_state state)
{
	unsigned long flags;

	/* Exceptions inside an IRQ are already covered; see ct_user_enter(). */
	if (in_interrupt())
		return;

	local_irq_save(flags);
	__ct_user_exit(state);
	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(ct_user_exit);
EXPORT_SYMBOL_GPL(ct_user_exit);
260 
/**
 * user_exit_callable() - Unfortunate ASM callable version of user_exit() for
 *			  archs that didn't manage to check the context tracking
 *			  static key from low level code.
 *
 * This OBSOLETE function should be noinstr but it unsafely calls local_irq_save(),
 * involving illegal RCU uses through tracing and lockdep. This is unlikely
 * to be fixed as this function is obsolete. The preferred way is to call
 * user_exit_irqoff(). It should be the arch entry code responsibility to
 * call into context tracking with IRQs disabled.
 */
void user_exit_callable(void)
{
	/* Plain trampoline so asm has a C symbol to call. */
	user_exit();
}
NOKPROBE_SYMBOL(user_exit_callable);
277 
/**
 * ct_cpu_track_user - Enable user context tracking on a CPU at boot.
 * @cpu: the CPU whose user/kernel transitions must be tracked.
 *
 * Marks @cpu's per-cpu state active and takes one reference on the static
 * key gating the tracking fast paths (one increment per newly activated
 * CPU). A one-shot section (latched via the __initdata flag) additionally
 * propagates TIF_NOHZ through fork when the arch supports it.
 */
void __init ct_cpu_track_user(int cpu)
{
	/* Boot-time latch; lives in __initdata since this is __init-only. */
	static __initdata bool initialized = false;

	if (!per_cpu(context_tracking.active, cpu)) {
		per_cpu(context_tracking.active, cpu) = true;
		static_branch_inc(&context_tracking_key);
	}

	if (initialized)
		return;

#ifdef CONFIG_HAVE_TIF_NOHZ
	/*
	 * Set TIF_NOHZ to init/0 and let it propagate to all tasks through fork
	 * This assumes that init is the only task at this early boot stage.
	 */
	set_tsk_thread_flag(&init_task, TIF_NOHZ);
#endif
	/* NOTE(review): sanity check that no other tasks exist yet — confirm tasklist_empty() semantics at this boot stage. */
	WARN_ON_ONCE(!tasklist_empty());

	initialized = true;
}
301 
302 #ifdef CONFIG_CONTEXT_TRACKING_USER_FORCE
/*
 * context_tracking_init - Force user context tracking on every possible
 * CPU at boot (compiled in under CONFIG_CONTEXT_TRACKING_USER_FORCE).
 */
void __init context_tracking_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		ct_cpu_track_user(cpu);
}
310 #endif
311 
312 #endif /* #ifdef CONFIG_CONTEXT_TRACKING_USER */
313