// SPDX-License-Identifier: GPL-2.0-only
/*
 * Context tracking: Probe on high level context boundaries such as kernel
 * and userspace. This includes syscalls and exceptions entry/exit.
 *
 * This is used by RCU to remove its dependency on the timer tick while a CPU
 * runs in userspace.
 *
 * Started by Frederic Weisbecker:
 *
 * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <[email protected]>
 *
 * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
 * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
 *
 */

#include <linux/context_tracking.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/hardirq.h>
#include <linux/export.h>
#include <linux/kprobes.h>


DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
#ifdef CONFIG_CONTEXT_TRACKING_IDLE
	.dynticks = ATOMIC_INIT(1),
#endif
};
EXPORT_SYMBOL_GPL(context_tracking);

#ifdef CONFIG_CONTEXT_TRACKING_IDLE
noinstr void ct_idle_enter(void)
{
	rcu_idle_enter();
}
EXPORT_SYMBOL_GPL(ct_idle_enter);

void ct_idle_exit(void)
{
	rcu_idle_exit();
}
EXPORT_SYMBOL_GPL(ct_idle_exit);

/**
 * ct_irq_enter - inform RCU that current CPU is entering irq away from idle
 *
 * Enter an interrupt handler, which might possibly result in exiting
 * idle mode, in other words, entering the mode in which read-side critical
 * sections can occur. The caller must have disabled interrupts.
 *
 * Note that the Linux kernel is fully capable of entering an interrupt
 * handler that it never exits, for example when doing upcalls to user mode!
 * This code assumes that the idle loop never does upcalls to user mode.
 * If your architecture's idle loop does do upcalls to user mode (or does
 * anything else that results in unbalanced calls to the irq_enter() and
 * irq_exit() functions), RCU will give you what you deserve, good and hard.
 * But very infrequently and irreproducibly.
 *
 * Use things like work queues to work around this limitation.
 *
 * You have been warned.
 *
 * If you add or remove a call to ct_irq_enter(), be sure to test with
 * CONFIG_RCU_EQS_DEBUG=y.
 */
noinstr void ct_irq_enter(void)
{
	lockdep_assert_irqs_disabled();
	ct_nmi_enter();
}

/**
 * ct_irq_exit - inform RCU that current CPU is exiting irq towards idle
 *
 * Exit from an interrupt handler, which might possibly result in entering
 * idle mode, in other words, leaving the mode in which read-side critical
 * sections can occur. The caller must have disabled interrupts.
 *
 * This code assumes that the idle loop never does anything that might
 * result in unbalanced calls to irq_enter() and irq_exit(). If your
 * architecture's idle loop violates this assumption, RCU will give you what
 * you deserve, good and hard. But very infrequently and irreproducibly.
 *
 * Use things like work queues to work around this limitation.
 *
 * You have been warned.
 *
 * If you add or remove a call to ct_irq_exit(), be sure to test with
 * CONFIG_RCU_EQS_DEBUG=y.
 */
noinstr void ct_irq_exit(void)
{
	lockdep_assert_irqs_disabled();
	ct_nmi_exit();
}

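/*
 * Illustrative sketch (not taken from any real architecture): a low level
 * interrupt path is expected to keep ct_irq_enter()/ct_irq_exit() balanced
 * and to call them with interrupts disabled, roughly along these lines:
 *
 *	// hypothetical arch interrupt entry, for illustration only
 *	void arch_handle_irq(struct pt_regs *regs)
 *	{
 *		ct_irq_enter();			// may pull the CPU out of RCU's idle EQS
 *		arch_dispatch_irq(regs);	// hypothetical dispatch to the irq handler
 *		ct_irq_exit();			// may let the CPU re-enter the idle EQS
 *	}
 *
 * Callers that cannot guarantee interrupts are disabled should use the
 * ct_irq_enter_irqson()/ct_irq_exit_irqson() wrappers below instead.
 */
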
/*
 * Wrapper for ct_irq_enter() where interrupts are enabled.
 *
 * If you add or remove a call to ct_irq_enter_irqson(), be sure to test
 * with CONFIG_RCU_EQS_DEBUG=y.
 */
void ct_irq_enter_irqson(void)
{
	unsigned long flags;

	local_irq_save(flags);
	ct_irq_enter();
	local_irq_restore(flags);
}

/*
 * Wrapper for ct_irq_exit() where interrupts are enabled.
 *
 * If you add or remove a call to ct_irq_exit_irqson(), be sure to test
 * with CONFIG_RCU_EQS_DEBUG=y.
 */
void ct_irq_exit_irqson(void)
{
	unsigned long flags;

	local_irq_save(flags);
	ct_irq_exit();
	local_irq_restore(flags);
}

noinstr void ct_nmi_enter(void)
{
	rcu_nmi_enter();
}

noinstr void ct_nmi_exit(void)
{
	rcu_nmi_exit();
}
#endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */

#ifdef CONFIG_CONTEXT_TRACKING_USER

#define CREATE_TRACE_POINTS
#include <trace/events/context_tracking.h>

DEFINE_STATIC_KEY_FALSE(context_tracking_key);
EXPORT_SYMBOL_GPL(context_tracking_key);

static noinstr bool context_tracking_recursion_enter(void)
{
	int recursion;

	recursion = __this_cpu_inc_return(context_tracking.recursion);
	if (recursion == 1)
		return true;

	WARN_ONCE((recursion < 1), "Invalid context tracking recursion value %d\n", recursion);
	__this_cpu_dec(context_tracking.recursion);

	return false;
}

static __always_inline void context_tracking_recursion_exit(void)
{
	__this_cpu_dec(context_tracking.recursion);
}

/**
 * __ct_user_enter - Inform the context tracking that the CPU is going
 *		     to enter user or guest space mode.
 *
 * This function must be called right before we switch from the kernel
 * to user or guest space, when it's guaranteed that the remaining kernel
 * instructions to execute won't use any RCU read side critical section
 * because this function sets RCU in extended quiescent state.
 */
void noinstr __ct_user_enter(enum ctx_state state)
{
	/* Kernel threads aren't supposed to go to userspace */
	WARN_ON_ONCE(!current->mm);

	if (!context_tracking_recursion_enter())
		return;

	if (__this_cpu_read(context_tracking.state) != state) {
		if (__this_cpu_read(context_tracking.active)) {
			/*
			 * At this stage, only low level arch entry code remains and
			 * then we'll run in userspace. We can assume there won't be
			 * any RCU read-side critical section until the next call to
			 * user_exit() or ct_irq_enter(). Let's remove RCU's dependency
			 * on the tick.
			 */
			if (state == CONTEXT_USER) {
				instrumentation_begin();
				trace_user_enter(0);
				vtime_user_enter(current);
				instrumentation_end();
			}
			rcu_user_enter();
		}
		/*
		 * Even if context tracking is disabled on this CPU, because it's outside
		 * the full dynticks mask for example, we still have to keep track of the
		 * context transitions and states to prevent inconsistency on those of
		 * other CPUs.
		 * If a task triggers an exception in userspace, sleeps in the exception
		 * handler and then migrates to another CPU, that new CPU must know where
		 * the exception returns by the time we call exception_exit().
		 * This information can only be provided by the previous CPU when it called
		 * exception_enter().
		 * OTOH we can spare the calls to vtime and RCU when context_tracking.active
		 * is false because we know that CPU is not tickless.
		 */
		__this_cpu_write(context_tracking.state, state);
	}
	context_tracking_recursion_exit();
}
EXPORT_SYMBOL_GPL(__ct_user_enter);

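/*
 * Illustrative sketch (hypothetical arch code, for illustration only): the
 * preferred, noinstr-safe way to enter user context is user_enter_irqoff(),
 * called with interrupts disabled just before the return to userspace:
 *
 *	// hypothetical tail of an arch's exit-to-user path
 *	static void arch_exit_to_user_mode(void)
 *	{
 *		lockdep_assert_irqs_disabled();
 *		user_enter_irqoff();	// context_tracking_enabled() check, then __ct_user_enter(CONTEXT_USER)
 *	}
 *
 * The obsolete ct_user_enter() wrapper below only remains for callers that
 * cannot guarantee interrupts are disabled.
 */
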
/*
 * OBSOLETE:
 * This function should be noinstr but the below local_irq_restore() is
 * unsafe because it involves illegal RCU uses through tracing and lockdep.
 * This is unlikely to be fixed as this function is obsolete. The preferred
 * way is to call __ct_user_enter() through user_enter_irqoff()
 * or context_tracking_guest_enter(). It should be the arch entry code
 * responsibility to call into context tracking with IRQs disabled.
 */
void ct_user_enter(enum ctx_state state)
{
	unsigned long flags;

	/*
	 * Some contexts may involve an exception occurring in an irq,
	 * leading to that nesting:
	 * ct_irq_enter() rcu_user_exit() rcu_user_exit() ct_irq_exit()
	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
	 * helpers are enough to protect RCU uses inside the exception. So
	 * just return immediately if we detect we are in an IRQ.
	 */
	if (in_interrupt())
		return;

	local_irq_save(flags);
	__ct_user_enter(state);
	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(ct_user_enter);
EXPORT_SYMBOL_GPL(ct_user_enter);

/**
 * user_enter_callable() - Unfortunate ASM callable version of user_enter() for
 *			   archs that didn't manage to check the context tracking
 *			   static key from low level code.
 *
 * This OBSOLETE function should be noinstr but it unsafely calls
 * local_irq_restore(), involving illegal RCU uses through tracing and lockdep.
 * This is unlikely to be fixed as this function is obsolete. The preferred
 * way is to call user_enter_irqoff(). It should be the arch entry code
 * responsibility to call into context tracking with IRQs disabled.
 */
void user_enter_callable(void)
{
	user_enter();
}
NOKPROBE_SYMBOL(user_enter_callable);

/**
 * __ct_user_exit - Inform the context tracking that the CPU is
 *		    exiting user or guest mode and entering the kernel.
 *
 * This function must be called after we enter the kernel from user or
 * guest space, before any use of RCU read side critical sections. This
 * potentially includes any high level kernel code like syscalls, exceptions,
 * signal handling, etc...
 *
 * This call supports re-entrancy. This way it can be called from any exception
 * handler without needing to know if we came from userspace or not.
 */
void noinstr __ct_user_exit(enum ctx_state state)
{
	if (!context_tracking_recursion_enter())
		return;

	if (__this_cpu_read(context_tracking.state) == state) {
		if (__this_cpu_read(context_tracking.active)) {
			/*
			 * We are going to run code that may use RCU. Inform
			 * RCU core about that (ie: we may need the tick again).
			 */
			rcu_user_exit();
			if (state == CONTEXT_USER) {
				instrumentation_begin();
				vtime_user_exit(current);
				trace_user_exit(0);
				instrumentation_end();
			}
		}
		__this_cpu_write(context_tracking.state, CONTEXT_KERNEL);
	}
	context_tracking_recursion_exit();
}
EXPORT_SYMBOL_GPL(__ct_user_exit);

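/*
 * Illustrative sketch (hypothetical arch code, for illustration only): the
 * counterpart on kernel entry is user_exit_irqoff(), called with interrupts
 * disabled before any code that may use RCU or instrumentation:
 *
 *	// hypothetical head of an arch's entry-from-user path
 *	static void arch_enter_from_user_mode(void)
 *	{
 *		lockdep_assert_irqs_disabled();
 *		user_exit_irqoff();	// context_tracking_enabled() check, then __ct_user_exit(CONTEXT_USER)
 *	}
 *
 * Thanks to the re-entrancy handling in __ct_user_exit(), this is safe to
 * call from any exception handler, whether or not it interrupted userspace.
 */
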
/*
 * OBSOLETE:
 * This function should be noinstr but the below local_irq_save() is
 * unsafe because it involves illegal RCU uses through tracing and lockdep.
 * This is unlikely to be fixed as this function is obsolete. The preferred
 * way is to call __ct_user_exit() through user_exit_irqoff()
 * or context_tracking_guest_exit(). It should be the arch entry code
 * responsibility to call into context tracking with IRQs disabled.
 */
void ct_user_exit(enum ctx_state state)
{
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);
	__ct_user_exit(state);
	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(ct_user_exit);
EXPORT_SYMBOL_GPL(ct_user_exit);

/**
 * user_exit_callable() - Unfortunate ASM callable version of user_exit() for
 *			   archs that didn't manage to check the context tracking
 *			   static key from low level code.
 *
 * This OBSOLETE function should be noinstr but it unsafely calls local_irq_save(),
 * involving illegal RCU uses through tracing and lockdep. This is unlikely
 * to be fixed as this function is obsolete. The preferred way is to call
 * user_exit_irqoff(). It should be the arch entry code responsibility to
 * call into context tracking with IRQs disabled.
 */
void user_exit_callable(void)
{
	user_exit();
}
NOKPROBE_SYMBOL(user_exit_callable);

void __init ct_cpu_track_user(int cpu)
{
	static __initdata bool initialized = false;

	if (!per_cpu(context_tracking.active, cpu)) {
		per_cpu(context_tracking.active, cpu) = true;
		static_branch_inc(&context_tracking_key);
	}

	if (initialized)
		return;

#ifdef CONFIG_HAVE_TIF_NOHZ
	/*
	 * Set TIF_NOHZ to init/0 and let it propagate to all tasks through fork.
	 * This assumes that init is the only task at this early boot stage.
	 */
	set_tsk_thread_flag(&init_task, TIF_NOHZ);
#endif
	WARN_ON_ONCE(!tasklist_empty());

	initialized = true;
}

#ifdef CONFIG_CONTEXT_TRACKING_USER_FORCE
void __init context_tracking_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		ct_cpu_track_user(cpu);
}
#endif

#endif /* #ifdef CONFIG_CONTEXT_TRACKING_USER */
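
/*
 * Illustrative sketch (hypothetical host run loop, for illustration only):
 * guest mode is tracked the same way as user mode, via the CONTEXT_GUEST
 * state. A virtualization host would typically bracket the low level guest
 * entry with the guest helpers, with interrupts disabled across the switch:
 *
 *	// hypothetical vcpu run loop, not a reference to any real hypervisor
 *	local_irq_disable();
 *	context_tracking_guest_enter();	// __ct_user_enter(CONTEXT_GUEST) if enabled
 *	run_guest(vcpu);		// hypothetical low level guest entry/exit
 *	context_tracking_guest_exit();	// __ct_user_exit(CONTEXT_GUEST) if enabled
 *	local_irq_enable();
 */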