// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/context_tracking.h>
#include <linux/err.h>
#include <linux/compat.h>

#include <asm/asm-prototypes.h>
#include <asm/kup.h>
#include <asm/cputime.h>
#include <asm/hw_irq.h>
#include <asm/interrupt.h>
#include <asm/kprobes.h>
#include <asm/paca.h>
#include <asm/ptrace.h>
#include <asm/reg.h>
#include <asm/signal.h>
#include <asm/switch_to.h>
#include <asm/syscall.h>
#include <asm/time.h>
#include <asm/unistd.h>

#if defined(CONFIG_PPC_ADV_DEBUG_REGS) && defined(CONFIG_PPC32)
unsigned long global_dbcr0[NR_CPUS];
#endif

typedef long (*syscall_fn)(long, long, long, long, long, long);

/* Has to run notrace because it is entered not completely "reconciled" */
notrace long system_call_exception(long r3, long r4, long r5,
				   long r6, long r7, long r8,
				   unsigned long r0, struct pt_regs *regs)
{
	syscall_fn f;

	kuep_lock();
#ifdef CONFIG_PPC32
	kuap_save_and_lock(regs);
#endif

	regs->orig_gpr3 = r3;

	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);

	CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
	user_exit_irqoff();

	trace_hardirqs_off(); /* finish reconciling */

	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
		BUG_ON(!(regs->msr & MSR_RI));
	BUG_ON(!(regs->msr & MSR_PR));
	BUG_ON(!FULL_REGS(regs));
	BUG_ON(arch_irq_disabled_regs(regs));

#ifdef CONFIG_PPC_PKEY
	if (mmu_has_feature(MMU_FTR_PKEY)) {
		unsigned long amr, iamr;
		bool flush_needed = false;
		/*
		 * When entering from userspace we mostly have the AMR/IAMR
		 * different from kernel default values. Hence don't compare.
		 */
		amr = mfspr(SPRN_AMR);
		iamr = mfspr(SPRN_IAMR);
		regs->amr = amr;
		regs->iamr = iamr;
		if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
			mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
			flush_needed = true;
		}
		if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
			mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
			flush_needed = true;
		}
		if (flush_needed)
			isync();
	} else
#endif
		kuap_assert_locked();

	booke_restore_dbcr0();

	account_cpu_user_entry();

	account_stolen_time();

	/*
	 * This is not required for the syscall exit path, but makes the
	 * stack frame look nicer. If this was initialised in the first stack
	 * frame, or if the unwinder was taught the first stack frame always
	 * returns to user with IRQS_ENABLED, this store could be avoided!
	 */
	irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);

	local_irq_enable();

	if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
		if (unlikely(trap_is_unsupported_scv(regs))) {
			/* Unsupported scv vector */
			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
			return regs->gpr[3];
		}
		/*
		 * We use the return value of do_syscall_trace_enter() as the
		 * syscall number. If the syscall was rejected for any reason,
		 * do_syscall_trace_enter() returns an invalid syscall number,
		 * the test against NR_syscalls fails, and the return value to
		 * use is already in regs->gpr[3].
		 */
		r0 = do_syscall_trace_enter(regs);
		if (unlikely(r0 >= NR_syscalls))
			return regs->gpr[3];
		r3 = regs->gpr[3];
		r4 = regs->gpr[4];
		r5 = regs->gpr[5];
		r6 = regs->gpr[6];
		r7 = regs->gpr[7];
		r8 = regs->gpr[8];

	} else if (unlikely(r0 >= NR_syscalls)) {
		if (unlikely(trap_is_unsupported_scv(regs))) {
			/* Unsupported scv vector */
			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
			return regs->gpr[3];
		}
		return -ENOSYS;
	}

	/* May be faster to do array_index_nospec? */
	barrier_nospec();

	if (unlikely(is_compat_task())) {
		f = (void *)compat_sys_call_table[r0];

		/* Compat syscalls pass only the low 32 bits of each argument */
		r3 &= 0x00000000ffffffffULL;
		r4 &= 0x00000000ffffffffULL;
		r5 &= 0x00000000ffffffffULL;
		r6 &= 0x00000000ffffffffULL;
		r7 &= 0x00000000ffffffffULL;
		r8 &= 0x00000000ffffffffULL;

	} else {
		f = (void *)sys_call_table[r0];
	}

	return f(r3, r4, r5, r6, r7, r8);
}
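
/*
 * Illustration only (assumed userspace snippet, not part of this file):
 * the caller-side half of the ABI that system_call_exception() accepts.
 * The syscall number arrives in r0 and arguments in r3-r8; for the "sc"
 * entry point, failure is reported by setting CR0.SO with a positive
 * errno in r3 (see the SO handling in syscall_exit_prepare() below):
 *
 *	li	r0, 20		# syscall number, e.g. __NR_getpid
 *	sc			# enter the kernel
 *	bns+	1f		# CR0.SO clear: r3 is the result
 *	neg	r3, r3		# CR0.SO set: r3 held a positive errno
 * 1:
 */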

/*
 * local irqs must be disabled. Returns false if the caller must re-enable
 * them, check for new work, and try again.
 *
 * This should be called with local irqs disabled, but if they were previously
 * enabled when the interrupt handler returns (indicating a process-context /
 * synchronous interrupt) then irqs_enabled should be true.
 */
static notrace __always_inline bool __prep_irq_for_enabled_exit(bool clear_ri)
{
	/* This must be done with RI=1 because tracing may touch vmaps */
	trace_hardirqs_on();

	/* This pattern matches prep_irq_for_idle */
	if (clear_ri)
		__hard_EE_RI_disable();
	else
		__hard_irq_disable();
#ifdef CONFIG_PPC64
	if (unlikely(lazy_irq_pending_nocheck())) {
		/* Took an interrupt, may have more exit work to do. */
		if (clear_ri)
			__hard_RI_enable();
		trace_hardirqs_off();
		local_paca->irq_happened |= PACA_IRQ_HARD_DIS;

		return false;
	}
	local_paca->irq_happened = 0;
	irq_soft_mask_set(IRQS_ENABLED);
#endif
	return true;
}

static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled)
{
	if (__prep_irq_for_enabled_exit(clear_ri))
		return true;

	/*
	 * Must replay pending soft-masked interrupts now. Don't just
	 * local_irq_enable(); local_irq_disable(); because if we are
	 * returning from an asynchronous interrupt here, another one
	 * might hit after irqs are enabled, and it would exit via this
	 * same path allowing another to fire, and so on unbounded.
	 *
	 * If interrupts were enabled when this interrupt exited,
	 * indicating a process context (synchronous) interrupt,
	 * local_irq_enable/disable can be used, which will enable
	 * interrupts rather than keeping them masked (unclear how
	 * much benefit this is over just replaying for all cases,
	 * because we immediately disable again, so all we're really
	 * doing is allowing hard interrupts to execute directly for
	 * a very small time, rather than being masked and replayed).
	 */
	if (irqs_enabled) {
		local_irq_enable();
		local_irq_disable();
	} else {
		replay_soft_interrupts();
	}

	return false;
}

static notrace void booke_load_dbcr0(void)
{
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
	unsigned long dbcr0 = current->thread.debug.dbcr0;

	if (likely(!(dbcr0 & DBCR0_IDM)))
		return;

	/*
	 * Check to see if the dbcr0 register is set up to debug.
	 * Use the internal debug mode bit to do this.
	 */
	mtmsr(mfmsr() & ~MSR_DE);
	if (IS_ENABLED(CONFIG_PPC32)) {
		isync();
		global_dbcr0[smp_processor_id()] = mfspr(SPRN_DBCR0);
	}
	mtspr(SPRN_DBCR0, dbcr0);
	mtspr(SPRN_DBSR, -1);
#endif
}
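
/*
 * Rough sketch of the 64-bit lazy-masking state that
 * __prep_irq_for_enabled_exit()/prep_irq_for_enabled_exit() above
 * manipulate (the real definitions live in asm/hw_irq.h): while
 * soft-disabled, an arriving interrupt is not handled; the low-level
 * entry code records its source in local_paca->irq_happened, sets
 * PACA_IRQ_HARD_DIS and returns with MSR[EE] clear.
 * replay_soft_interrupts() later runs those deferred handlers, and only
 * then does irq_soft_mask_set(IRQS_ENABLED) publish the enabled state.
 */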

/*
 * This should be called after a syscall returns, with r3 the return value
 * from the syscall. If this function returns non-zero, the system call
 * exit assembly should additionally load all GPR registers and CTR and XER
 * from the interrupt frame.
 *
 * The function graph tracer cannot trace the return side of this function,
 * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
 */
notrace unsigned long syscall_exit_prepare(unsigned long r3,
					   struct pt_regs *regs,
					   long scv)
{
	unsigned long ti_flags;
	unsigned long ret = 0;
	bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;

	CT_WARN_ON(ct_state() == CONTEXT_USER);

	kuap_assert_locked();

	regs->result = r3;

	/* Check whether the syscall is issued inside a restartable sequence */
	rseq_syscall(regs);

	ti_flags = current_thread_info()->flags;

	if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
		if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
			r3 = -r3;
			regs->ccr |= 0x10000000; /* Set SO bit in CR */
		}
	}

	if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
		if (ti_flags & _TIF_RESTOREALL)
			ret = _TIF_RESTOREALL;
		else
			regs->gpr[3] = r3;
		clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
	} else {
		regs->gpr[3] = r3;
	}

	if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
		do_syscall_trace_leave(regs);
		ret |= _TIF_RESTOREALL;
	}

	local_irq_disable();

again:
	ti_flags = READ_ONCE(current_thread_info()->flags);
	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
		local_irq_enable();
		if (ti_flags & _TIF_NEED_RESCHED) {
			schedule();
		} else {
			/*
			 * SIGPENDING must restore signal handler function
			 * argument GPRs, and some non-volatiles (e.g., r1).
			 * Restore all for now. This could be made lighter.
			 */
			if (ti_flags & _TIF_SIGPENDING)
				ret |= _TIF_RESTOREALL;
			do_notify_resume(regs, ti_flags);
		}
		local_irq_disable();
		ti_flags = READ_ONCE(current_thread_info()->flags);
	}

	if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
				unlikely((ti_flags & _TIF_RESTORE_TM))) {
			restore_tm_state(regs);
		} else {
			unsigned long mathflags = MSR_FP;

			if (cpu_has_feature(CPU_FTR_VSX))
				mathflags |= MSR_VEC | MSR_VSX;
			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
				mathflags |= MSR_VEC;

			/*
			 * If userspace MSR has all available FP bits set,
			 * then they are live and no need to restore. If not,
			 * it means the regs were given up and restore_math
			 * may decide to restore them (to avoid taking an FP
			 * fault).
			 */
			if ((regs->msr & mathflags) != mathflags)
				restore_math(regs);
		}
	}

	user_enter_irqoff();

	/* scv need not set RI=0 because SRRs are not used */
	if (unlikely(!__prep_irq_for_enabled_exit(is_not_scv))) {
		user_exit_irqoff();
		local_irq_enable();
		local_irq_disable();
		goto again;
	}

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	local_paca->tm_scratch = regs->msr;
#endif

	booke_load_dbcr0();

	account_cpu_user_exit();

#ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not using this */
	/*
	 * We do this at the end so that we do context switch with KERNEL AMR
	 */
	kuap_user_restore(regs);
#endif
	kuep_unlock();

	return ret;
}
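
/*
 * Worked example of the error convention in syscall_exit_prepare()
 * above (illustrative values only): a handler failing with -EFAULT
 * returns r3 = -14 = 0xfffffffffffffff2, which is >=
 * (unsigned long)-MAX_ERRNO. For the sc ABI the exit code negates it
 * to 14 and sets CR0[SO]; the libc wrapper tests SO and moves r3 into
 * errno. The scv ABI returns the raw negative value instead, which is
 * why the negation is guarded by is_not_scv.
 */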

#ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not yet using this */
notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
{
	unsigned long ti_flags;
	unsigned long flags;
	unsigned long ret = 0;

	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
		BUG_ON(!(regs->msr & MSR_RI));
	BUG_ON(!(regs->msr & MSR_PR));
	BUG_ON(!FULL_REGS(regs));
	BUG_ON(arch_irq_disabled_regs(regs));
	CT_WARN_ON(ct_state() == CONTEXT_USER);

	/*
	 * We don't need to restore AMR on the way back to userspace for KUAP.
	 * AMR can only have been unlocked if we interrupted the kernel.
	 */
	kuap_assert_locked();

	local_irq_save(flags);

again:
	ti_flags = READ_ONCE(current_thread_info()->flags);
	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
		local_irq_enable(); /* returning to user: may enable */
		if (ti_flags & _TIF_NEED_RESCHED) {
			schedule();
		} else {
			if (ti_flags & _TIF_SIGPENDING)
				ret |= _TIF_RESTOREALL;
			do_notify_resume(regs, ti_flags);
		}
		local_irq_disable();
		ti_flags = READ_ONCE(current_thread_info()->flags);
	}

	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
				unlikely((ti_flags & _TIF_RESTORE_TM))) {
			restore_tm_state(regs);
		} else {
			unsigned long mathflags = MSR_FP;

			if (cpu_has_feature(CPU_FTR_VSX))
				mathflags |= MSR_VEC | MSR_VSX;
			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
				mathflags |= MSR_VEC;

			/* See above restore_math comment */
			if ((regs->msr & mathflags) != mathflags)
				restore_math(regs);
		}
	}

	user_enter_irqoff();

	if (unlikely(!__prep_irq_for_enabled_exit(true))) {
		user_exit_irqoff();
		local_irq_enable();
		local_irq_disable();
		goto again;
	}

	booke_load_dbcr0();

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	local_paca->tm_scratch = regs->msr;
#endif

	account_cpu_user_exit();

	/*
	 * We do this at the end so that we do context switch with KERNEL AMR
	 */
	kuap_user_restore(regs);
	return ret;
}
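
/*
 * Note on clear_ri=true in interrupt_exit_user_prepare() above: that
 * exit runs on the SRR0/SRR1 return path, and an interrupt taken once
 * the SRRs are live would clobber them. Clearing MSR[RI] makes such an
 * interrupt unrecoverable, i.e. loudly fatal rather than silently
 * corrupting the return state. The scv exit in syscall_exit_prepare()
 * is the one path that can keep RI set, since it does not use the SRRs.
 */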

void preempt_schedule_irq(void);

notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr)
{
	unsigned long flags;
	unsigned long ret = 0;
	unsigned long kuap;

	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) &&
	    unlikely(!(regs->msr & MSR_RI)))
		unrecoverable_exception(regs);
	BUG_ON(regs->msr & MSR_PR);
	BUG_ON(!FULL_REGS(regs));
	/*
	 * CT_WARN_ON comes here via program_check_exception,
	 * so avoid recursion.
	 */
	if (TRAP(regs) != 0x700)
		CT_WARN_ON(ct_state() == CONTEXT_USER);

	kuap = kuap_get_and_assert_locked();

	if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) {
		clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
		ret = 1;
	}

	local_irq_save(flags);

	if (!arch_irq_disabled_regs(regs)) {
		/* Returning to a kernel context with local irqs enabled. */
		WARN_ON_ONCE(!(regs->msr & MSR_EE));
again:
		if (IS_ENABLED(CONFIG_PREEMPT)) {
			/* Return to preemptible kernel context */
			if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED)) {
				if (preempt_count() == 0)
					preempt_schedule_irq();
			}
		}

		if (unlikely(!prep_irq_for_enabled_exit(true, !irqs_disabled_flags(flags))))
			goto again;
	} else {
		/* Returning to a kernel context with local irqs disabled. */
		__hard_EE_RI_disable();
#ifdef CONFIG_PPC64
		if (regs->msr & MSR_EE)
			local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
#endif
	}

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	local_paca->tm_scratch = regs->msr;
#endif

	/*
	 * Don't want to mfspr(SPRN_AMR) here, because this comes after mtmsr,
	 * which would cause Read-After-Write stalls. Hence, we take the AMR
	 * value from the check above.
	 */
	kuap_kernel_restore(regs, kuap);

	return ret;
}
#endif