// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/context_tracking.h>
#include <linux/err.h>
#include <linux/compat.h>

#include <asm/asm-prototypes.h>
#include <asm/kup.h>
#include <asm/cputime.h>
#include <asm/interrupt.h>
#include <asm/hw_irq.h>
#include <asm/kprobes.h>
#include <asm/paca.h>
#include <asm/ptrace.h>
#include <asm/reg.h>
#include <asm/signal.h>
#include <asm/switch_to.h>
#include <asm/syscall.h>
#include <asm/time.h>
#include <asm/unistd.h>

typedef long (*syscall_fn)(long, long, long, long, long, long);

/* Has to run notrace because it is entered not completely "reconciled" */
notrace long system_call_exception(long r3, long r4, long r5,
				   long r6, long r7, long r8,
				   unsigned long r0, struct pt_regs *regs)
{
	syscall_fn f;

	regs->orig_gpr3 = r3;

	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);

	CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
	user_exit_irqoff();

	trace_hardirqs_off(); /* finish reconciling */

	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
		BUG_ON(!(regs->msr & MSR_RI));
	BUG_ON(!(regs->msr & MSR_PR));
	BUG_ON(!FULL_REGS(regs));
	BUG_ON(arch_irq_disabled_regs(regs));

#ifdef CONFIG_PPC_PKEY
	if (mmu_has_feature(MMU_FTR_PKEY)) {
		unsigned long amr, iamr;
		bool flush_needed = false;
		/*
		 * When entering from userspace we mostly have the AMR/IAMR
		 * different from kernel default values. Hence don't compare.
		 */
		amr = mfspr(SPRN_AMR);
		iamr = mfspr(SPRN_IAMR);
		regs->amr = amr;
		regs->iamr = iamr;
		if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
			mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
			flush_needed = true;
		}
		if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
			mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
			flush_needed = true;
		}
		if (flush_needed)
			isync();
	} else
#endif
#ifdef CONFIG_PPC64
		kuap_check_amr();
#endif

	booke_restore_dbcr0();

	account_cpu_user_entry();

	account_stolen_time();

	/*
	 * This is not required for the syscall exit path, but makes the
	 * stack frame look nicer. If this was initialised in the first stack
	 * frame, or if the unwinder was taught the first stack frame always
	 * returns to user with IRQS_ENABLED, this store could be avoided!
	 */
	irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);

	local_irq_enable();

	if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
		if (unlikely(trap_is_unsupported_scv(regs))) {
			/* Unsupported scv vector */
			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
			return regs->gpr[3];
		}
		/*
		 * We use the return value of do_syscall_trace_enter() as the
		 * syscall number. If the syscall was rejected for any reason
		 * do_syscall_trace_enter() returns an invalid syscall number
		 * and the test against NR_syscalls will fail and the return
		 * value to be used is in regs->gpr[3].
		 */
		r0 = do_syscall_trace_enter(regs);
		if (unlikely(r0 >= NR_syscalls))
			return regs->gpr[3];
		r3 = regs->gpr[3];
		r4 = regs->gpr[4];
		r5 = regs->gpr[5];
		r6 = regs->gpr[6];
		r7 = regs->gpr[7];
		r8 = regs->gpr[8];

	} else if (unlikely(r0 >= NR_syscalls)) {
		if (unlikely(trap_is_unsupported_scv(regs))) {
			/* Unsupported scv vector */
			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
			return regs->gpr[3];
		}
		return -ENOSYS;
	}

	/* May be faster to do array_index_nospec? */
	barrier_nospec();

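	/*
	 * Added note (not in the original source): 32-bit (compat) tasks are
	 * dispatched via the compat syscall table, and their argument
	 * registers are truncated to the low 32 bits because the 32-bit ABI
	 * only defines the low word of each argument; the upper bits may
	 * contain stale data.
	 */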
	if (unlikely(is_compat_task())) {
		f = (void *)compat_sys_call_table[r0];

		r3 &= 0x00000000ffffffffULL;
		r4 &= 0x00000000ffffffffULL;
		r5 &= 0x00000000ffffffffULL;
		r6 &= 0x00000000ffffffffULL;
		r7 &= 0x00000000ffffffffULL;
		r8 &= 0x00000000ffffffffULL;

	} else {
		f = (void *)sys_call_table[r0];
	}

	return f(r3, r4, r5, r6, r7, r8);
}

/*
 * local irqs must be disabled. Returns false if the caller must re-enable
 * them, check for new work, and try again.
 *
 * This should be called with local irqs disabled, but if they were previously
 * enabled when the interrupt handler returns (indicating a process-context /
 * synchronous interrupt) then irqs_enabled should be true.
 */
static notrace __always_inline bool __prep_irq_for_enabled_exit(bool clear_ri)
{
	/* This must be done with RI=1 because tracing may touch vmaps */
	trace_hardirqs_on();

	/* This pattern matches prep_irq_for_idle */
	if (clear_ri)
		__hard_EE_RI_disable();
	else
		__hard_irq_disable();
#ifdef CONFIG_PPC64
	if (unlikely(lazy_irq_pending_nocheck())) {
		/* Took an interrupt, may have more exit work to do. */
		if (clear_ri)
			__hard_RI_enable();
		trace_hardirqs_off();
		local_paca->irq_happened |= PACA_IRQ_HARD_DIS;

		return false;
	}
	local_paca->irq_happened = 0;
	irq_soft_mask_set(IRQS_ENABLED);
#endif
	return true;
}

static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled)
{
	if (__prep_irq_for_enabled_exit(clear_ri))
		return true;

	/*
	 * Must replay pending soft-masked interrupts now. Don't just
	 * local_irq_enable(); local_irq_disable(); because if we are
	 * returning from an asynchronous interrupt here, another one
	 * might hit after irqs are enabled, and it would exit via this
	 * same path allowing another to fire, and so on unbounded.
	 *
	 * If interrupts were enabled when this interrupt exited,
	 * indicating a process context (synchronous) interrupt,
	 * local_irq_enable/disable can be used, which will enable
	 * interrupts rather than keeping them masked (unclear how
	 * much benefit this is over just replaying for all cases,
	 * because we immediately disable again, so all we're really
	 * doing is allowing hard interrupts to execute directly for
	 * a very small time, rather than being masked and replayed).
	 */
	if (irqs_enabled) {
		local_irq_enable();
		local_irq_disable();
	} else {
		replay_soft_interrupts();
	}

	return false;
}

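/*
 * Added note (not in the original source): this reloads the user's DBCR0
 * (debug control) state on CPUs with BookE-style debug registers when
 * returning to a task that has debug events enabled (DBCR0[IDM] set). On
 * 32-bit the previous DBCR0 value is stashed in global_dbcr0[] for this CPU,
 * and DBSR is written with all ones to clear any stale debug events.
 */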
static notrace void booke_load_dbcr0(void)
{
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
	unsigned long dbcr0 = current->thread.debug.dbcr0;

	if (likely(!(dbcr0 & DBCR0_IDM)))
		return;

	/*
	 * Check to see if the dbcr0 register is set up to debug.
	 * Use the internal debug mode bit to do this.
	 */
	mtmsr(mfmsr() & ~MSR_DE);
	if (IS_ENABLED(CONFIG_PPC32)) {
		isync();
		global_dbcr0[smp_processor_id()] = mfspr(SPRN_DBCR0);
	}
	mtspr(SPRN_DBCR0, dbcr0);
	mtspr(SPRN_DBSR, -1);
#endif
}

/*
 * This should be called after a syscall returns, with r3 the return value
 * from the syscall. If this function returns non-zero, the system call
 * exit assembly should additionally load all GPR registers and CTR and XER
 * from the interrupt frame.
 *
 * The function graph tracer can not trace the return side of this function,
 * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
 */
notrace unsigned long syscall_exit_prepare(unsigned long r3,
					   struct pt_regs *regs,
					   long scv)
{
	unsigned long ti_flags;
	unsigned long ret = 0;
	bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;

	CT_WARN_ON(ct_state() == CONTEXT_USER);

#ifdef CONFIG_PPC64
	kuap_check_amr();
#endif

	regs->result = r3;

	/* Check whether the syscall is issued inside a restartable sequence */
	rseq_syscall(regs);

	ti_flags = current_thread_info()->flags;

	if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
		if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
			r3 = -r3;
			regs->ccr |= 0x10000000; /* Set SO bit in CR */
		}
	}

	if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
		if (ti_flags & _TIF_RESTOREALL)
			ret = _TIF_RESTOREALL;
		else
			regs->gpr[3] = r3;
		clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
	} else {
		regs->gpr[3] = r3;
	}

	if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
		do_syscall_trace_leave(regs);
		ret |= _TIF_RESTOREALL;
	}

	local_irq_disable();

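	/*
	 * Added note (not in the original source): TIF flags are checked with
	 * interrupts disabled so no new work can arrive between the final
	 * check and the return to userspace. If work is pending, interrupts
	 * are re-enabled while it runs (reschedule or signal delivery), then
	 * disabled again and the flags are re-checked.
	 */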
again:
	ti_flags = READ_ONCE(current_thread_info()->flags);
	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
		local_irq_enable();
		if (ti_flags & _TIF_NEED_RESCHED) {
			schedule();
		} else {
			/*
			 * SIGPENDING must restore signal handler function
			 * argument GPRs, and some non-volatiles (e.g., r1).
			 * Restore all for now. This could be made lighter.
			 */
			if (ti_flags & _TIF_SIGPENDING)
				ret |= _TIF_RESTOREALL;
			do_notify_resume(regs, ti_flags);
		}
		local_irq_disable();
		ti_flags = READ_ONCE(current_thread_info()->flags);
	}

	if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
				unlikely((ti_flags & _TIF_RESTORE_TM))) {
			restore_tm_state(regs);
		} else {
			unsigned long mathflags = MSR_FP;

			if (cpu_has_feature(CPU_FTR_VSX))
				mathflags |= MSR_VEC | MSR_VSX;
			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
				mathflags |= MSR_VEC;

			/*
			 * If userspace MSR has all available FP bits set,
			 * then they are live and no need to restore. If not,
			 * it means the regs were given up and restore_math
			 * may decide to restore them (to avoid taking an FP
			 * fault).
			 */
			if ((regs->msr & mathflags) != mathflags)
				restore_math(regs);
		}
	}

	user_enter_irqoff();

	/* scv need not set RI=0 because SRRs are not used */
	if (unlikely(!__prep_irq_for_enabled_exit(is_not_scv))) {
		user_exit_irqoff();
		local_irq_enable();
		local_irq_disable();
		goto again;
	}

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	local_paca->tm_scratch = regs->msr;
#endif

	booke_load_dbcr0();

	account_cpu_user_exit();

#ifdef CONFIG_PPC_BOOK3S_64 /* BOOK3E and ppc32 not using this */
	/*
	 * We do this at the end so that we do context switch with KERNEL AMR
	 */
	kuap_user_restore(regs);
#endif
	return ret;
}

#ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not yet using this */
notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
{
	unsigned long ti_flags;
	unsigned long flags;
	unsigned long ret = 0;

	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
		BUG_ON(!(regs->msr & MSR_RI));
	BUG_ON(!(regs->msr & MSR_PR));
	BUG_ON(!FULL_REGS(regs));
	BUG_ON(arch_irq_disabled_regs(regs));
	CT_WARN_ON(ct_state() == CONTEXT_USER);

	/*
	 * We don't need to restore AMR on the way back to userspace for KUAP.
	 * AMR can only have been unlocked if we interrupted the kernel.
	 */
#ifdef CONFIG_PPC64
	kuap_check_amr();
#endif

	local_irq_save(flags);

again:
	ti_flags = READ_ONCE(current_thread_info()->flags);
	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
		local_irq_enable(); /* returning to user: may enable */
		if (ti_flags & _TIF_NEED_RESCHED) {
			schedule();
		} else {
			if (ti_flags & _TIF_SIGPENDING)
				ret |= _TIF_RESTOREALL;
			do_notify_resume(regs, ti_flags);
		}
		local_irq_disable();
		ti_flags = READ_ONCE(current_thread_info()->flags);
	}

	if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
				unlikely((ti_flags & _TIF_RESTORE_TM))) {
			restore_tm_state(regs);
		} else {
			unsigned long mathflags = MSR_FP;

			if (cpu_has_feature(CPU_FTR_VSX))
				mathflags |= MSR_VEC | MSR_VSX;
			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
				mathflags |= MSR_VEC;

			/* See above restore_math comment */
			if ((regs->msr & mathflags) != mathflags)
				restore_math(regs);
		}
	}

	user_enter_irqoff();

	if (unlikely(!__prep_irq_for_enabled_exit(true))) {
		user_exit_irqoff();
		local_irq_enable();
		local_irq_disable();
		goto again;
	}

	booke_load_dbcr0();

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	local_paca->tm_scratch = regs->msr;
#endif

	account_cpu_user_exit();

	/*
	 * We do this at the end so that we do context switch with KERNEL AMR
	 */
#ifdef CONFIG_PPC64
	kuap_user_restore(regs);
#endif
	return ret;
}

void preempt_schedule_irq(void);

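/*
 * Added note (not in the original source): prepare to return from an
 * interrupt taken in kernel mode. If the interrupted context had irqs
 * enabled, handle preemption and replay any pending soft-masked interrupts
 * before hard-disabling for the exit. The return value tells the low-level
 * exit assembly whether it must emulate a stack store
 * (_TIF_EMULATE_STACK_STORE was set).
 */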
notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr)
{
	unsigned long flags;
	unsigned long ret = 0;
#ifdef CONFIG_PPC64
	unsigned long amr;
#endif

	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) &&
	    unlikely(!(regs->msr & MSR_RI)))
		unrecoverable_exception(regs);
	BUG_ON(regs->msr & MSR_PR);
	BUG_ON(!FULL_REGS(regs));
	/*
	 * CT_WARN_ON comes here via program_check_exception,
	 * so avoid recursion.
	 */
	if (TRAP(regs) != 0x700)
		CT_WARN_ON(ct_state() == CONTEXT_USER);

#ifdef CONFIG_PPC64
	amr = kuap_get_and_check_amr();
#endif

	if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) {
		clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
		ret = 1;
	}

	local_irq_save(flags);

	if (!arch_irq_disabled_regs(regs)) {
		/* Returning to a kernel context with local irqs enabled. */
		WARN_ON_ONCE(!(regs->msr & MSR_EE));
again:
		if (IS_ENABLED(CONFIG_PREEMPT)) {
			/* Return to preemptible kernel context */
			if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED)) {
				if (preempt_count() == 0)
					preempt_schedule_irq();
			}
		}

		if (unlikely(!prep_irq_for_enabled_exit(true, !irqs_disabled_flags(flags))))
			goto again;
	} else {
		/* Returning to a kernel context with local irqs disabled. */
		__hard_EE_RI_disable();
#ifdef CONFIG_PPC64
		if (regs->msr & MSR_EE)
			local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
#endif
	}

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	local_paca->tm_scratch = regs->msr;
#endif

	/*
	 * Don't want to mfspr(SPRN_AMR) here, because this comes after mtmsr,
	 * which would cause Read-After-Write stalls. Hence, we take the AMR
	 * value from the check above.
	 */
#ifdef CONFIG_PPC64
	kuap_kernel_restore(regs, amr);
#endif

	return ret;
}
#endif