// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/context_tracking.h>
#include <linux/err.h>
#include <linux/compat.h>

#include <asm/asm-prototypes.h>
#include <asm/kup.h>
#include <asm/cputime.h>
#include <asm/hw_irq.h>
#include <asm/interrupt.h>
#include <asm/kprobes.h>
#include <asm/paca.h>
#include <asm/ptrace.h>
#include <asm/reg.h>
#include <asm/signal.h>
#include <asm/switch_to.h>
#include <asm/syscall.h>
#include <asm/time.h>
#include <asm/unistd.h>

#if defined(CONFIG_PPC_ADV_DEBUG_REGS) && defined(CONFIG_PPC32)
unsigned long global_dbcr0[NR_CPUS];
#endif

typedef long (*syscall_fn)(long, long, long, long, long, long);

/* Has to run notrace because it is entered not completely "reconciled" */
notrace long system_call_exception(long r3, long r4, long r5,
				   long r6, long r7, long r8,
				   unsigned long r0, struct pt_regs *regs)
{
	syscall_fn f;

	kuep_lock();
#ifdef CONFIG_PPC32
	kuap_save_and_lock(regs);
#endif

	regs->orig_gpr3 = r3;

	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);

	trace_hardirqs_off(); /* finish reconciling */

	CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
	user_exit_irqoff();

	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
		BUG_ON(!(regs->msr & MSR_RI));
	BUG_ON(!(regs->msr & MSR_PR));
	BUG_ON(arch_irq_disabled_regs(regs));

#ifdef CONFIG_PPC_PKEY
	if (mmu_has_feature(MMU_FTR_PKEY)) {
		unsigned long amr, iamr;
		bool flush_needed = false;
		/*
		 * When entering from userspace we mostly have the AMR/IAMR
		 * different from kernel default values. Hence don't compare.
		 */
		amr = mfspr(SPRN_AMR);
		iamr = mfspr(SPRN_IAMR);
		regs->amr = amr;
		regs->iamr = iamr;
		if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
			mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
			flush_needed = true;
		}
		if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
			mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
			flush_needed = true;
		}
		if (flush_needed)
			isync();
	} else
#endif
		kuap_assert_locked();

	booke_restore_dbcr0();

	account_cpu_user_entry();

	account_stolen_time();

	/*
	 * This is not required for the syscall exit path, but makes the
	 * stack frame look nicer. If this was initialised in the first stack
	 * frame, or if the unwinder was taught the first stack frame always
	 * returns to user with IRQS_ENABLED, this store could be avoided!
	 */
	irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);

	local_irq_enable();

	if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
		if (unlikely(trap_is_unsupported_scv(regs))) {
			/* Unsupported scv vector */
			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
			return regs->gpr[3];
		}
		/*
		 * We use the return value of do_syscall_trace_enter() as the
		 * syscall number. If the syscall was rejected for any reason,
		 * do_syscall_trace_enter() returns an invalid syscall number,
		 * the test against NR_syscalls will fail, and the value in
		 * regs->gpr[3] is returned instead.
		 */
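		/*
		 * The tracer (e.g. ptrace or seccomp) may also have changed
		 * the syscall arguments, so they are reloaded from regs below.
		 */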
		r0 = do_syscall_trace_enter(regs);
		if (unlikely(r0 >= NR_syscalls))
			return regs->gpr[3];
		r3 = regs->gpr[3];
		r4 = regs->gpr[4];
		r5 = regs->gpr[5];
		r6 = regs->gpr[6];
		r7 = regs->gpr[7];
		r8 = regs->gpr[8];

	} else if (unlikely(r0 >= NR_syscalls)) {
		if (unlikely(trap_is_unsupported_scv(regs))) {
			/* Unsupported scv vector */
			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
			return regs->gpr[3];
		}
		return -ENOSYS;
	}

	/* May be faster to do array_index_nospec? */
	barrier_nospec();

	if (unlikely(is_compat_task())) {
		f = (void *)compat_sys_call_table[r0];

		/* Compat (32-bit) syscalls take 32-bit arguments; clear the upper halves */
		r3 &= 0x00000000ffffffffULL;
		r4 &= 0x00000000ffffffffULL;
		r5 &= 0x00000000ffffffffULL;
		r6 &= 0x00000000ffffffffULL;
		r7 &= 0x00000000ffffffffULL;
		r8 &= 0x00000000ffffffffULL;

	} else {
		f = (void *)sys_call_table[r0];
	}

	return f(r3, r4, r5, r6, r7, r8);
}

/*
 * local irqs must be disabled. Returns false if the caller must re-enable
 * them, check for new work, and try again.
 *
 * This should be called with local irqs disabled, but if they were previously
 * enabled when the interrupt handler returns (indicating a process-context /
 * synchronous interrupt) then irqs_enabled should be true.
 */
static notrace __always_inline bool __prep_irq_for_enabled_exit(bool clear_ri)
{
	/* This must be done with RI=1 because tracing may touch vmaps */
	trace_hardirqs_on();

	/* This pattern matches prep_irq_for_idle */
	if (clear_ri)
		__hard_EE_RI_disable();
	else
		__hard_irq_disable();
#ifdef CONFIG_PPC64
	if (unlikely(lazy_irq_pending_nocheck())) {
		/* Took an interrupt, may have more exit work to do. */
		if (clear_ri)
			__hard_RI_enable();
		trace_hardirqs_off();
		local_paca->irq_happened |= PACA_IRQ_HARD_DIS;

		return false;
	}
	local_paca->irq_happened = 0;
	irq_soft_mask_set(IRQS_ENABLED);
#endif
	return true;
}

static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled)
{
	if (__prep_irq_for_enabled_exit(clear_ri))
		return true;

	/*
	 * Must replay pending soft-masked interrupts now. Don't just
	 * local_irq_enable(); local_irq_disable(); because if we are
	 * returning from an asynchronous interrupt here, another one
	 * might hit after irqs are enabled, and it would exit via this
	 * same path allowing another to fire, and so on unbounded.
	 *
	 * If interrupts were enabled when this interrupt exited,
	 * indicating a process context (synchronous) interrupt,
	 * local_irq_enable/disable can be used, which will enable
	 * interrupts rather than keeping them masked (unclear how
	 * much benefit this is over just replaying for all cases,
	 * because we immediately disable again, so all we're really
	 * doing is allowing hard interrupts to execute directly for
	 * a very small time, rather than being masked and replayed).
	 */
	if (irqs_enabled) {
		local_irq_enable();
		local_irq_disable();
	} else {
		replay_soft_interrupts();
	}

	return false;
}

static notrace void booke_load_dbcr0(void)
{
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
	unsigned long dbcr0 = current->thread.debug.dbcr0;

	if (likely(!(dbcr0 & DBCR0_IDM)))
		return;

	/*
	 * Check to see if the dbcr0 register is set up to debug.
	 * Use the internal debug mode bit to do this.
	 */
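	/* Clear MSR[DE] so no debug interrupt is taken while DBCR0/DBSR are updated */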
	mtmsr(mfmsr() & ~MSR_DE);
	if (IS_ENABLED(CONFIG_PPC32)) {
		isync();
		global_dbcr0[smp_processor_id()] = mfspr(SPRN_DBCR0);
	}
	mtspr(SPRN_DBCR0, dbcr0);
	mtspr(SPRN_DBSR, -1);
#endif
}

/*
 * This should be called after a syscall returns, with r3 the return value
 * from the syscall. If this function returns non-zero, the system call
 * exit assembly should additionally load all GPR registers and CTR and XER
 * from the interrupt frame.
 *
 * The function graph tracer can not trace the return side of this function,
 * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
 */
notrace unsigned long syscall_exit_prepare(unsigned long r3,
					   struct pt_regs *regs,
					   long scv)
{
	unsigned long ti_flags;
	unsigned long ret = 0;
	bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;

	CT_WARN_ON(ct_state() == CONTEXT_USER);

	kuap_assert_locked();

	regs->result = r3;

	/* Check whether the syscall is issued inside a restartable sequence */
	rseq_syscall(regs);

	ti_flags = current_thread_info()->flags;

	/*
	 * The sc ABI reports failure with a positive errno and CR0[SO] set;
	 * scv returns a negative errno directly, so skip the conversion.
	 */
	if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
		if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
			r3 = -r3;
			regs->ccr |= 0x10000000; /* Set SO bit in CR */
		}
	}

	if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
		if (ti_flags & _TIF_RESTOREALL)
			ret = _TIF_RESTOREALL;
		else
			regs->gpr[3] = r3;
		clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
	} else {
		regs->gpr[3] = r3;
	}

	if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
		do_syscall_trace_leave(regs);
		ret |= _TIF_RESTOREALL;
	}

	local_irq_disable();

again:
	ti_flags = READ_ONCE(current_thread_info()->flags);
	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
		local_irq_enable();
		if (ti_flags & _TIF_NEED_RESCHED) {
			schedule();
		} else {
			/*
			 * SIGPENDING must restore signal handler function
			 * argument GPRs, and some non-volatiles (e.g., r1).
			 * Restore all for now. This could be made lighter.
			 */
			if (ti_flags & _TIF_SIGPENDING)
				ret |= _TIF_RESTOREALL;
			do_notify_resume(regs, ti_flags);
		}
		local_irq_disable();
		ti_flags = READ_ONCE(current_thread_info()->flags);
	}

	if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
				unlikely((ti_flags & _TIF_RESTORE_TM))) {
			restore_tm_state(regs);
		} else {
			unsigned long mathflags = MSR_FP;

			if (cpu_has_feature(CPU_FTR_VSX))
				mathflags |= MSR_VEC | MSR_VSX;
			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
				mathflags |= MSR_VEC;

			/*
			 * If userspace MSR has all available FP bits set,
			 * then they are live and no need to restore. If not,
			 * it means the regs were given up and restore_math
			 * may decide to restore them (to avoid taking an FP
			 * fault).
			 */
			if ((regs->msr & mathflags) != mathflags)
				restore_math(regs);
		}
	}

	user_enter_irqoff();

	/* scv need not set RI=0 because SRRs are not used */
	if (unlikely(!__prep_irq_for_enabled_exit(is_not_scv))) {
		user_exit_irqoff();
		local_irq_enable();
		local_irq_disable();
		goto again;
	}

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	local_paca->tm_scratch = regs->msr;
#endif

	booke_load_dbcr0();

	account_cpu_user_exit();

	/* Restore user access locks last */
	kuap_user_restore(regs);
	kuep_unlock();

	return ret;
}

notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
{
	unsigned long ti_flags;
	unsigned long flags;
	unsigned long ret = 0;

	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
		BUG_ON(!(regs->msr & MSR_RI));
	BUG_ON(!(regs->msr & MSR_PR));
	BUG_ON(arch_irq_disabled_regs(regs));
	CT_WARN_ON(ct_state() == CONTEXT_USER);

	/*
	 * We don't need to restore AMR on the way back to userspace for KUAP.
	 * AMR can only have been unlocked if we interrupted the kernel.
	 */
	kuap_assert_locked();

	local_irq_save(flags);

again:
	ti_flags = READ_ONCE(current_thread_info()->flags);
	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
		local_irq_enable(); /* returning to user: may enable */
		if (ti_flags & _TIF_NEED_RESCHED) {
			schedule();
		} else {
			if (ti_flags & _TIF_SIGPENDING)
				ret |= _TIF_RESTOREALL;
			do_notify_resume(regs, ti_flags);
		}
		local_irq_disable();
		ti_flags = READ_ONCE(current_thread_info()->flags);
	}

	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
				unlikely((ti_flags & _TIF_RESTORE_TM))) {
			restore_tm_state(regs);
		} else {
			unsigned long mathflags = MSR_FP;

			if (cpu_has_feature(CPU_FTR_VSX))
				mathflags |= MSR_VEC | MSR_VSX;
			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
				mathflags |= MSR_VEC;

			/* See above restore_math comment */
			if ((regs->msr & mathflags) != mathflags)
				restore_math(regs);
		}
	}

	user_enter_irqoff();

	if (unlikely(!__prep_irq_for_enabled_exit(true))) {
		user_exit_irqoff();
		local_irq_enable();
		local_irq_disable();
		goto again;
	}

	booke_load_dbcr0();

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	local_paca->tm_scratch = regs->msr;
#endif

	account_cpu_user_exit();

	/* Restore user access locks last */
	kuap_user_restore(regs);

	return ret;
}

void preempt_schedule_irq(void);

notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr)
{
	unsigned long flags;
	unsigned long ret = 0;
	unsigned long kuap;

	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) &&
	    unlikely(!(regs->msr & MSR_RI)))
		unrecoverable_exception(regs);
	BUG_ON(regs->msr & MSR_PR);
	/*
	 * CT_WARN_ON comes here via program_check_exception,
	 * so avoid recursion.
	 */
	if (TRAP(regs) != INTERRUPT_PROGRAM)
		CT_WARN_ON(ct_state() == CONTEXT_USER);

	kuap = kuap_get_and_assert_locked();

	if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) {
		clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
		ret = 1;
	}

	local_irq_save(flags);

	if (!arch_irq_disabled_regs(regs)) {
		/* Returning to a kernel context with local irqs enabled. */
		WARN_ON_ONCE(!(regs->msr & MSR_EE));
again:
		if (IS_ENABLED(CONFIG_PREEMPT)) {
			/* Return to preemptible kernel context */
			if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED)) {
				if (preempt_count() == 0)
					preempt_schedule_irq();
			}
		}

		if (unlikely(!prep_irq_for_enabled_exit(true, !irqs_disabled_flags(flags))))
			goto again;
	} else {
		/* Returning to a kernel context with local irqs disabled. */
		__hard_EE_RI_disable();
#ifdef CONFIG_PPC64
		if (regs->msr & MSR_EE)
			local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
#endif
	}

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	local_paca->tm_scratch = regs->msr;
#endif

	/*
	 * 64s does not want to mfspr(SPRN_AMR) here, because this comes after
	 * mtmsr, which would cause Read-After-Write stalls. Hence, take the
	 * AMR value from the check above.
	 */
	kuap_kernel_restore(regs, kuap);

	return ret;
}