// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/context_tracking.h>
#include <linux/err.h>
#include <linux/compat.h>

#include <asm/asm-prototypes.h>
#include <asm/kup.h>
#include <asm/cputime.h>
#include <asm/interrupt.h>
#include <asm/hw_irq.h>
#include <asm/kprobes.h>
#include <asm/paca.h>
#include <asm/ptrace.h>
#include <asm/reg.h>
#include <asm/signal.h>
#include <asm/switch_to.h>
#include <asm/syscall.h>
#include <asm/time.h>
#include <asm/unistd.h>

#if defined(CONFIG_PPC_ADV_DEBUG_REGS) && defined(CONFIG_PPC32)
unsigned long global_dbcr0[NR_CPUS];
#endif

typedef long (*syscall_fn)(long, long, long, long, long, long);

/* Has to run notrace because it is entered not completely "reconciled" */
notrace long system_call_exception(long r3, long r4, long r5,
				   long r6, long r7, long r8,
				   unsigned long r0, struct pt_regs *regs)
{
	syscall_fn f;

	kuep_lock();

	regs->orig_gpr3 = r3;

	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);

	trace_hardirqs_off(); /* finish reconciling */

	CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
	user_exit_irqoff();

	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
		BUG_ON(!(regs->msr & MSR_RI));
	BUG_ON(!(regs->msr & MSR_PR));
	BUG_ON(arch_irq_disabled_regs(regs));

#ifdef CONFIG_PPC_PKEY
	if (mmu_has_feature(MMU_FTR_PKEY)) {
		unsigned long amr, iamr;
		bool flush_needed = false;
		/*
		 * When entering from userspace we mostly have the AMR/IAMR
		 * different from kernel default values. Hence don't compare.
		 */
		amr = mfspr(SPRN_AMR);
		iamr = mfspr(SPRN_IAMR);
		regs->amr = amr;
		regs->iamr = iamr;
		if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
			mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
			flush_needed = true;
		}
		if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
			mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
			flush_needed = true;
		}
		if (flush_needed)
			isync();
	} else
#endif
		kuap_assert_locked();

	booke_restore_dbcr0();

	account_cpu_user_entry();

	account_stolen_time();

	/*
	 * This is not required for the syscall exit path, but makes the
	 * stack frame look nicer. If this was initialised in the first stack
	 * frame, or if the unwinder was taught the first stack frame always
	 * returns to user with IRQS_ENABLED, this store could be avoided!
	 */
	irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);

	local_irq_enable();

	if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
		if (unlikely(trap_is_unsupported_scv(regs))) {
			/* Unsupported scv vector */
			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
			return regs->gpr[3];
		}
		/*
		 * We use the return value of do_syscall_trace_enter() as the
		 * syscall number. If the syscall was rejected for any reason,
		 * do_syscall_trace_enter() returns an invalid syscall number
		 * and the test below against NR_syscalls fails, so the return
		 * value to be used is already in regs->gpr[3].
		 */
		r0 = do_syscall_trace_enter(regs);
		if (unlikely(r0 >= NR_syscalls))
			return regs->gpr[3];
		r3 = regs->gpr[3];
		r4 = regs->gpr[4];
		r5 = regs->gpr[5];
		r6 = regs->gpr[6];
		r7 = regs->gpr[7];
		r8 = regs->gpr[8];

	} else if (unlikely(r0 >= NR_syscalls)) {
		if (unlikely(trap_is_unsupported_scv(regs))) {
			/* Unsupported scv vector */
			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
			return regs->gpr[3];
		}
		return -ENOSYS;
	}

	/* May be faster to do array_index_nospec? */
	barrier_nospec();

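	/*
	 * Compat (32-bit) tasks supply 32-bit syscall arguments, so mask
	 * each argument register down to its low word before dispatching
	 * through the compat syscall table.
	 */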
	if (unlikely(is_compat_task())) {
		f = (void *)compat_sys_call_table[r0];

		r3 &= 0x00000000ffffffffULL;
		r4 &= 0x00000000ffffffffULL;
		r5 &= 0x00000000ffffffffULL;
		r6 &= 0x00000000ffffffffULL;
		r7 &= 0x00000000ffffffffULL;
		r8 &= 0x00000000ffffffffULL;

	} else {
		f = (void *)sys_call_table[r0];
	}

	return f(r3, r4, r5, r6, r7, r8);
}

/*
 * local irqs must be disabled. Returns false if the caller must re-enable
 * them, check for new work, and try again.
 *
 * This should be called with local irqs disabled, but if they were previously
 * enabled when the interrupt handler returns (indicating a process-context /
 * synchronous interrupt) then irqs_enabled should be true.
 */
static notrace __always_inline bool __prep_irq_for_enabled_exit(bool clear_ri)
{
	/* This must be done with RI=1 because tracing may touch vmaps */
	trace_hardirqs_on();

	/* This pattern matches prep_irq_for_idle */
	if (clear_ri)
		__hard_EE_RI_disable();
	else
		__hard_irq_disable();
#ifdef CONFIG_PPC64
	if (unlikely(lazy_irq_pending_nocheck())) {
		/* Took an interrupt, may have more exit work to do. */
		if (clear_ri)
			__hard_RI_enable();
		trace_hardirqs_off();
		local_paca->irq_happened |= PACA_IRQ_HARD_DIS;

		return false;
	}
	local_paca->irq_happened = 0;
	irq_soft_mask_set(IRQS_ENABLED);
#endif
	return true;
}

static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled)
{
	if (__prep_irq_for_enabled_exit(clear_ri))
		return true;

	/*
	 * Must replay pending soft-masked interrupts now. Don't just
	 * local_irq_enable(); local_irq_disable(); because if we are
	 * returning from an asynchronous interrupt here, another one
	 * might hit after irqs are enabled, and it would exit via this
	 * same path allowing another to fire, and so on unbounded.
	 *
	 * If interrupts were enabled when this interrupt exited,
	 * indicating a process context (synchronous) interrupt,
	 * local_irq_enable/disable can be used, which will enable
	 * interrupts rather than keeping them masked (unclear how
	 * much benefit this is over just replaying for all cases,
	 * because we immediately disable again, so all we're really
	 * doing is allowing hard interrupts to execute directly for
	 * a very small time, rather than being masked and replayed).
	 */
	if (irqs_enabled) {
		local_irq_enable();
		local_irq_disable();
	} else {
		replay_soft_interrupts();
	}

	return false;
}

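/*
 * Load the thread's debug control state (DBCR0) before returning to
 * userspace. On 32-bit the outgoing DBCR0 value is first saved in
 * global_dbcr0[] so it can be put back on the next kernel entry (see
 * booke_restore_dbcr0()).
 */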
static notrace void booke_load_dbcr0(void)
{
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
	unsigned long dbcr0 = current->thread.debug.dbcr0;

	if (likely(!(dbcr0 & DBCR0_IDM)))
		return;

	/*
	 * Check to see if the dbcr0 register is set up to debug.
	 * Use the internal debug mode bit to do this.
	 */
	mtmsr(mfmsr() & ~MSR_DE);
	if (IS_ENABLED(CONFIG_PPC32)) {
		isync();
		global_dbcr0[smp_processor_id()] = mfspr(SPRN_DBCR0);
	}
	mtspr(SPRN_DBCR0, dbcr0);
	mtspr(SPRN_DBSR, -1);
#endif
}

/*
 * This should be called after a syscall returns, with r3 the return value
 * from the syscall. If this function returns non-zero, the system call
 * exit assembly should additionally load all GPR registers and CTR and XER
 * from the interrupt frame.
 *
 * The function graph tracer can not trace the return side of this function,
 * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
 */
notrace unsigned long syscall_exit_prepare(unsigned long r3,
					   struct pt_regs *regs,
					   long scv)
{
	unsigned long ti_flags;
	unsigned long ret = 0;
	bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;

	CT_WARN_ON(ct_state() == CONTEXT_USER);

	kuap_assert_locked();

	regs->result = r3;

	/* Check whether the syscall is issued inside a restartable sequence */
	rseq_syscall(regs);

	ti_flags = current_thread_info()->flags;

	if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
		if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
			r3 = -r3;
			regs->ccr |= 0x10000000; /* Set SO bit in CR */
		}
	}

	if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
		if (ti_flags & _TIF_RESTOREALL)
			ret = _TIF_RESTOREALL;
		else
			regs->gpr[3] = r3;
		clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
	} else {
		regs->gpr[3] = r3;
	}

	if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
		do_syscall_trace_leave(regs);
		ret |= _TIF_RESTOREALL;
	}

	local_irq_disable();

again:
	ti_flags = READ_ONCE(current_thread_info()->flags);
	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
		local_irq_enable();
		if (ti_flags & _TIF_NEED_RESCHED) {
			schedule();
		} else {
			/*
			 * SIGPENDING must restore signal handler function
			 * argument GPRs, and some non-volatiles (e.g., r1).
			 * Restore all for now. This could be made lighter.
			 */
			if (ti_flags & _TIF_SIGPENDING)
				ret |= _TIF_RESTOREALL;
			do_notify_resume(regs, ti_flags);
		}
		local_irq_disable();
		ti_flags = READ_ONCE(current_thread_info()->flags);
	}

	if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
		    unlikely((ti_flags & _TIF_RESTORE_TM))) {
			restore_tm_state(regs);
		} else {
			unsigned long mathflags = MSR_FP;

			if (cpu_has_feature(CPU_FTR_VSX))
				mathflags |= MSR_VEC | MSR_VSX;
			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
				mathflags |= MSR_VEC;

			/*
			 * If userspace MSR has all available FP bits set,
			 * then they are live and no need to restore. If not,
			 * it means the regs were given up and restore_math
			 * may decide to restore them (to avoid taking an FP
			 * fault).
			 */
			if ((regs->msr & mathflags) != mathflags)
				restore_math(regs);
		}
	}

	user_enter_irqoff();

	/* scv need not set RI=0 because SRRs are not used */
	if (unlikely(!__prep_irq_for_enabled_exit(is_not_scv))) {
		user_exit_irqoff();
		local_irq_enable();
		local_irq_disable();
		goto again;
	}

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	local_paca->tm_scratch = regs->msr;
#endif

	booke_load_dbcr0();

	account_cpu_user_exit();

	/* Restore user access locks last */
	kuap_user_restore(regs);
	kuep_unlock();

	return ret;
}

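/*
 * Called when returning to userspace from an interrupt. Handles pending
 * user work (rescheduling, signals), restores FP/VEC/VSX state where
 * needed, and re-locks user access. A non-zero return tells the asm exit
 * path to restore all GPRs from the interrupt frame.
 */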
notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
{
	unsigned long ti_flags;
	unsigned long flags;
	unsigned long ret = 0;

	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
		BUG_ON(!(regs->msr & MSR_RI));
	BUG_ON(!(regs->msr & MSR_PR));
	BUG_ON(arch_irq_disabled_regs(regs));
	CT_WARN_ON(ct_state() == CONTEXT_USER);

	/*
	 * We don't need to restore AMR on the way back to userspace for KUAP.
	 * AMR can only have been unlocked if we interrupted the kernel.
	 */
	kuap_assert_locked();

	local_irq_save(flags);

again:
	ti_flags = READ_ONCE(current_thread_info()->flags);
	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
		local_irq_enable(); /* returning to user: may enable */
		if (ti_flags & _TIF_NEED_RESCHED) {
			schedule();
		} else {
			if (ti_flags & _TIF_SIGPENDING)
				ret |= _TIF_RESTOREALL;
			do_notify_resume(regs, ti_flags);
		}
		local_irq_disable();
		ti_flags = READ_ONCE(current_thread_info()->flags);
	}

	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
		    unlikely((ti_flags & _TIF_RESTORE_TM))) {
			restore_tm_state(regs);
		} else {
			unsigned long mathflags = MSR_FP;

			if (cpu_has_feature(CPU_FTR_VSX))
				mathflags |= MSR_VEC | MSR_VSX;
			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
				mathflags |= MSR_VEC;

			/* See above restore_math comment */
			if ((regs->msr & mathflags) != mathflags)
				restore_math(regs);
		}
	}

	user_enter_irqoff();

	if (unlikely(!__prep_irq_for_enabled_exit(true))) {
		user_exit_irqoff();
		local_irq_enable();
		local_irq_disable();
		goto again;
	}

	booke_load_dbcr0();

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	local_paca->tm_scratch = regs->msr;
#endif

	account_cpu_user_exit();

	/* Restore user access locks last */
	kuap_user_restore(regs);
	kuep_unlock();

	return ret;
}

void preempt_schedule_irq(void);

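/*
 * Called when returning to a kernel context from an interrupt. Handles
 * preemption and replay of pending soft-masked interrupts. Returns
 * non-zero if _TIF_EMULATE_STACK_STORE was pending, so the asm exit path
 * can perform the deferred stack store.
 */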
notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr)
{
	unsigned long flags;
	unsigned long ret = 0;
	unsigned long kuap;

	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) &&
	    unlikely(!(regs->msr & MSR_RI)))
		unrecoverable_exception(regs);
	BUG_ON(regs->msr & MSR_PR);
	/*
	 * CT_WARN_ON comes here via program_check_exception,
	 * so avoid recursion.
	 */
	if (TRAP(regs) != INTERRUPT_PROGRAM)
		CT_WARN_ON(ct_state() == CONTEXT_USER);

	kuap = kuap_get_and_assert_locked();

	if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) {
		clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
		ret = 1;
	}

	local_irq_save(flags);

	if (!arch_irq_disabled_regs(regs)) {
		/* Returning to a kernel context with local irqs enabled. */
		WARN_ON_ONCE(!(regs->msr & MSR_EE));
again:
		if (IS_ENABLED(CONFIG_PREEMPT)) {
			/* Return to preemptible kernel context */
			if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED)) {
				if (preempt_count() == 0)
					preempt_schedule_irq();
			}
		}

		if (unlikely(!prep_irq_for_enabled_exit(true, !irqs_disabled_flags(flags))))
			goto again;
	} else {
		/* Returning to a kernel context with local irqs disabled. */
		__hard_EE_RI_disable();
#ifdef CONFIG_PPC64
		if (regs->msr & MSR_EE)
			local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
#endif
	}

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	local_paca->tm_scratch = regs->msr;
#endif

	/*
	 * 64s does not want to mfspr(SPRN_AMR) here, because this comes after
	 * mtmsr, which would cause Read-After-Write stalls. Hence, take the
	 * AMR value from the check above.
	 */
	kuap_kernel_restore(regs, kuap);

	return ret;
}