// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 - ARM Ltd
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 */

#ifndef __ARM64_KVM_HYP_SWITCH_H__
#define __ARM64_KVM_HYP_SWITCH_H__

#include <hyp/adjust_pc.h>
#include <hyp/fault.h>

#include <linux/arm-smccc.h>
#include <linux/kvm_host.h>
#include <linux/types.h>
#include <linux/jump_label.h>
#include <uapi/linux/psci.h>

#include <kvm/arm_psci.h>

#include <asm/barrier.h>
#include <asm/cpufeature.h>
#include <asm/extable.h>
#include <asm/kprobes.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_nested.h>
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>

struct kvm_exception_table_entry {
	int insn, fixup;
};

extern struct kvm_exception_table_entry __start___kvm_ex_table;
extern struct kvm_exception_table_entry __stop___kvm_ex_table;

/* Check whether the FP regs are owned by the guest */
static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.fp_state == FP_STATE_GUEST_OWNED;
}

/* Save the 32-bit only FPSIMD system register state */
static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
{
	if (!vcpu_el1_is_32bit(vcpu))
		return;

	__vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2);
}

static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
{
	/*
	 * We are about to set CPTR_EL2.TFP to trap all floating point
	 * register accesses to EL2, however, the ARM ARM clearly states that
	 * traps are only taken to EL2 if the operation would not otherwise
	 * trap to EL1. Therefore, always make sure that for 32-bit guests,
	 * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
	 * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
	 * it will cause an exception.
	 */
	if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
		write_sysreg(1 << 30, fpexc32_el2);
		isb();
	}
}
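
/*
 * Trap setup shared by the VHE and nVHE guest entry paths: trap AArch32
 * cp15 c15 and EL0 PMU accesses, switch to the guest's MDCR_EL2 and, where
 * SME is implemented, enable the SMPRI_EL1/TPIDR2_EL0 fine-grained traps.
 */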
static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
{
	/* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
	write_sysreg(1 << 15, hstr_el2);

	/*
	 * Make sure we trap PMU access from EL0 to EL2. Also sanitize
	 * PMSELR_EL0 to make sure it never contains the cycle
	 * counter, which could make a PMXEVCNTR_EL0 access UNDEF at
	 * EL1 instead of being trapped to EL2.
	 */
	if (kvm_arm_support_pmu_v3()) {
		struct kvm_cpu_context *hctxt;

		write_sysreg(0, pmselr_el0);

		hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
		ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0);
		write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
		vcpu_set_flag(vcpu, PMUSERENR_ON_CPU);
	}

	vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
	write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);

	if (cpus_have_final_cap(ARM64_SME)) {
		sysreg_clear_set_s(SYS_HFGRTR_EL2,
				   HFGxTR_EL2_nSMPRI_EL1_MASK |
				   HFGxTR_EL2_nTPIDR2_EL0_MASK,
				   0);
		sysreg_clear_set_s(SYS_HFGWTR_EL2,
				   HFGxTR_EL2_nSMPRI_EL1_MASK |
				   HFGxTR_EL2_nTPIDR2_EL0_MASK,
				   0);
	}
}

static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
{
	write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2);

	write_sysreg(0, hstr_el2);
	if (kvm_arm_support_pmu_v3()) {
		struct kvm_cpu_context *hctxt;

		hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
		write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0);
		vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
	}

	if (cpus_have_final_cap(ARM64_SME)) {
		sysreg_clear_set_s(SYS_HFGRTR_EL2, 0,
				   HFGxTR_EL2_nSMPRI_EL1_MASK |
				   HFGxTR_EL2_nTPIDR2_EL0_MASK);
		sysreg_clear_set_s(SYS_HFGWTR_EL2, 0,
				   HFGxTR_EL2_nSMPRI_EL1_MASK |
				   HFGxTR_EL2_nTPIDR2_EL0_MASK);
	}
}
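
/*
 * Program the guest's HCR_EL2 (adding HCR_TVM for the Cavium TX2 #219
 * erratum workaround), resync VSESR_EL2 when a vSError is pending, and set
 * the guest HCRX_EL2 flags when FEAT_HCX is present.
 */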
static inline void ___activate_traps(struct kvm_vcpu *vcpu)
{
	u64 hcr = vcpu->arch.hcr_el2;

	if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
		hcr |= HCR_TVM;

	write_sysreg(hcr, hcr_el2);

	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
		write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);

	if (cpus_have_final_cap(ARM64_HAS_HCX))
		write_sysreg_s(HCRX_GUEST_FLAGS, SYS_HCRX_EL2);
}

static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
{
	/*
	 * If we pended a virtual abort, preserve it until it gets
	 * cleared. See D1.14.3 (Virtual Interrupts) for details, but
	 * the crucial bit is "On taking a vSError interrupt,
	 * HCR_EL2.VSE is cleared to 0."
	 */
	if (vcpu->arch.hcr_el2 & HCR_VSE) {
		vcpu->arch.hcr_el2 &= ~HCR_VSE;
		vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
	}

	if (cpus_have_final_cap(ARM64_HAS_HCX))
		write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
}

static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
{
	return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault);
}

static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
{
	sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
	__sve_restore_state(vcpu_sve_pffr(vcpu),
			    &vcpu->arch.ctxt.fp_regs.fpsr);
	write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
}

/*
 * We trap the first access to the FP/SIMD to save the host context and
 * restore the guest context lazily.
 * If FP/SIMD is not implemented, handle the trap and inject an undefined
 * instruction exception to the guest. Similarly for trapped SVE accesses.
 */
static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	bool sve_guest;
	u8 esr_ec;
	u64 reg;

	if (!system_supports_fpsimd())
		return false;

	sve_guest = vcpu_has_sve(vcpu);
	esr_ec = kvm_vcpu_trap_get_class(vcpu);

	/* Only handle traps the vCPU can support here: */
	switch (esr_ec) {
	case ESR_ELx_EC_FP_ASIMD:
		break;
	case ESR_ELx_EC_SVE:
		if (!sve_guest)
			return false;
		break;
	default:
		return false;
	}

	/* Valid trap. Switch the context: */

	/* First disable enough traps to allow us to update the registers */
	if (has_vhe()) {
		reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
		if (sve_guest)
			reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;

		sysreg_clear_set(cpacr_el1, 0, reg);
	} else {
		reg = CPTR_EL2_TFP;
		if (sve_guest)
			reg |= CPTR_EL2_TZ;

		sysreg_clear_set(cptr_el2, reg, 0);
	}
	isb();

	/* Write out the host state if it's in the registers */
	if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED)
		__fpsimd_save_state(vcpu->arch.host_fpsimd_state);

	/* Restore the guest state */
	if (sve_guest)
		__hyp_sve_restore_guest(vcpu);
	else
		__fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs);

	/* Skip restoring fpexc32 for AArch64 guests */
	if (!(read_sysreg(hcr_el2) & HCR_RW))
		write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);

	vcpu->arch.fp_state = FP_STATE_GUEST_OWNED;

	return true;
}
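
/*
 * Emulate the EL1 VM-control register writes that the Cavium TX2 #219
 * workaround traps via HCR_TVM, unless KVM itself asked for the trap.
 */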
static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
{
	u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
	int rt = kvm_vcpu_sys_get_rt(vcpu);
	u64 val = vcpu_get_reg(vcpu, rt);

	/*
	 * The normal sysreg handling code expects to see the traps,
	 * let's not do anything here.
	 */
	if (vcpu->arch.hcr_el2 & HCR_TVM)
		return false;

	switch (sysreg) {
	case SYS_SCTLR_EL1:
		write_sysreg_el1(val, SYS_SCTLR);
		break;
	case SYS_TTBR0_EL1:
		write_sysreg_el1(val, SYS_TTBR0);
		break;
	case SYS_TTBR1_EL1:
		write_sysreg_el1(val, SYS_TTBR1);
		break;
	case SYS_TCR_EL1:
		write_sysreg_el1(val, SYS_TCR);
		break;
	case SYS_ESR_EL1:
		write_sysreg_el1(val, SYS_ESR);
		break;
	case SYS_FAR_EL1:
		write_sysreg_el1(val, SYS_FAR);
		break;
	case SYS_AFSR0_EL1:
		write_sysreg_el1(val, SYS_AFSR0);
		break;
	case SYS_AFSR1_EL1:
		write_sysreg_el1(val, SYS_AFSR1);
		break;
	case SYS_MAIR_EL1:
		write_sysreg_el1(val, SYS_MAIR);
		break;
	case SYS_AMAIR_EL1:
		write_sysreg_el1(val, SYS_AMAIR);
		break;
	case SYS_CONTEXTIDR_EL1:
		write_sysreg_el1(val, SYS_CONTEXTIDR);
		break;
	default:
		return false;
	}

	__kvm_skip_instr(vcpu);
	return true;
}

static inline bool esr_is_ptrauth_trap(u64 esr)
{
	switch (esr_sys64_to_sysreg(esr)) {
	case SYS_APIAKEYLO_EL1:
	case SYS_APIAKEYHI_EL1:
	case SYS_APIBKEYLO_EL1:
	case SYS_APIBKEYHI_EL1:
	case SYS_APDAKEYLO_EL1:
	case SYS_APDAKEYHI_EL1:
	case SYS_APDBKEYLO_EL1:
	case SYS_APDBKEYHI_EL1:
	case SYS_APGAKEYLO_EL1:
	case SYS_APGAKEYHI_EL1:
		return true;
	}

	return false;
}

#define __ptrauth_save_key(ctxt, key)					\
	do {								\
		u64 __val;						\
		__val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1);	\
		ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val;		\
		__val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1);	\
		ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val;		\
	} while(0)

DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);

static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	struct kvm_cpu_context *ctxt;
	u64 val;

	if (!vcpu_has_ptrauth(vcpu))
		return false;

	ctxt = this_cpu_ptr(&kvm_hyp_ctxt);
	__ptrauth_save_key(ctxt, APIA);
	__ptrauth_save_key(ctxt, APIB);
	__ptrauth_save_key(ctxt, APDA);
	__ptrauth_save_key(ctxt, APDB);
	__ptrauth_save_key(ctxt, APGA);

	vcpu_ptrauth_enable(vcpu);

	val = read_sysreg(hcr_el2);
	val |= (HCR_API | HCR_APK);
	write_sysreg(val, hcr_el2);

	return true;
}
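
/*
 * Handle a trapped read of the physical counter (CNTPCT_EL0/CNTPCTSS_EL0)
 * without leaving hyp, applying any VM/vCPU counter offsets. Accesses that
 * a guest hypervisor has trapped are left to the normal handling path.
 */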
static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *ctxt;
	u32 sysreg;
	u64 val;

	/*
	 * We only get here for 64bit guests, 32bit guests will hit
	 * the long and winding road all the way to the standard
	 * handling. Yes, it sucks to be irrelevant.
	 */
	sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));

	switch (sysreg) {
	case SYS_CNTPCT_EL0:
	case SYS_CNTPCTSS_EL0:
		if (vcpu_has_nv(vcpu)) {
			if (is_hyp_ctxt(vcpu)) {
				ctxt = vcpu_hptimer(vcpu);
				break;
			}

			/* Check for guest hypervisor trapping */
			val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
			if (!vcpu_el2_e2h_is_set(vcpu))
				val = (val & CNTHCTL_EL1PCTEN) << 10;

			if (!(val & (CNTHCTL_EL1PCTEN << 10)))
				return false;
		}

		ctxt = vcpu_ptimer(vcpu);
		break;
	default:
		return false;
	}

	val = arch_timer_read_cntpct_el0();

	if (ctxt->offset.vm_offset)
		val -= *kern_hyp_va(ctxt->offset.vm_offset);
	if (ctxt->offset.vcpu_offset)
		val -= *kern_hyp_va(ctxt->offset.vcpu_offset);

	vcpu_set_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu), val);
	__kvm_skip_instr(vcpu);
	return true;
}

static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
	    handle_tx2_tvm(vcpu))
		return true;

	if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
	    __vgic_v3_perform_cpuif_access(vcpu) == 1)
		return true;

	if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
		return kvm_hyp_handle_ptrauth(vcpu, exit_code);

	if (kvm_hyp_handle_cntpct(vcpu))
		return true;

	return false;
}

static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
	    __vgic_v3_perform_cpuif_access(vcpu) == 1)
		return true;

	return false;
}

static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	if (!__populate_fault_info(vcpu))
		return true;

	return false;
}
static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
	__alias(kvm_hyp_handle_memory_fault);
static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
	__alias(kvm_hyp_handle_memory_fault);
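
/*
 * Handle a lower-EL data abort: populate the fault information and, when
 * the vgic-v2 GICV trap is enabled, emulate valid MMIO accesses to the GIC
 * virtual CPU interface directly from hyp.
 */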
static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	if (kvm_hyp_handle_memory_fault(vcpu, exit_code))
		return true;

	if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
		bool valid;

		valid = kvm_vcpu_trap_get_fault_type(vcpu) == ESR_ELx_FSC_FAULT &&
			kvm_vcpu_dabt_isvalid(vcpu) &&
			!kvm_vcpu_abt_issea(vcpu) &&
			!kvm_vcpu_abt_iss1tw(vcpu);

		if (valid) {
			int ret = __vgic_v2_perform_cpuif_access(vcpu);

			if (ret == 1)
				return true;

			/* Promote an illegal access to an SError. */
			if (ret == -1)
				*exit_code = ARM_EXCEPTION_EL1_SERROR;
		}
	}

	return false;
}

typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *);

static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);

static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code);

/*
 * Allow the hypervisor to handle the exit with an exit handler if it has one.
 *
 * Returns true if the hypervisor handled the exit, and control should go back
 * to the guest, or false if it hasn't.
 */
static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu);
	exit_handler_fn fn;

	fn = handlers[kvm_vcpu_trap_get_class(vcpu)];

	if (fn)
		return fn(vcpu, exit_code);

	return false;
}

static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	/*
	 * Check for the conditions of Cortex-A510's #2077057. When these occur
	 * SPSR_EL2 can't be trusted, but isn't needed either as it is
	 * unchanged from the value in vcpu_gp_regs(vcpu)->pstate.
	 * Are we single-stepping the guest, and took a PAC exception from the
	 * active-not-pending state?
	 */
	if (cpus_have_final_cap(ARM64_WORKAROUND_2077057) &&
	    vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
	    *vcpu_cpsr(vcpu) & DBG_SPSR_SS &&
	    ESR_ELx_EC(read_sysreg_el2(SYS_ESR)) == ESR_ELx_EC_PAC)
		write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);

	vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
}

/*
 * Return true when we were able to fixup the guest exit and should return to
 * the guest, false when we should restore the host state and return to the
 * main run loop.
 */
static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	/*
	 * Save PSTATE early so that we can evaluate the vcpu mode
	 * early on.
	 */
	synchronize_vcpu_pstate(vcpu, exit_code);

	/*
	 * Check whether we want to repaint the state one way or
	 * another.
	 */
	early_exit_filter(vcpu, exit_code);

	if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
		vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);

	if (ARM_SERROR_PENDING(*exit_code) &&
	    ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) {
		u8 esr_ec = kvm_vcpu_trap_get_class(vcpu);

		/*
		 * An HVC already has an adjusted PC, which we need to
		 * correct in order to return to after having injected
		 * the SError.
		 *
		 * SMC, on the other hand, is *trapped*, meaning its
		 * preferred return address is the SMC itself.
		 */
		if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64)
			write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR);
	}

	/*
	 * We're using the raw exception code in order to only process
	 * the trap if no SError is pending. We will come back to the
	 * same PC once the SError has been injected, and replay the
	 * trapping instruction.
	 */
	if (*exit_code != ARM_EXCEPTION_TRAP)
		goto exit;

	/* Check if there's an exit handler and allow it to handle the exit. */
	if (kvm_hyp_handle_exit(vcpu, exit_code))
		goto guest;
exit:
	/* Return to the host kernel and handle the exit */
	return false;

guest:
	/* Re-enter the guest */
	asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412));
	return true;
}
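
/*
 * An unexpected exception was taken at EL2: look for a fixup handler in
 * the __kvm_ex_table and branch to it, or panic once the hyp context has
 * been restored if no entry matches.
 */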
static inline void __kvm_unexpected_el2_exception(void)
{
	extern char __guest_exit_panic[];
	unsigned long addr, fixup;
	struct kvm_exception_table_entry *entry, *end;
	unsigned long elr_el2 = read_sysreg(elr_el2);

	entry = &__start___kvm_ex_table;
	end = &__stop___kvm_ex_table;

	while (entry < end) {
		addr = (unsigned long)&entry->insn + entry->insn;
		fixup = (unsigned long)&entry->fixup + entry->fixup;

		if (addr != elr_el2) {
			entry++;
			continue;
		}

		write_sysreg(fixup, elr_el2);
		return;
	}

	/* Trigger a panic after restoring the hyp context. */
	write_sysreg(__guest_exit_panic, elr_el2);
}

#endif /* __ARM64_KVM_HYP_SWITCH_H__ */