// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 - ARM Ltd
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 */

#ifndef __ARM64_KVM_HYP_SWITCH_H__
#define __ARM64_KVM_HYP_SWITCH_H__

#include <linux/arm-smccc.h>
#include <linux/kvm_host.h>
#include <linux/types.h>
#include <linux/jump_label.h>
#include <uapi/linux/psci.h>

#include <kvm/arm_psci.h>

#include <asm/barrier.h>
#include <asm/cpufeature.h>
#include <asm/extable.h>
#include <asm/kprobes.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
#include <asm/thread_info.h>

extern const char __hyp_panic_string[];

extern struct exception_table_entry __start___kvm_ex_table;
extern struct exception_table_entry __stop___kvm_ex_table;

/* Check whether the FP regs were dirtied while in the host-side run loop: */
static inline bool update_fp_enabled(struct kvm_vcpu *vcpu)
{
	/*
	 * When the system doesn't support FP/SIMD, we cannot rely on
	 * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an
	 * abort on the very first access to FP and thus we should never
	 * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always
	 * trap the accesses.
	 */
	if (!system_supports_fpsimd() ||
	    vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
		vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
				      KVM_ARM64_FP_HOST);

	return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
}

/* Save the 32-bit only FPSIMD system register state */
static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
{
	if (!vcpu_el1_is_32bit(vcpu))
		return;

	__vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2);
}

static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
{
	/*
	 * We are about to set CPTR_EL2.TFP to trap all floating point
	 * register accesses to EL2, however, the ARM ARM clearly states that
	 * traps are only taken to EL2 if the operation would not otherwise
	 * trap to EL1. Therefore, always make sure that for 32-bit guests,
	 * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
	 * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
	 * it will cause an exception.
	 */
	if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
		write_sysreg(1 << 30, fpexc32_el2);
		isb();
	}
}

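/* Trap controls that are common to the VHE and nVHE world-switch paths */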
static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
{
	/* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
	write_sysreg(1 << 15, hstr_el2);

	/*
	 * Make sure we trap PMU access from EL0 to EL2. Also sanitize
	 * PMSELR_EL0 to make sure it never contains the cycle
	 * counter, which could make a PMXEVCNTR_EL0 access UNDEF at
	 * EL1 instead of being trapped to EL2.
	 */
	write_sysreg(0, pmselr_el0);
	write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
	write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
}

static inline void __deactivate_traps_common(void)
{
	write_sysreg(0, hstr_el2);
	write_sysreg(0, pmuserenr_el0);
}

static inline void ___activate_traps(struct kvm_vcpu *vcpu)
{
	u64 hcr = vcpu->arch.hcr_el2;

	if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
		hcr |= HCR_TVM;

	write_sysreg(hcr, hcr_el2);

	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
		write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
}

static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
{
	/*
	 * If we pended a virtual abort, preserve it until it gets
	 * cleared. See D1.14.3 (Virtual Interrupts) for details, but
	 * the crucial bit is "On taking a vSError interrupt,
	 * HCR_EL2.VSE is cleared to 0."
	 */
	if (vcpu->arch.hcr_el2 & HCR_VSE) {
		vcpu->arch.hcr_el2 &= ~HCR_VSE;
		vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
	}
}

static inline void __activate_vm(struct kvm_s2_mmu *mmu)
{
	__load_guest_stage2(mmu);
}

static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
{
	u64 par, tmp;

	/*
	 * Resolve the IPA the hard way using the guest VA.
	 *
	 * Stage-1 translation already validated the memory access
	 * rights. As such, we can use the EL1 translation regime, and
	 * don't have to distinguish between EL0 and EL1 access.
	 *
	 * We do need to save/restore PAR_EL1 though, as we haven't
	 * saved the guest context yet, and we may return early...
	 */
	par = read_sysreg(par_el1);
	if (!__kvm_at("s1e1r", far))
		tmp = read_sysreg(par_el1);
	else
		tmp = SYS_PAR_EL1_F; /* back to the guest */
	write_sysreg(par, par_el1);

	if (unlikely(tmp & SYS_PAR_EL1_F))
		return false; /* Translation failed, back to guest */

	/* Convert PAR to HPFAR format */
	*hpfar = PAR_TO_HPFAR(tmp);
	return true;
}

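/*
 * Snapshot the fault information (FAR_EL2, and HPFAR_EL2 where valid) for
 * a guest abort. Returns false if the IPA could not be resolved, in which
 * case we go back to the guest so it can retry the faulting access.
 */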
static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
{
	u8 ec;
	u64 esr;
	u64 hpfar, far;

	esr = vcpu->arch.fault.esr_el2;
	ec = ESR_ELx_EC(esr);

	if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
		return true;

	far = read_sysreg_el2(SYS_FAR);

	/*
	 * The HPFAR can be invalid if the stage 2 fault did not
	 * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
	 * bit is clear) and one of the two following cases is true:
	 * 1. The fault was due to a permission fault
	 * 2. The processor carries errata 834220
	 *
	 * Therefore, for all non S1PTW faults where we either have a
	 * permission fault or the errata workaround is enabled, we
	 * resolve the IPA using the AT instruction.
	 */
	if (!(esr & ESR_ELx_S1PTW) &&
	    (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
	     (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
		if (!__translate_far_to_hpfar(far, &hpfar))
			return false;
	} else {
		hpfar = read_sysreg(hpfar_el2);
	}

	vcpu->arch.fault.far_el2 = far;
	vcpu->arch.fault.hpfar_el2 = hpfar;
	return true;
}

/* Check for an FPSIMD/SVE trap and handle as appropriate */
static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
{
	bool vhe, sve_guest, sve_host;
	u8 esr_ec;

	if (!system_supports_fpsimd())
		return false;

	/*
	 * system_supports_sve() currently implies has_vhe(), so the check
	 * is redundant. However, has_vhe() can be determined statically
	 * and helps the compiler remove dead code.
	 */
	if (has_vhe() && system_supports_sve()) {
		sve_guest = vcpu_has_sve(vcpu);
		sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
		vhe = true;
	} else {
		sve_guest = false;
		sve_host = false;
		vhe = has_vhe();
	}

	esr_ec = kvm_vcpu_trap_get_class(vcpu);
	if (esr_ec != ESR_ELx_EC_FP_ASIMD &&
	    esr_ec != ESR_ELx_EC_SVE)
		return false;

	/* Don't handle SVE traps for non-SVE vcpus here: */
	if (!sve_guest)
		if (esr_ec != ESR_ELx_EC_FP_ASIMD)
			return false;

	/* Valid trap. Switch the context: */

	if (vhe) {
		u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN;

		if (sve_guest)
			reg |= CPACR_EL1_ZEN;

		write_sysreg(reg, cpacr_el1);
	} else {
		write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
			     cptr_el2);
	}

	isb();

	if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
		/*
		 * In the SVE case, VHE is assumed: it is enforced by
		 * Kconfig and kvm_arch_init().
		 */
		if (sve_host) {
			struct thread_struct *thread = container_of(
				vcpu->arch.host_fpsimd_state,
				struct thread_struct, uw.fpsimd_state);

			sve_save_state(sve_pffr(thread),
				       &vcpu->arch.host_fpsimd_state->fpsr);
		} else {
			__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
		}

		vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
	}

	if (sve_guest) {
		sve_load_state(vcpu_sve_pffr(vcpu),
			       &vcpu->arch.ctxt.fp_regs.fpsr,
			       sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
		write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL12);
	} else {
		__fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs);
	}

	/* Skip restoring fpexc32 for AArch64 guests */
	if (!(read_sysreg(hcr_el2) & HCR_RW))
		write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);

	vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;

	return true;
}

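/*
 * Handle guest writes to the EL1 VM-control registers that are trapped by
 * HCR_EL2.TVM as part of the Cavium ThunderX2 erratum 219 workaround:
 * perform the write on the guest's behalf and skip the trapped instruction.
 */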
static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
{
	u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
	int rt = kvm_vcpu_sys_get_rt(vcpu);
	u64 val = vcpu_get_reg(vcpu, rt);

	/*
	 * The normal sysreg handling code expects to see the traps,
	 * let's not do anything here.
	 */
	if (vcpu->arch.hcr_el2 & HCR_TVM)
		return false;

	switch (sysreg) {
	case SYS_SCTLR_EL1:
		write_sysreg_el1(val, SYS_SCTLR);
		break;
	case SYS_TTBR0_EL1:
		write_sysreg_el1(val, SYS_TTBR0);
		break;
	case SYS_TTBR1_EL1:
		write_sysreg_el1(val, SYS_TTBR1);
		break;
	case SYS_TCR_EL1:
		write_sysreg_el1(val, SYS_TCR);
		break;
	case SYS_ESR_EL1:
		write_sysreg_el1(val, SYS_ESR);
		break;
	case SYS_FAR_EL1:
		write_sysreg_el1(val, SYS_FAR);
		break;
	case SYS_AFSR0_EL1:
		write_sysreg_el1(val, SYS_AFSR0);
		break;
	case SYS_AFSR1_EL1:
		write_sysreg_el1(val, SYS_AFSR1);
		break;
	case SYS_MAIR_EL1:
		write_sysreg_el1(val, SYS_MAIR);
		break;
	case SYS_AMAIR_EL1:
		write_sysreg_el1(val, SYS_AMAIR);
		break;
	case SYS_CONTEXTIDR_EL1:
		write_sysreg_el1(val, SYS_CONTEXTIDR);
		break;
	default:
		return false;
	}

	__kvm_skip_instr(vcpu);
	return true;
}

static inline bool esr_is_ptrauth_trap(u32 esr)
{
	u32 ec = ESR_ELx_EC(esr);

	if (ec == ESR_ELx_EC_PAC)
		return true;

	if (ec != ESR_ELx_EC_SYS64)
		return false;

	switch (esr_sys64_to_sysreg(esr)) {
	case SYS_APIAKEYLO_EL1:
	case SYS_APIAKEYHI_EL1:
	case SYS_APIBKEYLO_EL1:
	case SYS_APIBKEYHI_EL1:
	case SYS_APDAKEYLO_EL1:
	case SYS_APDAKEYHI_EL1:
	case SYS_APDBKEYLO_EL1:
	case SYS_APDBKEYHI_EL1:
	case SYS_APGAKEYLO_EL1:
	case SYS_APGAKEYHI_EL1:
		return true;
	}

	return false;
}

#define __ptrauth_save_key(ctxt, key)					\
	do {								\
		u64 __val;						\
		__val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1);	\
		ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val;		\
		__val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1);	\
		ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val;		\
	} while(0)

static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
{
	struct kvm_cpu_context *ctxt;
	u64 val;

	if (!vcpu_has_ptrauth(vcpu) ||
	    !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
		return false;

	ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
	__ptrauth_save_key(ctxt, APIA);
	__ptrauth_save_key(ctxt, APIB);
	__ptrauth_save_key(ctxt, APDA);
	__ptrauth_save_key(ctxt, APDB);
	__ptrauth_save_key(ctxt, APGA);

	vcpu_ptrauth_enable(vcpu);

	val = read_sysreg(hcr_el2);
	val |= (HCR_API | HCR_APK);
	write_sysreg(val, hcr_el2);

	return true;
}

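/*
 * A rough sketch of how a world-switch run loop is expected to drive
 * fixup_guest_exit() below; the real loops live in the VHE and nVHE
 * copies of switch.c, and the exact __guest_enter() signature is theirs:
 *
 *	do {
 *		exit_code = __guest_enter(vcpu, host_ctxt);
 *	} while (fixup_guest_exit(vcpu, &exit_code));
 */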
/*
 * Return true when we were able to fixup the guest exit and should return to
 * the guest, false when we should restore the host state and return to the
 * main run loop.
 */
static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
		vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);

	/*
	 * We're using the raw exception code in order to only process
	 * the trap if no SError is pending. We will come back to the
	 * same PC once the SError has been injected, and replay the
	 * trapping instruction.
	 */
	if (*exit_code != ARM_EXCEPTION_TRAP)
		goto exit;

	if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
	    kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
	    handle_tx2_tvm(vcpu))
		return true;

	/*
	 * We trap the first access to the FP/SIMD to save the host context
	 * and restore the guest context lazily.
	 * If FP/SIMD is not implemented, handle the trap and inject an
	 * undefined instruction exception to the guest.
	 * Similarly for trapped SVE accesses.
	 */
	if (__hyp_handle_fpsimd(vcpu))
		return true;

	if (__hyp_handle_ptrauth(vcpu))
		return true;

	if (!__populate_fault_info(vcpu))
		return true;

	if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
		bool valid;

		valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
			kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
			kvm_vcpu_dabt_isvalid(vcpu) &&
			!kvm_vcpu_abt_issea(vcpu) &&
			!kvm_vcpu_dabt_iss1tw(vcpu);

		if (valid) {
			int ret = __vgic_v2_perform_cpuif_access(vcpu);

			if (ret == 1)
				return true;

			/* Promote an illegal access to an SError. */
			if (ret == -1)
				*exit_code = ARM_EXCEPTION_EL1_SERROR;

			goto exit;
		}
	}

	if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
	    (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
	     kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
		int ret = __vgic_v3_perform_cpuif_access(vcpu);

		if (ret == 1)
			return true;
	}

exit:
	/* Return to the host kernel and handle the exit */
	return false;
}

static inline bool __needs_ssbd_off(struct kvm_vcpu *vcpu)
{
	if (!cpus_have_final_cap(ARM64_SSBD))
		return false;

	return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG);
}

static inline void __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_ARM64_SSBD
	/*
	 * The host runs with the workaround always present. If the
	 * guest wants it disabled, so be it...
	 */
	if (__needs_ssbd_off(vcpu) &&
	    __hyp_this_cpu_read(arm64_ssbd_callback_required))
		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL);
#endif
}

static inline void __set_host_arch_workaround_state(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_ARM64_SSBD
	/*
	 * If the guest has disabled the workaround, bring it back on.
	 */
	if (__needs_ssbd_off(vcpu) &&
	    __hyp_this_cpu_read(arm64_ssbd_callback_required))
		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL);
#endif
}

static inline void __kvm_unexpected_el2_exception(void)
{
	unsigned long addr, fixup;
	struct kvm_cpu_context *host_ctxt;
	struct exception_table_entry *entry, *end;
	unsigned long elr_el2 = read_sysreg(elr_el2);

	entry = hyp_symbol_addr(__start___kvm_ex_table);
	end = hyp_symbol_addr(__stop___kvm_ex_table);
	host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;

	while (entry < end) {
		addr = (unsigned long)&entry->insn + entry->insn;
		fixup = (unsigned long)&entry->fixup + entry->fixup;

		if (addr != elr_el2) {
			entry++;
			continue;
		}

		write_sysreg(fixup, elr_el2);
		return;
	}

	hyp_panic(host_ctxt);
}

#endif /* __ARM64_KVM_HYP_SWITCH_H__ */