// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 - ARM Ltd
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 */

#ifndef __ARM64_KVM_HYP_SWITCH_H__
#define __ARM64_KVM_HYP_SWITCH_H__

#include <hyp/adjust_pc.h>

#include <linux/arm-smccc.h>
#include <linux/kvm_host.h>
#include <linux/types.h>
#include <linux/jump_label.h>
#include <uapi/linux/psci.h>

#include <kvm/arm_psci.h>

#include <asm/barrier.h>
#include <asm/cpufeature.h>
#include <asm/extable.h>
#include <asm/kprobes.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
#include <asm/thread_info.h>

extern const char __hyp_panic_string[];

extern struct exception_table_entry __start___kvm_ex_table;
extern struct exception_table_entry __stop___kvm_ex_table;

/* Check whether the FP regs were dirtied while in the host-side run loop: */
static inline bool update_fp_enabled(struct kvm_vcpu *vcpu)
{
	/*
	 * When the system doesn't support FP/SIMD, we cannot rely on
	 * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an
	 * abort on the very first access to FP and thus we should never
	 * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always
	 * trap the accesses.
	 */
	if (!system_supports_fpsimd() ||
	    vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
		vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
				      KVM_ARM64_FP_HOST);

	return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
}

/* Save the 32-bit only FPSIMD system register state */
static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
{
	if (!vcpu_el1_is_32bit(vcpu))
		return;

	__vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2);
}

static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
{
	/*
	 * We are about to set CPTR_EL2.TFP to trap all floating point
	 * register accesses to EL2, however, the ARM ARM clearly states that
	 * traps are only taken to EL2 if the operation would not otherwise
	 * trap to EL1. Therefore, always make sure that for 32-bit guests,
	 * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
	 * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
	 * it will cause an exception.
	 */
	if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
		write_sysreg(1 << 30, fpexc32_el2);
		isb();
	}
}

static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
{
	/* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
	write_sysreg(1 << 15, hstr_el2);

	/*
	 * Make sure we trap PMU access from EL0 to EL2. Also sanitize
	 * PMSELR_EL0 to make sure it never contains the cycle
	 * counter, which could make a PMXEVCNTR_EL0 access UNDEF at
	 * EL1 instead of being trapped to EL2.
	 */
	if (kvm_arm_support_pmu_v3()) {
		write_sysreg(0, pmselr_el0);
		write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
	}
	write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
}

static inline void __deactivate_traps_common(void)
{
	write_sysreg(0, hstr_el2);
	if (kvm_arm_support_pmu_v3())
		write_sysreg(0, pmuserenr_el0);
}

static inline void ___activate_traps(struct kvm_vcpu *vcpu)
{
	u64 hcr = vcpu->arch.hcr_el2;

	if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
		hcr |= HCR_TVM;

	write_sysreg(hcr, hcr_el2);

	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
		write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
}

static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
{
	/*
	 * If we pended a virtual abort, preserve it until it gets
	 * cleared. See D1.14.3 (Virtual Interrupts) for details, but
	 * the crucial bit is "On taking a vSError interrupt,
	 * HCR_EL2.VSE is cleared to 0."
	 */
	if (vcpu->arch.hcr_el2 & HCR_VSE) {
		vcpu->arch.hcr_el2 &= ~HCR_VSE;
		vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
	}
}

static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
{
	u64 par, tmp;

	/*
	 * Resolve the IPA the hard way using the guest VA.
	 *
	 * Stage-1 translation already validated the memory access
	 * rights. As such, we can use the EL1 translation regime, and
	 * don't have to distinguish between EL0 and EL1 access.
	 *
	 * We do need to save/restore PAR_EL1 though, as we haven't
	 * saved the guest context yet, and we may return early...
	 */
	par = read_sysreg_par();
	if (!__kvm_at("s1e1r", far))
		tmp = read_sysreg_par();
	else
		tmp = SYS_PAR_EL1_F; /* back to the guest */
	write_sysreg(par, par_el1);

	if (unlikely(tmp & SYS_PAR_EL1_F))
		return false; /* Translation failed, back to guest */

	/* Convert PAR to HPFAR format */
	*hpfar = PAR_TO_HPFAR(tmp);
	return true;
}
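
/*
 * Background note (architectural context, not derived from this file): a
 * successful AT S1E1R walk leaves the output PA in PAR_EL1, while HPFAR_EL2
 * reports the faulting IPA in its FIPA field, i.e. the IPA shifted down by
 * 12 and placed at bit 4. PAR_TO_HPFAR() (see asm/kvm_arm.h for the
 * authoritative definition) therefore boils down to masking the PA bits of
 * PAR_EL1 and shifting them right by 8, roughly:
 *
 *	hpfar = (par & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8;
 */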

static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
{
	u8 ec;
	u64 esr;
	u64 hpfar, far;

	esr = vcpu->arch.fault.esr_el2;
	ec = ESR_ELx_EC(esr);

	if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
		return true;

	far = read_sysreg_el2(SYS_FAR);

	/*
	 * The HPFAR can be invalid if the stage 2 fault did not
	 * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
	 * bit is clear) and one of the two following cases is true:
	 * 1. The fault was due to a permission fault
	 * 2. The processor is affected by erratum 834220
	 *
	 * Therefore, for all non-S1PTW faults where we either have a
	 * permission fault or the erratum workaround is enabled, we
	 * resolve the IPA using the AT instruction.
	 */
	if (!(esr & ESR_ELx_S1PTW) &&
	    (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
	     (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
		if (!__translate_far_to_hpfar(far, &hpfar))
			return false;
	} else {
		hpfar = read_sysreg(hpfar_el2);
	}

	vcpu->arch.fault.far_el2 = far;
	vcpu->arch.fault.hpfar_el2 = hpfar;
	return true;
}

/* Check for an FPSIMD/SVE trap and handle as appropriate */
static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
{
	bool vhe, sve_guest, sve_host;
	u8 esr_ec;

	if (!system_supports_fpsimd())
		return false;

	/*
	 * Currently, system_supports_sve() implies has_vhe(), so the
	 * check is redundant. However, has_vhe() can be determined
	 * statically and helps the compiler remove dead code.
	 */
	if (has_vhe() && system_supports_sve()) {
		sve_guest = vcpu_has_sve(vcpu);
		sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
		vhe = true;
	} else {
		sve_guest = false;
		sve_host = false;
		vhe = has_vhe();
	}

	esr_ec = kvm_vcpu_trap_get_class(vcpu);
	if (esr_ec != ESR_ELx_EC_FP_ASIMD &&
	    esr_ec != ESR_ELx_EC_SVE)
		return false;

	/* Don't handle SVE traps for non-SVE vcpus here: */
	if (!sve_guest)
		if (esr_ec != ESR_ELx_EC_FP_ASIMD)
			return false;

	/* Valid trap. Switch the context: */

	if (vhe) {
		u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN;

		if (sve_guest)
			reg |= CPACR_EL1_ZEN;

		write_sysreg(reg, cpacr_el1);
	} else {
		write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
			     cptr_el2);
	}

	isb();

	if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
		/*
		 * In the SVE case, VHE is assumed: it is enforced by
		 * Kconfig and kvm_arch_init().
		 */
		if (sve_host) {
			struct thread_struct *thread = container_of(
				vcpu->arch.host_fpsimd_state,
				struct thread_struct, uw.fpsimd_state);

			sve_save_state(sve_pffr(thread),
				       &vcpu->arch.host_fpsimd_state->fpsr);
		} else {
			__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
		}

		vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
	}

	if (sve_guest) {
		sve_load_state(vcpu_sve_pffr(vcpu),
			       &vcpu->arch.ctxt.fp_regs.fpsr,
			       sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
		write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL12);
	} else {
		__fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs);
	}

	/* Skip restoring fpexc32 for AArch64 guests */
	if (!(read_sysreg(hcr_el2) & HCR_RW))
		write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);

	vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;

	return true;
}
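
/*
 * Rough sketch of the lazy FP/SIMD switch that __hyp_handle_fpsimd() is part
 * of. The flag management outside this header lives elsewhere (fpsimd.c), so
 * treat this as an illustration rather than a contract:
 *
 *   vcpu load:        KVM_ARM64_FP_HOST set, KVM_ARM64_FP_ENABLED clear,
 *                     FP/SIMD traps left enabled for the guest.
 *   first guest trap: __hyp_handle_fpsimd() saves the host state (clearing
 *                     FP_HOST), loads the guest state, sets FP_ENABLED and
 *                     disables the trap, then re-enters the guest.
 *   back in the host: update_fp_enabled() drops both flags again if the
 *                     host dirtied the FP regs (_TIF_FOREIGN_FPSTATE).
 */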

static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
{
	u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
	int rt = kvm_vcpu_sys_get_rt(vcpu);
	u64 val = vcpu_get_reg(vcpu, rt);

	/*
	 * The normal sysreg handling code expects to see the traps;
	 * let's not do anything here.
	 */
	if (vcpu->arch.hcr_el2 & HCR_TVM)
		return false;

	switch (sysreg) {
	case SYS_SCTLR_EL1:
		write_sysreg_el1(val, SYS_SCTLR);
		break;
	case SYS_TTBR0_EL1:
		write_sysreg_el1(val, SYS_TTBR0);
		break;
	case SYS_TTBR1_EL1:
		write_sysreg_el1(val, SYS_TTBR1);
		break;
	case SYS_TCR_EL1:
		write_sysreg_el1(val, SYS_TCR);
		break;
	case SYS_ESR_EL1:
		write_sysreg_el1(val, SYS_ESR);
		break;
	case SYS_FAR_EL1:
		write_sysreg_el1(val, SYS_FAR);
		break;
	case SYS_AFSR0_EL1:
		write_sysreg_el1(val, SYS_AFSR0);
		break;
	case SYS_AFSR1_EL1:
		write_sysreg_el1(val, SYS_AFSR1);
		break;
	case SYS_MAIR_EL1:
		write_sysreg_el1(val, SYS_MAIR);
		break;
	case SYS_AMAIR_EL1:
		write_sysreg_el1(val, SYS_AMAIR);
		break;
	case SYS_CONTEXTIDR_EL1:
		write_sysreg_el1(val, SYS_CONTEXTIDR);
		break;
	default:
		return false;
	}

	__kvm_skip_instr(vcpu);
	return true;
}

/* Does this syndrome describe a trapped ptrauth use or key register access? */
static inline bool esr_is_ptrauth_trap(u32 esr)
{
	u32 ec = ESR_ELx_EC(esr);

	if (ec == ESR_ELx_EC_PAC)
		return true;

	if (ec != ESR_ELx_EC_SYS64)
		return false;

	switch (esr_sys64_to_sysreg(esr)) {
	case SYS_APIAKEYLO_EL1:
	case SYS_APIAKEYHI_EL1:
	case SYS_APIBKEYLO_EL1:
	case SYS_APIBKEYHI_EL1:
	case SYS_APDAKEYLO_EL1:
	case SYS_APDAKEYHI_EL1:
	case SYS_APDBKEYLO_EL1:
	case SYS_APDBKEYHI_EL1:
	case SYS_APGAKEYLO_EL1:
	case SYS_APGAKEYHI_EL1:
		return true;
	}

	return false;
}

/* Save both halves of one pointer authentication key into @ctxt */
#define __ptrauth_save_key(ctxt, key)					\
	do {								\
		u64 __val;						\
		__val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1);	\
		ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val;		\
		__val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1);	\
		ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val;		\
	} while(0)

DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);

/*
 * Handle the guest's first ptrauth trap: stash the current (host) keys in the
 * hyp context, enable ptrauth for the vcpu and stop trapping key accesses
 * (HCR_EL2.{API,APK}).
 */
static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
{
	struct kvm_cpu_context *ctxt;
	u64 val;

	if (!vcpu_has_ptrauth(vcpu) ||
	    !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
		return false;

	ctxt = this_cpu_ptr(&kvm_hyp_ctxt);
	__ptrauth_save_key(ctxt, APIA);
	__ptrauth_save_key(ctxt, APIB);
	__ptrauth_save_key(ctxt, APDA);
	__ptrauth_save_key(ctxt, APDB);
	__ptrauth_save_key(ctxt, APGA);

	vcpu_ptrauth_enable(vcpu);

	val = read_sysreg(hcr_el2);
	val |= (HCR_API | HCR_APK);
	write_sysreg(val, hcr_el2);

	return true;
}
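
/*
 * Illustration only (the callers live in the VHE/nVHE switch.c run loops,
 * not in this header): fixup_guest_exit() below is written to sit in a loop
 * shaped roughly like
 *
 *	do {
 *		exit_code = __guest_enter(vcpu);
 *	} while (fixup_guest_exit(vcpu, &exit_code));
 *
 * so a "true" return re-enters the guest directly, while "false" hands
 * exit_code back to the host-side exit handlers.
 */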

/*
 * Return true when we were able to fixup the guest exit and should return to
 * the guest, false when we should restore the host state and return to the
 * main run loop.
 */
static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
		vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);

	if (ARM_SERROR_PENDING(*exit_code)) {
		u8 esr_ec = kvm_vcpu_trap_get_class(vcpu);

		/*
		 * HVC exceptions already have an adjusted PC, which we need
		 * to correct so that we return to the HVC itself once the
		 * SError has been injected.
		 *
		 * SMC, on the other hand, is *trapped*, meaning its
		 * preferred return address is the SMC itself.
		 */
		if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64)
			write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR);
	}

	/*
	 * We're using the raw exception code in order to only process
	 * the trap if no SError is pending. We will come back to the
	 * same PC once the SError has been injected, and replay the
	 * trapping instruction.
	 */
	if (*exit_code != ARM_EXCEPTION_TRAP)
		goto exit;

	if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
	    kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
	    handle_tx2_tvm(vcpu))
		goto guest;

	/*
	 * We trap the first access to the FP/SIMD to save the host context
	 * and restore the guest context lazily.
	 * If FP/SIMD is not implemented, handle the trap and inject an
	 * undefined instruction exception to the guest.
	 * Similarly for trapped SVE accesses.
	 */
	if (__hyp_handle_fpsimd(vcpu))
		goto guest;

	if (__hyp_handle_ptrauth(vcpu))
		goto guest;

	if (!__populate_fault_info(vcpu))
		goto guest;

	if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
		bool valid;

		valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
			kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
			kvm_vcpu_dabt_isvalid(vcpu) &&
			!kvm_vcpu_abt_issea(vcpu) &&
			!kvm_vcpu_abt_iss1tw(vcpu);

		if (valid) {
			int ret = __vgic_v2_perform_cpuif_access(vcpu);

			if (ret == 1)
				goto guest;

			/* Promote an illegal access to an SError. */
			if (ret == -1)
				*exit_code = ARM_EXCEPTION_EL1_SERROR;

			goto exit;
		}
	}

	if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
	    (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
	     kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
		int ret = __vgic_v3_perform_cpuif_access(vcpu);

		if (ret == 1)
			goto guest;
	}

exit:
	/* Return to the host kernel and handle the exit */
	return false;

guest:
	/* Re-enter the guest */
	asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412));
	return true;
}

/*
 * Redirect an unexpected exception taken at EL2 to its __kvm_ex_table fixup,
 * or arrange to panic once the hyp context has been restored.
 */
static inline void __kvm_unexpected_el2_exception(void)
{
	extern char __guest_exit_panic[];
	unsigned long addr, fixup;
	struct exception_table_entry *entry, *end;
	unsigned long elr_el2 = read_sysreg(elr_el2);

	entry = &__start___kvm_ex_table;
	end = &__stop___kvm_ex_table;

	while (entry < end) {
		addr = (unsigned long)&entry->insn + entry->insn;
		fixup = (unsigned long)&entry->fixup + entry->fixup;

		if (addr != elr_el2) {
			entry++;
			continue;
		}

		write_sysreg(fixup, elr_el2);
		return;
	}

	/* Trigger a panic after restoring the hyp context. */
	write_sysreg(__guest_exit_panic, elr_el2);
}

#endif /* __ARM64_KVM_HYP_SWITCH_H__ */