// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 - ARM Ltd
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 */

#ifndef __ARM64_KVM_HYP_SWITCH_H__
#define __ARM64_KVM_HYP_SWITCH_H__

#include <hyp/adjust_pc.h>

#include <linux/arm-smccc.h>
#include <linux/kvm_host.h>
#include <linux/types.h>
#include <linux/jump_label.h>
#include <uapi/linux/psci.h>

#include <kvm/arm_psci.h>

#include <asm/barrier.h>
#include <asm/cpufeature.h>
#include <asm/extable.h>
#include <asm/kprobes.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
#include <asm/thread_info.h>

extern const char __hyp_panic_string[];

extern struct exception_table_entry __start___kvm_ex_table;
extern struct exception_table_entry __stop___kvm_ex_table;

/* Check whether the FP regs were dirtied while in the host-side run loop: */
static inline bool update_fp_enabled(struct kvm_vcpu *vcpu)
{
	/*
	 * When the system doesn't support FP/SIMD, we cannot rely on
	 * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an
	 * abort on the very first access to FP and thus we should never
	 * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always
	 * trap the accesses.
	 */
	if (!system_supports_fpsimd() ||
	    vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
		vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
				      KVM_ARM64_FP_HOST);

	return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
}

/* Save the 32-bit only FPSIMD system register state */
static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
{
	if (!vcpu_el1_is_32bit(vcpu))
		return;

	__vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2);
}

static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
{
	/*
	 * We are about to set CPTR_EL2.TFP to trap all floating point
	 * register accesses to EL2, however, the ARM ARM clearly states that
	 * traps are only taken to EL2 if the operation would not otherwise
	 * trap to EL1. Therefore, always make sure that for 32-bit guests,
	 * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
	 * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
	 * it will cause an exception.
	 */
	if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
		write_sysreg(1 << 30, fpexc32_el2);
		isb();
	}
}

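
/*
 * For reference only: the 1 << 30 written in __activate_traps_fpsimd32()
 * above is the FPEXC32_EL2.EN bit. Spelled out with a purely hypothetical
 * named constant (no such define is assumed to exist in the tree), the
 * write would read something like:
 *
 *	write_sysreg(FPEXC32_EL2_EN, fpexc32_el2);
 *
 * The open-coded constant above is what is actually used.
 */
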
static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
{
	/* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
	write_sysreg(1 << 15, hstr_el2);

	/*
	 * Make sure we trap PMU access from EL0 to EL2. Also sanitize
	 * PMSELR_EL0 to make sure it never contains the cycle
	 * counter, which could make a PMXEVCNTR_EL0 access UNDEF at
	 * EL1 instead of being trapped to EL2.
	 */
	write_sysreg(0, pmselr_el0);
	write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
	write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
}

static inline void __deactivate_traps_common(void)
{
	write_sysreg(0, hstr_el2);
	write_sysreg(0, pmuserenr_el0);
}

static inline void ___activate_traps(struct kvm_vcpu *vcpu)
{
	u64 hcr = vcpu->arch.hcr_el2;

	if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
		hcr |= HCR_TVM;

	write_sysreg(hcr, hcr_el2);

	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
		write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
}

static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
{
	/*
	 * If we pended a virtual abort, preserve it until it gets
	 * cleared. See D1.14.3 (Virtual Interrupts) for details, but
	 * the crucial bit is "On taking a vSError interrupt,
	 * HCR_EL2.VSE is cleared to 0."
	 */
	if (vcpu->arch.hcr_el2 & HCR_VSE) {
		vcpu->arch.hcr_el2 &= ~HCR_VSE;
		vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
	}
}

static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
{
	u64 par, tmp;

	/*
	 * Resolve the IPA the hard way using the guest VA.
	 *
	 * Stage-1 translation already validated the memory access
	 * rights. As such, we can use the EL1 translation regime, and
	 * don't have to distinguish between EL0 and EL1 access.
	 *
	 * We do need to save/restore PAR_EL1 though, as we haven't
	 * saved the guest context yet, and we may return early...
	 */
	par = read_sysreg_par();
	if (!__kvm_at("s1e1r", far))
		tmp = read_sysreg_par();
	else
		tmp = SYS_PAR_EL1_F; /* back to the guest */
	write_sysreg(par, par_el1);

	if (unlikely(tmp & SYS_PAR_EL1_F))
		return false; /* Translation failed, back to guest */

	/* Convert PAR to HPFAR format */
	*hpfar = PAR_TO_HPFAR(tmp);
	return true;
}

static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
{
	u8 ec;
	u64 esr;
	u64 hpfar, far;

	esr = vcpu->arch.fault.esr_el2;
	ec = ESR_ELx_EC(esr);

	if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
		return true;

	far = read_sysreg_el2(SYS_FAR);

	/*
	 * The HPFAR can be invalid if the stage 2 fault did not
	 * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
	 * bit is clear) and one of the two following cases is true:
	 * 1. The fault was due to a permission fault
	 * 2. The processor carries erratum 834220
	 *
	 * Therefore, for all non-S1PTW faults where we either have a
	 * permission fault or the erratum workaround is enabled, we
	 * resolve the IPA using the AT instruction.
	 */
	if (!(esr & ESR_ELx_S1PTW) &&
	    (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
	     (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
		if (!__translate_far_to_hpfar(far, &hpfar))
			return false;
	} else {
		hpfar = read_sysreg(hpfar_el2);
	}

	vcpu->arch.fault.far_el2 = far;
	vcpu->arch.fault.hpfar_el2 = hpfar;
	return true;
}

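
/*
 * For orientation only: the PAR_TO_HPFAR() conversion used by
 * __translate_far_to_hpfar() above amounts to moving the output address
 * bits reported by PAR_EL1 (PA[47:12], held in bits [47:12]) into the
 * HPFAR_EL2.FIPA field, which starts at bit 4. In other words, roughly:
 *
 *	hpfar = (par & GENMASK_ULL(47, 12)) >> 8;
 *
 * The exact mask depends on the supported PA size; see the real
 * PAR_TO_HPFAR() definition for the authoritative version.
 */
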
/* Check for an FPSIMD/SVE trap and handle as appropriate */
static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
{
	bool vhe, sve_guest, sve_host;
	u8 esr_ec;

	if (!system_supports_fpsimd())
		return false;

	/*
	 * Currently, system_supports_sve() implies has_vhe(), so the
	 * check is redundant. However, has_vhe() can be determined
	 * statically and helps the compiler remove dead code.
	 */
	if (has_vhe() && system_supports_sve()) {
		sve_guest = vcpu_has_sve(vcpu);
		sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
		vhe = true;
	} else {
		sve_guest = false;
		sve_host = false;
		vhe = has_vhe();
	}

	esr_ec = kvm_vcpu_trap_get_class(vcpu);
	if (esr_ec != ESR_ELx_EC_FP_ASIMD &&
	    esr_ec != ESR_ELx_EC_SVE)
		return false;

	/* Don't handle SVE traps for non-SVE vcpus here: */
	if (!sve_guest)
		if (esr_ec != ESR_ELx_EC_FP_ASIMD)
			return false;

	/* Valid trap. Switch the context: */

	if (vhe) {
		u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN;

		if (sve_guest)
			reg |= CPACR_EL1_ZEN;

		write_sysreg(reg, cpacr_el1);
	} else {
		write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
			     cptr_el2);
	}

	isb();

	if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
		/*
		 * In the SVE case, VHE is assumed: it is enforced by
		 * Kconfig and kvm_arch_init().
		 */
		if (sve_host) {
			struct thread_struct *thread = container_of(
				vcpu->arch.host_fpsimd_state,
				struct thread_struct, uw.fpsimd_state);

			sve_save_state(sve_pffr(thread),
				       &vcpu->arch.host_fpsimd_state->fpsr);
		} else {
			__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
		}

		vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
	}

	if (sve_guest) {
		sve_load_state(vcpu_sve_pffr(vcpu),
			       &vcpu->arch.ctxt.fp_regs.fpsr,
			       sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
		write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL12);
	} else {
		__fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs);
	}

	/* Skip restoring fpexc32 for AArch64 guests */
	if (!(read_sysreg(hcr_el2) & HCR_RW))
		write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);

	vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;

	return true;
}

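
/*
 * A rough sketch of the VHE trap disable above, ignoring the SVE
 * distinction: it is effectively the same as enabling FP/SIMD for EL1/EL0
 * via the generic read-modify-write helper, e.g.
 *
 *	sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_FPEN);
 *
 * whereas the nVHE path clears CPTR_EL2.TFP instead, since without VHE the
 * FP/SIMD trap for the guest is controlled from CPTR_EL2.
 */
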
static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
{
	u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
	int rt = kvm_vcpu_sys_get_rt(vcpu);
	u64 val = vcpu_get_reg(vcpu, rt);

	/*
	 * The normal sysreg handling code expects to see the traps,
	 * so let's not do anything here.
	 */
	if (vcpu->arch.hcr_el2 & HCR_TVM)
		return false;

	switch (sysreg) {
	case SYS_SCTLR_EL1:
		write_sysreg_el1(val, SYS_SCTLR);
		break;
	case SYS_TTBR0_EL1:
		write_sysreg_el1(val, SYS_TTBR0);
		break;
	case SYS_TTBR1_EL1:
		write_sysreg_el1(val, SYS_TTBR1);
		break;
	case SYS_TCR_EL1:
		write_sysreg_el1(val, SYS_TCR);
		break;
	case SYS_ESR_EL1:
		write_sysreg_el1(val, SYS_ESR);
		break;
	case SYS_FAR_EL1:
		write_sysreg_el1(val, SYS_FAR);
		break;
	case SYS_AFSR0_EL1:
		write_sysreg_el1(val, SYS_AFSR0);
		break;
	case SYS_AFSR1_EL1:
		write_sysreg_el1(val, SYS_AFSR1);
		break;
	case SYS_MAIR_EL1:
		write_sysreg_el1(val, SYS_MAIR);
		break;
	case SYS_AMAIR_EL1:
		write_sysreg_el1(val, SYS_AMAIR);
		break;
	case SYS_CONTEXTIDR_EL1:
		write_sysreg_el1(val, SYS_CONTEXTIDR);
		break;
	default:
		return false;
	}

	__kvm_skip_instr(vcpu);
	return true;
}

static inline bool esr_is_ptrauth_trap(u32 esr)
{
	u32 ec = ESR_ELx_EC(esr);

	if (ec == ESR_ELx_EC_PAC)
		return true;

	if (ec != ESR_ELx_EC_SYS64)
		return false;

	switch (esr_sys64_to_sysreg(esr)) {
	case SYS_APIAKEYLO_EL1:
	case SYS_APIAKEYHI_EL1:
	case SYS_APIBKEYLO_EL1:
	case SYS_APIBKEYHI_EL1:
	case SYS_APDAKEYLO_EL1:
	case SYS_APDAKEYHI_EL1:
	case SYS_APDBKEYLO_EL1:
	case SYS_APDBKEYHI_EL1:
	case SYS_APGAKEYLO_EL1:
	case SYS_APGAKEYHI_EL1:
		return true;
	}

	return false;
}

#define __ptrauth_save_key(ctxt, key)					\
	do {								\
		u64 __val;						\
		__val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1);	\
		ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val;		\
		__val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1);	\
		ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val;		\
	} while (0)

DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);

static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
{
	struct kvm_cpu_context *ctxt;
	u64 val;

	if (!vcpu_has_ptrauth(vcpu) ||
	    !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
		return false;

	ctxt = this_cpu_ptr(&kvm_hyp_ctxt);
	__ptrauth_save_key(ctxt, APIA);
	__ptrauth_save_key(ctxt, APIB);
	__ptrauth_save_key(ctxt, APDA);
	__ptrauth_save_key(ctxt, APDB);
	__ptrauth_save_key(ctxt, APGA);

	vcpu_ptrauth_enable(vcpu);

	val = read_sysreg(hcr_el2);
	val |= (HCR_API | HCR_APK);
	write_sysreg(val, hcr_el2);

	return true;
}

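
/*
 * For reference only: each __ptrauth_save_key() invocation above expands
 * (modulo the temporary) to a pair of plain copies, e.g. for APIA:
 *
 *	ctxt_sys_reg(ctxt, APIAKEYLO_EL1) = read_sysreg_s(SYS_APIAKEYLO_EL1);
 *	ctxt_sys_reg(ctxt, APIAKEYHI_EL1) = read_sysreg_s(SYS_APIAKEYHI_EL1);
 *
 * i.e. the currently live keys are snapshotted into the per-CPU hyp
 * context before the guest is allowed to use ptrauth without trapping.
 */
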
/*
 * Return true when we were able to fixup the guest exit and should return to
 * the guest, false when we should restore the host state and return to the
 * main run loop.
 */
static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
		vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);

	if (ARM_SERROR_PENDING(*exit_code)) {
		u8 esr_ec = kvm_vcpu_trap_get_class(vcpu);

		/*
		 * HVC traps already have an adjusted PC, which we need to
		 * correct in order to return to after having injected
		 * the SError.
		 *
		 * SMC, on the other hand, is *trapped*, meaning its
		 * preferred return address is the SMC itself.
		 */
		if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64)
			write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR);
	}

	/*
	 * We're using the raw exception code in order to only process
	 * the trap if no SError is pending. We will come back to the
	 * same PC once the SError has been injected, and replay the
	 * trapping instruction.
	 */
	if (*exit_code != ARM_EXCEPTION_TRAP)
		goto exit;

	if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
	    kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
	    handle_tx2_tvm(vcpu))
		goto guest;

	/*
	 * We trap the first access to the FP/SIMD to save the host context
	 * and restore the guest context lazily.
	 * If FP/SIMD is not implemented, handle the trap and inject an
	 * undefined instruction exception to the guest.
	 * Similarly for trapped SVE accesses.
	 */
	if (__hyp_handle_fpsimd(vcpu))
		goto guest;

	if (__hyp_handle_ptrauth(vcpu))
		goto guest;

	if (!__populate_fault_info(vcpu))
		goto guest;

	if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
		bool valid;

		valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
			kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
			kvm_vcpu_dabt_isvalid(vcpu) &&
			!kvm_vcpu_abt_issea(vcpu) &&
			!kvm_vcpu_abt_iss1tw(vcpu);

		if (valid) {
			int ret = __vgic_v2_perform_cpuif_access(vcpu);

			if (ret == 1)
				goto guest;

			/* Promote an illegal access to an SError. */
			if (ret == -1)
				*exit_code = ARM_EXCEPTION_EL1_SERROR;

			goto exit;
		}
	}

	if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
	    (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
	     kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
		int ret = __vgic_v3_perform_cpuif_access(vcpu);

		if (ret == 1)
			goto guest;
	}

exit:
	/* Return to the host kernel and handle the exit */
	return false;

guest:
	/* Re-enter the guest */
	asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412));
	return true;
}

static inline void __kvm_unexpected_el2_exception(void)
{
	extern char __guest_exit_panic[];
	unsigned long addr, fixup;
	struct exception_table_entry *entry, *end;
	unsigned long elr_el2 = read_sysreg(elr_el2);

	entry = hyp_symbol_addr(__start___kvm_ex_table);
	end = hyp_symbol_addr(__stop___kvm_ex_table);

	while (entry < end) {
		addr = (unsigned long)&entry->insn + entry->insn;
		fixup = (unsigned long)&entry->fixup + entry->fixup;

		if (addr != elr_el2) {
			entry++;
			continue;
		}

		write_sysreg(fixup, elr_el2);
		return;
	}

	/* Trigger a panic after restoring the hyp context. */
	write_sysreg(__guest_exit_panic, elr_el2);
}

#endif /* __ARM64_KVM_HYP_SWITCH_H__ */
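
/*
 * Rough usage sketch, for orientation only: the VHE and nVHE world-switch
 * loops drive fixup_guest_exit() along the lines of
 *
 *	do {
 *		exit_code = __guest_enter(vcpu);
 *	} while (fixup_guest_exit(vcpu, &exit_code));
 *
 * i.e. exits that are fully handled at hyp re-enter the guest directly,
 * and anything else is handed back to the host run loop.
 */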