// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 - ARM Ltd
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 */

#ifndef __ARM64_KVM_HYP_SWITCH_H__
#define __ARM64_KVM_HYP_SWITCH_H__

#include <linux/arm-smccc.h>
#include <linux/kvm_host.h>
#include <linux/types.h>
#include <linux/jump_label.h>
#include <uapi/linux/psci.h>

#include <kvm/arm_psci.h>

#include <asm/barrier.h>
#include <asm/cpufeature.h>
#include <asm/extable.h>
#include <asm/kprobes.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
#include <asm/thread_info.h>

extern const char __hyp_panic_string[];

extern struct exception_table_entry __start___kvm_ex_table;
extern struct exception_table_entry __stop___kvm_ex_table;

/* Check whether the FP regs were dirtied while in the host-side run loop: */
static inline bool update_fp_enabled(struct kvm_vcpu *vcpu)
{
        /*
         * When the system doesn't support FP/SIMD, we cannot rely on
         * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an
         * abort on the very first access to FP and thus we should never
         * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always
         * trap the accesses.
         */
        if (!system_supports_fpsimd() ||
            vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
                vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
                                      KVM_ARM64_FP_HOST);

        return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
}

/* Save the 32-bit only FPSIMD system register state */
static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
{
        if (!vcpu_el1_is_32bit(vcpu))
                return;

        __vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2);
}

static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
{
        /*
         * We are about to set CPTR_EL2.TFP to trap all floating point
         * register accesses to EL2, however, the ARM ARM clearly states that
         * traps are only taken to EL2 if the operation would not otherwise
         * trap to EL1. Therefore, always make sure that for 32-bit guests,
         * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
         * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
         * it will cause an exception.
         */
        if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
                write_sysreg(1 << 30, fpexc32_el2);
                isb();
        }
}

static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
{
        /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
        write_sysreg(1 << 15, hstr_el2);

        /*
         * Make sure we trap PMU access from EL0 to EL2. Also sanitize
         * PMSELR_EL0 to make sure it never contains the cycle
         * counter, which could make a PMXEVCNTR_EL0 access UNDEF at
         * EL1 instead of being trapped to EL2.
         */
        write_sysreg(0, pmselr_el0);
        write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
        write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
}

static inline void __deactivate_traps_common(void)
{
        write_sysreg(0, hstr_el2);
        write_sysreg(0, pmuserenr_el0);
}

static inline void ___activate_traps(struct kvm_vcpu *vcpu)
{
        u64 hcr = vcpu->arch.hcr_el2;

        if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
                hcr |= HCR_TVM;

        write_sysreg(hcr, hcr_el2);

        if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
                write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
}

static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
{
        /*
         * If we pended a virtual abort, preserve it until it gets
         * cleared. See D1.14.3 (Virtual Interrupts) for details, but
         * the crucial bit is "On taking a vSError interrupt,
         * HCR_EL2.VSE is cleared to 0."
         */
        if (vcpu->arch.hcr_el2 & HCR_VSE) {
                vcpu->arch.hcr_el2 &= ~HCR_VSE;
                vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
        }
}

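/*
 * Illustrative sketch, not part of the original header: the helpers above
 * are meant to be called in matched pairs by the mode-specific world-switch
 * code. The wrapper functions below are hypothetical and only show the
 * intended ordering; the real VHE/nVHE switch code adds its own
 * CPTR_EL2/CPACR_EL1 programming on top of this:
 *
 *	static void activate_traps(struct kvm_vcpu *vcpu)
 *	{
 *		___activate_traps(vcpu);
 *		__activate_traps_common(vcpu);
 *		__activate_traps_fpsimd32(vcpu);
 *		// mode-specific trap configuration goes here
 *	}
 *
 *	static void deactivate_traps(struct kvm_vcpu *vcpu)
 *	{
 *		___deactivate_traps(vcpu);
 *		__deactivate_traps_common();
 *		// restore the host's trap configuration here
 *	}
 */
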
static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
{
        u64 par, tmp;

        /*
         * Resolve the IPA the hard way using the guest VA.
         *
         * Stage-1 translation already validated the memory access
         * rights. As such, we can use the EL1 translation regime, and
         * don't have to distinguish between EL0 and EL1 access.
         *
         * We do need to save/restore PAR_EL1 though, as we haven't
         * saved the guest context yet, and we may return early...
         */
        par = read_sysreg_par();
        if (!__kvm_at("s1e1r", far))
                tmp = read_sysreg_par();
        else
                tmp = SYS_PAR_EL1_F; /* back to the guest */
        write_sysreg(par, par_el1);

        if (unlikely(tmp & SYS_PAR_EL1_F))
                return false; /* Translation failed, back to guest */

        /* Convert PAR to HPFAR format */
        *hpfar = PAR_TO_HPFAR(tmp);
        return true;
}

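/*
 * Added note, not in the original header: assuming the usual
 * PAR_TO_HPFAR() definition from <asm/kvm_arm.h>,
 *
 *	#define PAR_TO_HPFAR(par) \
 *		(((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8)
 *
 * the conversion above simply moves the output address field down by
 * eight bits: with a 48-bit PA, PAR_EL1 reports the IPA in bits [47:12],
 * while HPFAR_EL2 wants the same field in bits [39:4]. For example, a
 * faulting IPA of 0x81234000 leaves 0x81234000 after masking PAR_EL1,
 * and shifting right by 8 gives the HPFAR_EL2 value 0x812340.
 */
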
static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
{
        u8 ec;
        u64 esr;
        u64 hpfar, far;

        esr = vcpu->arch.fault.esr_el2;
        ec = ESR_ELx_EC(esr);

        if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
                return true;

        far = read_sysreg_el2(SYS_FAR);

        /*
         * The HPFAR can be invalid if the stage 2 fault did not
         * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
         * bit is clear) and one of the two following cases is true:
         * 1. The fault was due to a permission fault
         * 2. The processor carries errata 834220
         *
         * Therefore, for all non-S1PTW faults where we either have a
         * permission fault or the errata workaround is enabled, we
         * resolve the IPA using the AT instruction.
         */
        if (!(esr & ESR_ELx_S1PTW) &&
            (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
             (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
                if (!__translate_far_to_hpfar(far, &hpfar))
                        return false;
        } else {
                hpfar = read_sysreg(hpfar_el2);
        }

        vcpu->arch.fault.far_el2 = far;
        vcpu->arch.fault.hpfar_el2 = hpfar;
        return true;
}

/* Check for an FPSIMD/SVE trap and handle as appropriate */
static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
{
        bool vhe, sve_guest, sve_host;
        u8 esr_ec;

        if (!system_supports_fpsimd())
                return false;

        /*
         * Currently system_supports_sve() implies has_vhe(), so the
         * check is redundant. However, has_vhe() can be determined
         * statically and helps the compiler remove dead code.
         */
        if (has_vhe() && system_supports_sve()) {
                sve_guest = vcpu_has_sve(vcpu);
                sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
                vhe = true;
        } else {
                sve_guest = false;
                sve_host = false;
                vhe = has_vhe();
        }

        esr_ec = kvm_vcpu_trap_get_class(vcpu);
        if (esr_ec != ESR_ELx_EC_FP_ASIMD &&
            esr_ec != ESR_ELx_EC_SVE)
                return false;

        /* Don't handle SVE traps for non-SVE vcpus here: */
        if (!sve_guest)
                if (esr_ec != ESR_ELx_EC_FP_ASIMD)
                        return false;

        /* Valid trap. Switch the context: */

        if (vhe) {
                u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN;

                if (sve_guest)
                        reg |= CPACR_EL1_ZEN;

                write_sysreg(reg, cpacr_el1);
        } else {
                write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
                             cptr_el2);
        }

        isb();

        if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
                /*
                 * In the SVE case, VHE is assumed: it is enforced by
                 * Kconfig and kvm_arch_init().
                 */
                if (sve_host) {
                        struct thread_struct *thread = container_of(
                                vcpu->arch.host_fpsimd_state,
                                struct thread_struct, uw.fpsimd_state);

                        sve_save_state(sve_pffr(thread),
                                       &vcpu->arch.host_fpsimd_state->fpsr);
                } else {
                        __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
                }

                vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
        }

        if (sve_guest) {
                sve_load_state(vcpu_sve_pffr(vcpu),
                               &vcpu->arch.ctxt.fp_regs.fpsr,
                               sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
                write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL12);
        } else {
                __fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs);
        }

        /* Skip restoring fpexc32 for AArch64 guests */
        if (!(read_sysreg(hcr_el2) & HCR_RW))
                write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);

        vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;

        return true;
}

static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
{
        u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
        int rt = kvm_vcpu_sys_get_rt(vcpu);
        u64 val = vcpu_get_reg(vcpu, rt);

        /*
         * The normal sysreg handling code expects to see the traps,
         * so let's not do anything here.
         */
        if (vcpu->arch.hcr_el2 & HCR_TVM)
                return false;

        switch (sysreg) {
        case SYS_SCTLR_EL1:
                write_sysreg_el1(val, SYS_SCTLR);
                break;
        case SYS_TTBR0_EL1:
                write_sysreg_el1(val, SYS_TTBR0);
                break;
        case SYS_TTBR1_EL1:
                write_sysreg_el1(val, SYS_TTBR1);
                break;
        case SYS_TCR_EL1:
                write_sysreg_el1(val, SYS_TCR);
                break;
        case SYS_ESR_EL1:
                write_sysreg_el1(val, SYS_ESR);
                break;
        case SYS_FAR_EL1:
                write_sysreg_el1(val, SYS_FAR);
                break;
        case SYS_AFSR0_EL1:
                write_sysreg_el1(val, SYS_AFSR0);
                break;
        case SYS_AFSR1_EL1:
                write_sysreg_el1(val, SYS_AFSR1);
                break;
        case SYS_MAIR_EL1:
                write_sysreg_el1(val, SYS_MAIR);
                break;
        case SYS_AMAIR_EL1:
                write_sysreg_el1(val, SYS_AMAIR);
                break;
        case SYS_CONTEXTIDR_EL1:
                write_sysreg_el1(val, SYS_CONTEXTIDR);
                break;
        default:
                return false;
        }

        __kvm_skip_instr(vcpu);
        return true;
}

static inline bool esr_is_ptrauth_trap(u32 esr)
{
        u32 ec = ESR_ELx_EC(esr);

        if (ec == ESR_ELx_EC_PAC)
                return true;

        if (ec != ESR_ELx_EC_SYS64)
                return false;

        switch (esr_sys64_to_sysreg(esr)) {
        case SYS_APIAKEYLO_EL1:
        case SYS_APIAKEYHI_EL1:
        case SYS_APIBKEYLO_EL1:
        case SYS_APIBKEYHI_EL1:
        case SYS_APDAKEYLO_EL1:
        case SYS_APDAKEYHI_EL1:
        case SYS_APDBKEYLO_EL1:
        case SYS_APDBKEYHI_EL1:
        case SYS_APGAKEYLO_EL1:
        case SYS_APGAKEYHI_EL1:
                return true;
        }

        return false;
}

#define __ptrauth_save_key(ctxt, key)                                   \
        do {                                                            \
                u64 __val;                                              \
                __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1);        \
                ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val;           \
                __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1);        \
                ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val;           \
        } while (0)

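/*
 * Added for reference, not in the original header: for a given context,
 * __ptrauth_save_key(ctxt, APIA) expands to roughly the following,
 * snapshotting both halves of the APIA key:
 *
 *	do {
 *		u64 __val;
 *		__val = read_sysreg_s(SYS_APIAKEYLO_EL1);
 *		ctxt_sys_reg(ctxt, APIAKEYLO_EL1) = __val;
 *		__val = read_sysreg_s(SYS_APIAKEYHI_EL1);
 *		ctxt_sys_reg(ctxt, APIAKEYHI_EL1) = __val;
 *	} while (0)
 */
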
DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);

static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
{
        struct kvm_cpu_context *ctxt;
        u64 val;

        if (!vcpu_has_ptrauth(vcpu) ||
            !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
                return false;

        ctxt = this_cpu_ptr(&kvm_hyp_ctxt);
        __ptrauth_save_key(ctxt, APIA);
        __ptrauth_save_key(ctxt, APIB);
        __ptrauth_save_key(ctxt, APDA);
        __ptrauth_save_key(ctxt, APDB);
        __ptrauth_save_key(ctxt, APGA);

        vcpu_ptrauth_enable(vcpu);

        val = read_sysreg(hcr_el2);
        val |= (HCR_API | HCR_APK);
        write_sysreg(val, hcr_el2);

        return true;
}

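/*
 * Illustrative sketch, not part of the original header: fixup_guest_exit()
 * below is designed to be driven from a world-switch run loop, re-entering
 * the guest for exits that were fully handled at EL2 and returning to the
 * host for everything else. A hypothetical caller could look like this,
 * with __guest_enter() standing in for the low-level guest entry code:
 *
 *	do {
 *		exit_code = __guest_enter(vcpu);
 *	} while (fixup_guest_exit(vcpu, &exit_code));
 */
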
/*
 * Return true when we were able to fixup the guest exit and should return to
 * the guest, false when we should restore the host state and return to the
 * main run loop.
 */
static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
        if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
                vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);

        /*
         * We're using the raw exception code in order to only process
         * the trap if no SError is pending. We will come back to the
         * same PC once the SError has been injected, and replay the
         * trapping instruction.
         */
        if (*exit_code != ARM_EXCEPTION_TRAP)
                goto exit;

        if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
            kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
            handle_tx2_tvm(vcpu))
                goto guest;

        /*
         * We trap the first access to the FP/SIMD to save the host context
         * and restore the guest context lazily.
         * If FP/SIMD is not implemented, handle the trap and inject an
         * undefined instruction exception to the guest.
         * Similarly for trapped SVE accesses.
         */
        if (__hyp_handle_fpsimd(vcpu))
                goto guest;

        if (__hyp_handle_ptrauth(vcpu))
                goto guest;

        if (!__populate_fault_info(vcpu))
                goto guest;

        if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
                bool valid;

                valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
                        kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
                        kvm_vcpu_dabt_isvalid(vcpu) &&
                        !kvm_vcpu_abt_issea(vcpu) &&
                        !kvm_vcpu_abt_iss1tw(vcpu);

                if (valid) {
                        int ret = __vgic_v2_perform_cpuif_access(vcpu);

                        if (ret == 1)
                                goto guest;

                        /* Promote an illegal access to an SError. */
                        if (ret == -1)
                                *exit_code = ARM_EXCEPTION_EL1_SERROR;

                        goto exit;
                }
        }

        if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
            (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
             kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
                int ret = __vgic_v3_perform_cpuif_access(vcpu);

                if (ret == 1)
                        goto guest;
        }

exit:
        /* Return to the host kernel and handle the exit */
        return false;

guest:
        /* Re-enter the guest */
        asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412));
        return true;
}

static inline void __kvm_unexpected_el2_exception(void)
{
        extern char __guest_exit_panic[];
        unsigned long addr, fixup;
        struct exception_table_entry *entry, *end;
        unsigned long elr_el2 = read_sysreg(elr_el2);

        entry = hyp_symbol_addr(__start___kvm_ex_table);
        end = hyp_symbol_addr(__stop___kvm_ex_table);

        while (entry < end) {
                addr = (unsigned long)&entry->insn + entry->insn;
                fixup = (unsigned long)&entry->fixup + entry->fixup;

                if (addr != elr_el2) {
                        entry++;
                        continue;
                }

                write_sysreg(fixup, elr_el2);
                return;
        }

        /* Trigger a panic after restoring the hyp context. */
        write_sysreg(__guest_exit_panic, elr_el2);
}

#endif /* __ARM64_KVM_HYP_SWITCH_H__ */