xref: /openbmc/linux/arch/arm64/kvm/hyp/include/hyp/switch.h (revision f7af616c632ee2ac3af0876fe33bf9e0232e665a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2015 - ARM Ltd
4  * Author: Marc Zyngier <marc.zyngier@arm.com>
5  */
6 
7 #ifndef __ARM64_KVM_HYP_SWITCH_H__
8 #define __ARM64_KVM_HYP_SWITCH_H__
9 
10 #include <hyp/adjust_pc.h>
11 
12 #include <linux/arm-smccc.h>
13 #include <linux/kvm_host.h>
14 #include <linux/types.h>
15 #include <linux/jump_label.h>
16 #include <uapi/linux/psci.h>
17 
18 #include <kvm/arm_psci.h>
19 
20 #include <asm/barrier.h>
21 #include <asm/cpufeature.h>
22 #include <asm/extable.h>
23 #include <asm/kprobes.h>
24 #include <asm/kvm_asm.h>
25 #include <asm/kvm_emulate.h>
26 #include <asm/kvm_hyp.h>
27 #include <asm/kvm_mmu.h>
28 #include <asm/fpsimd.h>
29 #include <asm/debug-monitors.h>
30 #include <asm/processor.h>
31 #include <asm/thread_info.h>
32 
/*
 * Start and end markers of the KVM hyp exception table. The table is
 * scanned by __kvm_unexpected_el2_exception() to look up a fixup address.
 */
extern struct exception_table_entry __start___kvm_ex_table,
				    __stop___kvm_ex_table;
35 
36 /* Check whether the FP regs were dirtied while in the host-side run loop: */
37 static inline bool update_fp_enabled(struct kvm_vcpu *vcpu)
38 {
39 	/*
40 	 * When the system doesn't support FP/SIMD, we cannot rely on
41 	 * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an
42 	 * abort on the very first access to FP and thus we should never
43 	 * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always
44 	 * trap the accesses.
45 	 */
46 	if (!system_supports_fpsimd() ||
47 	    vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
48 		vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
49 				      KVM_ARM64_FP_HOST);
50 
51 	return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
52 }
53 
54 /* Save the 32-bit only FPSIMD system register state */
55 static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
56 {
57 	if (!vcpu_el1_is_32bit(vcpu))
58 		return;
59 
60 	__vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2);
61 }
62 
63 static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
64 {
65 	/*
66 	 * We are about to set CPTR_EL2.TFP to trap all floating point
67 	 * register accesses to EL2, however, the ARM ARM clearly states that
68 	 * traps are only taken to EL2 if the operation would not otherwise
69 	 * trap to EL1.  Therefore, always make sure that for 32-bit guests,
70 	 * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
71 	 * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
72 	 * it will cause an exception.
73 	 */
74 	if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
75 		write_sysreg(1 << 30, fpexc32_el2);
76 		isb();
77 	}
78 }
79 
80 static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
81 {
82 	/* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
83 	write_sysreg(1 << 15, hstr_el2);
84 
85 	/*
86 	 * Make sure we trap PMU access from EL0 to EL2. Also sanitize
87 	 * PMSELR_EL0 to make sure it never contains the cycle
88 	 * counter, which could make a PMXEVCNTR_EL0 access UNDEF at
89 	 * EL1 instead of being trapped to EL2.
90 	 */
91 	if (kvm_arm_support_pmu_v3()) {
92 		write_sysreg(0, pmselr_el0);
93 		write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
94 	}
95 	write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
96 }
97 
98 static inline void __deactivate_traps_common(void)
99 {
100 	write_sysreg(0, hstr_el2);
101 	if (kvm_arm_support_pmu_v3())
102 		write_sysreg(0, pmuserenr_el0);
103 }
104 
105 static inline void ___activate_traps(struct kvm_vcpu *vcpu)
106 {
107 	u64 hcr = vcpu->arch.hcr_el2;
108 
109 	if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
110 		hcr |= HCR_TVM;
111 
112 	write_sysreg(hcr, hcr_el2);
113 
114 	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
115 		write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
116 }
117 
118 static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
119 {
120 	/*
121 	 * If we pended a virtual abort, preserve it until it gets
122 	 * cleared. See D1.14.3 (Virtual Interrupts) for details, but
123 	 * the crucial bit is "On taking a vSError interrupt,
124 	 * HCR_EL2.VSE is cleared to 0."
125 	 */
126 	if (vcpu->arch.hcr_el2 & HCR_VSE) {
127 		vcpu->arch.hcr_el2 &= ~HCR_VSE;
128 		vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
129 	}
130 }
131 
132 static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
133 {
134 	u64 par, tmp;
135 
136 	/*
137 	 * Resolve the IPA the hard way using the guest VA.
138 	 *
139 	 * Stage-1 translation already validated the memory access
140 	 * rights. As such, we can use the EL1 translation regime, and
141 	 * don't have to distinguish between EL0 and EL1 access.
142 	 *
143 	 * We do need to save/restore PAR_EL1 though, as we haven't
144 	 * saved the guest context yet, and we may return early...
145 	 */
146 	par = read_sysreg_par();
147 	if (!__kvm_at("s1e1r", far))
148 		tmp = read_sysreg_par();
149 	else
150 		tmp = SYS_PAR_EL1_F; /* back to the guest */
151 	write_sysreg(par, par_el1);
152 
153 	if (unlikely(tmp & SYS_PAR_EL1_F))
154 		return false; /* Translation failed, back to guest */
155 
156 	/* Convert PAR to HPFAR format */
157 	*hpfar = PAR_TO_HPFAR(tmp);
158 	return true;
159 }
160 
161 static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
162 {
163 	u64 hpfar, far;
164 
165 	far = read_sysreg_el2(SYS_FAR);
166 
167 	/*
168 	 * The HPFAR can be invalid if the stage 2 fault did not
169 	 * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
170 	 * bit is clear) and one of the two following cases are true:
171 	 *   1. The fault was due to a permission fault
172 	 *   2. The processor carries errata 834220
173 	 *
174 	 * Therefore, for all non S1PTW faults where we either have a
175 	 * permission fault or the errata workaround is enabled, we
176 	 * resolve the IPA using the AT instruction.
177 	 */
178 	if (!(esr & ESR_ELx_S1PTW) &&
179 	    (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
180 	     (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
181 		if (!__translate_far_to_hpfar(far, &hpfar))
182 			return false;
183 	} else {
184 		hpfar = read_sysreg(hpfar_el2);
185 	}
186 
187 	fault->far_el2 = far;
188 	fault->hpfar_el2 = hpfar;
189 	return true;
190 }
191 
192 static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
193 {
194 	u8 ec;
195 	u64 esr;
196 
197 	esr = vcpu->arch.fault.esr_el2;
198 	ec = ESR_ELx_EC(esr);
199 
200 	if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
201 		return true;
202 
203 	return __get_fault_info(esr, &vcpu->arch.fault);
204 }
205 
206 static inline void __hyp_sve_save_host(struct kvm_vcpu *vcpu)
207 {
208 	struct thread_struct *thread;
209 
210 	thread = container_of(vcpu->arch.host_fpsimd_state, struct thread_struct,
211 			      uw.fpsimd_state);
212 
213 	__sve_save_state(sve_pffr(thread), &vcpu->arch.host_fpsimd_state->fpsr);
214 }
215 
216 static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
217 {
218 	sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
219 	__sve_restore_state(vcpu_sve_pffr(vcpu),
220 			    &vcpu->arch.ctxt.fp_regs.fpsr);
221 	write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
222 }
223 
224 /* Check for an FPSIMD/SVE trap and handle as appropriate */
225 static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
226 {
227 	bool sve_guest, sve_host;
228 	u8 esr_ec;
229 	u64 reg;
230 
231 	if (!system_supports_fpsimd())
232 		return false;
233 
234 	if (system_supports_sve()) {
235 		sve_guest = vcpu_has_sve(vcpu);
236 		sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
237 	} else {
238 		sve_guest = false;
239 		sve_host = false;
240 	}
241 
242 	esr_ec = kvm_vcpu_trap_get_class(vcpu);
243 	if (esr_ec != ESR_ELx_EC_FP_ASIMD &&
244 	    esr_ec != ESR_ELx_EC_SVE)
245 		return false;
246 
247 	/* Don't handle SVE traps for non-SVE vcpus here: */
248 	if (!sve_guest && esr_ec != ESR_ELx_EC_FP_ASIMD)
249 		return false;
250 
251 	/* Valid trap.  Switch the context: */
252 	if (has_vhe()) {
253 		reg = CPACR_EL1_FPEN;
254 		if (sve_guest)
255 			reg |= CPACR_EL1_ZEN;
256 
257 		sysreg_clear_set(cpacr_el1, 0, reg);
258 	} else {
259 		reg = CPTR_EL2_TFP;
260 		if (sve_guest)
261 			reg |= CPTR_EL2_TZ;
262 
263 		sysreg_clear_set(cptr_el2, reg, 0);
264 	}
265 	isb();
266 
267 	if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
268 		if (sve_host)
269 			__hyp_sve_save_host(vcpu);
270 		else
271 			__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
272 
273 		vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
274 	}
275 
276 	if (sve_guest)
277 		__hyp_sve_restore_guest(vcpu);
278 	else
279 		__fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs);
280 
281 	/* Skip restoring fpexc32 for AArch64 guests */
282 	if (!(read_sysreg(hcr_el2) & HCR_RW))
283 		write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);
284 
285 	vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;
286 
287 	return true;
288 }
289 
290 static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
291 {
292 	u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
293 	int rt = kvm_vcpu_sys_get_rt(vcpu);
294 	u64 val = vcpu_get_reg(vcpu, rt);
295 
296 	/*
297 	 * The normal sysreg handling code expects to see the traps,
298 	 * let's not do anything here.
299 	 */
300 	if (vcpu->arch.hcr_el2 & HCR_TVM)
301 		return false;
302 
303 	switch (sysreg) {
304 	case SYS_SCTLR_EL1:
305 		write_sysreg_el1(val, SYS_SCTLR);
306 		break;
307 	case SYS_TTBR0_EL1:
308 		write_sysreg_el1(val, SYS_TTBR0);
309 		break;
310 	case SYS_TTBR1_EL1:
311 		write_sysreg_el1(val, SYS_TTBR1);
312 		break;
313 	case SYS_TCR_EL1:
314 		write_sysreg_el1(val, SYS_TCR);
315 		break;
316 	case SYS_ESR_EL1:
317 		write_sysreg_el1(val, SYS_ESR);
318 		break;
319 	case SYS_FAR_EL1:
320 		write_sysreg_el1(val, SYS_FAR);
321 		break;
322 	case SYS_AFSR0_EL1:
323 		write_sysreg_el1(val, SYS_AFSR0);
324 		break;
325 	case SYS_AFSR1_EL1:
326 		write_sysreg_el1(val, SYS_AFSR1);
327 		break;
328 	case SYS_MAIR_EL1:
329 		write_sysreg_el1(val, SYS_MAIR);
330 		break;
331 	case SYS_AMAIR_EL1:
332 		write_sysreg_el1(val, SYS_AMAIR);
333 		break;
334 	case SYS_CONTEXTIDR_EL1:
335 		write_sysreg_el1(val, SYS_CONTEXTIDR);
336 		break;
337 	default:
338 		return false;
339 	}
340 
341 	__kvm_skip_instr(vcpu);
342 	return true;
343 }
344 
345 static inline bool esr_is_ptrauth_trap(u32 esr)
346 {
347 	u32 ec = ESR_ELx_EC(esr);
348 
349 	if (ec == ESR_ELx_EC_PAC)
350 		return true;
351 
352 	if (ec != ESR_ELx_EC_SYS64)
353 		return false;
354 
355 	switch (esr_sys64_to_sysreg(esr)) {
356 	case SYS_APIAKEYLO_EL1:
357 	case SYS_APIAKEYHI_EL1:
358 	case SYS_APIBKEYLO_EL1:
359 	case SYS_APIBKEYHI_EL1:
360 	case SYS_APDAKEYLO_EL1:
361 	case SYS_APDAKEYHI_EL1:
362 	case SYS_APDBKEYLO_EL1:
363 	case SYS_APDBKEYHI_EL1:
364 	case SYS_APGAKEYLO_EL1:
365 	case SYS_APGAKEYHI_EL1:
366 		return true;
367 	}
368 
369 	return false;
370 }
371 
/*
 * Save both halves (KEYLO then KEYHI) of the pointer authentication key
 * named by @key from the system registers into the register file of @ctxt.
 */
#define __ptrauth_save_key(ctxt, key)					\
do {									\
	u64 __key_half;							\
									\
	__key_half = read_sysreg_s(SYS_ ## key ## KEYLO_EL1);		\
	ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __key_half;		\
	__key_half = read_sysreg_s(SYS_ ## key ## KEYHI_EL1);		\
	ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __key_half;		\
} while (0)
380 
381 DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
382 
383 static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
384 {
385 	struct kvm_cpu_context *ctxt;
386 	u64 val;
387 
388 	if (!vcpu_has_ptrauth(vcpu) ||
389 	    !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
390 		return false;
391 
392 	ctxt = this_cpu_ptr(&kvm_hyp_ctxt);
393 	__ptrauth_save_key(ctxt, APIA);
394 	__ptrauth_save_key(ctxt, APIB);
395 	__ptrauth_save_key(ctxt, APDA);
396 	__ptrauth_save_key(ctxt, APDB);
397 	__ptrauth_save_key(ctxt, APGA);
398 
399 	vcpu_ptrauth_enable(vcpu);
400 
401 	val = read_sysreg(hcr_el2);
402 	val |= (HCR_API | HCR_APK);
403 	write_sysreg(val, hcr_el2);
404 
405 	return true;
406 }
407 
408 /*
409  * Return true when we were able to fixup the guest exit and should return to
410  * the guest, false when we should restore the host state and return to the
411  * main run loop.
412  */
413 static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
414 {
415 	if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
416 		vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
417 
418 	if (ARM_SERROR_PENDING(*exit_code)) {
419 		u8 esr_ec = kvm_vcpu_trap_get_class(vcpu);
420 
421 		/*
422 		 * HVC already have an adjusted PC, which we need to
423 		 * correct in order to return to after having injected
424 		 * the SError.
425 		 *
426 		 * SMC, on the other hand, is *trapped*, meaning its
427 		 * preferred return address is the SMC itself.
428 		 */
429 		if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64)
430 			write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR);
431 	}
432 
433 	/*
434 	 * We're using the raw exception code in order to only process
435 	 * the trap if no SError is pending. We will come back to the
436 	 * same PC once the SError has been injected, and replay the
437 	 * trapping instruction.
438 	 */
439 	if (*exit_code != ARM_EXCEPTION_TRAP)
440 		goto exit;
441 
442 	if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
443 	    kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
444 	    handle_tx2_tvm(vcpu))
445 		goto guest;
446 
447 	/*
448 	 * We trap the first access to the FP/SIMD to save the host context
449 	 * and restore the guest context lazily.
450 	 * If FP/SIMD is not implemented, handle the trap and inject an
451 	 * undefined instruction exception to the guest.
452 	 * Similarly for trapped SVE accesses.
453 	 */
454 	if (__hyp_handle_fpsimd(vcpu))
455 		goto guest;
456 
457 	if (__hyp_handle_ptrauth(vcpu))
458 		goto guest;
459 
460 	if (!__populate_fault_info(vcpu))
461 		goto guest;
462 
463 	if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
464 		bool valid;
465 
466 		valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
467 			kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
468 			kvm_vcpu_dabt_isvalid(vcpu) &&
469 			!kvm_vcpu_abt_issea(vcpu) &&
470 			!kvm_vcpu_abt_iss1tw(vcpu);
471 
472 		if (valid) {
473 			int ret = __vgic_v2_perform_cpuif_access(vcpu);
474 
475 			if (ret == 1)
476 				goto guest;
477 
478 			/* Promote an illegal access to an SError.*/
479 			if (ret == -1)
480 				*exit_code = ARM_EXCEPTION_EL1_SERROR;
481 
482 			goto exit;
483 		}
484 	}
485 
486 	if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
487 	    (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
488 	     kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
489 		int ret = __vgic_v3_perform_cpuif_access(vcpu);
490 
491 		if (ret == 1)
492 			goto guest;
493 	}
494 
495 exit:
496 	/* Return to the host kernel and handle the exit */
497 	return false;
498 
499 guest:
500 	/* Re-enter the guest */
501 	asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412));
502 	return true;
503 }
504 
505 static inline void __kvm_unexpected_el2_exception(void)
506 {
507 	extern char __guest_exit_panic[];
508 	unsigned long addr, fixup;
509 	struct exception_table_entry *entry, *end;
510 	unsigned long elr_el2 = read_sysreg(elr_el2);
511 
512 	entry = &__start___kvm_ex_table;
513 	end = &__stop___kvm_ex_table;
514 
515 	while (entry < end) {
516 		addr = (unsigned long)&entry->insn + entry->insn;
517 		fixup = (unsigned long)&entry->fixup + entry->fixup;
518 
519 		if (addr != elr_el2) {
520 			entry++;
521 			continue;
522 		}
523 
524 		write_sysreg(fixup, elr_el2);
525 		return;
526 	}
527 
528 	/* Trigger a panic after restoring the hyp context. */
529 	write_sysreg(__guest_exit_panic, elr_el2);
530 }
531 
532 #endif /* __ARM64_KVM_HYP_SWITCH_H__ */
533