xref: /openbmc/linux/arch/arm64/kvm/handle_exit.c (revision 5e0266f0)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2012,2013 - ARM Ltd
4  * Author: Marc Zyngier <marc.zyngier@arm.com>
5  *
6  * Derived from arch/arm/kvm/handle_exit.c:
7  * Copyright (C) 2012 - Virtual Open Systems and Columbia University
8  * Author: Christoffer Dall <c.dall@virtualopensystems.com>
9  */
10 
11 #include <linux/kvm.h>
12 #include <linux/kvm_host.h>
13 
14 #include <asm/esr.h>
15 #include <asm/exception.h>
16 #include <asm/kvm_asm.h>
17 #include <asm/kvm_emulate.h>
18 #include <asm/kvm_mmu.h>
19 #include <asm/kvm_nested.h>
20 #include <asm/debug-monitors.h>
21 #include <asm/stacktrace/nvhe.h>
22 #include <asm/traps.h>
23 
24 #include <kvm/arm_hypercalls.h>
25 
26 #define CREATE_TRACE_POINTS
27 #include "trace_handle_exit.h"
28 
/* Signature of the per-exception-class handlers stored in arm_exit_handlers[]. */
typedef int (*exit_handle_fn)(struct kvm_vcpu *vcpu);
30 
31 static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
32 {
33 	if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(NULL, esr))
34 		kvm_inject_vabt(vcpu);
35 }
36 
37 static int handle_hvc(struct kvm_vcpu *vcpu)
38 {
39 	int ret;
40 
41 	trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0),
42 			    kvm_vcpu_hvc_get_imm(vcpu));
43 	vcpu->stat.hvc_exit_stat++;
44 
45 	/* Forward hvc instructions to the virtual EL2 if the guest has EL2. */
46 	if (vcpu_has_nv(vcpu)) {
47 		if (vcpu_read_sys_reg(vcpu, HCR_EL2) & HCR_HCD)
48 			kvm_inject_undefined(vcpu);
49 		else
50 			kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
51 
52 		return 1;
53 	}
54 
55 	ret = kvm_hvc_call_handler(vcpu);
56 	if (ret < 0) {
57 		vcpu_set_reg(vcpu, 0, ~0UL);
58 		return 1;
59 	}
60 
61 	return ret;
62 }
63 
/*
 * Handle an SMC instruction trapped from the guest.
 *
 * Returns 1 to resume the guest, or the non-negative result of
 * kvm_hvc_call_handler() (0 meaning an exit to userspace). A failure of
 * the call handler is reported to the guest in register 0 and is never
 * propagated as a negative return value, matching handle_hvc().
 */
static int handle_smc(struct kvm_vcpu *vcpu)
{
	int ret;

	/*
	 * "If an SMC instruction executed at Non-secure EL1 is
	 * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a
	 * Trap exception, not a Secure Monitor Call exception [...]"
	 *
	 * We need to advance the PC after the trap, as it would
	 * otherwise return to the same address...
	 *
	 * Only handle SMCs from the virtual EL2 with an immediate of zero and
	 * skip it otherwise.
	 */
	if (!vcpu_is_el2(vcpu) || kvm_vcpu_hvc_get_imm(vcpu)) {
		vcpu_set_reg(vcpu, 0, ~0UL);
		kvm_incr_pc(vcpu);
		return 1;
	}

	/*
	 * If imm is zero then it is likely an SMCCC call.
	 *
	 * Note that on ARMv8.3, even if EL3 is not implemented, SMC executed
	 * at Non-secure EL1 is trapped to EL2 if HCR_EL2.TSC==1, rather than
	 * being treated as UNDEFINED.
	 */
	ret = kvm_hvc_call_handler(vcpu);
	if (ret < 0) {
		/*
		 * The error is delivered to the guest through register 0, so
		 * resume the guest rather than failing the run with a
		 * negative return value.
		 */
		vcpu_set_reg(vcpu, 0, ~0UL);
		ret = 1;
	}

	kvm_incr_pc(vcpu);

	return ret;
}
100 
/*
 * FP/ASIMD accesses from the guest only reach this handler when the
 * system has no FP/ASIMD support, so the guest simply gets an UNDEF.
 *
 * Always returns 1 (resume the guest).
 */
static int handle_no_fpsimd(struct kvm_vcpu *vcpu)
{
	kvm_inject_undefined(vcpu);

	return 1;
}
110 
111 /**
112  * kvm_handle_wfx - handle a wait-for-interrupts or wait-for-event
113  *		    instruction executed by a guest
114  *
115  * @vcpu:	the vcpu pointer
116  *
117  * WFE[T]: Yield the CPU and come back to this vcpu when the scheduler
118  * decides to.
119  * WFI: Simply call kvm_vcpu_halt(), which will halt execution of
120  * world-switches and schedule other host processes until there is an
121  * incoming IRQ or FIQ to the VM.
122  * WFIT: Same as WFI, with a timed wakeup implemented as a background timer
123  *
124  * WF{I,E}T can immediately return if the deadline has already expired.
125  */
126 static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
127 {
128 	u64 esr = kvm_vcpu_get_esr(vcpu);
129 
130 	if (esr & ESR_ELx_WFx_ISS_WFE) {
131 		trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
132 		vcpu->stat.wfe_exit_stat++;
133 	} else {
134 		trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
135 		vcpu->stat.wfi_exit_stat++;
136 	}
137 
138 	if (esr & ESR_ELx_WFx_ISS_WFxT) {
139 		if (esr & ESR_ELx_WFx_ISS_RV) {
140 			u64 val, now;
141 
142 			now = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_TIMER_CNT);
143 			val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
144 
145 			if (now >= val)
146 				goto out;
147 		} else {
148 			/* Treat WFxT as WFx if RN is invalid */
149 			esr &= ~ESR_ELx_WFx_ISS_WFxT;
150 		}
151 	}
152 
153 	if (esr & ESR_ELx_WFx_ISS_WFE) {
154 		kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
155 	} else {
156 		if (esr & ESR_ELx_WFx_ISS_WFxT)
157 			vcpu_set_flag(vcpu, IN_WFIT);
158 
159 		kvm_vcpu_wfi(vcpu);
160 	}
161 out:
162 	kvm_incr_pc(vcpu);
163 
164 	return 1;
165 }
166 
167 /**
168  * kvm_handle_guest_debug - handle a debug exception instruction
169  *
170  * @vcpu:	the vcpu pointer
171  *
172  * We route all debug exceptions through the same handler. If both the
173  * guest and host are using the same debug facilities it will be up to
174  * userspace to re-inject the correct exception for guest delivery.
175  *
176  * @return: 0 (while setting vcpu->run->exit_reason)
177  */
178 static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
179 {
180 	struct kvm_run *run = vcpu->run;
181 	u64 esr = kvm_vcpu_get_esr(vcpu);
182 
183 	run->exit_reason = KVM_EXIT_DEBUG;
184 	run->debug.arch.hsr = lower_32_bits(esr);
185 	run->debug.arch.hsr_high = upper_32_bits(esr);
186 	run->flags = KVM_DEBUG_ARCH_HSR_HIGH_VALID;
187 
188 	switch (ESR_ELx_EC(esr)) {
189 	case ESR_ELx_EC_WATCHPT_LOW:
190 		run->debug.arch.far = vcpu->arch.fault.far_el2;
191 		break;
192 	case ESR_ELx_EC_SOFTSTP_LOW:
193 		vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING);
194 		break;
195 	}
196 
197 	return 0;
198 }
199 
200 static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu)
201 {
202 	u64 esr = kvm_vcpu_get_esr(vcpu);
203 
204 	kvm_pr_unimpl("Unknown exception class: esr: %#016llx -- %s\n",
205 		      esr, esr_get_class_string(esr));
206 
207 	kvm_inject_undefined(vcpu);
208 	return 1;
209 }
210 
/*
 * SVE accesses from the guest should only reach this handler when the
 * system has no SVE support, so the guest simply gets an UNDEF.
 *
 * Always returns 1 (resume the guest).
 */
static int handle_sve(struct kvm_vcpu *vcpu)
{
	kvm_inject_undefined(vcpu);

	return 1;
}
220 
/*
 * The guest used a ptrauth instruction that its EL1 did not turn into a
 * NOP. Reaching this handler means ptrauth was not fixed up on exit, so
 * the only option left is to give the guest an UNDEF.
 *
 * Always returns 1 (resume the guest).
 */
static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu)
{
	kvm_inject_undefined(vcpu);

	return 1;
}
231 
232 static int kvm_handle_eret(struct kvm_vcpu *vcpu)
233 {
234 	if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_ERET_ISS_ERET)
235 		return kvm_handle_ptrauth(vcpu);
236 
237 	kvm_emulate_nested_eret(vcpu);
238 	return 1;
239 }
240 
241 static exit_handle_fn arm_exit_handlers[] = {
242 	[0 ... ESR_ELx_EC_MAX]	= kvm_handle_unknown_ec,
243 	[ESR_ELx_EC_WFx]	= kvm_handle_wfx,
244 	[ESR_ELx_EC_CP15_32]	= kvm_handle_cp15_32,
245 	[ESR_ELx_EC_CP15_64]	= kvm_handle_cp15_64,
246 	[ESR_ELx_EC_CP14_MR]	= kvm_handle_cp14_32,
247 	[ESR_ELx_EC_CP14_LS]	= kvm_handle_cp14_load_store,
248 	[ESR_ELx_EC_CP10_ID]	= kvm_handle_cp10_id,
249 	[ESR_ELx_EC_CP14_64]	= kvm_handle_cp14_64,
250 	[ESR_ELx_EC_HVC32]	= handle_hvc,
251 	[ESR_ELx_EC_SMC32]	= handle_smc,
252 	[ESR_ELx_EC_HVC64]	= handle_hvc,
253 	[ESR_ELx_EC_SMC64]	= handle_smc,
254 	[ESR_ELx_EC_SYS64]	= kvm_handle_sys_reg,
255 	[ESR_ELx_EC_SVE]	= handle_sve,
256 	[ESR_ELx_EC_ERET]	= kvm_handle_eret,
257 	[ESR_ELx_EC_IABT_LOW]	= kvm_handle_guest_abort,
258 	[ESR_ELx_EC_DABT_LOW]	= kvm_handle_guest_abort,
259 	[ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug,
260 	[ESR_ELx_EC_WATCHPT_LOW]= kvm_handle_guest_debug,
261 	[ESR_ELx_EC_BREAKPT_LOW]= kvm_handle_guest_debug,
262 	[ESR_ELx_EC_BKPT32]	= kvm_handle_guest_debug,
263 	[ESR_ELx_EC_BRK64]	= kvm_handle_guest_debug,
264 	[ESR_ELx_EC_FP_ASIMD]	= handle_no_fpsimd,
265 	[ESR_ELx_EC_PAC]	= kvm_handle_ptrauth,
266 };
267 
268 static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
269 {
270 	u64 esr = kvm_vcpu_get_esr(vcpu);
271 	u8 esr_ec = ESR_ELx_EC(esr);
272 
273 	return arm_exit_handlers[esr_ec];
274 }
275 
276 /*
277  * We may be single-stepping an emulated instruction. If the emulation
278  * has been completed in the kernel, we can return to userspace with a
279  * KVM_EXIT_DEBUG, otherwise userspace needs to complete its
280  * emulation first.
281  */
282 static int handle_trap_exceptions(struct kvm_vcpu *vcpu)
283 {
284 	int handled;
285 
286 	/*
287 	 * See ARM ARM B1.14.1: "Hyp traps on instructions
288 	 * that fail their condition code check"
289 	 */
290 	if (!kvm_condition_valid(vcpu)) {
291 		kvm_incr_pc(vcpu);
292 		handled = 1;
293 	} else {
294 		exit_handle_fn exit_handler;
295 
296 		exit_handler = kvm_get_exit_handler(vcpu);
297 		handled = exit_handler(vcpu);
298 	}
299 
300 	return handled;
301 }
302 
303 /*
304  * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
305  * proper exit to userspace.
306  */
307 int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
308 {
309 	struct kvm_run *run = vcpu->run;
310 
311 	if (ARM_SERROR_PENDING(exception_index)) {
312 		/*
313 		 * The SError is handled by handle_exit_early(). If the guest
314 		 * survives it will re-execute the original instruction.
315 		 */
316 		return 1;
317 	}
318 
319 	exception_index = ARM_EXCEPTION_CODE(exception_index);
320 
321 	switch (exception_index) {
322 	case ARM_EXCEPTION_IRQ:
323 		return 1;
324 	case ARM_EXCEPTION_EL1_SERROR:
325 		return 1;
326 	case ARM_EXCEPTION_TRAP:
327 		return handle_trap_exceptions(vcpu);
328 	case ARM_EXCEPTION_HYP_GONE:
329 		/*
330 		 * EL2 has been reset to the hyp-stub. This happens when a guest
331 		 * is pre-emptied by kvm_reboot()'s shutdown call.
332 		 */
333 		run->exit_reason = KVM_EXIT_FAIL_ENTRY;
334 		return 0;
335 	case ARM_EXCEPTION_IL:
336 		/*
337 		 * We attempted an illegal exception return.  Guest state must
338 		 * have been corrupted somehow.  Give up.
339 		 */
340 		run->exit_reason = KVM_EXIT_FAIL_ENTRY;
341 		return -EINVAL;
342 	default:
343 		kvm_pr_unimpl("Unsupported exception type: %d",
344 			      exception_index);
345 		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
346 		return 0;
347 	}
348 }
349 
350 /* For exit types that need handling before we can be preempted */
351 void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index)
352 {
353 	if (ARM_SERROR_PENDING(exception_index)) {
354 		if (this_cpu_has_cap(ARM64_HAS_RAS_EXTN)) {
355 			u64 disr = kvm_vcpu_get_disr(vcpu);
356 
357 			kvm_handle_guest_serror(vcpu, disr_to_esr(disr));
358 		} else {
359 			kvm_inject_vabt(vcpu);
360 		}
361 
362 		return;
363 	}
364 
365 	exception_index = ARM_EXCEPTION_CODE(exception_index);
366 
367 	if (exception_index == ARM_EXCEPTION_EL1_SERROR)
368 		kvm_handle_guest_serror(vcpu, kvm_vcpu_get_esr(vcpu));
369 }
370 
371 void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
372 					      u64 elr_virt, u64 elr_phys,
373 					      u64 par, uintptr_t vcpu,
374 					      u64 far, u64 hpfar) {
375 	u64 elr_in_kimg = __phys_to_kimg(elr_phys);
376 	u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr_virt;
377 	u64 mode = spsr & PSR_MODE_MASK;
378 	u64 panic_addr = elr_virt + hyp_offset;
379 
380 	if (mode != PSR_MODE_EL2t && mode != PSR_MODE_EL2h) {
381 		kvm_err("Invalid host exception to nVHE hyp!\n");
382 	} else if (ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 &&
383 		   (esr & ESR_ELx_BRK64_ISS_COMMENT_MASK) == BUG_BRK_IMM) {
384 		const char *file = NULL;
385 		unsigned int line = 0;
386 
387 		/* All hyp bugs, including warnings, are treated as fatal. */
388 		if (!is_protected_kvm_enabled() ||
389 		    IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
390 			struct bug_entry *bug = find_bug(elr_in_kimg);
391 
392 			if (bug)
393 				bug_get_file_line(bug, &file, &line);
394 		}
395 
396 		if (file)
397 			kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line);
398 		else
399 			kvm_err("nVHE hyp BUG at: [<%016llx>] %pB!\n", panic_addr,
400 					(void *)(panic_addr + kaslr_offset()));
401 	} else {
402 		kvm_err("nVHE hyp panic at: [<%016llx>] %pB!\n", panic_addr,
403 				(void *)(panic_addr + kaslr_offset()));
404 	}
405 
406 	/* Dump the nVHE hypervisor backtrace */
407 	kvm_nvhe_dump_backtrace(hyp_offset);
408 
409 	/*
410 	 * Hyp has panicked and we're going to handle that by panicking the
411 	 * kernel. The kernel offset will be revealed in the panic so we're
412 	 * also safe to reveal the hyp offset as a debugging aid for translating
413 	 * hyp VAs to vmlinux addresses.
414 	 */
415 	kvm_err("Hyp Offset: 0x%llx\n", hyp_offset);
416 
417 	panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%016llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%016lx\n",
418 	      spsr, elr_virt, esr, far, hpfar, par, vcpu);
419 }
420