xref: /openbmc/linux/arch/arm64/kvm/guest.c (revision 8dda2eac)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2012,2013 - ARM Ltd
4  * Author: Marc Zyngier <marc.zyngier@arm.com>
5  *
6  * Derived from arch/arm/kvm/guest.c:
7  * Copyright (C) 2012 - Virtual Open Systems and Columbia University
8  * Author: Christoffer Dall <c.dall@virtualopensystems.com>
9  */
10 
11 #include <linux/bits.h>
12 #include <linux/errno.h>
13 #include <linux/err.h>
14 #include <linux/nospec.h>
15 #include <linux/kvm_host.h>
16 #include <linux/module.h>
17 #include <linux/stddef.h>
18 #include <linux/string.h>
19 #include <linux/vmalloc.h>
20 #include <linux/fs.h>
21 #include <kvm/arm_psci.h>
22 #include <asm/cputype.h>
23 #include <linux/uaccess.h>
24 #include <asm/fpsimd.h>
25 #include <asm/kvm.h>
26 #include <asm/kvm_emulate.h>
27 #include <asm/sigcontext.h>
28 
29 #include "trace.h"
30 
/*
 * Descriptor table for the per-VM statistics. Only the generic KVM entries
 * are listed here; no extra entries are added in this file.
 */
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS()
};
/* Build-time check: one descriptor per u64-sized slot of struct kvm_vm_stat. */
static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
		sizeof(struct kvm_vm_stat) / sizeof(u64));
36 
/*
 * Header describing the layout of the per-VM statistics blob. The offsets
 * place the sections back to back: this header, then KVM_STATS_NAME_SIZE
 * bytes for the id, then the descriptor table, then the data.
 */
const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset =  sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};
45 
/*
 * Descriptor table for the per-vcpu statistics: the generic KVM entries
 * followed by the counters specific to this file's architecture.
 */
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, hvc_exit_stat),
	STATS_DESC_COUNTER(VCPU, wfe_exit_stat),
	STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
	STATS_DESC_COUNTER(VCPU, mmio_exit_user),
	STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
	STATS_DESC_COUNTER(VCPU, exits)
};
/* Build-time check: one descriptor per u64-sized slot of struct kvm_vcpu_stat. */
static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
		sizeof(struct kvm_vcpu_stat) / sizeof(u64));
57 
/*
 * Header describing the layout of the per-vcpu statistics blob. The offsets
 * place the sections back to back: this header, then KVM_STATS_NAME_SIZE
 * bytes for the id, then the descriptor table, then the data.
 */
const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};
66 
67 static bool core_reg_offset_is_vreg(u64 off)
68 {
69 	return off >= KVM_REG_ARM_CORE_REG(fp_regs.vregs) &&
70 		off < KVM_REG_ARM_CORE_REG(fp_regs.fpsr);
71 }
72 
73 static u64 core_reg_offset_from_id(u64 id)
74 {
75 	return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
76 }
77 
78 static int core_reg_size_from_offset(const struct kvm_vcpu *vcpu, u64 off)
79 {
80 	int size;
81 
82 	switch (off) {
83 	case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
84 	     KVM_REG_ARM_CORE_REG(regs.regs[30]):
85 	case KVM_REG_ARM_CORE_REG(regs.sp):
86 	case KVM_REG_ARM_CORE_REG(regs.pc):
87 	case KVM_REG_ARM_CORE_REG(regs.pstate):
88 	case KVM_REG_ARM_CORE_REG(sp_el1):
89 	case KVM_REG_ARM_CORE_REG(elr_el1):
90 	case KVM_REG_ARM_CORE_REG(spsr[0]) ...
91 	     KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
92 		size = sizeof(__u64);
93 		break;
94 
95 	case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
96 	     KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
97 		size = sizeof(__uint128_t);
98 		break;
99 
100 	case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
101 	case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
102 		size = sizeof(__u32);
103 		break;
104 
105 	default:
106 		return -EINVAL;
107 	}
108 
109 	if (!IS_ALIGNED(off, size / sizeof(__u32)))
110 		return -EINVAL;
111 
112 	/*
113 	 * The KVM_REG_ARM64_SVE regs must be used instead of
114 	 * KVM_REG_ARM_CORE for accessing the FPSIMD V-registers on
115 	 * SVE-enabled vcpus:
116 	 */
117 	if (vcpu_has_sve(vcpu) && core_reg_offset_is_vreg(off))
118 		return -EINVAL;
119 
120 	return size;
121 }
122 
123 static void *core_reg_addr(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
124 {
125 	u64 off = core_reg_offset_from_id(reg->id);
126 	int size = core_reg_size_from_offset(vcpu, off);
127 
128 	if (size < 0)
129 		return NULL;
130 
131 	if (KVM_REG_SIZE(reg->id) != size)
132 		return NULL;
133 
134 	switch (off) {
135 	case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
136 	     KVM_REG_ARM_CORE_REG(regs.regs[30]):
137 		off -= KVM_REG_ARM_CORE_REG(regs.regs[0]);
138 		off /= 2;
139 		return &vcpu->arch.ctxt.regs.regs[off];
140 
141 	case KVM_REG_ARM_CORE_REG(regs.sp):
142 		return &vcpu->arch.ctxt.regs.sp;
143 
144 	case KVM_REG_ARM_CORE_REG(regs.pc):
145 		return &vcpu->arch.ctxt.regs.pc;
146 
147 	case KVM_REG_ARM_CORE_REG(regs.pstate):
148 		return &vcpu->arch.ctxt.regs.pstate;
149 
150 	case KVM_REG_ARM_CORE_REG(sp_el1):
151 		return __ctxt_sys_reg(&vcpu->arch.ctxt, SP_EL1);
152 
153 	case KVM_REG_ARM_CORE_REG(elr_el1):
154 		return __ctxt_sys_reg(&vcpu->arch.ctxt, ELR_EL1);
155 
156 	case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_EL1]):
157 		return __ctxt_sys_reg(&vcpu->arch.ctxt, SPSR_EL1);
158 
159 	case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_ABT]):
160 		return &vcpu->arch.ctxt.spsr_abt;
161 
162 	case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_UND]):
163 		return &vcpu->arch.ctxt.spsr_und;
164 
165 	case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_IRQ]):
166 		return &vcpu->arch.ctxt.spsr_irq;
167 
168 	case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_FIQ]):
169 		return &vcpu->arch.ctxt.spsr_fiq;
170 
171 	case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
172 	     KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
173 		off -= KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]);
174 		off /= 4;
175 		return &vcpu->arch.ctxt.fp_regs.vregs[off];
176 
177 	case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
178 		return &vcpu->arch.ctxt.fp_regs.fpsr;
179 
180 	case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
181 		return &vcpu->arch.ctxt.fp_regs.fpcr;
182 
183 	default:
184 		return NULL;
185 	}
186 }
187 
188 static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
189 {
190 	/*
191 	 * Because the kvm_regs structure is a mix of 32, 64 and
192 	 * 128bit fields, we index it as if it was a 32bit
193 	 * array. Hence below, nr_regs is the number of entries, and
194 	 * off the index in the "array".
195 	 */
196 	__u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
197 	int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32);
198 	void *addr;
199 	u32 off;
200 
201 	/* Our ID is an index into the kvm_regs struct. */
202 	off = core_reg_offset_from_id(reg->id);
203 	if (off >= nr_regs ||
204 	    (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
205 		return -ENOENT;
206 
207 	addr = core_reg_addr(vcpu, reg);
208 	if (!addr)
209 		return -EINVAL;
210 
211 	if (copy_to_user(uaddr, addr, KVM_REG_SIZE(reg->id)))
212 		return -EFAULT;
213 
214 	return 0;
215 }
216 
217 static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
218 {
219 	__u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
220 	int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32);
221 	__uint128_t tmp;
222 	void *valp = &tmp, *addr;
223 	u64 off;
224 	int err = 0;
225 
226 	/* Our ID is an index into the kvm_regs struct. */
227 	off = core_reg_offset_from_id(reg->id);
228 	if (off >= nr_regs ||
229 	    (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
230 		return -ENOENT;
231 
232 	addr = core_reg_addr(vcpu, reg);
233 	if (!addr)
234 		return -EINVAL;
235 
236 	if (KVM_REG_SIZE(reg->id) > sizeof(tmp))
237 		return -EINVAL;
238 
239 	if (copy_from_user(valp, uaddr, KVM_REG_SIZE(reg->id))) {
240 		err = -EFAULT;
241 		goto out;
242 	}
243 
244 	if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) {
245 		u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK;
246 		switch (mode) {
247 		case PSR_AA32_MODE_USR:
248 			if (!system_supports_32bit_el0())
249 				return -EINVAL;
250 			break;
251 		case PSR_AA32_MODE_FIQ:
252 		case PSR_AA32_MODE_IRQ:
253 		case PSR_AA32_MODE_SVC:
254 		case PSR_AA32_MODE_ABT:
255 		case PSR_AA32_MODE_UND:
256 			if (!vcpu_el1_is_32bit(vcpu))
257 				return -EINVAL;
258 			break;
259 		case PSR_MODE_EL0t:
260 		case PSR_MODE_EL1t:
261 		case PSR_MODE_EL1h:
262 			if (vcpu_el1_is_32bit(vcpu))
263 				return -EINVAL;
264 			break;
265 		default:
266 			err = -EINVAL;
267 			goto out;
268 		}
269 	}
270 
271 	memcpy(addr, valp, KVM_REG_SIZE(reg->id));
272 
273 	if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) {
274 		int i, nr_reg;
275 
276 		switch (*vcpu_cpsr(vcpu)) {
277 		/*
278 		 * Either we are dealing with user mode, and only the
279 		 * first 15 registers (+ PC) must be narrowed to 32bit.
280 		 * AArch32 r0-r14 conveniently map to AArch64 x0-x14.
281 		 */
282 		case PSR_AA32_MODE_USR:
283 		case PSR_AA32_MODE_SYS:
284 			nr_reg = 15;
285 			break;
286 
287 		/*
288 		 * Otherwide, this is a priviledged mode, and *all* the
289 		 * registers must be narrowed to 32bit.
290 		 */
291 		default:
292 			nr_reg = 31;
293 			break;
294 		}
295 
296 		for (i = 0; i < nr_reg; i++)
297 			vcpu_set_reg(vcpu, i, (u32)vcpu_get_reg(vcpu, i));
298 
299 		*vcpu_pc(vcpu) = (u32)*vcpu_pc(vcpu);
300 	}
301 out:
302 	return err;
303 }
304 
/*
 * Helpers for a bitmap of vector quanta (vq) stored as an array of u64,
 * where bit 0 of word 0 stands for SVE_VQ_MIN:
 *   vq_word()    - index of the u64 word holding the bit for @vq
 *   vq_mask()    - mask selecting that bit within its word
 *   vq_present() - 1 if the bit for @vq is set in the array @vqs, else 0
 */
#define vq_word(vq) (((vq) - SVE_VQ_MIN) / 64)
#define vq_mask(vq) ((u64)1 << ((vq) - SVE_VQ_MIN) % 64)
#define vq_present(vqs, vq) (!!((vqs)[vq_word(vq)] & vq_mask(vq)))
308 
309 static int get_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
310 {
311 	unsigned int max_vq, vq;
312 	u64 vqs[KVM_ARM64_SVE_VLS_WORDS];
313 
314 	if (!vcpu_has_sve(vcpu))
315 		return -ENOENT;
316 
317 	if (WARN_ON(!sve_vl_valid(vcpu->arch.sve_max_vl)))
318 		return -EINVAL;
319 
320 	memset(vqs, 0, sizeof(vqs));
321 
322 	max_vq = vcpu_sve_max_vq(vcpu);
323 	for (vq = SVE_VQ_MIN; vq <= max_vq; ++vq)
324 		if (sve_vq_available(vq))
325 			vqs[vq_word(vq)] |= vq_mask(vq);
326 
327 	if (copy_to_user((void __user *)reg->addr, vqs, sizeof(vqs)))
328 		return -EFAULT;
329 
330 	return 0;
331 }
332 
333 static int set_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
334 {
335 	unsigned int max_vq, vq;
336 	u64 vqs[KVM_ARM64_SVE_VLS_WORDS];
337 
338 	if (!vcpu_has_sve(vcpu))
339 		return -ENOENT;
340 
341 	if (kvm_arm_vcpu_sve_finalized(vcpu))
342 		return -EPERM; /* too late! */
343 
344 	if (WARN_ON(vcpu->arch.sve_state))
345 		return -EINVAL;
346 
347 	if (copy_from_user(vqs, (const void __user *)reg->addr, sizeof(vqs)))
348 		return -EFAULT;
349 
350 	max_vq = 0;
351 	for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; ++vq)
352 		if (vq_present(vqs, vq))
353 			max_vq = vq;
354 
355 	if (max_vq > sve_vq_from_vl(kvm_sve_max_vl))
356 		return -EINVAL;
357 
358 	/*
359 	 * Vector lengths supported by the host can't currently be
360 	 * hidden from the guest individually: instead we can only set a
361 	 * maximum via ZCR_EL2.LEN.  So, make sure the available vector
362 	 * lengths match the set requested exactly up to the requested
363 	 * maximum:
364 	 */
365 	for (vq = SVE_VQ_MIN; vq <= max_vq; ++vq)
366 		if (vq_present(vqs, vq) != sve_vq_available(vq))
367 			return -EINVAL;
368 
369 	/* Can't run with no vector lengths at all: */
370 	if (max_vq < SVE_VQ_MIN)
371 		return -EINVAL;
372 
373 	/* vcpu->arch.sve_state will be alloc'd by kvm_vcpu_finalize_sve() */
374 	vcpu->arch.sve_max_vl = sve_vl_from_vq(max_vq);
375 
376 	return 0;
377 }
378 
/*
 * Bit layout of the low part of an SVE register ID: the slice number sits
 * in bits [4:0] and the register number in bits [9:5].
 */
#define SVE_REG_SLICE_SHIFT	0
#define SVE_REG_SLICE_BITS	5
#define SVE_REG_ID_SHIFT	(SVE_REG_SLICE_SHIFT + SVE_REG_SLICE_BITS)
#define SVE_REG_ID_BITS		5

#define SVE_REG_SLICE_MASK					\
	GENMASK(SVE_REG_SLICE_SHIFT + SVE_REG_SLICE_BITS - 1,	\
		SVE_REG_SLICE_SHIFT)
#define SVE_REG_ID_MASK							\
	GENMASK(SVE_REG_ID_SHIFT + SVE_REG_ID_BITS - 1, SVE_REG_ID_SHIFT)

/* Number of slice values the slice field can encode. */
#define SVE_NUM_SLICES (1 << SVE_REG_SLICE_BITS)

/* Sizes, in bytes, encoded in the IDs of a Z-register and a P-register slice. */
#define KVM_SVE_ZREG_SIZE KVM_REG_SIZE(KVM_REG_ARM64_SVE_ZREG(0, 0))
#define KVM_SVE_PREG_SIZE KVM_REG_SIZE(KVM_REG_ARM64_SVE_PREG(0, 0))

/*
 * Number of register slices required to cover each whole SVE register.
 * NOTE: Only the first slice ever exists, for now.
 * If you are tempted to modify this, you must also rework sve_reg_to_region()
 * to match:
 */
#define vcpu_sve_slices(vcpu) 1
402 
/*
 * Bounds of a single SVE register slice within vcpu->arch.sve_state.
 * Filled in by sve_reg_to_region(); klen + upad equals the size encoded in
 * the register ID.
 */
struct sve_state_reg_region {
	unsigned int koffset;	/* offset into sve_state in kernel memory */
	unsigned int klen;	/* length in kernel memory */
	unsigned int upad;	/* extra trailing padding in user memory */
};
409 
410 /*
411  * Validate SVE register ID and get sanitised bounds for user/kernel SVE
412  * register copy
413  */
414 static int sve_reg_to_region(struct sve_state_reg_region *region,
415 			     struct kvm_vcpu *vcpu,
416 			     const struct kvm_one_reg *reg)
417 {
418 	/* reg ID ranges for Z- registers */
419 	const u64 zreg_id_min = KVM_REG_ARM64_SVE_ZREG(0, 0);
420 	const u64 zreg_id_max = KVM_REG_ARM64_SVE_ZREG(SVE_NUM_ZREGS - 1,
421 						       SVE_NUM_SLICES - 1);
422 
423 	/* reg ID ranges for P- registers and FFR (which are contiguous) */
424 	const u64 preg_id_min = KVM_REG_ARM64_SVE_PREG(0, 0);
425 	const u64 preg_id_max = KVM_REG_ARM64_SVE_FFR(SVE_NUM_SLICES - 1);
426 
427 	unsigned int vq;
428 	unsigned int reg_num;
429 
430 	unsigned int reqoffset, reqlen; /* User-requested offset and length */
431 	unsigned int maxlen; /* Maximum permitted length */
432 
433 	size_t sve_state_size;
434 
435 	const u64 last_preg_id = KVM_REG_ARM64_SVE_PREG(SVE_NUM_PREGS - 1,
436 							SVE_NUM_SLICES - 1);
437 
438 	/* Verify that the P-regs and FFR really do have contiguous IDs: */
439 	BUILD_BUG_ON(KVM_REG_ARM64_SVE_FFR(0) != last_preg_id + 1);
440 
441 	/* Verify that we match the UAPI header: */
442 	BUILD_BUG_ON(SVE_NUM_SLICES != KVM_ARM64_SVE_MAX_SLICES);
443 
444 	reg_num = (reg->id & SVE_REG_ID_MASK) >> SVE_REG_ID_SHIFT;
445 
446 	if (reg->id >= zreg_id_min && reg->id <= zreg_id_max) {
447 		if (!vcpu_has_sve(vcpu) || (reg->id & SVE_REG_SLICE_MASK) > 0)
448 			return -ENOENT;
449 
450 		vq = vcpu_sve_max_vq(vcpu);
451 
452 		reqoffset = SVE_SIG_ZREG_OFFSET(vq, reg_num) -
453 				SVE_SIG_REGS_OFFSET;
454 		reqlen = KVM_SVE_ZREG_SIZE;
455 		maxlen = SVE_SIG_ZREG_SIZE(vq);
456 	} else if (reg->id >= preg_id_min && reg->id <= preg_id_max) {
457 		if (!vcpu_has_sve(vcpu) || (reg->id & SVE_REG_SLICE_MASK) > 0)
458 			return -ENOENT;
459 
460 		vq = vcpu_sve_max_vq(vcpu);
461 
462 		reqoffset = SVE_SIG_PREG_OFFSET(vq, reg_num) -
463 				SVE_SIG_REGS_OFFSET;
464 		reqlen = KVM_SVE_PREG_SIZE;
465 		maxlen = SVE_SIG_PREG_SIZE(vq);
466 	} else {
467 		return -EINVAL;
468 	}
469 
470 	sve_state_size = vcpu_sve_state_size(vcpu);
471 	if (WARN_ON(!sve_state_size))
472 		return -EINVAL;
473 
474 	region->koffset = array_index_nospec(reqoffset, sve_state_size);
475 	region->klen = min(maxlen, reqlen);
476 	region->upad = reqlen - region->klen;
477 
478 	return 0;
479 }
480 
481 static int get_sve_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
482 {
483 	int ret;
484 	struct sve_state_reg_region region;
485 	char __user *uptr = (char __user *)reg->addr;
486 
487 	/* Handle the KVM_REG_ARM64_SVE_VLS pseudo-reg as a special case: */
488 	if (reg->id == KVM_REG_ARM64_SVE_VLS)
489 		return get_sve_vls(vcpu, reg);
490 
491 	/* Try to interpret reg ID as an architectural SVE register... */
492 	ret = sve_reg_to_region(&region, vcpu, reg);
493 	if (ret)
494 		return ret;
495 
496 	if (!kvm_arm_vcpu_sve_finalized(vcpu))
497 		return -EPERM;
498 
499 	if (copy_to_user(uptr, vcpu->arch.sve_state + region.koffset,
500 			 region.klen) ||
501 	    clear_user(uptr + region.klen, region.upad))
502 		return -EFAULT;
503 
504 	return 0;
505 }
506 
507 static int set_sve_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
508 {
509 	int ret;
510 	struct sve_state_reg_region region;
511 	const char __user *uptr = (const char __user *)reg->addr;
512 
513 	/* Handle the KVM_REG_ARM64_SVE_VLS pseudo-reg as a special case: */
514 	if (reg->id == KVM_REG_ARM64_SVE_VLS)
515 		return set_sve_vls(vcpu, reg);
516 
517 	/* Try to interpret reg ID as an architectural SVE register... */
518 	ret = sve_reg_to_region(&region, vcpu, reg);
519 	if (ret)
520 		return ret;
521 
522 	if (!kvm_arm_vcpu_sve_finalized(vcpu))
523 		return -EPERM;
524 
525 	if (copy_from_user(vcpu->arch.sve_state + region.koffset, uptr,
526 			   region.klen))
527 		return -EFAULT;
528 
529 	return 0;
530 }
531 
/* Stub: always returns -EINVAL and touches neither @vcpu nor @regs. */
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}
536 
/* Stub: always returns -EINVAL and touches neither @vcpu nor @regs. */
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}
541 
542 static int copy_core_reg_indices(const struct kvm_vcpu *vcpu,
543 				 u64 __user *uindices)
544 {
545 	unsigned int i;
546 	int n = 0;
547 
548 	for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) {
549 		u64 reg = KVM_REG_ARM64 | KVM_REG_ARM_CORE | i;
550 		int size = core_reg_size_from_offset(vcpu, i);
551 
552 		if (size < 0)
553 			continue;
554 
555 		switch (size) {
556 		case sizeof(__u32):
557 			reg |= KVM_REG_SIZE_U32;
558 			break;
559 
560 		case sizeof(__u64):
561 			reg |= KVM_REG_SIZE_U64;
562 			break;
563 
564 		case sizeof(__uint128_t):
565 			reg |= KVM_REG_SIZE_U128;
566 			break;
567 
568 		default:
569 			WARN_ON(1);
570 			continue;
571 		}
572 
573 		if (uindices) {
574 			if (put_user(reg, uindices))
575 				return -EFAULT;
576 			uindices++;
577 		}
578 
579 		n++;
580 	}
581 
582 	return n;
583 }
584 
585 static unsigned long num_core_regs(const struct kvm_vcpu *vcpu)
586 {
587 	return copy_core_reg_indices(vcpu, NULL);
588 }
589 
590 /**
591  * ARM64 versions of the TIMER registers, always available on arm64
592  */
593 
594 #define NUM_TIMER_REGS 3
595 
596 static bool is_timer_reg(u64 index)
597 {
598 	switch (index) {
599 	case KVM_REG_ARM_TIMER_CTL:
600 	case KVM_REG_ARM_TIMER_CNT:
601 	case KVM_REG_ARM_TIMER_CVAL:
602 		return true;
603 	}
604 	return false;
605 }
606 
607 static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
608 {
609 	if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
610 		return -EFAULT;
611 	uindices++;
612 	if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
613 		return -EFAULT;
614 	uindices++;
615 	if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
616 		return -EFAULT;
617 
618 	return 0;
619 }
620 
621 static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
622 {
623 	void __user *uaddr = (void __user *)(long)reg->addr;
624 	u64 val;
625 	int ret;
626 
627 	ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
628 	if (ret != 0)
629 		return -EFAULT;
630 
631 	return kvm_arm_timer_set_reg(vcpu, reg->id, val);
632 }
633 
634 static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
635 {
636 	void __user *uaddr = (void __user *)(long)reg->addr;
637 	u64 val;
638 
639 	val = kvm_arm_timer_get_reg(vcpu, reg->id);
640 	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
641 }
642 
643 static unsigned long num_sve_regs(const struct kvm_vcpu *vcpu)
644 {
645 	const unsigned int slices = vcpu_sve_slices(vcpu);
646 
647 	if (!vcpu_has_sve(vcpu))
648 		return 0;
649 
650 	/* Policed by KVM_GET_REG_LIST: */
651 	WARN_ON(!kvm_arm_vcpu_sve_finalized(vcpu));
652 
653 	return slices * (SVE_NUM_PREGS + SVE_NUM_ZREGS + 1 /* FFR */)
654 		+ 1; /* KVM_REG_ARM64_SVE_VLS */
655 }
656 
657 static int copy_sve_reg_indices(const struct kvm_vcpu *vcpu,
658 				u64 __user *uindices)
659 {
660 	const unsigned int slices = vcpu_sve_slices(vcpu);
661 	u64 reg;
662 	unsigned int i, n;
663 	int num_regs = 0;
664 
665 	if (!vcpu_has_sve(vcpu))
666 		return 0;
667 
668 	/* Policed by KVM_GET_REG_LIST: */
669 	WARN_ON(!kvm_arm_vcpu_sve_finalized(vcpu));
670 
671 	/*
672 	 * Enumerate this first, so that userspace can save/restore in
673 	 * the order reported by KVM_GET_REG_LIST:
674 	 */
675 	reg = KVM_REG_ARM64_SVE_VLS;
676 	if (put_user(reg, uindices++))
677 		return -EFAULT;
678 	++num_regs;
679 
680 	for (i = 0; i < slices; i++) {
681 		for (n = 0; n < SVE_NUM_ZREGS; n++) {
682 			reg = KVM_REG_ARM64_SVE_ZREG(n, i);
683 			if (put_user(reg, uindices++))
684 				return -EFAULT;
685 			num_regs++;
686 		}
687 
688 		for (n = 0; n < SVE_NUM_PREGS; n++) {
689 			reg = KVM_REG_ARM64_SVE_PREG(n, i);
690 			if (put_user(reg, uindices++))
691 				return -EFAULT;
692 			num_regs++;
693 		}
694 
695 		reg = KVM_REG_ARM64_SVE_FFR(i);
696 		if (put_user(reg, uindices++))
697 			return -EFAULT;
698 		num_regs++;
699 	}
700 
701 	return num_regs;
702 }
703 
704 /**
705  * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
706  *
707  * This is for all registers.
708  */
709 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
710 {
711 	unsigned long res = 0;
712 
713 	res += num_core_regs(vcpu);
714 	res += num_sve_regs(vcpu);
715 	res += kvm_arm_num_sys_reg_descs(vcpu);
716 	res += kvm_arm_get_fw_num_regs(vcpu);
717 	res += NUM_TIMER_REGS;
718 
719 	return res;
720 }
721 
722 /**
723  * kvm_arm_copy_reg_indices - get indices of all registers.
724  *
725  * We do core registers right here, then we append system regs.
726  */
727 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
728 {
729 	int ret;
730 
731 	ret = copy_core_reg_indices(vcpu, uindices);
732 	if (ret < 0)
733 		return ret;
734 	uindices += ret;
735 
736 	ret = copy_sve_reg_indices(vcpu, uindices);
737 	if (ret < 0)
738 		return ret;
739 	uindices += ret;
740 
741 	ret = kvm_arm_copy_fw_reg_indices(vcpu, uindices);
742 	if (ret < 0)
743 		return ret;
744 	uindices += kvm_arm_get_fw_num_regs(vcpu);
745 
746 	ret = copy_timer_indices(vcpu, uindices);
747 	if (ret < 0)
748 		return ret;
749 	uindices += NUM_TIMER_REGS;
750 
751 	return kvm_arm_copy_sys_reg_indices(vcpu, uindices);
752 }
753 
754 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
755 {
756 	/* We currently use nothing arch-specific in upper 32 bits */
757 	if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
758 		return -EINVAL;
759 
760 	switch (reg->id & KVM_REG_ARM_COPROC_MASK) {
761 	case KVM_REG_ARM_CORE:	return get_core_reg(vcpu, reg);
762 	case KVM_REG_ARM_FW:	return kvm_arm_get_fw_reg(vcpu, reg);
763 	case KVM_REG_ARM64_SVE:	return get_sve_reg(vcpu, reg);
764 	}
765 
766 	if (is_timer_reg(reg->id))
767 		return get_timer_reg(vcpu, reg);
768 
769 	return kvm_arm_sys_reg_get_reg(vcpu, reg);
770 }
771 
772 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
773 {
774 	/* We currently use nothing arch-specific in upper 32 bits */
775 	if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
776 		return -EINVAL;
777 
778 	switch (reg->id & KVM_REG_ARM_COPROC_MASK) {
779 	case KVM_REG_ARM_CORE:	return set_core_reg(vcpu, reg);
780 	case KVM_REG_ARM_FW:	return kvm_arm_set_fw_reg(vcpu, reg);
781 	case KVM_REG_ARM64_SVE:	return set_sve_reg(vcpu, reg);
782 	}
783 
784 	if (is_timer_reg(reg->id))
785 		return set_timer_reg(vcpu, reg);
786 
787 	return kvm_arm_sys_reg_set_reg(vcpu, reg);
788 }
789 
/* Stub: always returns -EINVAL and touches neither @vcpu nor @sregs. */
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}
795 
/* Stub: always returns -EINVAL and touches neither @vcpu nor @sregs. */
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}
801 
802 int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
803 			      struct kvm_vcpu_events *events)
804 {
805 	events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE);
806 	events->exception.serror_has_esr = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
807 
808 	if (events->exception.serror_pending && events->exception.serror_has_esr)
809 		events->exception.serror_esr = vcpu_get_vsesr(vcpu);
810 
811 	/*
812 	 * We never return a pending ext_dabt here because we deliver it to
813 	 * the virtual CPU directly when setting the event and it's no longer
814 	 * 'pending' at this point.
815 	 */
816 
817 	return 0;
818 }
819 
820 int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
821 			      struct kvm_vcpu_events *events)
822 {
823 	bool serror_pending = events->exception.serror_pending;
824 	bool has_esr = events->exception.serror_has_esr;
825 	bool ext_dabt_pending = events->exception.ext_dabt_pending;
826 
827 	if (serror_pending && has_esr) {
828 		if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
829 			return -EINVAL;
830 
831 		if (!((events->exception.serror_esr) & ~ESR_ELx_ISS_MASK))
832 			kvm_set_sei_esr(vcpu, events->exception.serror_esr);
833 		else
834 			return -EINVAL;
835 	} else if (serror_pending) {
836 		kvm_inject_vabt(vcpu);
837 	}
838 
839 	if (ext_dabt_pending)
840 		kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
841 
842 	return 0;
843 }
844 
845 int __attribute_const__ kvm_target_cpu(void)
846 {
847 	unsigned long implementor = read_cpuid_implementor();
848 	unsigned long part_number = read_cpuid_part_number();
849 
850 	switch (implementor) {
851 	case ARM_CPU_IMP_ARM:
852 		switch (part_number) {
853 		case ARM_CPU_PART_AEM_V8:
854 			return KVM_ARM_TARGET_AEM_V8;
855 		case ARM_CPU_PART_FOUNDATION:
856 			return KVM_ARM_TARGET_FOUNDATION_V8;
857 		case ARM_CPU_PART_CORTEX_A53:
858 			return KVM_ARM_TARGET_CORTEX_A53;
859 		case ARM_CPU_PART_CORTEX_A57:
860 			return KVM_ARM_TARGET_CORTEX_A57;
861 		}
862 		break;
863 	case ARM_CPU_IMP_APM:
864 		switch (part_number) {
865 		case APM_CPU_PART_POTENZA:
866 			return KVM_ARM_TARGET_XGENE_POTENZA;
867 		}
868 		break;
869 	}
870 
871 	/* Return a default generic target */
872 	return KVM_ARM_TARGET_GENERIC_V8;
873 }
874 
875 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
876 {
877 	int target = kvm_target_cpu();
878 
879 	if (target < 0)
880 		return -ENODEV;
881 
882 	memset(init, 0, sizeof(*init));
883 
884 	/*
885 	 * For now, we don't return any features.
886 	 * In future, we might use features to return target
887 	 * specific features available for the preferred
888 	 * target type.
889 	 */
890 	init->target = (__u32)target;
891 
892 	return 0;
893 }
894 
/* Stub: always returns -EINVAL and touches neither @vcpu nor @fpu. */
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}
899 
/* Stub: always returns -EINVAL and touches neither @vcpu nor @fpu. */
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}
904 
/* Stub: always returns -EINVAL and touches neither @vcpu nor @tr. */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL;
}
910 
911 /**
912  * kvm_arch_vcpu_ioctl_set_guest_debug - set up guest debugging
913  * @kvm:	pointer to the KVM struct
914  * @kvm_guest_debug: the ioctl data buffer
915  *
916  * This sets up and enables the VM for guest debugging. Userspace
917  * passes in a control flag to enable different debug types and
918  * potentially other architecture specific information in the rest of
919  * the structure.
920  */
921 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
922 					struct kvm_guest_debug *dbg)
923 {
924 	int ret = 0;
925 
926 	trace_kvm_set_guest_debug(vcpu, dbg->control);
927 
928 	if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) {
929 		ret = -EINVAL;
930 		goto out;
931 	}
932 
933 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
934 		vcpu->guest_debug = dbg->control;
935 
936 		/* Hardware assisted Break and Watch points */
937 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
938 			vcpu->arch.external_debug_state = dbg->arch;
939 		}
940 
941 	} else {
942 		/* If not enabled clear all flags */
943 		vcpu->guest_debug = 0;
944 	}
945 
946 out:
947 	return ret;
948 }
949 
950 int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
951 			       struct kvm_device_attr *attr)
952 {
953 	int ret;
954 
955 	switch (attr->group) {
956 	case KVM_ARM_VCPU_PMU_V3_CTRL:
957 		ret = kvm_arm_pmu_v3_set_attr(vcpu, attr);
958 		break;
959 	case KVM_ARM_VCPU_TIMER_CTRL:
960 		ret = kvm_arm_timer_set_attr(vcpu, attr);
961 		break;
962 	case KVM_ARM_VCPU_PVTIME_CTRL:
963 		ret = kvm_arm_pvtime_set_attr(vcpu, attr);
964 		break;
965 	default:
966 		ret = -ENXIO;
967 		break;
968 	}
969 
970 	return ret;
971 }
972 
973 int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
974 			       struct kvm_device_attr *attr)
975 {
976 	int ret;
977 
978 	switch (attr->group) {
979 	case KVM_ARM_VCPU_PMU_V3_CTRL:
980 		ret = kvm_arm_pmu_v3_get_attr(vcpu, attr);
981 		break;
982 	case KVM_ARM_VCPU_TIMER_CTRL:
983 		ret = kvm_arm_timer_get_attr(vcpu, attr);
984 		break;
985 	case KVM_ARM_VCPU_PVTIME_CTRL:
986 		ret = kvm_arm_pvtime_get_attr(vcpu, attr);
987 		break;
988 	default:
989 		ret = -ENXIO;
990 		break;
991 	}
992 
993 	return ret;
994 }
995 
996 int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
997 			       struct kvm_device_attr *attr)
998 {
999 	int ret;
1000 
1001 	switch (attr->group) {
1002 	case KVM_ARM_VCPU_PMU_V3_CTRL:
1003 		ret = kvm_arm_pmu_v3_has_attr(vcpu, attr);
1004 		break;
1005 	case KVM_ARM_VCPU_TIMER_CTRL:
1006 		ret = kvm_arm_timer_has_attr(vcpu, attr);
1007 		break;
1008 	case KVM_ARM_VCPU_PVTIME_CTRL:
1009 		ret = kvm_arm_pvtime_has_attr(vcpu, attr);
1010 		break;
1011 	default:
1012 		ret = -ENXIO;
1013 		break;
1014 	}
1015 
1016 	return ret;
1017 }
1018 
1019 long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
1020 				struct kvm_arm_copy_mte_tags *copy_tags)
1021 {
1022 	gpa_t guest_ipa = copy_tags->guest_ipa;
1023 	size_t length = copy_tags->length;
1024 	void __user *tags = copy_tags->addr;
1025 	gpa_t gfn;
1026 	bool write = !(copy_tags->flags & KVM_ARM_TAGS_FROM_GUEST);
1027 	int ret = 0;
1028 
1029 	if (!kvm_has_mte(kvm))
1030 		return -EINVAL;
1031 
1032 	if (copy_tags->reserved[0] || copy_tags->reserved[1])
1033 		return -EINVAL;
1034 
1035 	if (copy_tags->flags & ~KVM_ARM_TAGS_FROM_GUEST)
1036 		return -EINVAL;
1037 
1038 	if (length & ~PAGE_MASK || guest_ipa & ~PAGE_MASK)
1039 		return -EINVAL;
1040 
1041 	gfn = gpa_to_gfn(guest_ipa);
1042 
1043 	mutex_lock(&kvm->slots_lock);
1044 
1045 	while (length > 0) {
1046 		kvm_pfn_t pfn = gfn_to_pfn_prot(kvm, gfn, write, NULL);
1047 		void *maddr;
1048 		unsigned long num_tags;
1049 		struct page *page;
1050 
1051 		if (is_error_noslot_pfn(pfn)) {
1052 			ret = -EFAULT;
1053 			goto out;
1054 		}
1055 
1056 		page = pfn_to_online_page(pfn);
1057 		if (!page) {
1058 			/* Reject ZONE_DEVICE memory */
1059 			ret = -EFAULT;
1060 			goto out;
1061 		}
1062 		maddr = page_address(page);
1063 
1064 		if (!write) {
1065 			if (test_bit(PG_mte_tagged, &page->flags))
1066 				num_tags = mte_copy_tags_to_user(tags, maddr,
1067 							MTE_GRANULES_PER_PAGE);
1068 			else
1069 				/* No tags in memory, so write zeros */
1070 				num_tags = MTE_GRANULES_PER_PAGE -
1071 					clear_user(tags, MTE_GRANULES_PER_PAGE);
1072 			kvm_release_pfn_clean(pfn);
1073 		} else {
1074 			num_tags = mte_copy_tags_from_user(maddr, tags,
1075 							MTE_GRANULES_PER_PAGE);
1076 
1077 			/*
1078 			 * Set the flag after checking the write
1079 			 * completed fully
1080 			 */
1081 			if (num_tags == MTE_GRANULES_PER_PAGE)
1082 				set_bit(PG_mte_tagged, &page->flags);
1083 
1084 			kvm_release_pfn_dirty(pfn);
1085 		}
1086 
1087 		if (num_tags != MTE_GRANULES_PER_PAGE) {
1088 			ret = -EFAULT;
1089 			goto out;
1090 		}
1091 
1092 		gfn++;
1093 		tags += num_tags;
1094 		length -= PAGE_SIZE;
1095 	}
1096 
1097 out:
1098 	mutex_unlock(&kvm->slots_lock);
1099 	/* If some data has been copied report the number of bytes copied */
1100 	if (length != copy_tags->length)
1101 		return copy_tags->length - length;
1102 	return ret;
1103 }
1104