xref: /openbmc/linux/arch/arm64/kvm/guest.c (revision 98a52692)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2012,2013 - ARM Ltd
4  * Author: Marc Zyngier <marc.zyngier@arm.com>
5  *
6  * Derived from arch/arm/kvm/guest.c:
7  * Copyright (C) 2012 - Virtual Open Systems and Columbia University
8  * Author: Christoffer Dall <c.dall@virtualopensystems.com>
9  */
10 
11 #include <linux/bits.h>
12 #include <linux/errno.h>
13 #include <linux/err.h>
14 #include <linux/nospec.h>
15 #include <linux/kvm_host.h>
16 #include <linux/module.h>
17 #include <linux/stddef.h>
18 #include <linux/string.h>
19 #include <linux/vmalloc.h>
20 #include <linux/fs.h>
21 #include <kvm/arm_hypercalls.h>
22 #include <asm/cputype.h>
23 #include <linux/uaccess.h>
24 #include <asm/fpsimd.h>
25 #include <asm/kvm.h>
26 #include <asm/kvm_emulate.h>
27 #include <asm/kvm_nested.h>
28 #include <asm/sigcontext.h>
29 
30 #include "trace.h"
31 
/*
 * Descriptor table for the per-VM statistics. Only the entries produced by
 * KVM_GENERIC_VM_STATS() are listed here.
 */
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS()
};
35 
/*
 * Header describing the layout of the per-VM statistics: the id string
 * starts right after the header, the descriptors follow the id string
 * (KVM_STATS_NAME_SIZE bytes later), and the data follows the descriptors.
 */
const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset =  sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};
44 
/*
 * Descriptor table for the per-vCPU statistics: the generic vCPU entries
 * followed by the counters declared below.
 */
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, hvc_exit_stat),
	STATS_DESC_COUNTER(VCPU, wfe_exit_stat),
	STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
	STATS_DESC_COUNTER(VCPU, mmio_exit_user),
	STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
	STATS_DESC_COUNTER(VCPU, signal_exits),
	STATS_DESC_COUNTER(VCPU, exits)
};
55 
/*
 * Header describing the layout of the per-vCPU statistics: the id string
 * starts right after the header, the descriptors follow the id string
 * (KVM_STATS_NAME_SIZE bytes later), and the data follows the descriptors.
 */
const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};
64 
65 static bool core_reg_offset_is_vreg(u64 off)
66 {
67 	return off >= KVM_REG_ARM_CORE_REG(fp_regs.vregs) &&
68 		off < KVM_REG_ARM_CORE_REG(fp_regs.fpsr);
69 }
70 
71 static u64 core_reg_offset_from_id(u64 id)
72 {
73 	return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
74 }
75 
76 static int core_reg_size_from_offset(const struct kvm_vcpu *vcpu, u64 off)
77 {
78 	int size;
79 
80 	switch (off) {
81 	case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
82 	     KVM_REG_ARM_CORE_REG(regs.regs[30]):
83 	case KVM_REG_ARM_CORE_REG(regs.sp):
84 	case KVM_REG_ARM_CORE_REG(regs.pc):
85 	case KVM_REG_ARM_CORE_REG(regs.pstate):
86 	case KVM_REG_ARM_CORE_REG(sp_el1):
87 	case KVM_REG_ARM_CORE_REG(elr_el1):
88 	case KVM_REG_ARM_CORE_REG(spsr[0]) ...
89 	     KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
90 		size = sizeof(__u64);
91 		break;
92 
93 	case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
94 	     KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
95 		size = sizeof(__uint128_t);
96 		break;
97 
98 	case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
99 	case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
100 		size = sizeof(__u32);
101 		break;
102 
103 	default:
104 		return -EINVAL;
105 	}
106 
107 	if (!IS_ALIGNED(off, size / sizeof(__u32)))
108 		return -EINVAL;
109 
110 	/*
111 	 * The KVM_REG_ARM64_SVE regs must be used instead of
112 	 * KVM_REG_ARM_CORE for accessing the FPSIMD V-registers on
113 	 * SVE-enabled vcpus:
114 	 */
115 	if (vcpu_has_sve(vcpu) && core_reg_offset_is_vreg(off))
116 		return -EINVAL;
117 
118 	return size;
119 }
120 
121 static void *core_reg_addr(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
122 {
123 	u64 off = core_reg_offset_from_id(reg->id);
124 	int size = core_reg_size_from_offset(vcpu, off);
125 
126 	if (size < 0)
127 		return NULL;
128 
129 	if (KVM_REG_SIZE(reg->id) != size)
130 		return NULL;
131 
132 	switch (off) {
133 	case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
134 	     KVM_REG_ARM_CORE_REG(regs.regs[30]):
135 		off -= KVM_REG_ARM_CORE_REG(regs.regs[0]);
136 		off /= 2;
137 		return &vcpu->arch.ctxt.regs.regs[off];
138 
139 	case KVM_REG_ARM_CORE_REG(regs.sp):
140 		return &vcpu->arch.ctxt.regs.sp;
141 
142 	case KVM_REG_ARM_CORE_REG(regs.pc):
143 		return &vcpu->arch.ctxt.regs.pc;
144 
145 	case KVM_REG_ARM_CORE_REG(regs.pstate):
146 		return &vcpu->arch.ctxt.regs.pstate;
147 
148 	case KVM_REG_ARM_CORE_REG(sp_el1):
149 		return __ctxt_sys_reg(&vcpu->arch.ctxt, SP_EL1);
150 
151 	case KVM_REG_ARM_CORE_REG(elr_el1):
152 		return __ctxt_sys_reg(&vcpu->arch.ctxt, ELR_EL1);
153 
154 	case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_EL1]):
155 		return __ctxt_sys_reg(&vcpu->arch.ctxt, SPSR_EL1);
156 
157 	case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_ABT]):
158 		return &vcpu->arch.ctxt.spsr_abt;
159 
160 	case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_UND]):
161 		return &vcpu->arch.ctxt.spsr_und;
162 
163 	case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_IRQ]):
164 		return &vcpu->arch.ctxt.spsr_irq;
165 
166 	case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_FIQ]):
167 		return &vcpu->arch.ctxt.spsr_fiq;
168 
169 	case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
170 	     KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
171 		off -= KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]);
172 		off /= 4;
173 		return &vcpu->arch.ctxt.fp_regs.vregs[off];
174 
175 	case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
176 		return &vcpu->arch.ctxt.fp_regs.fpsr;
177 
178 	case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
179 		return &vcpu->arch.ctxt.fp_regs.fpcr;
180 
181 	default:
182 		return NULL;
183 	}
184 }
185 
186 static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
187 {
188 	/*
189 	 * Because the kvm_regs structure is a mix of 32, 64 and
190 	 * 128bit fields, we index it as if it was a 32bit
191 	 * array. Hence below, nr_regs is the number of entries, and
192 	 * off the index in the "array".
193 	 */
194 	__u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
195 	int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32);
196 	void *addr;
197 	u32 off;
198 
199 	/* Our ID is an index into the kvm_regs struct. */
200 	off = core_reg_offset_from_id(reg->id);
201 	if (off >= nr_regs ||
202 	    (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
203 		return -ENOENT;
204 
205 	addr = core_reg_addr(vcpu, reg);
206 	if (!addr)
207 		return -EINVAL;
208 
209 	if (copy_to_user(uaddr, addr, KVM_REG_SIZE(reg->id)))
210 		return -EFAULT;
211 
212 	return 0;
213 }
214 
215 static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
216 {
217 	__u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
218 	int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32);
219 	__uint128_t tmp;
220 	void *valp = &tmp, *addr;
221 	u64 off;
222 	int err = 0;
223 
224 	/* Our ID is an index into the kvm_regs struct. */
225 	off = core_reg_offset_from_id(reg->id);
226 	if (off >= nr_regs ||
227 	    (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
228 		return -ENOENT;
229 
230 	addr = core_reg_addr(vcpu, reg);
231 	if (!addr)
232 		return -EINVAL;
233 
234 	if (KVM_REG_SIZE(reg->id) > sizeof(tmp))
235 		return -EINVAL;
236 
237 	if (copy_from_user(valp, uaddr, KVM_REG_SIZE(reg->id))) {
238 		err = -EFAULT;
239 		goto out;
240 	}
241 
242 	if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) {
243 		u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK;
244 		switch (mode) {
245 		case PSR_AA32_MODE_USR:
246 			if (!kvm_supports_32bit_el0())
247 				return -EINVAL;
248 			break;
249 		case PSR_AA32_MODE_FIQ:
250 		case PSR_AA32_MODE_IRQ:
251 		case PSR_AA32_MODE_SVC:
252 		case PSR_AA32_MODE_ABT:
253 		case PSR_AA32_MODE_UND:
254 			if (!vcpu_el1_is_32bit(vcpu))
255 				return -EINVAL;
256 			break;
257 		case PSR_MODE_EL2h:
258 		case PSR_MODE_EL2t:
259 			if (!vcpu_has_nv(vcpu))
260 				return -EINVAL;
261 			fallthrough;
262 		case PSR_MODE_EL0t:
263 		case PSR_MODE_EL1t:
264 		case PSR_MODE_EL1h:
265 			if (vcpu_el1_is_32bit(vcpu))
266 				return -EINVAL;
267 			break;
268 		default:
269 			err = -EINVAL;
270 			goto out;
271 		}
272 	}
273 
274 	memcpy(addr, valp, KVM_REG_SIZE(reg->id));
275 
276 	if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) {
277 		int i, nr_reg;
278 
279 		switch (*vcpu_cpsr(vcpu)) {
280 		/*
281 		 * Either we are dealing with user mode, and only the
282 		 * first 15 registers (+ PC) must be narrowed to 32bit.
283 		 * AArch32 r0-r14 conveniently map to AArch64 x0-x14.
284 		 */
285 		case PSR_AA32_MODE_USR:
286 		case PSR_AA32_MODE_SYS:
287 			nr_reg = 15;
288 			break;
289 
290 		/*
291 		 * Otherwise, this is a privileged mode, and *all* the
292 		 * registers must be narrowed to 32bit.
293 		 */
294 		default:
295 			nr_reg = 31;
296 			break;
297 		}
298 
299 		for (i = 0; i < nr_reg; i++)
300 			vcpu_set_reg(vcpu, i, (u32)vcpu_get_reg(vcpu, i));
301 
302 		*vcpu_pc(vcpu) = (u32)*vcpu_pc(vcpu);
303 	}
304 out:
305 	return err;
306 }
307 
/*
 * Helpers for the vector length bitmap: with bit 0 of word 0 standing for
 * SVE_VQ_MIN, vq_word() gives the index of the 64bit word holding the bit
 * for @vq, vq_mask() the bit within that word, and vq_present() tests
 * whether that bit is set in the array @vqs.
 */
#define vq_word(vq) (((vq) - SVE_VQ_MIN) / 64)
#define vq_mask(vq) ((u64)1 << ((vq) - SVE_VQ_MIN) % 64)
#define vq_present(vqs, vq) (!!((vqs)[vq_word(vq)] & vq_mask(vq)))
311 
312 static int get_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
313 {
314 	unsigned int max_vq, vq;
315 	u64 vqs[KVM_ARM64_SVE_VLS_WORDS];
316 
317 	if (!vcpu_has_sve(vcpu))
318 		return -ENOENT;
319 
320 	if (WARN_ON(!sve_vl_valid(vcpu->arch.sve_max_vl)))
321 		return -EINVAL;
322 
323 	memset(vqs, 0, sizeof(vqs));
324 
325 	max_vq = vcpu_sve_max_vq(vcpu);
326 	for (vq = SVE_VQ_MIN; vq <= max_vq; ++vq)
327 		if (sve_vq_available(vq))
328 			vqs[vq_word(vq)] |= vq_mask(vq);
329 
330 	if (copy_to_user((void __user *)reg->addr, vqs, sizeof(vqs)))
331 		return -EFAULT;
332 
333 	return 0;
334 }
335 
336 static int set_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
337 {
338 	unsigned int max_vq, vq;
339 	u64 vqs[KVM_ARM64_SVE_VLS_WORDS];
340 
341 	if (!vcpu_has_sve(vcpu))
342 		return -ENOENT;
343 
344 	if (kvm_arm_vcpu_sve_finalized(vcpu))
345 		return -EPERM; /* too late! */
346 
347 	if (WARN_ON(vcpu->arch.sve_state))
348 		return -EINVAL;
349 
350 	if (copy_from_user(vqs, (const void __user *)reg->addr, sizeof(vqs)))
351 		return -EFAULT;
352 
353 	max_vq = 0;
354 	for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; ++vq)
355 		if (vq_present(vqs, vq))
356 			max_vq = vq;
357 
358 	if (max_vq > sve_vq_from_vl(kvm_sve_max_vl))
359 		return -EINVAL;
360 
361 	/*
362 	 * Vector lengths supported by the host can't currently be
363 	 * hidden from the guest individually: instead we can only set a
364 	 * maximum via ZCR_EL2.LEN.  So, make sure the available vector
365 	 * lengths match the set requested exactly up to the requested
366 	 * maximum:
367 	 */
368 	for (vq = SVE_VQ_MIN; vq <= max_vq; ++vq)
369 		if (vq_present(vqs, vq) != sve_vq_available(vq))
370 			return -EINVAL;
371 
372 	/* Can't run with no vector lengths at all: */
373 	if (max_vq < SVE_VQ_MIN)
374 		return -EINVAL;
375 
376 	/* vcpu->arch.sve_state will be alloc'd by kvm_vcpu_finalize_sve() */
377 	vcpu->arch.sve_max_vl = sve_vl_from_vq(max_vq);
378 
379 	return 0;
380 }
381 
/*
 * Layout of the low bits of an SVE register ID: a 5-bit slice index in
 * bits [4:0], followed by a 5-bit register number in bits [9:5].
 */
#define SVE_REG_SLICE_SHIFT	0
#define SVE_REG_SLICE_BITS	5
#define SVE_REG_ID_SHIFT	(SVE_REG_SLICE_SHIFT + SVE_REG_SLICE_BITS)
#define SVE_REG_ID_BITS		5

/* Masks selecting the slice index and the register number in an ID */
#define SVE_REG_SLICE_MASK					\
	GENMASK(SVE_REG_SLICE_SHIFT + SVE_REG_SLICE_BITS - 1,	\
		SVE_REG_SLICE_SHIFT)
#define SVE_REG_ID_MASK							\
	GENMASK(SVE_REG_ID_SHIFT + SVE_REG_ID_BITS - 1, SVE_REG_ID_SHIFT)

/* Number of slice indices the ID encoding can express */
#define SVE_NUM_SLICES (1 << SVE_REG_SLICE_BITS)

/* Size in bytes encoded in the IDs of the Z- and P- registers */
#define KVM_SVE_ZREG_SIZE KVM_REG_SIZE(KVM_REG_ARM64_SVE_ZREG(0, 0))
#define KVM_SVE_PREG_SIZE KVM_REG_SIZE(KVM_REG_ARM64_SVE_PREG(0, 0))

/*
 * Number of register slices required to cover each whole SVE register.
 * NOTE: Only the first slice ever exists, for now.
 * If you are tempted to modify this, you must also rework sve_reg_to_region()
 * to match:
 */
#define vcpu_sve_slices(vcpu) 1
405 
/*
 * Bounds of a single SVE register slice within vcpu->arch.sve_state.
 *
 * Filled in by sve_reg_to_region(): klen bytes are copied between
 * sve_state + koffset and the user buffer, and on reads the upad bytes
 * that follow in the user buffer are zeroed.
 */
struct sve_state_reg_region {
	unsigned int koffset;	/* offset into sve_state in kernel memory */
	unsigned int klen;	/* length in kernel memory */
	unsigned int upad;	/* extra trailing padding in user memory */
};
412 
413 /*
414  * Validate SVE register ID and get sanitised bounds for user/kernel SVE
415  * register copy
416  */
417 static int sve_reg_to_region(struct sve_state_reg_region *region,
418 			     struct kvm_vcpu *vcpu,
419 			     const struct kvm_one_reg *reg)
420 {
421 	/* reg ID ranges for Z- registers */
422 	const u64 zreg_id_min = KVM_REG_ARM64_SVE_ZREG(0, 0);
423 	const u64 zreg_id_max = KVM_REG_ARM64_SVE_ZREG(SVE_NUM_ZREGS - 1,
424 						       SVE_NUM_SLICES - 1);
425 
426 	/* reg ID ranges for P- registers and FFR (which are contiguous) */
427 	const u64 preg_id_min = KVM_REG_ARM64_SVE_PREG(0, 0);
428 	const u64 preg_id_max = KVM_REG_ARM64_SVE_FFR(SVE_NUM_SLICES - 1);
429 
430 	unsigned int vq;
431 	unsigned int reg_num;
432 
433 	unsigned int reqoffset, reqlen; /* User-requested offset and length */
434 	unsigned int maxlen; /* Maximum permitted length */
435 
436 	size_t sve_state_size;
437 
438 	const u64 last_preg_id = KVM_REG_ARM64_SVE_PREG(SVE_NUM_PREGS - 1,
439 							SVE_NUM_SLICES - 1);
440 
441 	/* Verify that the P-regs and FFR really do have contiguous IDs: */
442 	BUILD_BUG_ON(KVM_REG_ARM64_SVE_FFR(0) != last_preg_id + 1);
443 
444 	/* Verify that we match the UAPI header: */
445 	BUILD_BUG_ON(SVE_NUM_SLICES != KVM_ARM64_SVE_MAX_SLICES);
446 
447 	reg_num = (reg->id & SVE_REG_ID_MASK) >> SVE_REG_ID_SHIFT;
448 
449 	if (reg->id >= zreg_id_min && reg->id <= zreg_id_max) {
450 		if (!vcpu_has_sve(vcpu) || (reg->id & SVE_REG_SLICE_MASK) > 0)
451 			return -ENOENT;
452 
453 		vq = vcpu_sve_max_vq(vcpu);
454 
455 		reqoffset = SVE_SIG_ZREG_OFFSET(vq, reg_num) -
456 				SVE_SIG_REGS_OFFSET;
457 		reqlen = KVM_SVE_ZREG_SIZE;
458 		maxlen = SVE_SIG_ZREG_SIZE(vq);
459 	} else if (reg->id >= preg_id_min && reg->id <= preg_id_max) {
460 		if (!vcpu_has_sve(vcpu) || (reg->id & SVE_REG_SLICE_MASK) > 0)
461 			return -ENOENT;
462 
463 		vq = vcpu_sve_max_vq(vcpu);
464 
465 		reqoffset = SVE_SIG_PREG_OFFSET(vq, reg_num) -
466 				SVE_SIG_REGS_OFFSET;
467 		reqlen = KVM_SVE_PREG_SIZE;
468 		maxlen = SVE_SIG_PREG_SIZE(vq);
469 	} else {
470 		return -EINVAL;
471 	}
472 
473 	sve_state_size = vcpu_sve_state_size(vcpu);
474 	if (WARN_ON(!sve_state_size))
475 		return -EINVAL;
476 
477 	region->koffset = array_index_nospec(reqoffset, sve_state_size);
478 	region->klen = min(maxlen, reqlen);
479 	region->upad = reqlen - region->klen;
480 
481 	return 0;
482 }
483 
484 static int get_sve_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
485 {
486 	int ret;
487 	struct sve_state_reg_region region;
488 	char __user *uptr = (char __user *)reg->addr;
489 
490 	/* Handle the KVM_REG_ARM64_SVE_VLS pseudo-reg as a special case: */
491 	if (reg->id == KVM_REG_ARM64_SVE_VLS)
492 		return get_sve_vls(vcpu, reg);
493 
494 	/* Try to interpret reg ID as an architectural SVE register... */
495 	ret = sve_reg_to_region(&region, vcpu, reg);
496 	if (ret)
497 		return ret;
498 
499 	if (!kvm_arm_vcpu_sve_finalized(vcpu))
500 		return -EPERM;
501 
502 	if (copy_to_user(uptr, vcpu->arch.sve_state + region.koffset,
503 			 region.klen) ||
504 	    clear_user(uptr + region.klen, region.upad))
505 		return -EFAULT;
506 
507 	return 0;
508 }
509 
510 static int set_sve_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
511 {
512 	int ret;
513 	struct sve_state_reg_region region;
514 	const char __user *uptr = (const char __user *)reg->addr;
515 
516 	/* Handle the KVM_REG_ARM64_SVE_VLS pseudo-reg as a special case: */
517 	if (reg->id == KVM_REG_ARM64_SVE_VLS)
518 		return set_sve_vls(vcpu, reg);
519 
520 	/* Try to interpret reg ID as an architectural SVE register... */
521 	ret = sve_reg_to_region(&region, vcpu, reg);
522 	if (ret)
523 		return ret;
524 
525 	if (!kvm_arm_vcpu_sve_finalized(vcpu))
526 		return -EPERM;
527 
528 	if (copy_from_user(vcpu->arch.sve_state + region.koffset, uptr,
529 			   region.klen))
530 		return -EFAULT;
531 
532 	return 0;
533 }
534 
/* Unconditionally returns -EINVAL; @vcpu and @regs are not examined. */
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}
539 
/* Unconditionally returns -EINVAL; @vcpu and @regs are not examined. */
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}
544 
545 static int copy_core_reg_indices(const struct kvm_vcpu *vcpu,
546 				 u64 __user *uindices)
547 {
548 	unsigned int i;
549 	int n = 0;
550 
551 	for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) {
552 		u64 reg = KVM_REG_ARM64 | KVM_REG_ARM_CORE | i;
553 		int size = core_reg_size_from_offset(vcpu, i);
554 
555 		if (size < 0)
556 			continue;
557 
558 		switch (size) {
559 		case sizeof(__u32):
560 			reg |= KVM_REG_SIZE_U32;
561 			break;
562 
563 		case sizeof(__u64):
564 			reg |= KVM_REG_SIZE_U64;
565 			break;
566 
567 		case sizeof(__uint128_t):
568 			reg |= KVM_REG_SIZE_U128;
569 			break;
570 
571 		default:
572 			WARN_ON(1);
573 			continue;
574 		}
575 
576 		if (uindices) {
577 			if (put_user(reg, uindices))
578 				return -EFAULT;
579 			uindices++;
580 		}
581 
582 		n++;
583 	}
584 
585 	return n;
586 }
587 
/*
 * Count the core registers exposed for @vcpu by running
 * copy_core_reg_indices() with a NULL destination, which makes it count
 * without writing anything.
 */
static unsigned long num_core_regs(const struct kvm_vcpu *vcpu)
{
	return copy_core_reg_indices(vcpu, NULL);
}
592 
/* IDs of the timer registers, in the order they are reported to userspace */
static const u64 timer_reg_list[] = {
	KVM_REG_ARM_TIMER_CTL,
	KVM_REG_ARM_TIMER_CNT,
	KVM_REG_ARM_TIMER_CVAL,
	KVM_REG_ARM_PTIMER_CTL,
	KVM_REG_ARM_PTIMER_CNT,
	KVM_REG_ARM_PTIMER_CVAL,
};

/* Number of entries in timer_reg_list */
#define NUM_TIMER_REGS ARRAY_SIZE(timer_reg_list)
603 
604 static bool is_timer_reg(u64 index)
605 {
606 	switch (index) {
607 	case KVM_REG_ARM_TIMER_CTL:
608 	case KVM_REG_ARM_TIMER_CNT:
609 	case KVM_REG_ARM_TIMER_CVAL:
610 	case KVM_REG_ARM_PTIMER_CTL:
611 	case KVM_REG_ARM_PTIMER_CNT:
612 	case KVM_REG_ARM_PTIMER_CVAL:
613 		return true;
614 	}
615 	return false;
616 }
617 
618 static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
619 {
620 	for (int i = 0; i < NUM_TIMER_REGS; i++) {
621 		if (put_user(timer_reg_list[i], uindices))
622 			return -EFAULT;
623 		uindices++;
624 	}
625 
626 	return 0;
627 }
628 
629 static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
630 {
631 	void __user *uaddr = (void __user *)(long)reg->addr;
632 	u64 val;
633 	int ret;
634 
635 	ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
636 	if (ret != 0)
637 		return -EFAULT;
638 
639 	return kvm_arm_timer_set_reg(vcpu, reg->id, val);
640 }
641 
642 static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
643 {
644 	void __user *uaddr = (void __user *)(long)reg->addr;
645 	u64 val;
646 
647 	val = kvm_arm_timer_get_reg(vcpu, reg->id);
648 	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
649 }
650 
651 static unsigned long num_sve_regs(const struct kvm_vcpu *vcpu)
652 {
653 	const unsigned int slices = vcpu_sve_slices(vcpu);
654 
655 	if (!vcpu_has_sve(vcpu))
656 		return 0;
657 
658 	/* Policed by KVM_GET_REG_LIST: */
659 	WARN_ON(!kvm_arm_vcpu_sve_finalized(vcpu));
660 
661 	return slices * (SVE_NUM_PREGS + SVE_NUM_ZREGS + 1 /* FFR */)
662 		+ 1; /* KVM_REG_ARM64_SVE_VLS */
663 }
664 
665 static int copy_sve_reg_indices(const struct kvm_vcpu *vcpu,
666 				u64 __user *uindices)
667 {
668 	const unsigned int slices = vcpu_sve_slices(vcpu);
669 	u64 reg;
670 	unsigned int i, n;
671 	int num_regs = 0;
672 
673 	if (!vcpu_has_sve(vcpu))
674 		return 0;
675 
676 	/* Policed by KVM_GET_REG_LIST: */
677 	WARN_ON(!kvm_arm_vcpu_sve_finalized(vcpu));
678 
679 	/*
680 	 * Enumerate this first, so that userspace can save/restore in
681 	 * the order reported by KVM_GET_REG_LIST:
682 	 */
683 	reg = KVM_REG_ARM64_SVE_VLS;
684 	if (put_user(reg, uindices++))
685 		return -EFAULT;
686 	++num_regs;
687 
688 	for (i = 0; i < slices; i++) {
689 		for (n = 0; n < SVE_NUM_ZREGS; n++) {
690 			reg = KVM_REG_ARM64_SVE_ZREG(n, i);
691 			if (put_user(reg, uindices++))
692 				return -EFAULT;
693 			num_regs++;
694 		}
695 
696 		for (n = 0; n < SVE_NUM_PREGS; n++) {
697 			reg = KVM_REG_ARM64_SVE_PREG(n, i);
698 			if (put_user(reg, uindices++))
699 				return -EFAULT;
700 			num_regs++;
701 		}
702 
703 		reg = KVM_REG_ARM64_SVE_FFR(i);
704 		if (put_user(reg, uindices++))
705 			return -EFAULT;
706 		num_regs++;
707 	}
708 
709 	return num_regs;
710 }
711 
712 /**
713  * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
714  *
715  * This is for all registers.
716  */
717 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
718 {
719 	unsigned long res = 0;
720 
721 	res += num_core_regs(vcpu);
722 	res += num_sve_regs(vcpu);
723 	res += kvm_arm_num_sys_reg_descs(vcpu);
724 	res += kvm_arm_get_fw_num_regs(vcpu);
725 	res += NUM_TIMER_REGS;
726 
727 	return res;
728 }
729 
730 /**
731  * kvm_arm_copy_reg_indices - get indices of all registers.
732  *
733  * We do core registers right here, then we append system regs.
734  */
735 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
736 {
737 	int ret;
738 
739 	ret = copy_core_reg_indices(vcpu, uindices);
740 	if (ret < 0)
741 		return ret;
742 	uindices += ret;
743 
744 	ret = copy_sve_reg_indices(vcpu, uindices);
745 	if (ret < 0)
746 		return ret;
747 	uindices += ret;
748 
749 	ret = kvm_arm_copy_fw_reg_indices(vcpu, uindices);
750 	if (ret < 0)
751 		return ret;
752 	uindices += kvm_arm_get_fw_num_regs(vcpu);
753 
754 	ret = copy_timer_indices(vcpu, uindices);
755 	if (ret < 0)
756 		return ret;
757 	uindices += NUM_TIMER_REGS;
758 
759 	return kvm_arm_copy_sys_reg_indices(vcpu, uindices);
760 }
761 
762 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
763 {
764 	/* We currently use nothing arch-specific in upper 32 bits */
765 	if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
766 		return -EINVAL;
767 
768 	switch (reg->id & KVM_REG_ARM_COPROC_MASK) {
769 	case KVM_REG_ARM_CORE:	return get_core_reg(vcpu, reg);
770 	case KVM_REG_ARM_FW:
771 	case KVM_REG_ARM_FW_FEAT_BMAP:
772 		return kvm_arm_get_fw_reg(vcpu, reg);
773 	case KVM_REG_ARM64_SVE:	return get_sve_reg(vcpu, reg);
774 	}
775 
776 	if (is_timer_reg(reg->id))
777 		return get_timer_reg(vcpu, reg);
778 
779 	return kvm_arm_sys_reg_get_reg(vcpu, reg);
780 }
781 
782 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
783 {
784 	/* We currently use nothing arch-specific in upper 32 bits */
785 	if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
786 		return -EINVAL;
787 
788 	switch (reg->id & KVM_REG_ARM_COPROC_MASK) {
789 	case KVM_REG_ARM_CORE:	return set_core_reg(vcpu, reg);
790 	case KVM_REG_ARM_FW:
791 	case KVM_REG_ARM_FW_FEAT_BMAP:
792 		return kvm_arm_set_fw_reg(vcpu, reg);
793 	case KVM_REG_ARM64_SVE:	return set_sve_reg(vcpu, reg);
794 	}
795 
796 	if (is_timer_reg(reg->id))
797 		return set_timer_reg(vcpu, reg);
798 
799 	return kvm_arm_sys_reg_set_reg(vcpu, reg);
800 }
801 
/* Unconditionally returns -EINVAL; @vcpu and @sregs are not examined. */
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}
807 
/* Unconditionally returns -EINVAL; @vcpu and @sregs are not examined. */
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}
813 
814 int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
815 			      struct kvm_vcpu_events *events)
816 {
817 	events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE);
818 	events->exception.serror_has_esr = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
819 
820 	if (events->exception.serror_pending && events->exception.serror_has_esr)
821 		events->exception.serror_esr = vcpu_get_vsesr(vcpu);
822 
823 	/*
824 	 * We never return a pending ext_dabt here because we deliver it to
825 	 * the virtual CPU directly when setting the event and it's no longer
826 	 * 'pending' at this point.
827 	 */
828 
829 	return 0;
830 }
831 
832 int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
833 			      struct kvm_vcpu_events *events)
834 {
835 	bool serror_pending = events->exception.serror_pending;
836 	bool has_esr = events->exception.serror_has_esr;
837 	bool ext_dabt_pending = events->exception.ext_dabt_pending;
838 
839 	if (serror_pending && has_esr) {
840 		if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
841 			return -EINVAL;
842 
843 		if (!((events->exception.serror_esr) & ~ESR_ELx_ISS_MASK))
844 			kvm_set_sei_esr(vcpu, events->exception.serror_esr);
845 		else
846 			return -EINVAL;
847 	} else if (serror_pending) {
848 		kvm_inject_vabt(vcpu);
849 	}
850 
851 	if (ext_dabt_pending)
852 		kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
853 
854 	return 0;
855 }
856 
857 u32 __attribute_const__ kvm_target_cpu(void)
858 {
859 	unsigned long implementor = read_cpuid_implementor();
860 	unsigned long part_number = read_cpuid_part_number();
861 
862 	switch (implementor) {
863 	case ARM_CPU_IMP_ARM:
864 		switch (part_number) {
865 		case ARM_CPU_PART_AEM_V8:
866 			return KVM_ARM_TARGET_AEM_V8;
867 		case ARM_CPU_PART_FOUNDATION:
868 			return KVM_ARM_TARGET_FOUNDATION_V8;
869 		case ARM_CPU_PART_CORTEX_A53:
870 			return KVM_ARM_TARGET_CORTEX_A53;
871 		case ARM_CPU_PART_CORTEX_A57:
872 			return KVM_ARM_TARGET_CORTEX_A57;
873 		}
874 		break;
875 	case ARM_CPU_IMP_APM:
876 		switch (part_number) {
877 		case APM_CPU_PART_XGENE:
878 			return KVM_ARM_TARGET_XGENE_POTENZA;
879 		}
880 		break;
881 	}
882 
883 	/* Return a default generic target */
884 	return KVM_ARM_TARGET_GENERIC_V8;
885 }
886 
/* Unconditionally returns -EINVAL; @vcpu and @fpu are not examined. */
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}
891 
/* Unconditionally returns -EINVAL; @vcpu and @fpu are not examined. */
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}
896 
/* Unconditionally returns -EINVAL; @vcpu and @tr are not examined. */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL;
}
902 
903 /**
904  * kvm_arch_vcpu_ioctl_set_guest_debug - set up guest debugging
905  * @kvm:	pointer to the KVM struct
906  * @kvm_guest_debug: the ioctl data buffer
907  *
908  * This sets up and enables the VM for guest debugging. Userspace
909  * passes in a control flag to enable different debug types and
910  * potentially other architecture specific information in the rest of
911  * the structure.
912  */
913 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
914 					struct kvm_guest_debug *dbg)
915 {
916 	int ret = 0;
917 
918 	trace_kvm_set_guest_debug(vcpu, dbg->control);
919 
920 	if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) {
921 		ret = -EINVAL;
922 		goto out;
923 	}
924 
925 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
926 		vcpu->guest_debug = dbg->control;
927 
928 		/* Hardware assisted Break and Watch points */
929 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
930 			vcpu->arch.external_debug_state = dbg->arch;
931 		}
932 
933 	} else {
934 		/* If not enabled clear all flags */
935 		vcpu->guest_debug = 0;
936 		vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING);
937 	}
938 
939 out:
940 	return ret;
941 }
942 
943 int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
944 			       struct kvm_device_attr *attr)
945 {
946 	int ret;
947 
948 	switch (attr->group) {
949 	case KVM_ARM_VCPU_PMU_V3_CTRL:
950 		mutex_lock(&vcpu->kvm->arch.config_lock);
951 		ret = kvm_arm_pmu_v3_set_attr(vcpu, attr);
952 		mutex_unlock(&vcpu->kvm->arch.config_lock);
953 		break;
954 	case KVM_ARM_VCPU_TIMER_CTRL:
955 		ret = kvm_arm_timer_set_attr(vcpu, attr);
956 		break;
957 	case KVM_ARM_VCPU_PVTIME_CTRL:
958 		ret = kvm_arm_pvtime_set_attr(vcpu, attr);
959 		break;
960 	default:
961 		ret = -ENXIO;
962 		break;
963 	}
964 
965 	return ret;
966 }
967 
968 int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
969 			       struct kvm_device_attr *attr)
970 {
971 	int ret;
972 
973 	switch (attr->group) {
974 	case KVM_ARM_VCPU_PMU_V3_CTRL:
975 		ret = kvm_arm_pmu_v3_get_attr(vcpu, attr);
976 		break;
977 	case KVM_ARM_VCPU_TIMER_CTRL:
978 		ret = kvm_arm_timer_get_attr(vcpu, attr);
979 		break;
980 	case KVM_ARM_VCPU_PVTIME_CTRL:
981 		ret = kvm_arm_pvtime_get_attr(vcpu, attr);
982 		break;
983 	default:
984 		ret = -ENXIO;
985 		break;
986 	}
987 
988 	return ret;
989 }
990 
991 int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
992 			       struct kvm_device_attr *attr)
993 {
994 	int ret;
995 
996 	switch (attr->group) {
997 	case KVM_ARM_VCPU_PMU_V3_CTRL:
998 		ret = kvm_arm_pmu_v3_has_attr(vcpu, attr);
999 		break;
1000 	case KVM_ARM_VCPU_TIMER_CTRL:
1001 		ret = kvm_arm_timer_has_attr(vcpu, attr);
1002 		break;
1003 	case KVM_ARM_VCPU_PVTIME_CTRL:
1004 		ret = kvm_arm_pvtime_has_attr(vcpu, attr);
1005 		break;
1006 	default:
1007 		ret = -ENXIO;
1008 		break;
1009 	}
1010 
1011 	return ret;
1012 }
1013 
1014 int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
1015 			       struct kvm_arm_copy_mte_tags *copy_tags)
1016 {
1017 	gpa_t guest_ipa = copy_tags->guest_ipa;
1018 	size_t length = copy_tags->length;
1019 	void __user *tags = copy_tags->addr;
1020 	gpa_t gfn;
1021 	bool write = !(copy_tags->flags & KVM_ARM_TAGS_FROM_GUEST);
1022 	int ret = 0;
1023 
1024 	if (!kvm_has_mte(kvm))
1025 		return -EINVAL;
1026 
1027 	if (copy_tags->reserved[0] || copy_tags->reserved[1])
1028 		return -EINVAL;
1029 
1030 	if (copy_tags->flags & ~KVM_ARM_TAGS_FROM_GUEST)
1031 		return -EINVAL;
1032 
1033 	if (length & ~PAGE_MASK || guest_ipa & ~PAGE_MASK)
1034 		return -EINVAL;
1035 
1036 	/* Lengths above INT_MAX cannot be represented in the return value */
1037 	if (length > INT_MAX)
1038 		return -EINVAL;
1039 
1040 	gfn = gpa_to_gfn(guest_ipa);
1041 
1042 	mutex_lock(&kvm->slots_lock);
1043 
1044 	while (length > 0) {
1045 		kvm_pfn_t pfn = gfn_to_pfn_prot(kvm, gfn, write, NULL);
1046 		void *maddr;
1047 		unsigned long num_tags;
1048 		struct page *page;
1049 
1050 		if (is_error_noslot_pfn(pfn)) {
1051 			ret = -EFAULT;
1052 			goto out;
1053 		}
1054 
1055 		page = pfn_to_online_page(pfn);
1056 		if (!page) {
1057 			/* Reject ZONE_DEVICE memory */
1058 			ret = -EFAULT;
1059 			goto out;
1060 		}
1061 		maddr = page_address(page);
1062 
1063 		if (!write) {
1064 			if (page_mte_tagged(page))
1065 				num_tags = mte_copy_tags_to_user(tags, maddr,
1066 							MTE_GRANULES_PER_PAGE);
1067 			else
1068 				/* No tags in memory, so write zeros */
1069 				num_tags = MTE_GRANULES_PER_PAGE -
1070 					clear_user(tags, MTE_GRANULES_PER_PAGE);
1071 			kvm_release_pfn_clean(pfn);
1072 		} else {
1073 			/*
1074 			 * Only locking to serialise with a concurrent
1075 			 * set_pte_at() in the VMM but still overriding the
1076 			 * tags, hence ignoring the return value.
1077 			 */
1078 			try_page_mte_tagging(page);
1079 			num_tags = mte_copy_tags_from_user(maddr, tags,
1080 							MTE_GRANULES_PER_PAGE);
1081 
1082 			/* uaccess failed, don't leave stale tags */
1083 			if (num_tags != MTE_GRANULES_PER_PAGE)
1084 				mte_clear_page_tags(maddr);
1085 			set_page_mte_tagged(page);
1086 
1087 			kvm_release_pfn_dirty(pfn);
1088 		}
1089 
1090 		if (num_tags != MTE_GRANULES_PER_PAGE) {
1091 			ret = -EFAULT;
1092 			goto out;
1093 		}
1094 
1095 		gfn++;
1096 		tags += num_tags;
1097 		length -= PAGE_SIZE;
1098 	}
1099 
1100 out:
1101 	mutex_unlock(&kvm->slots_lock);
1102 	/* If some data has been copied report the number of bytes copied */
1103 	if (length != copy_tags->length)
1104 		return copy_tags->length - length;
1105 	return ret;
1106 }
1107