xref: /openbmc/linux/arch/arm64/kvm/guest.c (revision 4c8b18af)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2012,2013 - ARM Ltd
4  * Author: Marc Zyngier <marc.zyngier@arm.com>
5  *
6  * Derived from arch/arm/kvm/guest.c:
7  * Copyright (C) 2012 - Virtual Open Systems and Columbia University
8  * Author: Christoffer Dall <c.dall@virtualopensystems.com>
9  */
10 
11 #include <linux/bits.h>
12 #include <linux/errno.h>
13 #include <linux/err.h>
14 #include <linux/nospec.h>
15 #include <linux/kvm_host.h>
16 #include <linux/module.h>
17 #include <linux/stddef.h>
18 #include <linux/string.h>
19 #include <linux/vmalloc.h>
20 #include <linux/fs.h>
21 #include <kvm/arm_hypercalls.h>
22 #include <asm/cputype.h>
23 #include <linux/uaccess.h>
24 #include <asm/fpsimd.h>
25 #include <asm/kvm.h>
26 #include <asm/kvm_emulate.h>
27 #include <asm/sigcontext.h>
28 
29 #include "trace.h"
30 
/*
 * Descriptor table for the per-VM statistics. Only the generic KVM VM
 * stats are listed here; no arm64-specific VM counters are added.
 */
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS()
};
34 
/*
 * Header describing the layout of the per-VM stats blob: the id string
 * directly follows the header, the descriptors follow the id string
 * (KVM_STATS_NAME_SIZE bytes), and the data follows the descriptors.
 */
const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset =  sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};
43 
/*
 * Descriptor table for the per-vCPU statistics: the generic KVM vCPU
 * stats followed by the arm64 exit counters listed below.
 */
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, hvc_exit_stat),
	STATS_DESC_COUNTER(VCPU, wfe_exit_stat),
	STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
	STATS_DESC_COUNTER(VCPU, mmio_exit_user),
	STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
	STATS_DESC_COUNTER(VCPU, signal_exits),
	STATS_DESC_COUNTER(VCPU, exits)
};
54 
/*
 * Header describing the layout of the per-vCPU stats blob: the id string
 * directly follows the header, the descriptors follow the id string
 * (KVM_STATS_NAME_SIZE bytes), and the data follows the descriptors.
 */
const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};
63 
64 static bool core_reg_offset_is_vreg(u64 off)
65 {
66 	return off >= KVM_REG_ARM_CORE_REG(fp_regs.vregs) &&
67 		off < KVM_REG_ARM_CORE_REG(fp_regs.fpsr);
68 }
69 
70 static u64 core_reg_offset_from_id(u64 id)
71 {
72 	return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
73 }
74 
75 static int core_reg_size_from_offset(const struct kvm_vcpu *vcpu, u64 off)
76 {
77 	int size;
78 
79 	switch (off) {
80 	case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
81 	     KVM_REG_ARM_CORE_REG(regs.regs[30]):
82 	case KVM_REG_ARM_CORE_REG(regs.sp):
83 	case KVM_REG_ARM_CORE_REG(regs.pc):
84 	case KVM_REG_ARM_CORE_REG(regs.pstate):
85 	case KVM_REG_ARM_CORE_REG(sp_el1):
86 	case KVM_REG_ARM_CORE_REG(elr_el1):
87 	case KVM_REG_ARM_CORE_REG(spsr[0]) ...
88 	     KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
89 		size = sizeof(__u64);
90 		break;
91 
92 	case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
93 	     KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
94 		size = sizeof(__uint128_t);
95 		break;
96 
97 	case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
98 	case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
99 		size = sizeof(__u32);
100 		break;
101 
102 	default:
103 		return -EINVAL;
104 	}
105 
106 	if (!IS_ALIGNED(off, size / sizeof(__u32)))
107 		return -EINVAL;
108 
109 	/*
110 	 * The KVM_REG_ARM64_SVE regs must be used instead of
111 	 * KVM_REG_ARM_CORE for accessing the FPSIMD V-registers on
112 	 * SVE-enabled vcpus:
113 	 */
114 	if (vcpu_has_sve(vcpu) && core_reg_offset_is_vreg(off))
115 		return -EINVAL;
116 
117 	return size;
118 }
119 
120 static void *core_reg_addr(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
121 {
122 	u64 off = core_reg_offset_from_id(reg->id);
123 	int size = core_reg_size_from_offset(vcpu, off);
124 
125 	if (size < 0)
126 		return NULL;
127 
128 	if (KVM_REG_SIZE(reg->id) != size)
129 		return NULL;
130 
131 	switch (off) {
132 	case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
133 	     KVM_REG_ARM_CORE_REG(regs.regs[30]):
134 		off -= KVM_REG_ARM_CORE_REG(regs.regs[0]);
135 		off /= 2;
136 		return &vcpu->arch.ctxt.regs.regs[off];
137 
138 	case KVM_REG_ARM_CORE_REG(regs.sp):
139 		return &vcpu->arch.ctxt.regs.sp;
140 
141 	case KVM_REG_ARM_CORE_REG(regs.pc):
142 		return &vcpu->arch.ctxt.regs.pc;
143 
144 	case KVM_REG_ARM_CORE_REG(regs.pstate):
145 		return &vcpu->arch.ctxt.regs.pstate;
146 
147 	case KVM_REG_ARM_CORE_REG(sp_el1):
148 		return __ctxt_sys_reg(&vcpu->arch.ctxt, SP_EL1);
149 
150 	case KVM_REG_ARM_CORE_REG(elr_el1):
151 		return __ctxt_sys_reg(&vcpu->arch.ctxt, ELR_EL1);
152 
153 	case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_EL1]):
154 		return __ctxt_sys_reg(&vcpu->arch.ctxt, SPSR_EL1);
155 
156 	case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_ABT]):
157 		return &vcpu->arch.ctxt.spsr_abt;
158 
159 	case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_UND]):
160 		return &vcpu->arch.ctxt.spsr_und;
161 
162 	case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_IRQ]):
163 		return &vcpu->arch.ctxt.spsr_irq;
164 
165 	case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_FIQ]):
166 		return &vcpu->arch.ctxt.spsr_fiq;
167 
168 	case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
169 	     KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
170 		off -= KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]);
171 		off /= 4;
172 		return &vcpu->arch.ctxt.fp_regs.vregs[off];
173 
174 	case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
175 		return &vcpu->arch.ctxt.fp_regs.fpsr;
176 
177 	case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
178 		return &vcpu->arch.ctxt.fp_regs.fpcr;
179 
180 	default:
181 		return NULL;
182 	}
183 }
184 
185 static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
186 {
187 	/*
188 	 * Because the kvm_regs structure is a mix of 32, 64 and
189 	 * 128bit fields, we index it as if it was a 32bit
190 	 * array. Hence below, nr_regs is the number of entries, and
191 	 * off the index in the "array".
192 	 */
193 	__u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
194 	int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32);
195 	void *addr;
196 	u32 off;
197 
198 	/* Our ID is an index into the kvm_regs struct. */
199 	off = core_reg_offset_from_id(reg->id);
200 	if (off >= nr_regs ||
201 	    (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
202 		return -ENOENT;
203 
204 	addr = core_reg_addr(vcpu, reg);
205 	if (!addr)
206 		return -EINVAL;
207 
208 	if (copy_to_user(uaddr, addr, KVM_REG_SIZE(reg->id)))
209 		return -EFAULT;
210 
211 	return 0;
212 }
213 
214 static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
215 {
216 	__u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
217 	int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32);
218 	__uint128_t tmp;
219 	void *valp = &tmp, *addr;
220 	u64 off;
221 	int err = 0;
222 
223 	/* Our ID is an index into the kvm_regs struct. */
224 	off = core_reg_offset_from_id(reg->id);
225 	if (off >= nr_regs ||
226 	    (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
227 		return -ENOENT;
228 
229 	addr = core_reg_addr(vcpu, reg);
230 	if (!addr)
231 		return -EINVAL;
232 
233 	if (KVM_REG_SIZE(reg->id) > sizeof(tmp))
234 		return -EINVAL;
235 
236 	if (copy_from_user(valp, uaddr, KVM_REG_SIZE(reg->id))) {
237 		err = -EFAULT;
238 		goto out;
239 	}
240 
241 	if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) {
242 		u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK;
243 		switch (mode) {
244 		case PSR_AA32_MODE_USR:
245 			if (!kvm_supports_32bit_el0())
246 				return -EINVAL;
247 			break;
248 		case PSR_AA32_MODE_FIQ:
249 		case PSR_AA32_MODE_IRQ:
250 		case PSR_AA32_MODE_SVC:
251 		case PSR_AA32_MODE_ABT:
252 		case PSR_AA32_MODE_UND:
253 			if (!vcpu_el1_is_32bit(vcpu))
254 				return -EINVAL;
255 			break;
256 		case PSR_MODE_EL0t:
257 		case PSR_MODE_EL1t:
258 		case PSR_MODE_EL1h:
259 			if (vcpu_el1_is_32bit(vcpu))
260 				return -EINVAL;
261 			break;
262 		default:
263 			err = -EINVAL;
264 			goto out;
265 		}
266 	}
267 
268 	memcpy(addr, valp, KVM_REG_SIZE(reg->id));
269 
270 	if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) {
271 		int i, nr_reg;
272 
273 		switch (*vcpu_cpsr(vcpu)) {
274 		/*
275 		 * Either we are dealing with user mode, and only the
276 		 * first 15 registers (+ PC) must be narrowed to 32bit.
277 		 * AArch32 r0-r14 conveniently map to AArch64 x0-x14.
278 		 */
279 		case PSR_AA32_MODE_USR:
280 		case PSR_AA32_MODE_SYS:
281 			nr_reg = 15;
282 			break;
283 
284 		/*
285 		 * Otherwise, this is a privileged mode, and *all* the
286 		 * registers must be narrowed to 32bit.
287 		 */
288 		default:
289 			nr_reg = 31;
290 			break;
291 		}
292 
293 		for (i = 0; i < nr_reg; i++)
294 			vcpu_set_reg(vcpu, i, (u32)vcpu_get_reg(vcpu, i));
295 
296 		*vcpu_pc(vcpu) = (u32)*vcpu_pc(vcpu);
297 	}
298 out:
299 	return err;
300 }
301 
/*
 * Helpers for the vector-length bitmap used by the SVE_VLS pseudo-reg:
 * bit (vq - SVE_VQ_MIN) of an array of 64-bit words stands for @vq.
 * vq_word() gives the word index, vq_mask() the bit within that word
 * and vq_present() tests whether @vq is set in the bitmap @vqs.
 */
#define vq_word(vq) (((vq) - SVE_VQ_MIN) / 64)
#define vq_mask(vq) ((u64)1 << ((vq) - SVE_VQ_MIN) % 64)
#define vq_present(vqs, vq) (!!((vqs)[vq_word(vq)] & vq_mask(vq)))
305 
306 static int get_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
307 {
308 	unsigned int max_vq, vq;
309 	u64 vqs[KVM_ARM64_SVE_VLS_WORDS];
310 
311 	if (!vcpu_has_sve(vcpu))
312 		return -ENOENT;
313 
314 	if (WARN_ON(!sve_vl_valid(vcpu->arch.sve_max_vl)))
315 		return -EINVAL;
316 
317 	memset(vqs, 0, sizeof(vqs));
318 
319 	max_vq = vcpu_sve_max_vq(vcpu);
320 	for (vq = SVE_VQ_MIN; vq <= max_vq; ++vq)
321 		if (sve_vq_available(vq))
322 			vqs[vq_word(vq)] |= vq_mask(vq);
323 
324 	if (copy_to_user((void __user *)reg->addr, vqs, sizeof(vqs)))
325 		return -EFAULT;
326 
327 	return 0;
328 }
329 
330 static int set_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
331 {
332 	unsigned int max_vq, vq;
333 	u64 vqs[KVM_ARM64_SVE_VLS_WORDS];
334 
335 	if (!vcpu_has_sve(vcpu))
336 		return -ENOENT;
337 
338 	if (kvm_arm_vcpu_sve_finalized(vcpu))
339 		return -EPERM; /* too late! */
340 
341 	if (WARN_ON(vcpu->arch.sve_state))
342 		return -EINVAL;
343 
344 	if (copy_from_user(vqs, (const void __user *)reg->addr, sizeof(vqs)))
345 		return -EFAULT;
346 
347 	max_vq = 0;
348 	for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; ++vq)
349 		if (vq_present(vqs, vq))
350 			max_vq = vq;
351 
352 	if (max_vq > sve_vq_from_vl(kvm_sve_max_vl))
353 		return -EINVAL;
354 
355 	/*
356 	 * Vector lengths supported by the host can't currently be
357 	 * hidden from the guest individually: instead we can only set a
358 	 * maximum via ZCR_EL2.LEN.  So, make sure the available vector
359 	 * lengths match the set requested exactly up to the requested
360 	 * maximum:
361 	 */
362 	for (vq = SVE_VQ_MIN; vq <= max_vq; ++vq)
363 		if (vq_present(vqs, vq) != sve_vq_available(vq))
364 			return -EINVAL;
365 
366 	/* Can't run with no vector lengths at all: */
367 	if (max_vq < SVE_VQ_MIN)
368 		return -EINVAL;
369 
370 	/* vcpu->arch.sve_state will be alloc'd by kvm_vcpu_finalize_sve() */
371 	vcpu->arch.sve_max_vl = sve_vl_from_vq(max_vq);
372 
373 	return 0;
374 }
375 
/*
 * Layout of the low bits of an SVE register ID: the slice number sits
 * in the lowest SVE_REG_SLICE_BITS bits, with the register number in
 * the SVE_REG_ID_BITS bits directly above it.
 */
#define SVE_REG_SLICE_SHIFT	0
#define SVE_REG_SLICE_BITS	5
#define SVE_REG_ID_SHIFT	(SVE_REG_SLICE_SHIFT + SVE_REG_SLICE_BITS)
#define SVE_REG_ID_BITS		5

#define SVE_REG_SLICE_MASK					\
	GENMASK(SVE_REG_SLICE_SHIFT + SVE_REG_SLICE_BITS - 1,	\
		SVE_REG_SLICE_SHIFT)
#define SVE_REG_ID_MASK							\
	GENMASK(SVE_REG_ID_SHIFT + SVE_REG_ID_BITS - 1, SVE_REG_ID_SHIFT)

/* Number of slice values that the slice field can encode */
#define SVE_NUM_SLICES (1 << SVE_REG_SLICE_BITS)

/* Size encoded in the register ID of one Z-register / P-register slice */
#define KVM_SVE_ZREG_SIZE KVM_REG_SIZE(KVM_REG_ARM64_SVE_ZREG(0, 0))
#define KVM_SVE_PREG_SIZE KVM_REG_SIZE(KVM_REG_ARM64_SVE_PREG(0, 0))

/*
 * Number of register slices required to cover each whole SVE register.
 * NOTE: Only the first slice ever exists, for now.
 * If you are tempted to modify this, you must also rework sve_reg_to_region()
 * to match:
 */
#define vcpu_sve_slices(vcpu) 1
399 
/*
 * Bounds of a single SVE register slice within vcpu->arch.sve_state.
 * Filled in by sve_reg_to_region(): klen bytes are transferred to or
 * from kernel memory and upad is the remainder of the user-visible
 * register size that has no backing kernel storage.
 */
struct sve_state_reg_region {
	unsigned int koffset;	/* offset into sve_state in kernel memory */
	unsigned int klen;	/* length in kernel memory */
	unsigned int upad;	/* extra trailing padding in user memory */
};
406 
407 /*
408  * Validate SVE register ID and get sanitised bounds for user/kernel SVE
409  * register copy
410  */
411 static int sve_reg_to_region(struct sve_state_reg_region *region,
412 			     struct kvm_vcpu *vcpu,
413 			     const struct kvm_one_reg *reg)
414 {
415 	/* reg ID ranges for Z- registers */
416 	const u64 zreg_id_min = KVM_REG_ARM64_SVE_ZREG(0, 0);
417 	const u64 zreg_id_max = KVM_REG_ARM64_SVE_ZREG(SVE_NUM_ZREGS - 1,
418 						       SVE_NUM_SLICES - 1);
419 
420 	/* reg ID ranges for P- registers and FFR (which are contiguous) */
421 	const u64 preg_id_min = KVM_REG_ARM64_SVE_PREG(0, 0);
422 	const u64 preg_id_max = KVM_REG_ARM64_SVE_FFR(SVE_NUM_SLICES - 1);
423 
424 	unsigned int vq;
425 	unsigned int reg_num;
426 
427 	unsigned int reqoffset, reqlen; /* User-requested offset and length */
428 	unsigned int maxlen; /* Maximum permitted length */
429 
430 	size_t sve_state_size;
431 
432 	const u64 last_preg_id = KVM_REG_ARM64_SVE_PREG(SVE_NUM_PREGS - 1,
433 							SVE_NUM_SLICES - 1);
434 
435 	/* Verify that the P-regs and FFR really do have contiguous IDs: */
436 	BUILD_BUG_ON(KVM_REG_ARM64_SVE_FFR(0) != last_preg_id + 1);
437 
438 	/* Verify that we match the UAPI header: */
439 	BUILD_BUG_ON(SVE_NUM_SLICES != KVM_ARM64_SVE_MAX_SLICES);
440 
441 	reg_num = (reg->id & SVE_REG_ID_MASK) >> SVE_REG_ID_SHIFT;
442 
443 	if (reg->id >= zreg_id_min && reg->id <= zreg_id_max) {
444 		if (!vcpu_has_sve(vcpu) || (reg->id & SVE_REG_SLICE_MASK) > 0)
445 			return -ENOENT;
446 
447 		vq = vcpu_sve_max_vq(vcpu);
448 
449 		reqoffset = SVE_SIG_ZREG_OFFSET(vq, reg_num) -
450 				SVE_SIG_REGS_OFFSET;
451 		reqlen = KVM_SVE_ZREG_SIZE;
452 		maxlen = SVE_SIG_ZREG_SIZE(vq);
453 	} else if (reg->id >= preg_id_min && reg->id <= preg_id_max) {
454 		if (!vcpu_has_sve(vcpu) || (reg->id & SVE_REG_SLICE_MASK) > 0)
455 			return -ENOENT;
456 
457 		vq = vcpu_sve_max_vq(vcpu);
458 
459 		reqoffset = SVE_SIG_PREG_OFFSET(vq, reg_num) -
460 				SVE_SIG_REGS_OFFSET;
461 		reqlen = KVM_SVE_PREG_SIZE;
462 		maxlen = SVE_SIG_PREG_SIZE(vq);
463 	} else {
464 		return -EINVAL;
465 	}
466 
467 	sve_state_size = vcpu_sve_state_size(vcpu);
468 	if (WARN_ON(!sve_state_size))
469 		return -EINVAL;
470 
471 	region->koffset = array_index_nospec(reqoffset, sve_state_size);
472 	region->klen = min(maxlen, reqlen);
473 	region->upad = reqlen - region->klen;
474 
475 	return 0;
476 }
477 
478 static int get_sve_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
479 {
480 	int ret;
481 	struct sve_state_reg_region region;
482 	char __user *uptr = (char __user *)reg->addr;
483 
484 	/* Handle the KVM_REG_ARM64_SVE_VLS pseudo-reg as a special case: */
485 	if (reg->id == KVM_REG_ARM64_SVE_VLS)
486 		return get_sve_vls(vcpu, reg);
487 
488 	/* Try to interpret reg ID as an architectural SVE register... */
489 	ret = sve_reg_to_region(&region, vcpu, reg);
490 	if (ret)
491 		return ret;
492 
493 	if (!kvm_arm_vcpu_sve_finalized(vcpu))
494 		return -EPERM;
495 
496 	if (copy_to_user(uptr, vcpu->arch.sve_state + region.koffset,
497 			 region.klen) ||
498 	    clear_user(uptr + region.klen, region.upad))
499 		return -EFAULT;
500 
501 	return 0;
502 }
503 
504 static int set_sve_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
505 {
506 	int ret;
507 	struct sve_state_reg_region region;
508 	const char __user *uptr = (const char __user *)reg->addr;
509 
510 	/* Handle the KVM_REG_ARM64_SVE_VLS pseudo-reg as a special case: */
511 	if (reg->id == KVM_REG_ARM64_SVE_VLS)
512 		return set_sve_vls(vcpu, reg);
513 
514 	/* Try to interpret reg ID as an architectural SVE register... */
515 	ret = sve_reg_to_region(&region, vcpu, reg);
516 	if (ret)
517 		return ret;
518 
519 	if (!kvm_arm_vcpu_sve_finalized(vcpu))
520 		return -EPERM;
521 
522 	if (copy_from_user(vcpu->arch.sve_state + region.koffset, uptr,
523 			   region.klen))
524 		return -EFAULT;
525 
526 	return 0;
527 }
528 
529 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
530 {
531 	return -EINVAL;
532 }
533 
534 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
535 {
536 	return -EINVAL;
537 }
538 
539 static int copy_core_reg_indices(const struct kvm_vcpu *vcpu,
540 				 u64 __user *uindices)
541 {
542 	unsigned int i;
543 	int n = 0;
544 
545 	for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) {
546 		u64 reg = KVM_REG_ARM64 | KVM_REG_ARM_CORE | i;
547 		int size = core_reg_size_from_offset(vcpu, i);
548 
549 		if (size < 0)
550 			continue;
551 
552 		switch (size) {
553 		case sizeof(__u32):
554 			reg |= KVM_REG_SIZE_U32;
555 			break;
556 
557 		case sizeof(__u64):
558 			reg |= KVM_REG_SIZE_U64;
559 			break;
560 
561 		case sizeof(__uint128_t):
562 			reg |= KVM_REG_SIZE_U128;
563 			break;
564 
565 		default:
566 			WARN_ON(1);
567 			continue;
568 		}
569 
570 		if (uindices) {
571 			if (put_user(reg, uindices))
572 				return -EFAULT;
573 			uindices++;
574 		}
575 
576 		n++;
577 	}
578 
579 	return n;
580 }
581 
582 static unsigned long num_core_regs(const struct kvm_vcpu *vcpu)
583 {
584 	return copy_core_reg_indices(vcpu, NULL);
585 }
586 
/*
 * ARM64 versions of the TIMER registers, always available on arm64.
 *
 * NUM_TIMER_REGS must match the number of IDs accepted by
 * is_timer_reg() and written by copy_timer_indices().
 */

#define NUM_TIMER_REGS 3
592 
593 static bool is_timer_reg(u64 index)
594 {
595 	switch (index) {
596 	case KVM_REG_ARM_TIMER_CTL:
597 	case KVM_REG_ARM_TIMER_CNT:
598 	case KVM_REG_ARM_TIMER_CVAL:
599 		return true;
600 	}
601 	return false;
602 }
603 
604 static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
605 {
606 	if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
607 		return -EFAULT;
608 	uindices++;
609 	if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
610 		return -EFAULT;
611 	uindices++;
612 	if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
613 		return -EFAULT;
614 
615 	return 0;
616 }
617 
618 static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
619 {
620 	void __user *uaddr = (void __user *)(long)reg->addr;
621 	u64 val;
622 	int ret;
623 
624 	ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
625 	if (ret != 0)
626 		return -EFAULT;
627 
628 	return kvm_arm_timer_set_reg(vcpu, reg->id, val);
629 }
630 
631 static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
632 {
633 	void __user *uaddr = (void __user *)(long)reg->addr;
634 	u64 val;
635 
636 	val = kvm_arm_timer_get_reg(vcpu, reg->id);
637 	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
638 }
639 
640 static unsigned long num_sve_regs(const struct kvm_vcpu *vcpu)
641 {
642 	const unsigned int slices = vcpu_sve_slices(vcpu);
643 
644 	if (!vcpu_has_sve(vcpu))
645 		return 0;
646 
647 	/* Policed by KVM_GET_REG_LIST: */
648 	WARN_ON(!kvm_arm_vcpu_sve_finalized(vcpu));
649 
650 	return slices * (SVE_NUM_PREGS + SVE_NUM_ZREGS + 1 /* FFR */)
651 		+ 1; /* KVM_REG_ARM64_SVE_VLS */
652 }
653 
654 static int copy_sve_reg_indices(const struct kvm_vcpu *vcpu,
655 				u64 __user *uindices)
656 {
657 	const unsigned int slices = vcpu_sve_slices(vcpu);
658 	u64 reg;
659 	unsigned int i, n;
660 	int num_regs = 0;
661 
662 	if (!vcpu_has_sve(vcpu))
663 		return 0;
664 
665 	/* Policed by KVM_GET_REG_LIST: */
666 	WARN_ON(!kvm_arm_vcpu_sve_finalized(vcpu));
667 
668 	/*
669 	 * Enumerate this first, so that userspace can save/restore in
670 	 * the order reported by KVM_GET_REG_LIST:
671 	 */
672 	reg = KVM_REG_ARM64_SVE_VLS;
673 	if (put_user(reg, uindices++))
674 		return -EFAULT;
675 	++num_regs;
676 
677 	for (i = 0; i < slices; i++) {
678 		for (n = 0; n < SVE_NUM_ZREGS; n++) {
679 			reg = KVM_REG_ARM64_SVE_ZREG(n, i);
680 			if (put_user(reg, uindices++))
681 				return -EFAULT;
682 			num_regs++;
683 		}
684 
685 		for (n = 0; n < SVE_NUM_PREGS; n++) {
686 			reg = KVM_REG_ARM64_SVE_PREG(n, i);
687 			if (put_user(reg, uindices++))
688 				return -EFAULT;
689 			num_regs++;
690 		}
691 
692 		reg = KVM_REG_ARM64_SVE_FFR(i);
693 		if (put_user(reg, uindices++))
694 			return -EFAULT;
695 		num_regs++;
696 	}
697 
698 	return num_regs;
699 }
700 
701 /**
702  * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
703  *
704  * This is for all registers.
705  */
706 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
707 {
708 	unsigned long res = 0;
709 
710 	res += num_core_regs(vcpu);
711 	res += num_sve_regs(vcpu);
712 	res += kvm_arm_num_sys_reg_descs(vcpu);
713 	res += kvm_arm_get_fw_num_regs(vcpu);
714 	res += NUM_TIMER_REGS;
715 
716 	return res;
717 }
718 
719 /**
720  * kvm_arm_copy_reg_indices - get indices of all registers.
721  *
722  * We do core registers right here, then we append system regs.
723  */
724 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
725 {
726 	int ret;
727 
728 	ret = copy_core_reg_indices(vcpu, uindices);
729 	if (ret < 0)
730 		return ret;
731 	uindices += ret;
732 
733 	ret = copy_sve_reg_indices(vcpu, uindices);
734 	if (ret < 0)
735 		return ret;
736 	uindices += ret;
737 
738 	ret = kvm_arm_copy_fw_reg_indices(vcpu, uindices);
739 	if (ret < 0)
740 		return ret;
741 	uindices += kvm_arm_get_fw_num_regs(vcpu);
742 
743 	ret = copy_timer_indices(vcpu, uindices);
744 	if (ret < 0)
745 		return ret;
746 	uindices += NUM_TIMER_REGS;
747 
748 	return kvm_arm_copy_sys_reg_indices(vcpu, uindices);
749 }
750 
751 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
752 {
753 	/* We currently use nothing arch-specific in upper 32 bits */
754 	if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
755 		return -EINVAL;
756 
757 	switch (reg->id & KVM_REG_ARM_COPROC_MASK) {
758 	case KVM_REG_ARM_CORE:	return get_core_reg(vcpu, reg);
759 	case KVM_REG_ARM_FW:
760 	case KVM_REG_ARM_FW_FEAT_BMAP:
761 		return kvm_arm_get_fw_reg(vcpu, reg);
762 	case KVM_REG_ARM64_SVE:	return get_sve_reg(vcpu, reg);
763 	}
764 
765 	if (is_timer_reg(reg->id))
766 		return get_timer_reg(vcpu, reg);
767 
768 	return kvm_arm_sys_reg_get_reg(vcpu, reg);
769 }
770 
771 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
772 {
773 	/* We currently use nothing arch-specific in upper 32 bits */
774 	if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
775 		return -EINVAL;
776 
777 	switch (reg->id & KVM_REG_ARM_COPROC_MASK) {
778 	case KVM_REG_ARM_CORE:	return set_core_reg(vcpu, reg);
779 	case KVM_REG_ARM_FW:
780 	case KVM_REG_ARM_FW_FEAT_BMAP:
781 		return kvm_arm_set_fw_reg(vcpu, reg);
782 	case KVM_REG_ARM64_SVE:	return set_sve_reg(vcpu, reg);
783 	}
784 
785 	if (is_timer_reg(reg->id))
786 		return set_timer_reg(vcpu, reg);
787 
788 	return kvm_arm_sys_reg_set_reg(vcpu, reg);
789 }
790 
791 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
792 				  struct kvm_sregs *sregs)
793 {
794 	return -EINVAL;
795 }
796 
797 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
798 				  struct kvm_sregs *sregs)
799 {
800 	return -EINVAL;
801 }
802 
803 int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
804 			      struct kvm_vcpu_events *events)
805 {
806 	events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE);
807 	events->exception.serror_has_esr = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
808 
809 	if (events->exception.serror_pending && events->exception.serror_has_esr)
810 		events->exception.serror_esr = vcpu_get_vsesr(vcpu);
811 
812 	/*
813 	 * We never return a pending ext_dabt here because we deliver it to
814 	 * the virtual CPU directly when setting the event and it's no longer
815 	 * 'pending' at this point.
816 	 */
817 
818 	return 0;
819 }
820 
821 int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
822 			      struct kvm_vcpu_events *events)
823 {
824 	bool serror_pending = events->exception.serror_pending;
825 	bool has_esr = events->exception.serror_has_esr;
826 	bool ext_dabt_pending = events->exception.ext_dabt_pending;
827 
828 	if (serror_pending && has_esr) {
829 		if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
830 			return -EINVAL;
831 
832 		if (!((events->exception.serror_esr) & ~ESR_ELx_ISS_MASK))
833 			kvm_set_sei_esr(vcpu, events->exception.serror_esr);
834 		else
835 			return -EINVAL;
836 	} else if (serror_pending) {
837 		kvm_inject_vabt(vcpu);
838 	}
839 
840 	if (ext_dabt_pending)
841 		kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
842 
843 	return 0;
844 }
845 
846 u32 __attribute_const__ kvm_target_cpu(void)
847 {
848 	unsigned long implementor = read_cpuid_implementor();
849 	unsigned long part_number = read_cpuid_part_number();
850 
851 	switch (implementor) {
852 	case ARM_CPU_IMP_ARM:
853 		switch (part_number) {
854 		case ARM_CPU_PART_AEM_V8:
855 			return KVM_ARM_TARGET_AEM_V8;
856 		case ARM_CPU_PART_FOUNDATION:
857 			return KVM_ARM_TARGET_FOUNDATION_V8;
858 		case ARM_CPU_PART_CORTEX_A53:
859 			return KVM_ARM_TARGET_CORTEX_A53;
860 		case ARM_CPU_PART_CORTEX_A57:
861 			return KVM_ARM_TARGET_CORTEX_A57;
862 		}
863 		break;
864 	case ARM_CPU_IMP_APM:
865 		switch (part_number) {
866 		case APM_CPU_PART_POTENZA:
867 			return KVM_ARM_TARGET_XGENE_POTENZA;
868 		}
869 		break;
870 	}
871 
872 	/* Return a default generic target */
873 	return KVM_ARM_TARGET_GENERIC_V8;
874 }
875 
876 void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
877 {
878 	u32 target = kvm_target_cpu();
879 
880 	memset(init, 0, sizeof(*init));
881 
882 	/*
883 	 * For now, we don't return any features.
884 	 * In future, we might use features to return target
885 	 * specific features available for the preferred
886 	 * target type.
887 	 */
888 	init->target = (__u32)target;
889 }
890 
891 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
892 {
893 	return -EINVAL;
894 }
895 
896 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
897 {
898 	return -EINVAL;
899 }
900 
901 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
902 				  struct kvm_translation *tr)
903 {
904 	return -EINVAL;
905 }
906 
907 /**
908  * kvm_arch_vcpu_ioctl_set_guest_debug - set up guest debugging
909  * @kvm:	pointer to the KVM struct
910  * @kvm_guest_debug: the ioctl data buffer
911  *
912  * This sets up and enables the VM for guest debugging. Userspace
913  * passes in a control flag to enable different debug types and
914  * potentially other architecture specific information in the rest of
915  * the structure.
916  */
917 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
918 					struct kvm_guest_debug *dbg)
919 {
920 	int ret = 0;
921 
922 	trace_kvm_set_guest_debug(vcpu, dbg->control);
923 
924 	if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) {
925 		ret = -EINVAL;
926 		goto out;
927 	}
928 
929 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
930 		vcpu->guest_debug = dbg->control;
931 
932 		/* Hardware assisted Break and Watch points */
933 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
934 			vcpu->arch.external_debug_state = dbg->arch;
935 		}
936 
937 	} else {
938 		/* If not enabled clear all flags */
939 		vcpu->guest_debug = 0;
940 	}
941 
942 out:
943 	return ret;
944 }
945 
946 int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
947 			       struct kvm_device_attr *attr)
948 {
949 	int ret;
950 
951 	switch (attr->group) {
952 	case KVM_ARM_VCPU_PMU_V3_CTRL:
953 		ret = kvm_arm_pmu_v3_set_attr(vcpu, attr);
954 		break;
955 	case KVM_ARM_VCPU_TIMER_CTRL:
956 		ret = kvm_arm_timer_set_attr(vcpu, attr);
957 		break;
958 	case KVM_ARM_VCPU_PVTIME_CTRL:
959 		ret = kvm_arm_pvtime_set_attr(vcpu, attr);
960 		break;
961 	default:
962 		ret = -ENXIO;
963 		break;
964 	}
965 
966 	return ret;
967 }
968 
969 int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
970 			       struct kvm_device_attr *attr)
971 {
972 	int ret;
973 
974 	switch (attr->group) {
975 	case KVM_ARM_VCPU_PMU_V3_CTRL:
976 		ret = kvm_arm_pmu_v3_get_attr(vcpu, attr);
977 		break;
978 	case KVM_ARM_VCPU_TIMER_CTRL:
979 		ret = kvm_arm_timer_get_attr(vcpu, attr);
980 		break;
981 	case KVM_ARM_VCPU_PVTIME_CTRL:
982 		ret = kvm_arm_pvtime_get_attr(vcpu, attr);
983 		break;
984 	default:
985 		ret = -ENXIO;
986 		break;
987 	}
988 
989 	return ret;
990 }
991 
992 int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
993 			       struct kvm_device_attr *attr)
994 {
995 	int ret;
996 
997 	switch (attr->group) {
998 	case KVM_ARM_VCPU_PMU_V3_CTRL:
999 		ret = kvm_arm_pmu_v3_has_attr(vcpu, attr);
1000 		break;
1001 	case KVM_ARM_VCPU_TIMER_CTRL:
1002 		ret = kvm_arm_timer_has_attr(vcpu, attr);
1003 		break;
1004 	case KVM_ARM_VCPU_PVTIME_CTRL:
1005 		ret = kvm_arm_pvtime_has_attr(vcpu, attr);
1006 		break;
1007 	default:
1008 		ret = -ENXIO;
1009 		break;
1010 	}
1011 
1012 	return ret;
1013 }
1014 
1015 long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
1016 				struct kvm_arm_copy_mte_tags *copy_tags)
1017 {
1018 	gpa_t guest_ipa = copy_tags->guest_ipa;
1019 	size_t length = copy_tags->length;
1020 	void __user *tags = copy_tags->addr;
1021 	gpa_t gfn;
1022 	bool write = !(copy_tags->flags & KVM_ARM_TAGS_FROM_GUEST);
1023 	int ret = 0;
1024 
1025 	if (!kvm_has_mte(kvm))
1026 		return -EINVAL;
1027 
1028 	if (copy_tags->reserved[0] || copy_tags->reserved[1])
1029 		return -EINVAL;
1030 
1031 	if (copy_tags->flags & ~KVM_ARM_TAGS_FROM_GUEST)
1032 		return -EINVAL;
1033 
1034 	if (length & ~PAGE_MASK || guest_ipa & ~PAGE_MASK)
1035 		return -EINVAL;
1036 
1037 	gfn = gpa_to_gfn(guest_ipa);
1038 
1039 	mutex_lock(&kvm->slots_lock);
1040 
1041 	while (length > 0) {
1042 		kvm_pfn_t pfn = gfn_to_pfn_prot(kvm, gfn, write, NULL);
1043 		void *maddr;
1044 		unsigned long num_tags;
1045 		struct page *page;
1046 
1047 		if (is_error_noslot_pfn(pfn)) {
1048 			ret = -EFAULT;
1049 			goto out;
1050 		}
1051 
1052 		page = pfn_to_online_page(pfn);
1053 		if (!page) {
1054 			/* Reject ZONE_DEVICE memory */
1055 			ret = -EFAULT;
1056 			goto out;
1057 		}
1058 		maddr = page_address(page);
1059 
1060 		if (!write) {
1061 			if (test_bit(PG_mte_tagged, &page->flags))
1062 				num_tags = mte_copy_tags_to_user(tags, maddr,
1063 							MTE_GRANULES_PER_PAGE);
1064 			else
1065 				/* No tags in memory, so write zeros */
1066 				num_tags = MTE_GRANULES_PER_PAGE -
1067 					clear_user(tags, MTE_GRANULES_PER_PAGE);
1068 			kvm_release_pfn_clean(pfn);
1069 		} else {
1070 			num_tags = mte_copy_tags_from_user(maddr, tags,
1071 							MTE_GRANULES_PER_PAGE);
1072 
1073 			/*
1074 			 * Set the flag after checking the write
1075 			 * completed fully
1076 			 */
1077 			if (num_tags == MTE_GRANULES_PER_PAGE)
1078 				set_bit(PG_mte_tagged, &page->flags);
1079 
1080 			kvm_release_pfn_dirty(pfn);
1081 		}
1082 
1083 		if (num_tags != MTE_GRANULES_PER_PAGE) {
1084 			ret = -EFAULT;
1085 			goto out;
1086 		}
1087 
1088 		gfn++;
1089 		tags += num_tags;
1090 		length -= PAGE_SIZE;
1091 	}
1092 
1093 out:
1094 	mutex_unlock(&kvm->slots_lock);
1095 	/* If some data has been copied report the number of bytes copied */
1096 	if (length != copy_tags->length)
1097 		return copy_tags->length - length;
1098 	return ret;
1099 }
1100