xref: /openbmc/linux/arch/riscv/kvm/vcpu.c (revision 2133dc91)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2019 Western Digital Corporation or its affiliates.
4  *
5  * Authors:
6  *     Anup Patel <anup.patel@wdc.com>
7  */
8 
9 #include <linux/bitops.h>
10 #include <linux/entry-kvm.h>
11 #include <linux/errno.h>
12 #include <linux/err.h>
13 #include <linux/kdebug.h>
14 #include <linux/module.h>
15 #include <linux/percpu.h>
16 #include <linux/uaccess.h>
17 #include <linux/vmalloc.h>
18 #include <linux/sched/signal.h>
19 #include <linux/fs.h>
20 #include <linux/kvm_host.h>
21 #include <asm/csr.h>
22 #include <asm/cacheflush.h>
23 #include <asm/hwcap.h>
24 
/*
 * Descriptors for the per-VCPU statistics: the generic KVM VCPU entries
 * followed by one counter descriptor per name listed below.
 */
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, ecall_exit_stat),
	STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
	STATS_DESC_COUNTER(VCPU, mmio_exit_user),
	STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
	STATS_DESC_COUNTER(VCPU, csr_exit_user),
	STATS_DESC_COUNTER(VCPU, csr_exit_kernel),
	STATS_DESC_COUNTER(VCPU, signal_exits),
	STATS_DESC_COUNTER(VCPU, exits)
};
36 
/*
 * Header for the VCPU statistics. The offsets lay the sections out back to
 * back: the id string (KVM_STATS_NAME_SIZE bytes) directly after the header,
 * then the descriptor array, then the data.
 */
const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};
45 
/* Bits [25:0] of the first ISA word: the single letter extension bits */
#define KVM_RISCV_BASE_ISA_MASK		GENMASK(25, 0)

/*
 * Designated initializer: slot KVM_RISCV_ISA_EXT_<ext> holds the host ID
 * RISCV_ISA_EXT_<ext>. Usable only when both sides spell <ext> identically.
 */
#define KVM_ISA_EXT_ARR(ext)		[KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext

/* Mapping between KVM ISA Extension ID & Host ISA extension ID */
static const unsigned long kvm_isa_ext_arr[] = {
	/* Host IDs of these entries use lower-case names, so spell them out */
	[KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a,
	[KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c,
	[KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d,
	[KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f,
	[KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h,
	[KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i,
	[KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m,

	/* Same name on the KVM and host side */
	KVM_ISA_EXT_ARR(SSTC),
	KVM_ISA_EXT_ARR(SVINVAL),
	KVM_ISA_EXT_ARR(SVPBMT),
	KVM_ISA_EXT_ARR(ZIHINTPAUSE),
	KVM_ISA_EXT_ARR(ZICBOM),
};
66 
67 static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext)
68 {
69 	unsigned long i;
70 
71 	for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
72 		if (kvm_isa_ext_arr[i] == base_ext)
73 			return i;
74 	}
75 
76 	return KVM_RISCV_ISA_EXT_MAX;
77 }
78 
79 static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
80 {
81 	switch (ext) {
82 	case KVM_RISCV_ISA_EXT_H:
83 		return false;
84 	default:
85 		break;
86 	}
87 
88 	return true;
89 }
90 
91 static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
92 {
93 	switch (ext) {
94 	case KVM_RISCV_ISA_EXT_A:
95 	case KVM_RISCV_ISA_EXT_C:
96 	case KVM_RISCV_ISA_EXT_I:
97 	case KVM_RISCV_ISA_EXT_M:
98 	case KVM_RISCV_ISA_EXT_SSTC:
99 	case KVM_RISCV_ISA_EXT_SVINVAL:
100 	case KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
101 		return false;
102 	default:
103 		break;
104 	}
105 
106 	return true;
107 }
108 
109 static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
110 {
111 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
112 	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
113 	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
114 	struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context;
115 	bool loaded;
116 
117 	/**
118 	 * The preemption should be disabled here because it races with
119 	 * kvm_sched_out/kvm_sched_in(called from preempt notifiers) which
120 	 * also calls vcpu_load/put.
121 	 */
122 	get_cpu();
123 	loaded = (vcpu->cpu != -1);
124 	if (loaded)
125 		kvm_arch_vcpu_put(vcpu);
126 
127 	vcpu->arch.last_exit_cpu = -1;
128 
129 	memcpy(csr, reset_csr, sizeof(*csr));
130 
131 	memcpy(cntx, reset_cntx, sizeof(*cntx));
132 
133 	kvm_riscv_vcpu_fp_reset(vcpu);
134 
135 	kvm_riscv_vcpu_timer_reset(vcpu);
136 
137 	WRITE_ONCE(vcpu->arch.irqs_pending, 0);
138 	WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
139 
140 	vcpu->arch.hfence_head = 0;
141 	vcpu->arch.hfence_tail = 0;
142 	memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue));
143 
144 	/* Reset the guest CSRs for hotplug usecase */
145 	if (loaded)
146 		kvm_arch_vcpu_load(vcpu, smp_processor_id());
147 	put_cpu();
148 }
149 
/* Pre-creation hook: performs no checks here and always returns 0. */
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
	return 0;
}
154 
155 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
156 {
157 	struct kvm_cpu_context *cntx;
158 	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
159 	unsigned long host_isa, i;
160 
161 	/* Mark this VCPU never ran */
162 	vcpu->arch.ran_atleast_once = false;
163 	vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
164 	bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX);
165 
166 	/* Setup ISA features available to VCPU */
167 	for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) {
168 		host_isa = kvm_isa_ext_arr[i];
169 		if (__riscv_isa_extension_available(NULL, host_isa) &&
170 		    kvm_riscv_vcpu_isa_enable_allowed(i))
171 			set_bit(host_isa, vcpu->arch.isa);
172 	}
173 
174 	/* Setup VCPU hfence queue */
175 	spin_lock_init(&vcpu->arch.hfence_lock);
176 
177 	/* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
178 	cntx = &vcpu->arch.guest_reset_context;
179 	cntx->sstatus = SR_SPP | SR_SPIE;
180 	cntx->hstatus = 0;
181 	cntx->hstatus |= HSTATUS_VTW;
182 	cntx->hstatus |= HSTATUS_SPVP;
183 	cntx->hstatus |= HSTATUS_SPV;
184 
185 	/* By default, make CY, TM, and IR counters accessible in VU mode */
186 	reset_csr->scounteren = 0x7;
187 
188 	/* Setup VCPU timer */
189 	kvm_riscv_vcpu_timer_init(vcpu);
190 
191 	/* Reset VCPU */
192 	kvm_riscv_reset_vcpu(vcpu);
193 
194 	return 0;
195 }
196 
197 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
198 {
199 	/**
200 	 * vcpu with id 0 is the designated boot cpu.
201 	 * Keep all vcpus with non-zero id in power-off state so that
202 	 * they can be brought up using SBI HSM extension.
203 	 */
204 	if (vcpu->vcpu_idx != 0)
205 		kvm_riscv_vcpu_power_off(vcpu);
206 }
207 
/*
 * Release the architecture resources of a VCPU: tear down its timer and
 * free whatever is left in its MMU page cache.
 */
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	/* Cleanup VCPU timer */
	kvm_riscv_vcpu_timer_deinit(vcpu);

	/* Free unused pages pre-allocated for G-stage page table mappings */
	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}
216 
/* Forward the query to the VCPU timer code and return its answer. */
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	return kvm_riscv_vcpu_timer_pending(vcpu);
}
221 
/* Blocking hook: intentionally empty, nothing is done here. */
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
}
225 
/* Unblocking hook: intentionally empty, nothing is done here. */
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
}
229 
230 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
231 {
232 	return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
233 		!vcpu->arch.power_off && !vcpu->arch.pause);
234 }
235 
/*
 * Returns non-zero when kvm_vcpu_exiting_guest_mode() reports that the VCPU
 * was in IN_GUEST_MODE, zero otherwise.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}
240 
241 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
242 {
243 	return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false;
244 }
245 
/* Fault hook for the VCPU mapping: always reports VM_FAULT_SIGBUS. */
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}
250 
251 static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
252 					 const struct kvm_one_reg *reg)
253 {
254 	unsigned long __user *uaddr =
255 			(unsigned long __user *)(unsigned long)reg->addr;
256 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
257 					    KVM_REG_SIZE_MASK |
258 					    KVM_REG_RISCV_CONFIG);
259 	unsigned long reg_val;
260 
261 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
262 		return -EINVAL;
263 
264 	switch (reg_num) {
265 	case KVM_REG_RISCV_CONFIG_REG(isa):
266 		reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK;
267 		break;
268 	case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
269 		if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM))
270 			return -EINVAL;
271 		reg_val = riscv_cbom_block_size;
272 		break;
273 	default:
274 		return -EINVAL;
275 	}
276 
277 	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
278 		return -EFAULT;
279 
280 	return 0;
281 }
282 
283 static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
284 					 const struct kvm_one_reg *reg)
285 {
286 	unsigned long __user *uaddr =
287 			(unsigned long __user *)(unsigned long)reg->addr;
288 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
289 					    KVM_REG_SIZE_MASK |
290 					    KVM_REG_RISCV_CONFIG);
291 	unsigned long i, isa_ext, reg_val;
292 
293 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
294 		return -EINVAL;
295 
296 	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
297 		return -EFAULT;
298 
299 	/* This ONE REG interface is only defined for single letter extensions */
300 	if (fls(reg_val) >= RISCV_ISA_EXT_BASE)
301 		return -EINVAL;
302 
303 	switch (reg_num) {
304 	case KVM_REG_RISCV_CONFIG_REG(isa):
305 		if (!vcpu->arch.ran_atleast_once) {
306 			/* Ignore the enable/disable request for certain extensions */
307 			for (i = 0; i < RISCV_ISA_EXT_BASE; i++) {
308 				isa_ext = kvm_riscv_vcpu_base2isa_ext(i);
309 				if (isa_ext >= KVM_RISCV_ISA_EXT_MAX) {
310 					reg_val &= ~BIT(i);
311 					continue;
312 				}
313 				if (!kvm_riscv_vcpu_isa_enable_allowed(isa_ext))
314 					if (reg_val & BIT(i))
315 						reg_val &= ~BIT(i);
316 				if (!kvm_riscv_vcpu_isa_disable_allowed(isa_ext))
317 					if (!(reg_val & BIT(i)))
318 						reg_val |= BIT(i);
319 			}
320 			reg_val &= riscv_isa_extension_base(NULL);
321 			/* Do not modify anything beyond single letter extensions */
322 			reg_val = (vcpu->arch.isa[0] & ~KVM_RISCV_BASE_ISA_MASK) |
323 				  (reg_val & KVM_RISCV_BASE_ISA_MASK);
324 			vcpu->arch.isa[0] = reg_val;
325 			kvm_riscv_vcpu_fp_reset(vcpu);
326 		} else {
327 			return -EOPNOTSUPP;
328 		}
329 		break;
330 	case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
331 		return -EOPNOTSUPP;
332 	default:
333 		return -EINVAL;
334 	}
335 
336 	return 0;
337 }
338 
339 static int kvm_riscv_vcpu_get_reg_core(struct kvm_vcpu *vcpu,
340 				       const struct kvm_one_reg *reg)
341 {
342 	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
343 	unsigned long __user *uaddr =
344 			(unsigned long __user *)(unsigned long)reg->addr;
345 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
346 					    KVM_REG_SIZE_MASK |
347 					    KVM_REG_RISCV_CORE);
348 	unsigned long reg_val;
349 
350 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
351 		return -EINVAL;
352 	if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
353 		return -EINVAL;
354 
355 	if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
356 		reg_val = cntx->sepc;
357 	else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
358 		 reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
359 		reg_val = ((unsigned long *)cntx)[reg_num];
360 	else if (reg_num == KVM_REG_RISCV_CORE_REG(mode))
361 		reg_val = (cntx->sstatus & SR_SPP) ?
362 				KVM_RISCV_MODE_S : KVM_RISCV_MODE_U;
363 	else
364 		return -EINVAL;
365 
366 	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
367 		return -EFAULT;
368 
369 	return 0;
370 }
371 
372 static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu,
373 				       const struct kvm_one_reg *reg)
374 {
375 	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
376 	unsigned long __user *uaddr =
377 			(unsigned long __user *)(unsigned long)reg->addr;
378 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
379 					    KVM_REG_SIZE_MASK |
380 					    KVM_REG_RISCV_CORE);
381 	unsigned long reg_val;
382 
383 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
384 		return -EINVAL;
385 	if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
386 		return -EINVAL;
387 
388 	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
389 		return -EFAULT;
390 
391 	if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
392 		cntx->sepc = reg_val;
393 	else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
394 		 reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
395 		((unsigned long *)cntx)[reg_num] = reg_val;
396 	else if (reg_num == KVM_REG_RISCV_CORE_REG(mode)) {
397 		if (reg_val == KVM_RISCV_MODE_S)
398 			cntx->sstatus |= SR_SPP;
399 		else
400 			cntx->sstatus &= ~SR_SPP;
401 	} else
402 		return -EINVAL;
403 
404 	return 0;
405 }
406 
407 static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu,
408 				      const struct kvm_one_reg *reg)
409 {
410 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
411 	unsigned long __user *uaddr =
412 			(unsigned long __user *)(unsigned long)reg->addr;
413 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
414 					    KVM_REG_SIZE_MASK |
415 					    KVM_REG_RISCV_CSR);
416 	unsigned long reg_val;
417 
418 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
419 		return -EINVAL;
420 	if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
421 		return -EINVAL;
422 
423 	if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
424 		kvm_riscv_vcpu_flush_interrupts(vcpu);
425 		reg_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK;
426 	} else
427 		reg_val = ((unsigned long *)csr)[reg_num];
428 
429 	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
430 		return -EFAULT;
431 
432 	return 0;
433 }
434 
435 static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
436 				      const struct kvm_one_reg *reg)
437 {
438 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
439 	unsigned long __user *uaddr =
440 			(unsigned long __user *)(unsigned long)reg->addr;
441 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
442 					    KVM_REG_SIZE_MASK |
443 					    KVM_REG_RISCV_CSR);
444 	unsigned long reg_val;
445 
446 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
447 		return -EINVAL;
448 	if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
449 		return -EINVAL;
450 
451 	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
452 		return -EFAULT;
453 
454 	if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
455 		reg_val &= VSIP_VALID_MASK;
456 		reg_val <<= VSIP_TO_HVIP_SHIFT;
457 	}
458 
459 	((unsigned long *)csr)[reg_num] = reg_val;
460 
461 	if (reg_num == KVM_REG_RISCV_CSR_REG(sip))
462 		WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
463 
464 	return 0;
465 }
466 
467 static int kvm_riscv_vcpu_get_reg_isa_ext(struct kvm_vcpu *vcpu,
468 					  const struct kvm_one_reg *reg)
469 {
470 	unsigned long __user *uaddr =
471 			(unsigned long __user *)(unsigned long)reg->addr;
472 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
473 					    KVM_REG_SIZE_MASK |
474 					    KVM_REG_RISCV_ISA_EXT);
475 	unsigned long reg_val = 0;
476 	unsigned long host_isa_ext;
477 
478 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
479 		return -EINVAL;
480 
481 	if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
482 	    reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
483 		return -EINVAL;
484 
485 	host_isa_ext = kvm_isa_ext_arr[reg_num];
486 	if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext))
487 		reg_val = 1; /* Mark the given extension as available */
488 
489 	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
490 		return -EFAULT;
491 
492 	return 0;
493 }
494 
495 static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
496 					  const struct kvm_one_reg *reg)
497 {
498 	unsigned long __user *uaddr =
499 			(unsigned long __user *)(unsigned long)reg->addr;
500 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
501 					    KVM_REG_SIZE_MASK |
502 					    KVM_REG_RISCV_ISA_EXT);
503 	unsigned long reg_val;
504 	unsigned long host_isa_ext;
505 
506 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
507 		return -EINVAL;
508 
509 	if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
510 	    reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
511 		return -EINVAL;
512 
513 	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
514 		return -EFAULT;
515 
516 	host_isa_ext = kvm_isa_ext_arr[reg_num];
517 	if (!__riscv_isa_extension_available(NULL, host_isa_ext))
518 		return	-EOPNOTSUPP;
519 
520 	if (!vcpu->arch.ran_atleast_once) {
521 		/*
522 		 * All multi-letter extension and a few single letter
523 		 * extension can be disabled
524 		 */
525 		if (reg_val == 1 &&
526 		    kvm_riscv_vcpu_isa_enable_allowed(reg_num))
527 			set_bit(host_isa_ext, vcpu->arch.isa);
528 		else if (!reg_val &&
529 			 kvm_riscv_vcpu_isa_disable_allowed(reg_num))
530 			clear_bit(host_isa_ext, vcpu->arch.isa);
531 		else
532 			return -EINVAL;
533 		kvm_riscv_vcpu_fp_reset(vcpu);
534 	} else {
535 		return -EOPNOTSUPP;
536 	}
537 
538 	return 0;
539 }
540 
541 static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
542 				  const struct kvm_one_reg *reg)
543 {
544 	if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CONFIG)
545 		return kvm_riscv_vcpu_set_reg_config(vcpu, reg);
546 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CORE)
547 		return kvm_riscv_vcpu_set_reg_core(vcpu, reg);
548 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CSR)
549 		return kvm_riscv_vcpu_set_reg_csr(vcpu, reg);
550 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_TIMER)
551 		return kvm_riscv_vcpu_set_reg_timer(vcpu, reg);
552 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_F)
553 		return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
554 						 KVM_REG_RISCV_FP_F);
555 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D)
556 		return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
557 						 KVM_REG_RISCV_FP_D);
558 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT)
559 		return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg);
560 
561 	return -EINVAL;
562 }
563 
564 static int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
565 				  const struct kvm_one_reg *reg)
566 {
567 	if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CONFIG)
568 		return kvm_riscv_vcpu_get_reg_config(vcpu, reg);
569 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CORE)
570 		return kvm_riscv_vcpu_get_reg_core(vcpu, reg);
571 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CSR)
572 		return kvm_riscv_vcpu_get_reg_csr(vcpu, reg);
573 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_TIMER)
574 		return kvm_riscv_vcpu_get_reg_timer(vcpu, reg);
575 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_F)
576 		return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
577 						 KVM_REG_RISCV_FP_F);
578 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D)
579 		return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
580 						 KVM_REG_RISCV_FP_D);
581 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT)
582 		return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg);
583 
584 	return -EINVAL;
585 }
586 
587 long kvm_arch_vcpu_async_ioctl(struct file *filp,
588 			       unsigned int ioctl, unsigned long arg)
589 {
590 	struct kvm_vcpu *vcpu = filp->private_data;
591 	void __user *argp = (void __user *)arg;
592 
593 	if (ioctl == KVM_INTERRUPT) {
594 		struct kvm_interrupt irq;
595 
596 		if (copy_from_user(&irq, argp, sizeof(irq)))
597 			return -EFAULT;
598 
599 		if (irq.irq == KVM_INTERRUPT_SET)
600 			return kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT);
601 		else
602 			return kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
603 	}
604 
605 	return -ENOIOCTLCMD;
606 }
607 
608 long kvm_arch_vcpu_ioctl(struct file *filp,
609 			 unsigned int ioctl, unsigned long arg)
610 {
611 	struct kvm_vcpu *vcpu = filp->private_data;
612 	void __user *argp = (void __user *)arg;
613 	long r = -EINVAL;
614 
615 	switch (ioctl) {
616 	case KVM_SET_ONE_REG:
617 	case KVM_GET_ONE_REG: {
618 		struct kvm_one_reg reg;
619 
620 		r = -EFAULT;
621 		if (copy_from_user(&reg, argp, sizeof(reg)))
622 			break;
623 
624 		if (ioctl == KVM_SET_ONE_REG)
625 			r = kvm_riscv_vcpu_set_reg(vcpu, &reg);
626 		else
627 			r = kvm_riscv_vcpu_get_reg(vcpu, &reg);
628 		break;
629 	}
630 	default:
631 		break;
632 	}
633 
634 	return r;
635 }
636 
/* Not implemented here: always returns -EINVAL. */
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}
642 
/* Not implemented here: always returns -EINVAL. */
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}
648 
/* Not implemented here: always returns -EINVAL. */
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}
653 
/* Not implemented here: always returns -EINVAL. */
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}
658 
/* Not implemented here: always returns -EINVAL. */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL;
}
664 
/* Not implemented here: always returns -EINVAL. */
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}
669 
/* Not implemented here: always returns -EINVAL. */
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}
674 
675 void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu)
676 {
677 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
678 	unsigned long mask, val;
679 
680 	if (READ_ONCE(vcpu->arch.irqs_pending_mask)) {
681 		mask = xchg_acquire(&vcpu->arch.irqs_pending_mask, 0);
682 		val = READ_ONCE(vcpu->arch.irqs_pending) & mask;
683 
684 		csr->hvip &= ~mask;
685 		csr->hvip |= val;
686 	}
687 }
688 
689 void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
690 {
691 	unsigned long hvip;
692 	struct kvm_vcpu_arch *v = &vcpu->arch;
693 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
694 
695 	/* Read current HVIP and VSIE CSRs */
696 	csr->vsie = csr_read(CSR_VSIE);
697 
698 	/* Sync-up HVIP.VSSIP bit changes does by Guest */
699 	hvip = csr_read(CSR_HVIP);
700 	if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) {
701 		if (hvip & (1UL << IRQ_VS_SOFT)) {
702 			if (!test_and_set_bit(IRQ_VS_SOFT,
703 					      &v->irqs_pending_mask))
704 				set_bit(IRQ_VS_SOFT, &v->irqs_pending);
705 		} else {
706 			if (!test_and_set_bit(IRQ_VS_SOFT,
707 					      &v->irqs_pending_mask))
708 				clear_bit(IRQ_VS_SOFT, &v->irqs_pending);
709 		}
710 	}
711 }
712 
713 int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
714 {
715 	if (irq != IRQ_VS_SOFT &&
716 	    irq != IRQ_VS_TIMER &&
717 	    irq != IRQ_VS_EXT)
718 		return -EINVAL;
719 
720 	set_bit(irq, &vcpu->arch.irqs_pending);
721 	smp_mb__before_atomic();
722 	set_bit(irq, &vcpu->arch.irqs_pending_mask);
723 
724 	kvm_vcpu_kick(vcpu);
725 
726 	return 0;
727 }
728 
729 int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
730 {
731 	if (irq != IRQ_VS_SOFT &&
732 	    irq != IRQ_VS_TIMER &&
733 	    irq != IRQ_VS_EXT)
734 		return -EINVAL;
735 
736 	clear_bit(irq, &vcpu->arch.irqs_pending);
737 	smp_mb__before_atomic();
738 	set_bit(irq, &vcpu->arch.irqs_pending_mask);
739 
740 	return 0;
741 }
742 
743 bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, unsigned long mask)
744 {
745 	unsigned long ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK)
746 			    << VSIP_TO_HVIP_SHIFT) & mask;
747 
748 	return (READ_ONCE(vcpu->arch.irqs_pending) & ie) ? true : false;
749 }
750 
/*
 * Mark the VCPU powered off, post a KVM_REQ_SLEEP request for it and kick
 * it so the request gets noticed.
 */
void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
{
	vcpu->arch.power_off = true;
	kvm_make_request(KVM_REQ_SLEEP, vcpu);
	kvm_vcpu_kick(vcpu);
}
757 
/* Clear the powered-off flag and wake the VCPU up. */
void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
{
	vcpu->arch.power_off = false;
	kvm_vcpu_wake_up(vcpu);
}
763 
764 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
765 				    struct kvm_mp_state *mp_state)
766 {
767 	if (vcpu->arch.power_off)
768 		mp_state->mp_state = KVM_MP_STATE_STOPPED;
769 	else
770 		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
771 
772 	return 0;
773 }
774 
775 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
776 				    struct kvm_mp_state *mp_state)
777 {
778 	int ret = 0;
779 
780 	switch (mp_state->mp_state) {
781 	case KVM_MP_STATE_RUNNABLE:
782 		vcpu->arch.power_off = false;
783 		break;
784 	case KVM_MP_STATE_STOPPED:
785 		kvm_riscv_vcpu_power_off(vcpu);
786 		break;
787 	default:
788 		ret = -EINVAL;
789 	}
790 
791 	return ret;
792 }
793 
/* Guest debug is not implemented yet: always returns -EINVAL. */
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	/* TODO: To be implemented later. */
	return -EINVAL;
}
800 
801 static void kvm_riscv_vcpu_update_config(const unsigned long *isa)
802 {
803 	u64 henvcfg = 0;
804 
805 	if (riscv_isa_extension_available(isa, SVPBMT))
806 		henvcfg |= ENVCFG_PBMTE;
807 
808 	if (riscv_isa_extension_available(isa, SSTC))
809 		henvcfg |= ENVCFG_STCE;
810 
811 	if (riscv_isa_extension_available(isa, ZICBOM))
812 		henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE);
813 
814 	csr_write(CSR_HENVCFG, henvcfg);
815 #ifdef CONFIG_32BIT
816 	csr_write(CSR_HENVCFGH, henvcfg >> 32);
817 #endif
818 }
819 
/*
 * Load the VCPU: write the saved guest CSR values into the hardware CSRs,
 * update the ISA-dependent configuration, the G-stage hgatp and the timer,
 * swap host FP state for guest FP state, and finally record @cpu in
 * vcpu->cpu.
 */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	/* Restore the saved guest CSR values */
	csr_write(CSR_VSSTATUS, csr->vsstatus);
	csr_write(CSR_VSIE, csr->vsie);
	csr_write(CSR_VSTVEC, csr->vstvec);
	csr_write(CSR_VSSCRATCH, csr->vsscratch);
	csr_write(CSR_VSEPC, csr->vsepc);
	csr_write(CSR_VSCAUSE, csr->vscause);
	csr_write(CSR_VSTVAL, csr->vstval);
	csr_write(CSR_HVIP, csr->hvip);
	csr_write(CSR_VSATP, csr->vsatp);

	kvm_riscv_vcpu_update_config(vcpu->arch.isa);

	kvm_riscv_gstage_update_hgatp(vcpu);

	kvm_riscv_vcpu_timer_restore(vcpu);

	/* Save host FP state, then bring in the guest's */
	kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context);
	kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context,
					vcpu->arch.isa);

	/* A value other than -1 marks the VCPU as loaded */
	vcpu->cpu = cpu;
}
846 
/*
 * Unload the VCPU: mark it as not loaded, swap guest FP state for host FP
 * state, save the timer, and read the hardware CSRs back into the saved
 * guest CSR copy.
 */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	/* -1 marks the VCPU as not loaded */
	vcpu->cpu = -1;

	/* Save guest FP state, then bring the host's back */
	kvm_riscv_vcpu_guest_fp_save(&vcpu->arch.guest_context,
				     vcpu->arch.isa);
	kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context);

	kvm_riscv_vcpu_timer_save(vcpu);

	/* Capture the current hardware CSR values */
	csr->vsstatus = csr_read(CSR_VSSTATUS);
	csr->vsie = csr_read(CSR_VSIE);
	csr->vstvec = csr_read(CSR_VSTVEC);
	csr->vsscratch = csr_read(CSR_VSSCRATCH);
	csr->vsepc = csr_read(CSR_VSEPC);
	csr->vscause = csr_read(CSR_VSCAUSE);
	csr->vstval = csr_read(CSR_VSTVAL);
	csr->hvip = csr_read(CSR_HVIP);
	csr->vsatp = csr_read(CSR_VSATP);
}
869 
870 static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
871 {
872 	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
873 
874 	if (kvm_request_pending(vcpu)) {
875 		if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
876 			kvm_vcpu_srcu_read_unlock(vcpu);
877 			rcuwait_wait_event(wait,
878 				(!vcpu->arch.power_off) && (!vcpu->arch.pause),
879 				TASK_INTERRUPTIBLE);
880 			kvm_vcpu_srcu_read_lock(vcpu);
881 
882 			if (vcpu->arch.power_off || vcpu->arch.pause) {
883 				/*
884 				 * Awaken to handle a signal, request to
885 				 * sleep again later.
886 				 */
887 				kvm_make_request(KVM_REQ_SLEEP, vcpu);
888 			}
889 		}
890 
891 		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
892 			kvm_riscv_reset_vcpu(vcpu);
893 
894 		if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
895 			kvm_riscv_gstage_update_hgatp(vcpu);
896 
897 		if (kvm_check_request(KVM_REQ_FENCE_I, vcpu))
898 			kvm_riscv_fence_i_process(vcpu);
899 
900 		/*
901 		 * The generic KVM_REQ_TLB_FLUSH is same as
902 		 * KVM_REQ_HFENCE_GVMA_VMID_ALL
903 		 */
904 		if (kvm_check_request(KVM_REQ_HFENCE_GVMA_VMID_ALL, vcpu))
905 			kvm_riscv_hfence_gvma_vmid_all_process(vcpu);
906 
907 		if (kvm_check_request(KVM_REQ_HFENCE_VVMA_ALL, vcpu))
908 			kvm_riscv_hfence_vvma_all_process(vcpu);
909 
910 		if (kvm_check_request(KVM_REQ_HFENCE, vcpu))
911 			kvm_riscv_hfence_process(vcpu);
912 	}
913 }
914 
915 static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
916 {
917 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
918 
919 	csr_write(CSR_HVIP, csr->hvip);
920 }
921 
/*
 * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
 * the vCPU is running.
 *
 * This must be noinstr as instrumentation may make use of RCU, and this is not
 * safe during the EQS.
 *
 * After the switch returns, the CPU recorded in vcpu->cpu is stored as the
 * last exit CPU.
 */
static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu)
{
	guest_state_enter_irqoff();
	__kvm_riscv_switch_to(&vcpu->arch);
	vcpu->arch.last_exit_cpu = vcpu->cpu;
	guest_state_exit_irqoff();
}
936 
/*
 * Run the VCPU until an exit has to be handled outside this loop.
 *
 * First completes whatever exit user space was asked to service (MMIO, SBI
 * or CSR), then repeatedly enters the guest and handles the resulting trap
 * for as long as the handler returns a positive value.
 *
 * Returns the error of the completion handler if it fails, -EINTR when
 * run->immediate_exit is set, and otherwise the final non-positive value
 * produced inside the run loop.
 */
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	int ret;
	struct kvm_cpu_trap trap;
	struct kvm_run *run = vcpu->run;

	/* Mark this VCPU ran at least once */
	vcpu->arch.ran_atleast_once = true;

	kvm_vcpu_srcu_read_lock(vcpu);

	/* Finish the exit that was handed to user space on the previous run */
	switch (run->exit_reason) {
	case KVM_EXIT_MMIO:
		/* Process MMIO value returned from user-space */
		ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run);
		break;
	case KVM_EXIT_RISCV_SBI:
		/* Process SBI value returned from user-space */
		ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
		break;
	case KVM_EXIT_RISCV_CSR:
		/* Process CSR value returned from user-space */
		ret = kvm_riscv_vcpu_csr_return(vcpu, vcpu->run);
		break;
	default:
		ret = 0;
		break;
	}
	if (ret) {
		kvm_vcpu_srcu_read_unlock(vcpu);
		return ret;
	}

	if (run->immediate_exit) {
		kvm_vcpu_srcu_read_unlock(vcpu);
		return -EINTR;
	}

	vcpu_load(vcpu);

	kvm_sigset_activate(vcpu);

	ret = 1;
	run->exit_reason = KVM_EXIT_UNKNOWN;
	while (ret > 0) {
		/* Check conditions before entering the guest */
		ret = xfer_to_guest_mode_handle_work(vcpu);
		/* 0 means no work was left: keep the loop going */
		if (!ret)
			ret = 1;

		kvm_riscv_gstage_vmid_update(vcpu);

		kvm_riscv_check_vcpu_requests(vcpu);

		local_irq_disable();

		/*
		 * Ensure we set mode to IN_GUEST_MODE after we disable
		 * interrupts and before the final VCPU requests check.
		 * See the comment in kvm_vcpu_exiting_guest_mode() and
		 * Documentation/virt/kvm/vcpu-requests.rst
		 */
		vcpu->mode = IN_GUEST_MODE;

		kvm_vcpu_srcu_read_unlock(vcpu);
		smp_mb__after_srcu_read_unlock();

		/*
		 * We might have got VCPU interrupts updated asynchronously
		 * so update it in HW.
		 */
		kvm_riscv_vcpu_flush_interrupts(vcpu);

		/* Update HVIP CSR for current CPU */
		kvm_riscv_update_hvip(vcpu);

		/*
		 * Back out instead of entering the guest if there is an
		 * error, a stale VMID version, a new request or pending work.
		 */
		if (ret <= 0 ||
		    kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
		    kvm_request_pending(vcpu) ||
		    xfer_to_guest_mode_work_pending()) {
			vcpu->mode = OUTSIDE_GUEST_MODE;
			local_irq_enable();
			kvm_vcpu_srcu_read_lock(vcpu);
			continue;
		}

		/*
		 * Cleanup stale TLB entries
		 *
		 * Note: This should be done after G-stage VMID has been
		 * updated using kvm_riscv_gstage_vmid_ver_changed()
		 */
		kvm_riscv_local_tlb_sanitize(vcpu);

		guest_timing_enter_irqoff();

		kvm_riscv_vcpu_enter_exit(vcpu);

		vcpu->mode = OUTSIDE_GUEST_MODE;
		vcpu->stat.exits++;

		/*
		 * Save SCAUSE, STVAL, HTVAL, and HTINST because we might
		 * get an interrupt between __kvm_riscv_switch_to() and
		 * local_irq_enable() which can potentially change CSRs.
		 */
		trap.sepc = vcpu->arch.guest_context.sepc;
		trap.scause = csr_read(CSR_SCAUSE);
		trap.stval = csr_read(CSR_STVAL);
		trap.htval = csr_read(CSR_HTVAL);
		trap.htinst = csr_read(CSR_HTINST);

		/* Syncup interrupts state with HW */
		kvm_riscv_vcpu_sync_interrupts(vcpu);

		preempt_disable();

		/*
		 * We must ensure that any pending interrupts are taken before
		 * we exit guest timing so that timer ticks are accounted as
		 * guest time. Transiently unmask interrupts so that any
		 * pending interrupts are taken.
		 *
		 * There's no barrier which ensures that pending interrupts are
		 * recognised, so we just hope that the CPU takes any pending
		 * interrupts between the enable and disable.
		 */
		local_irq_enable();
		local_irq_disable();

		guest_timing_exit_irqoff();

		local_irq_enable();

		preempt_enable();

		kvm_vcpu_srcu_read_lock(vcpu);

		/* A positive result keeps the loop running */
		ret = kvm_riscv_vcpu_exit(vcpu, run, &trap);
	}

	kvm_sigset_deactivate(vcpu);

	vcpu_put(vcpu);

	kvm_vcpu_srcu_read_unlock(vcpu);

	return ret;
}
1086