xref: /openbmc/linux/arch/riscv/kvm/vcpu.c (revision 255490f9)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2019 Western Digital Corporation or its affiliates.
4  *
5  * Authors:
6  *     Anup Patel <anup.patel@wdc.com>
7  */
8 
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kdebug.h>
#include <linux/module.h>
#include <linux/nospec.h>
#include <linux/percpu.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/sched/signal.h>
#include <linux/fs.h>
#include <linux/kvm_host.h>
#include <asm/csr.h>
#include <asm/hwcap.h>
21 #include <asm/hwcap.h>
22 
23 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
24 	KVM_GENERIC_VCPU_STATS(),
25 	STATS_DESC_COUNTER(VCPU, ecall_exit_stat),
26 	STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
27 	STATS_DESC_COUNTER(VCPU, mmio_exit_user),
28 	STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
29 	STATS_DESC_COUNTER(VCPU, exits)
30 };
31 
32 const struct kvm_stats_header kvm_vcpu_stats_header = {
33 	.name_size = KVM_STATS_NAME_SIZE,
34 	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
35 	.id_offset = sizeof(struct kvm_stats_header),
36 	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
37 	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
38 		       sizeof(kvm_vcpu_stats_desc),
39 };
40 
/* Single-letter extensions that user space is allowed to turn off for a VCPU */
#define KVM_RISCV_ISA_DISABLE_ALLOWED \
	(riscv_isa_extension_mask(d) | riscv_isa_extension_mask(f))

/* Single-letter extensions that stay enabled even if a disable is requested */
#define KVM_RISCV_ISA_DISABLE_NOT_ALLOWED \
	(riscv_isa_extension_mask(a) | riscv_isa_extension_mask(c) | \
	 riscv_isa_extension_mask(i) | riscv_isa_extension_mask(m))

/* Every single-letter extension a VCPU may have in its ISA bitmap */
#define KVM_RISCV_ISA_ALLOWED \
	(KVM_RISCV_ISA_DISABLE_ALLOWED | KVM_RISCV_ISA_DISABLE_NOT_ALLOWED)
51 
52 static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
53 {
54 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
55 	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
56 	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
57 	struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context;
58 	bool loaded;
59 
60 	/**
61 	 * The preemption should be disabled here because it races with
62 	 * kvm_sched_out/kvm_sched_in(called from preempt notifiers) which
63 	 * also calls vcpu_load/put.
64 	 */
65 	get_cpu();
66 	loaded = (vcpu->cpu != -1);
67 	if (loaded)
68 		kvm_arch_vcpu_put(vcpu);
69 
70 	vcpu->arch.last_exit_cpu = -1;
71 
72 	memcpy(csr, reset_csr, sizeof(*csr));
73 
74 	memcpy(cntx, reset_cntx, sizeof(*cntx));
75 
76 	kvm_riscv_vcpu_fp_reset(vcpu);
77 
78 	kvm_riscv_vcpu_timer_reset(vcpu);
79 
80 	WRITE_ONCE(vcpu->arch.irqs_pending, 0);
81 	WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
82 
83 	vcpu->arch.hfence_head = 0;
84 	vcpu->arch.hfence_tail = 0;
85 	memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue));
86 
87 	/* Reset the guest CSRs for hotplug usecase */
88 	if (loaded)
89 		kvm_arch_vcpu_load(vcpu, smp_processor_id());
90 	put_cpu();
91 }
92 
/* Hook called before a VCPU is created; nothing to check here, always 0. */
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
	return 0;
}
97 
98 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
99 {
100 	struct kvm_cpu_context *cntx;
101 	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
102 
103 	/* Mark this VCPU never ran */
104 	vcpu->arch.ran_atleast_once = false;
105 	vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
106 
107 	/* Setup ISA features available to VCPU */
108 	vcpu->arch.isa = riscv_isa_extension_base(NULL) & KVM_RISCV_ISA_ALLOWED;
109 
110 	/* Setup VCPU hfence queue */
111 	spin_lock_init(&vcpu->arch.hfence_lock);
112 
113 	/* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
114 	cntx = &vcpu->arch.guest_reset_context;
115 	cntx->sstatus = SR_SPP | SR_SPIE;
116 	cntx->hstatus = 0;
117 	cntx->hstatus |= HSTATUS_VTW;
118 	cntx->hstatus |= HSTATUS_SPVP;
119 	cntx->hstatus |= HSTATUS_SPV;
120 
121 	/* By default, make CY, TM, and IR counters accessible in VU mode */
122 	reset_csr->scounteren = 0x7;
123 
124 	/* Setup VCPU timer */
125 	kvm_riscv_vcpu_timer_init(vcpu);
126 
127 	/* Reset VCPU */
128 	kvm_riscv_reset_vcpu(vcpu);
129 
130 	return 0;
131 }
132 
133 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
134 {
135 	/**
136 	 * vcpu with id 0 is the designated boot cpu.
137 	 * Keep all vcpus with non-zero id in power-off state so that
138 	 * they can be brought up using SBI HSM extension.
139 	 */
140 	if (vcpu->vcpu_idx != 0)
141 		kvm_riscv_vcpu_power_off(vcpu);
142 }
143 
144 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
145 {
146 	/* Cleanup VCPU timer */
147 	kvm_riscv_vcpu_timer_deinit(vcpu);
148 
149 	/* Free unused pages pre-allocated for G-stage page table mappings */
150 	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
151 }
152 
153 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
154 {
155 	return kvm_riscv_vcpu_has_interrupts(vcpu, 1UL << IRQ_VS_TIMER);
156 }
157 
/* Hook invoked when a VCPU is about to block; intentionally empty. */
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
}
161 
/* Hook invoked when a VCPU stops blocking; intentionally empty. */
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
}
165 
166 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
167 {
168 	return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
169 		!vcpu->arch.power_off && !vcpu->arch.pause);
170 }
171 
172 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
173 {
174 	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
175 }
176 
177 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
178 {
179 	return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false;
180 }
181 
182 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
183 {
184 	return VM_FAULT_SIGBUS;
185 }
186 
187 static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
188 					 const struct kvm_one_reg *reg)
189 {
190 	unsigned long __user *uaddr =
191 			(unsigned long __user *)(unsigned long)reg->addr;
192 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
193 					    KVM_REG_SIZE_MASK |
194 					    KVM_REG_RISCV_CONFIG);
195 	unsigned long reg_val;
196 
197 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
198 		return -EINVAL;
199 
200 	switch (reg_num) {
201 	case KVM_REG_RISCV_CONFIG_REG(isa):
202 		reg_val = vcpu->arch.isa;
203 		break;
204 	default:
205 		return -EINVAL;
206 	}
207 
208 	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
209 		return -EFAULT;
210 
211 	return 0;
212 }
213 
214 static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
215 					 const struct kvm_one_reg *reg)
216 {
217 	unsigned long __user *uaddr =
218 			(unsigned long __user *)(unsigned long)reg->addr;
219 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
220 					    KVM_REG_SIZE_MASK |
221 					    KVM_REG_RISCV_CONFIG);
222 	unsigned long reg_val;
223 
224 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
225 		return -EINVAL;
226 
227 	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
228 		return -EFAULT;
229 
230 	switch (reg_num) {
231 	case KVM_REG_RISCV_CONFIG_REG(isa):
232 		if (!vcpu->arch.ran_atleast_once) {
233 			/* Ignore the disable request for these extensions */
234 			vcpu->arch.isa = reg_val | KVM_RISCV_ISA_DISABLE_NOT_ALLOWED;
235 			vcpu->arch.isa &= riscv_isa_extension_base(NULL);
236 			vcpu->arch.isa &= KVM_RISCV_ISA_ALLOWED;
237 			kvm_riscv_vcpu_fp_reset(vcpu);
238 		} else {
239 			return -EOPNOTSUPP;
240 		}
241 		break;
242 	default:
243 		return -EINVAL;
244 	}
245 
246 	return 0;
247 }
248 
249 static int kvm_riscv_vcpu_get_reg_core(struct kvm_vcpu *vcpu,
250 				       const struct kvm_one_reg *reg)
251 {
252 	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
253 	unsigned long __user *uaddr =
254 			(unsigned long __user *)(unsigned long)reg->addr;
255 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
256 					    KVM_REG_SIZE_MASK |
257 					    KVM_REG_RISCV_CORE);
258 	unsigned long reg_val;
259 
260 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
261 		return -EINVAL;
262 	if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
263 		return -EINVAL;
264 
265 	if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
266 		reg_val = cntx->sepc;
267 	else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
268 		 reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
269 		reg_val = ((unsigned long *)cntx)[reg_num];
270 	else if (reg_num == KVM_REG_RISCV_CORE_REG(mode))
271 		reg_val = (cntx->sstatus & SR_SPP) ?
272 				KVM_RISCV_MODE_S : KVM_RISCV_MODE_U;
273 	else
274 		return -EINVAL;
275 
276 	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
277 		return -EFAULT;
278 
279 	return 0;
280 }
281 
282 static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu,
283 				       const struct kvm_one_reg *reg)
284 {
285 	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
286 	unsigned long __user *uaddr =
287 			(unsigned long __user *)(unsigned long)reg->addr;
288 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
289 					    KVM_REG_SIZE_MASK |
290 					    KVM_REG_RISCV_CORE);
291 	unsigned long reg_val;
292 
293 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
294 		return -EINVAL;
295 	if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
296 		return -EINVAL;
297 
298 	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
299 		return -EFAULT;
300 
301 	if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
302 		cntx->sepc = reg_val;
303 	else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
304 		 reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
305 		((unsigned long *)cntx)[reg_num] = reg_val;
306 	else if (reg_num == KVM_REG_RISCV_CORE_REG(mode)) {
307 		if (reg_val == KVM_RISCV_MODE_S)
308 			cntx->sstatus |= SR_SPP;
309 		else
310 			cntx->sstatus &= ~SR_SPP;
311 	} else
312 		return -EINVAL;
313 
314 	return 0;
315 }
316 
317 static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu,
318 				      const struct kvm_one_reg *reg)
319 {
320 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
321 	unsigned long __user *uaddr =
322 			(unsigned long __user *)(unsigned long)reg->addr;
323 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
324 					    KVM_REG_SIZE_MASK |
325 					    KVM_REG_RISCV_CSR);
326 	unsigned long reg_val;
327 
328 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
329 		return -EINVAL;
330 	if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
331 		return -EINVAL;
332 
333 	if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
334 		kvm_riscv_vcpu_flush_interrupts(vcpu);
335 		reg_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK;
336 	} else
337 		reg_val = ((unsigned long *)csr)[reg_num];
338 
339 	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
340 		return -EFAULT;
341 
342 	return 0;
343 }
344 
345 static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
346 				      const struct kvm_one_reg *reg)
347 {
348 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
349 	unsigned long __user *uaddr =
350 			(unsigned long __user *)(unsigned long)reg->addr;
351 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
352 					    KVM_REG_SIZE_MASK |
353 					    KVM_REG_RISCV_CSR);
354 	unsigned long reg_val;
355 
356 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
357 		return -EINVAL;
358 	if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
359 		return -EINVAL;
360 
361 	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
362 		return -EFAULT;
363 
364 	if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
365 		reg_val &= VSIP_VALID_MASK;
366 		reg_val <<= VSIP_TO_HVIP_SHIFT;
367 	}
368 
369 	((unsigned long *)csr)[reg_num] = reg_val;
370 
371 	if (reg_num == KVM_REG_RISCV_CSR_REG(sip))
372 		WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
373 
374 	return 0;
375 }
376 
377 /* Mapping between KVM ISA Extension ID & Host ISA extension ID */
378 static unsigned long kvm_isa_ext_arr[] = {
379 	RISCV_ISA_EXT_a,
380 	RISCV_ISA_EXT_c,
381 	RISCV_ISA_EXT_d,
382 	RISCV_ISA_EXT_f,
383 	RISCV_ISA_EXT_h,
384 	RISCV_ISA_EXT_i,
385 	RISCV_ISA_EXT_m,
386 };
387 
388 static int kvm_riscv_vcpu_get_reg_isa_ext(struct kvm_vcpu *vcpu,
389 					  const struct kvm_one_reg *reg)
390 {
391 	unsigned long __user *uaddr =
392 			(unsigned long __user *)(unsigned long)reg->addr;
393 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
394 					    KVM_REG_SIZE_MASK |
395 					    KVM_REG_RISCV_ISA_EXT);
396 	unsigned long reg_val = 0;
397 	unsigned long host_isa_ext;
398 
399 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
400 		return -EINVAL;
401 
402 	if (reg_num >= KVM_RISCV_ISA_EXT_MAX || reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
403 		return -EINVAL;
404 
405 	host_isa_ext = kvm_isa_ext_arr[reg_num];
406 	if (__riscv_isa_extension_available(&vcpu->arch.isa, host_isa_ext))
407 		reg_val = 1; /* Mark the given extension as available */
408 
409 	if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
410 		return -EFAULT;
411 
412 	return 0;
413 }
414 
415 static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
416 					  const struct kvm_one_reg *reg)
417 {
418 	unsigned long __user *uaddr =
419 			(unsigned long __user *)(unsigned long)reg->addr;
420 	unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
421 					    KVM_REG_SIZE_MASK |
422 					    KVM_REG_RISCV_ISA_EXT);
423 	unsigned long reg_val;
424 	unsigned long host_isa_ext;
425 	unsigned long host_isa_ext_mask;
426 
427 	if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
428 		return -EINVAL;
429 
430 	if (reg_num >= KVM_RISCV_ISA_EXT_MAX || reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
431 		return -EINVAL;
432 
433 	if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
434 		return -EFAULT;
435 
436 	host_isa_ext = kvm_isa_ext_arr[reg_num];
437 	if (!__riscv_isa_extension_available(NULL, host_isa_ext))
438 		return	-EOPNOTSUPP;
439 
440 	if (host_isa_ext >= RISCV_ISA_EXT_BASE &&
441 	    host_isa_ext < RISCV_ISA_EXT_MAX) {
442 		/*
443 		 * Multi-letter ISA extension. Currently there is no provision
444 		 * to enable/disable the multi-letter ISA extensions for guests.
445 		 * Return success if the request is to enable any ISA extension
446 		 * that is available in the hardware.
447 		 * Return -EOPNOTSUPP otherwise.
448 		 */
449 		if (!reg_val)
450 			return -EOPNOTSUPP;
451 		else
452 			return 0;
453 	}
454 
455 	/* Single letter base ISA extension */
456 	if (!vcpu->arch.ran_atleast_once) {
457 		host_isa_ext_mask = BIT_MASK(host_isa_ext);
458 		if (!reg_val && (host_isa_ext_mask & KVM_RISCV_ISA_DISABLE_ALLOWED))
459 			vcpu->arch.isa &= ~host_isa_ext_mask;
460 		else
461 			vcpu->arch.isa |= host_isa_ext_mask;
462 		vcpu->arch.isa &= riscv_isa_extension_base(NULL);
463 		vcpu->arch.isa &= KVM_RISCV_ISA_ALLOWED;
464 		kvm_riscv_vcpu_fp_reset(vcpu);
465 	} else {
466 		return -EOPNOTSUPP;
467 	}
468 
469 	return 0;
470 }
471 
472 static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
473 				  const struct kvm_one_reg *reg)
474 {
475 	if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CONFIG)
476 		return kvm_riscv_vcpu_set_reg_config(vcpu, reg);
477 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CORE)
478 		return kvm_riscv_vcpu_set_reg_core(vcpu, reg);
479 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CSR)
480 		return kvm_riscv_vcpu_set_reg_csr(vcpu, reg);
481 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_TIMER)
482 		return kvm_riscv_vcpu_set_reg_timer(vcpu, reg);
483 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_F)
484 		return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
485 						 KVM_REG_RISCV_FP_F);
486 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D)
487 		return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
488 						 KVM_REG_RISCV_FP_D);
489 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT)
490 		return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg);
491 
492 	return -EINVAL;
493 }
494 
495 static int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
496 				  const struct kvm_one_reg *reg)
497 {
498 	if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CONFIG)
499 		return kvm_riscv_vcpu_get_reg_config(vcpu, reg);
500 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CORE)
501 		return kvm_riscv_vcpu_get_reg_core(vcpu, reg);
502 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CSR)
503 		return kvm_riscv_vcpu_get_reg_csr(vcpu, reg);
504 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_TIMER)
505 		return kvm_riscv_vcpu_get_reg_timer(vcpu, reg);
506 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_F)
507 		return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
508 						 KVM_REG_RISCV_FP_F);
509 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D)
510 		return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
511 						 KVM_REG_RISCV_FP_D);
512 	else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT)
513 		return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg);
514 
515 	return -EINVAL;
516 }
517 
518 long kvm_arch_vcpu_async_ioctl(struct file *filp,
519 			       unsigned int ioctl, unsigned long arg)
520 {
521 	struct kvm_vcpu *vcpu = filp->private_data;
522 	void __user *argp = (void __user *)arg;
523 
524 	if (ioctl == KVM_INTERRUPT) {
525 		struct kvm_interrupt irq;
526 
527 		if (copy_from_user(&irq, argp, sizeof(irq)))
528 			return -EFAULT;
529 
530 		if (irq.irq == KVM_INTERRUPT_SET)
531 			return kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT);
532 		else
533 			return kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
534 	}
535 
536 	return -ENOIOCTLCMD;
537 }
538 
539 long kvm_arch_vcpu_ioctl(struct file *filp,
540 			 unsigned int ioctl, unsigned long arg)
541 {
542 	struct kvm_vcpu *vcpu = filp->private_data;
543 	void __user *argp = (void __user *)arg;
544 	long r = -EINVAL;
545 
546 	switch (ioctl) {
547 	case KVM_SET_ONE_REG:
548 	case KVM_GET_ONE_REG: {
549 		struct kvm_one_reg reg;
550 
551 		r = -EFAULT;
552 		if (copy_from_user(&reg, argp, sizeof(reg)))
553 			break;
554 
555 		if (ioctl == KVM_SET_ONE_REG)
556 			r = kvm_riscv_vcpu_set_reg(vcpu, &reg);
557 		else
558 			r = kvm_riscv_vcpu_get_reg(vcpu, &reg);
559 		break;
560 	}
561 	default:
562 		break;
563 	}
564 
565 	return r;
566 }
567 
568 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
569 				  struct kvm_sregs *sregs)
570 {
571 	return -EINVAL;
572 }
573 
574 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
575 				  struct kvm_sregs *sregs)
576 {
577 	return -EINVAL;
578 }
579 
580 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
581 {
582 	return -EINVAL;
583 }
584 
585 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
586 {
587 	return -EINVAL;
588 }
589 
590 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
591 				  struct kvm_translation *tr)
592 {
593 	return -EINVAL;
594 }
595 
596 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
597 {
598 	return -EINVAL;
599 }
600 
601 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
602 {
603 	return -EINVAL;
604 }
605 
606 void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu)
607 {
608 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
609 	unsigned long mask, val;
610 
611 	if (READ_ONCE(vcpu->arch.irqs_pending_mask)) {
612 		mask = xchg_acquire(&vcpu->arch.irqs_pending_mask, 0);
613 		val = READ_ONCE(vcpu->arch.irqs_pending) & mask;
614 
615 		csr->hvip &= ~mask;
616 		csr->hvip |= val;
617 	}
618 }
619 
620 void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
621 {
622 	unsigned long hvip;
623 	struct kvm_vcpu_arch *v = &vcpu->arch;
624 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
625 
626 	/* Read current HVIP and VSIE CSRs */
627 	csr->vsie = csr_read(CSR_VSIE);
628 
629 	/* Sync-up HVIP.VSSIP bit changes does by Guest */
630 	hvip = csr_read(CSR_HVIP);
631 	if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) {
632 		if (hvip & (1UL << IRQ_VS_SOFT)) {
633 			if (!test_and_set_bit(IRQ_VS_SOFT,
634 					      &v->irqs_pending_mask))
635 				set_bit(IRQ_VS_SOFT, &v->irqs_pending);
636 		} else {
637 			if (!test_and_set_bit(IRQ_VS_SOFT,
638 					      &v->irqs_pending_mask))
639 				clear_bit(IRQ_VS_SOFT, &v->irqs_pending);
640 		}
641 	}
642 }
643 
644 int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
645 {
646 	if (irq != IRQ_VS_SOFT &&
647 	    irq != IRQ_VS_TIMER &&
648 	    irq != IRQ_VS_EXT)
649 		return -EINVAL;
650 
651 	set_bit(irq, &vcpu->arch.irqs_pending);
652 	smp_mb__before_atomic();
653 	set_bit(irq, &vcpu->arch.irqs_pending_mask);
654 
655 	kvm_vcpu_kick(vcpu);
656 
657 	return 0;
658 }
659 
660 int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
661 {
662 	if (irq != IRQ_VS_SOFT &&
663 	    irq != IRQ_VS_TIMER &&
664 	    irq != IRQ_VS_EXT)
665 		return -EINVAL;
666 
667 	clear_bit(irq, &vcpu->arch.irqs_pending);
668 	smp_mb__before_atomic();
669 	set_bit(irq, &vcpu->arch.irqs_pending_mask);
670 
671 	return 0;
672 }
673 
674 bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, unsigned long mask)
675 {
676 	unsigned long ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK)
677 			    << VSIP_TO_HVIP_SHIFT) & mask;
678 
679 	return (READ_ONCE(vcpu->arch.irqs_pending) & ie) ? true : false;
680 }
681 
682 void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
683 {
684 	vcpu->arch.power_off = true;
685 	kvm_make_request(KVM_REQ_SLEEP, vcpu);
686 	kvm_vcpu_kick(vcpu);
687 }
688 
689 void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
690 {
691 	vcpu->arch.power_off = false;
692 	kvm_vcpu_wake_up(vcpu);
693 }
694 
695 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
696 				    struct kvm_mp_state *mp_state)
697 {
698 	if (vcpu->arch.power_off)
699 		mp_state->mp_state = KVM_MP_STATE_STOPPED;
700 	else
701 		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
702 
703 	return 0;
704 }
705 
706 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
707 				    struct kvm_mp_state *mp_state)
708 {
709 	int ret = 0;
710 
711 	switch (mp_state->mp_state) {
712 	case KVM_MP_STATE_RUNNABLE:
713 		vcpu->arch.power_off = false;
714 		break;
715 	case KVM_MP_STATE_STOPPED:
716 		kvm_riscv_vcpu_power_off(vcpu);
717 		break;
718 	default:
719 		ret = -EINVAL;
720 	}
721 
722 	return ret;
723 }
724 
725 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
726 					struct kvm_guest_debug *dbg)
727 {
728 	/* TODO; To be implemented later. */
729 	return -EINVAL;
730 }
731 
732 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
733 {
734 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
735 
736 	csr_write(CSR_VSSTATUS, csr->vsstatus);
737 	csr_write(CSR_VSIE, csr->vsie);
738 	csr_write(CSR_VSTVEC, csr->vstvec);
739 	csr_write(CSR_VSSCRATCH, csr->vsscratch);
740 	csr_write(CSR_VSEPC, csr->vsepc);
741 	csr_write(CSR_VSCAUSE, csr->vscause);
742 	csr_write(CSR_VSTVAL, csr->vstval);
743 	csr_write(CSR_HVIP, csr->hvip);
744 	csr_write(CSR_VSATP, csr->vsatp);
745 
746 	kvm_riscv_gstage_update_hgatp(vcpu);
747 
748 	kvm_riscv_vcpu_timer_restore(vcpu);
749 
750 	kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context);
751 	kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context,
752 					vcpu->arch.isa);
753 
754 	vcpu->cpu = cpu;
755 }
756 
757 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
758 {
759 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
760 
761 	vcpu->cpu = -1;
762 
763 	kvm_riscv_vcpu_guest_fp_save(&vcpu->arch.guest_context,
764 				     vcpu->arch.isa);
765 	kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context);
766 
767 	csr->vsstatus = csr_read(CSR_VSSTATUS);
768 	csr->vsie = csr_read(CSR_VSIE);
769 	csr->vstvec = csr_read(CSR_VSTVEC);
770 	csr->vsscratch = csr_read(CSR_VSSCRATCH);
771 	csr->vsepc = csr_read(CSR_VSEPC);
772 	csr->vscause = csr_read(CSR_VSCAUSE);
773 	csr->vstval = csr_read(CSR_VSTVAL);
774 	csr->hvip = csr_read(CSR_HVIP);
775 	csr->vsatp = csr_read(CSR_VSATP);
776 }
777 
778 static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
779 {
780 	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
781 
782 	if (kvm_request_pending(vcpu)) {
783 		if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
784 			rcuwait_wait_event(wait,
785 				(!vcpu->arch.power_off) && (!vcpu->arch.pause),
786 				TASK_INTERRUPTIBLE);
787 
788 			if (vcpu->arch.power_off || vcpu->arch.pause) {
789 				/*
790 				 * Awaken to handle a signal, request to
791 				 * sleep again later.
792 				 */
793 				kvm_make_request(KVM_REQ_SLEEP, vcpu);
794 			}
795 		}
796 
797 		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
798 			kvm_riscv_reset_vcpu(vcpu);
799 
800 		if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
801 			kvm_riscv_gstage_update_hgatp(vcpu);
802 
803 		if (kvm_check_request(KVM_REQ_FENCE_I, vcpu))
804 			kvm_riscv_fence_i_process(vcpu);
805 
806 		/*
807 		 * The generic KVM_REQ_TLB_FLUSH is same as
808 		 * KVM_REQ_HFENCE_GVMA_VMID_ALL
809 		 */
810 		if (kvm_check_request(KVM_REQ_HFENCE_GVMA_VMID_ALL, vcpu))
811 			kvm_riscv_hfence_gvma_vmid_all_process(vcpu);
812 
813 		if (kvm_check_request(KVM_REQ_HFENCE_VVMA_ALL, vcpu))
814 			kvm_riscv_hfence_vvma_all_process(vcpu);
815 
816 		if (kvm_check_request(KVM_REQ_HFENCE, vcpu))
817 			kvm_riscv_hfence_process(vcpu);
818 	}
819 }
820 
821 static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
822 {
823 	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
824 
825 	csr_write(CSR_HVIP, csr->hvip);
826 }
827 
828 /*
829  * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
830  * the vCPU is running.
831  *
832  * This must be noinstr as instrumentation may make use of RCU, and this is not
833  * safe during the EQS.
834  */
835 static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu)
836 {
837 	guest_state_enter_irqoff();
838 	__kvm_riscv_switch_to(&vcpu->arch);
839 	vcpu->arch.last_exit_cpu = vcpu->cpu;
840 	guest_state_exit_irqoff();
841 }
842 
843 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
844 {
845 	int ret;
846 	struct kvm_cpu_trap trap;
847 	struct kvm_run *run = vcpu->run;
848 
849 	/* Mark this VCPU ran at least once */
850 	vcpu->arch.ran_atleast_once = true;
851 
852 	kvm_vcpu_srcu_read_lock(vcpu);
853 
854 	/* Process MMIO value returned from user-space */
855 	if (run->exit_reason == KVM_EXIT_MMIO) {
856 		ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run);
857 		if (ret) {
858 			kvm_vcpu_srcu_read_unlock(vcpu);
859 			return ret;
860 		}
861 	}
862 
863 	/* Process SBI value returned from user-space */
864 	if (run->exit_reason == KVM_EXIT_RISCV_SBI) {
865 		ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
866 		if (ret) {
867 			kvm_vcpu_srcu_read_unlock(vcpu);
868 			return ret;
869 		}
870 	}
871 
872 	if (run->immediate_exit) {
873 		kvm_vcpu_srcu_read_unlock(vcpu);
874 		return -EINTR;
875 	}
876 
877 	vcpu_load(vcpu);
878 
879 	kvm_sigset_activate(vcpu);
880 
881 	ret = 1;
882 	run->exit_reason = KVM_EXIT_UNKNOWN;
883 	while (ret > 0) {
884 		/* Check conditions before entering the guest */
885 		cond_resched();
886 
887 		kvm_riscv_gstage_vmid_update(vcpu);
888 
889 		kvm_riscv_check_vcpu_requests(vcpu);
890 
891 		preempt_disable();
892 
893 		local_irq_disable();
894 
895 		/*
896 		 * Exit if we have a signal pending so that we can deliver
897 		 * the signal to user space.
898 		 */
899 		if (signal_pending(current)) {
900 			ret = -EINTR;
901 			run->exit_reason = KVM_EXIT_INTR;
902 		}
903 
904 		/*
905 		 * Ensure we set mode to IN_GUEST_MODE after we disable
906 		 * interrupts and before the final VCPU requests check.
907 		 * See the comment in kvm_vcpu_exiting_guest_mode() and
908 		 * Documentation/virt/kvm/vcpu-requests.rst
909 		 */
910 		vcpu->mode = IN_GUEST_MODE;
911 
912 		kvm_vcpu_srcu_read_unlock(vcpu);
913 		smp_mb__after_srcu_read_unlock();
914 
915 		/*
916 		 * We might have got VCPU interrupts updated asynchronously
917 		 * so update it in HW.
918 		 */
919 		kvm_riscv_vcpu_flush_interrupts(vcpu);
920 
921 		/* Update HVIP CSR for current CPU */
922 		kvm_riscv_update_hvip(vcpu);
923 
924 		if (ret <= 0 ||
925 		    kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
926 		    kvm_request_pending(vcpu)) {
927 			vcpu->mode = OUTSIDE_GUEST_MODE;
928 			local_irq_enable();
929 			preempt_enable();
930 			kvm_vcpu_srcu_read_lock(vcpu);
931 			continue;
932 		}
933 
934 		/*
935 		 * Cleanup stale TLB enteries
936 		 *
937 		 * Note: This should be done after G-stage VMID has been
938 		 * updated using kvm_riscv_gstage_vmid_ver_changed()
939 		 */
940 		kvm_riscv_local_tlb_sanitize(vcpu);
941 
942 		guest_timing_enter_irqoff();
943 
944 		kvm_riscv_vcpu_enter_exit(vcpu);
945 
946 		vcpu->mode = OUTSIDE_GUEST_MODE;
947 		vcpu->stat.exits++;
948 
949 		/*
950 		 * Save SCAUSE, STVAL, HTVAL, and HTINST because we might
951 		 * get an interrupt between __kvm_riscv_switch_to() and
952 		 * local_irq_enable() which can potentially change CSRs.
953 		 */
954 		trap.sepc = vcpu->arch.guest_context.sepc;
955 		trap.scause = csr_read(CSR_SCAUSE);
956 		trap.stval = csr_read(CSR_STVAL);
957 		trap.htval = csr_read(CSR_HTVAL);
958 		trap.htinst = csr_read(CSR_HTINST);
959 
960 		/* Syncup interrupts state with HW */
961 		kvm_riscv_vcpu_sync_interrupts(vcpu);
962 
963 		/*
964 		 * We must ensure that any pending interrupts are taken before
965 		 * we exit guest timing so that timer ticks are accounted as
966 		 * guest time. Transiently unmask interrupts so that any
967 		 * pending interrupts are taken.
968 		 *
969 		 * There's no barrier which ensures that pending interrupts are
970 		 * recognised, so we just hope that the CPU takes any pending
971 		 * interrupts between the enable and disable.
972 		 */
973 		local_irq_enable();
974 		local_irq_disable();
975 
976 		guest_timing_exit_irqoff();
977 
978 		local_irq_enable();
979 
980 		preempt_enable();
981 
982 		kvm_vcpu_srcu_read_lock(vcpu);
983 
984 		ret = kvm_riscv_vcpu_exit(vcpu, run, &trap);
985 	}
986 
987 	kvm_sigset_deactivate(vcpu);
988 
989 	vcpu_put(vcpu);
990 
991 	kvm_vcpu_srcu_read_unlock(vcpu);
992 
993 	return ret;
994 }
995