1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * RISC-V code
4  *
5  * Copyright (C) 2021 Western Digital Corporation or its affiliates.
6  */
7 
8 #include <linux/compiler.h>
9 #include <assert.h>
10 
11 #include "kvm_util.h"
12 #include "processor.h"
13 
/*
 * Lowest guest virtual address considered when allocating a vCPU stack;
 * passed as the minimum-address argument of __vm_vaddr_alloc() in
 * vm_arch_vcpu_add().
 */
#define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN	0xac0000
15 
16 static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
17 {
18 	return (v + vm->page_size) & ~(vm->page_size - 1);
19 }
20 
21 static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
22 {
23 	return ((entry & PGTBL_PTE_ADDR_MASK) >> PGTBL_PTE_ADDR_SHIFT) <<
24 		PGTBL_PAGE_SIZE_SHIFT;
25 }
26 
27 static uint64_t ptrs_per_pte(struct kvm_vm *vm)
28 {
29 	return PGTBL_PAGE_SIZE / sizeof(uint64_t);
30 }
31 
32 static uint64_t pte_index_mask[] = {
33 	PGTBL_L0_INDEX_MASK,
34 	PGTBL_L1_INDEX_MASK,
35 	PGTBL_L2_INDEX_MASK,
36 	PGTBL_L3_INDEX_MASK,
37 };
38 
39 static uint32_t pte_index_shift[] = {
40 	PGTBL_L0_INDEX_SHIFT,
41 	PGTBL_L1_INDEX_SHIFT,
42 	PGTBL_L2_INDEX_SHIFT,
43 	PGTBL_L3_INDEX_SHIFT,
44 };
45 
46 static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
47 {
48 	TEST_ASSERT(level > -1,
49 		"Negative page table level (%d) not possible", level);
50 	TEST_ASSERT(level < vm->pgtable_levels,
51 		"Invalid page table level (%d)", level);
52 
53 	return (gva & pte_index_mask[level]) >> pte_index_shift[level];
54 }
55 
56 void virt_arch_pgd_alloc(struct kvm_vm *vm)
57 {
58 	size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size;
59 
60 	if (vm->pgd_created)
61 		return;
62 
63 	vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
64 				     KVM_GUEST_PAGE_TABLE_MIN_PADDR,
65 				     vm->memslots[MEM_REGION_PT]);
66 	vm->pgd_created = true;
67 }
68 
69 void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
70 {
71 	uint64_t *ptep, next_ppn;
72 	int level = vm->pgtable_levels - 1;
73 
74 	TEST_ASSERT((vaddr % vm->page_size) == 0,
75 		"Virtual address not on page boundary,\n"
76 		"  vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
77 	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
78 		(vaddr >> vm->page_shift)),
79 		"Invalid virtual address, vaddr: 0x%lx", vaddr);
80 	TEST_ASSERT((paddr % vm->page_size) == 0,
81 		"Physical address not on page boundary,\n"
82 		"  paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
83 	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
84 		"Physical address beyond maximum supported,\n"
85 		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
86 		paddr, vm->max_gfn, vm->page_size);
87 
88 	ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, vaddr, level) * 8;
89 	if (!*ptep) {
90 		next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT;
91 		*ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
92 			PGTBL_PTE_VALID_MASK;
93 	}
94 	level--;
95 
96 	while (level > -1) {
97 		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) +
98 		       pte_index(vm, vaddr, level) * 8;
99 		if (!*ptep && level > 0) {
100 			next_ppn = vm_alloc_page_table(vm) >>
101 				   PGTBL_PAGE_SIZE_SHIFT;
102 			*ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
103 				PGTBL_PTE_VALID_MASK;
104 		}
105 		level--;
106 	}
107 
108 	paddr = paddr >> PGTBL_PAGE_SIZE_SHIFT;
109 	*ptep = (paddr << PGTBL_PTE_ADDR_SHIFT) |
110 		PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK;
111 }
112 
113 vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
114 {
115 	uint64_t *ptep;
116 	int level = vm->pgtable_levels - 1;
117 
118 	if (!vm->pgd_created)
119 		goto unmapped_gva;
120 
121 	ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, gva, level) * 8;
122 	if (!ptep)
123 		goto unmapped_gva;
124 	level--;
125 
126 	while (level > -1) {
127 		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) +
128 		       pte_index(vm, gva, level) * 8;
129 		if (!ptep)
130 			goto unmapped_gva;
131 		level--;
132 	}
133 
134 	return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
135 
136 unmapped_gva:
137 	TEST_FAIL("No mapping for vm virtual address gva: 0x%lx level: %d",
138 		  gva, level);
139 	exit(1);
140 }
141 
/*
 * Recursively print every non-zero entry of the page table at guest
 * physical address @page, then descend into the table each entry points
 * to with @level decremented.  Recursion stops once @level drops below 0.
 *
 * The body is compiled only when DEBUG is defined; otherwise this is a
 * no-op.
 */
static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent,
		     uint64_t page, int level)
{
#ifdef DEBUG
	static const char *const level_name[] = { "pte", "pmd", "pud", "p4d"};
	uint64_t entry_gpa, table_end, *entry;

	if (level < 0)
		return;

	table_end = page + ptrs_per_pte(vm) * 8;

	for (entry_gpa = page; entry_gpa < table_end; entry_gpa += 8) {
		entry = addr_gpa2hva(vm, entry_gpa);
		if (*entry) {
			fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "",
				level_name[level], entry_gpa, *entry, entry);
			pte_dump(stream, vm, indent + 1,
				 pte_addr(vm, *entry), level - 1);
		}
	}
#endif
}
163 
164 void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
165 {
166 	int level = vm->pgtable_levels - 1;
167 	uint64_t pgd, *ptep;
168 
169 	if (!vm->pgd_created)
170 		return;
171 
172 	for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pte(vm) * 8; pgd += 8) {
173 		ptep = addr_gpa2hva(vm, pgd);
174 		if (!*ptep)
175 			continue;
176 		fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "",
177 			pgd, *ptep, ptep);
178 		pte_dump(stream, vm, indent + 1,
179 			 pte_addr(vm, *ptep), level - 1);
180 	}
181 }
182 
183 void riscv_vcpu_mmu_setup(struct kvm_vcpu *vcpu)
184 {
185 	struct kvm_vm *vm = vcpu->vm;
186 	unsigned long satp;
187 
188 	/*
189 	 * The RISC-V Sv48 MMU mode supports 56-bit physical address
190 	 * for 48-bit virtual address with 4KB last level page size.
191 	 */
192 	switch (vm->mode) {
193 	case VM_MODE_P52V48_4K:
194 	case VM_MODE_P48V48_4K:
195 	case VM_MODE_P40V48_4K:
196 		break;
197 	default:
198 		TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
199 	}
200 
201 	satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN;
202 	satp |= SATP_MODE_48;
203 
204 	vcpu_set_reg(vcpu, RISCV_CSR_REG(satp), satp);
205 }
206 
207 void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
208 {
209 	struct kvm_riscv_core core;
210 
211 	vcpu_get_reg(vcpu, RISCV_CORE_REG(mode), &core.mode);
212 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &core.regs.pc);
213 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra), &core.regs.ra);
214 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp), &core.regs.sp);
215 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp), &core.regs.gp);
216 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp), &core.regs.tp);
217 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0), &core.regs.t0);
218 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1), &core.regs.t1);
219 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2), &core.regs.t2);
220 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0), &core.regs.s0);
221 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1), &core.regs.s1);
222 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0), &core.regs.a0);
223 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1), &core.regs.a1);
224 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2), &core.regs.a2);
225 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3), &core.regs.a3);
226 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4), &core.regs.a4);
227 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5), &core.regs.a5);
228 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6), &core.regs.a6);
229 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7), &core.regs.a7);
230 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2), &core.regs.s2);
231 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3), &core.regs.s3);
232 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4), &core.regs.s4);
233 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5), &core.regs.s5);
234 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6), &core.regs.s6);
235 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7), &core.regs.s7);
236 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8), &core.regs.s8);
237 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9), &core.regs.s9);
238 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10), &core.regs.s10);
239 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11), &core.regs.s11);
240 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3), &core.regs.t3);
241 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4), &core.regs.t4);
242 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5), &core.regs.t5);
243 	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6), &core.regs.t6);
244 
245 	fprintf(stream,
246 		" MODE:  0x%lx\n", core.mode);
247 	fprintf(stream,
248 		" PC: 0x%016lx   RA: 0x%016lx SP: 0x%016lx GP: 0x%016lx\n",
249 		core.regs.pc, core.regs.ra, core.regs.sp, core.regs.gp);
250 	fprintf(stream,
251 		" TP: 0x%016lx   T0: 0x%016lx T1: 0x%016lx T2: 0x%016lx\n",
252 		core.regs.tp, core.regs.t0, core.regs.t1, core.regs.t2);
253 	fprintf(stream,
254 		" S0: 0x%016lx   S1: 0x%016lx A0: 0x%016lx A1: 0x%016lx\n",
255 		core.regs.s0, core.regs.s1, core.regs.a0, core.regs.a1);
256 	fprintf(stream,
257 		" A2: 0x%016lx   A3: 0x%016lx A4: 0x%016lx A5: 0x%016lx\n",
258 		core.regs.a2, core.regs.a3, core.regs.a4, core.regs.a5);
259 	fprintf(stream,
260 		" A6: 0x%016lx   A7: 0x%016lx S2: 0x%016lx S3: 0x%016lx\n",
261 		core.regs.a6, core.regs.a7, core.regs.s2, core.regs.s3);
262 	fprintf(stream,
263 		" S4: 0x%016lx   S5: 0x%016lx S6: 0x%016lx S7: 0x%016lx\n",
264 		core.regs.s4, core.regs.s5, core.regs.s6, core.regs.s7);
265 	fprintf(stream,
266 		" S8: 0x%016lx   S9: 0x%016lx S10: 0x%016lx S11: 0x%016lx\n",
267 		core.regs.s8, core.regs.s9, core.regs.s10, core.regs.s11);
268 	fprintf(stream,
269 		" T3: 0x%016lx   T4: 0x%016lx T5: 0x%016lx T6: 0x%016lx\n",
270 		core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6);
271 }
272 
273 static void __aligned(16) guest_unexp_trap(void)
274 {
275 	sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT,
276 		  KVM_RISCV_SELFTESTS_SBI_UNEXP,
277 		  0, 0, 0, 0, 0, 0);
278 }
279 
280 struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
281 				  void *guest_code)
282 {
283 	int r;
284 	size_t stack_size;
285 	unsigned long stack_vaddr;
286 	unsigned long current_gp = 0;
287 	struct kvm_mp_state mps;
288 	struct kvm_vcpu *vcpu;
289 
290 	stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
291 					     vm->page_size;
292 	stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
293 				       DEFAULT_RISCV_GUEST_STACK_VADDR_MIN,
294 				       MEM_REGION_DATA);
295 
296 	vcpu = __vm_vcpu_add(vm, vcpu_id);
297 	riscv_vcpu_mmu_setup(vcpu);
298 
299 	/*
300 	 * With SBI HSM support in KVM RISC-V, all secondary VCPUs are
301 	 * powered-off by default so we ensure that all secondary VCPUs
302 	 * are powered-on using KVM_SET_MP_STATE ioctl().
303 	 */
304 	mps.mp_state = KVM_MP_STATE_RUNNABLE;
305 	r = __vcpu_ioctl(vcpu, KVM_SET_MP_STATE, &mps);
306 	TEST_ASSERT(!r, "IOCTL KVM_SET_MP_STATE failed (error %d)", r);
307 
308 	/* Setup global pointer of guest to be same as the host */
309 	asm volatile (
310 		"add %0, gp, zero" : "=r" (current_gp) : : "memory");
311 	vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.gp), current_gp);
312 
313 	/* Setup stack pointer and program counter of guest */
314 	vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_vaddr + stack_size);
315 	vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code);
316 
317 	/* Setup default exception vector of guest */
318 	vcpu_set_reg(vcpu, RISCV_CSR_REG(stvec), (unsigned long)guest_unexp_trap);
319 
320 	return vcpu;
321 }
322 
323 void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
324 {
325 	va_list ap;
326 	uint64_t id = RISCV_CORE_REG(regs.a0);
327 	int i;
328 
329 	TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
330 		    "  num: %u\n", num);
331 
332 	va_start(ap, num);
333 
334 	for (i = 0; i < num; i++) {
335 		switch (i) {
336 		case 0:
337 			id = RISCV_CORE_REG(regs.a0);
338 			break;
339 		case 1:
340 			id = RISCV_CORE_REG(regs.a1);
341 			break;
342 		case 2:
343 			id = RISCV_CORE_REG(regs.a2);
344 			break;
345 		case 3:
346 			id = RISCV_CORE_REG(regs.a3);
347 			break;
348 		case 4:
349 			id = RISCV_CORE_REG(regs.a4);
350 			break;
351 		case 5:
352 			id = RISCV_CORE_REG(regs.a5);
353 			break;
354 		case 6:
355 			id = RISCV_CORE_REG(regs.a6);
356 			break;
357 		case 7:
358 			id = RISCV_CORE_REG(regs.a7);
359 			break;
360 		}
361 		vcpu_set_reg(vcpu, id, va_arg(ap, uint64_t));
362 	}
363 
364 	va_end(ap);
365 }
366 
/*
 * Intentionally empty: this file performs no unhandled-exception check
 * for @vcpu.
 */
void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
{
}
370