1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * tools/testing/selftests/kvm/lib/kvm_util.c
4  *
5  * Copyright (C) 2018, Google LLC.
6  */
7 
8 #include "test_util.h"
9 #include "kvm_util.h"
10 #include "kvm_util_internal.h"
11 #include "processor.h"
12 
13 #include <assert.h>
14 #include <sys/mman.h>
15 #include <sys/types.h>
16 #include <sys/stat.h>
17 #include <unistd.h>
18 #include <linux/kernel.h>
19 
20 #define KVM_UTIL_PGS_PER_HUGEPG 512
21 #define KVM_UTIL_MIN_PFN	2
22 
/*
 * Round the address x up to the nearest multiple of size and return it.
 * size must be a non-zero power of two; any other value trips the
 * TEST_ASSERT below.
 */
static void *align(void *x, size_t size)
{
	size_t low_bits = size - 1;
	size_t addr = (size_t) x;

	TEST_ASSERT(size != 0 && !(size & (size - 1)),
		    "size not a power of 2: %lu", size);

	/* Bump past the boundary, then drop the low bits. */
	addr += low_bits;
	addr &= ~low_bits;

	return (void *) addr;
}
31 
32 /*
33  * Capability
34  *
35  * Input Args:
36  *   cap - Capability
37  *
38  * Output Args: None
39  *
40  * Return:
41  *   On success, the Value corresponding to the capability (KVM_CAP_*)
42  *   specified by the value of cap.  On failure a TEST_ASSERT failure
43  *   is produced.
44  *
45  * Looks up and returns the value corresponding to the capability
46  * (KVM_CAP_*) given by cap.
47  */
48 int kvm_check_cap(long cap)
49 {
50 	int ret;
51 	int kvm_fd;
52 
53 	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
54 	if (kvm_fd < 0)
55 		exit(KSFT_SKIP);
56 
57 	ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
58 	TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
59 		"  rc: %i errno: %i", ret, errno);
60 
61 	close(kvm_fd);
62 
63 	return ret;
64 }
65 
66 /* VM Enable Capability
67  *
68  * Input Args:
69  *   vm - Virtual Machine
70  *   cap - Capability
71  *
72  * Output Args: None
73  *
74  * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
75  *
76  * Enables a capability (KVM_CAP_*) on the VM.
77  */
78 int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
79 {
80 	int ret;
81 
82 	ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap);
83 	TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n"
84 		"  rc: %i errno: %i", ret, errno);
85 
86 	return ret;
87 }
88 
89 static void vm_open(struct kvm_vm *vm, int perm)
90 {
91 	vm->kvm_fd = open(KVM_DEV_PATH, perm);
92 	if (vm->kvm_fd < 0)
93 		exit(KSFT_SKIP);
94 
95 	if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
96 		print_skip("immediate_exit not available");
97 		exit(KSFT_SKIP);
98 	}
99 
100 	vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type);
101 	TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
102 		"rc: %i errno: %i", vm->fd, errno);
103 }
104 
/*
 * Human-readable description of each guest mode: physical-address bits,
 * virtual-address bits and page size.  The _Static_assert below breaks
 * the build when the number of strings differs from NUM_VM_MODES.
 * NOTE(review): presumably indexed by enum vm_guest_mode value, so the
 * order must follow the enum declaration -- confirm against kvm_util.h.
 */
const char * const vm_guest_mode_string[] = {
	"PA-bits:52,  VA-bits:48,  4K pages",
	"PA-bits:52,  VA-bits:48, 64K pages",
	"PA-bits:48,  VA-bits:48,  4K pages",
	"PA-bits:48,  VA-bits:48, 64K pages",
	"PA-bits:40,  VA-bits:48,  4K pages",
	"PA-bits:40,  VA-bits:48, 64K pages",
	"PA-bits:ANY, VA-bits:48,  4K pages",
};
_Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
	       "Missing new mode strings?");
116 
/*
 * Address-space parameters of one guest mode.  _vm_create() copies these
 * into the VM it creates.
 */
struct vm_guest_mode_params {
	unsigned int pa_bits;		/* guest physical address width, in bits */
	unsigned int va_bits;		/* guest virtual address width, in bits */
	unsigned int page_size;		/* guest page size, in bytes */
	unsigned int page_shift;	/* log2 of page_size */
};
123 
124 static const struct vm_guest_mode_params vm_guest_mode_params[] = {
125 	{ 52, 48,  0x1000, 12 },
126 	{ 52, 48, 0x10000, 16 },
127 	{ 48, 48,  0x1000, 12 },
128 	{ 48, 48, 0x10000, 16 },
129 	{ 40, 48,  0x1000, 12 },
130 	{ 40, 48, 0x10000, 16 },
131 	{  0,  0,  0x1000, 12 },
132 };
133 _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
134 	       "Missing new mode params?");
135 
136 /*
137  * VM Create
138  *
139  * Input Args:
140  *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
141  *   phy_pages - Physical memory pages
142  *   perm - permission
143  *
144  * Output Args: None
145  *
146  * Return:
147  *   Pointer to opaque structure that describes the created VM.
148  *
149  * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
150  * When phy_pages is non-zero, a memory region of phy_pages physical pages
151  * is created and mapped starting at guest physical address 0.  The file
152  * descriptor to control the created VM is created with the permissions
153  * given by perm (e.g. O_RDWR).
154  */
155 struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
156 {
157 	struct kvm_vm *vm;
158 
159 	pr_debug("%s: mode='%s' pages='%ld' perm='%d'\n", __func__,
160 		 vm_guest_mode_string(mode), phy_pages, perm);
161 
162 	vm = calloc(1, sizeof(*vm));
163 	TEST_ASSERT(vm != NULL, "Insufficient Memory");
164 
165 	INIT_LIST_HEAD(&vm->vcpus);
166 	INIT_LIST_HEAD(&vm->userspace_mem_regions);
167 
168 	vm->mode = mode;
169 	vm->type = 0;
170 
171 	vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
172 	vm->va_bits = vm_guest_mode_params[mode].va_bits;
173 	vm->page_size = vm_guest_mode_params[mode].page_size;
174 	vm->page_shift = vm_guest_mode_params[mode].page_shift;
175 
176 	/* Setup mode specific traits. */
177 	switch (vm->mode) {
178 	case VM_MODE_P52V48_4K:
179 		vm->pgtable_levels = 4;
180 		break;
181 	case VM_MODE_P52V48_64K:
182 		vm->pgtable_levels = 3;
183 		break;
184 	case VM_MODE_P48V48_4K:
185 		vm->pgtable_levels = 4;
186 		break;
187 	case VM_MODE_P48V48_64K:
188 		vm->pgtable_levels = 3;
189 		break;
190 	case VM_MODE_P40V48_4K:
191 		vm->pgtable_levels = 4;
192 		break;
193 	case VM_MODE_P40V48_64K:
194 		vm->pgtable_levels = 3;
195 		break;
196 	case VM_MODE_PXXV48_4K:
197 #ifdef __x86_64__
198 		kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
199 		/*
200 		 * Ignore KVM support for 5-level paging (vm->va_bits == 57),
201 		 * it doesn't take effect unless a CR4.LA57 is set, which it
202 		 * isn't for this VM_MODE.
203 		 */
204 		TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
205 			    "Linear address width (%d bits) not supported",
206 			    vm->va_bits);
207 		pr_debug("Guest physical address width detected: %d\n",
208 			 vm->pa_bits);
209 		vm->pgtable_levels = 4;
210 		vm->va_bits = 48;
211 #else
212 		TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
213 #endif
214 		break;
215 	default:
216 		TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
217 	}
218 
219 #ifdef __aarch64__
220 	if (vm->pa_bits != 40)
221 		vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
222 #endif
223 
224 	vm_open(vm, perm);
225 
226 	/* Limit to VA-bit canonical virtual addresses. */
227 	vm->vpages_valid = sparsebit_alloc();
228 	sparsebit_set_num(vm->vpages_valid,
229 		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
230 	sparsebit_set_num(vm->vpages_valid,
231 		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
232 		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
233 
234 	/* Limit physical addresses to PA-bits. */
235 	vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
236 
237 	/* Allocate and setup memory for guest. */
238 	vm->vpages_mapped = sparsebit_alloc();
239 	if (phy_pages != 0)
240 		vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
241 					    0, 0, phy_pages, 0);
242 
243 	return vm;
244 }
245 
246 struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
247 {
248 	return _vm_create(mode, phy_pages, perm);
249 }
250 
251 /*
252  * VM Restart
253  *
254  * Input Args:
255  *   vm - VM that has been released before
256  *   perm - permission
257  *
258  * Output Args: None
259  *
260  * Reopens the file descriptors associated to the VM and reinstates the
261  * global state, such as the irqchip and the memory regions that are mapped
262  * into the guest.
263  */
264 void kvm_vm_restart(struct kvm_vm *vmp, int perm)
265 {
266 	struct userspace_mem_region *region;
267 
268 	vm_open(vmp, perm);
269 	if (vmp->has_irqchip)
270 		vm_create_irqchip(vmp);
271 
272 	list_for_each_entry(region, &vmp->userspace_mem_regions, list) {
273 		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
274 		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
275 			    "  rc: %i errno: %i\n"
276 			    "  slot: %u flags: 0x%x\n"
277 			    "  guest_phys_addr: 0x%llx size: 0x%llx",
278 			    ret, errno, region->region.slot,
279 			    region->region.flags,
280 			    region->region.guest_phys_addr,
281 			    region->region.memory_size);
282 	}
283 }
284 
285 void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
286 {
287 	struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
288 	int ret;
289 
290 	ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
291 	TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s",
292 		    __func__, strerror(-ret));
293 }
294 
295 void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
296 			    uint64_t first_page, uint32_t num_pages)
297 {
298 	struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot,
299 		                            .first_page = first_page,
300 	                                    .num_pages = num_pages };
301 	int ret;
302 
303 	ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
304 	TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s",
305 		    __func__, strerror(-ret));
306 }
307 
308 /*
309  * Userspace Memory Region Find
310  *
311  * Input Args:
312  *   vm - Virtual Machine
313  *   start - Starting VM physical address
314  *   end - Ending VM physical address, inclusive.
315  *
316  * Output Args: None
317  *
318  * Return:
319  *   Pointer to overlapping region, NULL if no such region.
320  *
321  * Searches for a region with any physical memory that overlaps with
322  * any portion of the guest physical addresses from start to end
323  * inclusive.  If multiple overlapping regions exist, a pointer to any
324  * of the regions is returned.  Null is returned only when no overlapping
325  * region exists.
326  */
327 static struct userspace_mem_region *
328 userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
329 {
330 	struct userspace_mem_region *region;
331 
332 	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
333 		uint64_t existing_start = region->region.guest_phys_addr;
334 		uint64_t existing_end = region->region.guest_phys_addr
335 			+ region->region.memory_size - 1;
336 		if (start <= existing_end && end >= existing_start)
337 			return region;
338 	}
339 
340 	return NULL;
341 }
342 
343 /*
344  * KVM Userspace Memory Region Find
345  *
346  * Input Args:
347  *   vm - Virtual Machine
348  *   start - Starting VM physical address
349  *   end - Ending VM physical address, inclusive.
350  *
351  * Output Args: None
352  *
353  * Return:
354  *   Pointer to overlapping region, NULL if no such region.
355  *
356  * Public interface to userspace_mem_region_find. Allows tests to look up
357  * the memslot datastructure for a given range of guest physical memory.
358  */
359 struct kvm_userspace_memory_region *
360 kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
361 				 uint64_t end)
362 {
363 	struct userspace_mem_region *region;
364 
365 	region = userspace_mem_region_find(vm, start, end);
366 	if (!region)
367 		return NULL;
368 
369 	return &region->region;
370 }
371 
372 /*
373  * VCPU Find
374  *
375  * Input Args:
376  *   vm - Virtual Machine
377  *   vcpuid - VCPU ID
378  *
379  * Output Args: None
380  *
381  * Return:
382  *   Pointer to VCPU structure
383  *
384  * Locates a vcpu structure that describes the VCPU specified by vcpuid and
385  * returns a pointer to it.  Returns NULL if the VM doesn't contain a VCPU
386  * for the specified vcpuid.
387  */
388 struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
389 {
390 	struct vcpu *vcpu;
391 
392 	list_for_each_entry(vcpu, &vm->vcpus, list) {
393 		if (vcpu->id == vcpuid)
394 			return vcpu;
395 	}
396 
397 	return NULL;
398 }
399 
400 /*
401  * VM VCPU Remove
402  *
403  * Input Args:
404  *   vcpu - VCPU to remove
405  *
406  * Output Args: None
407  *
408  * Return: None, TEST_ASSERT failures for all error conditions
409  *
410  * Removes a vCPU from a VM and frees its resources.
411  */
412 static void vm_vcpu_rm(struct vcpu *vcpu)
413 {
414 	int ret;
415 
416 	ret = munmap(vcpu->state, sizeof(*vcpu->state));
417 	TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
418 		"errno: %i", ret, errno);
419 	close(vcpu->fd);
420 	TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
421 		"errno: %i", ret, errno);
422 
423 	list_del(&vcpu->list);
424 	free(vcpu);
425 }
426 
427 void kvm_vm_release(struct kvm_vm *vmp)
428 {
429 	struct vcpu *vcpu, *tmp;
430 	int ret;
431 
432 	list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
433 		vm_vcpu_rm(vcpu);
434 
435 	ret = close(vmp->fd);
436 	TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
437 		"  vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
438 
439 	close(vmp->kvm_fd);
440 	TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n"
441 		"  vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
442 }
443 
444 static void __vm_mem_region_delete(struct kvm_vm *vm,
445 				   struct userspace_mem_region *region)
446 {
447 	int ret;
448 
449 	list_del(&region->list);
450 
451 	region->region.memory_size = 0;
452 	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
453 	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
454 		    "rc: %i errno: %i", ret, errno);
455 
456 	sparsebit_free(&region->unused_phy_pages);
457 	ret = munmap(region->mmap_start, region->mmap_size);
458 	TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", ret, errno);
459 
460 	free(region);
461 }
462 
463 /*
464  * Destroys and frees the VM pointed to by vmp.
465  */
466 void kvm_vm_free(struct kvm_vm *vmp)
467 {
468 	struct userspace_mem_region *region, *tmp;
469 
470 	if (vmp == NULL)
471 		return;
472 
473 	/* Free userspace_mem_regions. */
474 	list_for_each_entry_safe(region, tmp, &vmp->userspace_mem_regions, list)
475 		__vm_mem_region_delete(vmp, region);
476 
477 	/* Free sparsebit arrays. */
478 	sparsebit_free(&vmp->vpages_valid);
479 	sparsebit_free(&vmp->vpages_mapped);
480 
481 	kvm_vm_release(vmp);
482 
483 	/* Free the structure describing the VM. */
484 	free(vmp);
485 }
486 
487 /*
488  * Memory Compare, host virtual to guest virtual
489  *
490  * Input Args:
491  *   hva - Starting host virtual address
492  *   vm - Virtual Machine
493  *   gva - Starting guest virtual address
494  *   len - number of bytes to compare
495  *
496  * Output Args: None
497  *
498  * Input/Output Args: None
499  *
500  * Return:
501  *   Returns 0 if the bytes starting at hva for a length of len
502  *   are equal the guest virtual bytes starting at gva.  Returns
503  *   a value < 0, if bytes at hva are less than those at gva.
504  *   Otherwise a value > 0 is returned.
505  *
506  * Compares the bytes starting at the host virtual address hva, for
507  * a length of len, to the guest bytes starting at the guest virtual
508  * address given by gva.
509  */
510 int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
511 {
512 	size_t amt;
513 
514 	/*
515 	 * Compare a batch of bytes until either a match is found
516 	 * or all the bytes have been compared.
517 	 */
518 	for (uintptr_t offset = 0; offset < len; offset += amt) {
519 		uintptr_t ptr1 = (uintptr_t)hva + offset;
520 
521 		/*
522 		 * Determine host address for guest virtual address
523 		 * at offset.
524 		 */
525 		uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
526 
527 		/*
528 		 * Determine amount to compare on this pass.
529 		 * Don't allow the comparsion to cross a page boundary.
530 		 */
531 		amt = len - offset;
532 		if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
533 			amt = vm->page_size - (ptr1 % vm->page_size);
534 		if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
535 			amt = vm->page_size - (ptr2 % vm->page_size);
536 
537 		assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
538 		assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
539 
540 		/*
541 		 * Perform the comparison.  If there is a difference
542 		 * return that result to the caller, otherwise need
543 		 * to continue on looking for a mismatch.
544 		 */
545 		int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
546 		if (ret != 0)
547 			return ret;
548 	}
549 
550 	/*
551 	 * No mismatch found.  Let the caller know the two memory
552 	 * areas are equal.
553 	 */
554 	return 0;
555 }
556 
557 /*
558  * VM Userspace Memory Region Add
559  *
560  * Input Args:
561  *   vm - Virtual Machine
562  *   backing_src - Storage source for this region.
563  *                 NULL to use anonymous memory.
564  *   guest_paddr - Starting guest physical address
565  *   slot - KVM region slot
566  *   npages - Number of physical pages
567  *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
568  *
569  * Output Args: None
570  *
571  * Return: None
572  *
573  * Allocates a memory area of the number of pages specified by npages
574  * and maps it to the VM specified by vm, at a starting physical address
575  * given by guest_paddr.  The region is created with a KVM region slot
576  * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM.  The
577  * region is created with the flags given by flags.
578  */
579 void vm_userspace_mem_region_add(struct kvm_vm *vm,
580 	enum vm_mem_backing_src_type src_type,
581 	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
582 	uint32_t flags)
583 {
584 	int ret;
585 	struct userspace_mem_region *region;
586 	size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
587 	size_t alignment;
588 
589 	TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
590 		"Number of guest pages is not compatible with the host. "
591 		"Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));
592 
593 	TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
594 		"address not on a page boundary.\n"
595 		"  guest_paddr: 0x%lx vm->page_size: 0x%x",
596 		guest_paddr, vm->page_size);
597 	TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
598 		<= vm->max_gfn, "Physical range beyond maximum "
599 		"supported physical address,\n"
600 		"  guest_paddr: 0x%lx npages: 0x%lx\n"
601 		"  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
602 		guest_paddr, npages, vm->max_gfn, vm->page_size);
603 
604 	/*
605 	 * Confirm a mem region with an overlapping address doesn't
606 	 * already exist.
607 	 */
608 	region = (struct userspace_mem_region *) userspace_mem_region_find(
609 		vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
610 	if (region != NULL)
611 		TEST_FAIL("overlapping userspace_mem_region already "
612 			"exists\n"
613 			"  requested guest_paddr: 0x%lx npages: 0x%lx "
614 			"page_size: 0x%x\n"
615 			"  existing guest_paddr: 0x%lx size: 0x%lx",
616 			guest_paddr, npages, vm->page_size,
617 			(uint64_t) region->region.guest_phys_addr,
618 			(uint64_t) region->region.memory_size);
619 
620 	/* Confirm no region with the requested slot already exists. */
621 	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
622 		if (region->region.slot != slot)
623 			continue;
624 
625 		TEST_FAIL("A mem region with the requested slot "
626 			"already exists.\n"
627 			"  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
628 			"  existing slot: %u paddr: 0x%lx size: 0x%lx",
629 			slot, guest_paddr, npages,
630 			region->region.slot,
631 			(uint64_t) region->region.guest_phys_addr,
632 			(uint64_t) region->region.memory_size);
633 	}
634 
635 	/* Allocate and initialize new mem region structure. */
636 	region = calloc(1, sizeof(*region));
637 	TEST_ASSERT(region != NULL, "Insufficient Memory");
638 	region->mmap_size = npages * vm->page_size;
639 
640 #ifdef __s390x__
641 	/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
642 	alignment = 0x100000;
643 #else
644 	alignment = 1;
645 #endif
646 
647 	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
648 		alignment = max(huge_page_size, alignment);
649 
650 	/* Add enough memory to align up if necessary */
651 	if (alignment > 1)
652 		region->mmap_size += alignment;
653 
654 	region->mmap_start = mmap(NULL, region->mmap_size,
655 				  PROT_READ | PROT_WRITE,
656 				  MAP_PRIVATE | MAP_ANONYMOUS
657 				  | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0),
658 				  -1, 0);
659 	TEST_ASSERT(region->mmap_start != MAP_FAILED,
660 		    "test_malloc failed, mmap_start: %p errno: %i",
661 		    region->mmap_start, errno);
662 
663 	/* Align host address */
664 	region->host_mem = align(region->mmap_start, alignment);
665 
666 	/* As needed perform madvise */
667 	if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) {
668 		struct stat statbuf;
669 
670 		ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf);
671 		TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT),
672 			    "stat /sys/kernel/mm/transparent_hugepage");
673 
674 		TEST_ASSERT(ret == 0 || src_type != VM_MEM_SRC_ANONYMOUS_THP,
675 			    "VM_MEM_SRC_ANONYMOUS_THP requires THP to be configured in the host kernel");
676 
677 		if (ret == 0) {
678 			ret = madvise(region->host_mem, npages * vm->page_size,
679 				      src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
680 			TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %x",
681 				    region->host_mem, npages * vm->page_size, src_type);
682 		}
683 	}
684 
685 	region->unused_phy_pages = sparsebit_alloc();
686 	sparsebit_set_num(region->unused_phy_pages,
687 		guest_paddr >> vm->page_shift, npages);
688 	region->region.slot = slot;
689 	region->region.flags = flags;
690 	region->region.guest_phys_addr = guest_paddr;
691 	region->region.memory_size = npages * vm->page_size;
692 	region->region.userspace_addr = (uintptr_t) region->host_mem;
693 	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
694 	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
695 		"  rc: %i errno: %i\n"
696 		"  slot: %u flags: 0x%x\n"
697 		"  guest_phys_addr: 0x%lx size: 0x%lx",
698 		ret, errno, slot, flags,
699 		guest_paddr, (uint64_t) region->region.memory_size);
700 
701 	/* Add to linked-list of memory regions. */
702 	list_add(&region->list, &vm->userspace_mem_regions);
703 }
704 
705 /*
706  * Memslot to region
707  *
708  * Input Args:
709  *   vm - Virtual Machine
710  *   memslot - KVM memory slot ID
711  *
712  * Output Args: None
713  *
714  * Return:
715  *   Pointer to memory region structure that describe memory region
716  *   using kvm memory slot ID given by memslot.  TEST_ASSERT failure
717  *   on error (e.g. currently no memory region using memslot as a KVM
718  *   memory slot ID).
719  */
720 struct userspace_mem_region *
721 memslot2region(struct kvm_vm *vm, uint32_t memslot)
722 {
723 	struct userspace_mem_region *region;
724 
725 	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
726 		if (region->region.slot == memslot)
727 			return region;
728 	}
729 
730 	fprintf(stderr, "No mem region with the requested slot found,\n"
731 		"  requested slot: %u\n", memslot);
732 	fputs("---- vm dump ----\n", stderr);
733 	vm_dump(stderr, vm, 2);
734 	TEST_FAIL("Mem region not found");
735 	return NULL;
736 }
737 
738 /*
739  * VM Memory Region Flags Set
740  *
741  * Input Args:
742  *   vm - Virtual Machine
743  *   flags - Starting guest physical address
744  *
745  * Output Args: None
746  *
747  * Return: None
748  *
749  * Sets the flags of the memory region specified by the value of slot,
750  * to the values given by flags.
751  */
752 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
753 {
754 	int ret;
755 	struct userspace_mem_region *region;
756 
757 	region = memslot2region(vm, slot);
758 
759 	region->region.flags = flags;
760 
761 	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
762 
763 	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
764 		"  rc: %i errno: %i slot: %u flags: 0x%x",
765 		ret, errno, slot, flags);
766 }
767 
768 /*
769  * VM Memory Region Move
770  *
771  * Input Args:
772  *   vm - Virtual Machine
773  *   slot - Slot of the memory region to move
774  *   new_gpa - Starting guest physical address
775  *
776  * Output Args: None
777  *
778  * Return: None
779  *
780  * Change the gpa of a memory region.
781  */
782 void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
783 {
784 	struct userspace_mem_region *region;
785 	int ret;
786 
787 	region = memslot2region(vm, slot);
788 
789 	region->region.guest_phys_addr = new_gpa;
790 
791 	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
792 
793 	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
794 		    "ret: %i errno: %i slot: %u new_gpa: 0x%lx",
795 		    ret, errno, slot, new_gpa);
796 }
797 
/*
 * VM Memory Region Delete
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to delete
 *
 * Output Args: None
 *
 * Return: None
 *
 * Resolves slot to its region with memslot2region() and tears that
 * region down with __vm_mem_region_delete().
 */
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
	struct userspace_mem_region *victim = memslot2region(vm, slot);

	__vm_mem_region_delete(vm, victim);
}
815 
816 /*
817  * VCPU mmap Size
818  *
819  * Input Args: None
820  *
821  * Output Args: None
822  *
823  * Return:
824  *   Size of VCPU state
825  *
826  * Returns the size of the structure pointed to by the return value
827  * of vcpu_state().
828  */
829 static int vcpu_mmap_sz(void)
830 {
831 	int dev_fd, ret;
832 
833 	dev_fd = open(KVM_DEV_PATH, O_RDONLY);
834 	if (dev_fd < 0)
835 		exit(KSFT_SKIP);
836 
837 	ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
838 	TEST_ASSERT(ret >= sizeof(struct kvm_run),
839 		"%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
840 		__func__, ret, errno);
841 
842 	close(dev_fd);
843 
844 	return ret;
845 }
846 
847 /*
848  * VM VCPU Add
849  *
850  * Input Args:
851  *   vm - Virtual Machine
852  *   vcpuid - VCPU ID
853  *
854  * Output Args: None
855  *
856  * Return: None
857  *
858  * Adds a virtual CPU to the VM specified by vm with the ID given by vcpuid.
859  * No additional VCPU setup is done.
860  */
861 void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
862 {
863 	struct vcpu *vcpu;
864 
865 	/* Confirm a vcpu with the specified id doesn't already exist. */
866 	vcpu = vcpu_find(vm, vcpuid);
867 	if (vcpu != NULL)
868 		TEST_FAIL("vcpu with the specified id "
869 			"already exists,\n"
870 			"  requested vcpuid: %u\n"
871 			"  existing vcpuid: %u state: %p",
872 			vcpuid, vcpu->id, vcpu->state);
873 
874 	/* Allocate and initialize new vcpu structure. */
875 	vcpu = calloc(1, sizeof(*vcpu));
876 	TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
877 	vcpu->id = vcpuid;
878 	vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
879 	TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
880 		vcpu->fd, errno);
881 
882 	TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
883 		"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
884 		vcpu_mmap_sz(), sizeof(*vcpu->state));
885 	vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
886 		PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
887 	TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
888 		"vcpu id: %u errno: %i", vcpuid, errno);
889 
890 	/* Add to linked-list of VCPUs. */
891 	list_add(&vcpu->list, &vm->vcpus);
892 }
893 
894 /*
895  * VM Virtual Address Unused Gap
896  *
897  * Input Args:
898  *   vm - Virtual Machine
899  *   sz - Size (bytes)
900  *   vaddr_min - Minimum Virtual Address
901  *
902  * Output Args: None
903  *
904  * Return:
905  *   Lowest virtual address at or below vaddr_min, with at least
906  *   sz unused bytes.  TEST_ASSERT failure if no area of at least
907  *   size sz is available.
908  *
909  * Within the VM specified by vm, locates the lowest starting virtual
910  * address >= vaddr_min, that has at least sz unallocated bytes.  A
911  * TEST_ASSERT failure occurs for invalid input or no area of at least
912  * sz unallocated bytes >= vaddr_min is available.
913  */
914 static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
915 				      vm_vaddr_t vaddr_min)
916 {
917 	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
918 
919 	/* Determine lowest permitted virtual page index. */
920 	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
921 	if ((pgidx_start * vm->page_size) < vaddr_min)
922 		goto no_va_found;
923 
924 	/* Loop over section with enough valid virtual page indexes. */
925 	if (!sparsebit_is_set_num(vm->vpages_valid,
926 		pgidx_start, pages))
927 		pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
928 			pgidx_start, pages);
929 	do {
930 		/*
931 		 * Are there enough unused virtual pages available at
932 		 * the currently proposed starting virtual page index.
933 		 * If not, adjust proposed starting index to next
934 		 * possible.
935 		 */
936 		if (sparsebit_is_clear_num(vm->vpages_mapped,
937 			pgidx_start, pages))
938 			goto va_found;
939 		pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
940 			pgidx_start, pages);
941 		if (pgidx_start == 0)
942 			goto no_va_found;
943 
944 		/*
945 		 * If needed, adjust proposed starting virtual address,
946 		 * to next range of valid virtual addresses.
947 		 */
948 		if (!sparsebit_is_set_num(vm->vpages_valid,
949 			pgidx_start, pages)) {
950 			pgidx_start = sparsebit_next_set_num(
951 				vm->vpages_valid, pgidx_start, pages);
952 			if (pgidx_start == 0)
953 				goto no_va_found;
954 		}
955 	} while (pgidx_start != 0);
956 
957 no_va_found:
958 	TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages);
959 
960 	/* NOT REACHED */
961 	return -1;
962 
963 va_found:
964 	TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
965 		pgidx_start, pages),
966 		"Unexpected, invalid virtual page index range,\n"
967 		"  pgidx_start: 0x%lx\n"
968 		"  pages: 0x%lx",
969 		pgidx_start, pages);
970 	TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
971 		pgidx_start, pages),
972 		"Unexpected, pages already mapped,\n"
973 		"  pgidx_start: 0x%lx\n"
974 		"  pages: 0x%lx",
975 		pgidx_start, pages);
976 
977 	return pgidx_start * vm->page_size;
978 }
979 
980 /*
981  * VM Virtual Address Allocate
982  *
983  * Input Args:
984  *   vm - Virtual Machine
985  *   sz - Size in bytes
986  *   vaddr_min - Minimum starting virtual address
987  *   data_memslot - Memory region slot for data pages
988  *   pgd_memslot - Memory region slot for new virtual translation tables
989  *
990  * Output Args: None
991  *
992  * Return:
993  *   Starting guest virtual address
994  *
995  * Allocates at least sz bytes within the virtual address space of the vm
996  * given by vm.  The allocated bytes are mapped to a virtual address >=
997  * the address given by vaddr_min.  Note that each allocation uses a
998  * a unique set of pages, with the minimum real allocation being at least
999  * a page.
1000  */
1001 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
1002 			  uint32_t data_memslot, uint32_t pgd_memslot)
1003 {
1004 	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
1005 
1006 	virt_pgd_alloc(vm, pgd_memslot);
1007 
1008 	/*
1009 	 * Find an unused range of virtual page addresses of at least
1010 	 * pages in length.
1011 	 */
1012 	vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
1013 
1014 	/* Map the virtual pages. */
1015 	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
1016 		pages--, vaddr += vm->page_size) {
1017 		vm_paddr_t paddr;
1018 
1019 		paddr = vm_phy_page_alloc(vm,
1020 				KVM_UTIL_MIN_PFN * vm->page_size, data_memslot);
1021 
1022 		virt_pg_map(vm, vaddr, paddr, pgd_memslot);
1023 
1024 		sparsebit_set(vm->vpages_mapped,
1025 			vaddr >> vm->page_shift);
1026 	}
1027 
1028 	return vaddr_start;
1029 }
1030 
1031 /*
1032  * Map a range of VM virtual address to the VM's physical address
1033  *
1034  * Input Args:
1035  *   vm - Virtual Machine
 *   vaddr - Virtual address to map
1037  *   paddr - VM Physical Address
1038  *   npages - The number of pages to map
1039  *   pgd_memslot - Memory region slot for new virtual translation tables
1040  *
1041  * Output Args: None
1042  *
1043  * Return: None
1044  *
1045  * Within the VM given by @vm, creates a virtual translation for
1046  * @npages starting at @vaddr to the page range starting at @paddr.
1047  */
1048 void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
1049 	      unsigned int npages, uint32_t pgd_memslot)
1050 {
1051 	size_t page_size = vm->page_size;
1052 	size_t size = npages * page_size;
1053 
1054 	TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
1055 	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
1056 
1057 	while (npages--) {
1058 		virt_pg_map(vm, vaddr, paddr, pgd_memslot);
1059 		vaddr += page_size;
1060 		paddr += page_size;
1061 	}
1062 }
1063 
1064 /*
1065  * Address VM Physical to Host Virtual
1066  *
1067  * Input Args:
1068  *   vm - Virtual Machine
1069  *   gpa - VM physical address
1070  *
1071  * Output Args: None
1072  *
1073  * Return:
1074  *   Equivalent host virtual address
1075  *
1076  * Locates the memory region containing the VM physical address given
1077  * by gpa, within the VM given by vm.  When found, the host virtual
1078  * address providing the memory to the vm physical address is returned.
1079  * A TEST_ASSERT failure occurs if no region containing gpa exists.
1080  */
1081 void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
1082 {
1083 	struct userspace_mem_region *region;
1084 
1085 	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
1086 		if ((gpa >= region->region.guest_phys_addr)
1087 			&& (gpa <= (region->region.guest_phys_addr
1088 				+ region->region.memory_size - 1)))
1089 			return (void *) ((uintptr_t) region->host_mem
1090 				+ (gpa - region->region.guest_phys_addr));
1091 	}
1092 
1093 	TEST_FAIL("No vm physical memory at 0x%lx", gpa);
1094 	return NULL;
1095 }
1096 
1097 /*
1098  * Address Host Virtual to VM Physical
1099  *
1100  * Input Args:
1101  *   vm - Virtual Machine
1102  *   hva - Host virtual address
1103  *
1104  * Output Args: None
1105  *
1106  * Return:
1107  *   Equivalent VM physical address
1108  *
1109  * Locates the memory region containing the host virtual address given
1110  * by hva, within the VM given by vm.  When found, the equivalent
1111  * VM physical address is returned. A TEST_ASSERT failure occurs if no
1112  * region containing hva exists.
1113  */
1114 vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
1115 {
1116 	struct userspace_mem_region *region;
1117 
1118 	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
1119 		if ((hva >= region->host_mem)
1120 			&& (hva <= (region->host_mem
1121 				+ region->region.memory_size - 1)))
1122 			return (vm_paddr_t) ((uintptr_t)
1123 				region->region.guest_phys_addr
1124 				+ (hva - (uintptr_t) region->host_mem));
1125 	}
1126 
1127 	TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
1128 	return -1;
1129 }
1130 
1131 /*
1132  * VM Create IRQ Chip
1133  *
1134  * Input Args:
1135  *   vm - Virtual Machine
1136  *
1137  * Output Args: None
1138  *
1139  * Return: None
1140  *
1141  * Creates an interrupt controller chip for the VM specified by vm.
1142  */
1143 void vm_create_irqchip(struct kvm_vm *vm)
1144 {
1145 	int ret;
1146 
1147 	ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
1148 	TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
1149 		"rc: %i errno: %i", ret, errno);
1150 
1151 	vm->has_irqchip = true;
1152 }
1153 
1154 /*
1155  * VM VCPU State
1156  *
1157  * Input Args:
1158  *   vm - Virtual Machine
1159  *   vcpuid - VCPU ID
1160  *
1161  * Output Args: None
1162  *
1163  * Return:
1164  *   Pointer to structure that describes the state of the VCPU.
1165  *
1166  * Locates and returns a pointer to a structure that describes the
1167  * state of the VCPU with the given vcpuid.
1168  */
1169 struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
1170 {
1171 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1172 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1173 
1174 	return vcpu->state;
1175 }
1176 
1177 /*
1178  * VM VCPU Run
1179  *
1180  * Input Args:
1181  *   vm - Virtual Machine
1182  *   vcpuid - VCPU ID
1183  *
1184  * Output Args: None
1185  *
1186  * Return: None
1187  *
1188  * Switch to executing the code for the VCPU given by vcpuid, within the VM
1189  * given by vm.
1190  */
void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
{
	int ret;

	/* Asserting wrapper: any non-zero result from _vcpu_run() is fatal. */
	ret = _vcpu_run(vm, vcpuid);
	TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, rc: %i errno: %i",
		    ret, errno);
}
1197 
1198 int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
1199 {
1200 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1201 	int rc;
1202 
1203 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1204 	do {
1205 		rc = ioctl(vcpu->fd, KVM_RUN, NULL);
1206 	} while (rc == -1 && errno == EINTR);
1207 	return rc;
1208 }
1209 
1210 void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid)
1211 {
1212 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1213 	int ret;
1214 
1215 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1216 
1217 	vcpu->state->immediate_exit = 1;
1218 	ret = ioctl(vcpu->fd, KVM_RUN, NULL);
1219 	vcpu->state->immediate_exit = 0;
1220 
1221 	TEST_ASSERT(ret == -1 && errno == EINTR,
1222 		    "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
1223 		    ret, errno);
1224 }
1225 
1226 void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
1227 			  struct kvm_guest_debug *debug)
1228 {
1229 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1230 	int ret = ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, debug);
1231 
1232 	TEST_ASSERT(ret == 0, "KVM_SET_GUEST_DEBUG failed: %d", ret);
1233 }
1234 
1235 /*
1236  * VM VCPU Set MP State
1237  *
1238  * Input Args:
1239  *   vm - Virtual Machine
1240  *   vcpuid - VCPU ID
1241  *   mp_state - mp_state to be set
1242  *
1243  * Output Args: None
1244  *
1245  * Return: None
1246  *
1247  * Sets the MP state of the VCPU given by vcpuid, to the state given
1248  * by mp_state.
1249  */
1250 void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
1251 		       struct kvm_mp_state *mp_state)
1252 {
1253 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1254 	int ret;
1255 
1256 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1257 
1258 	ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
1259 	TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
1260 		"rc: %i errno: %i", ret, errno);
1261 }
1262 
1263 /*
1264  * VM VCPU Regs Get
1265  *
1266  * Input Args:
1267  *   vm - Virtual Machine
1268  *   vcpuid - VCPU ID
1269  *
1270  * Output Args:
1271  *   regs - current state of VCPU regs
1272  *
1273  * Return: None
1274  *
1275  * Obtains the current register state for the VCPU specified by vcpuid
1276  * and stores it at the location given by regs.
1277  */
1278 void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
1279 {
1280 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1281 	int ret;
1282 
1283 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1284 
1285 	ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
1286 	TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
1287 		ret, errno);
1288 }
1289 
1290 /*
1291  * VM VCPU Regs Set
1292  *
1293  * Input Args:
1294  *   vm - Virtual Machine
1295  *   vcpuid - VCPU ID
1296  *   regs - Values to set VCPU regs to
1297  *
1298  * Output Args: None
1299  *
1300  * Return: None
1301  *
1302  * Sets the regs of the VCPU specified by vcpuid to the values
1303  * given by regs.
1304  */
1305 void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
1306 {
1307 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1308 	int ret;
1309 
1310 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1311 
1312 	ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
1313 	TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
1314 		ret, errno);
1315 }
1316 
1317 #ifdef __KVM_HAVE_VCPU_EVENTS
1318 void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
1319 		     struct kvm_vcpu_events *events)
1320 {
1321 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1322 	int ret;
1323 
1324 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1325 
1326 	ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
1327 	TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i",
1328 		ret, errno);
1329 }
1330 
1331 void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
1332 		     struct kvm_vcpu_events *events)
1333 {
1334 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1335 	int ret;
1336 
1337 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1338 
1339 	ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
1340 	TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
1341 		ret, errno);
1342 }
1343 #endif
1344 
1345 #ifdef __x86_64__
1346 void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
1347 			   struct kvm_nested_state *state)
1348 {
1349 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1350 	int ret;
1351 
1352 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1353 
1354 	ret = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, state);
1355 	TEST_ASSERT(ret == 0,
1356 		"KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
1357 		ret, errno);
1358 }
1359 
1360 int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
1361 			  struct kvm_nested_state *state, bool ignore_error)
1362 {
1363 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1364 	int ret;
1365 
1366 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1367 
1368 	ret = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, state);
1369 	if (!ignore_error) {
1370 		TEST_ASSERT(ret == 0,
1371 			"KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
1372 			ret, errno);
1373 	}
1374 
1375 	return ret;
1376 }
1377 #endif
1378 
1379 /*
1380  * VM VCPU System Regs Get
1381  *
1382  * Input Args:
1383  *   vm - Virtual Machine
1384  *   vcpuid - VCPU ID
1385  *
1386  * Output Args:
1387  *   sregs - current state of VCPU system regs
1388  *
1389  * Return: None
1390  *
1391  * Obtains the current system register state for the VCPU specified by
1392  * vcpuid and stores it at the location given by sregs.
1393  */
1394 void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
1395 {
1396 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1397 	int ret;
1398 
1399 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1400 
1401 	ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
1402 	TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
1403 		ret, errno);
1404 }
1405 
1406 /*
1407  * VM VCPU System Regs Set
1408  *
1409  * Input Args:
1410  *   vm - Virtual Machine
1411  *   vcpuid - VCPU ID
1412  *   sregs - Values to set VCPU system regs to
1413  *
1414  * Output Args: None
1415  *
1416  * Return: None
1417  *
1418  * Sets the system regs of the VCPU specified by vcpuid to the values
1419  * given by sregs.
1420  */
void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
{
	/* Asserting wrapper around _vcpu_sregs_set(). */
	int ret = _vcpu_sregs_set(vm, vcpuid, sregs);

	/* Report the ioctl that actually failed (KVM_SET_SREGS, not KVM_RUN). */
	TEST_ASSERT(ret == 0, "KVM_SET_SREGS IOCTL failed, "
		"rc: %i errno: %i", ret, errno);
}
1427 
1428 int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
1429 {
1430 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1431 
1432 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1433 
1434 	return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
1435 }
1436 
1437 void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
1438 {
1439 	int ret;
1440 
1441 	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_FPU, fpu);
1442 	TEST_ASSERT(ret == 0, "KVM_GET_FPU failed, rc: %i errno: %i (%s)",
1443 		    ret, errno, strerror(errno));
1444 }
1445 
1446 void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
1447 {
1448 	int ret;
1449 
1450 	ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_FPU, fpu);
1451 	TEST_ASSERT(ret == 0, "KVM_SET_FPU failed, rc: %i errno: %i (%s)",
1452 		    ret, errno, strerror(errno));
1453 }
1454 
1455 void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
1456 {
1457 	int ret;
1458 
1459 	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, reg);
1460 	TEST_ASSERT(ret == 0, "KVM_GET_ONE_REG failed, rc: %i errno: %i (%s)",
1461 		    ret, errno, strerror(errno));
1462 }
1463 
1464 void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
1465 {
1466 	int ret;
1467 
1468 	ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, reg);
1469 	TEST_ASSERT(ret == 0, "KVM_SET_ONE_REG failed, rc: %i errno: %i (%s)",
1470 		    ret, errno, strerror(errno));
1471 }
1472 
1473 /*
1474  * VCPU Ioctl
1475  *
1476  * Input Args:
1477  *   vm - Virtual Machine
1478  *   vcpuid - VCPU ID
1479  *   cmd - Ioctl number
1480  *   arg - Argument to pass to the ioctl
1481  *
1482  * Return: None
1483  *
1484  * Issues an arbitrary ioctl on a VCPU fd.
1485  */
void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
		unsigned long cmd, void *arg)
{
	/* Asserting wrapper: any non-zero result from _vcpu_ioctl() is fatal. */
	int ret = _vcpu_ioctl(vm, vcpuid, cmd, arg);

	TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
		    cmd, ret, errno, strerror(errno));
}
1495 
1496 int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
1497 		unsigned long cmd, void *arg)
1498 {
1499 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1500 	int ret;
1501 
1502 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1503 
1504 	ret = ioctl(vcpu->fd, cmd, arg);
1505 
1506 	return ret;
1507 }
1508 
1509 /*
1510  * VM Ioctl
1511  *
1512  * Input Args:
1513  *   vm - Virtual Machine
1514  *   cmd - Ioctl number
1515  *   arg - Argument to pass to the ioctl
1516  *
1517  * Return: None
1518  *
1519  * Issues an arbitrary ioctl on a VM fd.
1520  */
1521 void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
1522 {
1523 	int ret;
1524 
1525 	ret = ioctl(vm->fd, cmd, arg);
1526 	TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
1527 		cmd, ret, errno, strerror(errno));
1528 }
1529 
1530 /*
1531  * VM Dump
1532  *
1533  * Input Args:
1534  *   vm - Virtual Machine
1535  *   indent - Left margin indent amount
1536  *
1537  * Output Args:
1538  *   stream - Output FILE stream
1539  *
1540  * Return: None
1541  *
1542  * Dumps the current state of the VM given by vm, to the FILE stream
1543  * given by stream.
1544  */
1545 void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
1546 {
1547 	struct userspace_mem_region *region;
1548 	struct vcpu *vcpu;
1549 
1550 	fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
1551 	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
1552 	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
1553 	fprintf(stream, "%*sMem Regions:\n", indent, "");
1554 	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
1555 		fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
1556 			"host_virt: %p\n", indent + 2, "",
1557 			(uint64_t) region->region.guest_phys_addr,
1558 			(uint64_t) region->region.memory_size,
1559 			region->host_mem);
1560 		fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
1561 		sparsebit_dump(stream, region->unused_phy_pages, 0);
1562 	}
1563 	fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
1564 	sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
1565 	fprintf(stream, "%*spgd_created: %u\n", indent, "",
1566 		vm->pgd_created);
1567 	if (vm->pgd_created) {
1568 		fprintf(stream, "%*sVirtual Translation Tables:\n",
1569 			indent + 2, "");
1570 		virt_dump(stream, vm, indent + 4);
1571 	}
1572 	fprintf(stream, "%*sVCPUs:\n", indent, "");
1573 	list_for_each_entry(vcpu, &vm->vcpus, list)
1574 		vcpu_dump(stream, vm, vcpu->id, indent + 2);
1575 }
1576 
1577 /* Known KVM exit reasons */
1578 static struct exit_reason {
1579 	unsigned int reason;
1580 	const char *name;
1581 } exit_reasons_known[] = {
1582 	{KVM_EXIT_UNKNOWN, "UNKNOWN"},
1583 	{KVM_EXIT_EXCEPTION, "EXCEPTION"},
1584 	{KVM_EXIT_IO, "IO"},
1585 	{KVM_EXIT_HYPERCALL, "HYPERCALL"},
1586 	{KVM_EXIT_DEBUG, "DEBUG"},
1587 	{KVM_EXIT_HLT, "HLT"},
1588 	{KVM_EXIT_MMIO, "MMIO"},
1589 	{KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
1590 	{KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
1591 	{KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
1592 	{KVM_EXIT_INTR, "INTR"},
1593 	{KVM_EXIT_SET_TPR, "SET_TPR"},
1594 	{KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
1595 	{KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
1596 	{KVM_EXIT_S390_RESET, "S390_RESET"},
1597 	{KVM_EXIT_DCR, "DCR"},
1598 	{KVM_EXIT_NMI, "NMI"},
1599 	{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
1600 	{KVM_EXIT_OSI, "OSI"},
1601 	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
1602 #ifdef KVM_EXIT_MEMORY_NOT_PRESENT
1603 	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
1604 #endif
1605 };
1606 
1607 /*
1608  * Exit Reason String
1609  *
1610  * Input Args:
1611  *   exit_reason - Exit reason
1612  *
1613  * Output Args: None
1614  *
1615  * Return:
1616  *   Constant string pointer describing the exit reason.
1617  *
1618  * Locates and returns a constant string that describes the KVM exit
1619  * reason given by exit_reason.  If no such string is found, a constant
1620  * string of "Unknown" is returned.
1621  */
1622 const char *exit_reason_str(unsigned int exit_reason)
1623 {
1624 	unsigned int n1;
1625 
1626 	for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
1627 		if (exit_reason == exit_reasons_known[n1].reason)
1628 			return exit_reasons_known[n1].name;
1629 	}
1630 
1631 	return "Unknown";
1632 }
1633 
1634 /*
1635  * Physical Contiguous Page Allocator
1636  *
1637  * Input Args:
1638  *   vm - Virtual Machine
1639  *   num - number of pages
1640  *   paddr_min - Physical address minimum
1641  *   memslot - Memory region to allocate page from
1642  *
1643  * Output Args: None
1644  *
1645  * Return:
1646  *   Starting physical address
1647  *
1648  * Within the VM specified by vm, locates a range of available physical
1649  * pages at or above paddr_min. If found, the pages are marked as in use
1650  * and their base address is returned. A TEST_ASSERT failure occurs if
1651  * not enough pages are available at or above paddr_min.
1652  */
vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
			      vm_paddr_t paddr_min, uint32_t memslot)
{
	struct userspace_mem_region *region;
	sparsebit_idx_t pg, base;

	TEST_ASSERT(num > 0, "Must allocate at least one page");

	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
		"not divisible by page size.\n"
		"  paddr_min: 0x%lx page_size: 0x%x",
		paddr_min, vm->page_size);

	region = memslot2region(vm, memslot);
	/* Begin the search at the page index that corresponds to paddr_min. */
	base = pg = paddr_min >> vm->page_shift;

	/*
	 * Look for num consecutive set bits in unused_phy_pages.  The inner
	 * loop walks the candidate window [base, base + num); on the first
	 * clear bit the window is restarted at the index returned by
	 * sparsebit_next_set().  The outer loop stops once a whole window was
	 * walked (pg == base + num) or pg became 0, which the check below
	 * treats as "nothing available" (presumably sparsebit_next_set()
	 * returns 0 when no further bit is set; confirm in sparsebit.c).
	 */
	do {
		for (; pg < base + num; ++pg) {
			if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
				base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
				break;
			}
		}
	} while (pg && pg != base + num);

	/*
	 * Failure path: report on stderr, dump the VM and abort() the
	 * process rather than going through TEST_ASSERT.
	 */
	if (pg == 0) {
		fprintf(stderr, "No guest physical page available, "
			"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
			paddr_min, vm->page_size, memslot);
		fputs("---- vm dump ----\n", stderr);
		vm_dump(stderr, vm, 2);
		abort();
	}

	/* Claim the pages: a cleared bit means the page is no longer unused. */
	for (pg = base; pg < base + num; ++pg)
		sparsebit_clear(region->unused_phy_pages, pg);

	/* Convert the first page index back into a physical address. */
	return base * vm->page_size;
}
1692 
/*
 * Single-page convenience wrapper: allocates exactly one physical page at
 * or above paddr_min from memslot by calling vm_phy_pages_alloc() with a
 * count of 1, and returns that page's physical address.
 */
vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
			     uint32_t memslot)
{
	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
}
1698 
1699 /*
1700  * Address Guest Virtual to Host Virtual
1701  *
1702  * Input Args:
1703  *   vm - Virtual Machine
1704  *   gva - VM virtual address
1705  *
1706  * Output Args: None
1707  *
1708  * Return:
1709  *   Equivalent host virtual address
1710  */
1711 void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
1712 {
1713 	return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
1714 }
1715 
1716 /*
1717  * Is Unrestricted Guest
1718  *
1719  * Input Args:
1720  *   vm - Virtual Machine
1721  *
1722  * Output Args: None
1723  *
1724  * Return: True if the unrestricted guest is set to 'Y', otherwise return false.
1725  *
1726  * Check if the unrestricted guest flag is enabled.
1727  */
1728 bool vm_is_unrestricted_guest(struct kvm_vm *vm)
1729 {
1730 	char val = 'N';
1731 	size_t count;
1732 	FILE *f;
1733 
1734 	if (vm == NULL) {
1735 		/* Ensure that the KVM vendor-specific module is loaded. */
1736 		f = fopen(KVM_DEV_PATH, "r");
1737 		TEST_ASSERT(f != NULL, "Error in opening KVM dev file: %d",
1738 			    errno);
1739 		fclose(f);
1740 	}
1741 
1742 	f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
1743 	if (f) {
1744 		count = fread(&val, sizeof(char), 1, f);
1745 		TEST_ASSERT(count == 1, "Unable to read from param file.");
1746 		fclose(f);
1747 	}
1748 
1749 	return val == 'Y';
1750 }
1751 
/* Accessor: returns the page_size field of the VM given by vm. */
unsigned int vm_get_page_size(struct kvm_vm *vm)
{
	return vm->page_size;
}
1756 
/* Accessor: returns the page_shift field of the VM given by vm. */
unsigned int vm_get_page_shift(struct kvm_vm *vm)
{
	return vm->page_shift;
}
1761 
/*
 * Accessor: returns the max_gfn field of the VM given by vm.
 *
 * NOTE(review): the return type is unsigned int; if the max_gfn field is
 * declared wider than that, the value is truncated here.  Confirm against
 * the struct kvm_vm definition.
 */
unsigned int vm_get_max_gfn(struct kvm_vm *vm)
{
	return vm->max_gfn;
}
1766 
/* Accessor: returns the fd field of the VM given by vm. */
int vm_get_fd(struct kvm_vm *vm)
{
	return vm->fd;
}
1771 
/*
 * Converts a page count between two page sizes.
 *
 * Input Args:
 *   num_pages - Number of pages of size (1 << page_shift)
 *   page_shift - log2 of the source page size
 *   new_page_shift - log2 of the destination page size
 *   ceil - When converting to larger pages, round a partial page up
 *
 * Return:
 *   The number of pages of size (1 << new_page_shift).  Converting to
 *   smaller (or equal) pages multiplies exactly; converting to larger pages
 *   divides, rounding up only when ceil is true.
 */
static unsigned int vm_calc_num_pages(unsigned int num_pages,
				      unsigned int page_shift,
				      unsigned int new_page_shift,
				      bool ceil)
{
	unsigned int n;

	if (page_shift >= new_page_shift)
		return num_pages * (1U << (page_shift - new_page_shift));

	/*
	 * Only compute this ratio once new_page_shift > page_shift is known:
	 * evaluated unconditionally, the unsigned subtraction wraps and the
	 * shift count exceeds the width of the type, which is undefined.
	 */
	n = 1U << (new_page_shift - page_shift);

	return num_pages / n + !!(ceil && num_pages % n);
}
1784 
/*
 * Returns the bit index of the lowest set bit of getpagesize(), i.e. the
 * log2 of the host page size when that size is a power of two.
 */
static inline int getpageshift(void)
{
	int host_page_size = getpagesize();

	/* ffs() numbers bits from 1, hence the adjustment. */
	return __builtin_ffs(host_page_size) - 1;
}
1789 
1790 unsigned int
1791 vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
1792 {
1793 	return vm_calc_num_pages(num_guest_pages,
1794 				 vm_guest_mode_params[mode].page_shift,
1795 				 getpageshift(), true);
1796 }
1797 
1798 unsigned int
1799 vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages)
1800 {
1801 	return vm_calc_num_pages(num_host_pages, getpageshift(),
1802 				 vm_guest_mode_params[mode].page_shift, false);
1803 }
1804 
1805 unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
1806 {
1807 	unsigned int n;
1808 	n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
1809 	return vm_adjust_num_guest_pages(mode, n);
1810 }
1811