xref: /openbmc/linux/tools/testing/selftests/kvm/lib/kvm_util.c (revision c64d01b3ceba873aa8e8605598cec4a6bc6d1601)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * tools/testing/selftests/kvm/lib/kvm_util.c
4  *
5  * Copyright (C) 2018, Google LLC.
6  */
7 
8 #define _GNU_SOURCE /* for program_invocation_name */
9 #include "test_util.h"
10 #include "kvm_util.h"
11 #include "kvm_util_internal.h"
12 #include "processor.h"
13 
14 #include <assert.h>
15 #include <sys/mman.h>
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <unistd.h>
19 #include <linux/kernel.h>
20 
/*
 * NOTE(review): not referenced in the visible part of this file; presumably
 * the lowest guest page frame number the physical page allocator hands out.
 * Confirm against its users.
 */
#define KVM_UTIL_MIN_PFN	2

/*
 * Forward declaration: vm_vcpu_rm() needs this size to munmap() the vCPU
 * state mapping.
 */
static int vcpu_mmap_sz(void);
24 
25 int open_path_or_exit(const char *path, int flags)
26 {
27 	int fd;
28 
29 	fd = open(path, flags);
30 	if (fd < 0) {
31 		print_skip("%s not available (errno: %d)", path, errno);
32 		exit(KSFT_SKIP);
33 	}
34 
35 	return fd;
36 }
37 
38 /*
39  * Open KVM_DEV_PATH if available, otherwise exit the entire program.
40  *
41  * Input Args:
42  *   flags - The flags to pass when opening KVM_DEV_PATH.
43  *
44  * Return:
45  *   The opened file descriptor of /dev/kvm.
46  */
47 static int _open_kvm_dev_path_or_exit(int flags)
48 {
49 	return open_path_or_exit(KVM_DEV_PATH, flags);
50 }
51 
/* Open KVM_DEV_PATH read-only (O_RDONLY) and return its file descriptor. */
int open_kvm_dev_path_or_exit(void)
{
	int kvm_fd = _open_kvm_dev_path_or_exit(O_RDONLY);

	return kvm_fd;
}
56 
57 /*
58  * Capability
59  *
60  * Input Args:
61  *   cap - Capability
62  *
63  * Output Args: None
64  *
65  * Return:
66  *   On success, the Value corresponding to the capability (KVM_CAP_*)
67  *   specified by the value of cap.  On failure a TEST_ASSERT failure
68  *   is produced.
69  *
70  * Looks up and returns the value corresponding to the capability
71  * (KVM_CAP_*) given by cap.
72  */
73 int kvm_check_cap(long cap)
74 {
75 	int ret;
76 	int kvm_fd;
77 
78 	kvm_fd = open_kvm_dev_path_or_exit();
79 	ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
80 	TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n"
81 		"  rc: %i errno: %i", ret, errno);
82 
83 	close(kvm_fd);
84 
85 	return ret;
86 }
87 
88 /* VM Enable Capability
89  *
90  * Input Args:
91  *   vm - Virtual Machine
92  *   cap - Capability
93  *
94  * Output Args: None
95  *
96  * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
97  *
98  * Enables a capability (KVM_CAP_*) on the VM.
99  */
100 int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
101 {
102 	int ret;
103 
104 	ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap);
105 	TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n"
106 		"  rc: %i errno: %i", ret, errno);
107 
108 	return ret;
109 }
110 
111 /* VCPU Enable Capability
112  *
113  * Input Args:
114  *   vm - Virtual Machine
115  *   vcpu_id - VCPU
116  *   cap - Capability
117  *
118  * Output Args: None
119  *
120  * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
121  *
122  * Enables a capability (KVM_CAP_*) on the VCPU.
123  */
124 int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
125 		    struct kvm_enable_cap *cap)
126 {
127 	struct vcpu *vcpu = vcpu_find(vm, vcpu_id);
128 	int r;
129 
130 	TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id);
131 
132 	r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap);
133 	TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n"
134 			"  rc: %i, errno: %i", r, errno);
135 
136 	return r;
137 }
138 
139 void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
140 {
141 	struct kvm_enable_cap cap = { 0 };
142 
143 	cap.cap = KVM_CAP_DIRTY_LOG_RING;
144 	cap.args[0] = ring_size;
145 	vm_enable_cap(vm, &cap);
146 	vm->dirty_ring_size = ring_size;
147 }
148 
149 static void vm_open(struct kvm_vm *vm, int perm)
150 {
151 	vm->kvm_fd = _open_kvm_dev_path_or_exit(perm);
152 
153 	if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
154 		print_skip("immediate_exit not available");
155 		exit(KSFT_SKIP);
156 	}
157 
158 	vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type);
159 	TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
160 		"rc: %i errno: %i", vm->fd, errno);
161 }
162 
163 const char *vm_guest_mode_string(uint32_t i)
164 {
165 	static const char * const strings[] = {
166 		[VM_MODE_P52V48_4K]	= "PA-bits:52,  VA-bits:48,  4K pages",
167 		[VM_MODE_P52V48_64K]	= "PA-bits:52,  VA-bits:48, 64K pages",
168 		[VM_MODE_P48V48_4K]	= "PA-bits:48,  VA-bits:48,  4K pages",
169 		[VM_MODE_P48V48_64K]	= "PA-bits:48,  VA-bits:48, 64K pages",
170 		[VM_MODE_P40V48_4K]	= "PA-bits:40,  VA-bits:48,  4K pages",
171 		[VM_MODE_P40V48_64K]	= "PA-bits:40,  VA-bits:48, 64K pages",
172 		[VM_MODE_PXXV48_4K]	= "PA-bits:ANY, VA-bits:48,  4K pages",
173 		[VM_MODE_P47V64_4K]	= "PA-bits:47,  VA-bits:64,  4K pages",
174 		[VM_MODE_P44V64_4K]	= "PA-bits:44,  VA-bits:64,  4K pages",
175 	};
176 	_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
177 		       "Missing new mode strings?");
178 
179 	TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i);
180 
181 	return strings[i];
182 }
183 
184 const struct vm_guest_mode_params vm_guest_mode_params[] = {
185 	[VM_MODE_P52V48_4K]	= { 52, 48,  0x1000, 12 },
186 	[VM_MODE_P52V48_64K]	= { 52, 48, 0x10000, 16 },
187 	[VM_MODE_P48V48_4K]	= { 48, 48,  0x1000, 12 },
188 	[VM_MODE_P48V48_64K]	= { 48, 48, 0x10000, 16 },
189 	[VM_MODE_P40V48_4K]	= { 40, 48,  0x1000, 12 },
190 	[VM_MODE_P40V48_64K]	= { 40, 48, 0x10000, 16 },
191 	[VM_MODE_PXXV48_4K]	= {  0,  0,  0x1000, 12 },
192 	[VM_MODE_P47V64_4K]	= { 47, 64,  0x1000, 12 },
193 	[VM_MODE_P44V64_4K]	= { 44, 64,  0x1000, 12 },
194 };
195 _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
196 	       "Missing new mode params?");
197 
198 /*
199  * VM Create
200  *
201  * Input Args:
202  *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
203  *   phy_pages - Physical memory pages
204  *   perm - permission
205  *
206  * Output Args: None
207  *
208  * Return:
209  *   Pointer to opaque structure that describes the created VM.
210  *
211  * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
212  * When phy_pages is non-zero, a memory region of phy_pages physical pages
213  * is created and mapped starting at guest physical address 0.  The file
214  * descriptor to control the created VM is created with the permissions
215  * given by perm (e.g. O_RDWR).
216  */
217 struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
218 {
219 	struct kvm_vm *vm;
220 
221 	pr_debug("%s: mode='%s' pages='%ld' perm='%d'\n", __func__,
222 		 vm_guest_mode_string(mode), phy_pages, perm);
223 
224 	vm = calloc(1, sizeof(*vm));
225 	TEST_ASSERT(vm != NULL, "Insufficient Memory");
226 
227 	INIT_LIST_HEAD(&vm->vcpus);
228 	vm->regions.gpa_tree = RB_ROOT;
229 	vm->regions.hva_tree = RB_ROOT;
230 	hash_init(vm->regions.slot_hash);
231 
232 	vm->mode = mode;
233 	vm->type = 0;
234 
235 	vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
236 	vm->va_bits = vm_guest_mode_params[mode].va_bits;
237 	vm->page_size = vm_guest_mode_params[mode].page_size;
238 	vm->page_shift = vm_guest_mode_params[mode].page_shift;
239 
240 	/* Setup mode specific traits. */
241 	switch (vm->mode) {
242 	case VM_MODE_P52V48_4K:
243 		vm->pgtable_levels = 4;
244 		break;
245 	case VM_MODE_P52V48_64K:
246 		vm->pgtable_levels = 3;
247 		break;
248 	case VM_MODE_P48V48_4K:
249 		vm->pgtable_levels = 4;
250 		break;
251 	case VM_MODE_P48V48_64K:
252 		vm->pgtable_levels = 3;
253 		break;
254 	case VM_MODE_P40V48_4K:
255 		vm->pgtable_levels = 4;
256 		break;
257 	case VM_MODE_P40V48_64K:
258 		vm->pgtable_levels = 3;
259 		break;
260 	case VM_MODE_PXXV48_4K:
261 #ifdef __x86_64__
262 		kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
263 		/*
264 		 * Ignore KVM support for 5-level paging (vm->va_bits == 57),
265 		 * it doesn't take effect unless a CR4.LA57 is set, which it
266 		 * isn't for this VM_MODE.
267 		 */
268 		TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
269 			    "Linear address width (%d bits) not supported",
270 			    vm->va_bits);
271 		pr_debug("Guest physical address width detected: %d\n",
272 			 vm->pa_bits);
273 		vm->pgtable_levels = 4;
274 		vm->va_bits = 48;
275 #else
276 		TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
277 #endif
278 		break;
279 	case VM_MODE_P47V64_4K:
280 		vm->pgtable_levels = 5;
281 		break;
282 	case VM_MODE_P44V64_4K:
283 		vm->pgtable_levels = 5;
284 		break;
285 	default:
286 		TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
287 	}
288 
289 #ifdef __aarch64__
290 	if (vm->pa_bits != 40)
291 		vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
292 #endif
293 
294 	vm_open(vm, perm);
295 
296 	/* Limit to VA-bit canonical virtual addresses. */
297 	vm->vpages_valid = sparsebit_alloc();
298 	sparsebit_set_num(vm->vpages_valid,
299 		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
300 	sparsebit_set_num(vm->vpages_valid,
301 		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
302 		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
303 
304 	/* Limit physical addresses to PA-bits. */
305 	vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
306 
307 	/* Allocate and setup memory for guest. */
308 	vm->vpages_mapped = sparsebit_alloc();
309 	if (phy_pages != 0)
310 		vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
311 					    0, 0, phy_pages, 0);
312 
313 	return vm;
314 }
315 
316 /*
317  * VM Create with customized parameters
318  *
319  * Input Args:
320  *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
321  *   nr_vcpus - VCPU count
322  *   slot0_mem_pages - Slot0 physical memory size
323  *   extra_mem_pages - Non-slot0 physical memory total size
324  *   num_percpu_pages - Per-cpu physical memory pages
325  *   guest_code - Guest entry point
326  *   vcpuids - VCPU IDs
327  *
328  * Output Args: None
329  *
330  * Return:
331  *   Pointer to opaque structure that describes the created VM.
332  *
333  * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K),
334  * with customized slot0 memory size, at least 512 pages currently.
335  * extra_mem_pages is only used to calculate the maximum page table size,
336  * no real memory allocation for non-slot0 memory in this function.
337  */
338 struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
339 				    uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
340 				    uint32_t num_percpu_pages, void *guest_code,
341 				    uint32_t vcpuids[])
342 {
343 	uint64_t vcpu_pages, extra_pg_pages, pages;
344 	struct kvm_vm *vm;
345 	int i;
346 
347 	/* Force slot0 memory size not small than DEFAULT_GUEST_PHY_PAGES */
348 	if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES)
349 		slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES;
350 
351 	/* The maximum page table size for a memory region will be when the
352 	 * smallest pages are used. Considering each page contains x page
353 	 * table descriptors, the total extra size for page tables (for extra
354 	 * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
355 	 * than N/x*2.
356 	 */
357 	vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
358 	extra_pg_pages = (slot0_mem_pages + extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
359 	pages = slot0_mem_pages + vcpu_pages + extra_pg_pages;
360 
361 	TEST_ASSERT(nr_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
362 		    "nr_vcpus = %d too large for host, max-vcpus = %d",
363 		    nr_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));
364 
365 	pages = vm_adjust_num_guest_pages(mode, pages);
366 	vm = vm_create(mode, pages, O_RDWR);
367 
368 	kvm_vm_elf_load(vm, program_invocation_name);
369 
370 #ifdef __x86_64__
371 	vm_create_irqchip(vm);
372 #endif
373 
374 	for (i = 0; i < nr_vcpus; ++i) {
375 		uint32_t vcpuid = vcpuids ? vcpuids[i] : i;
376 
377 		vm_vcpu_add_default(vm, vcpuid, guest_code);
378 	}
379 
380 	return vm;
381 }
382 
383 struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
384 					    uint32_t num_percpu_pages, void *guest_code,
385 					    uint32_t vcpuids[])
386 {
387 	return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, DEFAULT_GUEST_PHY_PAGES,
388 				    extra_mem_pages, num_percpu_pages, guest_code, vcpuids);
389 }
390 
/*
 * Create a default VM with a single vCPU whose ID is vcpuid and no
 * per-cpu pages.
 */
struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
				 void *guest_code)
{
	uint32_t vcpuids[] = { vcpuid };

	return vm_create_default_with_vcpus(1, extra_mem_pages, 0, guest_code,
					    vcpuids);
}
397 
398 /*
399  * VM Restart
400  *
401  * Input Args:
402  *   vm - VM that has been released before
403  *   perm - permission
404  *
405  * Output Args: None
406  *
407  * Reopens the file descriptors associated to the VM and reinstates the
408  * global state, such as the irqchip and the memory regions that are mapped
409  * into the guest.
410  */
411 void kvm_vm_restart(struct kvm_vm *vmp, int perm)
412 {
413 	int ctr;
414 	struct userspace_mem_region *region;
415 
416 	vm_open(vmp, perm);
417 	if (vmp->has_irqchip)
418 		vm_create_irqchip(vmp);
419 
420 	hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
421 		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
422 		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
423 			    "  rc: %i errno: %i\n"
424 			    "  slot: %u flags: 0x%x\n"
425 			    "  guest_phys_addr: 0x%llx size: 0x%llx",
426 			    ret, errno, region->region.slot,
427 			    region->region.flags,
428 			    region->region.guest_phys_addr,
429 			    region->region.memory_size);
430 	}
431 }
432 
433 void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
434 {
435 	struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
436 	int ret;
437 
438 	ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
439 	TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s",
440 		    __func__, strerror(-ret));
441 }
442 
443 void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
444 			    uint64_t first_page, uint32_t num_pages)
445 {
446 	struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot,
447 		                            .first_page = first_page,
448 	                                    .num_pages = num_pages };
449 	int ret;
450 
451 	ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
452 	TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s",
453 		    __func__, strerror(-ret));
454 }
455 
456 uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
457 {
458 	return ioctl(vm->fd, KVM_RESET_DIRTY_RINGS);
459 }
460 
461 /*
462  * Userspace Memory Region Find
463  *
464  * Input Args:
465  *   vm - Virtual Machine
466  *   start - Starting VM physical address
467  *   end - Ending VM physical address, inclusive.
468  *
469  * Output Args: None
470  *
471  * Return:
472  *   Pointer to overlapping region, NULL if no such region.
473  *
474  * Searches for a region with any physical memory that overlaps with
475  * any portion of the guest physical addresses from start to end
476  * inclusive.  If multiple overlapping regions exist, a pointer to any
477  * of the regions is returned.  Null is returned only when no overlapping
478  * region exists.
479  */
480 static struct userspace_mem_region *
481 userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
482 {
483 	struct rb_node *node;
484 
485 	for (node = vm->regions.gpa_tree.rb_node; node; ) {
486 		struct userspace_mem_region *region =
487 			container_of(node, struct userspace_mem_region, gpa_node);
488 		uint64_t existing_start = region->region.guest_phys_addr;
489 		uint64_t existing_end = region->region.guest_phys_addr
490 			+ region->region.memory_size - 1;
491 		if (start <= existing_end && end >= existing_start)
492 			return region;
493 
494 		if (start < existing_start)
495 			node = node->rb_left;
496 		else
497 			node = node->rb_right;
498 	}
499 
500 	return NULL;
501 }
502 
503 /*
504  * KVM Userspace Memory Region Find
505  *
506  * Input Args:
507  *   vm - Virtual Machine
508  *   start - Starting VM physical address
509  *   end - Ending VM physical address, inclusive.
510  *
511  * Output Args: None
512  *
513  * Return:
514  *   Pointer to overlapping region, NULL if no such region.
515  *
516  * Public interface to userspace_mem_region_find. Allows tests to look up
517  * the memslot datastructure for a given range of guest physical memory.
518  */
519 struct kvm_userspace_memory_region *
520 kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
521 				 uint64_t end)
522 {
523 	struct userspace_mem_region *region;
524 
525 	region = userspace_mem_region_find(vm, start, end);
526 	if (!region)
527 		return NULL;
528 
529 	return &region->region;
530 }
531 
532 /*
533  * VCPU Find
534  *
535  * Input Args:
536  *   vm - Virtual Machine
537  *   vcpuid - VCPU ID
538  *
539  * Output Args: None
540  *
541  * Return:
542  *   Pointer to VCPU structure
543  *
544  * Locates a vcpu structure that describes the VCPU specified by vcpuid and
545  * returns a pointer to it.  Returns NULL if the VM doesn't contain a VCPU
546  * for the specified vcpuid.
547  */
548 struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
549 {
550 	struct vcpu *vcpu;
551 
552 	list_for_each_entry(vcpu, &vm->vcpus, list) {
553 		if (vcpu->id == vcpuid)
554 			return vcpu;
555 	}
556 
557 	return NULL;
558 }
559 
560 /*
561  * VM VCPU Remove
562  *
563  * Input Args:
564  *   vcpu - VCPU to remove
565  *
566  * Output Args: None
567  *
568  * Return: None, TEST_ASSERT failures for all error conditions
569  *
570  * Removes a vCPU from a VM and frees its resources.
571  */
572 static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu)
573 {
574 	int ret;
575 
576 	if (vcpu->dirty_gfns) {
577 		ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
578 		TEST_ASSERT(ret == 0, "munmap of VCPU dirty ring failed, "
579 			    "rc: %i errno: %i", ret, errno);
580 		vcpu->dirty_gfns = NULL;
581 	}
582 
583 	ret = munmap(vcpu->state, vcpu_mmap_sz());
584 	TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
585 		"errno: %i", ret, errno);
586 	ret = close(vcpu->fd);
587 	TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
588 		"errno: %i", ret, errno);
589 
590 	list_del(&vcpu->list);
591 	free(vcpu);
592 }
593 
594 void kvm_vm_release(struct kvm_vm *vmp)
595 {
596 	struct vcpu *vcpu, *tmp;
597 	int ret;
598 
599 	list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
600 		vm_vcpu_rm(vmp, vcpu);
601 
602 	ret = close(vmp->fd);
603 	TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
604 		"  vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
605 
606 	ret = close(vmp->kvm_fd);
607 	TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n"
608 		"  vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
609 }
610 
611 static void __vm_mem_region_delete(struct kvm_vm *vm,
612 				   struct userspace_mem_region *region,
613 				   bool unlink)
614 {
615 	int ret;
616 
617 	if (unlink) {
618 		rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
619 		rb_erase(&region->hva_node, &vm->regions.hva_tree);
620 		hash_del(&region->slot_node);
621 	}
622 
623 	region->region.memory_size = 0;
624 	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
625 	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
626 		    "rc: %i errno: %i", ret, errno);
627 
628 	sparsebit_free(&region->unused_phy_pages);
629 	ret = munmap(region->mmap_start, region->mmap_size);
630 	TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", ret, errno);
631 
632 	free(region);
633 }
634 
635 /*
636  * Destroys and frees the VM pointed to by vmp.
637  */
638 void kvm_vm_free(struct kvm_vm *vmp)
639 {
640 	int ctr;
641 	struct hlist_node *node;
642 	struct userspace_mem_region *region;
643 
644 	if (vmp == NULL)
645 		return;
646 
647 	/* Free userspace_mem_regions. */
648 	hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
649 		__vm_mem_region_delete(vmp, region, false);
650 
651 	/* Free sparsebit arrays. */
652 	sparsebit_free(&vmp->vpages_valid);
653 	sparsebit_free(&vmp->vpages_mapped);
654 
655 	kvm_vm_release(vmp);
656 
657 	/* Free the structure describing the VM. */
658 	free(vmp);
659 }
660 
661 /*
662  * Memory Compare, host virtual to guest virtual
663  *
664  * Input Args:
665  *   hva - Starting host virtual address
666  *   vm - Virtual Machine
667  *   gva - Starting guest virtual address
668  *   len - number of bytes to compare
669  *
670  * Output Args: None
671  *
672  * Input/Output Args: None
673  *
674  * Return:
675  *   Returns 0 if the bytes starting at hva for a length of len
676  *   are equal the guest virtual bytes starting at gva.  Returns
677  *   a value < 0, if bytes at hva are less than those at gva.
678  *   Otherwise a value > 0 is returned.
679  *
680  * Compares the bytes starting at the host virtual address hva, for
681  * a length of len, to the guest bytes starting at the guest virtual
682  * address given by gva.
683  */
684 int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
685 {
686 	size_t amt;
687 
688 	/*
689 	 * Compare a batch of bytes until either a match is found
690 	 * or all the bytes have been compared.
691 	 */
692 	for (uintptr_t offset = 0; offset < len; offset += amt) {
693 		uintptr_t ptr1 = (uintptr_t)hva + offset;
694 
695 		/*
696 		 * Determine host address for guest virtual address
697 		 * at offset.
698 		 */
699 		uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
700 
701 		/*
702 		 * Determine amount to compare on this pass.
703 		 * Don't allow the comparsion to cross a page boundary.
704 		 */
705 		amt = len - offset;
706 		if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
707 			amt = vm->page_size - (ptr1 % vm->page_size);
708 		if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
709 			amt = vm->page_size - (ptr2 % vm->page_size);
710 
711 		assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
712 		assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
713 
714 		/*
715 		 * Perform the comparison.  If there is a difference
716 		 * return that result to the caller, otherwise need
717 		 * to continue on looking for a mismatch.
718 		 */
719 		int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
720 		if (ret != 0)
721 			return ret;
722 	}
723 
724 	/*
725 	 * No mismatch found.  Let the caller know the two memory
726 	 * areas are equal.
727 	 */
728 	return 0;
729 }
730 
731 static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
732 					       struct userspace_mem_region *region)
733 {
734 	struct rb_node **cur, *parent;
735 
736 	for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
737 		struct userspace_mem_region *cregion;
738 
739 		cregion = container_of(*cur, typeof(*cregion), gpa_node);
740 		parent = *cur;
741 		if (region->region.guest_phys_addr <
742 		    cregion->region.guest_phys_addr)
743 			cur = &(*cur)->rb_left;
744 		else {
745 			TEST_ASSERT(region->region.guest_phys_addr !=
746 				    cregion->region.guest_phys_addr,
747 				    "Duplicate GPA in region tree");
748 
749 			cur = &(*cur)->rb_right;
750 		}
751 	}
752 
753 	rb_link_node(&region->gpa_node, parent, cur);
754 	rb_insert_color(&region->gpa_node, gpa_tree);
755 }
756 
757 static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
758 					       struct userspace_mem_region *region)
759 {
760 	struct rb_node **cur, *parent;
761 
762 	for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
763 		struct userspace_mem_region *cregion;
764 
765 		cregion = container_of(*cur, typeof(*cregion), hva_node);
766 		parent = *cur;
767 		if (region->host_mem < cregion->host_mem)
768 			cur = &(*cur)->rb_left;
769 		else {
770 			TEST_ASSERT(region->host_mem !=
771 				    cregion->host_mem,
772 				    "Duplicate HVA in region tree");
773 
774 			cur = &(*cur)->rb_right;
775 		}
776 	}
777 
778 	rb_link_node(&region->hva_node, parent, cur);
779 	rb_insert_color(&region->hva_node, hva_tree);
780 }
781 
782 /*
783  * VM Userspace Memory Region Add
784  *
785  * Input Args:
786  *   vm - Virtual Machine
787  *   src_type - Storage source for this region.
788  *              NULL to use anonymous memory.
789  *   guest_paddr - Starting guest physical address
790  *   slot - KVM region slot
791  *   npages - Number of physical pages
792  *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
793  *
794  * Output Args: None
795  *
796  * Return: None
797  *
798  * Allocates a memory area of the number of pages specified by npages
799  * and maps it to the VM specified by vm, at a starting physical address
800  * given by guest_paddr.  The region is created with a KVM region slot
801  * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM.  The
802  * region is created with the flags given by flags.
803  */
804 void vm_userspace_mem_region_add(struct kvm_vm *vm,
805 	enum vm_mem_backing_src_type src_type,
806 	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
807 	uint32_t flags)
808 {
809 	int ret;
810 	struct userspace_mem_region *region;
811 	size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
812 	size_t alignment;
813 
814 	TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
815 		"Number of guest pages is not compatible with the host. "
816 		"Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));
817 
818 	TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
819 		"address not on a page boundary.\n"
820 		"  guest_paddr: 0x%lx vm->page_size: 0x%x",
821 		guest_paddr, vm->page_size);
822 	TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
823 		<= vm->max_gfn, "Physical range beyond maximum "
824 		"supported physical address,\n"
825 		"  guest_paddr: 0x%lx npages: 0x%lx\n"
826 		"  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
827 		guest_paddr, npages, vm->max_gfn, vm->page_size);
828 
829 	/*
830 	 * Confirm a mem region with an overlapping address doesn't
831 	 * already exist.
832 	 */
833 	region = (struct userspace_mem_region *) userspace_mem_region_find(
834 		vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
835 	if (region != NULL)
836 		TEST_FAIL("overlapping userspace_mem_region already "
837 			"exists\n"
838 			"  requested guest_paddr: 0x%lx npages: 0x%lx "
839 			"page_size: 0x%x\n"
840 			"  existing guest_paddr: 0x%lx size: 0x%lx",
841 			guest_paddr, npages, vm->page_size,
842 			(uint64_t) region->region.guest_phys_addr,
843 			(uint64_t) region->region.memory_size);
844 
845 	/* Confirm no region with the requested slot already exists. */
846 	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
847 			       slot) {
848 		if (region->region.slot != slot)
849 			continue;
850 
851 		TEST_FAIL("A mem region with the requested slot "
852 			"already exists.\n"
853 			"  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
854 			"  existing slot: %u paddr: 0x%lx size: 0x%lx",
855 			slot, guest_paddr, npages,
856 			region->region.slot,
857 			(uint64_t) region->region.guest_phys_addr,
858 			(uint64_t) region->region.memory_size);
859 	}
860 
861 	/* Allocate and initialize new mem region structure. */
862 	region = calloc(1, sizeof(*region));
863 	TEST_ASSERT(region != NULL, "Insufficient Memory");
864 	region->mmap_size = npages * vm->page_size;
865 
866 #ifdef __s390x__
867 	/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
868 	alignment = 0x100000;
869 #else
870 	alignment = 1;
871 #endif
872 
873 	/*
874 	 * When using THP mmap is not guaranteed to returned a hugepage aligned
875 	 * address so we have to pad the mmap. Padding is not needed for HugeTLB
876 	 * because mmap will always return an address aligned to the HugeTLB
877 	 * page size.
878 	 */
879 	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
880 		alignment = max(backing_src_pagesz, alignment);
881 
882 	ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
883 
884 	/* Add enough memory to align up if necessary */
885 	if (alignment > 1)
886 		region->mmap_size += alignment;
887 
888 	region->fd = -1;
889 	if (backing_src_is_shared(src_type)) {
890 		int memfd_flags = MFD_CLOEXEC;
891 
892 		if (src_type == VM_MEM_SRC_SHARED_HUGETLB)
893 			memfd_flags |= MFD_HUGETLB;
894 
895 		region->fd = memfd_create("kvm_selftest", memfd_flags);
896 		TEST_ASSERT(region->fd != -1,
897 			    "memfd_create failed, errno: %i", errno);
898 
899 		ret = ftruncate(region->fd, region->mmap_size);
900 		TEST_ASSERT(ret == 0, "ftruncate failed, errno: %i", errno);
901 
902 		ret = fallocate(region->fd,
903 				FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0,
904 				region->mmap_size);
905 		TEST_ASSERT(ret == 0, "fallocate failed, errno: %i", errno);
906 	}
907 
908 	region->mmap_start = mmap(NULL, region->mmap_size,
909 				  PROT_READ | PROT_WRITE,
910 				  vm_mem_backing_src_alias(src_type)->flag,
911 				  region->fd, 0);
912 	TEST_ASSERT(region->mmap_start != MAP_FAILED,
913 		    "test_malloc failed, mmap_start: %p errno: %i",
914 		    region->mmap_start, errno);
915 
916 	TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
917 		    region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
918 		    "mmap_start %p is not aligned to HugeTLB page size 0x%lx",
919 		    region->mmap_start, backing_src_pagesz);
920 
921 	/* Align host address */
922 	region->host_mem = align_ptr_up(region->mmap_start, alignment);
923 
924 	/* As needed perform madvise */
925 	if ((src_type == VM_MEM_SRC_ANONYMOUS ||
926 	     src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
927 		ret = madvise(region->host_mem, npages * vm->page_size,
928 			      src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
929 		TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
930 			    region->host_mem, npages * vm->page_size,
931 			    vm_mem_backing_src_alias(src_type)->name);
932 	}
933 
934 	region->unused_phy_pages = sparsebit_alloc();
935 	sparsebit_set_num(region->unused_phy_pages,
936 		guest_paddr >> vm->page_shift, npages);
937 	region->region.slot = slot;
938 	region->region.flags = flags;
939 	region->region.guest_phys_addr = guest_paddr;
940 	region->region.memory_size = npages * vm->page_size;
941 	region->region.userspace_addr = (uintptr_t) region->host_mem;
942 	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
943 	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
944 		"  rc: %i errno: %i\n"
945 		"  slot: %u flags: 0x%x\n"
946 		"  guest_phys_addr: 0x%lx size: 0x%lx",
947 		ret, errno, slot, flags,
948 		guest_paddr, (uint64_t) region->region.memory_size);
949 
950 	/* Add to quick lookup data structures */
951 	vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
952 	vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
953 	hash_add(vm->regions.slot_hash, &region->slot_node, slot);
954 
955 	/* If shared memory, create an alias. */
956 	if (region->fd >= 0) {
957 		region->mmap_alias = mmap(NULL, region->mmap_size,
958 					  PROT_READ | PROT_WRITE,
959 					  vm_mem_backing_src_alias(src_type)->flag,
960 					  region->fd, 0);
961 		TEST_ASSERT(region->mmap_alias != MAP_FAILED,
962 			    "mmap of alias failed, errno: %i", errno);
963 
964 		/* Align host alias address */
965 		region->host_alias = align_ptr_up(region->mmap_alias, alignment);
966 	}
967 }
968 
969 /*
970  * Memslot to region
971  *
972  * Input Args:
973  *   vm - Virtual Machine
974  *   memslot - KVM memory slot ID
975  *
976  * Output Args: None
977  *
978  * Return:
979  *   Pointer to memory region structure that describe memory region
980  *   using kvm memory slot ID given by memslot.  TEST_ASSERT failure
981  *   on error (e.g. currently no memory region using memslot as a KVM
982  *   memory slot ID).
983  */
984 struct userspace_mem_region *
985 memslot2region(struct kvm_vm *vm, uint32_t memslot)
986 {
987 	struct userspace_mem_region *region;
988 
989 	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
990 			       memslot)
991 		if (region->region.slot == memslot)
992 			return region;
993 
994 	fprintf(stderr, "No mem region with the requested slot found,\n"
995 		"  requested slot: %u\n", memslot);
996 	fputs("---- vm dump ----\n", stderr);
997 	vm_dump(stderr, vm, 2);
998 	TEST_FAIL("Mem region not found");
999 	return NULL;
1000 }
1001 
1002 /*
1003  * VM Memory Region Flags Set
1004  *
1005  * Input Args:
1006  *   vm - Virtual Machine
1007  *   flags - Starting guest physical address
1008  *
1009  * Output Args: None
1010  *
1011  * Return: None
1012  *
1013  * Sets the flags of the memory region specified by the value of slot,
1014  * to the values given by flags.
1015  */
1016 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
1017 {
1018 	int ret;
1019 	struct userspace_mem_region *region;
1020 
1021 	region = memslot2region(vm, slot);
1022 
1023 	region->region.flags = flags;
1024 
1025 	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
1026 
1027 	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
1028 		"  rc: %i errno: %i slot: %u flags: 0x%x",
1029 		ret, errno, slot, flags);
1030 }
1031 
1032 /*
1033  * VM Memory Region Move
1034  *
1035  * Input Args:
1036  *   vm - Virtual Machine
1037  *   slot - Slot of the memory region to move
1038  *   new_gpa - Starting guest physical address
1039  *
1040  * Output Args: None
1041  *
1042  * Return: None
1043  *
1044  * Change the gpa of a memory region.
1045  */
1046 void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
1047 {
1048 	struct userspace_mem_region *region;
1049 	int ret;
1050 
1051 	region = memslot2region(vm, slot);
1052 
1053 	region->region.guest_phys_addr = new_gpa;
1054 
1055 	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
1056 
1057 	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
1058 		    "ret: %i errno: %i slot: %u new_gpa: 0x%lx",
1059 		    ret, errno, slot, new_gpa);
1060 }
1061 
/*
 * VM Memory Region Delete
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to delete
 *
 * Output Args: None
 *
 * Return: None
 *
 * Looks up the region using the given slot (TEST_FAIL if there is none)
 * and hands it to __vm_mem_region_delete() with its last argument true.
 */
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
	struct userspace_mem_region *region = memslot2region(vm, slot);

	__vm_mem_region_delete(vm, region, true);
}
1079 
1080 /*
1081  * VCPU mmap Size
1082  *
1083  * Input Args: None
1084  *
1085  * Output Args: None
1086  *
1087  * Return:
1088  *   Size of VCPU state
1089  *
1090  * Returns the size of the structure pointed to by the return value
1091  * of vcpu_state().
1092  */
1093 static int vcpu_mmap_sz(void)
1094 {
1095 	int dev_fd, ret;
1096 
1097 	dev_fd = open_kvm_dev_path_or_exit();
1098 
1099 	ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
1100 	TEST_ASSERT(ret >= sizeof(struct kvm_run),
1101 		"%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
1102 		__func__, ret, errno);
1103 
1104 	close(dev_fd);
1105 
1106 	return ret;
1107 }
1108 
1109 /*
1110  * VM VCPU Add
1111  *
1112  * Input Args:
1113  *   vm - Virtual Machine
1114  *   vcpuid - VCPU ID
1115  *
1116  * Output Args: None
1117  *
1118  * Return: None
1119  *
1120  * Adds a virtual CPU to the VM specified by vm with the ID given by vcpuid.
1121  * No additional VCPU setup is done.
1122  */
1123 void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
1124 {
1125 	struct vcpu *vcpu;
1126 
1127 	/* Confirm a vcpu with the specified id doesn't already exist. */
1128 	vcpu = vcpu_find(vm, vcpuid);
1129 	if (vcpu != NULL)
1130 		TEST_FAIL("vcpu with the specified id "
1131 			"already exists,\n"
1132 			"  requested vcpuid: %u\n"
1133 			"  existing vcpuid: %u state: %p",
1134 			vcpuid, vcpu->id, vcpu->state);
1135 
1136 	/* Allocate and initialize new vcpu structure. */
1137 	vcpu = calloc(1, sizeof(*vcpu));
1138 	TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
1139 	vcpu->id = vcpuid;
1140 	vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
1141 	TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
1142 		vcpu->fd, errno);
1143 
1144 	TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
1145 		"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
1146 		vcpu_mmap_sz(), sizeof(*vcpu->state));
1147 	vcpu->state = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
1148 		PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
1149 	TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
1150 		"vcpu id: %u errno: %i", vcpuid, errno);
1151 
1152 	/* Add to linked-list of VCPUs. */
1153 	list_add(&vcpu->list, &vm->vcpus);
1154 }
1155 
1156 /*
1157  * VM Virtual Address Unused Gap
1158  *
1159  * Input Args:
1160  *   vm - Virtual Machine
1161  *   sz - Size (bytes)
1162  *   vaddr_min - Minimum Virtual Address
1163  *
1164  * Output Args: None
1165  *
1166  * Return:
1167  *   Lowest virtual address at or below vaddr_min, with at least
1168  *   sz unused bytes.  TEST_ASSERT failure if no area of at least
1169  *   size sz is available.
1170  *
1171  * Within the VM specified by vm, locates the lowest starting virtual
1172  * address >= vaddr_min, that has at least sz unallocated bytes.  A
1173  * TEST_ASSERT failure occurs for invalid input or no area of at least
1174  * sz unallocated bytes >= vaddr_min is available.
1175  */
1176 static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
1177 				      vm_vaddr_t vaddr_min)
1178 {
1179 	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
1180 
1181 	/* Determine lowest permitted virtual page index. */
1182 	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
1183 	if ((pgidx_start * vm->page_size) < vaddr_min)
1184 		goto no_va_found;
1185 
1186 	/* Loop over section with enough valid virtual page indexes. */
1187 	if (!sparsebit_is_set_num(vm->vpages_valid,
1188 		pgidx_start, pages))
1189 		pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
1190 			pgidx_start, pages);
1191 	do {
1192 		/*
1193 		 * Are there enough unused virtual pages available at
1194 		 * the currently proposed starting virtual page index.
1195 		 * If not, adjust proposed starting index to next
1196 		 * possible.
1197 		 */
1198 		if (sparsebit_is_clear_num(vm->vpages_mapped,
1199 			pgidx_start, pages))
1200 			goto va_found;
1201 		pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
1202 			pgidx_start, pages);
1203 		if (pgidx_start == 0)
1204 			goto no_va_found;
1205 
1206 		/*
1207 		 * If needed, adjust proposed starting virtual address,
1208 		 * to next range of valid virtual addresses.
1209 		 */
1210 		if (!sparsebit_is_set_num(vm->vpages_valid,
1211 			pgidx_start, pages)) {
1212 			pgidx_start = sparsebit_next_set_num(
1213 				vm->vpages_valid, pgidx_start, pages);
1214 			if (pgidx_start == 0)
1215 				goto no_va_found;
1216 		}
1217 	} while (pgidx_start != 0);
1218 
1219 no_va_found:
1220 	TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages);
1221 
1222 	/* NOT REACHED */
1223 	return -1;
1224 
1225 va_found:
1226 	TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
1227 		pgidx_start, pages),
1228 		"Unexpected, invalid virtual page index range,\n"
1229 		"  pgidx_start: 0x%lx\n"
1230 		"  pages: 0x%lx",
1231 		pgidx_start, pages);
1232 	TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
1233 		pgidx_start, pages),
1234 		"Unexpected, pages already mapped,\n"
1235 		"  pgidx_start: 0x%lx\n"
1236 		"  pages: 0x%lx",
1237 		pgidx_start, pages);
1238 
1239 	return pgidx_start * vm->page_size;
1240 }
1241 
1242 /*
1243  * VM Virtual Address Allocate
1244  *
1245  * Input Args:
1246  *   vm - Virtual Machine
1247  *   sz - Size in bytes
1248  *   vaddr_min - Minimum starting virtual address
1249  *   data_memslot - Memory region slot for data pages
1250  *   pgd_memslot - Memory region slot for new virtual translation tables
1251  *
1252  * Output Args: None
1253  *
1254  * Return:
1255  *   Starting guest virtual address
1256  *
1257  * Allocates at least sz bytes within the virtual address space of the vm
1258  * given by vm.  The allocated bytes are mapped to a virtual address >=
1259  * the address given by vaddr_min.  Note that each allocation uses a
1260  * a unique set of pages, with the minimum real allocation being at least
1261  * a page.
1262  */
1263 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
1264 {
1265 	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
1266 
1267 	virt_pgd_alloc(vm);
1268 	vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
1269 					      KVM_UTIL_MIN_PFN * vm->page_size, 0);
1270 
1271 	/*
1272 	 * Find an unused range of virtual page addresses of at least
1273 	 * pages in length.
1274 	 */
1275 	vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
1276 
1277 	/* Map the virtual pages. */
1278 	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
1279 		pages--, vaddr += vm->page_size, paddr += vm->page_size) {
1280 
1281 		virt_pg_map(vm, vaddr, paddr);
1282 
1283 		sparsebit_set(vm->vpages_mapped,
1284 			vaddr >> vm->page_shift);
1285 	}
1286 
1287 	return vaddr_start;
1288 }
1289 
1290 /*
1291  * VM Virtual Address Allocate Pages
1292  *
1293  * Input Args:
1294  *   vm - Virtual Machine
1295  *
1296  * Output Args: None
1297  *
1298  * Return:
1299  *   Starting guest virtual address
1300  *
1301  * Allocates at least N system pages worth of bytes within the virtual address
1302  * space of the vm.
1303  */
1304 vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
1305 {
1306 	return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
1307 }
1308 
1309 /*
1310  * VM Virtual Address Allocate Page
1311  *
1312  * Input Args:
1313  *   vm - Virtual Machine
1314  *
1315  * Output Args: None
1316  *
1317  * Return:
1318  *   Starting guest virtual address
1319  *
1320  * Allocates at least one system page worth of bytes within the virtual address
1321  * space of the vm.
1322  */
1323 vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
1324 {
1325 	return vm_vaddr_alloc_pages(vm, 1);
1326 }
1327 
1328 /*
1329  * Map a range of VM virtual address to the VM's physical address
1330  *
1331  * Input Args:
1332  *   vm - Virtual Machine
1333  *   vaddr - Virtuall address to map
1334  *   paddr - VM Physical Address
1335  *   npages - The number of pages to map
1336  *   pgd_memslot - Memory region slot for new virtual translation tables
1337  *
1338  * Output Args: None
1339  *
1340  * Return: None
1341  *
1342  * Within the VM given by @vm, creates a virtual translation for
1343  * @npages starting at @vaddr to the page range starting at @paddr.
1344  */
1345 void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
1346 	      unsigned int npages)
1347 {
1348 	size_t page_size = vm->page_size;
1349 	size_t size = npages * page_size;
1350 
1351 	TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
1352 	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
1353 
1354 	while (npages--) {
1355 		virt_pg_map(vm, vaddr, paddr);
1356 		vaddr += page_size;
1357 		paddr += page_size;
1358 	}
1359 }
1360 
1361 /*
1362  * Address VM Physical to Host Virtual
1363  *
1364  * Input Args:
1365  *   vm - Virtual Machine
1366  *   gpa - VM physical address
1367  *
1368  * Output Args: None
1369  *
1370  * Return:
1371  *   Equivalent host virtual address
1372  *
1373  * Locates the memory region containing the VM physical address given
1374  * by gpa, within the VM given by vm.  When found, the host virtual
1375  * address providing the memory to the vm physical address is returned.
1376  * A TEST_ASSERT failure occurs if no region containing gpa exists.
1377  */
1378 void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
1379 {
1380 	struct userspace_mem_region *region;
1381 
1382 	region = userspace_mem_region_find(vm, gpa, gpa);
1383 	if (!region) {
1384 		TEST_FAIL("No vm physical memory at 0x%lx", gpa);
1385 		return NULL;
1386 	}
1387 
1388 	return (void *)((uintptr_t)region->host_mem
1389 		+ (gpa - region->region.guest_phys_addr));
1390 }
1391 
1392 /*
1393  * Address Host Virtual to VM Physical
1394  *
1395  * Input Args:
1396  *   vm - Virtual Machine
1397  *   hva - Host virtual address
1398  *
1399  * Output Args: None
1400  *
1401  * Return:
1402  *   Equivalent VM physical address
1403  *
1404  * Locates the memory region containing the host virtual address given
1405  * by hva, within the VM given by vm.  When found, the equivalent
1406  * VM physical address is returned. A TEST_ASSERT failure occurs if no
1407  * region containing hva exists.
1408  */
1409 vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
1410 {
1411 	struct rb_node *node;
1412 
1413 	for (node = vm->regions.hva_tree.rb_node; node; ) {
1414 		struct userspace_mem_region *region =
1415 			container_of(node, struct userspace_mem_region, hva_node);
1416 
1417 		if (hva >= region->host_mem) {
1418 			if (hva <= (region->host_mem
1419 				+ region->region.memory_size - 1))
1420 				return (vm_paddr_t)((uintptr_t)
1421 					region->region.guest_phys_addr
1422 					+ (hva - (uintptr_t)region->host_mem));
1423 
1424 			node = node->rb_right;
1425 		} else
1426 			node = node->rb_left;
1427 	}
1428 
1429 	TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
1430 	return -1;
1431 }
1432 
1433 /*
1434  * Address VM physical to Host Virtual *alias*.
1435  *
1436  * Input Args:
1437  *   vm - Virtual Machine
1438  *   gpa - VM physical address
1439  *
1440  * Output Args: None
1441  *
1442  * Return:
1443  *   Equivalent address within the host virtual *alias* area, or NULL
1444  *   (without failing the test) if the guest memory is not shared (so
1445  *   no alias exists).
1446  *
1447  * When vm_create() and related functions are called with a shared memory
1448  * src_type, we also create a writable, shared alias mapping of the
1449  * underlying guest memory. This allows the host to manipulate guest memory
1450  * without mapping that memory in the guest's address space. And, for
1451  * userfaultfd-based demand paging, we can do so without triggering userfaults.
1452  */
1453 void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
1454 {
1455 	struct userspace_mem_region *region;
1456 	uintptr_t offset;
1457 
1458 	region = userspace_mem_region_find(vm, gpa, gpa);
1459 	if (!region)
1460 		return NULL;
1461 
1462 	if (!region->host_alias)
1463 		return NULL;
1464 
1465 	offset = gpa - region->region.guest_phys_addr;
1466 	return (void *) ((uintptr_t) region->host_alias + offset);
1467 }
1468 
1469 /*
1470  * VM Create IRQ Chip
1471  *
1472  * Input Args:
1473  *   vm - Virtual Machine
1474  *
1475  * Output Args: None
1476  *
1477  * Return: None
1478  *
1479  * Creates an interrupt controller chip for the VM specified by vm.
1480  */
1481 void vm_create_irqchip(struct kvm_vm *vm)
1482 {
1483 	int ret;
1484 
1485 	ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
1486 	TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
1487 		"rc: %i errno: %i", ret, errno);
1488 
1489 	vm->has_irqchip = true;
1490 }
1491 
1492 /*
1493  * VM VCPU State
1494  *
1495  * Input Args:
1496  *   vm - Virtual Machine
1497  *   vcpuid - VCPU ID
1498  *
1499  * Output Args: None
1500  *
1501  * Return:
1502  *   Pointer to structure that describes the state of the VCPU.
1503  *
1504  * Locates and returns a pointer to a structure that describes the
1505  * state of the VCPU with the given vcpuid.
1506  */
1507 struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
1508 {
1509 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1510 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1511 
1512 	return vcpu->state;
1513 }
1514 
/*
 * VM VCPU Run
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *
 * Output Args: None
 *
 * Return: None
 *
 * Runs the VCPU given by vcpuid, within the VM given by vm, through
 * _vcpu_run() and raises a TEST_ASSERT failure unless it returned 0.
 */
void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
{
	int ret;

	ret = _vcpu_run(vm, vcpuid);
	TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
		"rc: %i errno: %i", ret, errno);
}
1535 
1536 int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
1537 {
1538 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1539 	int rc;
1540 
1541 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1542 	do {
1543 		rc = ioctl(vcpu->fd, KVM_RUN, NULL);
1544 	} while (rc == -1 && errno == EINTR);
1545 
1546 	assert_on_unhandled_exception(vm, vcpuid);
1547 
1548 	return rc;
1549 }
1550 
1551 int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid)
1552 {
1553 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1554 
1555 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1556 
1557 	return vcpu->fd;
1558 }
1559 
1560 void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid)
1561 {
1562 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1563 	int ret;
1564 
1565 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1566 
1567 	vcpu->state->immediate_exit = 1;
1568 	ret = ioctl(vcpu->fd, KVM_RUN, NULL);
1569 	vcpu->state->immediate_exit = 0;
1570 
1571 	TEST_ASSERT(ret == -1 && errno == EINTR,
1572 		    "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
1573 		    ret, errno);
1574 }
1575 
1576 void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
1577 			  struct kvm_guest_debug *debug)
1578 {
1579 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1580 	int ret = ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, debug);
1581 
1582 	TEST_ASSERT(ret == 0, "KVM_SET_GUEST_DEBUG failed: %d", ret);
1583 }
1584 
1585 /*
1586  * VM VCPU Set MP State
1587  *
1588  * Input Args:
1589  *   vm - Virtual Machine
1590  *   vcpuid - VCPU ID
1591  *   mp_state - mp_state to be set
1592  *
1593  * Output Args: None
1594  *
1595  * Return: None
1596  *
1597  * Sets the MP state of the VCPU given by vcpuid, to the state given
1598  * by mp_state.
1599  */
1600 void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
1601 		       struct kvm_mp_state *mp_state)
1602 {
1603 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1604 	int ret;
1605 
1606 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1607 
1608 	ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
1609 	TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
1610 		"rc: %i errno: %i", ret, errno);
1611 }
1612 
1613 /*
1614  * VM VCPU Get Reg List
1615  *
1616  * Input Args:
1617  *   vm - Virtual Machine
1618  *   vcpuid - VCPU ID
1619  *
1620  * Output Args:
1621  *   None
1622  *
1623  * Return:
1624  *   A pointer to an allocated struct kvm_reg_list
1625  *
1626  * Get the list of guest registers which are supported for
1627  * KVM_GET_ONE_REG/KVM_SET_ONE_REG calls
1628  */
1629 struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid)
1630 {
1631 	struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
1632 	int ret;
1633 
1634 	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, &reg_list_n);
1635 	TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");
1636 	reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
1637 	reg_list->n = reg_list_n.n;
1638 	vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list);
1639 	return reg_list;
1640 }
1641 
1642 /*
1643  * VM VCPU Regs Get
1644  *
1645  * Input Args:
1646  *   vm - Virtual Machine
1647  *   vcpuid - VCPU ID
1648  *
1649  * Output Args:
1650  *   regs - current state of VCPU regs
1651  *
1652  * Return: None
1653  *
1654  * Obtains the current register state for the VCPU specified by vcpuid
1655  * and stores it at the location given by regs.
1656  */
1657 void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
1658 {
1659 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1660 	int ret;
1661 
1662 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1663 
1664 	ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
1665 	TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
1666 		ret, errno);
1667 }
1668 
1669 /*
1670  * VM VCPU Regs Set
1671  *
1672  * Input Args:
1673  *   vm - Virtual Machine
1674  *   vcpuid - VCPU ID
1675  *   regs - Values to set VCPU regs to
1676  *
1677  * Output Args: None
1678  *
1679  * Return: None
1680  *
1681  * Sets the regs of the VCPU specified by vcpuid to the values
1682  * given by regs.
1683  */
1684 void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
1685 {
1686 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1687 	int ret;
1688 
1689 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1690 
1691 	ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
1692 	TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
1693 		ret, errno);
1694 }
1695 
1696 #ifdef __KVM_HAVE_VCPU_EVENTS
1697 void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
1698 		     struct kvm_vcpu_events *events)
1699 {
1700 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1701 	int ret;
1702 
1703 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1704 
1705 	ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
1706 	TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i",
1707 		ret, errno);
1708 }
1709 
1710 void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
1711 		     struct kvm_vcpu_events *events)
1712 {
1713 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1714 	int ret;
1715 
1716 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1717 
1718 	ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
1719 	TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
1720 		ret, errno);
1721 }
1722 #endif
1723 
1724 #ifdef __x86_64__
1725 void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
1726 			   struct kvm_nested_state *state)
1727 {
1728 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1729 	int ret;
1730 
1731 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1732 
1733 	ret = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, state);
1734 	TEST_ASSERT(ret == 0,
1735 		"KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
1736 		ret, errno);
1737 }
1738 
1739 int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
1740 			  struct kvm_nested_state *state, bool ignore_error)
1741 {
1742 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1743 	int ret;
1744 
1745 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1746 
1747 	ret = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, state);
1748 	if (!ignore_error) {
1749 		TEST_ASSERT(ret == 0,
1750 			"KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
1751 			ret, errno);
1752 	}
1753 
1754 	return ret;
1755 }
1756 #endif
1757 
1758 /*
1759  * VM VCPU System Regs Get
1760  *
1761  * Input Args:
1762  *   vm - Virtual Machine
1763  *   vcpuid - VCPU ID
1764  *
1765  * Output Args:
1766  *   sregs - current state of VCPU system regs
1767  *
1768  * Return: None
1769  *
1770  * Obtains the current system register state for the VCPU specified by
1771  * vcpuid and stores it at the location given by sregs.
1772  */
1773 void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
1774 {
1775 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1776 	int ret;
1777 
1778 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1779 
1780 	ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
1781 	TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
1782 		ret, errno);
1783 }
1784 
/*
 * VM VCPU System Regs Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   sregs - Values to set VCPU system regs to
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the system regs of the VCPU specified by vcpuid through
 * _vcpu_sregs_set() and raises a TEST_ASSERT failure unless it
 * returned 0.
 */
void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
{
	int ret;

	ret = _vcpu_sregs_set(vm, vcpuid, sregs);
	TEST_ASSERT(ret == 0, "KVM_SET_SREGS IOCTL failed, "
		"rc: %i errno: %i", ret, errno);
}
1806 
1807 int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
1808 {
1809 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1810 
1811 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1812 
1813 	return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
1814 }
1815 
1816 void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
1817 {
1818 	int ret;
1819 
1820 	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_FPU, fpu);
1821 	TEST_ASSERT(ret == 0, "KVM_GET_FPU failed, rc: %i errno: %i (%s)",
1822 		    ret, errno, strerror(errno));
1823 }
1824 
1825 void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
1826 {
1827 	int ret;
1828 
1829 	ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_FPU, fpu);
1830 	TEST_ASSERT(ret == 0, "KVM_SET_FPU failed, rc: %i errno: %i (%s)",
1831 		    ret, errno, strerror(errno));
1832 }
1833 
1834 void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
1835 {
1836 	int ret;
1837 
1838 	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, reg);
1839 	TEST_ASSERT(ret == 0, "KVM_GET_ONE_REG failed, rc: %i errno: %i (%s)",
1840 		    ret, errno, strerror(errno));
1841 }
1842 
1843 void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
1844 {
1845 	int ret;
1846 
1847 	ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, reg);
1848 	TEST_ASSERT(ret == 0, "KVM_SET_ONE_REG failed, rc: %i errno: %i (%s)",
1849 		    ret, errno, strerror(errno));
1850 }
1851 
/*
 * VCPU Ioctl
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   cmd - Ioctl number
 *   arg - Argument to pass to the ioctl
 *
 * Return: None
 *
 * Issues an arbitrary ioctl on a VCPU fd through _vcpu_ioctl() and
 * raises a TEST_ASSERT failure unless it returned 0.
 */
void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
		unsigned long cmd, void *arg)
{
	int ret = _vcpu_ioctl(vm, vcpuid, cmd, arg);

	TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
		cmd, ret, errno, strerror(errno));
}
1874 
1875 int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
1876 		unsigned long cmd, void *arg)
1877 {
1878 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1879 	int ret;
1880 
1881 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1882 
1883 	ret = ioctl(vcpu->fd, cmd, arg);
1884 
1885 	return ret;
1886 }
1887 
1888 void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid)
1889 {
1890 	struct vcpu *vcpu;
1891 	uint32_t size = vm->dirty_ring_size;
1892 
1893 	TEST_ASSERT(size > 0, "Should enable dirty ring first");
1894 
1895 	vcpu = vcpu_find(vm, vcpuid);
1896 
1897 	TEST_ASSERT(vcpu, "Cannot find vcpu %u", vcpuid);
1898 
1899 	if (!vcpu->dirty_gfns) {
1900 		void *addr;
1901 
1902 		addr = mmap(NULL, size, PROT_READ,
1903 			    MAP_PRIVATE, vcpu->fd,
1904 			    vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
1905 		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");
1906 
1907 		addr = mmap(NULL, size, PROT_READ | PROT_EXEC,
1908 			    MAP_PRIVATE, vcpu->fd,
1909 			    vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
1910 		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");
1911 
1912 		addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
1913 			    MAP_SHARED, vcpu->fd,
1914 			    vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
1915 		TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");
1916 
1917 		vcpu->dirty_gfns = addr;
1918 		vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
1919 	}
1920 
1921 	return vcpu->dirty_gfns;
1922 }
1923 
/*
 * VM Ioctl
 *
 * Input Args:
 *   vm - Virtual Machine
 *   cmd - Ioctl number
 *   arg - Argument to pass to the ioctl
 *
 * Return: None
 *
 * Issues an arbitrary ioctl on a VM fd through _vm_ioctl() and raises a
 * TEST_ASSERT failure unless it returned 0.
 */
void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
{
	int ret = _vm_ioctl(vm, cmd, arg);

	TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
		cmd, ret, errno, strerror(errno));
}
1944 
1945 int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
1946 {
1947 	return ioctl(vm->fd, cmd, arg);
1948 }
1949 
1950 /*
1951  * KVM system ioctl
1952  *
1953  * Input Args:
1954  *   vm - Virtual Machine
1955  *   cmd - Ioctl number
1956  *   arg - Argument to pass to the ioctl
1957  *
1958  * Return: None
1959  *
1960  * Issues an arbitrary ioctl on a KVM fd.
1961  */
1962 void kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
1963 {
1964 	int ret;
1965 
1966 	ret = ioctl(vm->kvm_fd, cmd, arg);
1967 	TEST_ASSERT(ret == 0, "KVM ioctl %lu failed, rc: %i errno: %i (%s)",
1968 		cmd, ret, errno, strerror(errno));
1969 }
1970 
1971 int _kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
1972 {
1973 	return ioctl(vm->kvm_fd, cmd, arg);
1974 }
1975 
1976 /*
1977  * Device Ioctl
1978  */
1979 
1980 int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr)
1981 {
1982 	struct kvm_device_attr attribute = {
1983 		.group = group,
1984 		.attr = attr,
1985 		.flags = 0,
1986 	};
1987 
1988 	return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute);
1989 }
1990 
/*
 * Checked variant of _kvm_device_check_attr(): a non-zero result is a
 * TEST_ASSERT failure, otherwise the result is returned.
 */
int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr)
{
	int ret;

	ret = _kvm_device_check_attr(dev_fd, group, attr);
	TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno);

	return ret;
}
1998 
1999 int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd)
2000 {
2001 	struct kvm_create_device create_dev;
2002 	int ret;
2003 
2004 	create_dev.type = type;
2005 	create_dev.fd = -1;
2006 	create_dev.flags = test ? KVM_CREATE_DEVICE_TEST : 0;
2007 	ret = ioctl(vm_get_fd(vm), KVM_CREATE_DEVICE, &create_dev);
2008 	*fd = create_dev.fd;
2009 	return ret;
2010 }
2011 
/*
 * Create (or, with test set, only probe) a device of the given type.
 *
 * With test set, the raw result of _kvm_create_device() is returned.
 * Otherwise a non-zero result is a TEST_ASSERT failure and the fd
 * reported by _kvm_create_device() is returned.
 */
int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test)
{
	int fd, ret;

	ret = _kvm_create_device(vm, type, test, &fd);

	/* A probe only reports the ioctl result; nothing is asserted. */
	if (test)
		return ret;

	TEST_ASSERT(!ret,
		    "KVM_CREATE_DEVICE IOCTL failed, rc: %i errno: %i", ret, errno);

	return fd;
}
2025 
2026 int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
2027 		      void *val, bool write)
2028 {
2029 	struct kvm_device_attr kvmattr = {
2030 		.group = group,
2031 		.attr = attr,
2032 		.flags = 0,
2033 		.addr = (uintptr_t)val,
2034 	};
2035 	int ret;
2036 
2037 	ret = ioctl(dev_fd, write ? KVM_SET_DEVICE_ATTR : KVM_GET_DEVICE_ATTR,
2038 		    &kvmattr);
2039 	return ret;
2040 }
2041 
/*
 * Checked variant of _kvm_device_access(): a non-zero result is a
 * TEST_ASSERT failure, otherwise the result is returned.
 */
int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
		      void *val, bool write)
{
	int ret;

	ret = _kvm_device_access(dev_fd, group, attr, val, write);
	TEST_ASSERT(!ret, "KVM_SET|GET_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno);

	return ret;
}
2050 
2051 int _vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
2052 			  uint64_t attr)
2053 {
2054 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
2055 
2056 	TEST_ASSERT(vcpu, "nonexistent vcpu id: %d", vcpuid);
2057 
2058 	return _kvm_device_check_attr(vcpu->fd, group, attr);
2059 }
2060 
/*
 * Checked variant of _vcpu_has_device_attr(): a non-zero result is a
 * TEST_ASSERT failure, otherwise the result is returned.
 */
int vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
				 uint64_t attr)
{
	int ret;

	ret = _vcpu_has_device_attr(vm, vcpuid, group, attr);
	TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno);

	return ret;
}
2069 
2070 int _vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
2071 			     uint64_t attr, void *val, bool write)
2072 {
2073 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
2074 
2075 	TEST_ASSERT(vcpu, "nonexistent vcpu id: %d", vcpuid);
2076 
2077 	return _kvm_device_access(vcpu->fd, group, attr, val, write);
2078 }
2079 
2080 int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
2081 			    uint64_t attr, void *val, bool write)
2082 {
2083 	int ret = _vcpu_access_device_attr(vm, vcpuid, group, attr, val, write);
2084 
2085 	TEST_ASSERT(!ret, "KVM_SET|GET_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno);
2086 	return ret;
2087 }
2088 
2089 /*
2090  * VM Dump
2091  *
2092  * Input Args:
2093  *   vm - Virtual Machine
2094  *   indent - Left margin indent amount
2095  *
2096  * Output Args:
2097  *   stream - Output FILE stream
2098  *
2099  * Return: None
2100  *
2101  * Dumps the current state of the VM given by vm, to the FILE stream
2102  * given by stream.
2103  */
2104 void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
2105 {
2106 	int ctr;
2107 	struct userspace_mem_region *region;
2108 	struct vcpu *vcpu;
2109 
2110 	fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
2111 	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
2112 	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
2113 	fprintf(stream, "%*sMem Regions:\n", indent, "");
2114 	hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
2115 		fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
2116 			"host_virt: %p\n", indent + 2, "",
2117 			(uint64_t) region->region.guest_phys_addr,
2118 			(uint64_t) region->region.memory_size,
2119 			region->host_mem);
2120 		fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
2121 		sparsebit_dump(stream, region->unused_phy_pages, 0);
2122 	}
2123 	fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
2124 	sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
2125 	fprintf(stream, "%*spgd_created: %u\n", indent, "",
2126 		vm->pgd_created);
2127 	if (vm->pgd_created) {
2128 		fprintf(stream, "%*sVirtual Translation Tables:\n",
2129 			indent + 2, "");
2130 		virt_dump(stream, vm, indent + 4);
2131 	}
2132 	fprintf(stream, "%*sVCPUs:\n", indent, "");
2133 	list_for_each_entry(vcpu, &vm->vcpus, list)
2134 		vcpu_dump(stream, vm, vcpu->id, indent + 2);
2135 }
2136 
2137 /* Known KVM exit reasons */
2138 static struct exit_reason {
2139 	unsigned int reason;
2140 	const char *name;
2141 } exit_reasons_known[] = {
2142 	{KVM_EXIT_UNKNOWN, "UNKNOWN"},
2143 	{KVM_EXIT_EXCEPTION, "EXCEPTION"},
2144 	{KVM_EXIT_IO, "IO"},
2145 	{KVM_EXIT_HYPERCALL, "HYPERCALL"},
2146 	{KVM_EXIT_DEBUG, "DEBUG"},
2147 	{KVM_EXIT_HLT, "HLT"},
2148 	{KVM_EXIT_MMIO, "MMIO"},
2149 	{KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
2150 	{KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
2151 	{KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
2152 	{KVM_EXIT_INTR, "INTR"},
2153 	{KVM_EXIT_SET_TPR, "SET_TPR"},
2154 	{KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
2155 	{KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
2156 	{KVM_EXIT_S390_RESET, "S390_RESET"},
2157 	{KVM_EXIT_DCR, "DCR"},
2158 	{KVM_EXIT_NMI, "NMI"},
2159 	{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
2160 	{KVM_EXIT_OSI, "OSI"},
2161 	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
2162 	{KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
2163 	{KVM_EXIT_X86_RDMSR, "RDMSR"},
2164 	{KVM_EXIT_X86_WRMSR, "WRMSR"},
2165 	{KVM_EXIT_XEN, "XEN"},
2166 #ifdef KVM_EXIT_MEMORY_NOT_PRESENT
2167 	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
2168 #endif
2169 };
2170 
2171 /*
2172  * Exit Reason String
2173  *
2174  * Input Args:
2175  *   exit_reason - Exit reason
2176  *
2177  * Output Args: None
2178  *
2179  * Return:
2180  *   Constant string pointer describing the exit reason.
2181  *
2182  * Locates and returns a constant string that describes the KVM exit
2183  * reason given by exit_reason.  If no such string is found, a constant
2184  * string of "Unknown" is returned.
2185  */
2186 const char *exit_reason_str(unsigned int exit_reason)
2187 {
2188 	unsigned int n1;
2189 
2190 	for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
2191 		if (exit_reason == exit_reasons_known[n1].reason)
2192 			return exit_reasons_known[n1].name;
2193 	}
2194 
2195 	return "Unknown";
2196 }
2197 
2198 /*
2199  * Physical Contiguous Page Allocator
2200  *
2201  * Input Args:
2202  *   vm - Virtual Machine
2203  *   num - number of pages
2204  *   paddr_min - Physical address minimum
2205  *   memslot - Memory region to allocate page from
2206  *
2207  * Output Args: None
2208  *
2209  * Return:
2210  *   Starting physical address
2211  *
2212  * Within the VM specified by vm, locates a range of available physical
2213  * pages at or above paddr_min. If found, the pages are marked as in use
2214  * and their base address is returned. A TEST_ASSERT failure occurs if
2215  * not enough pages are available at or above paddr_min.
2216  */
2217 vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
2218 			      vm_paddr_t paddr_min, uint32_t memslot)
2219 {
2220 	struct userspace_mem_region *region;
2221 	sparsebit_idx_t pg, base;
2222 
2223 	TEST_ASSERT(num > 0, "Must allocate at least one page");
2224 
2225 	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
2226 		"not divisible by page size.\n"
2227 		"  paddr_min: 0x%lx page_size: 0x%x",
2228 		paddr_min, vm->page_size);
2229 
2230 	region = memslot2region(vm, memslot);
2231 	base = pg = paddr_min >> vm->page_shift;
2232 
2233 	do {
2234 		for (; pg < base + num; ++pg) {
2235 			if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
2236 				base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
2237 				break;
2238 			}
2239 		}
2240 	} while (pg && pg != base + num);
2241 
2242 	if (pg == 0) {
2243 		fprintf(stderr, "No guest physical page available, "
2244 			"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
2245 			paddr_min, vm->page_size, memslot);
2246 		fputs("---- vm dump ----\n", stderr);
2247 		vm_dump(stderr, vm, 2);
2248 		abort();
2249 	}
2250 
2251 	for (pg = base; pg < base + num; ++pg)
2252 		sparsebit_clear(region->unused_phy_pages, pg);
2253 
2254 	return base * vm->page_size;
2255 }
2256 
2257 vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
2258 			     uint32_t memslot)
2259 {
2260 	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
2261 }
2262 
2263 /* Arbitrary minimum physical address used for virtual translation tables. */
2264 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
2265 
2266 vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
2267 {
2268 	return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
2269 }
2270 
2271 /*
2272  * Address Guest Virtual to Host Virtual
2273  *
2274  * Input Args:
2275  *   vm - Virtual Machine
2276  *   gva - VM virtual address
2277  *
2278  * Output Args: None
2279  *
2280  * Return:
2281  *   Equivalent host virtual address
2282  */
2283 void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
2284 {
2285 	return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
2286 }
2287 
/*
 * Is Unrestricted Guest
 *
 * Input Args:
 *   vm - Virtual Machine
 *
 * Output Args: None
 *
 * Return: True if the first character of the kvm_intel
 * unrestricted_guest module parameter is 'Y'.  False otherwise,
 * including when the parameter file cannot be opened.
 *
 * When vm is NULL, /dev/kvm is opened and closed first so that the KVM
 * vendor-specific module is loaded before the parameter is read.
 */
bool vm_is_unrestricted_guest(struct kvm_vm *vm)
{
	FILE *param_file;
	size_t count;
	char val;

	if (!vm) {
		/* Ensure that the KVM vendor-specific module is loaded. */
		close(open_kvm_dev_path_or_exit());
	}

	param_file = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
	if (!param_file)
		return false;

	count = fread(&val, sizeof(char), 1, param_file);
	TEST_ASSERT(count == 1, "Unable to read from param file.");
	fclose(param_file);

	return val == 'Y';
}
2320 
2321 unsigned int vm_get_page_size(struct kvm_vm *vm)
2322 {
2323 	return vm->page_size;
2324 }
2325 
2326 unsigned int vm_get_page_shift(struct kvm_vm *vm)
2327 {
2328 	return vm->page_shift;
2329 }
2330 
2331 uint64_t vm_get_max_gfn(struct kvm_vm *vm)
2332 {
2333 	return vm->max_gfn;
2334 }
2335 
2336 int vm_get_fd(struct kvm_vm *vm)
2337 {
2338 	return vm->fd;
2339 }
2340 
/*
 * Convert a page count from one page size to another.
 *
 * Input Args:
 *   num_pages - number of pages, in units of (1 << page_shift) bytes
 *   page_shift - log2 of the current page size
 *   new_page_shift - log2 of the page size to convert to
 *   ceil - when converting to larger pages, round a partial page up
 *          (true) or discard it (false)
 *
 * Return:
 *   The number of (1 << new_page_shift)-sized pages.  Converting to
 *   smaller or equal-sized pages is exact, so ceil is ignored there.
 */
static unsigned int vm_calc_num_pages(unsigned int num_pages,
				      unsigned int page_shift,
				      unsigned int new_page_shift,
				      bool ceil)
{
	unsigned int n;

	if (page_shift >= new_page_shift)
		return num_pages * (1U << (page_shift - new_page_shift));

	/*
	 * Compute the divisor only on this path: if page_shift were larger,
	 * the unsigned subtraction would wrap and the shift count would
	 * exceed the width of the type, which is undefined behaviour.
	 */
	n = 1U << (new_page_shift - page_shift);

	return num_pages / n + !!(ceil && num_pages % n);
}
2353 
/*
 * Return the index of the lowest set bit of the host page size as
 * reported by getpagesize(), i.e. __builtin_ffs() of that size minus one.
 */
static inline int getpageshift(void)
{
	int host_page_size = getpagesize();
	int lowest_set_bit = __builtin_ffs(host_page_size);

	return lowest_set_bit - 1;
}
2358 
2359 unsigned int
2360 vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
2361 {
2362 	return vm_calc_num_pages(num_guest_pages,
2363 				 vm_guest_mode_params[mode].page_shift,
2364 				 getpageshift(), true);
2365 }
2366 
2367 unsigned int
2368 vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages)
2369 {
2370 	return vm_calc_num_pages(num_host_pages, getpageshift(),
2371 				 vm_guest_mode_params[mode].page_shift, false);
2372 }
2373 
2374 unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
2375 {
2376 	unsigned int n;
2377 	n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
2378 	return vm_adjust_num_guest_pages(mode, n);
2379 }
2380 
2381 int vm_get_stats_fd(struct kvm_vm *vm)
2382 {
2383 	return ioctl(vm->fd, KVM_GET_STATS_FD, NULL);
2384 }
2385 
2386 int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid)
2387 {
2388 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
2389 
2390 	return ioctl(vcpu->fd, KVM_GET_STATS_FD, NULL);
2391 }
2392