1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * tools/testing/selftests/kvm/lib/kvm_util.c
4  *
5  * Copyright (C) 2018, Google LLC.
6  */
7 
8 #define _GNU_SOURCE /* for program_invocation_name */
9 #include "test_util.h"
10 #include "kvm_util.h"
11 #include "kvm_util_internal.h"
12 #include "processor.h"
13 
14 #include <assert.h>
15 #include <sys/mman.h>
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <unistd.h>
19 #include <linux/kernel.h>
20 
21 #define KVM_UTIL_MIN_PFN	2
22 
23 static int vcpu_mmap_sz(void);
24 
25 int open_path_or_exit(const char *path, int flags)
26 {
27 	int fd;
28 
29 	fd = open(path, flags);
30 	if (fd < 0) {
31 		print_skip("%s not available (errno: %d)", path, errno);
32 		exit(KSFT_SKIP);
33 	}
34 
35 	return fd;
36 }
37 
38 /*
39  * Open KVM_DEV_PATH if available, otherwise exit the entire program.
40  *
41  * Input Args:
42  *   flags - The flags to pass when opening KVM_DEV_PATH.
43  *
44  * Return:
45  *   The opened file descriptor of /dev/kvm.
46  */
47 static int _open_kvm_dev_path_or_exit(int flags)
48 {
49 	return open_path_or_exit(KVM_DEV_PATH, flags);
50 }
51 
52 int open_kvm_dev_path_or_exit(void)
53 {
54 	return _open_kvm_dev_path_or_exit(O_RDONLY);
55 }
56 
57 /*
58  * Capability
59  *
60  * Input Args:
61  *   cap - Capability
62  *
63  * Output Args: None
64  *
65  * Return:
66  *   On success, the Value corresponding to the capability (KVM_CAP_*)
67  *   specified by the value of cap.  On failure a TEST_ASSERT failure
68  *   is produced.
69  *
70  * Looks up and returns the value corresponding to the capability
71  * (KVM_CAP_*) given by cap.
72  */
73 int kvm_check_cap(long cap)
74 {
75 	int ret;
76 	int kvm_fd;
77 
78 	kvm_fd = open_kvm_dev_path_or_exit();
79 	ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
80 	TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n"
81 		"  rc: %i errno: %i", ret, errno);
82 
83 	close(kvm_fd);
84 
85 	return ret;
86 }
87 
88 /* VM Check Capability
89  *
90  * Input Args:
91  *   vm - Virtual Machine
92  *   cap - Capability
93  *
94  * Output Args: None
95  *
96  * Return:
97  *   On success, the Value corresponding to the capability (KVM_CAP_*)
98  *   specified by the value of cap.  On failure a TEST_ASSERT failure
99  *   is produced.
100  *
101  * Looks up and returns the value corresponding to the capability
102  * (KVM_CAP_*) given by cap.
103  */
104 int vm_check_cap(struct kvm_vm *vm, long cap)
105 {
106 	int ret;
107 
108 	ret = ioctl(vm->fd, KVM_CHECK_EXTENSION, cap);
109 	TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION VM IOCTL failed,\n"
110 		"  rc: %i errno: %i", ret, errno);
111 
112 	return ret;
113 }
114 
115 /* VM Enable Capability
116  *
117  * Input Args:
118  *   vm - Virtual Machine
119  *   cap - Capability
120  *
121  * Output Args: None
122  *
123  * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
124  *
125  * Enables a capability (KVM_CAP_*) on the VM.
126  */
127 int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
128 {
129 	int ret;
130 
131 	ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap);
132 	TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n"
133 		"  rc: %i errno: %i", ret, errno);
134 
135 	return ret;
136 }
137 
138 /* VCPU Enable Capability
139  *
140  * Input Args:
141  *   vm - Virtual Machine
142  *   vcpu_id - VCPU
143  *   cap - Capability
144  *
145  * Output Args: None
146  *
147  * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
148  *
149  * Enables a capability (KVM_CAP_*) on the VCPU.
150  */
151 int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
152 		    struct kvm_enable_cap *cap)
153 {
154 	struct vcpu *vcpu = vcpu_find(vm, vcpu_id);
155 	int r;
156 
157 	TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id);
158 
159 	r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap);
160 	TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n"
161 			"  rc: %i, errno: %i", r, errno);
162 
163 	return r;
164 }
165 
166 void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
167 {
168 	struct kvm_enable_cap cap = { 0 };
169 
170 	cap.cap = KVM_CAP_DIRTY_LOG_RING;
171 	cap.args[0] = ring_size;
172 	vm_enable_cap(vm, &cap);
173 	vm->dirty_ring_size = ring_size;
174 }
175 
176 static void vm_open(struct kvm_vm *vm, int perm)
177 {
178 	vm->kvm_fd = _open_kvm_dev_path_or_exit(perm);
179 
180 	if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
181 		print_skip("immediate_exit not available");
182 		exit(KSFT_SKIP);
183 	}
184 
185 	vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type);
186 	TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
187 		"rc: %i errno: %i", vm->fd, errno);
188 }
189 
190 const char *vm_guest_mode_string(uint32_t i)
191 {
192 	static const char * const strings[] = {
193 		[VM_MODE_P52V48_4K]	= "PA-bits:52,  VA-bits:48,  4K pages",
194 		[VM_MODE_P52V48_64K]	= "PA-bits:52,  VA-bits:48, 64K pages",
195 		[VM_MODE_P48V48_4K]	= "PA-bits:48,  VA-bits:48,  4K pages",
196 		[VM_MODE_P48V48_16K]	= "PA-bits:48,  VA-bits:48, 16K pages",
197 		[VM_MODE_P48V48_64K]	= "PA-bits:48,  VA-bits:48, 64K pages",
198 		[VM_MODE_P40V48_4K]	= "PA-bits:40,  VA-bits:48,  4K pages",
199 		[VM_MODE_P40V48_16K]	= "PA-bits:40,  VA-bits:48, 16K pages",
200 		[VM_MODE_P40V48_64K]	= "PA-bits:40,  VA-bits:48, 64K pages",
201 		[VM_MODE_PXXV48_4K]	= "PA-bits:ANY, VA-bits:48,  4K pages",
202 		[VM_MODE_P47V64_4K]	= "PA-bits:47,  VA-bits:64,  4K pages",
203 		[VM_MODE_P44V64_4K]	= "PA-bits:44,  VA-bits:64,  4K pages",
204 		[VM_MODE_P36V48_4K]	= "PA-bits:36,  VA-bits:48,  4K pages",
205 		[VM_MODE_P36V48_16K]	= "PA-bits:36,  VA-bits:48, 16K pages",
206 		[VM_MODE_P36V48_64K]	= "PA-bits:36,  VA-bits:48, 64K pages",
207 		[VM_MODE_P36V47_16K]	= "PA-bits:36,  VA-bits:47, 16K pages",
208 	};
209 	_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
210 		       "Missing new mode strings?");
211 
212 	TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i);
213 
214 	return strings[i];
215 }
216 
217 const struct vm_guest_mode_params vm_guest_mode_params[] = {
218 	[VM_MODE_P52V48_4K]	= { 52, 48,  0x1000, 12 },
219 	[VM_MODE_P52V48_64K]	= { 52, 48, 0x10000, 16 },
220 	[VM_MODE_P48V48_4K]	= { 48, 48,  0x1000, 12 },
221 	[VM_MODE_P48V48_16K]	= { 48, 48,  0x4000, 14 },
222 	[VM_MODE_P48V48_64K]	= { 48, 48, 0x10000, 16 },
223 	[VM_MODE_P40V48_4K]	= { 40, 48,  0x1000, 12 },
224 	[VM_MODE_P40V48_16K]	= { 40, 48,  0x4000, 14 },
225 	[VM_MODE_P40V48_64K]	= { 40, 48, 0x10000, 16 },
226 	[VM_MODE_PXXV48_4K]	= {  0,  0,  0x1000, 12 },
227 	[VM_MODE_P47V64_4K]	= { 47, 64,  0x1000, 12 },
228 	[VM_MODE_P44V64_4K]	= { 44, 64,  0x1000, 12 },
229 	[VM_MODE_P36V48_4K]	= { 36, 48,  0x1000, 12 },
230 	[VM_MODE_P36V48_16K]	= { 36, 48,  0x4000, 14 },
231 	[VM_MODE_P36V48_64K]	= { 36, 48, 0x10000, 16 },
232 	[VM_MODE_P36V47_16K]	= { 36, 47,  0x4000, 14 },
233 };
234 _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
235 	       "Missing new mode params?");
236 
237 /*
238  * VM Create
239  *
240  * Input Args:
241  *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
242  *   phy_pages - Physical memory pages
243  *   perm - permission
244  *
245  * Output Args: None
246  *
247  * Return:
248  *   Pointer to opaque structure that describes the created VM.
249  *
250  * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
251  * When phy_pages is non-zero, a memory region of phy_pages physical pages
252  * is created and mapped starting at guest physical address 0.  The file
253  * descriptor to control the created VM is created with the permissions
254  * given by perm (e.g. O_RDWR).
255  */
256 struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
257 {
258 	struct kvm_vm *vm;
259 
260 	pr_debug("%s: mode='%s' pages='%ld' perm='%d'\n", __func__,
261 		 vm_guest_mode_string(mode), phy_pages, perm);
262 
263 	vm = calloc(1, sizeof(*vm));
264 	TEST_ASSERT(vm != NULL, "Insufficient Memory");
265 
266 	INIT_LIST_HEAD(&vm->vcpus);
267 	vm->regions.gpa_tree = RB_ROOT;
268 	vm->regions.hva_tree = RB_ROOT;
269 	hash_init(vm->regions.slot_hash);
270 
271 	vm->mode = mode;
272 	vm->type = 0;
273 
274 	vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
275 	vm->va_bits = vm_guest_mode_params[mode].va_bits;
276 	vm->page_size = vm_guest_mode_params[mode].page_size;
277 	vm->page_shift = vm_guest_mode_params[mode].page_shift;
278 
279 	/* Setup mode specific traits. */
280 	switch (vm->mode) {
281 	case VM_MODE_P52V48_4K:
282 		vm->pgtable_levels = 4;
283 		break;
284 	case VM_MODE_P52V48_64K:
285 		vm->pgtable_levels = 3;
286 		break;
287 	case VM_MODE_P48V48_4K:
288 		vm->pgtable_levels = 4;
289 		break;
290 	case VM_MODE_P48V48_64K:
291 		vm->pgtable_levels = 3;
292 		break;
293 	case VM_MODE_P40V48_4K:
294 	case VM_MODE_P36V48_4K:
295 		vm->pgtable_levels = 4;
296 		break;
297 	case VM_MODE_P40V48_64K:
298 	case VM_MODE_P36V48_64K:
299 		vm->pgtable_levels = 3;
300 		break;
301 	case VM_MODE_P48V48_16K:
302 	case VM_MODE_P40V48_16K:
303 	case VM_MODE_P36V48_16K:
304 		vm->pgtable_levels = 4;
305 		break;
306 	case VM_MODE_P36V47_16K:
307 		vm->pgtable_levels = 3;
308 		break;
309 	case VM_MODE_PXXV48_4K:
310 #ifdef __x86_64__
311 		kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
312 		/*
313 		 * Ignore KVM support for 5-level paging (vm->va_bits == 57),
314 		 * it doesn't take effect unless a CR4.LA57 is set, which it
315 		 * isn't for this VM_MODE.
316 		 */
317 		TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
318 			    "Linear address width (%d bits) not supported",
319 			    vm->va_bits);
320 		pr_debug("Guest physical address width detected: %d\n",
321 			 vm->pa_bits);
322 		vm->pgtable_levels = 4;
323 		vm->va_bits = 48;
324 #else
325 		TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
326 #endif
327 		break;
328 	case VM_MODE_P47V64_4K:
329 		vm->pgtable_levels = 5;
330 		break;
331 	case VM_MODE_P44V64_4K:
332 		vm->pgtable_levels = 5;
333 		break;
334 	default:
335 		TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
336 	}
337 
338 #ifdef __aarch64__
339 	if (vm->pa_bits != 40)
340 		vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
341 #endif
342 
343 	vm_open(vm, perm);
344 
345 	/* Limit to VA-bit canonical virtual addresses. */
346 	vm->vpages_valid = sparsebit_alloc();
347 	sparsebit_set_num(vm->vpages_valid,
348 		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
349 	sparsebit_set_num(vm->vpages_valid,
350 		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
351 		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
352 
353 	/* Limit physical addresses to PA-bits. */
354 	vm->max_gfn = vm_compute_max_gfn(vm);
355 
356 	/* Allocate and setup memory for guest. */
357 	vm->vpages_mapped = sparsebit_alloc();
358 	if (phy_pages != 0)
359 		vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
360 					    0, 0, phy_pages, 0);
361 
362 	return vm;
363 }
364 
365 /*
366  * VM Create with customized parameters
367  *
368  * Input Args:
369  *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
370  *   nr_vcpus - VCPU count
371  *   slot0_mem_pages - Slot0 physical memory size
372  *   extra_mem_pages - Non-slot0 physical memory total size
373  *   num_percpu_pages - Per-cpu physical memory pages
374  *   guest_code - Guest entry point
375  *   vcpuids - VCPU IDs
376  *
377  * Output Args: None
378  *
379  * Return:
380  *   Pointer to opaque structure that describes the created VM.
381  *
382  * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K),
383  * with customized slot0 memory size, at least 512 pages currently.
384  * extra_mem_pages is only used to calculate the maximum page table size,
385  * no real memory allocation for non-slot0 memory in this function.
386  */
387 struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
388 				    uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
389 				    uint32_t num_percpu_pages, void *guest_code,
390 				    uint32_t vcpuids[])
391 {
392 	uint64_t vcpu_pages, extra_pg_pages, pages;
393 	struct kvm_vm *vm;
394 	int i;
395 
396 	/*
397 	 * Permission needs to be requested before KVM_SET_CPUID2.
398 	 */
399 	vm_xsave_req_perm();
400 
401 	/* Force slot0 memory size not small than DEFAULT_GUEST_PHY_PAGES */
402 	if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES)
403 		slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES;
404 
405 	/* The maximum page table size for a memory region will be when the
406 	 * smallest pages are used. Considering each page contains x page
407 	 * table descriptors, the total extra size for page tables (for extra
408 	 * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
409 	 * than N/x*2.
410 	 */
411 	vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
412 	extra_pg_pages = (slot0_mem_pages + extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
413 	pages = slot0_mem_pages + vcpu_pages + extra_pg_pages;
414 
415 	TEST_ASSERT(nr_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
416 		    "nr_vcpus = %d too large for host, max-vcpus = %d",
417 		    nr_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));
418 
419 	pages = vm_adjust_num_guest_pages(mode, pages);
420 	vm = vm_create(mode, pages, O_RDWR);
421 
422 	kvm_vm_elf_load(vm, program_invocation_name);
423 
424 #ifdef __x86_64__
425 	vm_create_irqchip(vm);
426 #endif
427 
428 	for (i = 0; i < nr_vcpus; ++i) {
429 		uint32_t vcpuid = vcpuids ? vcpuids[i] : i;
430 
431 		vm_vcpu_add_default(vm, vcpuid, guest_code);
432 	}
433 
434 	return vm;
435 }
436 
437 struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
438 					    uint32_t num_percpu_pages, void *guest_code,
439 					    uint32_t vcpuids[])
440 {
441 	return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, DEFAULT_GUEST_PHY_PAGES,
442 				    extra_mem_pages, num_percpu_pages, guest_code, vcpuids);
443 }
444 
445 struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
446 				 void *guest_code)
447 {
448 	return vm_create_default_with_vcpus(1, extra_mem_pages, 0, guest_code,
449 					    (uint32_t []){ vcpuid });
450 }
451 
452 /*
453  * VM Restart
454  *
455  * Input Args:
456  *   vm - VM that has been released before
457  *   perm - permission
458  *
459  * Output Args: None
460  *
461  * Reopens the file descriptors associated to the VM and reinstates the
462  * global state, such as the irqchip and the memory regions that are mapped
463  * into the guest.
464  */
465 void kvm_vm_restart(struct kvm_vm *vmp, int perm)
466 {
467 	int ctr;
468 	struct userspace_mem_region *region;
469 
470 	vm_open(vmp, perm);
471 	if (vmp->has_irqchip)
472 		vm_create_irqchip(vmp);
473 
474 	hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
475 		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
476 		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
477 			    "  rc: %i errno: %i\n"
478 			    "  slot: %u flags: 0x%x\n"
479 			    "  guest_phys_addr: 0x%llx size: 0x%llx",
480 			    ret, errno, region->region.slot,
481 			    region->region.flags,
482 			    region->region.guest_phys_addr,
483 			    region->region.memory_size);
484 	}
485 }
486 
487 void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
488 {
489 	struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
490 	int ret;
491 
492 	ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
493 	TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s",
494 		    __func__, strerror(-ret));
495 }
496 
497 void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
498 			    uint64_t first_page, uint32_t num_pages)
499 {
500 	struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot,
501 		                            .first_page = first_page,
502 	                                    .num_pages = num_pages };
503 	int ret;
504 
505 	ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
506 	TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s",
507 		    __func__, strerror(-ret));
508 }
509 
510 uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
511 {
512 	return ioctl(vm->fd, KVM_RESET_DIRTY_RINGS);
513 }
514 
515 /*
516  * Userspace Memory Region Find
517  *
518  * Input Args:
519  *   vm - Virtual Machine
520  *   start - Starting VM physical address
521  *   end - Ending VM physical address, inclusive.
522  *
523  * Output Args: None
524  *
525  * Return:
526  *   Pointer to overlapping region, NULL if no such region.
527  *
528  * Searches for a region with any physical memory that overlaps with
529  * any portion of the guest physical addresses from start to end
530  * inclusive.  If multiple overlapping regions exist, a pointer to any
531  * of the regions is returned.  Null is returned only when no overlapping
532  * region exists.
533  */
534 static struct userspace_mem_region *
535 userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
536 {
537 	struct rb_node *node;
538 
539 	for (node = vm->regions.gpa_tree.rb_node; node; ) {
540 		struct userspace_mem_region *region =
541 			container_of(node, struct userspace_mem_region, gpa_node);
542 		uint64_t existing_start = region->region.guest_phys_addr;
543 		uint64_t existing_end = region->region.guest_phys_addr
544 			+ region->region.memory_size - 1;
545 		if (start <= existing_end && end >= existing_start)
546 			return region;
547 
548 		if (start < existing_start)
549 			node = node->rb_left;
550 		else
551 			node = node->rb_right;
552 	}
553 
554 	return NULL;
555 }
556 
557 /*
558  * KVM Userspace Memory Region Find
559  *
560  * Input Args:
561  *   vm - Virtual Machine
562  *   start - Starting VM physical address
563  *   end - Ending VM physical address, inclusive.
564  *
565  * Output Args: None
566  *
567  * Return:
568  *   Pointer to overlapping region, NULL if no such region.
569  *
570  * Public interface to userspace_mem_region_find. Allows tests to look up
571  * the memslot datastructure for a given range of guest physical memory.
572  */
573 struct kvm_userspace_memory_region *
574 kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
575 				 uint64_t end)
576 {
577 	struct userspace_mem_region *region;
578 
579 	region = userspace_mem_region_find(vm, start, end);
580 	if (!region)
581 		return NULL;
582 
583 	return &region->region;
584 }
585 
586 /*
587  * VCPU Find
588  *
589  * Input Args:
590  *   vm - Virtual Machine
591  *   vcpuid - VCPU ID
592  *
593  * Output Args: None
594  *
595  * Return:
596  *   Pointer to VCPU structure
597  *
598  * Locates a vcpu structure that describes the VCPU specified by vcpuid and
599  * returns a pointer to it.  Returns NULL if the VM doesn't contain a VCPU
600  * for the specified vcpuid.
601  */
602 struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
603 {
604 	struct vcpu *vcpu;
605 
606 	list_for_each_entry(vcpu, &vm->vcpus, list) {
607 		if (vcpu->id == vcpuid)
608 			return vcpu;
609 	}
610 
611 	return NULL;
612 }
613 
614 /*
615  * VM VCPU Remove
616  *
617  * Input Args:
618  *   vcpu - VCPU to remove
619  *
620  * Output Args: None
621  *
622  * Return: None, TEST_ASSERT failures for all error conditions
623  *
624  * Removes a vCPU from a VM and frees its resources.
625  */
626 static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu)
627 {
628 	int ret;
629 
630 	if (vcpu->dirty_gfns) {
631 		ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
632 		TEST_ASSERT(ret == 0, "munmap of VCPU dirty ring failed, "
633 			    "rc: %i errno: %i", ret, errno);
634 		vcpu->dirty_gfns = NULL;
635 	}
636 
637 	ret = munmap(vcpu->state, vcpu_mmap_sz());
638 	TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
639 		"errno: %i", ret, errno);
640 	ret = close(vcpu->fd);
641 	TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
642 		"errno: %i", ret, errno);
643 
644 	list_del(&vcpu->list);
645 	free(vcpu);
646 }
647 
648 void kvm_vm_release(struct kvm_vm *vmp)
649 {
650 	struct vcpu *vcpu, *tmp;
651 	int ret;
652 
653 	list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
654 		vm_vcpu_rm(vmp, vcpu);
655 
656 	ret = close(vmp->fd);
657 	TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
658 		"  vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
659 
660 	ret = close(vmp->kvm_fd);
661 	TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n"
662 		"  vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
663 }
664 
665 static void __vm_mem_region_delete(struct kvm_vm *vm,
666 				   struct userspace_mem_region *region,
667 				   bool unlink)
668 {
669 	int ret;
670 
671 	if (unlink) {
672 		rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
673 		rb_erase(&region->hva_node, &vm->regions.hva_tree);
674 		hash_del(&region->slot_node);
675 	}
676 
677 	region->region.memory_size = 0;
678 	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
679 	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
680 		    "rc: %i errno: %i", ret, errno);
681 
682 	sparsebit_free(&region->unused_phy_pages);
683 	ret = munmap(region->mmap_start, region->mmap_size);
684 	TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", ret, errno);
685 
686 	free(region);
687 }
688 
689 /*
690  * Destroys and frees the VM pointed to by vmp.
691  */
692 void kvm_vm_free(struct kvm_vm *vmp)
693 {
694 	int ctr;
695 	struct hlist_node *node;
696 	struct userspace_mem_region *region;
697 
698 	if (vmp == NULL)
699 		return;
700 
701 	/* Free userspace_mem_regions. */
702 	hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
703 		__vm_mem_region_delete(vmp, region, false);
704 
705 	/* Free sparsebit arrays. */
706 	sparsebit_free(&vmp->vpages_valid);
707 	sparsebit_free(&vmp->vpages_mapped);
708 
709 	kvm_vm_release(vmp);
710 
711 	/* Free the structure describing the VM. */
712 	free(vmp);
713 }
714 
715 /*
716  * Memory Compare, host virtual to guest virtual
717  *
718  * Input Args:
719  *   hva - Starting host virtual address
720  *   vm - Virtual Machine
721  *   gva - Starting guest virtual address
722  *   len - number of bytes to compare
723  *
724  * Output Args: None
725  *
726  * Input/Output Args: None
727  *
728  * Return:
729  *   Returns 0 if the bytes starting at hva for a length of len
730  *   are equal the guest virtual bytes starting at gva.  Returns
731  *   a value < 0, if bytes at hva are less than those at gva.
732  *   Otherwise a value > 0 is returned.
733  *
734  * Compares the bytes starting at the host virtual address hva, for
735  * a length of len, to the guest bytes starting at the guest virtual
736  * address given by gva.
737  */
738 int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
739 {
740 	size_t amt;
741 
742 	/*
743 	 * Compare a batch of bytes until either a match is found
744 	 * or all the bytes have been compared.
745 	 */
746 	for (uintptr_t offset = 0; offset < len; offset += amt) {
747 		uintptr_t ptr1 = (uintptr_t)hva + offset;
748 
749 		/*
750 		 * Determine host address for guest virtual address
751 		 * at offset.
752 		 */
753 		uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
754 
755 		/*
756 		 * Determine amount to compare on this pass.
757 		 * Don't allow the comparsion to cross a page boundary.
758 		 */
759 		amt = len - offset;
760 		if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
761 			amt = vm->page_size - (ptr1 % vm->page_size);
762 		if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
763 			amt = vm->page_size - (ptr2 % vm->page_size);
764 
765 		assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
766 		assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
767 
768 		/*
769 		 * Perform the comparison.  If there is a difference
770 		 * return that result to the caller, otherwise need
771 		 * to continue on looking for a mismatch.
772 		 */
773 		int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
774 		if (ret != 0)
775 			return ret;
776 	}
777 
778 	/*
779 	 * No mismatch found.  Let the caller know the two memory
780 	 * areas are equal.
781 	 */
782 	return 0;
783 }
784 
785 static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
786 					       struct userspace_mem_region *region)
787 {
788 	struct rb_node **cur, *parent;
789 
790 	for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
791 		struct userspace_mem_region *cregion;
792 
793 		cregion = container_of(*cur, typeof(*cregion), gpa_node);
794 		parent = *cur;
795 		if (region->region.guest_phys_addr <
796 		    cregion->region.guest_phys_addr)
797 			cur = &(*cur)->rb_left;
798 		else {
799 			TEST_ASSERT(region->region.guest_phys_addr !=
800 				    cregion->region.guest_phys_addr,
801 				    "Duplicate GPA in region tree");
802 
803 			cur = &(*cur)->rb_right;
804 		}
805 	}
806 
807 	rb_link_node(&region->gpa_node, parent, cur);
808 	rb_insert_color(&region->gpa_node, gpa_tree);
809 }
810 
811 static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
812 					       struct userspace_mem_region *region)
813 {
814 	struct rb_node **cur, *parent;
815 
816 	for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
817 		struct userspace_mem_region *cregion;
818 
819 		cregion = container_of(*cur, typeof(*cregion), hva_node);
820 		parent = *cur;
821 		if (region->host_mem < cregion->host_mem)
822 			cur = &(*cur)->rb_left;
823 		else {
824 			TEST_ASSERT(region->host_mem !=
825 				    cregion->host_mem,
826 				    "Duplicate HVA in region tree");
827 
828 			cur = &(*cur)->rb_right;
829 		}
830 	}
831 
832 	rb_link_node(&region->hva_node, parent, cur);
833 	rb_insert_color(&region->hva_node, hva_tree);
834 }
835 
836 /*
837  * VM Userspace Memory Region Add
838  *
839  * Input Args:
840  *   vm - Virtual Machine
841  *   src_type - Storage source for this region.
842  *              NULL to use anonymous memory.
843  *   guest_paddr - Starting guest physical address
844  *   slot - KVM region slot
845  *   npages - Number of physical pages
846  *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
847  *
848  * Output Args: None
849  *
850  * Return: None
851  *
852  * Allocates a memory area of the number of pages specified by npages
853  * and maps it to the VM specified by vm, at a starting physical address
854  * given by guest_paddr.  The region is created with a KVM region slot
855  * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM.  The
856  * region is created with the flags given by flags.
857  */
858 void vm_userspace_mem_region_add(struct kvm_vm *vm,
859 	enum vm_mem_backing_src_type src_type,
860 	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
861 	uint32_t flags)
862 {
863 	int ret;
864 	struct userspace_mem_region *region;
865 	size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
866 	size_t alignment;
867 
868 	TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
869 		"Number of guest pages is not compatible with the host. "
870 		"Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));
871 
872 	TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
873 		"address not on a page boundary.\n"
874 		"  guest_paddr: 0x%lx vm->page_size: 0x%x",
875 		guest_paddr, vm->page_size);
876 	TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
877 		<= vm->max_gfn, "Physical range beyond maximum "
878 		"supported physical address,\n"
879 		"  guest_paddr: 0x%lx npages: 0x%lx\n"
880 		"  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
881 		guest_paddr, npages, vm->max_gfn, vm->page_size);
882 
883 	/*
884 	 * Confirm a mem region with an overlapping address doesn't
885 	 * already exist.
886 	 */
887 	region = (struct userspace_mem_region *) userspace_mem_region_find(
888 		vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
889 	if (region != NULL)
890 		TEST_FAIL("overlapping userspace_mem_region already "
891 			"exists\n"
892 			"  requested guest_paddr: 0x%lx npages: 0x%lx "
893 			"page_size: 0x%x\n"
894 			"  existing guest_paddr: 0x%lx size: 0x%lx",
895 			guest_paddr, npages, vm->page_size,
896 			(uint64_t) region->region.guest_phys_addr,
897 			(uint64_t) region->region.memory_size);
898 
899 	/* Confirm no region with the requested slot already exists. */
900 	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
901 			       slot) {
902 		if (region->region.slot != slot)
903 			continue;
904 
905 		TEST_FAIL("A mem region with the requested slot "
906 			"already exists.\n"
907 			"  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
908 			"  existing slot: %u paddr: 0x%lx size: 0x%lx",
909 			slot, guest_paddr, npages,
910 			region->region.slot,
911 			(uint64_t) region->region.guest_phys_addr,
912 			(uint64_t) region->region.memory_size);
913 	}
914 
915 	/* Allocate and initialize new mem region structure. */
916 	region = calloc(1, sizeof(*region));
917 	TEST_ASSERT(region != NULL, "Insufficient Memory");
918 	region->mmap_size = npages * vm->page_size;
919 
920 #ifdef __s390x__
921 	/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
922 	alignment = 0x100000;
923 #else
924 	alignment = 1;
925 #endif
926 
927 	/*
928 	 * When using THP mmap is not guaranteed to returned a hugepage aligned
929 	 * address so we have to pad the mmap. Padding is not needed for HugeTLB
930 	 * because mmap will always return an address aligned to the HugeTLB
931 	 * page size.
932 	 */
933 	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
934 		alignment = max(backing_src_pagesz, alignment);
935 
936 	ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
937 
938 	/* Add enough memory to align up if necessary */
939 	if (alignment > 1)
940 		region->mmap_size += alignment;
941 
942 	region->fd = -1;
943 	if (backing_src_is_shared(src_type)) {
944 		int memfd_flags = MFD_CLOEXEC;
945 
946 		if (src_type == VM_MEM_SRC_SHARED_HUGETLB)
947 			memfd_flags |= MFD_HUGETLB;
948 
949 		region->fd = memfd_create("kvm_selftest", memfd_flags);
950 		TEST_ASSERT(region->fd != -1,
951 			    "memfd_create failed, errno: %i", errno);
952 
953 		ret = ftruncate(region->fd, region->mmap_size);
954 		TEST_ASSERT(ret == 0, "ftruncate failed, errno: %i", errno);
955 
956 		ret = fallocate(region->fd,
957 				FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0,
958 				region->mmap_size);
959 		TEST_ASSERT(ret == 0, "fallocate failed, errno: %i", errno);
960 	}
961 
962 	region->mmap_start = mmap(NULL, region->mmap_size,
963 				  PROT_READ | PROT_WRITE,
964 				  vm_mem_backing_src_alias(src_type)->flag,
965 				  region->fd, 0);
966 	TEST_ASSERT(region->mmap_start != MAP_FAILED,
967 		    "test_malloc failed, mmap_start: %p errno: %i",
968 		    region->mmap_start, errno);
969 
970 	TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
971 		    region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
972 		    "mmap_start %p is not aligned to HugeTLB page size 0x%lx",
973 		    region->mmap_start, backing_src_pagesz);
974 
975 	/* Align host address */
976 	region->host_mem = align_ptr_up(region->mmap_start, alignment);
977 
978 	/* As needed perform madvise */
979 	if ((src_type == VM_MEM_SRC_ANONYMOUS ||
980 	     src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
981 		ret = madvise(region->host_mem, npages * vm->page_size,
982 			      src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
983 		TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
984 			    region->host_mem, npages * vm->page_size,
985 			    vm_mem_backing_src_alias(src_type)->name);
986 	}
987 
988 	region->unused_phy_pages = sparsebit_alloc();
989 	sparsebit_set_num(region->unused_phy_pages,
990 		guest_paddr >> vm->page_shift, npages);
991 	region->region.slot = slot;
992 	region->region.flags = flags;
993 	region->region.guest_phys_addr = guest_paddr;
994 	region->region.memory_size = npages * vm->page_size;
995 	region->region.userspace_addr = (uintptr_t) region->host_mem;
996 	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
997 	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
998 		"  rc: %i errno: %i\n"
999 		"  slot: %u flags: 0x%x\n"
1000 		"  guest_phys_addr: 0x%lx size: 0x%lx",
1001 		ret, errno, slot, flags,
1002 		guest_paddr, (uint64_t) region->region.memory_size);
1003 
1004 	/* Add to quick lookup data structures */
1005 	vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
1006 	vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
1007 	hash_add(vm->regions.slot_hash, &region->slot_node, slot);
1008 
1009 	/* If shared memory, create an alias. */
1010 	if (region->fd >= 0) {
1011 		region->mmap_alias = mmap(NULL, region->mmap_size,
1012 					  PROT_READ | PROT_WRITE,
1013 					  vm_mem_backing_src_alias(src_type)->flag,
1014 					  region->fd, 0);
1015 		TEST_ASSERT(region->mmap_alias != MAP_FAILED,
1016 			    "mmap of alias failed, errno: %i", errno);
1017 
1018 		/* Align host alias address */
1019 		region->host_alias = align_ptr_up(region->mmap_alias, alignment);
1020 	}
1021 }
1022 
1023 /*
1024  * Memslot to region
1025  *
1026  * Input Args:
1027  *   vm - Virtual Machine
1028  *   memslot - KVM memory slot ID
1029  *
1030  * Output Args: None
1031  *
1032  * Return:
1033  *   Pointer to memory region structure that describe memory region
1034  *   using kvm memory slot ID given by memslot.  TEST_ASSERT failure
1035  *   on error (e.g. currently no memory region using memslot as a KVM
1036  *   memory slot ID).
1037  */
1038 struct userspace_mem_region *
1039 memslot2region(struct kvm_vm *vm, uint32_t memslot)
1040 {
1041 	struct userspace_mem_region *region;
1042 
1043 	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
1044 			       memslot)
1045 		if (region->region.slot == memslot)
1046 			return region;
1047 
1048 	fprintf(stderr, "No mem region with the requested slot found,\n"
1049 		"  requested slot: %u\n", memslot);
1050 	fputs("---- vm dump ----\n", stderr);
1051 	vm_dump(stderr, vm, 2);
1052 	TEST_FAIL("Mem region not found");
1053 	return NULL;
1054 }
1055 
1056 /*
1057  * VM Memory Region Flags Set
1058  *
1059  * Input Args:
1060  *   vm - Virtual Machine
1061  *   flags - Starting guest physical address
1062  *
1063  * Output Args: None
1064  *
1065  * Return: None
1066  *
1067  * Sets the flags of the memory region specified by the value of slot,
1068  * to the values given by flags.
1069  */
1070 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
1071 {
1072 	int ret;
1073 	struct userspace_mem_region *region;
1074 
1075 	region = memslot2region(vm, slot);
1076 
1077 	region->region.flags = flags;
1078 
1079 	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
1080 
1081 	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
1082 		"  rc: %i errno: %i slot: %u flags: 0x%x",
1083 		ret, errno, slot, flags);
1084 }
1085 
1086 /*
1087  * VM Memory Region Move
1088  *
1089  * Input Args:
1090  *   vm - Virtual Machine
1091  *   slot - Slot of the memory region to move
1092  *   new_gpa - Starting guest physical address
1093  *
1094  * Output Args: None
1095  *
1096  * Return: None
1097  *
1098  * Change the gpa of a memory region.
1099  */
1100 void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
1101 {
1102 	struct userspace_mem_region *region;
1103 	int ret;
1104 
1105 	region = memslot2region(vm, slot);
1106 
1107 	region->region.guest_phys_addr = new_gpa;
1108 
1109 	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
1110 
1111 	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
1112 		    "ret: %i errno: %i slot: %u new_gpa: 0x%lx",
1113 		    ret, errno, slot, new_gpa);
1114 }
1115 
1116 /*
1117  * VM Memory Region Delete
1118  *
1119  * Input Args:
1120  *   vm - Virtual Machine
1121  *   slot - Slot of the memory region to delete
1122  *
1123  * Output Args: None
1124  *
1125  * Return: None
1126  *
1127  * Delete a memory region.
1128  */
1129 void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
1130 {
1131 	__vm_mem_region_delete(vm, memslot2region(vm, slot), true);
1132 }
1133 
1134 /*
1135  * VCPU mmap Size
1136  *
1137  * Input Args: None
1138  *
1139  * Output Args: None
1140  *
1141  * Return:
1142  *   Size of VCPU state
1143  *
1144  * Returns the size of the structure pointed to by the return value
1145  * of vcpu_state().
1146  */
1147 static int vcpu_mmap_sz(void)
1148 {
1149 	int dev_fd, ret;
1150 
1151 	dev_fd = open_kvm_dev_path_or_exit();
1152 
1153 	ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
1154 	TEST_ASSERT(ret >= sizeof(struct kvm_run),
1155 		"%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
1156 		__func__, ret, errno);
1157 
1158 	close(dev_fd);
1159 
1160 	return ret;
1161 }
1162 
1163 /*
1164  * VM VCPU Add
1165  *
1166  * Input Args:
1167  *   vm - Virtual Machine
1168  *   vcpuid - VCPU ID
1169  *
1170  * Output Args: None
1171  *
1172  * Return: None
1173  *
1174  * Adds a virtual CPU to the VM specified by vm with the ID given by vcpuid.
1175  * No additional VCPU setup is done.
1176  */
1177 void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
1178 {
1179 	struct vcpu *vcpu;
1180 
1181 	/* Confirm a vcpu with the specified id doesn't already exist. */
1182 	vcpu = vcpu_find(vm, vcpuid);
1183 	if (vcpu != NULL)
1184 		TEST_FAIL("vcpu with the specified id "
1185 			"already exists,\n"
1186 			"  requested vcpuid: %u\n"
1187 			"  existing vcpuid: %u state: %p",
1188 			vcpuid, vcpu->id, vcpu->state);
1189 
1190 	/* Allocate and initialize new vcpu structure. */
1191 	vcpu = calloc(1, sizeof(*vcpu));
1192 	TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
1193 	vcpu->id = vcpuid;
1194 	vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
1195 	TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
1196 		vcpu->fd, errno);
1197 
1198 	TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
1199 		"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
1200 		vcpu_mmap_sz(), sizeof(*vcpu->state));
1201 	vcpu->state = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
1202 		PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
1203 	TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
1204 		"vcpu id: %u errno: %i", vcpuid, errno);
1205 
1206 	/* Add to linked-list of VCPUs. */
1207 	list_add(&vcpu->list, &vm->vcpus);
1208 }
1209 
1210 /*
1211  * VM Virtual Address Unused Gap
1212  *
1213  * Input Args:
1214  *   vm - Virtual Machine
1215  *   sz - Size (bytes)
1216  *   vaddr_min - Minimum Virtual Address
1217  *
1218  * Output Args: None
1219  *
1220  * Return:
1221  *   Lowest virtual address at or below vaddr_min, with at least
1222  *   sz unused bytes.  TEST_ASSERT failure if no area of at least
1223  *   size sz is available.
1224  *
1225  * Within the VM specified by vm, locates the lowest starting virtual
1226  * address >= vaddr_min, that has at least sz unallocated bytes.  A
1227  * TEST_ASSERT failure occurs for invalid input or no area of at least
1228  * sz unallocated bytes >= vaddr_min is available.
1229  */
1230 static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
1231 				      vm_vaddr_t vaddr_min)
1232 {
1233 	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
1234 
1235 	/* Determine lowest permitted virtual page index. */
1236 	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
1237 	if ((pgidx_start * vm->page_size) < vaddr_min)
1238 		goto no_va_found;
1239 
1240 	/* Loop over section with enough valid virtual page indexes. */
1241 	if (!sparsebit_is_set_num(vm->vpages_valid,
1242 		pgidx_start, pages))
1243 		pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
1244 			pgidx_start, pages);
1245 	do {
1246 		/*
1247 		 * Are there enough unused virtual pages available at
1248 		 * the currently proposed starting virtual page index.
1249 		 * If not, adjust proposed starting index to next
1250 		 * possible.
1251 		 */
1252 		if (sparsebit_is_clear_num(vm->vpages_mapped,
1253 			pgidx_start, pages))
1254 			goto va_found;
1255 		pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
1256 			pgidx_start, pages);
1257 		if (pgidx_start == 0)
1258 			goto no_va_found;
1259 
1260 		/*
1261 		 * If needed, adjust proposed starting virtual address,
1262 		 * to next range of valid virtual addresses.
1263 		 */
1264 		if (!sparsebit_is_set_num(vm->vpages_valid,
1265 			pgidx_start, pages)) {
1266 			pgidx_start = sparsebit_next_set_num(
1267 				vm->vpages_valid, pgidx_start, pages);
1268 			if (pgidx_start == 0)
1269 				goto no_va_found;
1270 		}
1271 	} while (pgidx_start != 0);
1272 
1273 no_va_found:
1274 	TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages);
1275 
1276 	/* NOT REACHED */
1277 	return -1;
1278 
1279 va_found:
1280 	TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
1281 		pgidx_start, pages),
1282 		"Unexpected, invalid virtual page index range,\n"
1283 		"  pgidx_start: 0x%lx\n"
1284 		"  pages: 0x%lx",
1285 		pgidx_start, pages);
1286 	TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
1287 		pgidx_start, pages),
1288 		"Unexpected, pages already mapped,\n"
1289 		"  pgidx_start: 0x%lx\n"
1290 		"  pages: 0x%lx",
1291 		pgidx_start, pages);
1292 
1293 	return pgidx_start * vm->page_size;
1294 }
1295 
1296 /*
1297  * VM Virtual Address Allocate
1298  *
1299  * Input Args:
1300  *   vm - Virtual Machine
1301  *   sz - Size in bytes
1302  *   vaddr_min - Minimum starting virtual address
1303  *   data_memslot - Memory region slot for data pages
1304  *   pgd_memslot - Memory region slot for new virtual translation tables
1305  *
1306  * Output Args: None
1307  *
1308  * Return:
1309  *   Starting guest virtual address
1310  *
1311  * Allocates at least sz bytes within the virtual address space of the vm
1312  * given by vm.  The allocated bytes are mapped to a virtual address >=
1313  * the address given by vaddr_min.  Note that each allocation uses a
1314  * a unique set of pages, with the minimum real allocation being at least
1315  * a page.
1316  */
1317 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
1318 {
1319 	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
1320 
1321 	virt_pgd_alloc(vm);
1322 	vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
1323 					      KVM_UTIL_MIN_PFN * vm->page_size, 0);
1324 
1325 	/*
1326 	 * Find an unused range of virtual page addresses of at least
1327 	 * pages in length.
1328 	 */
1329 	vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
1330 
1331 	/* Map the virtual pages. */
1332 	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
1333 		pages--, vaddr += vm->page_size, paddr += vm->page_size) {
1334 
1335 		virt_pg_map(vm, vaddr, paddr);
1336 
1337 		sparsebit_set(vm->vpages_mapped,
1338 			vaddr >> vm->page_shift);
1339 	}
1340 
1341 	return vaddr_start;
1342 }
1343 
1344 /*
1345  * VM Virtual Address Allocate Pages
1346  *
1347  * Input Args:
1348  *   vm - Virtual Machine
1349  *
1350  * Output Args: None
1351  *
1352  * Return:
1353  *   Starting guest virtual address
1354  *
1355  * Allocates at least N system pages worth of bytes within the virtual address
1356  * space of the vm.
1357  */
1358 vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
1359 {
1360 	return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
1361 }
1362 
1363 /*
1364  * VM Virtual Address Allocate Page
1365  *
1366  * Input Args:
1367  *   vm - Virtual Machine
1368  *
1369  * Output Args: None
1370  *
1371  * Return:
1372  *   Starting guest virtual address
1373  *
1374  * Allocates at least one system page worth of bytes within the virtual address
1375  * space of the vm.
1376  */
1377 vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
1378 {
1379 	return vm_vaddr_alloc_pages(vm, 1);
1380 }
1381 
1382 /*
1383  * Map a range of VM virtual address to the VM's physical address
1384  *
1385  * Input Args:
1386  *   vm - Virtual Machine
1387  *   vaddr - Virtuall address to map
1388  *   paddr - VM Physical Address
1389  *   npages - The number of pages to map
1390  *   pgd_memslot - Memory region slot for new virtual translation tables
1391  *
1392  * Output Args: None
1393  *
1394  * Return: None
1395  *
1396  * Within the VM given by @vm, creates a virtual translation for
1397  * @npages starting at @vaddr to the page range starting at @paddr.
1398  */
1399 void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
1400 	      unsigned int npages)
1401 {
1402 	size_t page_size = vm->page_size;
1403 	size_t size = npages * page_size;
1404 
1405 	TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
1406 	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
1407 
1408 	while (npages--) {
1409 		virt_pg_map(vm, vaddr, paddr);
1410 		vaddr += page_size;
1411 		paddr += page_size;
1412 	}
1413 }
1414 
1415 /*
1416  * Address VM Physical to Host Virtual
1417  *
1418  * Input Args:
1419  *   vm - Virtual Machine
1420  *   gpa - VM physical address
1421  *
1422  * Output Args: None
1423  *
1424  * Return:
1425  *   Equivalent host virtual address
1426  *
1427  * Locates the memory region containing the VM physical address given
1428  * by gpa, within the VM given by vm.  When found, the host virtual
1429  * address providing the memory to the vm physical address is returned.
1430  * A TEST_ASSERT failure occurs if no region containing gpa exists.
1431  */
1432 void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
1433 {
1434 	struct userspace_mem_region *region;
1435 
1436 	region = userspace_mem_region_find(vm, gpa, gpa);
1437 	if (!region) {
1438 		TEST_FAIL("No vm physical memory at 0x%lx", gpa);
1439 		return NULL;
1440 	}
1441 
1442 	return (void *)((uintptr_t)region->host_mem
1443 		+ (gpa - region->region.guest_phys_addr));
1444 }
1445 
1446 /*
1447  * Address Host Virtual to VM Physical
1448  *
1449  * Input Args:
1450  *   vm - Virtual Machine
1451  *   hva - Host virtual address
1452  *
1453  * Output Args: None
1454  *
1455  * Return:
1456  *   Equivalent VM physical address
1457  *
1458  * Locates the memory region containing the host virtual address given
1459  * by hva, within the VM given by vm.  When found, the equivalent
1460  * VM physical address is returned. A TEST_ASSERT failure occurs if no
1461  * region containing hva exists.
1462  */
1463 vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
1464 {
1465 	struct rb_node *node;
1466 
1467 	for (node = vm->regions.hva_tree.rb_node; node; ) {
1468 		struct userspace_mem_region *region =
1469 			container_of(node, struct userspace_mem_region, hva_node);
1470 
1471 		if (hva >= region->host_mem) {
1472 			if (hva <= (region->host_mem
1473 				+ region->region.memory_size - 1))
1474 				return (vm_paddr_t)((uintptr_t)
1475 					region->region.guest_phys_addr
1476 					+ (hva - (uintptr_t)region->host_mem));
1477 
1478 			node = node->rb_right;
1479 		} else
1480 			node = node->rb_left;
1481 	}
1482 
1483 	TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
1484 	return -1;
1485 }
1486 
1487 /*
1488  * Address VM physical to Host Virtual *alias*.
1489  *
1490  * Input Args:
1491  *   vm - Virtual Machine
1492  *   gpa - VM physical address
1493  *
1494  * Output Args: None
1495  *
1496  * Return:
1497  *   Equivalent address within the host virtual *alias* area, or NULL
1498  *   (without failing the test) if the guest memory is not shared (so
1499  *   no alias exists).
1500  *
1501  * When vm_create() and related functions are called with a shared memory
1502  * src_type, we also create a writable, shared alias mapping of the
1503  * underlying guest memory. This allows the host to manipulate guest memory
1504  * without mapping that memory in the guest's address space. And, for
1505  * userfaultfd-based demand paging, we can do so without triggering userfaults.
1506  */
1507 void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
1508 {
1509 	struct userspace_mem_region *region;
1510 	uintptr_t offset;
1511 
1512 	region = userspace_mem_region_find(vm, gpa, gpa);
1513 	if (!region)
1514 		return NULL;
1515 
1516 	if (!region->host_alias)
1517 		return NULL;
1518 
1519 	offset = gpa - region->region.guest_phys_addr;
1520 	return (void *) ((uintptr_t) region->host_alias + offset);
1521 }
1522 
1523 /*
1524  * VM Create IRQ Chip
1525  *
1526  * Input Args:
1527  *   vm - Virtual Machine
1528  *
1529  * Output Args: None
1530  *
1531  * Return: None
1532  *
1533  * Creates an interrupt controller chip for the VM specified by vm.
1534  */
1535 void vm_create_irqchip(struct kvm_vm *vm)
1536 {
1537 	int ret;
1538 
1539 	ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
1540 	TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
1541 		"rc: %i errno: %i", ret, errno);
1542 
1543 	vm->has_irqchip = true;
1544 }
1545 
1546 /*
1547  * VM VCPU State
1548  *
1549  * Input Args:
1550  *   vm - Virtual Machine
1551  *   vcpuid - VCPU ID
1552  *
1553  * Output Args: None
1554  *
1555  * Return:
1556  *   Pointer to structure that describes the state of the VCPU.
1557  *
1558  * Locates and returns a pointer to a structure that describes the
1559  * state of the VCPU with the given vcpuid.
1560  */
1561 struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
1562 {
1563 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1564 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1565 
1566 	return vcpu->state;
1567 }
1568 
1569 /*
1570  * VM VCPU Run
1571  *
1572  * Input Args:
1573  *   vm - Virtual Machine
1574  *   vcpuid - VCPU ID
1575  *
1576  * Output Args: None
1577  *
1578  * Return: None
1579  *
1580  * Switch to executing the code for the VCPU given by vcpuid, within the VM
1581  * given by vm.
1582  */
1583 void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
1584 {
1585 	int ret = _vcpu_run(vm, vcpuid);
1586 	TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
1587 		"rc: %i errno: %i", ret, errno);
1588 }
1589 
1590 int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
1591 {
1592 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1593 	int rc;
1594 
1595 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1596 	do {
1597 		rc = ioctl(vcpu->fd, KVM_RUN, NULL);
1598 	} while (rc == -1 && errno == EINTR);
1599 
1600 	assert_on_unhandled_exception(vm, vcpuid);
1601 
1602 	return rc;
1603 }
1604 
1605 int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid)
1606 {
1607 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1608 
1609 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1610 
1611 	return vcpu->fd;
1612 }
1613 
1614 void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid)
1615 {
1616 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1617 	int ret;
1618 
1619 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1620 
1621 	vcpu->state->immediate_exit = 1;
1622 	ret = ioctl(vcpu->fd, KVM_RUN, NULL);
1623 	vcpu->state->immediate_exit = 0;
1624 
1625 	TEST_ASSERT(ret == -1 && errno == EINTR,
1626 		    "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
1627 		    ret, errno);
1628 }
1629 
1630 void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
1631 			  struct kvm_guest_debug *debug)
1632 {
1633 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1634 	int ret = ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, debug);
1635 
1636 	TEST_ASSERT(ret == 0, "KVM_SET_GUEST_DEBUG failed: %d", ret);
1637 }
1638 
1639 /*
1640  * VM VCPU Set MP State
1641  *
1642  * Input Args:
1643  *   vm - Virtual Machine
1644  *   vcpuid - VCPU ID
1645  *   mp_state - mp_state to be set
1646  *
1647  * Output Args: None
1648  *
1649  * Return: None
1650  *
1651  * Sets the MP state of the VCPU given by vcpuid, to the state given
1652  * by mp_state.
1653  */
1654 void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
1655 		       struct kvm_mp_state *mp_state)
1656 {
1657 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1658 	int ret;
1659 
1660 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1661 
1662 	ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
1663 	TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
1664 		"rc: %i errno: %i", ret, errno);
1665 }
1666 
1667 /*
1668  * VM VCPU Get Reg List
1669  *
1670  * Input Args:
1671  *   vm - Virtual Machine
1672  *   vcpuid - VCPU ID
1673  *
1674  * Output Args:
1675  *   None
1676  *
1677  * Return:
1678  *   A pointer to an allocated struct kvm_reg_list
1679  *
1680  * Get the list of guest registers which are supported for
1681  * KVM_GET_ONE_REG/KVM_SET_ONE_REG calls
1682  */
1683 struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid)
1684 {
1685 	struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
1686 	int ret;
1687 
1688 	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, &reg_list_n);
1689 	TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");
1690 	reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
1691 	reg_list->n = reg_list_n.n;
1692 	vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list);
1693 	return reg_list;
1694 }
1695 
1696 /*
1697  * VM VCPU Regs Get
1698  *
1699  * Input Args:
1700  *   vm - Virtual Machine
1701  *   vcpuid - VCPU ID
1702  *
1703  * Output Args:
1704  *   regs - current state of VCPU regs
1705  *
1706  * Return: None
1707  *
1708  * Obtains the current register state for the VCPU specified by vcpuid
1709  * and stores it at the location given by regs.
1710  */
1711 void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
1712 {
1713 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1714 	int ret;
1715 
1716 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1717 
1718 	ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
1719 	TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
1720 		ret, errno);
1721 }
1722 
1723 /*
1724  * VM VCPU Regs Set
1725  *
1726  * Input Args:
1727  *   vm - Virtual Machine
1728  *   vcpuid - VCPU ID
1729  *   regs - Values to set VCPU regs to
1730  *
1731  * Output Args: None
1732  *
1733  * Return: None
1734  *
1735  * Sets the regs of the VCPU specified by vcpuid to the values
1736  * given by regs.
1737  */
1738 void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
1739 {
1740 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1741 	int ret;
1742 
1743 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1744 
1745 	ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
1746 	TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
1747 		ret, errno);
1748 }
1749 
1750 #ifdef __KVM_HAVE_VCPU_EVENTS
1751 void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
1752 		     struct kvm_vcpu_events *events)
1753 {
1754 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1755 	int ret;
1756 
1757 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1758 
1759 	ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
1760 	TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i",
1761 		ret, errno);
1762 }
1763 
1764 void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
1765 		     struct kvm_vcpu_events *events)
1766 {
1767 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1768 	int ret;
1769 
1770 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1771 
1772 	ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
1773 	TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
1774 		ret, errno);
1775 }
1776 #endif
1777 
1778 #ifdef __x86_64__
1779 void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
1780 			   struct kvm_nested_state *state)
1781 {
1782 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1783 	int ret;
1784 
1785 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1786 
1787 	ret = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, state);
1788 	TEST_ASSERT(ret == 0,
1789 		"KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
1790 		ret, errno);
1791 }
1792 
1793 int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
1794 			  struct kvm_nested_state *state, bool ignore_error)
1795 {
1796 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1797 	int ret;
1798 
1799 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1800 
1801 	ret = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, state);
1802 	if (!ignore_error) {
1803 		TEST_ASSERT(ret == 0,
1804 			"KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
1805 			ret, errno);
1806 	}
1807 
1808 	return ret;
1809 }
1810 #endif
1811 
1812 /*
1813  * VM VCPU System Regs Get
1814  *
1815  * Input Args:
1816  *   vm - Virtual Machine
1817  *   vcpuid - VCPU ID
1818  *
1819  * Output Args:
1820  *   sregs - current state of VCPU system regs
1821  *
1822  * Return: None
1823  *
1824  * Obtains the current system register state for the VCPU specified by
1825  * vcpuid and stores it at the location given by sregs.
1826  */
1827 void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
1828 {
1829 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1830 	int ret;
1831 
1832 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1833 
1834 	ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
1835 	TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
1836 		ret, errno);
1837 }
1838 
1839 /*
1840  * VM VCPU System Regs Set
1841  *
1842  * Input Args:
1843  *   vm - Virtual Machine
1844  *   vcpuid - VCPU ID
1845  *   sregs - Values to set VCPU system regs to
1846  *
1847  * Output Args: None
1848  *
1849  * Return: None
1850  *
1851  * Sets the system regs of the VCPU specified by vcpuid to the values
1852  * given by sregs.
1853  */
1854 void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
1855 {
1856 	int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
1857 	TEST_ASSERT(ret == 0, "KVM_SET_SREGS IOCTL failed, "
1858 		"rc: %i errno: %i", ret, errno);
1859 }
1860 
1861 int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
1862 {
1863 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1864 
1865 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1866 
1867 	return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
1868 }
1869 
1870 void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
1871 {
1872 	int ret;
1873 
1874 	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_FPU, fpu);
1875 	TEST_ASSERT(ret == 0, "KVM_GET_FPU failed, rc: %i errno: %i (%s)",
1876 		    ret, errno, strerror(errno));
1877 }
1878 
1879 void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
1880 {
1881 	int ret;
1882 
1883 	ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_FPU, fpu);
1884 	TEST_ASSERT(ret == 0, "KVM_SET_FPU failed, rc: %i errno: %i (%s)",
1885 		    ret, errno, strerror(errno));
1886 }
1887 
1888 void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
1889 {
1890 	int ret;
1891 
1892 	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, reg);
1893 	TEST_ASSERT(ret == 0, "KVM_GET_ONE_REG failed, rc: %i errno: %i (%s)",
1894 		    ret, errno, strerror(errno));
1895 }
1896 
1897 void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
1898 {
1899 	int ret;
1900 
1901 	ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, reg);
1902 	TEST_ASSERT(ret == 0, "KVM_SET_ONE_REG failed, rc: %i errno: %i (%s)",
1903 		    ret, errno, strerror(errno));
1904 }
1905 
1906 /*
1907  * VCPU Ioctl
1908  *
1909  * Input Args:
1910  *   vm - Virtual Machine
1911  *   vcpuid - VCPU ID
1912  *   cmd - Ioctl number
1913  *   arg - Argument to pass to the ioctl
1914  *
1915  * Return: None
1916  *
1917  * Issues an arbitrary ioctl on a VCPU fd.
1918  */
1919 void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
1920 		unsigned long cmd, void *arg)
1921 {
1922 	int ret;
1923 
1924 	ret = _vcpu_ioctl(vm, vcpuid, cmd, arg);
1925 	TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
1926 		cmd, ret, errno, strerror(errno));
1927 }
1928 
1929 int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
1930 		unsigned long cmd, void *arg)
1931 {
1932 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1933 	int ret;
1934 
1935 	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1936 
1937 	ret = ioctl(vcpu->fd, cmd, arg);
1938 
1939 	return ret;
1940 }
1941 
1942 void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid)
1943 {
1944 	struct vcpu *vcpu;
1945 	uint32_t size = vm->dirty_ring_size;
1946 
1947 	TEST_ASSERT(size > 0, "Should enable dirty ring first");
1948 
1949 	vcpu = vcpu_find(vm, vcpuid);
1950 
1951 	TEST_ASSERT(vcpu, "Cannot find vcpu %u", vcpuid);
1952 
1953 	if (!vcpu->dirty_gfns) {
1954 		void *addr;
1955 
1956 		addr = mmap(NULL, size, PROT_READ,
1957 			    MAP_PRIVATE, vcpu->fd,
1958 			    vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
1959 		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");
1960 
1961 		addr = mmap(NULL, size, PROT_READ | PROT_EXEC,
1962 			    MAP_PRIVATE, vcpu->fd,
1963 			    vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
1964 		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");
1965 
1966 		addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
1967 			    MAP_SHARED, vcpu->fd,
1968 			    vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
1969 		TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");
1970 
1971 		vcpu->dirty_gfns = addr;
1972 		vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
1973 	}
1974 
1975 	return vcpu->dirty_gfns;
1976 }
1977 
1978 /*
1979  * VM Ioctl
1980  *
1981  * Input Args:
1982  *   vm - Virtual Machine
1983  *   cmd - Ioctl number
1984  *   arg - Argument to pass to the ioctl
1985  *
1986  * Return: None
1987  *
1988  * Issues an arbitrary ioctl on a VM fd.
1989  */
1990 void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
1991 {
1992 	int ret;
1993 
1994 	ret = _vm_ioctl(vm, cmd, arg);
1995 	TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
1996 		cmd, ret, errno, strerror(errno));
1997 }
1998 
1999 int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
2000 {
2001 	return ioctl(vm->fd, cmd, arg);
2002 }
2003 
2004 /*
2005  * KVM system ioctl
2006  *
2007  * Input Args:
2008  *   vm - Virtual Machine
2009  *   cmd - Ioctl number
2010  *   arg - Argument to pass to the ioctl
2011  *
2012  * Return: None
2013  *
2014  * Issues an arbitrary ioctl on a KVM fd.
2015  */
2016 void kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
2017 {
2018 	int ret;
2019 
2020 	ret = ioctl(vm->kvm_fd, cmd, arg);
2021 	TEST_ASSERT(ret == 0, "KVM ioctl %lu failed, rc: %i errno: %i (%s)",
2022 		cmd, ret, errno, strerror(errno));
2023 }
2024 
2025 int _kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
2026 {
2027 	return ioctl(vm->kvm_fd, cmd, arg);
2028 }
2029 
2030 /*
2031  * Device Ioctl
2032  */
2033 
2034 int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr)
2035 {
2036 	struct kvm_device_attr attribute = {
2037 		.group = group,
2038 		.attr = attr,
2039 		.flags = 0,
2040 	};
2041 
2042 	return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute);
2043 }
2044 
2045 int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr)
2046 {
2047 	int ret = _kvm_device_check_attr(dev_fd, group, attr);
2048 
2049 	TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno);
2050 	return ret;
2051 }
2052 
2053 int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd)
2054 {
2055 	struct kvm_create_device create_dev;
2056 	int ret;
2057 
2058 	create_dev.type = type;
2059 	create_dev.fd = -1;
2060 	create_dev.flags = test ? KVM_CREATE_DEVICE_TEST : 0;
2061 	ret = ioctl(vm_get_fd(vm), KVM_CREATE_DEVICE, &create_dev);
2062 	*fd = create_dev.fd;
2063 	return ret;
2064 }
2065 
2066 int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test)
2067 {
2068 	int fd, ret;
2069 
2070 	ret = _kvm_create_device(vm, type, test, &fd);
2071 
2072 	if (!test) {
2073 		TEST_ASSERT(!ret,
2074 			    "KVM_CREATE_DEVICE IOCTL failed, rc: %i errno: %i", ret, errno);
2075 		return fd;
2076 	}
2077 	return ret;
2078 }
2079 
2080 int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
2081 		      void *val, bool write)
2082 {
2083 	struct kvm_device_attr kvmattr = {
2084 		.group = group,
2085 		.attr = attr,
2086 		.flags = 0,
2087 		.addr = (uintptr_t)val,
2088 	};
2089 	int ret;
2090 
2091 	ret = ioctl(dev_fd, write ? KVM_SET_DEVICE_ATTR : KVM_GET_DEVICE_ATTR,
2092 		    &kvmattr);
2093 	return ret;
2094 }
2095 
2096 int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
2097 		      void *val, bool write)
2098 {
2099 	int ret = _kvm_device_access(dev_fd, group, attr, val, write);
2100 
2101 	TEST_ASSERT(!ret, "KVM_SET|GET_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno);
2102 	return ret;
2103 }
2104 
2105 int _vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
2106 			  uint64_t attr)
2107 {
2108 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
2109 
2110 	TEST_ASSERT(vcpu, "nonexistent vcpu id: %d", vcpuid);
2111 
2112 	return _kvm_device_check_attr(vcpu->fd, group, attr);
2113 }
2114 
2115 int vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
2116 				 uint64_t attr)
2117 {
2118 	int ret = _vcpu_has_device_attr(vm, vcpuid, group, attr);
2119 
2120 	TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno);
2121 	return ret;
2122 }
2123 
2124 int _vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
2125 			     uint64_t attr, void *val, bool write)
2126 {
2127 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
2128 
2129 	TEST_ASSERT(vcpu, "nonexistent vcpu id: %d", vcpuid);
2130 
2131 	return _kvm_device_access(vcpu->fd, group, attr, val, write);
2132 }
2133 
2134 int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
2135 			    uint64_t attr, void *val, bool write)
2136 {
2137 	int ret = _vcpu_access_device_attr(vm, vcpuid, group, attr, val, write);
2138 
2139 	TEST_ASSERT(!ret, "KVM_SET|GET_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno);
2140 	return ret;
2141 }
2142 
2143 /*
2144  * IRQ related functions.
2145  */
2146 
2147 int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
2148 {
2149 	struct kvm_irq_level irq_level = {
2150 		.irq    = irq,
2151 		.level  = level,
2152 	};
2153 
2154 	return _vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
2155 }
2156 
2157 void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
2158 {
2159 	int ret = _kvm_irq_line(vm, irq, level);
2160 
2161 	TEST_ASSERT(ret >= 0, "KVM_IRQ_LINE failed, rc: %i errno: %i", ret, errno);
2162 }
2163 
2164 struct kvm_irq_routing *kvm_gsi_routing_create(void)
2165 {
2166 	struct kvm_irq_routing *routing;
2167 	size_t size;
2168 
2169 	size = sizeof(struct kvm_irq_routing);
2170 	/* Allocate space for the max number of entries: this wastes 196 KBs. */
2171 	size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry);
2172 	routing = calloc(1, size);
2173 	assert(routing);
2174 
2175 	return routing;
2176 }
2177 
2178 void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
2179 		uint32_t gsi, uint32_t pin)
2180 {
2181 	int i;
2182 
2183 	assert(routing);
2184 	assert(routing->nr < KVM_MAX_IRQ_ROUTES);
2185 
2186 	i = routing->nr;
2187 	routing->entries[i].gsi = gsi;
2188 	routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
2189 	routing->entries[i].flags = 0;
2190 	routing->entries[i].u.irqchip.irqchip = 0;
2191 	routing->entries[i].u.irqchip.pin = pin;
2192 	routing->nr++;
2193 }
2194 
2195 int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
2196 {
2197 	int ret;
2198 
2199 	assert(routing);
2200 	ret = ioctl(vm_get_fd(vm), KVM_SET_GSI_ROUTING, routing);
2201 	free(routing);
2202 
2203 	return ret;
2204 }
2205 
2206 void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
2207 {
2208 	int ret;
2209 
2210 	ret = _kvm_gsi_routing_write(vm, routing);
2211 	TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING failed, rc: %i errno: %i",
2212 				ret, errno);
2213 }
2214 
2215 /*
2216  * VM Dump
2217  *
2218  * Input Args:
2219  *   vm - Virtual Machine
2220  *   indent - Left margin indent amount
2221  *
2222  * Output Args:
2223  *   stream - Output FILE stream
2224  *
2225  * Return: None
2226  *
2227  * Dumps the current state of the VM given by vm, to the FILE stream
2228  * given by stream.
2229  */
2230 void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
2231 {
2232 	int ctr;
2233 	struct userspace_mem_region *region;
2234 	struct vcpu *vcpu;
2235 
2236 	fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
2237 	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
2238 	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
2239 	fprintf(stream, "%*sMem Regions:\n", indent, "");
2240 	hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
2241 		fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
2242 			"host_virt: %p\n", indent + 2, "",
2243 			(uint64_t) region->region.guest_phys_addr,
2244 			(uint64_t) region->region.memory_size,
2245 			region->host_mem);
2246 		fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
2247 		sparsebit_dump(stream, region->unused_phy_pages, 0);
2248 	}
2249 	fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
2250 	sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
2251 	fprintf(stream, "%*spgd_created: %u\n", indent, "",
2252 		vm->pgd_created);
2253 	if (vm->pgd_created) {
2254 		fprintf(stream, "%*sVirtual Translation Tables:\n",
2255 			indent + 2, "");
2256 		virt_dump(stream, vm, indent + 4);
2257 	}
2258 	fprintf(stream, "%*sVCPUs:\n", indent, "");
2259 	list_for_each_entry(vcpu, &vm->vcpus, list)
2260 		vcpu_dump(stream, vm, vcpu->id, indent + 2);
2261 }
2262 
2263 /* Known KVM exit reasons */
2264 static struct exit_reason {
2265 	unsigned int reason;
2266 	const char *name;
2267 } exit_reasons_known[] = {
2268 	{KVM_EXIT_UNKNOWN, "UNKNOWN"},
2269 	{KVM_EXIT_EXCEPTION, "EXCEPTION"},
2270 	{KVM_EXIT_IO, "IO"},
2271 	{KVM_EXIT_HYPERCALL, "HYPERCALL"},
2272 	{KVM_EXIT_DEBUG, "DEBUG"},
2273 	{KVM_EXIT_HLT, "HLT"},
2274 	{KVM_EXIT_MMIO, "MMIO"},
2275 	{KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
2276 	{KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
2277 	{KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
2278 	{KVM_EXIT_INTR, "INTR"},
2279 	{KVM_EXIT_SET_TPR, "SET_TPR"},
2280 	{KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
2281 	{KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
2282 	{KVM_EXIT_S390_RESET, "S390_RESET"},
2283 	{KVM_EXIT_DCR, "DCR"},
2284 	{KVM_EXIT_NMI, "NMI"},
2285 	{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
2286 	{KVM_EXIT_OSI, "OSI"},
2287 	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
2288 	{KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
2289 	{KVM_EXIT_X86_RDMSR, "RDMSR"},
2290 	{KVM_EXIT_X86_WRMSR, "WRMSR"},
2291 	{KVM_EXIT_XEN, "XEN"},
2292 #ifdef KVM_EXIT_MEMORY_NOT_PRESENT
2293 	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
2294 #endif
2295 };
2296 
2297 /*
2298  * Exit Reason String
2299  *
2300  * Input Args:
2301  *   exit_reason - Exit reason
2302  *
2303  * Output Args: None
2304  *
2305  * Return:
2306  *   Constant string pointer describing the exit reason.
2307  *
2308  * Locates and returns a constant string that describes the KVM exit
2309  * reason given by exit_reason.  If no such string is found, a constant
2310  * string of "Unknown" is returned.
2311  */
2312 const char *exit_reason_str(unsigned int exit_reason)
2313 {
2314 	unsigned int n1;
2315 
2316 	for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
2317 		if (exit_reason == exit_reasons_known[n1].reason)
2318 			return exit_reasons_known[n1].name;
2319 	}
2320 
2321 	return "Unknown";
2322 }
2323 
2324 /*
2325  * Physical Contiguous Page Allocator
2326  *
2327  * Input Args:
2328  *   vm - Virtual Machine
2329  *   num - number of pages
2330  *   paddr_min - Physical address minimum
2331  *   memslot - Memory region to allocate page from
2332  *
2333  * Output Args: None
2334  *
2335  * Return:
2336  *   Starting physical address
2337  *
2338  * Within the VM specified by vm, locates a range of available physical
2339  * pages at or above paddr_min. If found, the pages are marked as in use
2340  * and their base address is returned. A TEST_ASSERT failure occurs if
2341  * not enough pages are available at or above paddr_min.
2342  */
2343 vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
2344 			      vm_paddr_t paddr_min, uint32_t memslot)
2345 {
2346 	struct userspace_mem_region *region;
2347 	sparsebit_idx_t pg, base;
2348 
2349 	TEST_ASSERT(num > 0, "Must allocate at least one page");
2350 
2351 	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
2352 		"not divisible by page size.\n"
2353 		"  paddr_min: 0x%lx page_size: 0x%x",
2354 		paddr_min, vm->page_size);
2355 
2356 	region = memslot2region(vm, memslot);
2357 	base = pg = paddr_min >> vm->page_shift;
2358 
2359 	do {
2360 		for (; pg < base + num; ++pg) {
2361 			if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
2362 				base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
2363 				break;
2364 			}
2365 		}
2366 	} while (pg && pg != base + num);
2367 
2368 	if (pg == 0) {
2369 		fprintf(stderr, "No guest physical page available, "
2370 			"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
2371 			paddr_min, vm->page_size, memslot);
2372 		fputs("---- vm dump ----\n", stderr);
2373 		vm_dump(stderr, vm, 2);
2374 		abort();
2375 	}
2376 
2377 	for (pg = base; pg < base + num; ++pg)
2378 		sparsebit_clear(region->unused_phy_pages, pg);
2379 
2380 	return base * vm->page_size;
2381 }
2382 
2383 vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
2384 			     uint32_t memslot)
2385 {
2386 	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
2387 }
2388 
2389 /* Arbitrary minimum physical address used for virtual translation tables. */
2390 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
2391 
2392 vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
2393 {
2394 	return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
2395 }
2396 
2397 /*
2398  * Address Guest Virtual to Host Virtual
2399  *
2400  * Input Args:
2401  *   vm - Virtual Machine
2402  *   gva - VM virtual address
2403  *
2404  * Output Args: None
2405  *
2406  * Return:
2407  *   Equivalent host virtual address
2408  */
2409 void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
2410 {
2411 	return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
2412 }
2413 
2414 /*
2415  * Is Unrestricted Guest
2416  *
2417  * Input Args:
2418  *   vm - Virtual Machine
2419  *
2420  * Output Args: None
2421  *
2422  * Return: True if the unrestricted guest is set to 'Y', otherwise return false.
2423  *
2424  * Check if the unrestricted guest flag is enabled.
2425  */
2426 bool vm_is_unrestricted_guest(struct kvm_vm *vm)
2427 {
2428 	char val = 'N';
2429 	size_t count;
2430 	FILE *f;
2431 
2432 	if (vm == NULL) {
2433 		/* Ensure that the KVM vendor-specific module is loaded. */
2434 		close(open_kvm_dev_path_or_exit());
2435 	}
2436 
2437 	f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
2438 	if (f) {
2439 		count = fread(&val, sizeof(char), 1, f);
2440 		TEST_ASSERT(count == 1, "Unable to read from param file.");
2441 		fclose(f);
2442 	}
2443 
2444 	return val == 'Y';
2445 }
2446 
2447 unsigned int vm_get_page_size(struct kvm_vm *vm)
2448 {
2449 	return vm->page_size;
2450 }
2451 
2452 unsigned int vm_get_page_shift(struct kvm_vm *vm)
2453 {
2454 	return vm->page_shift;
2455 }
2456 
2457 unsigned long __attribute__((weak)) vm_compute_max_gfn(struct kvm_vm *vm)
2458 {
2459 	return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
2460 }
2461 
2462 uint64_t vm_get_max_gfn(struct kvm_vm *vm)
2463 {
2464 	return vm->max_gfn;
2465 }
2466 
2467 int vm_get_fd(struct kvm_vm *vm)
2468 {
2469 	return vm->fd;
2470 }
2471 
2472 static unsigned int vm_calc_num_pages(unsigned int num_pages,
2473 				      unsigned int page_shift,
2474 				      unsigned int new_page_shift,
2475 				      bool ceil)
2476 {
2477 	unsigned int n = 1 << (new_page_shift - page_shift);
2478 
2479 	if (page_shift >= new_page_shift)
2480 		return num_pages * (1 << (page_shift - new_page_shift));
2481 
2482 	return num_pages / n + !!(ceil && num_pages % n);
2483 }
2484 
2485 static inline int getpageshift(void)
2486 {
2487 	return __builtin_ffs(getpagesize()) - 1;
2488 }
2489 
2490 unsigned int
2491 vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
2492 {
2493 	return vm_calc_num_pages(num_guest_pages,
2494 				 vm_guest_mode_params[mode].page_shift,
2495 				 getpageshift(), true);
2496 }
2497 
2498 unsigned int
2499 vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages)
2500 {
2501 	return vm_calc_num_pages(num_host_pages, getpageshift(),
2502 				 vm_guest_mode_params[mode].page_shift, false);
2503 }
2504 
2505 unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
2506 {
2507 	unsigned int n;
2508 	n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
2509 	return vm_adjust_num_guest_pages(mode, n);
2510 }
2511 
2512 int vm_get_stats_fd(struct kvm_vm *vm)
2513 {
2514 	return ioctl(vm->fd, KVM_GET_STATS_FD, NULL);
2515 }
2516 
2517 int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid)
2518 {
2519 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
2520 
2521 	return ioctl(vcpu->fd, KVM_GET_STATS_FD, NULL);
2522 }
2523