// SPDX-License-Identifier: GPL-2.0-only
/*
 * tools/testing/selftests/kvm/lib/kvm_util.c
 *
 * Copyright (C) 2018, Google LLC.
 */

#define _GNU_SOURCE /* for program_invocation_name */
#include "test_util.h"
#include "kvm_util.h"
#include "kvm_util_internal.h"
#include "processor.h"

#include <assert.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <linux/kernel.h>

#define KVM_UTIL_MIN_PFN	2

static int vcpu_mmap_sz(void);

/* Aligns x up to the next multiple of size. Size must be a power of 2. */
static void *align(void *x, size_t size)
{
	size_t mask = size - 1;
	TEST_ASSERT(size != 0 && !(size & (size - 1)),
		    "size not a power of 2: %lu", size);
	return (void *) (((size_t) x + mask) & ~mask);
}

/*
 * Open KVM_DEV_PATH if available, otherwise exit the entire program.
 *
 * Input Args:
 *   flags - The flags to pass when opening KVM_DEV_PATH.
 *
 * Return:
 *   The opened file descriptor of /dev/kvm.
 */
static int _open_kvm_dev_path_or_exit(int flags)
{
	int fd;

	fd = open(KVM_DEV_PATH, flags);
	if (fd < 0) {
		print_skip("%s not available, is KVM loaded? (errno: %d)",
			   KVM_DEV_PATH, errno);
		exit(KSFT_SKIP);
	}

	return fd;
}

int open_kvm_dev_path_or_exit(void)
{
	return _open_kvm_dev_path_or_exit(O_RDONLY);
}

/*
 * Capability
 *
 * Input Args:
 *   cap - Capability
 *
 * Output Args: None
 *
 * Return:
 *   On success, the value corresponding to the capability (KVM_CAP_*)
 *   specified by the value of cap. On failure a TEST_ASSERT failure
 *   is produced.
 *
 * Looks up and returns the value corresponding to the capability
 * (KVM_CAP_*) given by cap.
 */
int kvm_check_cap(long cap)
{
	int ret;
	int kvm_fd;

	kvm_fd = open_kvm_dev_path_or_exit();
	ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
	TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n"
		"  rc: %i errno: %i", ret, errno);

	close(kvm_fd);

	return ret;
}

/* VM Enable Capability
 *
 * Input Args:
 *   vm - Virtual Machine
 *   cap - Capability
 *
 * Output Args: None
 *
 * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
 *
 * Enables a capability (KVM_CAP_*) on the VM.
 */
int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
{
	int ret;

	ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap);
	TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n"
		"  rc: %i errno: %i", ret, errno);

	return ret;
}
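/*
 * Example (illustrative sketch, not called anywhere in this file): a test
 * would typically check that a capability is supported before enabling it
 * on a VM, e.g.:
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_DIRTY_LOG_RING };
 *
 *	if (kvm_check_cap(KVM_CAP_DIRTY_LOG_RING)) {
 *		cap.args[0] = 4096;
 *		vm_enable_cap(vm, &cap);
 *	}
 */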
/* VCPU Enable Capability
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpu_id - VCPU
 *   cap - Capability
 *
 * Output Args: None
 *
 * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
 *
 * Enables a capability (KVM_CAP_*) on the VCPU.
 */
int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
		    struct kvm_enable_cap *cap)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpu_id);
	int r;

	TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id);

	r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap);
	TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n"
			"  rc: %i, errno: %i", r, errno);

	return r;
}

void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
{
	struct kvm_enable_cap cap = { 0 };

	cap.cap = KVM_CAP_DIRTY_LOG_RING;
	cap.args[0] = ring_size;
	vm_enable_cap(vm, &cap);
	vm->dirty_ring_size = ring_size;
}

static void vm_open(struct kvm_vm *vm, int perm)
{
	vm->kvm_fd = _open_kvm_dev_path_or_exit(perm);

	if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
		print_skip("immediate_exit not available");
		exit(KSFT_SKIP);
	}

	vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type);
	TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
		"rc: %i errno: %i", vm->fd, errno);
}

const char *vm_guest_mode_string(uint32_t i)
{
	static const char * const strings[] = {
		[VM_MODE_P52V48_4K]	= "PA-bits:52, VA-bits:48, 4K pages",
		[VM_MODE_P52V48_64K]	= "PA-bits:52, VA-bits:48, 64K pages",
		[VM_MODE_P48V48_4K]	= "PA-bits:48, VA-bits:48, 4K pages",
		[VM_MODE_P48V48_64K]	= "PA-bits:48, VA-bits:48, 64K pages",
		[VM_MODE_P40V48_4K]	= "PA-bits:40, VA-bits:48, 4K pages",
		[VM_MODE_P40V48_64K]	= "PA-bits:40, VA-bits:48, 64K pages",
		[VM_MODE_PXXV48_4K]	= "PA-bits:ANY, VA-bits:48, 4K pages",
		[VM_MODE_P47V64_4K]	= "PA-bits:47, VA-bits:64, 4K pages",
	};
	_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
		       "Missing new mode strings?");

	TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i);

	return strings[i];
}

/* Indexed by enum vm_guest_mode: { pa_bits, va_bits, page_size, page_shift } */
const struct vm_guest_mode_params vm_guest_mode_params[] = {
	{ 52, 48, 0x1000, 12 },
	{ 52, 48, 0x10000, 16 },
	{ 48, 48, 0x1000, 12 },
	{ 48, 48, 0x10000, 16 },
	{ 40, 48, 0x1000, 12 },
	{ 40, 48, 0x10000, 16 },
	{ 0, 0, 0x1000, 12 },
	{ 47, 64, 0x1000, 12 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
	       "Missing new mode params?");
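/*
 * Example (illustrative sketch): since the table above is indexed by
 * enum vm_guest_mode, a test can translate a page count for a given mode
 * into a byte count without creating a VM first, e.g.:
 *
 *	uint64_t bytes = nr_pages *
 *			 vm_guest_mode_params[VM_MODE_P40V48_4K].page_size;
 */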
/*
 * VM Create
 *
 * Input Args:
 *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
 *   phy_pages - Physical memory pages
 *   perm - permission
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to opaque structure that describes the created VM.
 *
 * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
 * When phy_pages is non-zero, a memory region of phy_pages physical pages
 * is created and mapped starting at guest physical address 0. The file
 * descriptor to control the created VM is created with the permissions
 * given by perm (e.g. O_RDWR).
 */
struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
{
	struct kvm_vm *vm;

	pr_debug("%s: mode='%s' pages='%ld' perm='%d'\n", __func__,
		 vm_guest_mode_string(mode), phy_pages, perm);

	vm = calloc(1, sizeof(*vm));
	TEST_ASSERT(vm != NULL, "Insufficient Memory");

	INIT_LIST_HEAD(&vm->vcpus);
	vm->regions.gpa_tree = RB_ROOT;
	vm->regions.hva_tree = RB_ROOT;
	hash_init(vm->regions.slot_hash);

	vm->mode = mode;
	vm->type = 0;

	vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
	vm->va_bits = vm_guest_mode_params[mode].va_bits;
	vm->page_size = vm_guest_mode_params[mode].page_size;
	vm->page_shift = vm_guest_mode_params[mode].page_shift;

	/* Setup mode specific traits. */
	switch (vm->mode) {
	case VM_MODE_P52V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P52V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P48V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P48V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P40V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P40V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_PXXV48_4K:
#ifdef __x86_64__
		kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
		/*
		 * Ignore KVM support for 5-level paging (vm->va_bits == 57),
		 * it doesn't take effect unless CR4.LA57 is set, which it
		 * isn't for this VM_MODE.
		 */
		TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
			    "Linear address width (%d bits) not supported",
			    vm->va_bits);
		pr_debug("Guest physical address width detected: %d\n",
			 vm->pa_bits);
		vm->pgtable_levels = 4;
		vm->va_bits = 48;
#else
		TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
#endif
		break;
	case VM_MODE_P47V64_4K:
		vm->pgtable_levels = 5;
		break;
	default:
		TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
	}

#ifdef __aarch64__
	if (vm->pa_bits != 40)
		vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
#endif

	vm_open(vm, perm);

	/* Limit to VA-bit canonical virtual addresses. */
	vm->vpages_valid = sparsebit_alloc();
	sparsebit_set_num(vm->vpages_valid,
		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
	sparsebit_set_num(vm->vpages_valid,
		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);

	/* Limit physical addresses to PA-bits. */
	vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;

	/* Allocate and setup memory for guest. */
	vm->vpages_mapped = sparsebit_alloc();
	if (phy_pages != 0)
		vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
					    0, 0, phy_pages, 0);

	return vm;
}

/*
 * VM Create with customized parameters
 *
 * Input Args:
 *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
 *   nr_vcpus - VCPU count
 *   slot0_mem_pages - Slot0 physical memory size
 *   extra_mem_pages - Non-slot0 physical memory total size
 *   num_percpu_pages - Per-cpu physical memory pages
 *   guest_code - Guest entry point
 *   vcpuids - VCPU IDs
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to opaque structure that describes the created VM.
 *
 * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K),
 * with a customized slot0 memory size of at least 512 pages (currently).
 * extra_mem_pages is only used to calculate the maximum page table size;
 * no real memory is allocated for non-slot0 memory in this function.
 */
struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
				    uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
				    uint32_t num_percpu_pages, void *guest_code,
				    uint32_t vcpuids[])
{
	uint64_t vcpu_pages, extra_pg_pages, pages;
	struct kvm_vm *vm;
	int i;

	/* Force slot0 memory size to be at least DEFAULT_GUEST_PHY_PAGES. */
	if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES)
		slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES;

	/*
	 * The maximum page table size for a memory region will be when the
	 * smallest pages are used. Considering each page contains x page
	 * table descriptors, the total extra size for page tables (for extra
	 * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
	 * than N/x*2.
	 */
	vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
	extra_pg_pages = (slot0_mem_pages + extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
	pages = slot0_mem_pages + vcpu_pages + extra_pg_pages;

	TEST_ASSERT(nr_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
		    "nr_vcpus = %d too large for host, max-vcpus = %d",
		    nr_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));

	pages = vm_adjust_num_guest_pages(mode, pages);
	vm = vm_create(mode, pages, O_RDWR);

	kvm_vm_elf_load(vm, program_invocation_name);

#ifdef __x86_64__
	vm_create_irqchip(vm);
#endif

	for (i = 0; i < nr_vcpus; ++i) {
		uint32_t vcpuid = vcpuids ? vcpuids[i] : i;

		vm_vcpu_add_default(vm, vcpuid, guest_code);
	}

	return vm;
}

struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
					    uint32_t num_percpu_pages, void *guest_code,
					    uint32_t vcpuids[])
{
	return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, DEFAULT_GUEST_PHY_PAGES,
				    extra_mem_pages, num_percpu_pages, guest_code, vcpuids);
}

struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
				 void *guest_code)
{
	return vm_create_default_with_vcpus(1, extra_mem_pages, 0, guest_code,
					    (uint32_t []){ vcpuid });
}
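/*
 * Example (illustrative sketch; "guest_code" is a hypothetical guest entry
 * point defined by the test): create a single-vCPU VM running guest_code
 * with no extra memory, run the vCPU once, then tear the VM down.
 *
 *	struct kvm_vm *vm = vm_create_default(0, 0, guest_code);
 *
 *	vcpu_run(vm, 0);
 *	kvm_vm_free(vm);
 */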
/*
 * VM Restart
 *
 * Input Args:
 *   vm - VM that has been released before
 *   perm - permission
 *
 * Output Args: None
 *
 * Reopens the file descriptors associated to the VM and reinstates the
 * global state, such as the irqchip and the memory regions that are mapped
 * into the guest.
 */
void kvm_vm_restart(struct kvm_vm *vmp, int perm)
{
	int ctr;
	struct userspace_mem_region *region;

	vm_open(vmp, perm);
	if (vmp->has_irqchip)
		vm_create_irqchip(vmp);

	hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
			    "  rc: %i errno: %i\n"
			    "  slot: %u flags: 0x%x\n"
			    "  guest_phys_addr: 0x%llx size: 0x%llx",
			    ret, errno, region->region.slot,
			    region->region.flags,
			    region->region.guest_phys_addr,
			    region->region.memory_size);
	}
}

void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
{
	struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
	int ret;

	ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
	TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s",
		    __func__, strerror(errno));
}

void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
			    uint64_t first_page, uint32_t num_pages)
{
	struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot,
					    .first_page = first_page,
					    .num_pages = num_pages };
	int ret;

	ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
	TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s",
		    __func__, strerror(errno));
}

uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
{
	return ioctl(vm->fd, KVM_RESET_DIRTY_RINGS);
}
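/*
 * Example (illustrative sketch; assumes "slot" was created with
 * KVM_MEM_LOG_DIRTY_PAGES and "bitmap" is sized for the slot): harvest
 * the dirty bitmap, then clear the log for the first 1024 pages so those
 * pages are tracked again.
 *
 *	kvm_vm_get_dirty_log(vm, slot, bitmap);
 *	kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, 1024);
 */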
/*
 * Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Searches for a region with any physical memory that overlaps with
 * any portion of the guest physical addresses from start to end
 * inclusive. If multiple overlapping regions exist, a pointer to any
 * of the regions is returned. NULL is returned only when no overlapping
 * region exists.
 */
static struct userspace_mem_region *
userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
	struct rb_node *node;

	for (node = vm->regions.gpa_tree.rb_node; node; ) {
		struct userspace_mem_region *region =
			container_of(node, struct userspace_mem_region, gpa_node);
		uint64_t existing_start = region->region.guest_phys_addr;
		uint64_t existing_end = region->region.guest_phys_addr
			+ region->region.memory_size - 1;
		if (start <= existing_end && end >= existing_start)
			return region;

		if (start < existing_start)
			node = node->rb_left;
		else
			node = node->rb_right;
	}

	return NULL;
}

/*
 * KVM Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Public interface to userspace_mem_region_find. Allows tests to look up
 * the memslot data structure for a given range of guest physical memory.
 */
struct kvm_userspace_memory_region *
kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
				 uint64_t end)
{
	struct userspace_mem_region *region;

	region = userspace_mem_region_find(vm, start, end);
	if (!region)
		return NULL;

	return &region->region;
}

/*
 * VCPU Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to VCPU structure
 *
 * Locates a vcpu structure that describes the VCPU specified by vcpuid and
 * returns a pointer to it. Returns NULL if the VM doesn't contain a VCPU
 * for the specified vcpuid.
 */
struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu;

	list_for_each_entry(vcpu, &vm->vcpus, list) {
		if (vcpu->id == vcpuid)
			return vcpu;
	}

	return NULL;
}

/*
 * VM VCPU Remove
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpu - VCPU to remove
 *
 * Output Args: None
 *
 * Return: None, TEST_ASSERT failures for all error conditions
 *
 * Removes a vCPU from a VM and frees its resources.
 */
static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu)
{
	int ret;

	if (vcpu->dirty_gfns) {
		ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
		TEST_ASSERT(ret == 0, "munmap of VCPU dirty ring failed, "
			    "rc: %i errno: %i", ret, errno);
		vcpu->dirty_gfns = NULL;
	}

	ret = munmap(vcpu->state, vcpu_mmap_sz());
	TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
		"errno: %i", ret, errno);
	ret = close(vcpu->fd);
	TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
		"errno: %i", ret, errno);

	list_del(&vcpu->list);
	free(vcpu);
}

void kvm_vm_release(struct kvm_vm *vmp)
{
	struct vcpu *vcpu, *tmp;
	int ret;

	list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
		vm_vcpu_rm(vmp, vcpu);

	ret = close(vmp->fd);
	TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
		"  vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);

	ret = close(vmp->kvm_fd);
	TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n"
		"  vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
}

static void __vm_mem_region_delete(struct kvm_vm *vm,
				   struct userspace_mem_region *region,
				   bool unlink)
{
	int ret;

	if (unlink) {
		rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
		rb_erase(&region->hva_node, &vm->regions.hva_tree);
		hash_del(&region->slot_node);
	}

	region->region.memory_size = 0;
	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
		    "rc: %i errno: %i", ret, errno);

	sparsebit_free(&region->unused_phy_pages);
	ret = munmap(region->mmap_start, region->mmap_size);
	TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", ret, errno);

	free(region);
}

/*
 * Destroys and frees the VM pointed to by vmp.
 */
void kvm_vm_free(struct kvm_vm *vmp)
{
	int ctr;
	struct hlist_node *node;
	struct userspace_mem_region *region;

	if (vmp == NULL)
		return;

	/* Free userspace_mem_regions. */
	hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
		__vm_mem_region_delete(vmp, region, false);

	/* Free sparsebit arrays. */
	sparsebit_free(&vmp->vpages_valid);
	sparsebit_free(&vmp->vpages_mapped);

	kvm_vm_release(vmp);

	/* Free the structure describing the VM. */
	free(vmp);
}

/*
 * Memory Compare, host virtual to guest virtual
 *
 * Input Args:
 *   hva - Starting host virtual address
 *   vm - Virtual Machine
 *   gva - Starting guest virtual address
 *   len - number of bytes to compare
 *
 * Output Args: None
 *
 * Input/Output Args: None
 *
 * Return:
 *   Returns 0 if the bytes starting at hva for a length of len
 *   are equal to the guest virtual bytes starting at gva. Returns
 *   a value < 0, if bytes at hva are less than those at gva.
 *   Otherwise a value > 0 is returned.
 *
 * Compares the bytes starting at the host virtual address hva, for
 * a length of len, to the guest bytes starting at the guest virtual
 * address given by gva.
 */
int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
{
	size_t amt;

	/*
	 * Compare a batch of bytes until either a mismatch is found
	 * or all the bytes have been compared.
	 */
	for (uintptr_t offset = 0; offset < len; offset += amt) {
		uintptr_t ptr1 = (uintptr_t)hva + offset;

		/*
		 * Determine host address for guest virtual address
		 * at offset.
		 */
		uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);

		/*
		 * Determine amount to compare on this pass.
		 * Don't allow the comparison to cross a page boundary.
		 */
		amt = len - offset;
		if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
			amt = vm->page_size - (ptr1 % vm->page_size);
		if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
			amt = vm->page_size - (ptr2 % vm->page_size);

		assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
		assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));

		/*
		 * Perform the comparison. If there is a difference
		 * return that result to the caller, otherwise need
		 * to continue on looking for a mismatch.
		 */
		int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
		if (ret != 0)
			return ret;
	}

	/*
	 * No mismatch found. Let the caller know the two memory
	 * areas are equal.
	 */
	return 0;
}
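/*
 * Example (illustrative sketch; assumes the host copy at "hva" and the
 * guest copy at "gva" of a buffer are expected to match after the guest
 * has run):
 *
 *	TEST_ASSERT(kvm_memcmp_hva_gva(hva, vm, gva, len) == 0,
 *		    "Guest and host buffers differ");
 */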
static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), gpa_node);
		parent = *cur;
		if (region->region.guest_phys_addr <
		    cregion->region.guest_phys_addr)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->region.guest_phys_addr !=
				    cregion->region.guest_phys_addr,
				    "Duplicate GPA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->gpa_node, parent, cur);
	rb_insert_color(&region->gpa_node, gpa_tree);
}

static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), hva_node);
		parent = *cur;
		if (region->host_mem < cregion->host_mem)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->host_mem !=
				    cregion->host_mem,
				    "Duplicate HVA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->hva_node, parent, cur);
	rb_insert_color(&region->hva_node, hva_tree);
}

/*
 * VM Userspace Memory Region Add
 *
 * Input Args:
 *   vm - Virtual Machine
 *   src_type - Storage source for this region
 *              (e.g. VM_MEM_SRC_ANONYMOUS for anonymous memory)
 *   guest_paddr - Starting guest physical address
 *   slot - KVM region slot
 *   npages - Number of physical pages
 *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
 *
 * Output Args: None
 *
 * Return: None
 *
 * Allocates a memory area of the number of pages specified by npages
 * and maps it to the VM specified by vm, at a starting physical address
 * given by guest_paddr. The region is created with a KVM region slot
 * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The
 * region is created with the flags given by flags.
 */
void vm_userspace_mem_region_add(struct kvm_vm *vm,
	enum vm_mem_backing_src_type src_type,
	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
	uint32_t flags)
{
	int ret;
	struct userspace_mem_region *region;
	size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
	size_t alignment;

	TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
		"Number of guest pages is not compatible with the host. "
		"Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));

	TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
		"address not on a page boundary.\n"
		"  guest_paddr: 0x%lx vm->page_size: 0x%x",
		guest_paddr, vm->page_size);
	TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
		<= vm->max_gfn, "Physical range beyond maximum "
		"supported physical address,\n"
		"  guest_paddr: 0x%lx npages: 0x%lx\n"
		"  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		guest_paddr, npages, vm->max_gfn, vm->page_size);

	/*
	 * Confirm a mem region with an overlapping address doesn't
	 * already exist.
	 */
	region = (struct userspace_mem_region *) userspace_mem_region_find(
		vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
	if (region != NULL)
		TEST_FAIL("overlapping userspace_mem_region already "
			"exists\n"
			"  requested guest_paddr: 0x%lx npages: 0x%lx "
			"page_size: 0x%x\n"
			"  existing guest_paddr: 0x%lx size: 0x%lx",
			guest_paddr, npages, vm->page_size,
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size);

	/* Confirm no region with the requested slot already exists. */
	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
			       slot) {
		if (region->region.slot != slot)
			continue;

		TEST_FAIL("A mem region with the requested slot "
			"already exists.\n"
			"  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
			"  existing slot: %u paddr: 0x%lx size: 0x%lx",
			slot, guest_paddr, npages,
			region->region.slot,
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size);
	}

	/* Allocate and initialize new mem region structure. */
	region = calloc(1, sizeof(*region));
	TEST_ASSERT(region != NULL, "Insufficient Memory");
	region->mmap_size = npages * vm->page_size;

#ifdef __s390x__
	/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
	alignment = 0x100000;
#else
	alignment = 1;
#endif

	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
		alignment = max(backing_src_pagesz, alignment);

	/* Add enough memory to align up if necessary */
	if (alignment > 1)
		region->mmap_size += alignment;

	region->fd = -1;
	if (backing_src_is_shared(src_type)) {
		int memfd_flags = MFD_CLOEXEC;

		if (src_type == VM_MEM_SRC_SHARED_HUGETLB)
			memfd_flags |= MFD_HUGETLB;

		region->fd = memfd_create("kvm_selftest", memfd_flags);
		TEST_ASSERT(region->fd != -1,
			    "memfd_create failed, errno: %i", errno);

		ret = ftruncate(region->fd, region->mmap_size);
		TEST_ASSERT(ret == 0, "ftruncate failed, errno: %i", errno);

		ret = fallocate(region->fd,
				FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0,
				region->mmap_size);
		TEST_ASSERT(ret == 0, "fallocate failed, errno: %i", errno);
	}

	region->mmap_start = mmap(NULL, region->mmap_size,
				  PROT_READ | PROT_WRITE,
				  vm_mem_backing_src_alias(src_type)->flag,
				  region->fd, 0);
	TEST_ASSERT(region->mmap_start != MAP_FAILED,
		    "test_malloc failed, mmap_start: %p errno: %i",
		    region->mmap_start, errno);

	/* Align host address */
	region->host_mem = align(region->mmap_start, alignment);

	/* As needed perform madvise */
	if ((src_type == VM_MEM_SRC_ANONYMOUS ||
	     src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
		ret = madvise(region->host_mem, npages * vm->page_size,
			      src_type == VM_MEM_SRC_ANONYMOUS ?
			      MADV_NOHUGEPAGE : MADV_HUGEPAGE);
		TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
			    region->host_mem, npages * vm->page_size,
			    vm_mem_backing_src_alias(src_type)->name);
	}

	region->unused_phy_pages = sparsebit_alloc();
	sparsebit_set_num(region->unused_phy_pages,
		guest_paddr >> vm->page_shift, npages);
	region->region.slot = slot;
	region->region.flags = flags;
	region->region.guest_phys_addr = guest_paddr;
	region->region.memory_size = npages * vm->page_size;
	region->region.userspace_addr = (uintptr_t) region->host_mem;
	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
		"  rc: %i errno: %i\n"
		"  slot: %u flags: 0x%x\n"
		"  guest_phys_addr: 0x%lx size: 0x%lx",
		ret, errno, slot, flags,
		guest_paddr, (uint64_t) region->region.memory_size);

	/* Add to quick lookup data structures */
	vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
	vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
	hash_add(vm->regions.slot_hash, &region->slot_node, slot);

	/* If shared memory, create an alias. */
	if (region->fd >= 0) {
		region->mmap_alias = mmap(NULL, region->mmap_size,
					  PROT_READ | PROT_WRITE,
					  vm_mem_backing_src_alias(src_type)->flag,
					  region->fd, 0);
		TEST_ASSERT(region->mmap_alias != MAP_FAILED,
			    "mmap of alias failed, errno: %i", errno);

		/* Align host alias address */
		region->host_alias = align(region->mmap_alias, alignment);
	}
}
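/*
 * Example (illustrative sketch; the slot and GPA values are arbitrary):
 * add a 512-page anonymous region at GPA 0x10000000 in slot 1, with dirty
 * logging enabled.
 *
 *	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
 *				    0x10000000, 1, 512,
 *				    KVM_MEM_LOG_DIRTY_PAGES);
 */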
/*
 * Memslot to region
 *
 * Input Args:
 *   vm - Virtual Machine
 *   memslot - KVM memory slot ID
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to memory region structure that describe memory region
 *   using kvm memory slot ID given by memslot. TEST_ASSERT failure
 *   on error (e.g. currently no memory region using memslot as a KVM
 *   memory slot ID).
 */
struct userspace_mem_region *
memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
	struct userspace_mem_region *region;

	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
			       memslot)
		if (region->region.slot == memslot)
			return region;

	fprintf(stderr, "No mem region with the requested slot found,\n"
		"  requested slot: %u\n", memslot);
	fputs("---- vm dump ----\n", stderr);
	vm_dump(stderr, vm, 2);
	TEST_FAIL("Mem region not found");
	return NULL;
}

/*
 * VM Memory Region Flags Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to update
 *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the flags of the memory region specified by the value of slot,
 * to the values given by flags.
 */
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
{
	int ret;
	struct userspace_mem_region *region;

	region = memslot2region(vm, slot);

	region->region.flags = flags;

	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);

	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
		"  rc: %i errno: %i slot: %u flags: 0x%x",
		ret, errno, slot, flags);
}

/*
 * VM Memory Region Move
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to move
 *   new_gpa - Starting guest physical address
 *
 * Output Args: None
 *
 * Return: None
 *
 * Change the gpa of a memory region.
 */
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
{
	struct userspace_mem_region *region;
	int ret;

	region = memslot2region(vm, slot);

	region->region.guest_phys_addr = new_gpa;

	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
		    "ret: %i errno: %i slot: %u new_gpa: 0x%lx",
		    ret, errno, slot, new_gpa);
}

/*
 * VM Memory Region Delete
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to delete
 *
 * Output Args: None
 *
 * Return: None
 *
 * Delete a memory region.
 */
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
	__vm_mem_region_delete(vm, memslot2region(vm, slot), true);
}

/*
 * VCPU mmap Size
 *
 * Input Args: None
 *
 * Output Args: None
 *
 * Return:
 *   Size of VCPU state
 *
 * Returns the size of the structure pointed to by the return value
 * of vcpu_state().
 */
static int vcpu_mmap_sz(void)
{
	int dev_fd, ret;

	dev_fd = open_kvm_dev_path_or_exit();

	ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
	TEST_ASSERT(ret >= sizeof(struct kvm_run),
		    "%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
		    __func__, ret, errno);

	close(dev_fd);

	return ret;
}

/*
 * VM VCPU Add
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *
 * Output Args: None
 *
 * Return: None
 *
 * Adds a virtual CPU to the VM specified by vm with the ID given by vcpuid.
 * No additional VCPU setup is done.
 */
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu;

	/* Confirm a vcpu with the specified id doesn't already exist. */
	vcpu = vcpu_find(vm, vcpuid);
	if (vcpu != NULL)
		TEST_FAIL("vcpu with the specified id "
			"already exists,\n"
			"  requested vcpuid: %u\n"
			"  existing vcpuid: %u state: %p",
			vcpuid, vcpu->id, vcpu->state);

	/* Allocate and initialize new vcpu structure. */
	vcpu = calloc(1, sizeof(*vcpu));
	TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
	vcpu->id = vcpuid;
	vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
	TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
		vcpu->fd, errno);

	TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
		"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
		vcpu_mmap_sz(), sizeof(*vcpu->state));
	vcpu->state = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
		PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
	TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
		"vcpu id: %u errno: %i", vcpuid, errno);

	/* Add to linked-list of VCPUs. */
	list_add(&vcpu->list, &vm->vcpus);
}

/*
 * VM Virtual Address Unused Gap
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size (bytes)
 *   vaddr_min - Minimum Virtual Address
 *
 * Output Args: None
 *
 * Return:
 *   Lowest virtual address at or above vaddr_min, with at least
 *   sz unused bytes. TEST_ASSERT failure if no area of at least
 *   size sz is available.
 *
 * Within the VM specified by vm, locates the lowest starting virtual
 * address >= vaddr_min, that has at least sz unallocated bytes. A
 * TEST_ASSERT failure occurs for invalid input or no area of at least
 * sz unallocated bytes >= vaddr_min is available.
 */
static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
				      vm_vaddr_t vaddr_min)
{
	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;

	/* Determine lowest permitted virtual page index. */
	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
	if ((pgidx_start * vm->page_size) < vaddr_min)
		goto no_va_found;

	/* Loop over section with enough valid virtual page indexes. */
	if (!sparsebit_is_set_num(vm->vpages_valid,
		pgidx_start, pages))
		pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
			pgidx_start, pages);
	do {
		/*
		 * Are there enough unused virtual pages available at
		 * the currently proposed starting virtual page index?
		 * If not, adjust proposed starting index to next
		 * possible.
		 */
		if (sparsebit_is_clear_num(vm->vpages_mapped,
			pgidx_start, pages))
			goto va_found;
		pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
			pgidx_start, pages);
		if (pgidx_start == 0)
			goto no_va_found;

		/*
		 * If needed, adjust proposed starting virtual address,
		 * to next range of valid virtual addresses.
		 */
		if (!sparsebit_is_set_num(vm->vpages_valid,
			pgidx_start, pages)) {
			pgidx_start = sparsebit_next_set_num(
				vm->vpages_valid, pgidx_start, pages);
			if (pgidx_start == 0)
				goto no_va_found;
		}
	} while (pgidx_start != 0);

no_va_found:
	TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages);

	/* NOT REACHED */
	return -1;

va_found:
	TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
		pgidx_start, pages),
		"Unexpected, invalid virtual page index range,\n"
		"  pgidx_start: 0x%lx\n"
		"  pages: 0x%lx",
		pgidx_start, pages);
	TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
		pgidx_start, pages),
		"Unexpected, pages already mapped,\n"
		"  pgidx_start: 0x%lx\n"
		"  pages: 0x%lx",
		pgidx_start, pages);

	return pgidx_start * vm->page_size;
}

/*
 * VM Virtual Address Allocate
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size in bytes
 *   vaddr_min - Minimum starting virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least sz bytes within the virtual address space of the vm
 * given by vm. The allocated bytes are mapped to a virtual address >=
 * the address given by vaddr_min. Note that each allocation uses a
 * unique set of pages, with the minimum real allocation being at least
 * a page.
 */
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
{
	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);

	virt_pgd_alloc(vm);
	vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
					      KVM_UTIL_MIN_PFN * vm->page_size, 0);

	/*
	 * Find an unused range of virtual page addresses of at least
	 * pages in length.
	 */
	vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);

	/* Map the virtual pages. */
	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
		pages--, vaddr += vm->page_size, paddr += vm->page_size) {

		virt_pg_map(vm, vaddr, paddr);

		sparsebit_set(vm->vpages_mapped,
			vaddr >> vm->page_shift);
	}

	return vaddr_start;
}

/*
 * VM Virtual Address Allocate Pages
 *
 * Input Args:
 *   vm - Virtual Machine
 *   nr_pages - Number of pages to allocate
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least N system pages worth of bytes within the virtual address
 * space of the vm.
 */
vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
{
	return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
}
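/*
 * Example (illustrative sketch): carve out one page of guest virtual
 * memory for test data, then fill it from the host side via the
 * GVA-to-HVA translation.
 *
 *	vm_vaddr_t gva = vm_vaddr_alloc_page(vm);
 *	void *hva = addr_gva2hva(vm, gva);
 *
 *	memset(hva, 0xaa, getpagesize());
 */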
/*
 * VM Virtual Address Allocate Page
 *
 * Input Args:
 *   vm - Virtual Machine
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least one system page worth of bytes within the virtual address
 * space of the vm.
 */
vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
{
	return vm_vaddr_alloc_pages(vm, 1);
}

/*
 * Map a range of VM virtual address to the VM's physical address
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vaddr - Virtual address to map
 *   paddr - VM Physical Address
 *   npages - The number of pages to map
 *
 * Output Args: None
 *
 * Return: None
 *
 * Within the VM given by @vm, creates a virtual translation for
 * @npages starting at @vaddr to the page range starting at @paddr.
 */
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
	      unsigned int npages)
{
	size_t page_size = vm->page_size;
	size_t size = npages * page_size;

	TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");

	while (npages--) {
		virt_pg_map(vm, vaddr, paddr);
		vaddr += page_size;
		paddr += page_size;
	}
}

/*
 * Address VM Physical to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gpa - VM physical address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 *
 * Locates the memory region containing the VM physical address given
 * by gpa, within the VM given by vm. When found, the host virtual
 * address providing the memory to the vm physical address is returned.
 * A TEST_ASSERT failure occurs if no region containing gpa exists.
 */
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
{
	struct userspace_mem_region *region;

	region = userspace_mem_region_find(vm, gpa, gpa);
	if (!region) {
		TEST_FAIL("No vm physical memory at 0x%lx", gpa);
		return NULL;
	}

	return (void *)((uintptr_t)region->host_mem
		+ (gpa - region->region.guest_phys_addr));
}

/*
 * Address Host Virtual to VM Physical
 *
 * Input Args:
 *   vm - Virtual Machine
 *   hva - Host virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent VM physical address
 *
 * Locates the memory region containing the host virtual address given
 * by hva, within the VM given by vm. When found, the equivalent
 * VM physical address is returned. A TEST_ASSERT failure occurs if no
 * region containing hva exists.
 */
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
{
	struct rb_node *node;

	for (node = vm->regions.hva_tree.rb_node; node; ) {
		struct userspace_mem_region *region =
			container_of(node, struct userspace_mem_region, hva_node);

		if (hva >= region->host_mem) {
			if (hva <= (region->host_mem
				+ region->region.memory_size - 1))
				return (vm_paddr_t)((uintptr_t)
					region->region.guest_phys_addr
					+ (hva - (uintptr_t)region->host_mem));

			node = node->rb_right;
		} else
			node = node->rb_left;
	}

	TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
	return -1;
}
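/*
 * Example (illustrative sketch): the two translations above are inverses
 * for any address that falls inside a region, so a round trip recovers
 * the original guest physical address.
 *
 *	void *hva = addr_gpa2hva(vm, gpa);
 *
 *	TEST_ASSERT(addr_hva2gpa(vm, hva) == gpa, "GPA/HVA round trip");
 */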
1422 * 1423 * Input Args: 1424 * vm - Virtual Machine 1425 * gpa - VM physical address 1426 * 1427 * Output Args: None 1428 * 1429 * Return: 1430 * Equivalent address within the host virtual *alias* area, or NULL 1431 * (without failing the test) if the guest memory is not shared (so 1432 * no alias exists). 1433 * 1434 * When vm_create() and related functions are called with a shared memory 1435 * src_type, we also create a writable, shared alias mapping of the 1436 * underlying guest memory. This allows the host to manipulate guest memory 1437 * without mapping that memory in the guest's address space. And, for 1438 * userfaultfd-based demand paging, we can do so without triggering userfaults. 1439 */ 1440 void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa) 1441 { 1442 struct userspace_mem_region *region; 1443 uintptr_t offset; 1444 1445 region = userspace_mem_region_find(vm, gpa, gpa); 1446 if (!region) 1447 return NULL; 1448 1449 if (!region->host_alias) 1450 return NULL; 1451 1452 offset = gpa - region->region.guest_phys_addr; 1453 return (void *) ((uintptr_t) region->host_alias + offset); 1454 } 1455 1456 /* 1457 * VM Create IRQ Chip 1458 * 1459 * Input Args: 1460 * vm - Virtual Machine 1461 * 1462 * Output Args: None 1463 * 1464 * Return: None 1465 * 1466 * Creates an interrupt controller chip for the VM specified by vm. 1467 */ 1468 void vm_create_irqchip(struct kvm_vm *vm) 1469 { 1470 int ret; 1471 1472 ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0); 1473 TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, " 1474 "rc: %i errno: %i", ret, errno); 1475 1476 vm->has_irqchip = true; 1477 } 1478 1479 /* 1480 * VM VCPU State 1481 * 1482 * Input Args: 1483 * vm - Virtual Machine 1484 * vcpuid - VCPU ID 1485 * 1486 * Output Args: None 1487 * 1488 * Return: 1489 * Pointer to structure that describes the state of the VCPU. 1490 * 1491 * Locates and returns a pointer to a structure that describes the 1492 * state of the VCPU with the given vcpuid. 1493 */ 1494 struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid) 1495 { 1496 struct vcpu *vcpu = vcpu_find(vm, vcpuid); 1497 TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); 1498 1499 return vcpu->state; 1500 } 1501 1502 /* 1503 * VM VCPU Run 1504 * 1505 * Input Args: 1506 * vm - Virtual Machine 1507 * vcpuid - VCPU ID 1508 * 1509 * Output Args: None 1510 * 1511 * Return: None 1512 * 1513 * Switch to executing the code for the VCPU given by vcpuid, within the VM 1514 * given by vm. 
void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
{
	int ret = _vcpu_run(vm, vcpuid);
	TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
		"rc: %i errno: %i", ret, errno);
}

int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int rc;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
	do {
		rc = ioctl(vcpu->fd, KVM_RUN, NULL);
	} while (rc == -1 && errno == EINTR);

	assert_on_unhandled_exception(vm, vcpuid);

	return rc;
}

int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	return vcpu->fd;
}

void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	vcpu->state->immediate_exit = 1;
	ret = ioctl(vcpu->fd, KVM_RUN, NULL);
	vcpu->state->immediate_exit = 0;

	TEST_ASSERT(ret == -1 && errno == EINTR,
		    "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
		    ret, errno);
}

void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
			  struct kvm_guest_debug *debug)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret = ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, debug);

	TEST_ASSERT(ret == 0, "KVM_SET_GUEST_DEBUG failed: %d", ret);
}

/*
 * VM VCPU Set MP State
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   mp_state - mp_state to be set
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the MP state of the VCPU given by vcpuid, to the state given
 * by mp_state.
 */
void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
		       struct kvm_mp_state *mp_state)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
	TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
		"rc: %i errno: %i", ret, errno);
}

/*
 * VM VCPU Get Reg List
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *
 * Output Args:
 *   None
 *
 * Return:
 *   A pointer to an allocated struct kvm_reg_list
 *
 * Get the list of guest registers which are supported for
 * KVM_GET_ONE_REG/KVM_SET_ONE_REG calls
 */
struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
	int ret;

	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, &reg_list_n);
	TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");
	reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
	reg_list->n = reg_list_n.n;
	vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list);
	return reg_list;
}
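/*
 * Example (illustrative sketch): a typical test run loop, driving the
 * vCPU once and checking that the guest exited for the expected reason.
 *
 *	vcpu_run(vm, 0);
 *	TEST_ASSERT(vcpu_state(vm, 0)->exit_reason == KVM_EXIT_IO,
 *		    "Unexpected exit reason: %s",
 *		    exit_reason_str(vcpu_state(vm, 0)->exit_reason));
 */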
/*
 * VM VCPU Regs Get
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *
 * Output Args:
 *   regs - current state of VCPU regs
 *
 * Return: None
 *
 * Obtains the current register state for the VCPU specified by vcpuid
 * and stores it at the location given by regs.
 */
void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
	TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
		ret, errno);
}

/*
 * VM VCPU Regs Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   regs - Values to set VCPU regs to
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the regs of the VCPU specified by vcpuid to the values
 * given by regs.
 */
void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
	TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
		ret, errno);
}

#ifdef __KVM_HAVE_VCPU_EVENTS
void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
		     struct kvm_vcpu_events *events)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
	TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS failed, rc: %i errno: %i",
		ret, errno);
}

void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
		     struct kvm_vcpu_events *events)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
	TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS failed, rc: %i errno: %i",
		ret, errno);
}
#endif

#ifdef __x86_64__
void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
			   struct kvm_nested_state *state)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	ret = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, state);
	TEST_ASSERT(ret == 0,
		"KVM_GET_NESTED_STATE failed, ret: %i errno: %i",
		ret, errno);
}

int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
			  struct kvm_nested_state *state, bool ignore_error)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	ret = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, state);
	if (!ignore_error) {
		TEST_ASSERT(ret == 0,
			"KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
			ret, errno);
	}

	return ret;
}
#endif

/*
 * VM VCPU System Regs Get
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *
 * Output Args:
 *   sregs - current state of VCPU system regs
 *
 * Return: None
 *
 * Obtains the current system register state for the VCPU specified by
 * vcpuid and stores it at the location given by sregs.
 */
void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
	TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
		ret, errno);
}

/*
 * VM VCPU System Regs Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   sregs - Values to set VCPU system regs to
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the system regs of the VCPU specified by vcpuid to the values
 * given by sregs.
 */
void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
{
	int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
	TEST_ASSERT(ret == 0, "KVM_SET_SREGS IOCTL failed, "
		"rc: %i errno: %i", ret, errno);
}

int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
}

void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
{
	int ret;

	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_FPU, fpu);
	TEST_ASSERT(ret == 0, "KVM_GET_FPU failed, rc: %i errno: %i (%s)",
		    ret, errno, strerror(errno));
}

void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
{
	int ret;

	ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_FPU, fpu);
	TEST_ASSERT(ret == 0, "KVM_SET_FPU failed, rc: %i errno: %i (%s)",
		    ret, errno, strerror(errno));
}

void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
{
	int ret;

	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, reg);
	TEST_ASSERT(ret == 0, "KVM_GET_ONE_REG failed, rc: %i errno: %i (%s)",
		    ret, errno, strerror(errno));
}

void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
{
	int ret;

	ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, reg);
	TEST_ASSERT(ret == 0, "KVM_SET_ONE_REG failed, rc: %i errno: %i (%s)",
		    ret, errno, strerror(errno));
}
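/*
 * Example (illustrative sketch, x86-flavored): a read-modify-write of the
 * system registers, e.g. flipping a CR4 bit from the host side (bit 3 is
 * CR4.DE, used here purely for illustration).
 *
 *	struct kvm_sregs sregs;
 *
 *	vcpu_sregs_get(vm, 0, &sregs);
 *	sregs.cr4 |= (1ULL << 3);
 *	vcpu_sregs_set(vm, 0, &sregs);
 */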
/*
 * VCPU Ioctl
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   cmd - Ioctl number
 *   arg - Argument to pass to the ioctl
 *
 * Return: None
 *
 * Issues an arbitrary ioctl on a VCPU fd.
 */
void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
		unsigned long cmd, void *arg)
{
	int ret;

	ret = _vcpu_ioctl(vm, vcpuid, cmd, arg);
	TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
		cmd, ret, errno, strerror(errno));
}

int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
		unsigned long cmd, void *arg)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	ret = ioctl(vcpu->fd, cmd, arg);

	return ret;
}

void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu;
	uint32_t size = vm->dirty_ring_size;

	TEST_ASSERT(size > 0, "Should enable dirty ring first");

	vcpu = vcpu_find(vm, vcpuid);

	TEST_ASSERT(vcpu, "Cannot find vcpu %u", vcpuid);

	if (!vcpu->dirty_gfns) {
		void *addr;

		/*
		 * Negative tests: the dirty ring must refuse private and
		 * executable mappings; only a shared read-write mapping
		 * is expected to succeed.
		 */
		addr = mmap(NULL, size, PROT_READ,
			    MAP_PRIVATE, vcpu->fd,
			    vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");

		addr = mmap(NULL, size, PROT_READ | PROT_EXEC,
			    MAP_PRIVATE, vcpu->fd,
			    vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");

		addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
			    MAP_SHARED, vcpu->fd,
			    vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");

		vcpu->dirty_gfns = addr;
		vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
	}

	return vcpu->dirty_gfns;
}

/*
 * VM Ioctl
 *
 * Input Args:
 *   vm - Virtual Machine
 *   cmd - Ioctl number
 *   arg - Argument to pass to the ioctl
 *
 * Return: None
 *
 * Issues an arbitrary ioctl on a VM fd.
 */
void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
{
	int ret;

	ret = _vm_ioctl(vm, cmd, arg);
	TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
		cmd, ret, errno, strerror(errno));
}

int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
{
	return ioctl(vm->fd, cmd, arg);
}
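/*
 * Example (illustrative sketch): the wrappers above let a test issue any
 * ioctl the library doesn't wrap explicitly, e.g. fetching the MP state
 * directly from a vCPU fd.
 *
 *	struct kvm_mp_state mp_state;
 *
 *	vcpu_ioctl(vm, 0, KVM_GET_MP_STATE, &mp_state);
 */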
/*
 * KVM system ioctl
 *
 * Input Args:
 *   vm - Virtual Machine
 *   cmd - Ioctl number
 *   arg - Argument to pass to the ioctl
 *
 * Return: None
 *
 * Issues an arbitrary ioctl on a KVM fd.
 */
void kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
{
	int ret;

	ret = ioctl(vm->kvm_fd, cmd, arg);
	TEST_ASSERT(ret == 0, "KVM ioctl %lu failed, rc: %i errno: %i (%s)",
		cmd, ret, errno, strerror(errno));
}

int _kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
{
	return ioctl(vm->kvm_fd, cmd, arg);
}

/*
 * Device Ioctl
 */

int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr)
{
	struct kvm_device_attr attribute = {
		.group = group,
		.attr = attr,
		.flags = 0,
	};

	return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute);
}

int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr)
{
	int ret = _kvm_device_check_attr(dev_fd, group, attr);

	TEST_ASSERT(ret >= 0, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno);
	return ret;
}

int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd)
{
	struct kvm_create_device create_dev;
	int ret;

	create_dev.type = type;
	create_dev.fd = -1;
	create_dev.flags = test ? KVM_CREATE_DEVICE_TEST : 0;
	ret = ioctl(vm_get_fd(vm), KVM_CREATE_DEVICE, &create_dev);
	*fd = create_dev.fd;
	return ret;
}

int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test)
{
	int fd, ret;

	ret = _kvm_create_device(vm, type, test, &fd);

	if (!test) {
		TEST_ASSERT(ret >= 0,
			    "KVM_CREATE_DEVICE IOCTL failed, rc: %i errno: %i", ret, errno);
		return fd;
	}
	return ret;
}

int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
		       void *val, bool write)
{
	struct kvm_device_attr kvmattr = {
		.group = group,
		.attr = attr,
		.flags = 0,
		.addr = (uintptr_t)val,
	};
	int ret;

	ret = ioctl(dev_fd, write ? KVM_SET_DEVICE_ATTR : KVM_GET_DEVICE_ATTR,
		    &kvmattr);
	return ret;
}

int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
		      void *val, bool write)
{
	int ret = _kvm_device_access(dev_fd, group, attr, val, write);

	TEST_ASSERT(ret >= 0, "KVM_SET|GET_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno);
	return ret;
}
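/*
 * Example (illustrative sketch; KVM_DEV_TYPE_VFIO is just one possible
 * device type): probe for support with the "test" flag, then create the
 * device for real once it is known to be supported.
 *
 *	int dev_fd;
 *
 *	if (!kvm_create_device(vm, KVM_DEV_TYPE_VFIO, true))
 *		dev_fd = kvm_create_device(vm, KVM_DEV_TYPE_VFIO, false);
 */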
/*
 * VM Dump
 *
 * Input Args:
 *   vm - Virtual Machine
 *   indent - Left margin indent amount
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the current state of the VM given by vm, to the FILE stream
 * given by stream.
 */
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
	int ctr;
	struct userspace_mem_region *region;
	struct vcpu *vcpu;

	fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
	fprintf(stream, "%*sMem Regions:\n", indent, "");
	hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
		fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
			"host_virt: %p\n", indent + 2, "",
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size,
			region->host_mem);
		fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
		sparsebit_dump(stream, region->unused_phy_pages, 0);
	}
	fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
	sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
	fprintf(stream, "%*spgd_created: %u\n", indent, "",
		vm->pgd_created);
	if (vm->pgd_created) {
		fprintf(stream, "%*sVirtual Translation Tables:\n",
			indent + 2, "");
		virt_dump(stream, vm, indent + 4);
	}
	fprintf(stream, "%*sVCPUs:\n", indent, "");
	list_for_each_entry(vcpu, &vm->vcpus, list)
		vcpu_dump(stream, vm, vcpu->id, indent + 2);
}

/* Known KVM exit reasons */
static struct exit_reason {
	unsigned int reason;
	const char *name;
} exit_reasons_known[] = {
	{KVM_EXIT_UNKNOWN, "UNKNOWN"},
	{KVM_EXIT_EXCEPTION, "EXCEPTION"},
	{KVM_EXIT_IO, "IO"},
	{KVM_EXIT_HYPERCALL, "HYPERCALL"},
	{KVM_EXIT_DEBUG, "DEBUG"},
	{KVM_EXIT_HLT, "HLT"},
	{KVM_EXIT_MMIO, "MMIO"},
	{KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
	{KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
	{KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
	{KVM_EXIT_INTR, "INTR"},
	{KVM_EXIT_SET_TPR, "SET_TPR"},
	{KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
	{KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
	{KVM_EXIT_S390_RESET, "S390_RESET"},
	{KVM_EXIT_DCR, "DCR"},
	{KVM_EXIT_NMI, "NMI"},
	{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
	{KVM_EXIT_OSI, "OSI"},
	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
	{KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
	{KVM_EXIT_X86_RDMSR, "RDMSR"},
	{KVM_EXIT_X86_WRMSR, "WRMSR"},
	{KVM_EXIT_XEN, "XEN"},
#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
#endif
};
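/*
 * Illustrative sketch, not part of the library: how tests typically pair the
 * table above with exit_reason_str() to turn an unexpected exit into a
 * readable assertion. vcpu_state() and vcpu_run() are the existing helpers
 * declared in kvm_util.h.
 */
static void __attribute__((unused))
example_expect_io_exit(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct kvm_run *run = vcpu_state(vm, vcpuid);

	vcpu_run(vm, vcpuid);
	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
		    "Unexpected exit reason: %u (%s)",
		    run->exit_reason, exit_reason_str(run->exit_reason));
}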
/*
 * Exit Reason String
 *
 * Input Args:
 *   exit_reason - Exit reason
 *
 * Output Args: None
 *
 * Return:
 *   Constant string pointer describing the exit reason.
 *
 * Locates and returns a constant string that describes the KVM exit
 * reason given by exit_reason. If no such string is found, a constant
 * string of "Unknown" is returned.
 */
const char *exit_reason_str(unsigned int exit_reason)
{
	unsigned int n1;

	for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
		if (exit_reason == exit_reasons_known[n1].reason)
			return exit_reasons_known[n1].name;
	}

	return "Unknown";
}

/*
 * Physical Contiguous Page Allocator
 *
 * Input Args:
 *   vm - Virtual Machine
 *   num - number of pages
 *   paddr_min - Physical address minimum
 *   memslot - Memory region to allocate page from
 *
 * Output Args: None
 *
 * Return:
 *   Starting physical address
 *
 * Within the VM specified by vm, locates a range of available physical
 * pages at or above paddr_min. If found, the pages are marked as in use
 * and their base address is returned. A TEST_ASSERT failure occurs if
 * not enough pages are available at or above paddr_min.
 */
vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
			      vm_paddr_t paddr_min, uint32_t memslot)
{
	struct userspace_mem_region *region;
	sparsebit_idx_t pg, base;

	TEST_ASSERT(num > 0, "Must allocate at least one page");

	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
		"not divisible by page size.\n"
		"  paddr_min: 0x%lx page_size: 0x%x",
		paddr_min, vm->page_size);

	region = memslot2region(vm, memslot);
	base = pg = paddr_min >> vm->page_shift;

	do {
		for (; pg < base + num; ++pg) {
			if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
				base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
				break;
			}
		}
	} while (pg && pg != base + num);

	if (pg == 0) {
		fprintf(stderr, "No guest physical page available, "
			"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
			paddr_min, vm->page_size, memslot);
		fputs("---- vm dump ----\n", stderr);
		vm_dump(stderr, vm, 2);
		abort();
	}

	for (pg = base; pg < base + num; ++pg)
		sparsebit_clear(region->unused_phy_pages, pg);

	return base * vm->page_size;
}

vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
			     uint32_t memslot)
{
	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
}

/* Arbitrary minimum physical address used for virtual translation tables. */
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000

vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
{
	return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
}

/*
 * Address Guest Virtual to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gva - VM virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 */
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
{
	return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
}
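/*
 * Illustrative sketch, not part of the library: allocate one guest-physical
 * page with the allocator above and scribble on it through its host mapping.
 * addr_gpa2hva() is the existing gpa -> hva helper from kvm_util.h; memslot 0
 * and the 1 MiB floor are arbitrary choices for the example.
 */
static void __attribute__((unused))
example_scribble_phys_page(struct kvm_vm *vm)
{
	vm_paddr_t gpa = vm_phy_page_alloc(vm, 0x100000, 0);

	memset(addr_gpa2hva(vm, gpa), 0xaa, vm->page_size);
}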
/*
 * Is Unrestricted Guest
 *
 * Input Args:
 *   vm - Virtual Machine
 *
 * Output Args: None
 *
 * Return: True if the unrestricted guest is set to 'Y', otherwise return false.
 *
 * Check if the unrestricted guest flag is enabled.
 */
bool vm_is_unrestricted_guest(struct kvm_vm *vm)
{
	char val = 'N';
	size_t count;
	FILE *f;

	if (vm == NULL) {
		/* Ensure that the KVM vendor-specific module is loaded. */
		close(open_kvm_dev_path_or_exit());
	}

	f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
	if (f) {
		count = fread(&val, sizeof(char), 1, f);
		TEST_ASSERT(count == 1, "Unable to read from param file.");
		fclose(f);
	}

	return val == 'Y';
}

unsigned int vm_get_page_size(struct kvm_vm *vm)
{
	return vm->page_size;
}

unsigned int vm_get_page_shift(struct kvm_vm *vm)
{
	return vm->page_shift;
}

uint64_t vm_get_max_gfn(struct kvm_vm *vm)
{
	return vm->max_gfn;
}

int vm_get_fd(struct kvm_vm *vm)
{
	return vm->fd;
}

static unsigned int vm_calc_num_pages(unsigned int num_pages,
				      unsigned int page_shift,
				      unsigned int new_page_shift,
				      bool ceil)
{
	unsigned int n;

	if (page_shift >= new_page_shift)
		return num_pages * (1 << (page_shift - new_page_shift));

	/*
	 * Only compute the shrink factor once the shift amount is known to
	 * be non-negative; shifting by a negative amount is undefined.
	 */
	n = 1 << (new_page_shift - page_shift);
	return num_pages / n + !!(ceil && num_pages % n);
}

static inline int getpageshift(void)
{
	return __builtin_ffs(getpagesize()) - 1;
}

unsigned int
vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
{
	return vm_calc_num_pages(num_guest_pages,
				 vm_guest_mode_params[mode].page_shift,
				 getpageshift(), true);
}

unsigned int
vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages)
{
	return vm_calc_num_pages(num_host_pages, getpageshift(),
				 vm_guest_mode_params[mode].page_shift, false);
}

unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
{
	unsigned int n;
	n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
	return vm_adjust_num_guest_pages(mode, n);
}

int vm_get_stats_fd(struct kvm_vm *vm)
{
	return ioctl(vm->fd, KVM_GET_STATS_FD, NULL);
}

int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	return ioctl(vcpu->fd, KVM_GET_STATS_FD, NULL);
}
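/*
 * Illustrative sketch, not part of the library: sanity-check a binary stats
 * fd returned by the helpers above. struct kvm_stats_header is part of the
 * KVM_GET_STATS_FD ABI in <linux/kvm.h>; parsing the descriptors and values
 * that follow the header is left to the caller.
 */
static void __attribute__((unused))
example_check_stats_fd(struct kvm_vm *vm)
{
	struct kvm_stats_header hdr;
	int stats_fd = vm_get_stats_fd(vm);

	TEST_ASSERT(stats_fd >= 0, "KVM_GET_STATS_FD failed, errno: %i", errno);
	TEST_ASSERT(read(stats_fd, &hdr, sizeof(hdr)) == sizeof(hdr),
		    "Short read of stats header");
	close(stats_fd);
}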