// SPDX-License-Identifier: GPL-2.0-only
/*
 * tools/testing/selftests/kvm/lib/kvm_util.c
 *
 * Copyright (C) 2018, Google LLC.
 */

#define _GNU_SOURCE /* for program_invocation_name */
#include "test_util.h"
#include "kvm_util.h"
#include "kvm_util_internal.h"
#include "processor.h"

#include <assert.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <linux/kernel.h>

#define KVM_UTIL_MIN_PFN	2

static int vcpu_mmap_sz(void);

/* Aligns x up to the next multiple of size. Size must be a power of 2. */
static void *align(void *x, size_t size)
{
	size_t mask = size - 1;
	TEST_ASSERT(size != 0 && !(size & (size - 1)),
		    "size not a power of 2: %lu", size);
	return (void *) (((size_t) x + mask) & ~mask);
}

/*
 * Open KVM_DEV_PATH if available, otherwise exit the entire program.
 *
 * Input Args:
 *   flags - The flags to pass when opening KVM_DEV_PATH.
 *
 * Return:
 *   The opened file descriptor of /dev/kvm.
 */
static int _open_kvm_dev_path_or_exit(int flags)
{
	int fd;

	fd = open(KVM_DEV_PATH, flags);
	if (fd < 0) {
		print_skip("%s not available, is KVM loaded? (errno: %d)",
			   KVM_DEV_PATH, errno);
		exit(KSFT_SKIP);
	}

	return fd;
}

int open_kvm_dev_path_or_exit(void)
{
	return _open_kvm_dev_path_or_exit(O_RDONLY);
}

/*
 * Capability
 *
 * Input Args:
 *   cap - Capability
 *
 * Output Args: None
 *
 * Return:
 *   On success, the value corresponding to the capability (KVM_CAP_*)
 *   specified by the value of cap.  On failure a TEST_ASSERT failure
 *   is produced.
 *
 * Looks up and returns the value corresponding to the capability
 * (KVM_CAP_*) given by cap.
 */
int kvm_check_cap(long cap)
{
	int ret;
	int kvm_fd;

	kvm_fd = open_kvm_dev_path_or_exit();
	ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
	TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n"
		    "  rc: %i errno: %i", ret, errno);

	close(kvm_fd);

	return ret;
}

/* VM Enable Capability
 *
 * Input Args:
 *   vm - Virtual Machine
 *   cap - Capability
 *
 * Output Args: None
 *
 * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
 *
 * Enables a capability (KVM_CAP_*) on the VM.
 */
int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
{
	int ret;

	ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap);
	TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n"
		    "  rc: %i errno: %i", ret, errno);

	return ret;
}
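
/*
 * Example (illustrative only, not used by the library): tests typically
 * probe a capability with kvm_check_cap() before enabling it.  The
 * capability and argument below are just one plausible choice; "vm" is
 * assumed to come from one of the vm_create*() helpers further down.
 *
 *	if (kvm_check_cap(KVM_CAP_X86_DISABLE_EXITS)) {
 *		struct kvm_enable_cap cap = {
 *			.cap = KVM_CAP_X86_DISABLE_EXITS,
 *			.args[0] = KVM_X86_DISABLE_EXITS_HLT,
 *		};
 *
 *		vm_enable_cap(vm, &cap);
 *	}
 */
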
/* VCPU Enable Capability
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpu_id - VCPU
 *   cap - Capability
 *
 * Output Args: None
 *
 * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
 *
 * Enables a capability (KVM_CAP_*) on the VCPU.
 */
int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
		    struct kvm_enable_cap *cap)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpu_id);
	int r;

	TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id);

	r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap);
	TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n"
		    "  rc: %i, errno: %i", r, errno);

	return r;
}

void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
{
	struct kvm_enable_cap cap = { 0 };

	cap.cap = KVM_CAP_DIRTY_LOG_RING;
	cap.args[0] = ring_size;
	vm_enable_cap(vm, &cap);
	vm->dirty_ring_size = ring_size;
}

static void vm_open(struct kvm_vm *vm, int perm)
{
	vm->kvm_fd = _open_kvm_dev_path_or_exit(perm);

	if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
		print_skip("immediate_exit not available");
		exit(KSFT_SKIP);
	}

	vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type);
	TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
		    "rc: %i errno: %i", vm->fd, errno);
}

const char *vm_guest_mode_string(uint32_t i)
{
	static const char * const strings[] = {
		[VM_MODE_P52V48_4K]	= "PA-bits:52, VA-bits:48, 4K pages",
		[VM_MODE_P52V48_64K]	= "PA-bits:52, VA-bits:48, 64K pages",
		[VM_MODE_P48V48_4K]	= "PA-bits:48, VA-bits:48, 4K pages",
		[VM_MODE_P48V48_64K]	= "PA-bits:48, VA-bits:48, 64K pages",
		[VM_MODE_P40V48_4K]	= "PA-bits:40, VA-bits:48, 4K pages",
		[VM_MODE_P40V48_64K]	= "PA-bits:40, VA-bits:48, 64K pages",
		[VM_MODE_PXXV48_4K]	= "PA-bits:ANY, VA-bits:48, 4K pages",
		[VM_MODE_P47V64_4K]	= "PA-bits:47, VA-bits:64, 4K pages",
	};
	_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
		       "Missing new mode strings?");

	TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i);

	return strings[i];
}

const struct vm_guest_mode_params vm_guest_mode_params[] = {
	{ 52, 48,  0x1000, 12 },
	{ 52, 48, 0x10000, 16 },
	{ 48, 48,  0x1000, 12 },
	{ 48, 48, 0x10000, 16 },
	{ 40, 48,  0x1000, 12 },
	{ 40, 48, 0x10000, 16 },
	{  0,  0,  0x1000, 12 },
	{ 47, 64,  0x1000, 12 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
	       "Missing new mode params?");
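
/*
 * Example (illustrative only): both tables above are indexed by
 * enum vm_guest_mode, so a test can query a mode's properties without
 * creating a VM.  VM_MODE_DEFAULT is the per-arch default from kvm_util.h.
 *
 *	pr_info("mode: %s page_size: 0x%x\n",
 *		vm_guest_mode_string(VM_MODE_DEFAULT),
 *		vm_guest_mode_params[VM_MODE_DEFAULT].page_size);
 */
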
/*
 * VM Create
 *
 * Input Args:
 *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
 *   phy_pages - Physical memory pages
 *   perm - permission
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to opaque structure that describes the created VM.
 *
 * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
 * When phy_pages is non-zero, a memory region of phy_pages physical pages
 * is created and mapped starting at guest physical address 0.  The file
 * descriptor to control the created VM is created with the permissions
 * given by perm (e.g. O_RDWR).
 */
struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
{
	struct kvm_vm *vm;

	pr_debug("%s: mode='%s' pages='%ld' perm='%d'\n", __func__,
		 vm_guest_mode_string(mode), phy_pages, perm);

	vm = calloc(1, sizeof(*vm));
	TEST_ASSERT(vm != NULL, "Insufficient Memory");

	INIT_LIST_HEAD(&vm->vcpus);
	vm->regions.gpa_tree = RB_ROOT;
	vm->regions.hva_tree = RB_ROOT;
	hash_init(vm->regions.slot_hash);

	vm->mode = mode;
	vm->type = 0;

	vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
	vm->va_bits = vm_guest_mode_params[mode].va_bits;
	vm->page_size = vm_guest_mode_params[mode].page_size;
	vm->page_shift = vm_guest_mode_params[mode].page_shift;

	/* Setup mode specific traits. */
	switch (vm->mode) {
	case VM_MODE_P52V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P52V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P48V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P48V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P40V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P40V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_PXXV48_4K:
#ifdef __x86_64__
		kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
		/*
		 * Ignore KVM support for 5-level paging (vm->va_bits == 57),
		 * it doesn't take effect unless CR4.LA57 is set, which it
		 * isn't for this VM_MODE.
		 */
		TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
			    "Linear address width (%d bits) not supported",
			    vm->va_bits);
		pr_debug("Guest physical address width detected: %d\n",
			 vm->pa_bits);
		vm->pgtable_levels = 4;
		vm->va_bits = 48;
#else
		TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
#endif
		break;
	case VM_MODE_P47V64_4K:
		vm->pgtable_levels = 5;
		break;
	default:
		TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
	}

#ifdef __aarch64__
	if (vm->pa_bits != 40)
		vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
#endif

	vm_open(vm, perm);

	/* Limit to VA-bit canonical virtual addresses. */
	vm->vpages_valid = sparsebit_alloc();
	sparsebit_set_num(vm->vpages_valid,
		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
	sparsebit_set_num(vm->vpages_valid,
		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);

	/* Limit physical addresses to PA-bits. */
	vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;

	/* Allocate and setup memory for guest. */
	vm->vpages_mapped = sparsebit_alloc();
	if (phy_pages != 0)
		vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
					    0, 0, phy_pages, 0);

	return vm;
}
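
/*
 * Example (illustrative only): creating a bare VM with the default number
 * of guest physical pages.  DEFAULT_GUEST_PHY_PAGES comes from kvm_util.h;
 * higher-level helpers such as vm_create_with_vcpus() below do this (and
 * more) for you.
 *
 *	struct kvm_vm *vm = vm_create(VM_MODE_DEFAULT,
 *				      DEFAULT_GUEST_PHY_PAGES, O_RDWR);
 *
 *	...
 *	kvm_vm_free(vm);
 */
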
/*
 * VM Create with customized parameters
 *
 * Input Args:
 *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
 *   nr_vcpus - VCPU count
 *   slot0_mem_pages - Slot0 physical memory size
 *   extra_mem_pages - Non-slot0 physical memory total size
 *   num_percpu_pages - Per-cpu physical memory pages
 *   guest_code - Guest entry point
 *   vcpuids - VCPU IDs
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to opaque structure that describes the created VM.
 *
 * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K),
 * with customized slot0 memory size, at least 512 pages currently.
 * extra_mem_pages is only used to calculate the maximum page table size,
 * no real memory allocation for non-slot0 memory in this function.
 */
struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
				    uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
				    uint32_t num_percpu_pages, void *guest_code,
				    uint32_t vcpuids[])
{
	uint64_t vcpu_pages, extra_pg_pages, pages;
	struct kvm_vm *vm;
	int i;

	/* Force slot0 memory size to be at least DEFAULT_GUEST_PHY_PAGES */
	if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES)
		slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES;

	/* The maximum page table size for a memory region will be when the
	 * smallest pages are used. Considering each page contains x page
	 * table descriptors, the total extra size for page tables (for extra
	 * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
	 * than N/x*2.
	 */
	vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
	extra_pg_pages = (slot0_mem_pages + extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
	pages = slot0_mem_pages + vcpu_pages + extra_pg_pages;

	TEST_ASSERT(nr_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
		    "nr_vcpus = %d too large for host, max-vcpus = %d",
		    nr_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));

	pages = vm_adjust_num_guest_pages(mode, pages);
	vm = vm_create(mode, pages, O_RDWR);

	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);

#ifdef __x86_64__
	vm_create_irqchip(vm);
#endif

	for (i = 0; i < nr_vcpus; ++i) {
		uint32_t vcpuid = vcpuids ? vcpuids[i] : i;

		vm_vcpu_add_default(vm, vcpuid, guest_code);

#ifdef __x86_64__
		vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
#endif
	}

	return vm;
}

struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
					    uint32_t num_percpu_pages, void *guest_code,
					    uint32_t vcpuids[])
{
	return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, DEFAULT_GUEST_PHY_PAGES,
				    extra_mem_pages, num_percpu_pages, guest_code, vcpuids);
}

struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
				 void *guest_code)
{
	return vm_create_default_with_vcpus(1, extra_mem_pages, 0, guest_code,
					    (uint32_t []){ vcpuid });
}
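
/*
 * Example (illustrative only): the typical skeleton of a single-vCPU test,
 * where guest_code is the test's guest-side entry point:
 *
 *	struct kvm_vm *vm = vm_create_default(0, 0, guest_code);
 *
 *	vcpu_run(vm, 0);
 *	...
 *	kvm_vm_free(vm);
 */
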
/*
 * VM Restart
 *
 * Input Args:
 *   vm - VM that has been released before
 *   perm - permission
 *
 * Output Args: None
 *
 * Reopens the file descriptors associated to the VM and reinstates the
 * global state, such as the irqchip and the memory regions that are mapped
 * into the guest.
 */
void kvm_vm_restart(struct kvm_vm *vmp, int perm)
{
	int ctr;
	struct userspace_mem_region *region;

	vm_open(vmp, perm);
	if (vmp->has_irqchip)
		vm_create_irqchip(vmp);

	hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
			    "  rc: %i errno: %i\n"
			    "  slot: %u flags: 0x%x\n"
			    "  guest_phys_addr: 0x%llx size: 0x%llx",
			    ret, errno, region->region.slot,
			    region->region.flags,
			    region->region.guest_phys_addr,
			    region->region.memory_size);
	}
}

void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
{
	struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
	int ret;

	ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
	TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s",
		    __func__, strerror(-ret));
}

void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
			    uint64_t first_page, uint32_t num_pages)
{
	struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot,
					    .first_page = first_page,
					    .num_pages = num_pages };
	int ret;

	ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
	TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s",
		    __func__, strerror(-ret));
}

uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
{
	return ioctl(vm->fd, KVM_RESET_DIRTY_RINGS);
}

/*
 * Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Searches for a region with any physical memory that overlaps with
 * any portion of the guest physical addresses from start to end
 * inclusive.  If multiple overlapping regions exist, a pointer to any
 * of the regions is returned.  NULL is returned only when no overlapping
 * region exists.
 */
static struct userspace_mem_region *
userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
	struct rb_node *node;

	for (node = vm->regions.gpa_tree.rb_node; node; ) {
		struct userspace_mem_region *region =
			container_of(node, struct userspace_mem_region, gpa_node);
		uint64_t existing_start = region->region.guest_phys_addr;
		uint64_t existing_end = region->region.guest_phys_addr
			+ region->region.memory_size - 1;
		if (start <= existing_end && end >= existing_start)
			return region;

		if (start < existing_start)
			node = node->rb_left;
		else
			node = node->rb_right;
	}

	return NULL;
}

/*
 * KVM Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Public interface to userspace_mem_region_find. Allows tests to look up
 * the memslot data structure for a given range of guest physical memory.
 */
struct kvm_userspace_memory_region *
kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
				 uint64_t end)
{
	struct userspace_mem_region *region;

	region = userspace_mem_region_find(vm, start, end);
	if (!region)
		return NULL;

	return &region->region;
}

/*
 * VCPU Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to VCPU structure
 *
 * Locates a vcpu structure that describes the VCPU specified by vcpuid and
 * returns a pointer to it.  Returns NULL if the VM doesn't contain a VCPU
 * for the specified vcpuid.
 */
struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu;

	list_for_each_entry(vcpu, &vm->vcpus, list) {
		if (vcpu->id == vcpuid)
			return vcpu;
	}

	return NULL;
}

/*
 * VM VCPU Remove
 *
 * Input Args:
 *   vcpu - VCPU to remove
 *
 * Output Args: None
 *
 * Return: None, TEST_ASSERT failures for all error conditions
 *
 * Removes a vCPU from a VM and frees its resources.
 */
static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu)
{
	int ret;

	if (vcpu->dirty_gfns) {
		ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
		TEST_ASSERT(ret == 0, "munmap of VCPU dirty ring failed, "
			    "rc: %i errno: %i", ret, errno);
		vcpu->dirty_gfns = NULL;
	}

	ret = munmap(vcpu->state, vcpu_mmap_sz());
	TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
		    "errno: %i", ret, errno);
	ret = close(vcpu->fd);
	TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
		    "errno: %i", ret, errno);

	list_del(&vcpu->list);
	free(vcpu);
}

void kvm_vm_release(struct kvm_vm *vmp)
{
	struct vcpu *vcpu, *tmp;
	int ret;

	list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
		vm_vcpu_rm(vmp, vcpu);

	ret = close(vmp->fd);
	TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
		    "  vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);

	ret = close(vmp->kvm_fd);
	TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n"
		    "  vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
}

static void __vm_mem_region_delete(struct kvm_vm *vm,
				   struct userspace_mem_region *region,
				   bool unlink)
{
	int ret;

	if (unlink) {
		rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
		rb_erase(&region->hva_node, &vm->regions.hva_tree);
		hash_del(&region->slot_node);
	}

	region->region.memory_size = 0;
	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
		    "rc: %i errno: %i", ret, errno);

	sparsebit_free(&region->unused_phy_pages);
	ret = munmap(region->mmap_start, region->mmap_size);
	TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", ret, errno);

	free(region);
}

/*
 * Destroys and frees the VM pointed to by vmp.
 */
void kvm_vm_free(struct kvm_vm *vmp)
{
	int ctr;
	struct hlist_node *node;
	struct userspace_mem_region *region;

	if (vmp == NULL)
		return;

	/* Free userspace_mem_regions. */
	hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
		__vm_mem_region_delete(vmp, region, false);

	/* Free sparsebit arrays. */
	sparsebit_free(&vmp->vpages_valid);
	sparsebit_free(&vmp->vpages_mapped);

	kvm_vm_release(vmp);

	/* Free the structure describing the VM. */
	free(vmp);
}

/*
 * Memory Compare, host virtual to guest virtual
 *
 * Input Args:
 *   hva - Starting host virtual address
 *   vm - Virtual Machine
 *   gva - Starting guest virtual address
 *   len - number of bytes to compare
 *
 * Output Args: None
 *
 * Input/Output Args: None
 *
 * Return:
 *   Returns 0 if the bytes starting at hva for a length of len
 *   are equal to the guest virtual bytes starting at gva.  Returns
 *   a value < 0, if bytes at hva are less than those at gva.
 *   Otherwise a value > 0 is returned.
 *
 * Compares the bytes starting at the host virtual address hva, for
 * a length of len, to the guest bytes starting at the guest virtual
 * address given by gva.
 */
int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
{
	size_t amt;

	/*
	 * Compare a batch of bytes until either a mismatch is found
	 * or all the bytes have been compared.
	 */
	for (uintptr_t offset = 0; offset < len; offset += amt) {
		uintptr_t ptr1 = (uintptr_t)hva + offset;

		/*
		 * Determine host address for guest virtual address
		 * at offset.
		 */
		uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);

		/*
		 * Determine amount to compare on this pass.
		 * Don't allow the comparison to cross a page boundary.
		 */
		amt = len - offset;
		if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
			amt = vm->page_size - (ptr1 % vm->page_size);
		if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
			amt = vm->page_size - (ptr2 % vm->page_size);

		assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
		assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));

		/*
		 * Perform the comparison.  If there is a difference
		 * return that result to the caller, otherwise need
		 * to continue on looking for a mismatch.
		 */
		int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
		if (ret != 0)
			return ret;
	}

	/*
	 * No mismatch found.  Let the caller know the two memory
	 * areas are equal.
	 */
	return 0;
}
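
/*
 * Example (illustrative only): verifying that a host buffer and a guest
 * buffer hold the same bytes, e.g. after the guest has filled a pattern
 * at a known guest virtual address:
 *
 *	TEST_ASSERT(!kvm_memcmp_hva_gva(host_buf, vm, guest_gva, len),
 *		    "Host and guest memory contents differ");
 */
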
static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), gpa_node);
		parent = *cur;
		if (region->region.guest_phys_addr <
		    cregion->region.guest_phys_addr)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->region.guest_phys_addr !=
				    cregion->region.guest_phys_addr,
				    "Duplicate GPA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->gpa_node, parent, cur);
	rb_insert_color(&region->gpa_node, gpa_tree);
}

static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), hva_node);
		parent = *cur;
		if (region->host_mem < cregion->host_mem)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->host_mem !=
				    cregion->host_mem,
				    "Duplicate HVA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->hva_node, parent, cur);
	rb_insert_color(&region->hva_node, hva_tree);
}

/*
 * VM Userspace Memory Region Add
 *
 * Input Args:
 *   vm - Virtual Machine
 *   src_type - Storage source for this region.
 *              NULL to use anonymous memory.
 *   guest_paddr - Starting guest physical address
 *   slot - KVM region slot
 *   npages - Number of physical pages
 *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
 *
 * Output Args: None
 *
 * Return: None
 *
 * Allocates a memory area of the number of pages specified by npages
 * and maps it to the VM specified by vm, at a starting physical address
 * given by guest_paddr.  The region is created with a KVM region slot
 * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM.  The
 * region is created with the flags given by flags.
 */
void vm_userspace_mem_region_add(struct kvm_vm *vm,
	enum vm_mem_backing_src_type src_type,
	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
	uint32_t flags)
{
	int ret;
	struct userspace_mem_region *region;
	size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
	size_t alignment;

	TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
		"Number of guest pages is not compatible with the host. "
		"Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));

	TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
		"address not on a page boundary.\n"
		"  guest_paddr: 0x%lx vm->page_size: 0x%x",
		guest_paddr, vm->page_size);
	TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
		<= vm->max_gfn, "Physical range beyond maximum "
		"supported physical address,\n"
		"  guest_paddr: 0x%lx npages: 0x%lx\n"
		"  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		guest_paddr, npages, vm->max_gfn, vm->page_size);

	/*
	 * Confirm a mem region with an overlapping address doesn't
	 * already exist.
	 */
	region = (struct userspace_mem_region *) userspace_mem_region_find(
		vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
	if (region != NULL)
		TEST_FAIL("overlapping userspace_mem_region already "
			"exists\n"
			"  requested guest_paddr: 0x%lx npages: 0x%lx "
			"page_size: 0x%x\n"
			"  existing guest_paddr: 0x%lx size: 0x%lx",
			guest_paddr, npages, vm->page_size,
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size);

	/* Confirm no region with the requested slot already exists. */
	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
			       slot) {
		if (region->region.slot != slot)
			continue;

		TEST_FAIL("A mem region with the requested slot "
			  "already exists.\n"
			  "  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
			  "  existing slot: %u paddr: 0x%lx size: 0x%lx",
			  slot, guest_paddr, npages,
			  region->region.slot,
			  (uint64_t) region->region.guest_phys_addr,
			  (uint64_t) region->region.memory_size);
	}

	/* Allocate and initialize new mem region structure. */
	region = calloc(1, sizeof(*region));
	TEST_ASSERT(region != NULL, "Insufficient Memory");
	region->mmap_size = npages * vm->page_size;

#ifdef __s390x__
	/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
	alignment = 0x100000;
#else
	alignment = 1;
#endif

	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
		alignment = max(backing_src_pagesz, alignment);

	/* Add enough memory to align up if necessary */
	if (alignment > 1)
		region->mmap_size += alignment;

	region->fd = -1;
	if (backing_src_is_shared(src_type)) {
		int memfd_flags = MFD_CLOEXEC;

		if (src_type == VM_MEM_SRC_SHARED_HUGETLB)
			memfd_flags |= MFD_HUGETLB;

		region->fd = memfd_create("kvm_selftest", memfd_flags);
		TEST_ASSERT(region->fd != -1,
			    "memfd_create failed, errno: %i", errno);

		ret = ftruncate(region->fd, region->mmap_size);
		TEST_ASSERT(ret == 0, "ftruncate failed, errno: %i", errno);

		ret = fallocate(region->fd,
				FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0,
				region->mmap_size);
		TEST_ASSERT(ret == 0, "fallocate failed, errno: %i", errno);
	}

	region->mmap_start = mmap(NULL, region->mmap_size,
				  PROT_READ | PROT_WRITE,
				  vm_mem_backing_src_alias(src_type)->flag,
				  region->fd, 0);
	TEST_ASSERT(region->mmap_start != MAP_FAILED,
		    "test_malloc failed, mmap_start: %p errno: %i",
		    region->mmap_start, errno);

	/* Align host address */
	region->host_mem = align(region->mmap_start, alignment);

	/* As needed perform madvise */
	if ((src_type == VM_MEM_SRC_ANONYMOUS ||
	     src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
		ret = madvise(region->host_mem, npages * vm->page_size,
			      src_type == VM_MEM_SRC_ANONYMOUS ?
			      MADV_NOHUGEPAGE : MADV_HUGEPAGE);
		TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
			    region->host_mem, npages * vm->page_size,
			    vm_mem_backing_src_alias(src_type)->name);
	}

	region->unused_phy_pages = sparsebit_alloc();
	sparsebit_set_num(region->unused_phy_pages,
		guest_paddr >> vm->page_shift, npages);
	region->region.slot = slot;
	region->region.flags = flags;
	region->region.guest_phys_addr = guest_paddr;
	region->region.memory_size = npages * vm->page_size;
	region->region.userspace_addr = (uintptr_t) region->host_mem;
	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
		"  rc: %i errno: %i\n"
		"  slot: %u flags: 0x%x\n"
		"  guest_phys_addr: 0x%lx size: 0x%lx",
		ret, errno, slot, flags,
		guest_paddr, (uint64_t) region->region.memory_size);

	/* Add to quick lookup data structures */
	vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
	vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
	hash_add(vm->regions.slot_hash, &region->slot_node, slot);

	/* If shared memory, create an alias. */
	if (region->fd >= 0) {
		region->mmap_alias = mmap(NULL, region->mmap_size,
					  PROT_READ | PROT_WRITE,
					  vm_mem_backing_src_alias(src_type)->flag,
					  region->fd, 0);
		TEST_ASSERT(region->mmap_alias != MAP_FAILED,
			    "mmap of alias failed, errno: %i", errno);

		/* Align host alias address */
		region->host_alias = align(region->mmap_alias, alignment);
	}
}

/*
 * Memslot to region
 *
 * Input Args:
 *   vm - Virtual Machine
 *   memslot - KVM memory slot ID
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to memory region structure that describes the memory region
 *   using kvm memory slot ID given by memslot.  TEST_ASSERT failure
 *   on error (e.g. currently no memory region using memslot as a KVM
 *   memory slot ID).
 */
struct userspace_mem_region *
memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
	struct userspace_mem_region *region;

	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
			       memslot)
		if (region->region.slot == memslot)
			return region;

	fprintf(stderr, "No mem region with the requested slot found,\n"
		"  requested slot: %u\n", memslot);
	fputs("---- vm dump ----\n", stderr);
	vm_dump(stderr, vm, 2);
	TEST_FAIL("Mem region not found");
	return NULL;
}
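
/*
 * Example (illustrative only): adding a second, 64-page anonymous region
 * in slot 1 at an arbitrary guest physical address, with dirty logging
 * enabled from the start:
 *
 *	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
 *				    0x10000000, 1, 64,
 *				    KVM_MEM_LOG_DIRTY_PAGES);
 */
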
/*
 * VM Memory Region Flags Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to modify
 *   flags - Flags to set for the memory region (e.g. KVM_MEM_LOG_DIRTY_PAGES)
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the flags of the memory region specified by the value of slot,
 * to the values given by flags.
 */
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
{
	int ret;
	struct userspace_mem_region *region;

	region = memslot2region(vm, slot);

	region->region.flags = flags;

	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);

	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
		"  rc: %i errno: %i slot: %u flags: 0x%x",
		ret, errno, slot, flags);
}

/*
 * VM Memory Region Move
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to move
 *   new_gpa - Starting guest physical address
 *
 * Output Args: None
 *
 * Return: None
 *
 * Change the gpa of a memory region.
 */
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
{
	struct userspace_mem_region *region;
	int ret;

	region = memslot2region(vm, slot);

	region->region.guest_phys_addr = new_gpa;

	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
		    "ret: %i errno: %i slot: %u new_gpa: 0x%lx",
		    ret, errno, slot, new_gpa);
}

/*
 * VM Memory Region Delete
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to delete
 *
 * Output Args: None
 *
 * Return: None
 *
 * Delete a memory region.
 */
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
	__vm_mem_region_delete(vm, memslot2region(vm, slot), true);
}

/*
 * VCPU mmap Size
 *
 * Input Args: None
 *
 * Output Args: None
 *
 * Return:
 *   Size of VCPU state
 *
 * Returns the size of the structure pointed to by the return value
 * of vcpu_state().
 */
static int vcpu_mmap_sz(void)
{
	int dev_fd, ret;

	dev_fd = open_kvm_dev_path_or_exit();

	ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
	TEST_ASSERT(ret >= sizeof(struct kvm_run),
		    "%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
		    __func__, ret, errno);

	close(dev_fd);

	return ret;
}

/*
 * VM VCPU Add
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *
 * Output Args: None
 *
 * Return: None
 *
 * Adds a virtual CPU to the VM specified by vm with the ID given by vcpuid.
 * No additional VCPU setup is done.
 */
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu;

	/* Confirm a vcpu with the specified id doesn't already exist. */
	vcpu = vcpu_find(vm, vcpuid);
	if (vcpu != NULL)
		TEST_FAIL("vcpu with the specified id "
			  "already exists,\n"
			  "  requested vcpuid: %u\n"
			  "  existing vcpuid: %u state: %p",
			  vcpuid, vcpu->id, vcpu->state);

	/* Allocate and initialize new vcpu structure. */
	vcpu = calloc(1, sizeof(*vcpu));
	TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
	vcpu->id = vcpuid;
	vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
	TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
		    vcpu->fd, errno);

	TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
		"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
		vcpu_mmap_sz(), sizeof(*vcpu->state));
	vcpu->state = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
		PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
	TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
		"vcpu id: %u errno: %i", vcpuid, errno);

	/* Add to linked-list of VCPUs. */
	list_add(&vcpu->list, &vm->vcpus);
}

/*
 * VM Virtual Address Unused Gap
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size (bytes)
 *   vaddr_min - Minimum Virtual Address
 *
 * Output Args: None
 *
 * Return:
 *   Lowest virtual address at or above vaddr_min, with at least
 *   sz unused bytes.  TEST_ASSERT failure if no area of at least
 *   size sz is available.
 *
 * Within the VM specified by vm, locates the lowest starting virtual
 * address >= vaddr_min, that has at least sz unallocated bytes.  A
 * TEST_ASSERT failure occurs for invalid input or no area of at least
 * sz unallocated bytes >= vaddr_min is available.
 */
static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
				      vm_vaddr_t vaddr_min)
{
	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;

	/* Determine lowest permitted virtual page index. */
	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
	if ((pgidx_start * vm->page_size) < vaddr_min)
		goto no_va_found;

	/* Loop over section with enough valid virtual page indexes. */
	if (!sparsebit_is_set_num(vm->vpages_valid,
		pgidx_start, pages))
		pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
			pgidx_start, pages);
	do {
		/*
		 * Are there enough unused virtual pages available at
		 * the currently proposed starting virtual page index.
		 * If not, adjust proposed starting index to next
		 * possible.
		 */
		if (sparsebit_is_clear_num(vm->vpages_mapped,
			pgidx_start, pages))
			goto va_found;
		pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
			pgidx_start, pages);
		if (pgidx_start == 0)
			goto no_va_found;

		/*
		 * If needed, adjust proposed starting virtual address,
		 * to next range of valid virtual addresses.
		 */
		if (!sparsebit_is_set_num(vm->vpages_valid,
			pgidx_start, pages)) {
			pgidx_start = sparsebit_next_set_num(
				vm->vpages_valid, pgidx_start, pages);
			if (pgidx_start == 0)
				goto no_va_found;
		}
	} while (pgidx_start != 0);

no_va_found:
	TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages);

	/* NOT REACHED */
	return -1;

va_found:
	TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
		pgidx_start, pages),
		"Unexpected, invalid virtual page index range,\n"
		"  pgidx_start: 0x%lx\n"
		"  pages: 0x%lx",
		pgidx_start, pages);
	TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
		pgidx_start, pages),
		"Unexpected, pages already mapped,\n"
		"  pgidx_start: 0x%lx\n"
		"  pages: 0x%lx",
		pgidx_start, pages);

	return pgidx_start * vm->page_size;
}

/*
 * VM Virtual Address Allocate
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size in bytes
 *   vaddr_min - Minimum starting virtual address
 *   data_memslot - Memory region slot for data pages
 *   pgd_memslot - Memory region slot for new virtual translation tables
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least sz bytes within the virtual address space of the vm
 * given by vm.  The allocated bytes are mapped to a virtual address >=
 * the address given by vaddr_min.  Note that each allocation uses a
 * unique set of pages, with the minimum real allocation being at least
 * a page.
 */
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
			  uint32_t data_memslot, uint32_t pgd_memslot)
{
	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);

	virt_pgd_alloc(vm, pgd_memslot);
	vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
					      KVM_UTIL_MIN_PFN * vm->page_size,
					      data_memslot);

	/*
	 * Find an unused range of virtual page addresses of at least
	 * pages in length.
	 */
	vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);

	/* Map the virtual pages. */
	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
		pages--, vaddr += vm->page_size, paddr += vm->page_size) {

		virt_pg_map(vm, vaddr, paddr, pgd_memslot);

		sparsebit_set(vm->vpages_mapped,
			vaddr >> vm->page_shift);
	}

	return vaddr_start;
}
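
/*
 * Example (illustrative only): allocating one page of guest virtual memory
 * backed by slot 0 and initializing it from the host side.
 * KVM_UTIL_MIN_VADDR is defined in kvm_util.h.
 *
 *	vm_vaddr_t gva = vm_vaddr_alloc(vm, 0x1000, KVM_UTIL_MIN_VADDR, 0, 0);
 *	void *hva = addr_gva2hva(vm, gva);
 *
 *	memset(hva, 0xaa, 0x1000);
 */
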
/*
 * Map a range of VM virtual address to the VM's physical address
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vaddr - Virtual address to map
 *   paddr - VM Physical Address
 *   npages - The number of pages to map
 *   pgd_memslot - Memory region slot for new virtual translation tables
 *
 * Output Args: None
 *
 * Return: None
 *
 * Within the VM given by @vm, creates a virtual translation for
 * @npages starting at @vaddr to the page range starting at @paddr.
 */
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
	      unsigned int npages, uint32_t pgd_memslot)
{
	size_t page_size = vm->page_size;
	size_t size = npages * page_size;

	TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");

	while (npages--) {
		virt_pg_map(vm, vaddr, paddr, pgd_memslot);
		vaddr += page_size;
		paddr += page_size;
	}
}

/*
 * Address VM Physical to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gpa - VM physical address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 *
 * Locates the memory region containing the VM physical address given
 * by gpa, within the VM given by vm.  When found, the host virtual
 * address providing the memory to the vm physical address is returned.
 * A TEST_ASSERT failure occurs if no region containing gpa exists.
 */
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
{
	struct userspace_mem_region *region;

	region = userspace_mem_region_find(vm, gpa, gpa);
	if (!region) {
		TEST_FAIL("No vm physical memory at 0x%lx", gpa);
		return NULL;
	}

	return (void *)((uintptr_t)region->host_mem
		+ (gpa - region->region.guest_phys_addr));
}

/*
 * Address Host Virtual to VM Physical
 *
 * Input Args:
 *   vm - Virtual Machine
 *   hva - Host virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent VM physical address
 *
 * Locates the memory region containing the host virtual address given
 * by hva, within the VM given by vm.  When found, the equivalent
 * VM physical address is returned.  A TEST_ASSERT failure occurs if no
 * region containing hva exists.
 */
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
{
	struct rb_node *node;

	for (node = vm->regions.hva_tree.rb_node; node; ) {
		struct userspace_mem_region *region =
			container_of(node, struct userspace_mem_region, hva_node);

		if (hva >= region->host_mem) {
			if (hva <= (region->host_mem
				+ region->region.memory_size - 1))
				return (vm_paddr_t)((uintptr_t)
					region->region.guest_phys_addr
					+ (hva - (uintptr_t)region->host_mem));

			node = node->rb_right;
		} else
			node = node->rb_left;
	}

	TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
	return -1;
}
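
/*
 * Example (illustrative only): for any gpa inside a registered region the
 * two translations above are inverses of each other:
 *
 *	void *hva = addr_gpa2hva(vm, gpa);
 *
 *	TEST_ASSERT(addr_hva2gpa(vm, hva) == gpa,
 *		    "gpa -> hva -> gpa translation did not round-trip");
 */
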
/*
 * Address VM physical to Host Virtual *alias*.
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gpa - VM physical address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent address within the host virtual *alias* area, or NULL
 *   (without failing the test) if the guest memory is not shared (so
 *   no alias exists).
 *
 * When vm_create() and related functions are called with a shared memory
 * src_type, we also create a writable, shared alias mapping of the
 * underlying guest memory. This allows the host to manipulate guest memory
 * without mapping that memory in the guest's address space. And, for
 * userfaultfd-based demand paging, we can do so without triggering userfaults.
 */
void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
{
	struct userspace_mem_region *region;
	uintptr_t offset;

	region = userspace_mem_region_find(vm, gpa, gpa);
	if (!region)
		return NULL;

	if (!region->host_alias)
		return NULL;

	offset = gpa - region->region.guest_phys_addr;
	return (void *) ((uintptr_t) region->host_alias + offset);
}

/*
 * VM Create IRQ Chip
 *
 * Input Args:
 *   vm - Virtual Machine
 *
 * Output Args: None
 *
 * Return: None
 *
 * Creates an interrupt controller chip for the VM specified by vm.
 */
void vm_create_irqchip(struct kvm_vm *vm)
{
	int ret;

	ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
	TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
		"rc: %i errno: %i", ret, errno);

	vm->has_irqchip = true;
}

/*
 * VM VCPU State
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to structure that describes the state of the VCPU.
 *
 * Locates and returns a pointer to a structure that describes the
 * state of the VCPU with the given vcpuid.
 */
struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	return vcpu->state;
}

/*
 * VM VCPU Run
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *
 * Output Args: None
 *
 * Return: None
 *
 * Switch to executing the code for the VCPU given by vcpuid, within the VM
 * given by vm.
 */
void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
{
	int ret = _vcpu_run(vm, vcpuid);
	TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
		"rc: %i errno: %i", ret, errno);
}

int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int rc;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
	do {
		rc = ioctl(vcpu->fd, KVM_RUN, NULL);
	} while (rc == -1 && errno == EINTR);

	assert_on_unhandled_exception(vm, vcpuid);

	return rc;
}

int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	return vcpu->fd;
}

void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	vcpu->state->immediate_exit = 1;
	ret = ioctl(vcpu->fd, KVM_RUN, NULL);
	vcpu->state->immediate_exit = 0;

	TEST_ASSERT(ret == -1 && errno == EINTR,
		    "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
		    ret, errno);
}

void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
			  struct kvm_guest_debug *debug)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret = ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, debug);

	TEST_ASSERT(ret == 0, "KVM_SET_GUEST_DEBUG failed: %d", ret);
}

/*
 * VM VCPU Set MP State
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   mp_state - mp_state to be set
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the MP state of the VCPU given by vcpuid, to the state given
 * by mp_state.
 */
void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
		       struct kvm_mp_state *mp_state)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
	TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
		"rc: %i errno: %i", ret, errno);
}

/*
 * VM VCPU Get Reg List
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *
 * Output Args:
 *   None
 *
 * Return:
 *   A pointer to an allocated struct kvm_reg_list
 *
 * Get the list of guest registers which are supported for
 * KVM_GET_ONE_REG/KVM_SET_ONE_REG calls
 */
struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
	int ret;

	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, &reg_list_n);
	TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");
	reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
	reg_list->n = reg_list_n.n;
	vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list);
	return reg_list;
}
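
/*
 * Example (illustrative only): the canonical run loop of a test, resuming
 * the vCPU until the guest executes HLT.  VCPU_ID is whatever id the test
 * created the vCPU with; exit_reason_str() is defined later in this file.
 *
 *	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
 *
 *	for (;;) {
 *		vcpu_run(vm, VCPU_ID);
 *		if (run->exit_reason == KVM_EXIT_HLT)
 *			break;
 *		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 *			    "Unexpected exit reason: %u (%s)",
 *			    run->exit_reason,
 *			    exit_reason_str(run->exit_reason));
 *	}
 */
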
/*
 * VM VCPU Regs Get
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *
 * Output Args:
 *   regs - current state of VCPU regs
 *
 * Return: None
 *
 * Obtains the current register state for the VCPU specified by vcpuid
 * and stores it at the location given by regs.
 */
void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
	TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
		    ret, errno);
}

/*
 * VM VCPU Regs Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   regs - Values to set VCPU regs to
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the regs of the VCPU specified by vcpuid to the values
 * given by regs.
 */
void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
	TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
		    ret, errno);
}

#ifdef __KVM_HAVE_VCPU_EVENTS
void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
		     struct kvm_vcpu_events *events)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
	TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS failed, rc: %i errno: %i",
		    ret, errno);
}

void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
		     struct kvm_vcpu_events *events)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
	TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS failed, rc: %i errno: %i",
		    ret, errno);
}
#endif

#ifdef __x86_64__
void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
			   struct kvm_nested_state *state)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	ret = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, state);
	TEST_ASSERT(ret == 0,
		    "KVM_GET_NESTED_STATE failed, ret: %i errno: %i",
		    ret, errno);
}

int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
			  struct kvm_nested_state *state, bool ignore_error)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	ret = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, state);
	if (!ignore_error) {
		TEST_ASSERT(ret == 0,
			    "KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
			    ret, errno);
	}

	return ret;
}
#endif

/*
 * VM VCPU System Regs Get
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *
 * Output Args:
 *   sregs - current state of VCPU system regs
 *
 * Return: None
 *
 * Obtains the current system register state for the VCPU specified by
 * vcpuid and stores it at the location given by sregs.
 */
void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
	TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
		    ret, errno);
}

/*
 * VM VCPU System Regs Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   sregs - Values to set VCPU system regs to
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the system regs of the VCPU specified by vcpuid to the values
 * given by sregs.
 */
void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
{
	int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
	TEST_ASSERT(ret == 0, "KVM_SET_SREGS IOCTL failed, "
		"rc: %i errno: %i", ret, errno);
}

int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
}

void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
{
	int ret;

	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_FPU, fpu);
	TEST_ASSERT(ret == 0, "KVM_GET_FPU failed, rc: %i errno: %i (%s)",
		    ret, errno, strerror(errno));
}

void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
{
	int ret;

	ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_FPU, fpu);
	TEST_ASSERT(ret == 0, "KVM_SET_FPU failed, rc: %i errno: %i (%s)",
		    ret, errno, strerror(errno));
}

void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
{
	int ret;

	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, reg);
	TEST_ASSERT(ret == 0, "KVM_GET_ONE_REG failed, rc: %i errno: %i (%s)",
		    ret, errno, strerror(errno));
}

void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
{
	int ret;

	ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, reg);
	TEST_ASSERT(ret == 0, "KVM_SET_ONE_REG failed, rc: %i errno: %i (%s)",
		    ret, errno, strerror(errno));
}
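
/*
 * Example (illustrative only, x86-64 field names): the usual
 * read-modify-write pattern for vCPU registers, e.g. advancing the guest
 * past an instruction the host has emulated:
 *
 *	struct kvm_regs regs;
 *
 *	vcpu_regs_get(vm, VCPU_ID, &regs);
 *	regs.rip += 2;
 *	vcpu_regs_set(vm, VCPU_ID, &regs);
 */
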
/*
 * VCPU Ioctl
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   cmd - Ioctl number
 *   arg - Argument to pass to the ioctl
 *
 * Return: None
 *
 * Issues an arbitrary ioctl on a VCPU fd.
 */
void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
		unsigned long cmd, void *arg)
{
	int ret;

	ret = _vcpu_ioctl(vm, vcpuid, cmd, arg);
	TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
		    cmd, ret, errno, strerror(errno));
}

int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
		unsigned long cmd, void *arg)
{
	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
	int ret;

	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

	ret = ioctl(vcpu->fd, cmd, arg);

	return ret;
}

void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct vcpu *vcpu;
	uint32_t size = vm->dirty_ring_size;

	TEST_ASSERT(size > 0, "Should enable dirty ring first");

	vcpu = vcpu_find(vm, vcpuid);

	TEST_ASSERT(vcpu, "Cannot find vcpu %u", vcpuid);

	if (!vcpu->dirty_gfns) {
		void *addr;

		addr = mmap(NULL, size, PROT_READ,
			    MAP_PRIVATE, vcpu->fd,
			    vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");

		addr = mmap(NULL, size, PROT_READ | PROT_EXEC,
			    MAP_PRIVATE, vcpu->fd,
			    vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");

		addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
			    MAP_SHARED, vcpu->fd,
			    vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");

		vcpu->dirty_gfns = addr;
		vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
	}

	return vcpu->dirty_gfns;
}

/*
 * VM Ioctl
 *
 * Input Args:
 *   vm - Virtual Machine
 *   cmd - Ioctl number
 *   arg - Argument to pass to the ioctl
 *
 * Return: None
 *
 * Issues an arbitrary ioctl on a VM fd.
 */
void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
{
	int ret;

	ret = _vm_ioctl(vm, cmd, arg);
	TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
		    cmd, ret, errno, strerror(errno));
}

int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
{
	return ioctl(vm->fd, cmd, arg);
}
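
/*
 * Example (illustrative only): vm_ioctl() covers one-off VM ioctls that
 * have no dedicated wrapper, e.g. setting the TSS address on x86 (the
 * address below is just a conventional choice):
 *
 *	vm_ioctl(vm, KVM_SET_TSS_ADDR, (void *)(unsigned long)0xfffbd000);
 */
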
/*
 * KVM system ioctl
 *
 * Input Args:
 *   vm - Virtual Machine
 *   cmd - Ioctl number
 *   arg - Argument to pass to the ioctl
 *
 * Return: None
 *
 * Issues an arbitrary ioctl on a KVM fd.
 */
void kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
{
	int ret;

	ret = ioctl(vm->kvm_fd, cmd, arg);
	TEST_ASSERT(ret == 0, "KVM ioctl %lu failed, rc: %i errno: %i (%s)",
		    cmd, ret, errno, strerror(errno));
}

int _kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
{
	return ioctl(vm->kvm_fd, cmd, arg);
}

/*
 * Device Ioctl
 */

int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr)
{
	struct kvm_device_attr attribute = {
		.group = group,
		.attr = attr,
		.flags = 0,
	};

	return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute);
}

int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr)
{
	int ret = _kvm_device_check_attr(dev_fd, group, attr);

	TEST_ASSERT(ret >= 0, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno);
	return ret;
}

int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd)
{
	struct kvm_create_device create_dev;
	int ret;

	create_dev.type = type;
	create_dev.fd = -1;
	create_dev.flags = test ? KVM_CREATE_DEVICE_TEST : 0;
	ret = ioctl(vm_get_fd(vm), KVM_CREATE_DEVICE, &create_dev);
	*fd = create_dev.fd;
	return ret;
}

int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test)
{
	int fd, ret;

	ret = _kvm_create_device(vm, type, test, &fd);

	if (!test) {
		TEST_ASSERT(ret >= 0,
			    "KVM_CREATE_DEVICE IOCTL failed, rc: %i errno: %i", ret, errno);
		return fd;
	}
	return ret;
}

int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
		       void *val, bool write)
{
	struct kvm_device_attr kvmattr = {
		.group = group,
		.attr = attr,
		.flags = 0,
		.addr = (uintptr_t)val,
	};
	int ret;

	ret = ioctl(dev_fd, write ? KVM_SET_DEVICE_ATTR : KVM_GET_DEVICE_ATTR,
		    &kvmattr);
	return ret;
}

int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
		      void *val, bool write)
{
	int ret = _kvm_device_access(dev_fd, group, attr, val, write);

	TEST_ASSERT(ret >= 0, "KVM_SET|GET_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno);
	return ret;
}
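
/*
 * Example (illustrative only, arm64 vGIC names): creating an in-kernel
 * device and reading one of its attributes:
 *
 *	uint32_t nr_irqs;
 *	int gic_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3, false);
 *
 *	kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0,
 *			  &nr_irqs, false);
 */
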
/*
 * VM Dump
 *
 * Input Args:
 *   vm - Virtual Machine
 *   indent - Left margin indent amount
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the current state of the VM given by vm, to the FILE stream
 * given by stream.
 */
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
	int ctr;
	struct userspace_mem_region *region;
	struct vcpu *vcpu;

	fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
	fprintf(stream, "%*sMem Regions:\n", indent, "");
	hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
		fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
			"host_virt: %p\n", indent + 2, "",
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size,
			region->host_mem);
		fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
		sparsebit_dump(stream, region->unused_phy_pages, 0);
	}
	fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
	sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
	fprintf(stream, "%*spgd_created: %u\n", indent, "",
		vm->pgd_created);
	if (vm->pgd_created) {
		fprintf(stream, "%*sVirtual Translation Tables:\n",
			indent + 2, "");
		virt_dump(stream, vm, indent + 4);
	}
	fprintf(stream, "%*sVCPUs:\n", indent, "");
	list_for_each_entry(vcpu, &vm->vcpus, list)
		vcpu_dump(stream, vm, vcpu->id, indent + 2);
}

/* Known KVM exit reasons */
static struct exit_reason {
	unsigned int reason;
	const char *name;
} exit_reasons_known[] = {
	{KVM_EXIT_UNKNOWN, "UNKNOWN"},
	{KVM_EXIT_EXCEPTION, "EXCEPTION"},
	{KVM_EXIT_IO, "IO"},
	{KVM_EXIT_HYPERCALL, "HYPERCALL"},
	{KVM_EXIT_DEBUG, "DEBUG"},
	{KVM_EXIT_HLT, "HLT"},
	{KVM_EXIT_MMIO, "MMIO"},
	{KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
	{KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
	{KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
	{KVM_EXIT_INTR, "INTR"},
	{KVM_EXIT_SET_TPR, "SET_TPR"},
	{KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
	{KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
	{KVM_EXIT_S390_RESET, "S390_RESET"},
	{KVM_EXIT_DCR, "DCR"},
	{KVM_EXIT_NMI, "NMI"},
	{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
	{KVM_EXIT_OSI, "OSI"},
	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
	{KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
	{KVM_EXIT_X86_RDMSR, "RDMSR"},
	{KVM_EXIT_X86_WRMSR, "WRMSR"},
	{KVM_EXIT_XEN, "XEN"},
#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
#endif
};
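/*
 * Illustrative sketch (not used by this library; the function name is
 * hypothetical): the typical test-side pattern for running a vCPU and
 * validating the exit, using exit_reason_str() (below) to produce a
 * readable failure message.
 */
static inline void example_assert_io_exit(struct kvm_vm *vm, uint32_t vcpuid)
{
	struct kvm_run *run = vcpu_state(vm, vcpuid);

	vcpu_run(vm, vcpuid);
	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
		    "Unexpected exit reason: %u (%s)",
		    run->exit_reason, exit_reason_str(run->exit_reason));
}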
/*
 * Exit Reason String
 *
 * Input Args:
 *   exit_reason - Exit reason
 *
 * Output Args: None
 *
 * Return:
 *   Constant string pointer describing the exit reason.
 *
 * Locates and returns a constant string that describes the KVM exit
 * reason given by exit_reason.  If no such string is found, a constant
 * string of "Unknown" is returned.
 */
const char *exit_reason_str(unsigned int exit_reason)
{
	unsigned int n1;

	for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
		if (exit_reason == exit_reasons_known[n1].reason)
			return exit_reasons_known[n1].name;
	}

	return "Unknown";
}

/*
 * Physical Contiguous Page Allocator
 *
 * Input Args:
 *   vm - Virtual Machine
 *   num - number of pages
 *   paddr_min - Physical address minimum
 *   memslot - Memory region to allocate page from
 *
 * Output Args: None
 *
 * Return:
 *   Starting physical address
 *
 * Within the VM specified by vm, locates a range of available physical
 * pages at or above paddr_min.  If found, the pages are marked as in use
 * and their base address is returned.  A TEST_ASSERT failure occurs if
 * not enough pages are available at or above paddr_min.
 */
vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
			      vm_paddr_t paddr_min, uint32_t memslot)
{
	struct userspace_mem_region *region;
	sparsebit_idx_t pg, base;

	TEST_ASSERT(num > 0, "Must allocate at least one page");

	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
		"not divisible by page size.\n"
		"  paddr_min: 0x%lx page_size: 0x%x",
		paddr_min, vm->page_size);

	region = memslot2region(vm, memslot);
	base = pg = paddr_min >> vm->page_shift;

	/*
	 * Scan for a run of num consecutive free pages; whenever a page in
	 * the current candidate run is found in use, restart the search at
	 * the next free page.  pg == 0 indicates the scan ran off the end
	 * of the region without finding a run.
	 */
	do {
		for (; pg < base + num; ++pg) {
			if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
				base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
				break;
			}
		}
	} while (pg && pg != base + num);

	if (pg == 0) {
		fprintf(stderr, "No guest physical page available, "
			"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
			paddr_min, vm->page_size, memslot);
		fputs("---- vm dump ----\n", stderr);
		vm_dump(stderr, vm, 2);
		abort();
	}

	for (pg = base; pg < base + num; ++pg)
		sparsebit_clear(region->unused_phy_pages, pg);

	return base * vm->page_size;
}

vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
			     uint32_t memslot)
{
	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
}

/*
 * Address Guest Virtual to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gva - VM virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 */
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
{
	return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
}
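/*
 * Illustrative sketch (not used by this library; the function name is
 * hypothetical): combine vm_phy_page_alloc() with addr_gpa2hva() to grab
 * one guest-physical page from memslot 0 and zero it from the host side.
 */
static inline vm_paddr_t example_alloc_and_zero_page(struct kvm_vm *vm)
{
	vm_paddr_t gpa = vm_phy_page_alloc(vm,
					   KVM_UTIL_MIN_PFN * vm->page_size, 0);

	memset(addr_gpa2hva(vm, gpa), 0, vm->page_size);
	return gpa;
}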
/*
 * Is Unrestricted Guest
 *
 * Input Args:
 *   vm - Virtual Machine
 *
 * Output Args: None
 *
 * Return: True if the kvm_intel "unrestricted_guest" module parameter
 *   is set to 'Y', otherwise false.
 *
 * Checks whether the unrestricted guest flag is enabled.
 */
bool vm_is_unrestricted_guest(struct kvm_vm *vm)
{
	char val = 'N';
	size_t count;
	FILE *f;

	if (vm == NULL) {
		/* Ensure that the KVM vendor-specific module is loaded. */
		close(open_kvm_dev_path_or_exit());
	}

	f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
	if (f) {
		count = fread(&val, sizeof(char), 1, f);
		TEST_ASSERT(count == 1, "Unable to read from param file.");
		fclose(f);
	}

	return val == 'Y';
}

unsigned int vm_get_page_size(struct kvm_vm *vm)
{
	return vm->page_size;
}

unsigned int vm_get_page_shift(struct kvm_vm *vm)
{
	return vm->page_shift;
}

uint64_t vm_get_max_gfn(struct kvm_vm *vm)
{
	return vm->max_gfn;
}

int vm_get_fd(struct kvm_vm *vm)
{
	return vm->fd;
}

static unsigned int vm_calc_num_pages(unsigned int num_pages,
				      unsigned int page_shift,
				      unsigned int new_page_shift,
				      bool ceil)
{
	unsigned int n;

	/*
	 * Converting to smaller (or equal) pages always yields a whole
	 * number of pages; converting to larger pages may leave a
	 * remainder, which is rounded up iff ceil.  Compute n only on the
	 * dividing path so the shift count is always in range.
	 */
	if (page_shift >= new_page_shift)
		return num_pages * (1 << (page_shift - new_page_shift));

	n = 1 << (new_page_shift - page_shift);
	return num_pages / n + !!(ceil && num_pages % n);
}

static inline int getpageshift(void)
{
	return __builtin_ffs(getpagesize()) - 1;
}

unsigned int
vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
{
	return vm_calc_num_pages(num_guest_pages,
				 vm_guest_mode_params[mode].page_shift,
				 getpageshift(), true);
}

unsigned int
vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages)
{
	return vm_calc_num_pages(num_host_pages, getpageshift(),
				 vm_guest_mode_params[mode].page_shift, false);
}

unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
{
	unsigned int n;

	n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
	return vm_adjust_num_guest_pages(mode, n);
}
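/*
 * Illustrative sketch (not used by this library; the function name is
 * hypothetical): the sizing helpers in action.  A byte size is first
 * converted to guest pages, then to the host pages required to back
 * them; e.g. 2MB with 4K guest pages is 512 guest pages.
 */
static inline unsigned int example_backing_pages(size_t size)
{
	unsigned int guest_pages = vm_calc_num_guest_pages(VM_MODE_DEFAULT,
							   size);

	return vm_num_host_pages(VM_MODE_DEFAULT, guest_pages);
}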