// SPDX-License-Identifier: GPL-2.0-only
/*
 * tools/testing/selftests/kvm/lib/kvm_util.c
 *
 * Copyright (C) 2018, Google LLC.
 */

#define _GNU_SOURCE /* for program_invocation_name */
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"

#include <assert.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <linux/kernel.h>

#define KVM_UTIL_MIN_PFN	2

static int vcpu_mmap_sz(void);

int open_path_or_exit(const char *path, int flags)
{
	int fd;

	fd = open(path, flags);
	__TEST_REQUIRE(fd >= 0, "%s not available (errno: %d)", path, errno);

	return fd;
}

/*
 * Open KVM_DEV_PATH if available, otherwise exit the entire program.
 *
 * Input Args:
 *   flags - The flags to pass when opening KVM_DEV_PATH.
 *
 * Return:
 *   The opened file descriptor of /dev/kvm.
 */
static int _open_kvm_dev_path_or_exit(int flags)
{
	return open_path_or_exit(KVM_DEV_PATH, flags);
}

int open_kvm_dev_path_or_exit(void)
{
	return _open_kvm_dev_path_or_exit(O_RDONLY);
}

static bool get_module_param_bool(const char *module_name, const char *param)
{
	const int path_size = 128;
	char path[path_size];
	char value;
	ssize_t r;
	int fd;

	r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
		     module_name, param);
	TEST_ASSERT(r < path_size,
		    "Failed to construct sysfs path in %d bytes.", path_size);

	fd = open_path_or_exit(path, O_RDONLY);

	r = read(fd, &value, 1);
	TEST_ASSERT(r == 1, "read(%s) failed", path);

	r = close(fd);
	TEST_ASSERT(!r, "close(%s) failed", path);

	if (value == 'Y')
		return true;
	else if (value == 'N')
		return false;

	TEST_FAIL("Unrecognized value '%c' for boolean module param", value);
}

bool get_kvm_intel_param_bool(const char *param)
{
	return get_module_param_bool("kvm_intel", param);
}

bool get_kvm_amd_param_bool(const char *param)
{
	return get_module_param_bool("kvm_amd", param);
}

/*
 * Capability
 *
 * Input Args:
 *   cap - Capability
 *
 * Output Args: None
 *
 * Return:
 *   On success, the Value corresponding to the capability (KVM_CAP_*)
 *   specified by the value of cap.  On failure a TEST_ASSERT failure
 *   is produced.
 *
 * Looks up and returns the value corresponding to the capability
 * (KVM_CAP_*) given by cap.
 */
unsigned int kvm_check_cap(long cap)
{
	int ret;
	int kvm_fd;

	kvm_fd = open_kvm_dev_path_or_exit();
	ret = __kvm_ioctl(kvm_fd, KVM_CHECK_EXTENSION, (void *)cap);
	TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret));

	close(kvm_fd);

	return (unsigned int)ret;
}

void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
{
	if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL))
		vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size);
	else
		vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size);
	vm->dirty_ring_size = ring_size;
}

static void vm_open(struct kvm_vm *vm)
{
	vm->kvm_fd = _open_kvm_dev_path_or_exit(O_RDWR);

	TEST_REQUIRE(kvm_has_cap(KVM_CAP_IMMEDIATE_EXIT));

	vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type);
	TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd));
}

const char *vm_guest_mode_string(uint32_t i)
{
	static const char * const strings[] = {
		[VM_MODE_P52V48_4K]	= "PA-bits:52, VA-bits:48, 4K pages",
		[VM_MODE_P52V48_64K]	= "PA-bits:52, VA-bits:48, 64K pages",
		[VM_MODE_P48V48_4K]	= "PA-bits:48, VA-bits:48, 4K pages",
		[VM_MODE_P48V48_16K]	= "PA-bits:48, VA-bits:48, 16K pages",
		[VM_MODE_P48V48_64K]	= "PA-bits:48, VA-bits:48, 64K pages",
		[VM_MODE_P40V48_4K]	= "PA-bits:40, VA-bits:48, 4K pages",
		[VM_MODE_P40V48_16K]	= "PA-bits:40, VA-bits:48, 16K pages",
		[VM_MODE_P40V48_64K]	= "PA-bits:40, VA-bits:48, 64K pages",
		[VM_MODE_PXXV48_4K]	= "PA-bits:ANY, VA-bits:48, 4K pages",
		[VM_MODE_P47V64_4K]	= "PA-bits:47, VA-bits:64, 4K pages",
		[VM_MODE_P44V64_4K]	= "PA-bits:44, VA-bits:64, 4K pages",
		[VM_MODE_P36V48_4K]	= "PA-bits:36, VA-bits:48, 4K pages",
		[VM_MODE_P36V48_16K]	= "PA-bits:36, VA-bits:48, 16K pages",
		[VM_MODE_P36V48_64K]	= "PA-bits:36, VA-bits:48, 64K pages",
		[VM_MODE_P36V47_16K]	= "PA-bits:36, VA-bits:47, 16K pages",
	};
	_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
		       "Missing new mode strings?");

	TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i);

	return strings[i];
}

const struct vm_guest_mode_params vm_guest_mode_params[] = {
	[VM_MODE_P52V48_4K]	= { 52, 48,  0x1000, 12 },
	[VM_MODE_P52V48_64K]	= { 52, 48, 0x10000, 16 },
	[VM_MODE_P48V48_4K]	= { 48, 48,  0x1000, 12 },
	[VM_MODE_P48V48_16K]	= { 48, 48,  0x4000, 14 },
	[VM_MODE_P48V48_64K]	= { 48, 48, 0x10000, 16 },
	[VM_MODE_P40V48_4K]	= { 40, 48,  0x1000, 12 },
	[VM_MODE_P40V48_16K]	= { 40, 48,  0x4000, 14 },
	[VM_MODE_P40V48_64K]	= { 40, 48, 0x10000, 16 },
	[VM_MODE_PXXV48_4K]	= {  0,  0,  0x1000, 12 },
	[VM_MODE_P47V64_4K]	= { 47, 64,  0x1000, 12 },
	[VM_MODE_P44V64_4K]	= { 44, 64,  0x1000, 12 },
	[VM_MODE_P36V48_4K]	= { 36, 48,  0x1000, 12 },
	[VM_MODE_P36V48_16K]	= { 36, 48,  0x4000, 14 },
	[VM_MODE_P36V48_64K]	= { 36, 48, 0x10000, 16 },
	[VM_MODE_P36V47_16K]	= { 36, 47,  0x4000, 14 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
	       "Missing new mode params?");
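/*
 * Illustrative sketch (not part of the original file): the table above is
 * what the page-size helpers further down key off of.  A hypothetical test
 * sizing a 16MB buffer for the default mode could do something like the
 * following; "buf_size" and "nr_pages" are made-up names used only for
 * illustration.
 *
 *	const uint64_t buf_size = 16 * 1024 * 1024;
 *	uint64_t nr_pages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, buf_size);
 *	struct kvm_vm *vm = __vm_create(VM_MODE_DEFAULT, 1, nr_pages);
 */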
struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages)
{
	struct kvm_vm *vm;

	pr_debug("%s: mode='%s' pages='%ld'\n", __func__,
		 vm_guest_mode_string(mode), nr_pages);

	vm = calloc(1, sizeof(*vm));
	TEST_ASSERT(vm != NULL, "Insufficient Memory");

	INIT_LIST_HEAD(&vm->vcpus);
	vm->regions.gpa_tree = RB_ROOT;
	vm->regions.hva_tree = RB_ROOT;
	hash_init(vm->regions.slot_hash);

	vm->mode = mode;
	vm->type = 0;

	vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
	vm->va_bits = vm_guest_mode_params[mode].va_bits;
	vm->page_size = vm_guest_mode_params[mode].page_size;
	vm->page_shift = vm_guest_mode_params[mode].page_shift;

	/* Setup mode specific traits. */
	switch (vm->mode) {
	case VM_MODE_P52V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P52V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P48V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P48V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P40V48_4K:
	case VM_MODE_P36V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P40V48_64K:
	case VM_MODE_P36V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P48V48_16K:
	case VM_MODE_P40V48_16K:
	case VM_MODE_P36V48_16K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P36V47_16K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_PXXV48_4K:
#ifdef __x86_64__
		kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
		/*
		 * Ignore KVM support for 5-level paging (vm->va_bits == 57);
		 * it doesn't take effect unless CR4.LA57 is set, which it
		 * is not for this VM_MODE.
		 */
		TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
			    "Linear address width (%d bits) not supported",
			    vm->va_bits);
		pr_debug("Guest physical address width detected: %d\n",
			 vm->pa_bits);
		vm->pgtable_levels = 4;
		vm->va_bits = 48;
#else
		TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
#endif
		break;
	case VM_MODE_P47V64_4K:
		vm->pgtable_levels = 5;
		break;
	case VM_MODE_P44V64_4K:
		vm->pgtable_levels = 5;
		break;
	default:
		TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
	}

#ifdef __aarch64__
	if (vm->pa_bits != 40)
		vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
#endif

	vm_open(vm);

	/* Limit to VA-bit canonical virtual addresses. */
	vm->vpages_valid = sparsebit_alloc();
	sparsebit_set_num(vm->vpages_valid,
		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
	sparsebit_set_num(vm->vpages_valid,
		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);

	/* Limit physical addresses to PA-bits. */
	vm->max_gfn = vm_compute_max_gfn(vm);

	/* Allocate and setup memory for guest. */
	vm->vpages_mapped = sparsebit_alloc();
	if (nr_pages != 0)
		vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
					    0, 0, nr_pages, 0);

	return vm;
}

static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
				     uint32_t nr_runnable_vcpus,
				     uint64_t extra_mem_pages)
{
	uint64_t nr_pages;

	TEST_ASSERT(nr_runnable_vcpus,
		    "Use vm_create_barebones() for VMs that _never_ have vCPUs\n");

	TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
		    "nr_vcpus = %d too large for host, max-vcpus = %d",
		    nr_runnable_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));

	/*
	 * Arbitrarily allocate 512 pages (2MB when page size is 4KB) for the
	 * test code and other per-VM assets that will be loaded into memslot0.
	 */
	nr_pages = 512;

	/* Account for the per-vCPU stacks on behalf of the test. */
	nr_pages += nr_runnable_vcpus * DEFAULT_STACK_PGS;

	/*
	 * Account for the number of pages needed for the page tables.  The
	 * maximum page table size for a memory region will be when the
	 * smallest page size is used.  Considering each page contains x page
	 * table descriptors, the total extra size for page tables (for extra
	 * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
	 * than N/x*2.
	 */
	nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2;

	return vm_adjust_num_guest_pages(mode, nr_pages);
}

struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
			   uint64_t nr_extra_pages)
{
	uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus,
						 nr_extra_pages);
	struct kvm_vm *vm;

	vm = ____vm_create(mode, nr_pages);

	kvm_vm_elf_load(vm, program_invocation_name);

#ifdef __x86_64__
	vm_create_irqchip(vm);
#endif
	return vm;
}

/*
 * VM Create with customized parameters
 *
 * Input Args:
 *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
 *   nr_vcpus - VCPU count
 *   extra_mem_pages - Non-slot0 physical memory total size
 *   guest_code - Guest entry point
 *   vcpus - Array in which pointers to the created vCPUs are returned
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to opaque structure that describes the created VM.
 *
 * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
 * extra_mem_pages is only used to calculate the maximum page table size;
 * no real memory allocation is done for non-slot0 memory in this function.
 */
struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
				      uint64_t extra_mem_pages,
				      void *guest_code, struct kvm_vcpu *vcpus[])
{
	struct kvm_vm *vm;
	int i;

	TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array");

	vm = __vm_create(mode, nr_vcpus, extra_mem_pages);

	for (i = 0; i < nr_vcpus; ++i)
		vcpus[i] = vm_vcpu_add(vm, i, guest_code);

	return vm;
}

struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
					 uint64_t extra_mem_pages,
					 void *guest_code)
{
	struct kvm_vcpu *vcpus[1];
	struct kvm_vm *vm;

	vm = __vm_create_with_vcpus(VM_MODE_DEFAULT, 1, extra_mem_pages,
				    guest_code, vcpus);

	*vcpu = vcpus[0];
	return vm;
}
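/*
 * Illustrative sketch (not part of the original file): typical call site for
 * the creation helpers above.  "guest_main" is a hypothetical guest entry
 * point supplied by the test.
 *
 *	struct kvm_vcpu *vcpu;
 *	struct kvm_vm *vm;
 *
 *	vm = __vm_create_with_one_vcpu(&vcpu, 0, guest_main);
 *	vcpu_run(vcpu);
 *	kvm_vm_free(vm);
 */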
/*
 * VM Restart
 *
 * Input Args:
 *   vm - VM that has been released before
 *
 * Output Args: None
 *
 * Reopens the file descriptors associated to the VM and reinstates the
 * global state, such as the irqchip and the memory regions that are mapped
 * into the guest.
 */
void kvm_vm_restart(struct kvm_vm *vmp)
{
	int ctr;
	struct userspace_mem_region *region;

	vm_open(vmp);
	if (vmp->has_irqchip)
		vm_create_irqchip(vmp);

	hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
			    "  rc: %i errno: %i\n"
			    "  slot: %u flags: 0x%x\n"
			    "  guest_phys_addr: 0x%llx size: 0x%llx",
			    ret, errno, region->region.slot,
			    region->region.flags,
			    region->region.guest_phys_addr,
			    region->region.memory_size);
	}
}

__weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm,
					      uint32_t vcpu_id)
{
	return __vm_vcpu_add(vm, vcpu_id);
}

struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm)
{
	kvm_vm_restart(vm);

	return vm_vcpu_recreate(vm, 0);
}

/*
 * Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Searches for a region with any physical memory that overlaps with
 * any portion of the guest physical addresses from start to end
 * inclusive.  If multiple overlapping regions exist, a pointer to any
 * of the regions is returned.  Null is returned only when no overlapping
 * region exists.
 */
static struct userspace_mem_region *
userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
	struct rb_node *node;

	for (node = vm->regions.gpa_tree.rb_node; node; ) {
		struct userspace_mem_region *region =
			container_of(node, struct userspace_mem_region, gpa_node);
		uint64_t existing_start = region->region.guest_phys_addr;
		uint64_t existing_end = region->region.guest_phys_addr
			+ region->region.memory_size - 1;
		if (start <= existing_end && end >= existing_start)
			return region;

		if (start < existing_start)
			node = node->rb_left;
		else
			node = node->rb_right;
	}

	return NULL;
}

/*
 * KVM Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Public interface to userspace_mem_region_find.  Allows tests to look up
 * the memslot datastructure for a given range of guest physical memory.
 */
struct kvm_userspace_memory_region *
kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
				 uint64_t end)
{
	struct userspace_mem_region *region;

	region = userspace_mem_region_find(vm, start, end);
	if (!region)
		return NULL;

	return &region->region;
}

__weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
{

}

/*
 * VM VCPU Remove
 *
 * Input Args:
 *   vcpu - VCPU to remove
 *
 * Output Args: None
 *
 * Return: None, TEST_ASSERT failures for all error conditions
 *
 * Removes a vCPU from a VM and frees its resources.
 */
static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
	int ret;

	if (vcpu->dirty_gfns) {
		ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
		TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
		vcpu->dirty_gfns = NULL;
	}

	ret = munmap(vcpu->run, vcpu_mmap_sz());
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));

	ret = close(vcpu->fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));

	list_del(&vcpu->list);

	vcpu_arch_free(vcpu);
	free(vcpu);
}

void kvm_vm_release(struct kvm_vm *vmp)
{
	struct kvm_vcpu *vcpu, *tmp;
	int ret;

	list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
		vm_vcpu_rm(vmp, vcpu);

	ret = close(vmp->fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));

	ret = close(vmp->kvm_fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
}

static void __vm_mem_region_delete(struct kvm_vm *vm,
				   struct userspace_mem_region *region,
				   bool unlink)
{
	int ret;

	if (unlink) {
		rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
		rb_erase(&region->hva_node, &vm->regions.hva_tree);
		hash_del(&region->slot_node);
	}

	region->region.memory_size = 0;
	vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	sparsebit_free(&region->unused_phy_pages);
	ret = munmap(region->mmap_start, region->mmap_size);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));

	free(region);
}

/*
 * Destroys and frees the VM pointed to by vmp.
 */
void kvm_vm_free(struct kvm_vm *vmp)
{
	int ctr;
	struct hlist_node *node;
	struct userspace_mem_region *region;

	if (vmp == NULL)
		return;

	/* Free cached stats metadata and close FD */
	if (vmp->stats_fd) {
		free(vmp->stats_desc);
		close(vmp->stats_fd);
	}

	/* Free userspace_mem_regions. */
	hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
		__vm_mem_region_delete(vmp, region, false);

	/* Free sparsebit arrays. */
	sparsebit_free(&vmp->vpages_valid);
	sparsebit_free(&vmp->vpages_mapped);

	kvm_vm_release(vmp);

	/* Free the structure describing the VM. */
	free(vmp);
}

int kvm_memfd_alloc(size_t size, bool hugepages)
{
	int memfd_flags = MFD_CLOEXEC;
	int fd, r;

	if (hugepages)
		memfd_flags |= MFD_HUGETLB;

	fd = memfd_create("kvm_selftest", memfd_flags);
	TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd));

	r = ftruncate(fd, size);
	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r));

	r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));

	return fd;
}

/*
 * Memory Compare, host virtual to guest virtual
 *
 * Input Args:
 *   hva - Starting host virtual address
 *   vm - Virtual Machine
 *   gva - Starting guest virtual address
 *   len - number of bytes to compare
 *
 * Output Args: None
 *
 * Input/Output Args: None
 *
 * Return:
 *   Returns 0 if the bytes starting at hva for a length of len
 *   are equal to the guest virtual bytes starting at gva.  Returns
 *   a value < 0, if bytes at hva are less than those at gva.
 *   Otherwise a value > 0 is returned.
 *
 * Compares the bytes starting at the host virtual address hva, for
 * a length of len, to the guest bytes starting at the guest virtual
 * address given by gva.
 */
int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
{
	size_t amt;

	/*
	 * Compare a batch of bytes until either a match is found
	 * or all the bytes have been compared.
	 */
	for (uintptr_t offset = 0; offset < len; offset += amt) {
		uintptr_t ptr1 = (uintptr_t)hva + offset;

		/*
		 * Determine host address for guest virtual address
		 * at offset.
		 */
		uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);

		/*
		 * Determine amount to compare on this pass.
		 * Don't allow the comparison to cross a page boundary.
		 */
		amt = len - offset;
		if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
			amt = vm->page_size - (ptr1 % vm->page_size);
		if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
			amt = vm->page_size - (ptr2 % vm->page_size);

		assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
		assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));

		/*
		 * Perform the comparison.  If there is a difference
		 * return that result to the caller, otherwise need
		 * to continue on looking for a mismatch.
		 */
		int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
		if (ret != 0)
			return ret;
	}

	/*
	 * No mismatch found.  Let the caller know the two memory
	 * areas are equal.
	 */
	return 0;
}

static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), gpa_node);
		parent = *cur;
		if (region->region.guest_phys_addr <
		    cregion->region.guest_phys_addr)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->region.guest_phys_addr !=
				    cregion->region.guest_phys_addr,
				    "Duplicate GPA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->gpa_node, parent, cur);
	rb_insert_color(&region->gpa_node, gpa_tree);
}

static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), hva_node);
		parent = *cur;
		if (region->host_mem < cregion->host_mem)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->host_mem !=
				    cregion->host_mem,
				    "Duplicate HVA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->hva_node, parent, cur);
	rb_insert_color(&region->hva_node, hva_tree);
}


int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
				uint64_t gpa, uint64_t size, void *hva)
{
	struct kvm_userspace_memory_region region = {
		.slot = slot,
		.flags = flags,
		.guest_phys_addr = gpa,
		.memory_size = size,
		.userspace_addr = (uintptr_t)hva,
	};

	return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region);
}
void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
			       uint64_t gpa, uint64_t size, void *hva)
{
	int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed, errno = %d (%s)",
		    errno, strerror(errno));
}

/*
 * VM Userspace Memory Region Add
 *
 * Input Args:
 *   vm - Virtual Machine
 *   src_type - Storage source for this region.
 *              NULL to use anonymous memory.
 *   guest_paddr - Starting guest physical address
 *   slot - KVM region slot
 *   npages - Number of physical pages
 *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
 *
 * Output Args: None
 *
 * Return: None
 *
 * Allocates a memory area of the number of pages specified by npages
 * and maps it to the VM specified by vm, at a starting physical address
 * given by guest_paddr.  The region is created with a KVM region slot
 * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM.  The
 * region is created with the flags given by flags.
 */
void vm_userspace_mem_region_add(struct kvm_vm *vm,
	enum vm_mem_backing_src_type src_type,
	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
	uint32_t flags)
{
	int ret;
	struct userspace_mem_region *region;
	size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
	size_t alignment;

	TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
		"Number of guest pages is not compatible with the host. "
		"Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));

	TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
		"address not on a page boundary.\n"
		"  guest_paddr: 0x%lx vm->page_size: 0x%x",
		guest_paddr, vm->page_size);
	TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
		<= vm->max_gfn, "Physical range beyond maximum "
		"supported physical address,\n"
		"  guest_paddr: 0x%lx npages: 0x%lx\n"
		"  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		guest_paddr, npages, vm->max_gfn, vm->page_size);

	/*
	 * Confirm a mem region with an overlapping address doesn't
	 * already exist.
	 */
	region = (struct userspace_mem_region *) userspace_mem_region_find(
		vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
	if (region != NULL)
		TEST_FAIL("overlapping userspace_mem_region already "
			"exists\n"
			"  requested guest_paddr: 0x%lx npages: 0x%lx "
			"page_size: 0x%x\n"
			"  existing guest_paddr: 0x%lx size: 0x%lx",
			guest_paddr, npages, vm->page_size,
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size);

	/* Confirm no region with the requested slot already exists. */
	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
			       slot) {
		if (region->region.slot != slot)
			continue;

		TEST_FAIL("A mem region with the requested slot "
			"already exists.\n"
			"  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
			"  existing slot: %u paddr: 0x%lx size: 0x%lx",
			slot, guest_paddr, npages,
			region->region.slot,
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size);
	}

	/* Allocate and initialize new mem region structure. */
	region = calloc(1, sizeof(*region));
	TEST_ASSERT(region != NULL, "Insufficient Memory");
	region->mmap_size = npages * vm->page_size;

#ifdef __s390x__
	/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
	alignment = 0x100000;
#else
	alignment = 1;
#endif

	/*
	 * When using THP mmap is not guaranteed to return a hugepage aligned
	 * address so we have to pad the mmap.  Padding is not needed for HugeTLB
	 * because mmap will always return an address aligned to the HugeTLB
	 * page size.
	 */
	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
		alignment = max(backing_src_pagesz, alignment);

	ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));

	/* Add enough memory to align up if necessary */
	if (alignment > 1)
		region->mmap_size += alignment;

	region->fd = -1;
	if (backing_src_is_shared(src_type))
		region->fd = kvm_memfd_alloc(region->mmap_size,
					     src_type == VM_MEM_SRC_SHARED_HUGETLB);

	region->mmap_start = mmap(NULL, region->mmap_size,
				  PROT_READ | PROT_WRITE,
				  vm_mem_backing_src_alias(src_type)->flag,
				  region->fd, 0);
	TEST_ASSERT(region->mmap_start != MAP_FAILED,
		    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));

	TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
		    region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
		    "mmap_start %p is not aligned to HugeTLB page size 0x%lx",
		    region->mmap_start, backing_src_pagesz);

	/* Align host address */
	region->host_mem = align_ptr_up(region->mmap_start, alignment);

	/* As needed perform madvise */
	if ((src_type == VM_MEM_SRC_ANONYMOUS ||
	     src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
		ret = madvise(region->host_mem, npages * vm->page_size,
			      src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
		TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
			    region->host_mem, npages * vm->page_size,
			    vm_mem_backing_src_alias(src_type)->name);
	}

	region->unused_phy_pages = sparsebit_alloc();
	sparsebit_set_num(region->unused_phy_pages,
		guest_paddr >> vm->page_shift, npages);
	region->region.slot = slot;
	region->region.flags = flags;
	region->region.guest_phys_addr = guest_paddr;
	region->region.memory_size = npages * vm->page_size;
	region->region.userspace_addr = (uintptr_t) region->host_mem;
	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);
	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
		"  rc: %i errno: %i\n"
		"  slot: %u flags: 0x%x\n"
		"  guest_phys_addr: 0x%lx size: 0x%lx",
		ret, errno, slot, flags,
		guest_paddr, (uint64_t) region->region.memory_size);

	/* Add to quick lookup data structures */
	vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
	vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
	hash_add(vm->regions.slot_hash, &region->slot_node, slot);

	/* If shared memory, create an alias. */
	if (region->fd >= 0) {
		region->mmap_alias = mmap(NULL, region->mmap_size,
					  PROT_READ | PROT_WRITE,
					  vm_mem_backing_src_alias(src_type)->flag,
					  region->fd, 0);
		TEST_ASSERT(region->mmap_alias != MAP_FAILED,
			    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));

		/* Align host alias address */
		region->host_alias = align_ptr_up(region->mmap_alias, alignment);
	}
}

/*
 * Memslot to region
 *
 * Input Args:
 *   vm - Virtual Machine
 *   memslot - KVM memory slot ID
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to memory region structure that describe memory region
 *   using kvm memory slot ID given by memslot.  TEST_ASSERT failure
 *   on error (e.g. currently no memory region using memslot as a KVM
 *   memory slot ID).
 */
struct userspace_mem_region *
memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
	struct userspace_mem_region *region;

	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
			       memslot)
		if (region->region.slot == memslot)
			return region;

	fprintf(stderr, "No mem region with the requested slot found,\n"
		"  requested slot: %u\n", memslot);
	fputs("---- vm dump ----\n", stderr);
	vm_dump(stderr, vm, 2);
	TEST_FAIL("Mem region not found");
	return NULL;
}

/*
 * VM Memory Region Flags Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to modify
 *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the flags of the memory region specified by the value of slot,
 * to the values given by flags.
 */
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
{
	int ret;
	struct userspace_mem_region *region;

	region = memslot2region(vm, slot);

	region->region.flags = flags;

	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
		"  rc: %i errno: %i slot: %u flags: 0x%x",
		ret, errno, slot, flags);
}

/*
 * VM Memory Region Move
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to move
 *   new_gpa - Starting guest physical address
 *
 * Output Args: None
 *
 * Return: None
 *
 * Change the gpa of a memory region.
 */
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
{
	struct userspace_mem_region *region;
	int ret;

	region = memslot2region(vm, slot);

	region->region.guest_phys_addr = new_gpa;

	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
		    "ret: %i errno: %i slot: %u new_gpa: 0x%lx",
		    ret, errno, slot, new_gpa);
}

/*
 * VM Memory Region Delete
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to delete
 *
 * Output Args: None
 *
 * Return: None
 *
 * Delete a memory region.
 */
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
	__vm_mem_region_delete(vm, memslot2region(vm, slot), true);
}
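/*
 * Illustrative sketch (not part of the original file): how a test typically
 * drives the memslot helpers above.  The slot number, GPA and page count are
 * arbitrary values chosen for illustration.
 *
 *	#define TEST_MEM_SLOT	10
 *
 *	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
 *				    0x10000000, TEST_MEM_SLOT, 16, 0);
 *	vm_mem_region_set_flags(vm, TEST_MEM_SLOT, KVM_MEM_LOG_DIRTY_PAGES);
 *	vm_mem_region_move(vm, TEST_MEM_SLOT, 0x20000000);
 *	vm_mem_region_delete(vm, TEST_MEM_SLOT);
 */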
/* Returns the size of a vCPU's kvm_run structure. */
static int vcpu_mmap_sz(void)
{
	int dev_fd, ret;

	dev_fd = open_kvm_dev_path_or_exit();

	ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
	TEST_ASSERT(ret >= sizeof(struct kvm_run),
		    KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret));

	close(dev_fd);

	return ret;
}

static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id)
{
	struct kvm_vcpu *vcpu;

	list_for_each_entry(vcpu, &vm->vcpus, list) {
		if (vcpu->id == vcpu_id)
			return true;
	}

	return false;
}

/*
 * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id.
 * No additional vCPU setup is done.  Returns the vCPU.
 */
struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
{
	struct kvm_vcpu *vcpu;

	/* Confirm a vcpu with the specified id doesn't already exist. */
	TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists\n", vcpu_id);

	/* Allocate and initialize new vcpu structure. */
	vcpu = calloc(1, sizeof(*vcpu));
	TEST_ASSERT(vcpu != NULL, "Insufficient Memory");

	vcpu->vm = vm;
	vcpu->id = vcpu_id;
	vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id);
	TEST_ASSERT(vcpu->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu->fd));

	TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size "
		"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
		vcpu_mmap_sz(), sizeof(*vcpu->run));
	vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
		PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
	TEST_ASSERT(vcpu->run != MAP_FAILED,
		    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));

	/* Add to linked-list of VCPUs. */
	list_add(&vcpu->list, &vm->vcpus);

	return vcpu;
}

/*
 * VM Virtual Address Unused Gap
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size (bytes)
 *   vaddr_min - Minimum Virtual Address
 *
 * Output Args: None
 *
 * Return:
 *   Lowest virtual address at or above vaddr_min, with at least
 *   sz unused bytes.  TEST_ASSERT failure if no area of at least
 *   size sz is available.
 *
 * Within the VM specified by vm, locates the lowest starting virtual
 * address >= vaddr_min, that has at least sz unallocated bytes.  A
 * TEST_ASSERT failure occurs for invalid input or no area of at least
 * sz unallocated bytes >= vaddr_min is available.
 */
static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
				      vm_vaddr_t vaddr_min)
{
	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;

	/* Determine lowest permitted virtual page index. */
	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
	if ((pgidx_start * vm->page_size) < vaddr_min)
		goto no_va_found;

	/* Loop over section with enough valid virtual page indexes. */
	if (!sparsebit_is_set_num(vm->vpages_valid,
				  pgidx_start, pages))
		pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
						     pgidx_start, pages);
	do {
		/*
		 * Are there enough unused virtual pages available at
		 * the currently proposed starting virtual page index.
		 * If not, adjust proposed starting index to next
		 * possible.
		 */
		if (sparsebit_is_clear_num(vm->vpages_mapped,
					   pgidx_start, pages))
			goto va_found;
		pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
						       pgidx_start, pages);
		if (pgidx_start == 0)
			goto no_va_found;

		/*
		 * If needed, adjust proposed starting virtual address,
		 * to next range of valid virtual addresses.
		 */
		if (!sparsebit_is_set_num(vm->vpages_valid,
					  pgidx_start, pages)) {
			pgidx_start = sparsebit_next_set_num(
				vm->vpages_valid, pgidx_start, pages);
			if (pgidx_start == 0)
				goto no_va_found;
		}
	} while (pgidx_start != 0);

no_va_found:
	TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages);

	/* NOT REACHED */
	return -1;

va_found:
	TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
					 pgidx_start, pages),
		    "Unexpected, invalid virtual page index range,\n"
		    "  pgidx_start: 0x%lx\n"
		    "  pages: 0x%lx",
		    pgidx_start, pages);
	TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
					   pgidx_start, pages),
		    "Unexpected, pages already mapped,\n"
		    "  pgidx_start: 0x%lx\n"
		    "  pages: 0x%lx",
		    pgidx_start, pages);

	return pgidx_start * vm->page_size;
}

/*
 * VM Virtual Address Allocate
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size in bytes
 *   vaddr_min - Minimum starting virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least sz bytes within the virtual address space of the vm
 * given by vm.  The allocated bytes are mapped to a virtual address >=
 * the address given by vaddr_min.  Note that each allocation uses a
 * unique set of pages, with the minimum real allocation being at least
 * a page.
 */
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
{
	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);

	virt_pgd_alloc(vm);
	vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
					      KVM_UTIL_MIN_PFN * vm->page_size, 0);

	/*
	 * Find an unused range of virtual page addresses of at least
	 * pages in length.
	 */
	vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);

	/* Map the virtual pages. */
	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
	     pages--, vaddr += vm->page_size, paddr += vm->page_size) {

		virt_pg_map(vm, vaddr, paddr);

		sparsebit_set(vm->vpages_mapped,
			      vaddr >> vm->page_shift);
	}

	return vaddr_start;
}

/*
 * VM Virtual Address Allocate Pages
 *
 * Input Args:
 *   vm - Virtual Machine
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least N system pages worth of bytes within the virtual address
 * space of the vm.
 */
vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
{
	return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
}
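/*
 * Illustrative sketch (not part of the original file): allocating guest
 * virtual memory and initializing it from the host side.  The size and the
 * "gva" name are made up for illustration.
 *
 *	vm_vaddr_t gva = vm_vaddr_alloc(vm, 0x2000, KVM_UTIL_MIN_VADDR);
 *
 *	memset(addr_gva2hva(vm, gva), 0, 0x2000);
 */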
/*
 * VM Virtual Address Allocate Page
 *
 * Input Args:
 *   vm - Virtual Machine
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least one system page worth of bytes within the virtual address
 * space of the vm.
 */
vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
{
	return vm_vaddr_alloc_pages(vm, 1);
}

/*
 * Map a range of VM virtual address to the VM's physical address
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vaddr - Virtual address to map
 *   paddr - VM Physical Address
 *   npages - The number of pages to map
 *
 * Output Args: None
 *
 * Return: None
 *
 * Within the VM given by @vm, creates a virtual translation for
 * @npages starting at @vaddr to the page range starting at @paddr.
 */
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
	      unsigned int npages)
{
	size_t page_size = vm->page_size;
	size_t size = npages * page_size;

	TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");

	while (npages--) {
		virt_pg_map(vm, vaddr, paddr);
		vaddr += page_size;
		paddr += page_size;
	}
}

/*
 * Address VM Physical to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gpa - VM physical address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 *
 * Locates the memory region containing the VM physical address given
 * by gpa, within the VM given by vm.  When found, the host virtual
 * address providing the memory to the vm physical address is returned.
 * A TEST_ASSERT failure occurs if no region containing gpa exists.
 */
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
{
	struct userspace_mem_region *region;

	region = userspace_mem_region_find(vm, gpa, gpa);
	if (!region) {
		TEST_FAIL("No vm physical memory at 0x%lx", gpa);
		return NULL;
	}

	return (void *)((uintptr_t)region->host_mem
		+ (gpa - region->region.guest_phys_addr));
}

/*
 * Address Host Virtual to VM Physical
 *
 * Input Args:
 *   vm - Virtual Machine
 *   hva - Host virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent VM physical address
 *
 * Locates the memory region containing the host virtual address given
 * by hva, within the VM given by vm.  When found, the equivalent
 * VM physical address is returned.  A TEST_ASSERT failure occurs if no
 * region containing hva exists.
 */
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
{
	struct rb_node *node;

	for (node = vm->regions.hva_tree.rb_node; node; ) {
		struct userspace_mem_region *region =
			container_of(node, struct userspace_mem_region, hva_node);

		if (hva >= region->host_mem) {
			if (hva <= (region->host_mem
				+ region->region.memory_size - 1))
				return (vm_paddr_t)((uintptr_t)
					region->region.guest_phys_addr
					+ (hva - (uintptr_t)region->host_mem));

			node = node->rb_right;
		} else
			node = node->rb_left;
	}

	TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
	return -1;
}
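/*
 * Illustrative sketch (not part of the original file): round-tripping an
 * address through the translation helpers above, assuming "gva" was
 * previously returned by vm_vaddr_alloc().
 *
 *	void *hva = addr_gva2hva(vm, gva);
 *	vm_paddr_t gpa = addr_hva2gpa(vm, hva);
 *
 *	TEST_ASSERT(gpa == addr_gva2gpa(vm, gva), "GVA->GPA mismatch");
 */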
/*
 * Address VM physical to Host Virtual *alias*.
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gpa - VM physical address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent address within the host virtual *alias* area, or NULL
 *   (without failing the test) if the guest memory is not shared (so
 *   no alias exists).
 *
 * Create a writable, shared virtual=>physical alias for the specific GPA.
 * The primary use case is to allow the host selftest to manipulate guest
 * memory without mapping said memory in the guest's address space.  And, for
 * userfaultfd-based demand paging, to do so without triggering userfaults.
 */
void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
{
	struct userspace_mem_region *region;
	uintptr_t offset;

	region = userspace_mem_region_find(vm, gpa, gpa);
	if (!region)
		return NULL;

	if (!region->host_alias)
		return NULL;

	offset = gpa - region->region.guest_phys_addr;
	return (void *) ((uintptr_t) region->host_alias + offset);
}

/* Create an interrupt controller chip for the specified VM. */
void vm_create_irqchip(struct kvm_vm *vm)
{
	vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);

	vm->has_irqchip = true;
}

int _vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc;

	do {
		rc = __vcpu_run(vcpu);
	} while (rc == -1 && errno == EINTR);

	assert_on_unhandled_exception(vcpu);

	return rc;
}

/*
 * Invoke KVM_RUN on a vCPU until KVM returns something other than -EINTR.
 * Assert if KVM returns an error (other than -EINTR).
 */
void vcpu_run(struct kvm_vcpu *vcpu)
{
	int ret = _vcpu_run(vcpu);

	TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_RUN, ret));
}

void vcpu_run_complete_io(struct kvm_vcpu *vcpu)
{
	int ret;

	vcpu->run->immediate_exit = 1;
	ret = __vcpu_run(vcpu);
	vcpu->run->immediate_exit = 0;

	TEST_ASSERT(ret == -1 && errno == EINTR,
		    "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
		    ret, errno);
}
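/*
 * Illustrative sketch (not part of the original file): a minimal run loop a
 * test might build on top of vcpu_run().  It assumes the guest signals
 * completion via the ucall interface (get_ucall()/UCALL_DONE from the
 * selftests ucall headers) and that ucalls surface as KVM_EXIT_IO, as they
 * do on x86.
 *
 *	struct ucall uc;
 *
 *	for (;;) {
 *		vcpu_run(vcpu);
 *		TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_IO,
 *			    "Unexpected exit: %s",
 *			    exit_reason_str(vcpu->run->exit_reason));
 *		if (get_ucall(vcpu, &uc) == UCALL_DONE)
 *			break;
 *	}
 */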
/*
 * Get the list of guest registers which are supported for
 * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls.  Returns a kvm_reg_list pointer,
 * it is the caller's responsibility to free the list.
 */
struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu)
{
	struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
	int ret;

	ret = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &reg_list_n);
	TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");

	reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
	reg_list->n = reg_list_n.n;
	vcpu_ioctl(vcpu, KVM_GET_REG_LIST, reg_list);
	return reg_list;
}

void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
{
	uint32_t page_size = vcpu->vm->page_size;
	uint32_t size = vcpu->vm->dirty_ring_size;

	TEST_ASSERT(size > 0, "Should enable dirty ring first");

	if (!vcpu->dirty_gfns) {
		void *addr;

		addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");

		addr = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_PRIVATE, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");

		addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");

		vcpu->dirty_gfns = addr;
		vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
	}

	return vcpu->dirty_gfns;
}

/*
 * Device Ioctl
 */

int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
{
	struct kvm_device_attr attribute = {
		.group = group,
		.attr = attr,
		.flags = 0,
	};

	return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute);
}

int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type)
{
	struct kvm_create_device create_dev = {
		.type = type,
		.flags = KVM_CREATE_DEVICE_TEST,
	};

	return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
}

int __kvm_create_device(struct kvm_vm *vm, uint64_t type)
{
	struct kvm_create_device create_dev = {
		.type = type,
		.fd = -1,
		.flags = 0,
	};
	int err;

	err = __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
	TEST_ASSERT(err <= 0, "KVM_CREATE_DEVICE shouldn't return a positive value");
	return err ? : create_dev.fd;
}

int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
	struct kvm_device_attr kvmattr = {
		.group = group,
		.attr = attr,
		.flags = 0,
		.addr = (uintptr_t)val,
	};

	return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr);
}

int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
	struct kvm_device_attr kvmattr = {
		.group = group,
		.attr = attr,
		.flags = 0,
		.addr = (uintptr_t)val,
	};

	return __kvm_ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &kvmattr);
}

/*
 * IRQ related functions.
 */

int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
	struct kvm_irq_level irq_level = {
		.irq = irq,
		.level = level,
	};

	return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
}

void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
	int ret = _kvm_irq_line(vm, irq, level);

	TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
}

struct kvm_irq_routing *kvm_gsi_routing_create(void)
{
	struct kvm_irq_routing *routing;
	size_t size;

	size = sizeof(struct kvm_irq_routing);
	/* Allocate space for the max number of entries: this wastes 196 KBs. */
	size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry);
	routing = calloc(1, size);
	assert(routing);

	return routing;
}

void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
				 uint32_t gsi, uint32_t pin)
{
	int i;

	assert(routing);
	assert(routing->nr < KVM_MAX_IRQ_ROUTES);

	i = routing->nr;
	routing->entries[i].gsi = gsi;
	routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
	routing->entries[i].flags = 0;
	routing->entries[i].u.irqchip.irqchip = 0;
	routing->entries[i].u.irqchip.pin = pin;
	routing->nr++;
}

int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
	int ret;

	assert(routing);
	ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
	free(routing);

	return ret;
}

void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
	int ret;

	ret = _kvm_gsi_routing_write(vm, routing);
	TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_GSI_ROUTING, ret));
}
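/*
 * Illustrative sketch (not part of the original file): programming a single
 * irqchip route with the helpers above.  The GSI and pin values are
 * arbitrary; note that kvm_gsi_routing_write() frees the routing table.
 *
 *	struct kvm_irq_routing *routing;
 *
 *	routing = kvm_gsi_routing_create();
 *	kvm_gsi_routing_irqchip_add(routing, 32, 0);
 *	kvm_gsi_routing_write(vm, routing);
 */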
/*
 * VM Dump
 *
 * Input Args:
 *   vm - Virtual Machine
 *   indent - Left margin indent amount
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the current state of the VM given by vm, to the FILE stream
 * given by stream.
 */
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
	int ctr;
	struct userspace_mem_region *region;
	struct kvm_vcpu *vcpu;

	fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
	fprintf(stream, "%*sMem Regions:\n", indent, "");
	hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
		fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
			"host_virt: %p\n", indent + 2, "",
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size,
			region->host_mem);
		fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
		sparsebit_dump(stream, region->unused_phy_pages, 0);
	}
	fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
	sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
	fprintf(stream, "%*spgd_created: %u\n", indent, "",
		vm->pgd_created);
	if (vm->pgd_created) {
		fprintf(stream, "%*sVirtual Translation Tables:\n",
			indent + 2, "");
		virt_dump(stream, vm, indent + 4);
	}
	fprintf(stream, "%*sVCPUs:\n", indent, "");

	list_for_each_entry(vcpu, &vm->vcpus, list)
		vcpu_dump(stream, vcpu, indent + 2);
}

/* Known KVM exit reasons */
static struct exit_reason {
	unsigned int reason;
	const char *name;
} exit_reasons_known[] = {
	{KVM_EXIT_UNKNOWN, "UNKNOWN"},
	{KVM_EXIT_EXCEPTION, "EXCEPTION"},
	{KVM_EXIT_IO, "IO"},
	{KVM_EXIT_HYPERCALL, "HYPERCALL"},
	{KVM_EXIT_DEBUG, "DEBUG"},
	{KVM_EXIT_HLT, "HLT"},
	{KVM_EXIT_MMIO, "MMIO"},
	{KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
	{KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
	{KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
	{KVM_EXIT_INTR, "INTR"},
	{KVM_EXIT_SET_TPR, "SET_TPR"},
	{KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
	{KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
	{KVM_EXIT_S390_RESET, "S390_RESET"},
	{KVM_EXIT_DCR, "DCR"},
	{KVM_EXIT_NMI, "NMI"},
	{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
	{KVM_EXIT_OSI, "OSI"},
	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
	{KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
	{KVM_EXIT_X86_RDMSR, "RDMSR"},
	{KVM_EXIT_X86_WRMSR, "WRMSR"},
	{KVM_EXIT_XEN, "XEN"},
#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
#endif
};

/*
 * Exit Reason String
 *
 * Input Args:
 *   exit_reason - Exit reason
 *
 * Output Args: None
 *
 * Return:
 *   Constant string pointer describing the exit reason.
 *
 * Locates and returns a constant string that describes the KVM exit
 * reason given by exit_reason.  If no such string is found, a constant
 * string of "Unknown" is returned.
 */
const char *exit_reason_str(unsigned int exit_reason)
{
	unsigned int n1;

	for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
		if (exit_reason == exit_reasons_known[n1].reason)
			return exit_reasons_known[n1].name;
	}

	return "Unknown";
}

/*
 * Physical Contiguous Page Allocator
 *
 * Input Args:
 *   vm - Virtual Machine
 *   num - number of pages
 *   paddr_min - Physical address minimum
 *   memslot - Memory region to allocate page from
 *
 * Output Args: None
 *
 * Return:
 *   Starting physical address
 *
 * Within the VM specified by vm, locates a range of available physical
 * pages at or above paddr_min.  If found, the pages are marked as in use
 * and their base address is returned.  A TEST_ASSERT failure occurs if
 * not enough pages are available at or above paddr_min.
 */
vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
			      vm_paddr_t paddr_min, uint32_t memslot)
{
	struct userspace_mem_region *region;
	sparsebit_idx_t pg, base;

	TEST_ASSERT(num > 0, "Must allocate at least one page");

	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
		"not divisible by page size.\n"
		"  paddr_min: 0x%lx page_size: 0x%x",
		paddr_min, vm->page_size);

	region = memslot2region(vm, memslot);
	base = pg = paddr_min >> vm->page_shift;

	do {
		for (; pg < base + num; ++pg) {
			if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
				base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
				break;
			}
		}
	} while (pg && pg != base + num);

	if (pg == 0) {
		fprintf(stderr, "No guest physical page available, "
			"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
			paddr_min, vm->page_size, memslot);
		fputs("---- vm dump ----\n", stderr);
		vm_dump(stderr, vm, 2);
		abort();
	}

	for (pg = base; pg < base + num; ++pg)
		sparsebit_clear(region->unused_phy_pages, pg);

	return base * vm->page_size;
}

vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
			     uint32_t memslot)
{
	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
}
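/*
 * Illustrative sketch (not part of the original file): carving a physically
 * contiguous buffer out of memslot 0 and touching it from the host.  The
 * page count and minimum address are arbitrary.
 *
 *	vm_paddr_t gpa = vm_phy_pages_alloc(vm, 4, 0x100000, 0);
 *
 *	memset(addr_gpa2hva(vm, gpa), 0xaa, 4 * vm->page_size);
 */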
/* Arbitrary minimum physical address used for virtual translation tables. */
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000

vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
{
	return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
}

/*
 * Address Guest Virtual to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gva - VM virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 */
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
{
	return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
}

unsigned long __weak vm_compute_max_gfn(struct kvm_vm *vm)
{
	return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
}

static unsigned int vm_calc_num_pages(unsigned int num_pages,
				      unsigned int page_shift,
				      unsigned int new_page_shift,
				      bool ceil)
{
	unsigned int n = 1 << (new_page_shift - page_shift);

	if (page_shift >= new_page_shift)
		return num_pages * (1 << (page_shift - new_page_shift));

	return num_pages / n + !!(ceil && num_pages % n);
}

static inline int getpageshift(void)
{
	return __builtin_ffs(getpagesize()) - 1;
}

unsigned int
vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
{
	return vm_calc_num_pages(num_guest_pages,
				 vm_guest_mode_params[mode].page_shift,
				 getpageshift(), true);
}

unsigned int
vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages)
{
	return vm_calc_num_pages(num_host_pages, getpageshift(),
				 vm_guest_mode_params[mode].page_shift, false);
}

unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
{
	unsigned int n;
	n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
	return vm_adjust_num_guest_pages(mode, n);
}

/*
 * Read binary stats descriptors
 *
 * Input Args:
 *   stats_fd - the file descriptor for the binary stats file from which to read
 *   header - the binary stats metadata header corresponding to the given FD
 *
 * Output Args: None
 *
 * Return:
 *   A pointer to a newly allocated series of stat descriptors.
 *   Caller is responsible for freeing the returned kvm_stats_desc.
 *
 * Read the stats descriptors from the binary stats interface.
 */
struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
					      struct kvm_stats_header *header)
{
	struct kvm_stats_desc *stats_desc;
	ssize_t desc_size, total_size, ret;

	desc_size = get_stats_descriptor_size(header);
	total_size = header->num_desc * desc_size;

	stats_desc = calloc(header->num_desc, desc_size);
	TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors");

	ret = pread(stats_fd, stats_desc, total_size, header->desc_offset);
	TEST_ASSERT(ret == total_size, "Read KVM stats descriptors");

	return stats_desc;
}

/*
 * Read stat data for a particular stat
 *
 * Input Args:
 *   stats_fd - the file descriptor for the binary stats file from which to read
 *   header - the binary stats metadata header corresponding to the given FD
 *   desc - the binary stat metadata for the particular stat to be read
 *   max_elements - the maximum number of 8-byte values to read into data
 *
 * Output Args:
 *   data - the buffer into which stat data should be read
 *
 * Read the data values of a specified stat from the binary stats interface.
 */
void read_stat_data(int stats_fd, struct kvm_stats_header *header,
		    struct kvm_stats_desc *desc, uint64_t *data,
		    size_t max_elements)
{
	size_t nr_elements = min_t(ssize_t, desc->size, max_elements);
	size_t size = nr_elements * sizeof(*data);
	ssize_t ret;

	TEST_ASSERT(desc->size, "No elements in stat '%s'", desc->name);
	TEST_ASSERT(max_elements, "Zero elements requested for stat '%s'", desc->name);

	ret = pread(stats_fd, data, size,
		    header->data_offset + desc->offset);

	TEST_ASSERT(ret >= 0, "pread() failed on stat '%s', errno: %i (%s)",
		    desc->name, errno, strerror(errno));
	TEST_ASSERT(ret == size,
		    "pread() on stat '%s' read %ld bytes, wanted %lu bytes",
		    desc->name, size, ret);
}

/*
 * Read the data of the named stat
 *
 * Input Args:
 *   vm - the VM for which the stat should be read
 *   stat_name - the name of the stat to read
 *   max_elements - the maximum number of 8-byte values to read into data
 *
 * Output Args:
 *   data - the buffer into which stat data should be read
 *
 * Read the data values of a specified stat from the binary stats interface.
 */
void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
		   size_t max_elements)
{
	struct kvm_stats_desc *desc;
	size_t size_desc;
	int i;

	if (!vm->stats_fd) {
		vm->stats_fd = vm_get_stats_fd(vm);
		read_stats_header(vm->stats_fd, &vm->stats_header);
		vm->stats_desc = read_stats_descriptors(vm->stats_fd,
							&vm->stats_header);
	}

	size_desc = get_stats_descriptor_size(&vm->stats_header);

	for (i = 0; i < vm->stats_header.num_desc; ++i) {
		desc = (void *)vm->stats_desc + (i * size_desc);

		if (strcmp(desc->name, stat_name))
			continue;

		read_stat_data(vm->stats_fd, &vm->stats_header, desc,
			       data, max_elements);

		break;
	}
}
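/*
 * Illustrative sketch (not part of the original file): reading a single
 * VM-scope stat through the caching helper above.  "pages_4k" is just an
 * example stat name; whether it exists depends on the architecture.
 *
 *	uint64_t value;
 *
 *	__vm_get_stat(vm, "pages_4k", &value, 1);
 */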