1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * tools/testing/selftests/kvm/lib/kvm_util.c 4 * 5 * Copyright (C) 2018, Google LLC. 6 */ 7 8 #define _GNU_SOURCE /* for program_invocation_name */ 9 #include "test_util.h" 10 #include "kvm_util.h" 11 #include "processor.h" 12 13 #include <assert.h> 14 #include <sys/mman.h> 15 #include <sys/types.h> 16 #include <sys/stat.h> 17 #include <unistd.h> 18 #include <linux/kernel.h> 19 20 #define KVM_UTIL_MIN_PFN 2 21 22 static int vcpu_mmap_sz(void); 23 24 int open_path_or_exit(const char *path, int flags) 25 { 26 int fd; 27 28 fd = open(path, flags); 29 __TEST_REQUIRE(fd >= 0, "%s not available (errno: %d)", path, errno); 30 31 return fd; 32 } 33 34 /* 35 * Open KVM_DEV_PATH if available, otherwise exit the entire program. 36 * 37 * Input Args: 38 * flags - The flags to pass when opening KVM_DEV_PATH. 39 * 40 * Return: 41 * The opened file descriptor of /dev/kvm. 42 */ 43 static int _open_kvm_dev_path_or_exit(int flags) 44 { 45 return open_path_or_exit(KVM_DEV_PATH, flags); 46 } 47 48 int open_kvm_dev_path_or_exit(void) 49 { 50 return _open_kvm_dev_path_or_exit(O_RDONLY); 51 } 52 53 /* 54 * Capability 55 * 56 * Input Args: 57 * cap - Capability 58 * 59 * Output Args: None 60 * 61 * Return: 62 * On success, the Value corresponding to the capability (KVM_CAP_*) 63 * specified by the value of cap. On failure a TEST_ASSERT failure 64 * is produced. 65 * 66 * Looks up and returns the value corresponding to the capability 67 * (KVM_CAP_*) given by cap. 68 */ 69 unsigned int kvm_check_cap(long cap) 70 { 71 int ret; 72 int kvm_fd; 73 74 kvm_fd = open_kvm_dev_path_or_exit(); 75 ret = __kvm_ioctl(kvm_fd, KVM_CHECK_EXTENSION, (void *)cap); 76 TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret)); 77 78 close(kvm_fd); 79 80 return (unsigned int)ret; 81 } 82 83 void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size) 84 { 85 vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size); 86 vm->dirty_ring_size = ring_size; 87 } 88 89 static void vm_open(struct kvm_vm *vm) 90 { 91 vm->kvm_fd = _open_kvm_dev_path_or_exit(O_RDWR); 92 93 TEST_REQUIRE(kvm_has_cap(KVM_CAP_IMMEDIATE_EXIT)); 94 95 vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type); 96 TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd)); 97 } 98 99 const char *vm_guest_mode_string(uint32_t i) 100 { 101 static const char * const strings[] = { 102 [VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages", 103 [VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages", 104 [VM_MODE_P48V48_4K] = "PA-bits:48, VA-bits:48, 4K pages", 105 [VM_MODE_P48V48_16K] = "PA-bits:48, VA-bits:48, 16K pages", 106 [VM_MODE_P48V48_64K] = "PA-bits:48, VA-bits:48, 64K pages", 107 [VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages", 108 [VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages", 109 [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages", 110 [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages", 111 [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages", 112 [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages", 113 [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages", 114 [VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages", 115 [VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages", 116 [VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages", 117 }; 118 _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES, 119 "Missing new mode strings?"); 120 121 TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i); 122 123 return 
strings[i]; 124 } 125 126 const struct vm_guest_mode_params vm_guest_mode_params[] = { 127 [VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 }, 128 [VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 }, 129 [VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 }, 130 [VM_MODE_P48V48_16K] = { 48, 48, 0x4000, 14 }, 131 [VM_MODE_P48V48_64K] = { 48, 48, 0x10000, 16 }, 132 [VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 }, 133 [VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 }, 134 [VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 }, 135 [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 }, 136 [VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 }, 137 [VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 }, 138 [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 }, 139 [VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 }, 140 [VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 }, 141 [VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 }, 142 }; 143 _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, 144 "Missing new mode params?"); 145 146 struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages) 147 { 148 struct kvm_vm *vm; 149 150 pr_debug("%s: mode='%s' pages='%ld'\n", __func__, 151 vm_guest_mode_string(mode), nr_pages); 152 153 vm = calloc(1, sizeof(*vm)); 154 TEST_ASSERT(vm != NULL, "Insufficient Memory"); 155 156 INIT_LIST_HEAD(&vm->vcpus); 157 vm->regions.gpa_tree = RB_ROOT; 158 vm->regions.hva_tree = RB_ROOT; 159 hash_init(vm->regions.slot_hash); 160 161 vm->mode = mode; 162 vm->type = 0; 163 164 vm->pa_bits = vm_guest_mode_params[mode].pa_bits; 165 vm->va_bits = vm_guest_mode_params[mode].va_bits; 166 vm->page_size = vm_guest_mode_params[mode].page_size; 167 vm->page_shift = vm_guest_mode_params[mode].page_shift; 168 169 /* Setup mode specific traits. */ 170 switch (vm->mode) { 171 case VM_MODE_P52V48_4K: 172 vm->pgtable_levels = 4; 173 break; 174 case VM_MODE_P52V48_64K: 175 vm->pgtable_levels = 3; 176 break; 177 case VM_MODE_P48V48_4K: 178 vm->pgtable_levels = 4; 179 break; 180 case VM_MODE_P48V48_64K: 181 vm->pgtable_levels = 3; 182 break; 183 case VM_MODE_P40V48_4K: 184 case VM_MODE_P36V48_4K: 185 vm->pgtable_levels = 4; 186 break; 187 case VM_MODE_P40V48_64K: 188 case VM_MODE_P36V48_64K: 189 vm->pgtable_levels = 3; 190 break; 191 case VM_MODE_P48V48_16K: 192 case VM_MODE_P40V48_16K: 193 case VM_MODE_P36V48_16K: 194 vm->pgtable_levels = 4; 195 break; 196 case VM_MODE_P36V47_16K: 197 vm->pgtable_levels = 3; 198 break; 199 case VM_MODE_PXXV48_4K: 200 #ifdef __x86_64__ 201 kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits); 202 /* 203 * Ignore KVM support for 5-level paging (vm->va_bits == 57), 204 * it doesn't take effect unless a CR4.LA57 is set, which it 205 * isn't for this VM_MODE. 206 */ 207 TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57, 208 "Linear address width (%d bits) not supported", 209 vm->va_bits); 210 pr_debug("Guest physical address width detected: %d\n", 211 vm->pa_bits); 212 vm->pgtable_levels = 4; 213 vm->va_bits = 48; 214 #else 215 TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms"); 216 #endif 217 break; 218 case VM_MODE_P47V64_4K: 219 vm->pgtable_levels = 5; 220 break; 221 case VM_MODE_P44V64_4K: 222 vm->pgtable_levels = 5; 223 break; 224 default: 225 TEST_FAIL("Unknown guest mode, mode: 0x%x", mode); 226 } 227 228 #ifdef __aarch64__ 229 if (vm->pa_bits != 40) 230 vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits); 231 #endif 232 233 vm_open(vm); 234 235 /* Limit to VA-bit canonical virtual addresses. 
*/ 236 vm->vpages_valid = sparsebit_alloc(); 237 sparsebit_set_num(vm->vpages_valid, 238 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift); 239 sparsebit_set_num(vm->vpages_valid, 240 (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift, 241 (1ULL << (vm->va_bits - 1)) >> vm->page_shift); 242 243 /* Limit physical addresses to PA-bits. */ 244 vm->max_gfn = vm_compute_max_gfn(vm); 245 246 /* Allocate and setup memory for guest. */ 247 vm->vpages_mapped = sparsebit_alloc(); 248 if (nr_pages != 0) 249 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 250 0, 0, nr_pages, 0); 251 252 return vm; 253 } 254 255 static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, 256 uint32_t nr_runnable_vcpus, 257 uint64_t extra_mem_pages) 258 { 259 uint64_t nr_pages; 260 261 TEST_ASSERT(nr_runnable_vcpus, 262 "Use vm_create_barebones() for VMs that _never_ have vCPUs\n"); 263 264 TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS), 265 "nr_vcpus = %d too large for host, max-vcpus = %d", 266 nr_runnable_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS)); 267 268 /* 269 * Arbitrarily allocate 512 pages (2mb when page size is 4kb) for the 270 * test code and other per-VM assets that will be loaded into memslot0. 271 */ 272 nr_pages = 512; 273 274 /* Account for the per-vCPU stacks on behalf of the test. */ 275 nr_pages += nr_runnable_vcpus * DEFAULT_STACK_PGS; 276 277 /* 278 * Account for the number of pages needed for the page tables. The 279 * maximum page table size for a memory region will be when the 280 * smallest page size is used. Considering each page contains x page 281 * table descriptors, the total extra size for page tables (for extra 282 * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller 283 * than N/x*2. 284 */ 285 nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2; 286 287 return vm_adjust_num_guest_pages(mode, nr_pages); 288 } 289 290 struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, 291 uint64_t nr_extra_pages) 292 { 293 uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus, 294 nr_extra_pages); 295 struct kvm_vm *vm; 296 297 vm = ____vm_create(mode, nr_pages); 298 299 kvm_vm_elf_load(vm, program_invocation_name); 300 301 #ifdef __x86_64__ 302 vm_create_irqchip(vm); 303 #endif 304 return vm; 305 } 306 307 /* 308 * VM Create with customized parameters 309 * 310 * Input Args: 311 * mode - VM Mode (e.g. VM_MODE_P52V48_4K) 312 * nr_vcpus - VCPU count 313 * extra_mem_pages - Non-slot0 physical memory total size 314 * guest_code - Guest entry point 315 * vcpuids - VCPU IDs 316 * 317 * Output Args: None 318 * 319 * Return: 320 * Pointer to opaque structure that describes the created VM. 321 * 322 * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K). 323 * extra_mem_pages is only used to calculate the maximum page table size, 324 * no real memory allocation for non-slot0 memory in this function. 
 */
struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
				      uint64_t extra_mem_pages,
				      void *guest_code, struct kvm_vcpu *vcpus[])
{
	struct kvm_vm *vm;
	int i;

	TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array");

	vm = __vm_create(mode, nr_vcpus, extra_mem_pages);

	for (i = 0; i < nr_vcpus; ++i)
		vcpus[i] = vm_vcpu_add(vm, i, guest_code);

	return vm;
}

struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
					 uint64_t extra_mem_pages,
					 void *guest_code)
{
	struct kvm_vcpu *vcpus[1];
	struct kvm_vm *vm;

	vm = __vm_create_with_vcpus(VM_MODE_DEFAULT, 1, extra_mem_pages,
				    guest_code, vcpus);

	*vcpu = vcpus[0];
	return vm;
}

/*
 * VM Restart
 *
 * Input Args:
 *   vm - VM that has been released before
 *
 * Output Args: None
 *
 * Reopens the file descriptors associated with the VM and reinstates the
 * global state, such as the irqchip and the memory regions that are mapped
 * into the guest.
 */
void kvm_vm_restart(struct kvm_vm *vmp)
{
	int ctr;
	struct userspace_mem_region *region;

	vm_open(vmp);
	if (vmp->has_irqchip)
		vm_create_irqchip(vmp);

	hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
			    "  rc: %i errno: %i\n"
			    "  slot: %u flags: 0x%x\n"
			    "  guest_phys_addr: 0x%llx size: 0x%llx",
			    ret, errno, region->region.slot,
			    region->region.flags,
			    region->region.guest_phys_addr,
			    region->region.memory_size);
	}
}

__weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm,
					      uint32_t vcpu_id)
{
	return __vm_vcpu_add(vm, vcpu_id);
}

struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm)
{
	kvm_vm_restart(vm);

	return vm_vcpu_recreate(vm, 0);
}

/*
 * Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Searches for a region with any physical memory that overlaps with
 * any portion of the guest physical addresses from start to end
 * inclusive.  If multiple overlapping regions exist, a pointer to any
 * of the regions is returned.  Null is returned only when no overlapping
 * region exists.
 */
static struct userspace_mem_region *
userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
	struct rb_node *node;

	for (node = vm->regions.gpa_tree.rb_node; node; ) {
		struct userspace_mem_region *region =
			container_of(node, struct userspace_mem_region, gpa_node);
		uint64_t existing_start = region->region.guest_phys_addr;
		uint64_t existing_end = region->region.guest_phys_addr
			+ region->region.memory_size - 1;
		if (start <= existing_end && end >= existing_start)
			return region;

		if (start < existing_start)
			node = node->rb_left;
		else
			node = node->rb_right;
	}

	return NULL;
}
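/*
 * Illustrative sketch, not used by the library itself: the typical
 * create/teardown lifecycle a test goes through with the helpers above.
 * The example_* names and the empty guest loop are hypothetical; a real
 * test would run the vCPU and process its exits (e.g. ucalls) before
 * freeing the VM.
 */
static void example_guest_code(void)
{
	for (;;)
		;	/* a real guest would do work and report via ucall */
}

void example_vm_lifecycle(void)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	/* One vCPU, no extra pages beyond the memslot0 defaults. */
	vm = __vm_create_with_one_vcpu(&vcpu, 0, example_guest_code);

	/* ... vcpu_run(vcpu) and exit handling would go here ... */

	kvm_vm_free(vm);
}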
/*
 * KVM Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Public interface to userspace_mem_region_find. Allows tests to look up
 * the memslot datastructure for a given range of guest physical memory.
 */
struct kvm_userspace_memory_region *
kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
				 uint64_t end)
{
	struct userspace_mem_region *region;

	region = userspace_mem_region_find(vm, start, end);
	if (!region)
		return NULL;

	return &region->region;
}

__weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
{

}

/*
 * VM VCPU Remove
 *
 * Input Args:
 *   vcpu - VCPU to remove
 *
 * Output Args: None
 *
 * Return: None, TEST_ASSERT failures for all error conditions
 *
 * Removes a vCPU from a VM and frees its resources.
 */
static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
	int ret;

	if (vcpu->dirty_gfns) {
		ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
		TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
		vcpu->dirty_gfns = NULL;
	}

	ret = munmap(vcpu->run, vcpu_mmap_sz());
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));

	ret = close(vcpu->fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));

	list_del(&vcpu->list);

	vcpu_arch_free(vcpu);
	free(vcpu);
}

void kvm_vm_release(struct kvm_vm *vmp)
{
	struct kvm_vcpu *vcpu, *tmp;
	int ret;

	list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
		vm_vcpu_rm(vmp, vcpu);

	ret = close(vmp->fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));

	ret = close(vmp->kvm_fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
}

static void __vm_mem_region_delete(struct kvm_vm *vm,
				   struct userspace_mem_region *region,
				   bool unlink)
{
	int ret;

	if (unlink) {
		rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
		rb_erase(&region->hva_node, &vm->regions.hva_tree);
		hash_del(&region->slot_node);
	}

	region->region.memory_size = 0;
	vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	sparsebit_free(&region->unused_phy_pages);
	ret = munmap(region->mmap_start, region->mmap_size);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));

	free(region);
}

/*
 * Destroys and frees the VM pointed to by vmp.
 */
void kvm_vm_free(struct kvm_vm *vmp)
{
	int ctr;
	struct hlist_node *node;
	struct userspace_mem_region *region;

	if (vmp == NULL)
		return;

	/* Free cached stats metadata and close FD */
	if (vmp->stats_fd) {
		free(vmp->stats_desc);
		close(vmp->stats_fd);
	}

	/* Free userspace_mem_regions. */
	hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
		__vm_mem_region_delete(vmp, region, false);

	/* Free sparsebit arrays. */
	sparsebit_free(&vmp->vpages_valid);
	sparsebit_free(&vmp->vpages_mapped);

	kvm_vm_release(vmp);

	/* Free the structure describing the VM. */
	free(vmp);
}
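/*
 * Illustrative sketch, not part of the library: how a test might use
 * kvm_userspace_memory_region_find() to check that a GPA range is backed
 * by the memslot it expects.  The example_* name and the expectations are
 * hypothetical.
 */
void example_check_memslot(struct kvm_vm *vm, uint64_t gpa, uint64_t size,
			   uint32_t expected_slot)
{
	struct kvm_userspace_memory_region *region;

	region = kvm_userspace_memory_region_find(vm, gpa, gpa + size - 1);
	TEST_ASSERT(region, "No memslot overlaps GPA range [0x%lx, 0x%lx)",
		    gpa, gpa + size);
	TEST_ASSERT(region->slot == expected_slot,
		    "GPA 0x%lx is backed by slot %u, expected slot %u",
		    gpa, region->slot, expected_slot);
}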
int kvm_memfd_alloc(size_t size, bool hugepages)
{
	int memfd_flags = MFD_CLOEXEC;
	int fd, r;

	if (hugepages)
		memfd_flags |= MFD_HUGETLB;

	fd = memfd_create("kvm_selftest", memfd_flags);
	TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd));

	r = ftruncate(fd, size);
	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r));

	r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));

	return fd;
}

/*
 * Memory Compare, host virtual to guest virtual
 *
 * Input Args:
 *   hva - Starting host virtual address
 *   vm - Virtual Machine
 *   gva - Starting guest virtual address
 *   len - number of bytes to compare
 *
 * Output Args: None
 *
 * Input/Output Args: None
 *
 * Return:
 *   Returns 0 if the bytes starting at hva for a length of len
 *   are equal to the guest virtual bytes starting at gva.  Returns
 *   a value < 0, if bytes at hva are less than those at gva.
 *   Otherwise a value > 0 is returned.
 *
 * Compares the bytes starting at the host virtual address hva, for
 * a length of len, to the guest bytes starting at the guest virtual
 * address given by gva.
 */
int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
{
	size_t amt;

	/*
	 * Compare a batch of bytes until either a mismatch is found
	 * or all the bytes have been compared.
	 */
	for (uintptr_t offset = 0; offset < len; offset += amt) {
		uintptr_t ptr1 = (uintptr_t)hva + offset;

		/*
		 * Determine host address for guest virtual address
		 * at offset.
		 */
		uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);

		/*
		 * Determine amount to compare on this pass.
		 * Don't allow the comparison to cross a page boundary.
		 */
		amt = len - offset;
		if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
			amt = vm->page_size - (ptr1 % vm->page_size);
		if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
			amt = vm->page_size - (ptr2 % vm->page_size);

		assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
		assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));

		/*
		 * Perform the comparison.  If there is a difference
		 * return that result to the caller, otherwise need
		 * to continue on looking for a mismatch.
		 */
		int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
		if (ret != 0)
			return ret;
	}

	/*
	 * No mismatch found.  Let the caller know the two memory
	 * areas are equal.
	 */
	return 0;
}
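/*
 * Illustrative sketch, not part of the library: comparing a guest virtual
 * range against a host-side reference buffer with kvm_memcmp_hva_gva().
 * The example_* name is hypothetical; a real test would typically let the
 * guest modify its copy before comparing.
 */
void example_verify_guest_bytes(struct kvm_vm *vm, vm_vaddr_t gva,
				const void *expected, size_t len)
{
	TEST_ASSERT(!kvm_memcmp_hva_gva((void *)expected, vm, gva, len),
		    "Guest memory at GVA 0x%lx does not match the expected %zu bytes",
		    gva, len);
}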
669 */ 670 return 0; 671 } 672 673 static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree, 674 struct userspace_mem_region *region) 675 { 676 struct rb_node **cur, *parent; 677 678 for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) { 679 struct userspace_mem_region *cregion; 680 681 cregion = container_of(*cur, typeof(*cregion), gpa_node); 682 parent = *cur; 683 if (region->region.guest_phys_addr < 684 cregion->region.guest_phys_addr) 685 cur = &(*cur)->rb_left; 686 else { 687 TEST_ASSERT(region->region.guest_phys_addr != 688 cregion->region.guest_phys_addr, 689 "Duplicate GPA in region tree"); 690 691 cur = &(*cur)->rb_right; 692 } 693 } 694 695 rb_link_node(®ion->gpa_node, parent, cur); 696 rb_insert_color(®ion->gpa_node, gpa_tree); 697 } 698 699 static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree, 700 struct userspace_mem_region *region) 701 { 702 struct rb_node **cur, *parent; 703 704 for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) { 705 struct userspace_mem_region *cregion; 706 707 cregion = container_of(*cur, typeof(*cregion), hva_node); 708 parent = *cur; 709 if (region->host_mem < cregion->host_mem) 710 cur = &(*cur)->rb_left; 711 else { 712 TEST_ASSERT(region->host_mem != 713 cregion->host_mem, 714 "Duplicate HVA in region tree"); 715 716 cur = &(*cur)->rb_right; 717 } 718 } 719 720 rb_link_node(®ion->hva_node, parent, cur); 721 rb_insert_color(®ion->hva_node, hva_tree); 722 } 723 724 725 int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, 726 uint64_t gpa, uint64_t size, void *hva) 727 { 728 struct kvm_userspace_memory_region region = { 729 .slot = slot, 730 .flags = flags, 731 .guest_phys_addr = gpa, 732 .memory_size = size, 733 .userspace_addr = (uintptr_t)hva, 734 }; 735 736 return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion); 737 } 738 739 void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, 740 uint64_t gpa, uint64_t size, void *hva) 741 { 742 int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva); 743 744 TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed, errno = %d (%s)", 745 errno, strerror(errno)); 746 } 747 748 /* 749 * VM Userspace Memory Region Add 750 * 751 * Input Args: 752 * vm - Virtual Machine 753 * src_type - Storage source for this region. 754 * NULL to use anonymous memory. 755 * guest_paddr - Starting guest physical address 756 * slot - KVM region slot 757 * npages - Number of physical pages 758 * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES) 759 * 760 * Output Args: None 761 * 762 * Return: None 763 * 764 * Allocates a memory area of the number of pages specified by npages 765 * and maps it to the VM specified by vm, at a starting physical address 766 * given by guest_paddr. The region is created with a KVM region slot 767 * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The 768 * region is created with the flags given by flags. 769 */ 770 void vm_userspace_mem_region_add(struct kvm_vm *vm, 771 enum vm_mem_backing_src_type src_type, 772 uint64_t guest_paddr, uint32_t slot, uint64_t npages, 773 uint32_t flags) 774 { 775 int ret; 776 struct userspace_mem_region *region; 777 size_t backing_src_pagesz = get_backing_src_pagesz(src_type); 778 size_t alignment; 779 780 TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages, 781 "Number of guest pages is not compatible with the host. 
" 782 "Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages)); 783 784 TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical " 785 "address not on a page boundary.\n" 786 " guest_paddr: 0x%lx vm->page_size: 0x%x", 787 guest_paddr, vm->page_size); 788 TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1) 789 <= vm->max_gfn, "Physical range beyond maximum " 790 "supported physical address,\n" 791 " guest_paddr: 0x%lx npages: 0x%lx\n" 792 " vm->max_gfn: 0x%lx vm->page_size: 0x%x", 793 guest_paddr, npages, vm->max_gfn, vm->page_size); 794 795 /* 796 * Confirm a mem region with an overlapping address doesn't 797 * already exist. 798 */ 799 region = (struct userspace_mem_region *) userspace_mem_region_find( 800 vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1); 801 if (region != NULL) 802 TEST_FAIL("overlapping userspace_mem_region already " 803 "exists\n" 804 " requested guest_paddr: 0x%lx npages: 0x%lx " 805 "page_size: 0x%x\n" 806 " existing guest_paddr: 0x%lx size: 0x%lx", 807 guest_paddr, npages, vm->page_size, 808 (uint64_t) region->region.guest_phys_addr, 809 (uint64_t) region->region.memory_size); 810 811 /* Confirm no region with the requested slot already exists. */ 812 hash_for_each_possible(vm->regions.slot_hash, region, slot_node, 813 slot) { 814 if (region->region.slot != slot) 815 continue; 816 817 TEST_FAIL("A mem region with the requested slot " 818 "already exists.\n" 819 " requested slot: %u paddr: 0x%lx npages: 0x%lx\n" 820 " existing slot: %u paddr: 0x%lx size: 0x%lx", 821 slot, guest_paddr, npages, 822 region->region.slot, 823 (uint64_t) region->region.guest_phys_addr, 824 (uint64_t) region->region.memory_size); 825 } 826 827 /* Allocate and initialize new mem region structure. */ 828 region = calloc(1, sizeof(*region)); 829 TEST_ASSERT(region != NULL, "Insufficient Memory"); 830 region->mmap_size = npages * vm->page_size; 831 832 #ifdef __s390x__ 833 /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */ 834 alignment = 0x100000; 835 #else 836 alignment = 1; 837 #endif 838 839 /* 840 * When using THP mmap is not guaranteed to returned a hugepage aligned 841 * address so we have to pad the mmap. Padding is not needed for HugeTLB 842 * because mmap will always return an address aligned to the HugeTLB 843 * page size. 
844 */ 845 if (src_type == VM_MEM_SRC_ANONYMOUS_THP) 846 alignment = max(backing_src_pagesz, alignment); 847 848 ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz)); 849 850 /* Add enough memory to align up if necessary */ 851 if (alignment > 1) 852 region->mmap_size += alignment; 853 854 region->fd = -1; 855 if (backing_src_is_shared(src_type)) 856 region->fd = kvm_memfd_alloc(region->mmap_size, 857 src_type == VM_MEM_SRC_SHARED_HUGETLB); 858 859 region->mmap_start = mmap(NULL, region->mmap_size, 860 PROT_READ | PROT_WRITE, 861 vm_mem_backing_src_alias(src_type)->flag, 862 region->fd, 0); 863 TEST_ASSERT(region->mmap_start != MAP_FAILED, 864 __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); 865 866 TEST_ASSERT(!is_backing_src_hugetlb(src_type) || 867 region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz), 868 "mmap_start %p is not aligned to HugeTLB page size 0x%lx", 869 region->mmap_start, backing_src_pagesz); 870 871 /* Align host address */ 872 region->host_mem = align_ptr_up(region->mmap_start, alignment); 873 874 /* As needed perform madvise */ 875 if ((src_type == VM_MEM_SRC_ANONYMOUS || 876 src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) { 877 ret = madvise(region->host_mem, npages * vm->page_size, 878 src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); 879 TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s", 880 region->host_mem, npages * vm->page_size, 881 vm_mem_backing_src_alias(src_type)->name); 882 } 883 884 region->unused_phy_pages = sparsebit_alloc(); 885 sparsebit_set_num(region->unused_phy_pages, 886 guest_paddr >> vm->page_shift, npages); 887 region->region.slot = slot; 888 region->region.flags = flags; 889 region->region.guest_phys_addr = guest_paddr; 890 region->region.memory_size = npages * vm->page_size; 891 region->region.userspace_addr = (uintptr_t) region->host_mem; 892 ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); 893 TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" 894 " rc: %i errno: %i\n" 895 " slot: %u flags: 0x%x\n" 896 " guest_phys_addr: 0x%lx size: 0x%lx", 897 ret, errno, slot, flags, 898 guest_paddr, (uint64_t) region->region.memory_size); 899 900 /* Add to quick lookup data structures */ 901 vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region); 902 vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region); 903 hash_add(vm->regions.slot_hash, ®ion->slot_node, slot); 904 905 /* If shared memory, create an alias. */ 906 if (region->fd >= 0) { 907 region->mmap_alias = mmap(NULL, region->mmap_size, 908 PROT_READ | PROT_WRITE, 909 vm_mem_backing_src_alias(src_type)->flag, 910 region->fd, 0); 911 TEST_ASSERT(region->mmap_alias != MAP_FAILED, 912 __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); 913 914 /* Align host alias address */ 915 region->host_alias = align_ptr_up(region->mmap_alias, alignment); 916 } 917 } 918 919 /* 920 * Memslot to region 921 * 922 * Input Args: 923 * vm - Virtual Machine 924 * memslot - KVM memory slot ID 925 * 926 * Output Args: None 927 * 928 * Return: 929 * Pointer to memory region structure that describe memory region 930 * using kvm memory slot ID given by memslot. TEST_ASSERT failure 931 * on error (e.g. currently no memory region using memslot as a KVM 932 * memory slot ID). 
933 */ 934 struct userspace_mem_region * 935 memslot2region(struct kvm_vm *vm, uint32_t memslot) 936 { 937 struct userspace_mem_region *region; 938 939 hash_for_each_possible(vm->regions.slot_hash, region, slot_node, 940 memslot) 941 if (region->region.slot == memslot) 942 return region; 943 944 fprintf(stderr, "No mem region with the requested slot found,\n" 945 " requested slot: %u\n", memslot); 946 fputs("---- vm dump ----\n", stderr); 947 vm_dump(stderr, vm, 2); 948 TEST_FAIL("Mem region not found"); 949 return NULL; 950 } 951 952 /* 953 * VM Memory Region Flags Set 954 * 955 * Input Args: 956 * vm - Virtual Machine 957 * flags - Starting guest physical address 958 * 959 * Output Args: None 960 * 961 * Return: None 962 * 963 * Sets the flags of the memory region specified by the value of slot, 964 * to the values given by flags. 965 */ 966 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) 967 { 968 int ret; 969 struct userspace_mem_region *region; 970 971 region = memslot2region(vm, slot); 972 973 region->region.flags = flags; 974 975 ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); 976 977 TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" 978 " rc: %i errno: %i slot: %u flags: 0x%x", 979 ret, errno, slot, flags); 980 } 981 982 /* 983 * VM Memory Region Move 984 * 985 * Input Args: 986 * vm - Virtual Machine 987 * slot - Slot of the memory region to move 988 * new_gpa - Starting guest physical address 989 * 990 * Output Args: None 991 * 992 * Return: None 993 * 994 * Change the gpa of a memory region. 995 */ 996 void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) 997 { 998 struct userspace_mem_region *region; 999 int ret; 1000 1001 region = memslot2region(vm, slot); 1002 1003 region->region.guest_phys_addr = new_gpa; 1004 1005 ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); 1006 1007 TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n" 1008 "ret: %i errno: %i slot: %u new_gpa: 0x%lx", 1009 ret, errno, slot, new_gpa); 1010 } 1011 1012 /* 1013 * VM Memory Region Delete 1014 * 1015 * Input Args: 1016 * vm - Virtual Machine 1017 * slot - Slot of the memory region to delete 1018 * 1019 * Output Args: None 1020 * 1021 * Return: None 1022 * 1023 * Delete a memory region. 1024 */ 1025 void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) 1026 { 1027 __vm_mem_region_delete(vm, memslot2region(vm, slot), true); 1028 } 1029 1030 /* Returns the size of a vCPU's kvm_run structure. */ 1031 static int vcpu_mmap_sz(void) 1032 { 1033 int dev_fd, ret; 1034 1035 dev_fd = open_kvm_dev_path_or_exit(); 1036 1037 ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); 1038 TEST_ASSERT(ret >= sizeof(struct kvm_run), 1039 KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret)); 1040 1041 close(dev_fd); 1042 1043 return ret; 1044 } 1045 1046 static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id) 1047 { 1048 struct kvm_vcpu *vcpu; 1049 1050 list_for_each_entry(vcpu, &vm->vcpus, list) { 1051 if (vcpu->id == vcpu_id) 1052 return true; 1053 } 1054 1055 return false; 1056 } 1057 1058 /* 1059 * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id. 1060 * No additional vCPU setup is done. Returns the vCPU. 1061 */ 1062 struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) 1063 { 1064 struct kvm_vcpu *vcpu; 1065 1066 /* Confirm a vcpu with the specified id doesn't already exist. 
*/ 1067 TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists\n", vcpu_id); 1068 1069 /* Allocate and initialize new vcpu structure. */ 1070 vcpu = calloc(1, sizeof(*vcpu)); 1071 TEST_ASSERT(vcpu != NULL, "Insufficient Memory"); 1072 1073 vcpu->vm = vm; 1074 vcpu->id = vcpu_id; 1075 vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id); 1076 TEST_ASSERT(vcpu->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu->fd)); 1077 1078 TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size " 1079 "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", 1080 vcpu_mmap_sz(), sizeof(*vcpu->run)); 1081 vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(), 1082 PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); 1083 TEST_ASSERT(vcpu->run != MAP_FAILED, 1084 __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); 1085 1086 /* Add to linked-list of VCPUs. */ 1087 list_add(&vcpu->list, &vm->vcpus); 1088 1089 return vcpu; 1090 } 1091 1092 /* 1093 * VM Virtual Address Unused Gap 1094 * 1095 * Input Args: 1096 * vm - Virtual Machine 1097 * sz - Size (bytes) 1098 * vaddr_min - Minimum Virtual Address 1099 * 1100 * Output Args: None 1101 * 1102 * Return: 1103 * Lowest virtual address at or below vaddr_min, with at least 1104 * sz unused bytes. TEST_ASSERT failure if no area of at least 1105 * size sz is available. 1106 * 1107 * Within the VM specified by vm, locates the lowest starting virtual 1108 * address >= vaddr_min, that has at least sz unallocated bytes. A 1109 * TEST_ASSERT failure occurs for invalid input or no area of at least 1110 * sz unallocated bytes >= vaddr_min is available. 1111 */ 1112 static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, 1113 vm_vaddr_t vaddr_min) 1114 { 1115 uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift; 1116 1117 /* Determine lowest permitted virtual page index. */ 1118 uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift; 1119 if ((pgidx_start * vm->page_size) < vaddr_min) 1120 goto no_va_found; 1121 1122 /* Loop over section with enough valid virtual page indexes. */ 1123 if (!sparsebit_is_set_num(vm->vpages_valid, 1124 pgidx_start, pages)) 1125 pgidx_start = sparsebit_next_set_num(vm->vpages_valid, 1126 pgidx_start, pages); 1127 do { 1128 /* 1129 * Are there enough unused virtual pages available at 1130 * the currently proposed starting virtual page index. 1131 * If not, adjust proposed starting index to next 1132 * possible. 1133 */ 1134 if (sparsebit_is_clear_num(vm->vpages_mapped, 1135 pgidx_start, pages)) 1136 goto va_found; 1137 pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped, 1138 pgidx_start, pages); 1139 if (pgidx_start == 0) 1140 goto no_va_found; 1141 1142 /* 1143 * If needed, adjust proposed starting virtual address, 1144 * to next range of valid virtual addresses. 
1145 */ 1146 if (!sparsebit_is_set_num(vm->vpages_valid, 1147 pgidx_start, pages)) { 1148 pgidx_start = sparsebit_next_set_num( 1149 vm->vpages_valid, pgidx_start, pages); 1150 if (pgidx_start == 0) 1151 goto no_va_found; 1152 } 1153 } while (pgidx_start != 0); 1154 1155 no_va_found: 1156 TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages); 1157 1158 /* NOT REACHED */ 1159 return -1; 1160 1161 va_found: 1162 TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid, 1163 pgidx_start, pages), 1164 "Unexpected, invalid virtual page index range,\n" 1165 " pgidx_start: 0x%lx\n" 1166 " pages: 0x%lx", 1167 pgidx_start, pages); 1168 TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped, 1169 pgidx_start, pages), 1170 "Unexpected, pages already mapped,\n" 1171 " pgidx_start: 0x%lx\n" 1172 " pages: 0x%lx", 1173 pgidx_start, pages); 1174 1175 return pgidx_start * vm->page_size; 1176 } 1177 1178 /* 1179 * VM Virtual Address Allocate 1180 * 1181 * Input Args: 1182 * vm - Virtual Machine 1183 * sz - Size in bytes 1184 * vaddr_min - Minimum starting virtual address 1185 * 1186 * Output Args: None 1187 * 1188 * Return: 1189 * Starting guest virtual address 1190 * 1191 * Allocates at least sz bytes within the virtual address space of the vm 1192 * given by vm. The allocated bytes are mapped to a virtual address >= 1193 * the address given by vaddr_min. Note that each allocation uses a 1194 * a unique set of pages, with the minimum real allocation being at least 1195 * a page. 1196 */ 1197 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) 1198 { 1199 uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); 1200 1201 virt_pgd_alloc(vm); 1202 vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages, 1203 KVM_UTIL_MIN_PFN * vm->page_size, 0); 1204 1205 /* 1206 * Find an unused range of virtual page addresses of at least 1207 * pages in length. 1208 */ 1209 vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min); 1210 1211 /* Map the virtual pages. */ 1212 for (vm_vaddr_t vaddr = vaddr_start; pages > 0; 1213 pages--, vaddr += vm->page_size, paddr += vm->page_size) { 1214 1215 virt_pg_map(vm, vaddr, paddr); 1216 1217 sparsebit_set(vm->vpages_mapped, 1218 vaddr >> vm->page_shift); 1219 } 1220 1221 return vaddr_start; 1222 } 1223 1224 /* 1225 * VM Virtual Address Allocate Pages 1226 * 1227 * Input Args: 1228 * vm - Virtual Machine 1229 * 1230 * Output Args: None 1231 * 1232 * Return: 1233 * Starting guest virtual address 1234 * 1235 * Allocates at least N system pages worth of bytes within the virtual address 1236 * space of the vm. 1237 */ 1238 vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages) 1239 { 1240 return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR); 1241 } 1242 1243 /* 1244 * VM Virtual Address Allocate Page 1245 * 1246 * Input Args: 1247 * vm - Virtual Machine 1248 * 1249 * Output Args: None 1250 * 1251 * Return: 1252 * Starting guest virtual address 1253 * 1254 * Allocates at least one system page worth of bytes within the virtual address 1255 * space of the vm. 
1256 */ 1257 vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm) 1258 { 1259 return vm_vaddr_alloc_pages(vm, 1); 1260 } 1261 1262 /* 1263 * Map a range of VM virtual address to the VM's physical address 1264 * 1265 * Input Args: 1266 * vm - Virtual Machine 1267 * vaddr - Virtuall address to map 1268 * paddr - VM Physical Address 1269 * npages - The number of pages to map 1270 * 1271 * Output Args: None 1272 * 1273 * Return: None 1274 * 1275 * Within the VM given by @vm, creates a virtual translation for 1276 * @npages starting at @vaddr to the page range starting at @paddr. 1277 */ 1278 void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, 1279 unsigned int npages) 1280 { 1281 size_t page_size = vm->page_size; 1282 size_t size = npages * page_size; 1283 1284 TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow"); 1285 TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); 1286 1287 while (npages--) { 1288 virt_pg_map(vm, vaddr, paddr); 1289 vaddr += page_size; 1290 paddr += page_size; 1291 } 1292 } 1293 1294 /* 1295 * Address VM Physical to Host Virtual 1296 * 1297 * Input Args: 1298 * vm - Virtual Machine 1299 * gpa - VM physical address 1300 * 1301 * Output Args: None 1302 * 1303 * Return: 1304 * Equivalent host virtual address 1305 * 1306 * Locates the memory region containing the VM physical address given 1307 * by gpa, within the VM given by vm. When found, the host virtual 1308 * address providing the memory to the vm physical address is returned. 1309 * A TEST_ASSERT failure occurs if no region containing gpa exists. 1310 */ 1311 void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) 1312 { 1313 struct userspace_mem_region *region; 1314 1315 region = userspace_mem_region_find(vm, gpa, gpa); 1316 if (!region) { 1317 TEST_FAIL("No vm physical memory at 0x%lx", gpa); 1318 return NULL; 1319 } 1320 1321 return (void *)((uintptr_t)region->host_mem 1322 + (gpa - region->region.guest_phys_addr)); 1323 } 1324 1325 /* 1326 * Address Host Virtual to VM Physical 1327 * 1328 * Input Args: 1329 * vm - Virtual Machine 1330 * hva - Host virtual address 1331 * 1332 * Output Args: None 1333 * 1334 * Return: 1335 * Equivalent VM physical address 1336 * 1337 * Locates the memory region containing the host virtual address given 1338 * by hva, within the VM given by vm. When found, the equivalent 1339 * VM physical address is returned. A TEST_ASSERT failure occurs if no 1340 * region containing hva exists. 1341 */ 1342 vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) 1343 { 1344 struct rb_node *node; 1345 1346 for (node = vm->regions.hva_tree.rb_node; node; ) { 1347 struct userspace_mem_region *region = 1348 container_of(node, struct userspace_mem_region, hva_node); 1349 1350 if (hva >= region->host_mem) { 1351 if (hva <= (region->host_mem 1352 + region->region.memory_size - 1)) 1353 return (vm_paddr_t)((uintptr_t) 1354 region->region.guest_phys_addr 1355 + (hva - (uintptr_t)region->host_mem)); 1356 1357 node = node->rb_right; 1358 } else 1359 node = node->rb_left; 1360 } 1361 1362 TEST_FAIL("No mapping to a guest physical address, hva: %p", hva); 1363 return -1; 1364 } 1365 1366 /* 1367 * Address VM physical to Host Virtual *alias*. 1368 * 1369 * Input Args: 1370 * vm - Virtual Machine 1371 * gpa - VM physical address 1372 * 1373 * Output Args: None 1374 * 1375 * Return: 1376 * Equivalent address within the host virtual *alias* area, or NULL 1377 * (without failing the test) if the guest memory is not shared (so 1378 * no alias exists). 
1379 * 1380 * Create a writable, shared virtual=>physical alias for the specific GPA. 1381 * The primary use case is to allow the host selftest to manipulate guest 1382 * memory without mapping said memory in the guest's address space. And, for 1383 * userfaultfd-based demand paging, to do so without triggering userfaults. 1384 */ 1385 void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa) 1386 { 1387 struct userspace_mem_region *region; 1388 uintptr_t offset; 1389 1390 region = userspace_mem_region_find(vm, gpa, gpa); 1391 if (!region) 1392 return NULL; 1393 1394 if (!region->host_alias) 1395 return NULL; 1396 1397 offset = gpa - region->region.guest_phys_addr; 1398 return (void *) ((uintptr_t) region->host_alias + offset); 1399 } 1400 1401 /* Create an interrupt controller chip for the specified VM. */ 1402 void vm_create_irqchip(struct kvm_vm *vm) 1403 { 1404 vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); 1405 1406 vm->has_irqchip = true; 1407 } 1408 1409 int _vcpu_run(struct kvm_vcpu *vcpu) 1410 { 1411 int rc; 1412 1413 do { 1414 rc = __vcpu_run(vcpu); 1415 } while (rc == -1 && errno == EINTR); 1416 1417 assert_on_unhandled_exception(vcpu); 1418 1419 return rc; 1420 } 1421 1422 /* 1423 * Invoke KVM_RUN on a vCPU until KVM returns something other than -EINTR. 1424 * Assert if the KVM returns an error (other than -EINTR). 1425 */ 1426 void vcpu_run(struct kvm_vcpu *vcpu) 1427 { 1428 int ret = _vcpu_run(vcpu); 1429 1430 TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_RUN, ret)); 1431 } 1432 1433 void vcpu_run_complete_io(struct kvm_vcpu *vcpu) 1434 { 1435 int ret; 1436 1437 vcpu->run->immediate_exit = 1; 1438 ret = __vcpu_run(vcpu); 1439 vcpu->run->immediate_exit = 0; 1440 1441 TEST_ASSERT(ret == -1 && errno == EINTR, 1442 "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i", 1443 ret, errno); 1444 } 1445 1446 /* 1447 * Get the list of guest registers which are supported for 1448 * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls. Returns a kvm_reg_list pointer, 1449 * it is the caller's responsibility to free the list. 
1450 */ 1451 struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu) 1452 { 1453 struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list; 1454 int ret; 1455 1456 ret = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, ®_list_n); 1457 TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0"); 1458 1459 reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64)); 1460 reg_list->n = reg_list_n.n; 1461 vcpu_ioctl(vcpu, KVM_GET_REG_LIST, reg_list); 1462 return reg_list; 1463 } 1464 1465 void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) 1466 { 1467 uint32_t page_size = vcpu->vm->page_size; 1468 uint32_t size = vcpu->vm->dirty_ring_size; 1469 1470 TEST_ASSERT(size > 0, "Should enable dirty ring first"); 1471 1472 if (!vcpu->dirty_gfns) { 1473 void *addr; 1474 1475 addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, vcpu->fd, 1476 page_size * KVM_DIRTY_LOG_PAGE_OFFSET); 1477 TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private"); 1478 1479 addr = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_PRIVATE, vcpu->fd, 1480 page_size * KVM_DIRTY_LOG_PAGE_OFFSET); 1481 TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec"); 1482 1483 addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 1484 page_size * KVM_DIRTY_LOG_PAGE_OFFSET); 1485 TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed"); 1486 1487 vcpu->dirty_gfns = addr; 1488 vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn); 1489 } 1490 1491 return vcpu->dirty_gfns; 1492 } 1493 1494 /* 1495 * Device Ioctl 1496 */ 1497 1498 int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr) 1499 { 1500 struct kvm_device_attr attribute = { 1501 .group = group, 1502 .attr = attr, 1503 .flags = 0, 1504 }; 1505 1506 return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute); 1507 } 1508 1509 int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type) 1510 { 1511 struct kvm_create_device create_dev = { 1512 .type = type, 1513 .flags = KVM_CREATE_DEVICE_TEST, 1514 }; 1515 1516 return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev); 1517 } 1518 1519 int __kvm_create_device(struct kvm_vm *vm, uint64_t type) 1520 { 1521 struct kvm_create_device create_dev = { 1522 .type = type, 1523 .fd = -1, 1524 .flags = 0, 1525 }; 1526 int err; 1527 1528 err = __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev); 1529 TEST_ASSERT(err <= 0, "KVM_CREATE_DEVICE shouldn't return a positive value"); 1530 return err ? : create_dev.fd; 1531 } 1532 1533 int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val) 1534 { 1535 struct kvm_device_attr kvmattr = { 1536 .group = group, 1537 .attr = attr, 1538 .flags = 0, 1539 .addr = (uintptr_t)val, 1540 }; 1541 1542 return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr); 1543 } 1544 1545 int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val) 1546 { 1547 struct kvm_device_attr kvmattr = { 1548 .group = group, 1549 .attr = attr, 1550 .flags = 0, 1551 .addr = (uintptr_t)val, 1552 }; 1553 1554 return __kvm_ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &kvmattr); 1555 } 1556 1557 /* 1558 * IRQ related functions. 
1559 */ 1560 1561 int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) 1562 { 1563 struct kvm_irq_level irq_level = { 1564 .irq = irq, 1565 .level = level, 1566 }; 1567 1568 return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level); 1569 } 1570 1571 void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) 1572 { 1573 int ret = _kvm_irq_line(vm, irq, level); 1574 1575 TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret)); 1576 } 1577 1578 struct kvm_irq_routing *kvm_gsi_routing_create(void) 1579 { 1580 struct kvm_irq_routing *routing; 1581 size_t size; 1582 1583 size = sizeof(struct kvm_irq_routing); 1584 /* Allocate space for the max number of entries: this wastes 196 KBs. */ 1585 size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry); 1586 routing = calloc(1, size); 1587 assert(routing); 1588 1589 return routing; 1590 } 1591 1592 void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing, 1593 uint32_t gsi, uint32_t pin) 1594 { 1595 int i; 1596 1597 assert(routing); 1598 assert(routing->nr < KVM_MAX_IRQ_ROUTES); 1599 1600 i = routing->nr; 1601 routing->entries[i].gsi = gsi; 1602 routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP; 1603 routing->entries[i].flags = 0; 1604 routing->entries[i].u.irqchip.irqchip = 0; 1605 routing->entries[i].u.irqchip.pin = pin; 1606 routing->nr++; 1607 } 1608 1609 int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing) 1610 { 1611 int ret; 1612 1613 assert(routing); 1614 ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); 1615 free(routing); 1616 1617 return ret; 1618 } 1619 1620 void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing) 1621 { 1622 int ret; 1623 1624 ret = _kvm_gsi_routing_write(vm, routing); 1625 TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_GSI_ROUTING, ret)); 1626 } 1627 1628 /* 1629 * VM Dump 1630 * 1631 * Input Args: 1632 * vm - Virtual Machine 1633 * indent - Left margin indent amount 1634 * 1635 * Output Args: 1636 * stream - Output FILE stream 1637 * 1638 * Return: None 1639 * 1640 * Dumps the current state of the VM given by vm, to the FILE stream 1641 * given by stream. 
1642 */ 1643 void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) 1644 { 1645 int ctr; 1646 struct userspace_mem_region *region; 1647 struct kvm_vcpu *vcpu; 1648 1649 fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode); 1650 fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd); 1651 fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size); 1652 fprintf(stream, "%*sMem Regions:\n", indent, ""); 1653 hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) { 1654 fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx " 1655 "host_virt: %p\n", indent + 2, "", 1656 (uint64_t) region->region.guest_phys_addr, 1657 (uint64_t) region->region.memory_size, 1658 region->host_mem); 1659 fprintf(stream, "%*sunused_phy_pages: ", indent + 2, ""); 1660 sparsebit_dump(stream, region->unused_phy_pages, 0); 1661 } 1662 fprintf(stream, "%*sMapped Virtual Pages:\n", indent, ""); 1663 sparsebit_dump(stream, vm->vpages_mapped, indent + 2); 1664 fprintf(stream, "%*spgd_created: %u\n", indent, "", 1665 vm->pgd_created); 1666 if (vm->pgd_created) { 1667 fprintf(stream, "%*sVirtual Translation Tables:\n", 1668 indent + 2, ""); 1669 virt_dump(stream, vm, indent + 4); 1670 } 1671 fprintf(stream, "%*sVCPUs:\n", indent, ""); 1672 1673 list_for_each_entry(vcpu, &vm->vcpus, list) 1674 vcpu_dump(stream, vcpu, indent + 2); 1675 } 1676 1677 /* Known KVM exit reasons */ 1678 static struct exit_reason { 1679 unsigned int reason; 1680 const char *name; 1681 } exit_reasons_known[] = { 1682 {KVM_EXIT_UNKNOWN, "UNKNOWN"}, 1683 {KVM_EXIT_EXCEPTION, "EXCEPTION"}, 1684 {KVM_EXIT_IO, "IO"}, 1685 {KVM_EXIT_HYPERCALL, "HYPERCALL"}, 1686 {KVM_EXIT_DEBUG, "DEBUG"}, 1687 {KVM_EXIT_HLT, "HLT"}, 1688 {KVM_EXIT_MMIO, "MMIO"}, 1689 {KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"}, 1690 {KVM_EXIT_SHUTDOWN, "SHUTDOWN"}, 1691 {KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"}, 1692 {KVM_EXIT_INTR, "INTR"}, 1693 {KVM_EXIT_SET_TPR, "SET_TPR"}, 1694 {KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"}, 1695 {KVM_EXIT_S390_SIEIC, "S390_SIEIC"}, 1696 {KVM_EXIT_S390_RESET, "S390_RESET"}, 1697 {KVM_EXIT_DCR, "DCR"}, 1698 {KVM_EXIT_NMI, "NMI"}, 1699 {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"}, 1700 {KVM_EXIT_OSI, "OSI"}, 1701 {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"}, 1702 {KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"}, 1703 {KVM_EXIT_X86_RDMSR, "RDMSR"}, 1704 {KVM_EXIT_X86_WRMSR, "WRMSR"}, 1705 {KVM_EXIT_XEN, "XEN"}, 1706 #ifdef KVM_EXIT_MEMORY_NOT_PRESENT 1707 {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"}, 1708 #endif 1709 }; 1710 1711 /* 1712 * Exit Reason String 1713 * 1714 * Input Args: 1715 * exit_reason - Exit reason 1716 * 1717 * Output Args: None 1718 * 1719 * Return: 1720 * Constant string pointer describing the exit reason. 1721 * 1722 * Locates and returns a constant string that describes the KVM exit 1723 * reason given by exit_reason. If no such string is found, a constant 1724 * string of "Unknown" is returned. 
1725 */ 1726 const char *exit_reason_str(unsigned int exit_reason) 1727 { 1728 unsigned int n1; 1729 1730 for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) { 1731 if (exit_reason == exit_reasons_known[n1].reason) 1732 return exit_reasons_known[n1].name; 1733 } 1734 1735 return "Unknown"; 1736 } 1737 1738 /* 1739 * Physical Contiguous Page Allocator 1740 * 1741 * Input Args: 1742 * vm - Virtual Machine 1743 * num - number of pages 1744 * paddr_min - Physical address minimum 1745 * memslot - Memory region to allocate page from 1746 * 1747 * Output Args: None 1748 * 1749 * Return: 1750 * Starting physical address 1751 * 1752 * Within the VM specified by vm, locates a range of available physical 1753 * pages at or above paddr_min. If found, the pages are marked as in use 1754 * and their base address is returned. A TEST_ASSERT failure occurs if 1755 * not enough pages are available at or above paddr_min. 1756 */ 1757 vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, 1758 vm_paddr_t paddr_min, uint32_t memslot) 1759 { 1760 struct userspace_mem_region *region; 1761 sparsebit_idx_t pg, base; 1762 1763 TEST_ASSERT(num > 0, "Must allocate at least one page"); 1764 1765 TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " 1766 "not divisible by page size.\n" 1767 " paddr_min: 0x%lx page_size: 0x%x", 1768 paddr_min, vm->page_size); 1769 1770 region = memslot2region(vm, memslot); 1771 base = pg = paddr_min >> vm->page_shift; 1772 1773 do { 1774 for (; pg < base + num; ++pg) { 1775 if (!sparsebit_is_set(region->unused_phy_pages, pg)) { 1776 base = pg = sparsebit_next_set(region->unused_phy_pages, pg); 1777 break; 1778 } 1779 } 1780 } while (pg && pg != base + num); 1781 1782 if (pg == 0) { 1783 fprintf(stderr, "No guest physical page available, " 1784 "paddr_min: 0x%lx page_size: 0x%x memslot: %u\n", 1785 paddr_min, vm->page_size, memslot); 1786 fputs("---- vm dump ----\n", stderr); 1787 vm_dump(stderr, vm, 2); 1788 abort(); 1789 } 1790 1791 for (pg = base; pg < base + num; ++pg) 1792 sparsebit_clear(region->unused_phy_pages, pg); 1793 1794 return base * vm->page_size; 1795 } 1796 1797 vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, 1798 uint32_t memslot) 1799 { 1800 return vm_phy_pages_alloc(vm, 1, paddr_min, memslot); 1801 } 1802 1803 /* Arbitrary minimum physical address used for virtual translation tables. 
*/ 1804 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 1805 1806 vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm) 1807 { 1808 return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); 1809 } 1810 1811 /* 1812 * Address Guest Virtual to Host Virtual 1813 * 1814 * Input Args: 1815 * vm - Virtual Machine 1816 * gva - VM virtual address 1817 * 1818 * Output Args: None 1819 * 1820 * Return: 1821 * Equivalent host virtual address 1822 */ 1823 void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva) 1824 { 1825 return addr_gpa2hva(vm, addr_gva2gpa(vm, gva)); 1826 } 1827 1828 unsigned long __weak vm_compute_max_gfn(struct kvm_vm *vm) 1829 { 1830 return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1; 1831 } 1832 1833 static unsigned int vm_calc_num_pages(unsigned int num_pages, 1834 unsigned int page_shift, 1835 unsigned int new_page_shift, 1836 bool ceil) 1837 { 1838 unsigned int n = 1 << (new_page_shift - page_shift); 1839 1840 if (page_shift >= new_page_shift) 1841 return num_pages * (1 << (page_shift - new_page_shift)); 1842 1843 return num_pages / n + !!(ceil && num_pages % n); 1844 } 1845 1846 static inline int getpageshift(void) 1847 { 1848 return __builtin_ffs(getpagesize()) - 1; 1849 } 1850 1851 unsigned int 1852 vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) 1853 { 1854 return vm_calc_num_pages(num_guest_pages, 1855 vm_guest_mode_params[mode].page_shift, 1856 getpageshift(), true); 1857 } 1858 1859 unsigned int 1860 vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages) 1861 { 1862 return vm_calc_num_pages(num_host_pages, getpageshift(), 1863 vm_guest_mode_params[mode].page_shift, false); 1864 } 1865 1866 unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size) 1867 { 1868 unsigned int n; 1869 n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size); 1870 return vm_adjust_num_guest_pages(mode, n); 1871 } 1872 1873 /* 1874 * Read binary stats descriptors 1875 * 1876 * Input Args: 1877 * stats_fd - the file descriptor for the binary stats file from which to read 1878 * header - the binary stats metadata header corresponding to the given FD 1879 * 1880 * Output Args: None 1881 * 1882 * Return: 1883 * A pointer to a newly allocated series of stat descriptors. 1884 * Caller is responsible for freeing the returned kvm_stats_desc. 1885 * 1886 * Read the stats descriptors from the binary stats interface. 
1887 */ 1888 struct kvm_stats_desc *read_stats_descriptors(int stats_fd, 1889 struct kvm_stats_header *header) 1890 { 1891 struct kvm_stats_desc *stats_desc; 1892 ssize_t desc_size, total_size, ret; 1893 1894 desc_size = get_stats_descriptor_size(header); 1895 total_size = header->num_desc * desc_size; 1896 1897 stats_desc = calloc(header->num_desc, desc_size); 1898 TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors"); 1899 1900 ret = pread(stats_fd, stats_desc, total_size, header->desc_offset); 1901 TEST_ASSERT(ret == total_size, "Read KVM stats descriptors"); 1902 1903 return stats_desc; 1904 } 1905 1906 /* 1907 * Read stat data for a particular stat 1908 * 1909 * Input Args: 1910 * stats_fd - the file descriptor for the binary stats file from which to read 1911 * header - the binary stats metadata header corresponding to the given FD 1912 * desc - the binary stat metadata for the particular stat to be read 1913 * max_elements - the maximum number of 8-byte values to read into data 1914 * 1915 * Output Args: 1916 * data - the buffer into which stat data should be read 1917 * 1918 * Read the data values of a specified stat from the binary stats interface. 1919 */ 1920 void read_stat_data(int stats_fd, struct kvm_stats_header *header, 1921 struct kvm_stats_desc *desc, uint64_t *data, 1922 size_t max_elements) 1923 { 1924 size_t nr_elements = min_t(ssize_t, desc->size, max_elements); 1925 size_t size = nr_elements * sizeof(*data); 1926 ssize_t ret; 1927 1928 TEST_ASSERT(desc->size, "No elements in stat '%s'", desc->name); 1929 TEST_ASSERT(max_elements, "Zero elements requested for stat '%s'", desc->name); 1930 1931 ret = pread(stats_fd, data, size, 1932 header->data_offset + desc->offset); 1933 1934 TEST_ASSERT(ret >= 0, "pread() failed on stat '%s', errno: %i (%s)", 1935 desc->name, errno, strerror(errno)); 1936 TEST_ASSERT(ret == size, 1937 "pread() on stat '%s' read %ld bytes, wanted %lu bytes", 1938 desc->name, size, ret); 1939 } 1940 1941 /* 1942 * Read the data of the named stat 1943 * 1944 * Input Args: 1945 * vm - the VM for which the stat should be read 1946 * stat_name - the name of the stat to read 1947 * max_elements - the maximum number of 8-byte values to read into data 1948 * 1949 * Output Args: 1950 * data - the buffer into which stat data should be read 1951 * 1952 * Read the data values of a specified stat from the binary stats interface. 1953 */ 1954 void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, 1955 size_t max_elements) 1956 { 1957 struct kvm_stats_desc *desc; 1958 size_t size_desc; 1959 int i; 1960 1961 if (!vm->stats_fd) { 1962 vm->stats_fd = vm_get_stats_fd(vm); 1963 read_stats_header(vm->stats_fd, &vm->stats_header); 1964 vm->stats_desc = read_stats_descriptors(vm->stats_fd, 1965 &vm->stats_header); 1966 } 1967 1968 size_desc = get_stats_descriptor_size(&vm->stats_header); 1969 1970 for (i = 0; i < vm->stats_header.num_desc; ++i) { 1971 desc = (void *)vm->stats_desc + (i * size_desc); 1972 1973 if (strcmp(desc->name, stat_name)) 1974 continue; 1975 1976 read_stat_data(vm->stats_fd, &vm->stats_header, desc, 1977 data, max_elements); 1978 1979 break; 1980 } 1981 } 1982