// SPDX-License-Identifier: GPL-2.0-only
/*
 * tools/testing/selftests/kvm/lib/kvm_util.c
 *
 * Copyright (C) 2018, Google LLC.
 */

#define _GNU_SOURCE /* for program_invocation_name */
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"

#include <assert.h>
#include <sched.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <linux/kernel.h>

#define KVM_UTIL_MIN_PFN	2

static int vcpu_mmap_sz(void);

int open_path_or_exit(const char *path, int flags)
{
	int fd;

	fd = open(path, flags);
	__TEST_REQUIRE(fd >= 0, "%s not available (errno: %d)", path, errno);

	return fd;
}

/*
 * Open KVM_DEV_PATH if available, otherwise exit the entire program.
 *
 * Input Args:
 *   flags - The flags to pass when opening KVM_DEV_PATH.
 *
 * Return:
 *   The opened file descriptor of /dev/kvm.
 */
static int _open_kvm_dev_path_or_exit(int flags)
{
	return open_path_or_exit(KVM_DEV_PATH, flags);
}

int open_kvm_dev_path_or_exit(void)
{
	return _open_kvm_dev_path_or_exit(O_RDONLY);
}

static bool get_module_param_bool(const char *module_name, const char *param)
{
	const int path_size = 128;
	char path[path_size];
	char value;
	ssize_t r;
	int fd;

	r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
		     module_name, param);
	TEST_ASSERT(r < path_size,
		    "Failed to construct sysfs path in %d bytes.", path_size);

	fd = open_path_or_exit(path, O_RDONLY);

	r = read(fd, &value, 1);
	TEST_ASSERT(r == 1, "read(%s) failed", path);

	r = close(fd);
	TEST_ASSERT(!r, "close(%s) failed", path);

	if (value == 'Y')
		return true;
	else if (value == 'N')
		return false;

	TEST_FAIL("Unrecognized value '%c' for boolean module param", value);
}

bool get_kvm_intel_param_bool(const char *param)
{
	return get_module_param_bool("kvm_intel", param);
}

bool get_kvm_amd_param_bool(const char *param)
{
	return get_module_param_bool("kvm_amd", param);
}

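/*
 * Example (illustrative sketch, not used by the library itself): a test
 * that depends on a vendor module feature can gate itself on the module
 * param helpers above.  The "ept" parameter name is only an assumption
 * made for the sake of the example.
 *
 *	TEST_REQUIRE(get_kvm_intel_param_bool("ept"));
 */
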
/*
 * Capability
 *
 * Input Args:
 *   cap - Capability
 *
 * Output Args: None
 *
 * Return:
 *   On success, the value corresponding to the capability (KVM_CAP_*)
 *   specified by cap.  On failure a TEST_ASSERT failure is produced.
 *
 * Looks up and returns the value corresponding to the capability
 * (KVM_CAP_*) given by cap.
 */
unsigned int kvm_check_cap(long cap)
{
	int ret;
	int kvm_fd;

	kvm_fd = open_kvm_dev_path_or_exit();
	ret = __kvm_ioctl(kvm_fd, KVM_CHECK_EXTENSION, (void *)cap);
	TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret));

	close(kvm_fd);

	return (unsigned int)ret;
}

void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
{
	if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL))
		vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size);
	else
		vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size);
	vm->dirty_ring_size = ring_size;
}

static void vm_open(struct kvm_vm *vm)
{
	vm->kvm_fd = _open_kvm_dev_path_or_exit(O_RDWR);

	TEST_REQUIRE(kvm_has_cap(KVM_CAP_IMMEDIATE_EXIT));

	vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type);
	TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd));
}

const char *vm_guest_mode_string(uint32_t i)
{
	static const char * const strings[] = {
		[VM_MODE_P52V48_4K]	= "PA-bits:52, VA-bits:48, 4K pages",
		[VM_MODE_P52V48_64K]	= "PA-bits:52, VA-bits:48, 64K pages",
		[VM_MODE_P48V48_4K]	= "PA-bits:48, VA-bits:48, 4K pages",
		[VM_MODE_P48V48_16K]	= "PA-bits:48, VA-bits:48, 16K pages",
		[VM_MODE_P48V48_64K]	= "PA-bits:48, VA-bits:48, 64K pages",
		[VM_MODE_P40V48_4K]	= "PA-bits:40, VA-bits:48, 4K pages",
		[VM_MODE_P40V48_16K]	= "PA-bits:40, VA-bits:48, 16K pages",
		[VM_MODE_P40V48_64K]	= "PA-bits:40, VA-bits:48, 64K pages",
		[VM_MODE_PXXV48_4K]	= "PA-bits:ANY, VA-bits:48, 4K pages",
		[VM_MODE_P47V64_4K]	= "PA-bits:47, VA-bits:64, 4K pages",
		[VM_MODE_P44V64_4K]	= "PA-bits:44, VA-bits:64, 4K pages",
		[VM_MODE_P36V48_4K]	= "PA-bits:36, VA-bits:48, 4K pages",
		[VM_MODE_P36V48_16K]	= "PA-bits:36, VA-bits:48, 16K pages",
		[VM_MODE_P36V48_64K]	= "PA-bits:36, VA-bits:48, 64K pages",
		[VM_MODE_P36V47_16K]	= "PA-bits:36, VA-bits:47, 16K pages",
	};
	_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
		       "Missing new mode strings?");

	TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i);

	return strings[i];
}

const struct vm_guest_mode_params vm_guest_mode_params[] = {
	[VM_MODE_P52V48_4K]	= { 52, 48,  0x1000, 12 },
	[VM_MODE_P52V48_64K]	= { 52, 48, 0x10000, 16 },
	[VM_MODE_P48V48_4K]	= { 48, 48,  0x1000, 12 },
	[VM_MODE_P48V48_16K]	= { 48, 48,  0x4000, 14 },
	[VM_MODE_P48V48_64K]	= { 48, 48, 0x10000, 16 },
	[VM_MODE_P40V48_4K]	= { 40, 48,  0x1000, 12 },
	[VM_MODE_P40V48_16K]	= { 40, 48,  0x4000, 14 },
	[VM_MODE_P40V48_64K]	= { 40, 48, 0x10000, 16 },
	[VM_MODE_PXXV48_4K]	= {  0,  0,  0x1000, 12 },
	[VM_MODE_P47V64_4K]	= { 47, 64,  0x1000, 12 },
	[VM_MODE_P44V64_4K]	= { 44, 64,  0x1000, 12 },
	[VM_MODE_P36V48_4K]	= { 36, 48,  0x1000, 12 },
	[VM_MODE_P36V48_16K]	= { 36, 48,  0x4000, 14 },
	[VM_MODE_P36V48_64K]	= { 36, 48, 0x10000, 16 },
	[VM_MODE_P36V47_16K]	= { 36, 47,  0x4000, 14 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
	       "Missing new mode params?");

__weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
{
	sparsebit_set_num(vm->vpages_valid,
		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
	sparsebit_set_num(vm->vpages_valid,
		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
}

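/*
 * Example (illustrative sketch): a dirty-ring test would typically check
 * for the capability before sizing and enabling the ring on a freshly
 * created VM.  The count of 65536 entries is only an example value; the
 * size passed to vm_enable_dirty_ring() is in bytes.
 *
 *	if (kvm_has_cap(KVM_CAP_DIRTY_LOG_RING) ||
 *	    kvm_has_cap(KVM_CAP_DIRTY_LOG_RING_ACQ_REL))
 *		vm_enable_dirty_ring(vm, 65536 * sizeof(struct kvm_dirty_gfn));
 */
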
struct kvm_vm *____vm_create(enum vm_guest_mode mode)
{
	struct kvm_vm *vm;

	vm = calloc(1, sizeof(*vm));
	TEST_ASSERT(vm != NULL, "Insufficient Memory");

	INIT_LIST_HEAD(&vm->vcpus);
	vm->regions.gpa_tree = RB_ROOT;
	vm->regions.hva_tree = RB_ROOT;
	hash_init(vm->regions.slot_hash);

	vm->mode = mode;
	vm->type = 0;

	vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
	vm->va_bits = vm_guest_mode_params[mode].va_bits;
	vm->page_size = vm_guest_mode_params[mode].page_size;
	vm->page_shift = vm_guest_mode_params[mode].page_shift;

	/* Setup mode specific traits. */
	switch (vm->mode) {
	case VM_MODE_P52V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P52V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P48V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P48V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P40V48_4K:
	case VM_MODE_P36V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P40V48_64K:
	case VM_MODE_P36V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P48V48_16K:
	case VM_MODE_P40V48_16K:
	case VM_MODE_P36V48_16K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P36V47_16K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_PXXV48_4K:
#ifdef __x86_64__
		kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
		/*
		 * Ignore KVM support for 5-level paging (vm->va_bits == 57),
		 * it doesn't take effect unless CR4.LA57 is set, which it
		 * isn't for this VM_MODE.
		 */
		TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
			    "Linear address width (%d bits) not supported",
			    vm->va_bits);
		pr_debug("Guest physical address width detected: %d\n",
			 vm->pa_bits);
		vm->pgtable_levels = 4;
		vm->va_bits = 48;
#else
		TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
#endif
		break;
	case VM_MODE_P47V64_4K:
		vm->pgtable_levels = 5;
		break;
	case VM_MODE_P44V64_4K:
		vm->pgtable_levels = 5;
		break;
	default:
		TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
	}

#ifdef __aarch64__
	if (vm->pa_bits != 40)
		vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
#endif

	vm_open(vm);

	/* Limit to VA-bit canonical virtual addresses. */
	vm->vpages_valid = sparsebit_alloc();
	vm_vaddr_populate_bitmap(vm);

	/* Limit physical addresses to PA-bits. */
	vm->max_gfn = vm_compute_max_gfn(vm);

	/* Allocate and setup memory for guest. */
	vm->vpages_mapped = sparsebit_alloc();

	return vm;
}

static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
				     uint32_t nr_runnable_vcpus,
				     uint64_t extra_mem_pages)
{
	uint64_t nr_pages;

	TEST_ASSERT(nr_runnable_vcpus,
		    "Use vm_create_barebones() for VMs that _never_ have vCPUs\n");

	TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
		    "nr_vcpus = %d too large for host, max-vcpus = %d",
		    nr_runnable_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));

	/*
	 * Arbitrarily allocate 512 pages (2mb when page size is 4kb) for the
	 * test code and other per-VM assets that will be loaded into memslot0.
	 */
	nr_pages = 512;

	/* Account for the per-vCPU stacks on behalf of the test. */
	nr_pages += nr_runnable_vcpus * DEFAULT_STACK_PGS;

	/*
	 * Account for the number of pages needed for the page tables.  The
	 * maximum page table size for a memory region will be when the
	 * smallest page size is used. Considering each page contains x page
	 * table descriptors, the total extra size for page tables (for extra
	 * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
	 * than N/x*2.
	 */
	nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2;

	return vm_adjust_num_guest_pages(mode, nr_pages);
}

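/*
 * Worked example of the page table bound above (illustrative only),
 * assuming x = 512 descriptors per table page (4K pages, 8-byte entries):
 * backing N = 1,000,000 guest pages needs roughly
 * N/512 + N/512^2 + N/512^3 ~= 1954 + 4 + 1 pages of page tables, which is
 * comfortably below the 2 * N/512 = 3906 pages reserved above.
 */
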
struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
			   uint64_t nr_extra_pages)
{
	uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus,
						 nr_extra_pages);
	struct userspace_mem_region *slot0;
	struct kvm_vm *vm;
	int i;

	pr_debug("%s: mode='%s' pages='%ld'\n", __func__,
		 vm_guest_mode_string(mode), nr_pages);

	vm = ____vm_create(mode);

	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0);
	for (i = 0; i < NR_MEM_REGIONS; i++)
		vm->memslots[i] = 0;

	kvm_vm_elf_load(vm, program_invocation_name);

	/*
	 * TODO: Add proper defines to protect the library's memslots, and then
	 * carve out memslot1 for the ucall MMIO address.  KVM treats writes to
	 * read-only memslots as MMIO, and creating a read-only memslot for the
	 * MMIO region would prevent silently clobbering the MMIO region.
	 */
	slot0 = memslot2region(vm, 0);
	ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);

	kvm_arch_vm_post_create(vm);

	return vm;
}

/*
 * VM Create with customized parameters
 *
 * Input Args:
 *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
 *   nr_vcpus - VCPU count
 *   extra_mem_pages - Non-slot0 physical memory total size
 *   guest_code - Guest entry point
 *   vcpus - Array to be populated with the created vCPUs
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to opaque structure that describes the created VM.
 *
 * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
 * extra_mem_pages is only used to calculate the maximum page table size,
 * no real memory allocation for non-slot0 memory in this function.
 */
struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
				      uint64_t extra_mem_pages,
				      void *guest_code, struct kvm_vcpu *vcpus[])
{
	struct kvm_vm *vm;
	int i;

	TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array");

	vm = __vm_create(mode, nr_vcpus, extra_mem_pages);

	for (i = 0; i < nr_vcpus; ++i)
		vcpus[i] = vm_vcpu_add(vm, i, guest_code);

	return vm;
}

struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
					 uint64_t extra_mem_pages,
					 void *guest_code)
{
	struct kvm_vcpu *vcpus[1];
	struct kvm_vm *vm;

	vm = __vm_create_with_vcpus(VM_MODE_DEFAULT, 1, extra_mem_pages,
				    guest_code, vcpus);

	*vcpu = vcpus[0];
	return vm;
}

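/*
 * Example (illustrative sketch): the typical single-vCPU test boilerplate
 * built on the helper above.  guest_main is a placeholder for the test's
 * guest entry point; vcpu_run() and kvm_vm_free() are defined later in
 * this file.
 *
 *	struct kvm_vcpu *vcpu;
 *	struct kvm_vm *vm;
 *
 *	vm = __vm_create_with_one_vcpu(&vcpu, 0, guest_main);
 *	vcpu_run(vcpu);
 *	kvm_vm_free(vm);
 */
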
/*
 * VM Restart
 *
 * Input Args:
 *   vm - VM that has been released before
 *
 * Output Args: None
 *
 * Reopens the file descriptors associated with the VM and reinstates the
 * global state, such as the irqchip and the memory regions that are mapped
 * into the guest.
 */
void kvm_vm_restart(struct kvm_vm *vmp)
{
	int ctr;
	struct userspace_mem_region *region;

	vm_open(vmp);
	if (vmp->has_irqchip)
		vm_create_irqchip(vmp);

	hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
			    "  rc: %i errno: %i\n"
			    "  slot: %u flags: 0x%x\n"
			    "  guest_phys_addr: 0x%llx size: 0x%llx",
			    ret, errno, region->region.slot,
			    region->region.flags,
			    region->region.guest_phys_addr,
			    region->region.memory_size);
	}
}

__weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm,
					      uint32_t vcpu_id)
{
	return __vm_vcpu_add(vm, vcpu_id);
}

struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm)
{
	kvm_vm_restart(vm);

	return vm_vcpu_recreate(vm, 0);
}

void kvm_pin_this_task_to_pcpu(uint32_t pcpu)
{
	cpu_set_t mask;
	int r;

	CPU_ZERO(&mask);
	CPU_SET(pcpu, &mask);
	r = sched_setaffinity(0, sizeof(mask), &mask);
	TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.\n", pcpu);
}

static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
{
	uint32_t pcpu = atoi_non_negative("CPU number", cpu_str);

	TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask),
		    "Not allowed to run on pCPU '%d', check cgroups?\n", pcpu);
	return pcpu;
}

void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
			    int nr_vcpus)
{
	cpu_set_t allowed_mask;
	char *cpu, *cpu_list;
	char delim[2] = ",";
	int i, r;

	cpu_list = strdup(pcpus_string);
	TEST_ASSERT(cpu_list, "strdup() allocation failed.\n");

	r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
	TEST_ASSERT(!r, "sched_getaffinity() failed");

	cpu = strtok(cpu_list, delim);

	/* 1. Get all pcpus for vcpus. */
	for (i = 0; i < nr_vcpus; i++) {
		TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'\n", i);
		vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask);
		cpu = strtok(NULL, delim);
	}

	/* 2. Check if the main worker needs to be pinned. */
	if (cpu) {
		kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask));
		cpu = strtok(NULL, delim);
	}

	TEST_ASSERT(!cpu, "pCPU list contains trailing garbage characters '%s'", cpu);
	free(cpu_list);
}

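/*
 * Example (illustrative sketch): parsing a "-c" style pCPU list for a test
 * with three vCPUs, where an optional trailing entry pins the main task.
 * The string "0,1,2,3" and nr_vcpus = 3 are example values.
 *
 *	uint32_t vcpu_to_pcpu[3];
 *
 *	kvm_parse_vcpu_pinning("0,1,2,3", vcpu_to_pcpu, 3);
 *
 * Afterwards vcpu_to_pcpu[] holds pCPUs 0..2 for the vCPU worker threads to
 * pin themselves to, and the current task has been pinned to pCPU 3.
 */
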
/*
 * Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Searches for a region with any physical memory that overlaps with
 * any portion of the guest physical addresses from start to end
 * inclusive.  If multiple overlapping regions exist, a pointer to any
 * of the regions is returned.  Null is returned only when no overlapping
 * region exists.
 */
static struct userspace_mem_region *
userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
	struct rb_node *node;

	for (node = vm->regions.gpa_tree.rb_node; node; ) {
		struct userspace_mem_region *region =
			container_of(node, struct userspace_mem_region, gpa_node);
		uint64_t existing_start = region->region.guest_phys_addr;
		uint64_t existing_end = region->region.guest_phys_addr
			+ region->region.memory_size - 1;
		if (start <= existing_end && end >= existing_start)
			return region;

		if (start < existing_start)
			node = node->rb_left;
		else
			node = node->rb_right;
	}

	return NULL;
}

/*
 * KVM Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Public interface to userspace_mem_region_find. Allows tests to look up
 * the memslot datastructure for a given range of guest physical memory.
 */
struct kvm_userspace_memory_region *
kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
				 uint64_t end)
{
	struct userspace_mem_region *region;

	region = userspace_mem_region_find(vm, start, end);
	if (!region)
		return NULL;

	return &region->region;
}

__weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
{

}

/*
 * VM VCPU Remove
 *
 * Input Args:
 *   vcpu - VCPU to remove
 *
 * Output Args: None
 *
 * Return: None, TEST_ASSERT failures for all error conditions
 *
 * Removes a vCPU from a VM and frees its resources.
 */
static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
	int ret;

	if (vcpu->dirty_gfns) {
		ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
		TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
		vcpu->dirty_gfns = NULL;
	}

	ret = munmap(vcpu->run, vcpu_mmap_sz());
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));

	ret = close(vcpu->fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));

	list_del(&vcpu->list);

	vcpu_arch_free(vcpu);
	free(vcpu);
}

void kvm_vm_release(struct kvm_vm *vmp)
{
	struct kvm_vcpu *vcpu, *tmp;
	int ret;

	list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
		vm_vcpu_rm(vmp, vcpu);

	ret = close(vmp->fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));

	ret = close(vmp->kvm_fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
}

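/*
 * Example (illustrative sketch): looking up the memslot that backs a guest
 * physical range.  The gpa and size values are placeholders.
 *
 *	struct kvm_userspace_memory_region *r;
 *
 *	r = kvm_userspace_memory_region_find(vm, gpa, gpa + size - 1);
 *	if (r)
 *		pr_info("GPA 0x%lx lives in slot %u\n", gpa, r->slot);
 */
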
static void __vm_mem_region_delete(struct kvm_vm *vm,
				   struct userspace_mem_region *region,
				   bool unlink)
{
	int ret;

	if (unlink) {
		rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
		rb_erase(&region->hva_node, &vm->regions.hva_tree);
		hash_del(&region->slot_node);
	}

	region->region.memory_size = 0;
	vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	sparsebit_free(&region->unused_phy_pages);
	ret = munmap(region->mmap_start, region->mmap_size);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
	if (region->fd >= 0) {
		/* There's an extra map when using shared memory. */
		ret = munmap(region->mmap_alias, region->mmap_size);
		TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
		close(region->fd);
	}

	free(region);
}

/*
 * Destroys and frees the VM pointed to by vmp.
 */
void kvm_vm_free(struct kvm_vm *vmp)
{
	int ctr;
	struct hlist_node *node;
	struct userspace_mem_region *region;

	if (vmp == NULL)
		return;

	/* Free cached stats metadata and close FD */
	if (vmp->stats_fd) {
		free(vmp->stats_desc);
		close(vmp->stats_fd);
	}

	/* Free userspace_mem_regions. */
	hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
		__vm_mem_region_delete(vmp, region, false);

	/* Free sparsebit arrays. */
	sparsebit_free(&vmp->vpages_valid);
	sparsebit_free(&vmp->vpages_mapped);

	kvm_vm_release(vmp);

	/* Free the structure describing the VM. */
	free(vmp);
}

int kvm_memfd_alloc(size_t size, bool hugepages)
{
	int memfd_flags = MFD_CLOEXEC;
	int fd, r;

	if (hugepages)
		memfd_flags |= MFD_HUGETLB;

	fd = memfd_create("kvm_selftest", memfd_flags);
	TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd));

	r = ftruncate(fd, size);
	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r));

	r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));

	return fd;
}

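/*
 * Example (illustrative sketch): backing an extra host mapping with the
 * memfd helper above, e.g. to share memory between the test process and a
 * memslot.  "size" is a placeholder and must be a multiple of the (huge)
 * page size.
 *
 *	int fd = kvm_memfd_alloc(size, false);
 *	void *mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *
 *	TEST_ASSERT(mem != MAP_FAILED, "mmap() failed");
 */
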
/*
 * Memory Compare, host virtual to guest virtual
 *
 * Input Args:
 *   hva - Starting host virtual address
 *   vm - Virtual Machine
 *   gva - Starting guest virtual address
 *   len - number of bytes to compare
 *
 * Output Args: None
 *
 * Input/Output Args: None
 *
 * Return:
 *   Returns 0 if the bytes starting at hva for a length of len
 *   are equal to the guest virtual bytes starting at gva.  Returns
 *   a value < 0, if bytes at hva are less than those at gva.
 *   Otherwise a value > 0 is returned.
 *
 * Compares the bytes starting at the host virtual address hva, for
 * a length of len, to the guest bytes starting at the guest virtual
 * address given by gva.
 */
int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
{
	size_t amt;

	/*
	 * Compare a batch of bytes until either a mismatch is found
	 * or all the bytes have been compared.
	 */
	for (uintptr_t offset = 0; offset < len; offset += amt) {
		uintptr_t ptr1 = (uintptr_t)hva + offset;

		/*
		 * Determine host address for guest virtual address
		 * at offset.
		 */
		uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);

		/*
		 * Determine amount to compare on this pass.
		 * Don't allow the comparison to cross a page boundary.
		 */
		amt = len - offset;
		if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
			amt = vm->page_size - (ptr1 % vm->page_size);
		if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
			amt = vm->page_size - (ptr2 % vm->page_size);

		assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
		assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));

		/*
		 * Perform the comparison.  If there is a difference
		 * return that result to the caller, otherwise need
		 * to continue on looking for a mismatch.
		 */
		int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
		if (ret != 0)
			return ret;
	}

	/*
	 * No mismatch found.  Let the caller know the two memory
	 * areas are equal.
	 */
	return 0;
}

static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), gpa_node);
		parent = *cur;
		if (region->region.guest_phys_addr <
		    cregion->region.guest_phys_addr)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->region.guest_phys_addr !=
				    cregion->region.guest_phys_addr,
				    "Duplicate GPA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->gpa_node, parent, cur);
	rb_insert_color(&region->gpa_node, gpa_tree);
}

static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), hva_node);
		parent = *cur;
		if (region->host_mem < cregion->host_mem)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->host_mem !=
				    cregion->host_mem,
				    "Duplicate HVA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->hva_node, parent, cur);
	rb_insert_color(&region->hva_node, hva_tree);
}


int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
				uint64_t gpa, uint64_t size, void *hva)
{
	struct kvm_userspace_memory_region region = {
		.slot = slot,
		.flags = flags,
		.guest_phys_addr = gpa,
		.memory_size = size,
		.userspace_addr = (uintptr_t)hva,
	};

	return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region);
}

void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
			       uint64_t gpa, uint64_t size, void *hva)
{
	int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed, errno = %d (%s)",
		    errno, strerror(errno));
}

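/*
 * Example (illustrative sketch): creating a raw memslot directly from host
 * memory, e.g. for tests that probe KVM_SET_USER_MEMORY_REGION behaviour.
 * slot, gpa and size are placeholders; the backing must be page-aligned.
 *
 *	void *backing = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *			     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 *	vm_set_user_memory_region(vm, slot, 0, gpa, size, backing);
 */
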
/*
 * VM Userspace Memory Region Add
 *
 * Input Args:
 *   vm - Virtual Machine
 *   src_type - Storage source for this region.
 *              Use VM_MEM_SRC_ANONYMOUS for anonymous memory.
 *   guest_paddr - Starting guest physical address
 *   slot - KVM region slot
 *   npages - Number of physical pages
 *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
 *
 * Output Args: None
 *
 * Return: None
 *
 * Allocates a memory area of the number of pages specified by npages
 * and maps it to the VM specified by vm, at a starting physical address
 * given by guest_paddr.  The region is created with a KVM region slot
 * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM.  The
 * region is created with the flags given by flags.
 */
void vm_userspace_mem_region_add(struct kvm_vm *vm,
	enum vm_mem_backing_src_type src_type,
	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
	uint32_t flags)
{
	int ret;
	struct userspace_mem_region *region;
	size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
	size_t alignment;

	TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
		"Number of guest pages is not compatible with the host. "
		"Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));

	TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
		"address not on a page boundary.\n"
		"  guest_paddr: 0x%lx vm->page_size: 0x%x",
		guest_paddr, vm->page_size);
	TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
		<= vm->max_gfn, "Physical range beyond maximum "
		"supported physical address,\n"
		"  guest_paddr: 0x%lx npages: 0x%lx\n"
		"  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		guest_paddr, npages, vm->max_gfn, vm->page_size);

	/*
	 * Confirm a mem region with an overlapping address doesn't
	 * already exist.
	 */
	region = (struct userspace_mem_region *) userspace_mem_region_find(
		vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
	if (region != NULL)
		TEST_FAIL("overlapping userspace_mem_region already "
			"exists\n"
			"  requested guest_paddr: 0x%lx npages: 0x%lx "
			"page_size: 0x%x\n"
			"  existing guest_paddr: 0x%lx size: 0x%lx",
			guest_paddr, npages, vm->page_size,
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size);

	/* Confirm no region with the requested slot already exists. */
	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
			       slot) {
		if (region->region.slot != slot)
			continue;

		TEST_FAIL("A mem region with the requested slot "
			"already exists.\n"
			"  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
			"  existing slot: %u paddr: 0x%lx size: 0x%lx",
			slot, guest_paddr, npages,
			region->region.slot,
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size);
	}

	/* Allocate and initialize new mem region structure. */
	region = calloc(1, sizeof(*region));
	TEST_ASSERT(region != NULL, "Insufficient Memory");
	region->mmap_size = npages * vm->page_size;

#ifdef __s390x__
	/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
	alignment = 0x100000;
#else
	alignment = 1;
#endif

	/*
	 * When using THP, mmap is not guaranteed to return a hugepage-aligned
	 * address, so we have to pad the mmap.  Padding is not needed for
	 * HugeTLB because mmap will always return an address aligned to the
	 * HugeTLB page size.
	 */
	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
		alignment = max(backing_src_pagesz, alignment);

	ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));

	/* Add enough memory to align up if necessary */
	if (alignment > 1)
		region->mmap_size += alignment;

	region->fd = -1;
	if (backing_src_is_shared(src_type))
		region->fd = kvm_memfd_alloc(region->mmap_size,
					     src_type == VM_MEM_SRC_SHARED_HUGETLB);

	region->mmap_start = mmap(NULL, region->mmap_size,
				  PROT_READ | PROT_WRITE,
				  vm_mem_backing_src_alias(src_type)->flag,
				  region->fd, 0);
	TEST_ASSERT(region->mmap_start != MAP_FAILED,
		    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));

	TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
		    region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
		    "mmap_start %p is not aligned to HugeTLB page size 0x%lx",
		    region->mmap_start, backing_src_pagesz);

	/* Align host address */
	region->host_mem = align_ptr_up(region->mmap_start, alignment);

	/* As needed perform madvise */
	if ((src_type == VM_MEM_SRC_ANONYMOUS ||
	     src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
		ret = madvise(region->host_mem, npages * vm->page_size,
			      src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
		TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
			    region->host_mem, npages * vm->page_size,
			    vm_mem_backing_src_alias(src_type)->name);
	}

	region->backing_src_type = src_type;
	region->unused_phy_pages = sparsebit_alloc();
	sparsebit_set_num(region->unused_phy_pages,
			  guest_paddr >> vm->page_shift, npages);
	region->region.slot = slot;
	region->region.flags = flags;
	region->region.guest_phys_addr = guest_paddr;
	region->region.memory_size = npages * vm->page_size;
	region->region.userspace_addr = (uintptr_t) region->host_mem;
	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);
	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
		"  rc: %i errno: %i\n"
		"  slot: %u flags: 0x%x\n"
		"  guest_phys_addr: 0x%lx size: 0x%lx",
		ret, errno, slot, flags,
		guest_paddr, (uint64_t) region->region.memory_size);

	/* Add to quick lookup data structures */
	vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
	vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
	hash_add(vm->regions.slot_hash, &region->slot_node, slot);

	/* If shared memory, create an alias. */
	if (region->fd >= 0) {
		region->mmap_alias = mmap(NULL, region->mmap_size,
					  PROT_READ | PROT_WRITE,
					  vm_mem_backing_src_alias(src_type)->flag,
					  region->fd, 0);
		TEST_ASSERT(region->mmap_alias != MAP_FAILED,
			    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));

		/* Align host alias address */
		region->host_alias = align_ptr_up(region->mmap_alias, alignment);
	}
}

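/*
 * Example (illustrative sketch): adding a dirty-loggable test memslot on
 * top of the library's slot 0.  TEST_MEM_SLOT_INDEX, guest_test_phys_mem
 * and guest_num_pages are placeholders chosen by the individual test.
 *
 *	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
 *				    guest_test_phys_mem, TEST_MEM_SLOT_INDEX,
 *				    guest_num_pages, KVM_MEM_LOG_DIRTY_PAGES);
 */
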
/*
 * Memslot to region
 *
 * Input Args:
 *   vm - Virtual Machine
 *   memslot - KVM memory slot ID
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to memory region structure that describes the memory region
 *   using kvm memory slot ID given by memslot.  TEST_ASSERT failure
 *   on error (e.g. currently no memory region using memslot as a KVM
 *   memory slot ID).
 */
struct userspace_mem_region *
memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
	struct userspace_mem_region *region;

	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
			       memslot)
		if (region->region.slot == memslot)
			return region;

	fprintf(stderr, "No mem region with the requested slot found,\n"
		"  requested slot: %u\n", memslot);
	fputs("---- vm dump ----\n", stderr);
	vm_dump(stderr, vm, 2);
	TEST_FAIL("Mem region not found");
	return NULL;
}

/*
 * VM Memory Region Flags Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to update
 *   flags - Flags for the memory region (e.g. KVM_MEM_LOG_DIRTY_PAGES)
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the flags of the memory region specified by the value of slot,
 * to the values given by flags.
 */
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
{
	int ret;
	struct userspace_mem_region *region;

	region = memslot2region(vm, slot);

	region->region.flags = flags;

	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
		"  rc: %i errno: %i slot: %u flags: 0x%x",
		ret, errno, slot, flags);
}

/*
 * VM Memory Region Move
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to move
 *   new_gpa - Starting guest physical address
 *
 * Output Args: None
 *
 * Return: None
 *
 * Change the gpa of a memory region.
 */
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
{
	struct userspace_mem_region *region;
	int ret;

	region = memslot2region(vm, slot);

	region->region.guest_phys_addr = new_gpa;

	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
		    "ret: %i errno: %i slot: %u new_gpa: 0x%lx",
		    ret, errno, slot, new_gpa);
}

/*
 * VM Memory Region Delete
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to delete
 *
 * Output Args: None
 *
 * Return: None
 *
 * Delete a memory region.
 */
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
	__vm_mem_region_delete(vm, memslot2region(vm, slot), true);
}

/* Returns the size of a vCPU's kvm_run structure. */
static int vcpu_mmap_sz(void)
{
	int dev_fd, ret;

	dev_fd = open_kvm_dev_path_or_exit();

	ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
	TEST_ASSERT(ret >= sizeof(struct kvm_run),
		    KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret));

	close(dev_fd);

	return ret;
}

static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id)
{
	struct kvm_vcpu *vcpu;

	list_for_each_entry(vcpu, &vm->vcpus, list) {
		if (vcpu->id == vcpu_id)
			return true;
	}

	return false;
}

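/*
 * Example (illustrative sketch): toggling dirty logging on an existing
 * memslot between iterations of a test.  "slot" is a placeholder.
 *
 *	vm_mem_region_set_flags(vm, slot, KVM_MEM_LOG_DIRTY_PAGES);
 *	... run the guest and harvest the dirty log ...
 *	vm_mem_region_set_flags(vm, slot, 0);
 */
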
/*
 * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id.
 * No additional vCPU setup is done.  Returns the vCPU.
 */
struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
{
	struct kvm_vcpu *vcpu;

	/* Confirm a vcpu with the specified id doesn't already exist. */
	TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists\n", vcpu_id);

	/* Allocate and initialize new vcpu structure. */
	vcpu = calloc(1, sizeof(*vcpu));
	TEST_ASSERT(vcpu != NULL, "Insufficient Memory");

	vcpu->vm = vm;
	vcpu->id = vcpu_id;
	vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id);
	TEST_ASSERT(vcpu->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu->fd));

	TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size "
		"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
		vcpu_mmap_sz(), sizeof(*vcpu->run));
	vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
		PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
	TEST_ASSERT(vcpu->run != MAP_FAILED,
		    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));

	/* Add to linked-list of VCPUs. */
	list_add(&vcpu->list, &vm->vcpus);

	return vcpu;
}

/*
 * VM Virtual Address Unused Gap
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size (bytes)
 *   vaddr_min - Minimum Virtual Address
 *
 * Output Args: None
 *
 * Return:
 *   Lowest virtual address at or above vaddr_min, with at least
 *   sz unused bytes.  TEST_ASSERT failure if no area of at least
 *   size sz is available.
 *
 * Within the VM specified by vm, locates the lowest starting virtual
 * address >= vaddr_min, that has at least sz unallocated bytes.  A
 * TEST_ASSERT failure occurs for invalid input or no area of at least
 * sz unallocated bytes >= vaddr_min is available.
 */
vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
			       vm_vaddr_t vaddr_min)
{
	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;

	/* Determine lowest permitted virtual page index. */
	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
	if ((pgidx_start * vm->page_size) < vaddr_min)
		goto no_va_found;

	/* Loop over section with enough valid virtual page indexes. */
	if (!sparsebit_is_set_num(vm->vpages_valid,
				  pgidx_start, pages))
		pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
						     pgidx_start, pages);
	do {
		/*
		 * Are there enough unused virtual pages available at
		 * the currently proposed starting virtual page index?
		 * If not, adjust proposed starting index to next
		 * possible.
		 */
		if (sparsebit_is_clear_num(vm->vpages_mapped,
					   pgidx_start, pages))
			goto va_found;
		pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
						       pgidx_start, pages);
		if (pgidx_start == 0)
			goto no_va_found;

		/*
		 * If needed, adjust proposed starting virtual address,
		 * to next range of valid virtual addresses.
		 */
		if (!sparsebit_is_set_num(vm->vpages_valid,
					  pgidx_start, pages)) {
			pgidx_start = sparsebit_next_set_num(
				vm->vpages_valid, pgidx_start, pages);
			if (pgidx_start == 0)
				goto no_va_found;
		}
	} while (pgidx_start != 0);

no_va_found:
	TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages);

	/* NOT REACHED */
	return -1;

va_found:
	TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
					 pgidx_start, pages),
		    "Unexpected, invalid virtual page index range,\n"
		    "  pgidx_start: 0x%lx\n"
		    "  pages: 0x%lx",
		    pgidx_start, pages);
	TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
					   pgidx_start, pages),
		    "Unexpected, pages already mapped,\n"
		    "  pgidx_start: 0x%lx\n"
		    "  pages: 0x%lx",
		    pgidx_start, pages);

	return pgidx_start * vm->page_size;
}

vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
			    enum kvm_mem_region_type type)
{
	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);

	virt_pgd_alloc(vm);
	vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
					      KVM_UTIL_MIN_PFN * vm->page_size,
					      vm->memslots[type]);

	/*
	 * Find an unused range of virtual page addresses of at least
	 * pages in length.
	 */
	vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);

	/* Map the virtual pages. */
	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
	     pages--, vaddr += vm->page_size, paddr += vm->page_size) {

		virt_pg_map(vm, vaddr, paddr);

		sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
	}

	return vaddr_start;
}

/*
 * VM Virtual Address Allocate
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size in bytes
 *   vaddr_min - Minimum starting virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least sz bytes within the virtual address space of the vm
 * given by vm.  The allocated bytes are mapped to a virtual address >=
 * the address given by vaddr_min.  Note that each allocation uses a
 * unique set of pages, with the minimum real allocation being at least
 * a page.  The allocated physical space comes from the TEST_DATA memory region.
 */
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
{
	return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA);
}

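/*
 * Example (illustrative sketch): handing the guest a scratch buffer.  The
 * guest sees the returned GVA, while the host writes through the mirrored
 * HVA obtained from addr_gva2hva() (defined later in this file).
 *
 *	vm_vaddr_t gva = vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR);
 *	uint8_t *hva = addr_gva2hva(vm, gva);
 *
 *	memset(hva, 0xaa, getpagesize());
 */
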
/*
 * VM Virtual Address Allocate Pages
 *
 * Input Args:
 *   vm - Virtual Machine
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least N system pages worth of bytes within the virtual address
 * space of the vm.
 */
vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
{
	return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
}

vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type)
{
	return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type);
}

/*
 * VM Virtual Address Allocate Page
 *
 * Input Args:
 *   vm - Virtual Machine
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least one system page worth of bytes within the virtual address
 * space of the vm.
 */
vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
{
	return vm_vaddr_alloc_pages(vm, 1);
}

/*
 * Map a range of VM virtual address to the VM's physical address
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vaddr - Virtual address to map
 *   paddr - VM Physical Address
 *   npages - The number of pages to map
 *
 * Output Args: None
 *
 * Return: None
 *
 * Within the VM given by @vm, creates a virtual translation for
 * @npages starting at @vaddr to the page range starting at @paddr.
 */
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
	      unsigned int npages)
{
	size_t page_size = vm->page_size;
	size_t size = npages * page_size;

	TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");

	while (npages--) {
		virt_pg_map(vm, vaddr, paddr);
		vaddr += page_size;
		paddr += page_size;

		sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
	}
}

/*
 * Address VM Physical to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gpa - VM physical address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 *
 * Locates the memory region containing the VM physical address given
 * by gpa, within the VM given by vm.  When found, the host virtual
 * address providing the memory to the vm physical address is returned.
 * A TEST_ASSERT failure occurs if no region containing gpa exists.
 */
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
{
	struct userspace_mem_region *region;

	region = userspace_mem_region_find(vm, gpa, gpa);
	if (!region) {
		TEST_FAIL("No vm physical memory at 0x%lx", gpa);
		return NULL;
	}

	return (void *)((uintptr_t)region->host_mem
		+ (gpa - region->region.guest_phys_addr));
}

/*
 * Address Host Virtual to VM Physical
 *
 * Input Args:
 *   vm - Virtual Machine
 *   hva - Host virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent VM physical address
 *
 * Locates the memory region containing the host virtual address given
 * by hva, within the VM given by vm.  When found, the equivalent
 * VM physical address is returned.  A TEST_ASSERT failure occurs if no
 * region containing hva exists.
 */
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
{
	struct rb_node *node;

	for (node = vm->regions.hva_tree.rb_node; node; ) {
		struct userspace_mem_region *region =
			container_of(node, struct userspace_mem_region, hva_node);

		if (hva >= region->host_mem) {
			if (hva <= (region->host_mem
				+ region->region.memory_size - 1))
				return (vm_paddr_t)((uintptr_t)
					region->region.guest_phys_addr
					+ (hva - (uintptr_t)region->host_mem));

			node = node->rb_right;
		} else
			node = node->rb_left;
	}

	TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
	return -1;
}

/*
 * Address VM physical to Host Virtual *alias*.
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gpa - VM physical address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent address within the host virtual *alias* area, or NULL
 *   (without failing the test) if the guest memory is not shared (so
 *   no alias exists).
 *
 * Create a writable, shared virtual=>physical alias for the specific GPA.
 * The primary use case is to allow the host selftest to manipulate guest
 * memory without mapping said memory in the guest's address space. And, for
 * userfaultfd-based demand paging, to do so without triggering userfaults.
 */
void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
{
	struct userspace_mem_region *region;
	uintptr_t offset;

	region = userspace_mem_region_find(vm, gpa, gpa);
	if (!region)
		return NULL;

	if (!region->host_alias)
		return NULL;

	offset = gpa - region->region.guest_phys_addr;
	return (void *) ((uintptr_t) region->host_alias + offset);
}

/* Create an interrupt controller chip for the specified VM. */
void vm_create_irqchip(struct kvm_vm *vm)
{
	vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);

	vm->has_irqchip = true;
}

int _vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc;

	do {
		rc = __vcpu_run(vcpu);
	} while (rc == -1 && errno == EINTR);

	assert_on_unhandled_exception(vcpu);

	return rc;
}

/*
 * Invoke KVM_RUN on a vCPU until KVM returns something other than -EINTR.
 * Assert if KVM returns an error (other than -EINTR).
 */
void vcpu_run(struct kvm_vcpu *vcpu)
{
	int ret = _vcpu_run(vcpu);

	TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_RUN, ret));
}

void vcpu_run_complete_io(struct kvm_vcpu *vcpu)
{
	int ret;

	vcpu->run->immediate_exit = 1;
	ret = __vcpu_run(vcpu);
	vcpu->run->immediate_exit = 0;

	TEST_ASSERT(ret == -1 && errno == EINTR,
		    "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
		    ret, errno);
}

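/*
 * Example (illustrative sketch): the writable alias above is what lets a
 * userfaultfd-based demand paging test pre-populate guest memory without
 * faulting in the primary mapping.  gpa and size are placeholders.
 *
 *	void *alias = addr_gpa2alias(vm, gpa);
 *
 *	if (alias)
 *		memset(alias, 0, size);
 */
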
/*
 * Get the list of guest registers which are supported for
 * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls.  Returns a kvm_reg_list pointer;
 * it is the caller's responsibility to free the list.
 */
struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu)
{
	struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
	int ret;

	ret = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &reg_list_n);
	TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");

	reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
	reg_list->n = reg_list_n.n;
	vcpu_ioctl(vcpu, KVM_GET_REG_LIST, reg_list);
	return reg_list;
}

void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
{
	uint32_t page_size = getpagesize();
	uint32_t size = vcpu->vm->dirty_ring_size;

	TEST_ASSERT(size > 0, "Should enable dirty ring first");

	if (!vcpu->dirty_gfns) {
		void *addr;

		addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");

		addr = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_PRIVATE, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");

		addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");

		vcpu->dirty_gfns = addr;
		vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
	}

	return vcpu->dirty_gfns;
}

/*
 * Device Ioctl
 */

int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
{
	struct kvm_device_attr attribute = {
		.group = group,
		.attr = attr,
		.flags = 0,
	};

	return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute);
}

int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type)
{
	struct kvm_create_device create_dev = {
		.type = type,
		.flags = KVM_CREATE_DEVICE_TEST,
	};

	return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
}

int __kvm_create_device(struct kvm_vm *vm, uint64_t type)
{
	struct kvm_create_device create_dev = {
		.type = type,
		.fd = -1,
		.flags = 0,
	};
	int err;

	err = __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
	TEST_ASSERT(err <= 0, "KVM_CREATE_DEVICE shouldn't return a positive value");
	return err ? : create_dev.fd;
}

int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
	struct kvm_device_attr kvmattr = {
		.group = group,
		.attr = attr,
		.flags = 0,
		.addr = (uintptr_t)val,
	};

	return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr);
}

int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
	struct kvm_device_attr kvmattr = {
		.group = group,
		.attr = attr,
		.flags = 0,
		.addr = (uintptr_t)val,
	};

	return __kvm_ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &kvmattr);
}

/*
 * IRQ related functions.
 */

int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
	struct kvm_irq_level irq_level = {
		.irq = irq,
		.level = level,
	};

	return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
}

void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
	int ret = _kvm_irq_line(vm, irq, level);

	TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
}

struct kvm_irq_routing *kvm_gsi_routing_create(void)
{
	struct kvm_irq_routing *routing;
	size_t size;

	size = sizeof(struct kvm_irq_routing);
	/* Allocate space for the max number of entries: this wastes 196 KBs. */
	size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry);
	routing = calloc(1, size);
	assert(routing);

	return routing;
}

void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
				 uint32_t gsi, uint32_t pin)
{
	int i;

	assert(routing);
	assert(routing->nr < KVM_MAX_IRQ_ROUTES);

	i = routing->nr;
	routing->entries[i].gsi = gsi;
	routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
	routing->entries[i].flags = 0;
	routing->entries[i].u.irqchip.irqchip = 0;
	routing->entries[i].u.irqchip.pin = pin;
	routing->nr++;
}

int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
	int ret;

	assert(routing);
	ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
	free(routing);

	return ret;
}

void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
	int ret;

	ret = _kvm_gsi_routing_write(vm, routing);
	TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_GSI_ROUTING, ret));
}

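/*
 * Example (illustrative sketch): building and committing a GSI routing
 * table.  Note that kvm_gsi_routing_write() frees the routing table, so it
 * must not be reused afterwards.  gsi and pin are placeholders.
 *
 *	struct kvm_irq_routing *routing = kvm_gsi_routing_create();
 *
 *	kvm_gsi_routing_irqchip_add(routing, gsi, pin);
 *	kvm_gsi_routing_write(vm, routing);
 */
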
/*
 * VM Dump
 *
 * Input Args:
 *   vm - Virtual Machine
 *   indent - Left margin indent amount
 *   stream - Output FILE stream
 *
 * Output Args: None
 *
 * Return: None
 *
 * Dumps the current state of the VM given by vm, to the FILE stream
 * given by stream.
 */
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
	int ctr;
	struct userspace_mem_region *region;
	struct kvm_vcpu *vcpu;

	fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
	fprintf(stream, "%*sMem Regions:\n", indent, "");
	hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
		fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
			"host_virt: %p\n", indent + 2, "",
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size,
			region->host_mem);
		fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
		sparsebit_dump(stream, region->unused_phy_pages, 0);
	}
	fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
	sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
	fprintf(stream, "%*spgd_created: %u\n", indent, "",
		vm->pgd_created);
	if (vm->pgd_created) {
		fprintf(stream, "%*sVirtual Translation Tables:\n",
			indent + 2, "");
		virt_dump(stream, vm, indent + 4);
	}
	fprintf(stream, "%*sVCPUs:\n", indent, "");

	list_for_each_entry(vcpu, &vm->vcpus, list)
		vcpu_dump(stream, vcpu, indent + 2);
}

/* Known KVM exit reasons */
static struct exit_reason {
	unsigned int reason;
	const char *name;
} exit_reasons_known[] = {
	{KVM_EXIT_UNKNOWN, "UNKNOWN"},
	{KVM_EXIT_EXCEPTION, "EXCEPTION"},
	{KVM_EXIT_IO, "IO"},
	{KVM_EXIT_HYPERCALL, "HYPERCALL"},
	{KVM_EXIT_DEBUG, "DEBUG"},
	{KVM_EXIT_HLT, "HLT"},
	{KVM_EXIT_MMIO, "MMIO"},
	{KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
	{KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
	{KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
	{KVM_EXIT_INTR, "INTR"},
	{KVM_EXIT_SET_TPR, "SET_TPR"},
	{KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
	{KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
	{KVM_EXIT_S390_RESET, "S390_RESET"},
	{KVM_EXIT_DCR, "DCR"},
	{KVM_EXIT_NMI, "NMI"},
	{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
	{KVM_EXIT_OSI, "OSI"},
	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
	{KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
	{KVM_EXIT_X86_RDMSR, "RDMSR"},
	{KVM_EXIT_X86_WRMSR, "WRMSR"},
	{KVM_EXIT_XEN, "XEN"},
#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
#endif
};

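/*
 * Example (illustrative sketch): the table above typically backs assertion
 * messages when a test gets an unexpected exit, via exit_reason_str()
 * defined below, e.g.
 *
 *	TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_IO,
 *		    "Unexpected exit reason: %u (%s)",
 *		    vcpu->run->exit_reason,
 *		    exit_reason_str(vcpu->run->exit_reason));
 */
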
/*
 * Exit Reason String
 *
 * Input Args:
 *   exit_reason - Exit reason
 *
 * Output Args: None
 *
 * Return:
 *   Constant string pointer describing the exit reason.
 *
 * Locates and returns a constant string that describes the KVM exit
 * reason given by exit_reason.  If no such string is found, a constant
 * string of "Unknown" is returned.
 */
const char *exit_reason_str(unsigned int exit_reason)
{
	unsigned int n1;

	for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
		if (exit_reason == exit_reasons_known[n1].reason)
			return exit_reasons_known[n1].name;
	}

	return "Unknown";
}

/*
 * Physical Contiguous Page Allocator
 *
 * Input Args:
 *   vm - Virtual Machine
 *   num - number of pages
 *   paddr_min - Physical address minimum
 *   memslot - Memory region to allocate page from
 *
 * Output Args: None
 *
 * Return:
 *   Starting physical address
 *
 * Within the VM specified by vm, locates a range of available physical
 * pages at or above paddr_min.  If found, the pages are marked as in use
 * and their base address is returned.  A TEST_ASSERT failure occurs if
 * not enough pages are available at or above paddr_min.
 */
vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
			      vm_paddr_t paddr_min, uint32_t memslot)
{
	struct userspace_mem_region *region;
	sparsebit_idx_t pg, base;

	TEST_ASSERT(num > 0, "Must allocate at least one page");

	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
		"not divisible by page size.\n"
		"  paddr_min: 0x%lx page_size: 0x%x",
		paddr_min, vm->page_size);

	region = memslot2region(vm, memslot);
	base = pg = paddr_min >> vm->page_shift;

	do {
		for (; pg < base + num; ++pg) {
			if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
				base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
				break;
			}
		}
	} while (pg && pg != base + num);

	if (pg == 0) {
		fprintf(stderr, "No guest physical page available, "
			"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
			paddr_min, vm->page_size, memslot);
		fputs("---- vm dump ----\n", stderr);
		vm_dump(stderr, vm, 2);
		abort();
	}

	for (pg = base; pg < base + num; ++pg)
		sparsebit_clear(region->unused_phy_pages, pg);

	return base * vm->page_size;
}

vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
			     uint32_t memslot)
{
	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
}

/* Arbitrary minimum physical address used for virtual translation tables. */
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000

vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
{
	return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR,
				 vm->memslots[MEM_REGION_PT]);
}

/*
 * Address Guest Virtual to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gva - VM virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 */
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
{
	return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
}

unsigned long __weak vm_compute_max_gfn(struct kvm_vm *vm)
{
	return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
}

static unsigned int vm_calc_num_pages(unsigned int num_pages,
				      unsigned int page_shift,
				      unsigned int new_page_shift,
				      bool ceil)
{
	unsigned int n = 1 << (new_page_shift - page_shift);

	if (page_shift >= new_page_shift)
		return num_pages * (1 << (page_shift - new_page_shift));

	return num_pages / n + !!(ceil && num_pages % n);
}

static inline int getpageshift(void)
{
	return __builtin_ffs(getpagesize()) - 1;
}

unsigned int
vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
{
	return vm_calc_num_pages(num_guest_pages,
				 vm_guest_mode_params[mode].page_shift,
				 getpageshift(), true);
}

unsigned int
vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages)
{
	return vm_calc_num_pages(num_host_pages, getpageshift(),
				 vm_guest_mode_params[mode].page_shift, false);
}

unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
{
	unsigned int n;
	n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
	return vm_adjust_num_guest_pages(mode, n);
}
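
/*
 * Worked example (illustrative only), assuming getpagesize() == 4096 on
 * the host and a guest mode with 64K pages such as VM_MODE_P40V48_64K
 * (guest page_shift 16 vs. host page_shift 12):
 *
 *	vm_num_host_pages(mode, 1)   == 16	// one 64K guest page spans 16 host pages
 *	vm_num_guest_pages(mode, 16) == 1	// rounds down, since ceil is false
 */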
/*
 * Read binary stats descriptors
 *
 * Input Args:
 *   stats_fd - the file descriptor for the binary stats file from which to read
 *   header - the binary stats metadata header corresponding to the given FD
 *
 * Output Args: None
 *
 * Return:
 *   A pointer to a newly allocated series of stat descriptors.
 *   Caller is responsible for freeing the returned kvm_stats_desc.
 *
 * Read the stats descriptors from the binary stats interface.
 */
struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
					      struct kvm_stats_header *header)
{
	struct kvm_stats_desc *stats_desc;
	ssize_t desc_size, total_size, ret;

	desc_size = get_stats_descriptor_size(header);
	total_size = header->num_desc * desc_size;

	stats_desc = calloc(header->num_desc, desc_size);
	TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors");

	ret = pread(stats_fd, stats_desc, total_size, header->desc_offset);
	TEST_ASSERT(ret == total_size, "Read KVM stats descriptors");

	return stats_desc;
}

/*
 * Read stat data for a particular stat
 *
 * Input Args:
 *   stats_fd - the file descriptor for the binary stats file from which to read
 *   header - the binary stats metadata header corresponding to the given FD
 *   desc - the binary stat metadata for the particular stat to be read
 *   max_elements - the maximum number of 8-byte values to read into data
 *
 * Output Args:
 *   data - the buffer into which stat data should be read
 *
 * Read the data values of a specified stat from the binary stats interface.
 */
void read_stat_data(int stats_fd, struct kvm_stats_header *header,
		    struct kvm_stats_desc *desc, uint64_t *data,
		    size_t max_elements)
{
	size_t nr_elements = min_t(ssize_t, desc->size, max_elements);
	size_t size = nr_elements * sizeof(*data);
	ssize_t ret;

	TEST_ASSERT(desc->size, "No elements in stat '%s'", desc->name);
	TEST_ASSERT(max_elements, "Zero elements requested for stat '%s'", desc->name);

	ret = pread(stats_fd, data, size,
		    header->data_offset + desc->offset);

	TEST_ASSERT(ret >= 0, "pread() failed on stat '%s', errno: %i (%s)",
		    desc->name, errno, strerror(errno));
	TEST_ASSERT(ret == size,
		    "pread() on stat '%s' read %ld bytes, wanted %lu bytes",
		    desc->name, ret, size);
}

/*
 * Read the data of the named stat
 *
 * Input Args:
 *   vm - the VM for which the stat should be read
 *   stat_name - the name of the stat to read
 *   max_elements - the maximum number of 8-byte values to read into data
 *
 * Output Args:
 *   data - the buffer into which stat data should be read
 *
 * Read the data values of a specified stat from the binary stats interface.
 */
void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
		   size_t max_elements)
{
	struct kvm_stats_desc *desc;
	size_t size_desc;
	int i;

	if (!vm->stats_fd) {
		vm->stats_fd = vm_get_stats_fd(vm);
		read_stats_header(vm->stats_fd, &vm->stats_header);
		vm->stats_desc = read_stats_descriptors(vm->stats_fd,
							&vm->stats_header);
	}

	size_desc = get_stats_descriptor_size(&vm->stats_header);

	for (i = 0; i < vm->stats_header.num_desc; ++i) {
		desc = (void *)vm->stats_desc + (i * size_desc);

		if (strcmp(desc->name, stat_name))
			continue;

		read_stat_data(vm->stats_fd, &vm->stats_header, desc,
			       data, max_elements);

		break;
	}
}

__weak void kvm_arch_vm_post_create(struct kvm_vm *vm)
{
}

__weak void kvm_selftest_arch_init(void)
{
}

void __attribute((constructor)) kvm_selftest_init(void)
{
	/* Tell stdout not to buffer its content. */
	setbuf(stdout, NULL);

	kvm_selftest_arch_init();
}
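
/*
 * Illustrative usage of the binary stats helpers above (a sketch only, not
 * built as part of this library). "remote_tlb_flush" is used here as an
 * example stat name from the generic VM stats; any stat name published by
 * the kernel for the VM can be read the same way:
 *
 *	uint64_t remote_tlb_flushes;
 *
 *	__vm_get_stat(vm, "remote_tlb_flush", &remote_tlb_flushes, 1);
 *	pr_info("remote_tlb_flush: %lu\n", remote_tlb_flushes);
 */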