// SPDX-License-Identifier: GPL-2.0-only
/*
 * tools/testing/selftests/kvm/lib/kvm_util.c
 *
 * Copyright (C) 2018, Google LLC.
 */

#define _GNU_SOURCE /* for program_invocation_name */
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"

#include <assert.h>
#include <sched.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <linux/kernel.h>

#define KVM_UTIL_MIN_PFN	2

static int vcpu_mmap_sz(void);

int open_path_or_exit(const char *path, int flags)
{
	int fd;

	fd = open(path, flags);
	__TEST_REQUIRE(fd >= 0, "%s not available (errno: %d)", path, errno);

	return fd;
}

/*
 * Open KVM_DEV_PATH if available, otherwise exit the entire program.
 *
 * Input Args:
 *   flags - The flags to pass when opening KVM_DEV_PATH.
 *
 * Return:
 *   The opened file descriptor of /dev/kvm.
 */
static int _open_kvm_dev_path_or_exit(int flags)
{
	return open_path_or_exit(KVM_DEV_PATH, flags);
}

int open_kvm_dev_path_or_exit(void)
{
	return _open_kvm_dev_path_or_exit(O_RDONLY);
}

static bool get_module_param_bool(const char *module_name, const char *param)
{
	const int path_size = 128;
	char path[path_size];
	char value;
	ssize_t r;
	int fd;

	r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
		     module_name, param);
	TEST_ASSERT(r < path_size,
		    "Failed to construct sysfs path in %d bytes.", path_size);

	fd = open_path_or_exit(path, O_RDONLY);

	r = read(fd, &value, 1);
	TEST_ASSERT(r == 1, "read(%s) failed", path);

	r = close(fd);
	TEST_ASSERT(!r, "close(%s) failed", path);

	if (value == 'Y')
		return true;
	else if (value == 'N')
		return false;

	TEST_FAIL("Unrecognized value '%c' for boolean module param", value);
}

bool get_kvm_intel_param_bool(const char *param)
{
	return get_module_param_bool("kvm_intel", param);
}

bool get_kvm_amd_param_bool(const char *param)
{
	return get_module_param_bool("kvm_amd", param);
}
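
/*
 * Illustrative usage (not part of this file): a test that depends on a
 * vendor module parameter can bail out early when the param is disabled.
 * The "nested" parameter name below is only an example.
 *
 *	TEST_REQUIRE(get_kvm_intel_param_bool("nested"));
 */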

/*
 * Capability
 *
 * Input Args:
 *   cap - Capability
 *
 * Output Args: None
 *
 * Return:
 *   On success, the Value corresponding to the capability (KVM_CAP_*)
 *   specified by the value of cap.  On failure a TEST_ASSERT failure
 *   is produced.
 *
 * Looks up and returns the value corresponding to the capability
 * (KVM_CAP_*) given by cap.
 */
unsigned int kvm_check_cap(long cap)
{
	int ret;
	int kvm_fd;

	kvm_fd = open_kvm_dev_path_or_exit();
	ret = __kvm_ioctl(kvm_fd, KVM_CHECK_EXTENSION, (void *)cap);
	TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret));

	close(kvm_fd);

	return (unsigned int)ret;
}

void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
{
	if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL))
		vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size);
	else
		vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size);
	vm->dirty_ring_size = ring_size;
}

static void vm_open(struct kvm_vm *vm)
{
	vm->kvm_fd = _open_kvm_dev_path_or_exit(O_RDWR);

	TEST_REQUIRE(kvm_has_cap(KVM_CAP_IMMEDIATE_EXIT));

	vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type);
	TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd));
}

const char *vm_guest_mode_string(uint32_t i)
{
	static const char * const strings[] = {
		[VM_MODE_P52V48_4K]	= "PA-bits:52, VA-bits:48, 4K pages",
		[VM_MODE_P52V48_64K]	= "PA-bits:52, VA-bits:48, 64K pages",
		[VM_MODE_P48V48_4K]	= "PA-bits:48, VA-bits:48, 4K pages",
		[VM_MODE_P48V48_16K]	= "PA-bits:48, VA-bits:48, 16K pages",
		[VM_MODE_P48V48_64K]	= "PA-bits:48, VA-bits:48, 64K pages",
		[VM_MODE_P40V48_4K]	= "PA-bits:40, VA-bits:48, 4K pages",
		[VM_MODE_P40V48_16K]	= "PA-bits:40, VA-bits:48, 16K pages",
		[VM_MODE_P40V48_64K]	= "PA-bits:40, VA-bits:48, 64K pages",
		[VM_MODE_PXXV48_4K]	= "PA-bits:ANY, VA-bits:48, 4K pages",
		[VM_MODE_P47V64_4K]	= "PA-bits:47, VA-bits:64, 4K pages",
		[VM_MODE_P44V64_4K]	= "PA-bits:44, VA-bits:64, 4K pages",
		[VM_MODE_P36V48_4K]	= "PA-bits:36, VA-bits:48, 4K pages",
		[VM_MODE_P36V48_16K]	= "PA-bits:36, VA-bits:48, 16K pages",
		[VM_MODE_P36V48_64K]	= "PA-bits:36, VA-bits:48, 64K pages",
		[VM_MODE_P36V47_16K]	= "PA-bits:36, VA-bits:47, 16K pages",
	};
	_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
		       "Missing new mode strings?");

	TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i);

	return strings[i];
}

const struct vm_guest_mode_params vm_guest_mode_params[] = {
	[VM_MODE_P52V48_4K]	= { 52, 48,  0x1000, 12 },
	[VM_MODE_P52V48_64K]	= { 52, 48, 0x10000, 16 },
	[VM_MODE_P48V48_4K]	= { 48, 48,  0x1000, 12 },
	[VM_MODE_P48V48_16K]	= { 48, 48,  0x4000, 14 },
	[VM_MODE_P48V48_64K]	= { 48, 48, 0x10000, 16 },
	[VM_MODE_P40V48_4K]	= { 40, 48,  0x1000, 12 },
	[VM_MODE_P40V48_16K]	= { 40, 48,  0x4000, 14 },
	[VM_MODE_P40V48_64K]	= { 40, 48, 0x10000, 16 },
	[VM_MODE_PXXV48_4K]	= {  0,  0,  0x1000, 12 },
	[VM_MODE_P47V64_4K]	= { 47, 64,  0x1000, 12 },
	[VM_MODE_P44V64_4K]	= { 44, 64,  0x1000, 12 },
	[VM_MODE_P36V48_4K]	= { 36, 48,  0x1000, 12 },
	[VM_MODE_P36V48_16K]	= { 36, 48,  0x4000, 14 },
	[VM_MODE_P36V48_64K]	= { 36, 48, 0x10000, 16 },
	[VM_MODE_P36V47_16K]	= { 36, 47,  0x4000, 14 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
	       "Missing new mode params?");
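
/*
 * Illustrative sketch (hypothetical test code): the dirty ring helper
 * above is normally gated on the capability, and the size passed to
 * vm_enable_dirty_ring() is in bytes, e.g. for "count" entries:
 *
 *	if (kvm_has_cap(KVM_CAP_DIRTY_LOG_RING) ||
 *	    kvm_has_cap(KVM_CAP_DIRTY_LOG_RING_ACQ_REL))
 *		vm_enable_dirty_ring(vm, count * sizeof(struct kvm_dirty_gfn));
 */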

/*
 * Initializes vm->vpages_valid to match the canonical VA space of the
 * architecture.
 *
 * The default implementation is valid for architectures which split the
 * range addressed by a single page table into a low and high region
 * based on the MSB of the VA.  On architectures with this behavior
 * the VA region spans [0, 2^(va_bits - 1)), [-(2^(va_bits - 1)), -1].
 */
__weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
{
	sparsebit_set_num(vm->vpages_valid,
		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
	sparsebit_set_num(vm->vpages_valid,
		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
}

struct kvm_vm *____vm_create(enum vm_guest_mode mode)
{
	struct kvm_vm *vm;

	vm = calloc(1, sizeof(*vm));
	TEST_ASSERT(vm != NULL, "Insufficient Memory");

	INIT_LIST_HEAD(&vm->vcpus);
	vm->regions.gpa_tree = RB_ROOT;
	vm->regions.hva_tree = RB_ROOT;
	hash_init(vm->regions.slot_hash);

	vm->mode = mode;
	vm->type = 0;

	vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
	vm->va_bits = vm_guest_mode_params[mode].va_bits;
	vm->page_size = vm_guest_mode_params[mode].page_size;
	vm->page_shift = vm_guest_mode_params[mode].page_shift;

	/* Setup mode specific traits. */
	switch (vm->mode) {
	case VM_MODE_P52V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P52V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P48V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P48V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P40V48_4K:
	case VM_MODE_P36V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P40V48_64K:
	case VM_MODE_P36V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P48V48_16K:
	case VM_MODE_P40V48_16K:
	case VM_MODE_P36V48_16K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P36V47_16K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_PXXV48_4K:
#ifdef __x86_64__
		kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
		/*
		 * Ignore KVM support for 5-level paging (vm->va_bits == 57),
		 * it doesn't take effect unless CR4.LA57 is set, which it
		 * isn't for this VM_MODE.
		 */
		TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
			    "Linear address width (%d bits) not supported",
			    vm->va_bits);
		pr_debug("Guest physical address width detected: %d\n",
			 vm->pa_bits);
		vm->pgtable_levels = 4;
		vm->va_bits = 48;
#else
		TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
#endif
		break;
	case VM_MODE_P47V64_4K:
		vm->pgtable_levels = 5;
		break;
	case VM_MODE_P44V64_4K:
		vm->pgtable_levels = 5;
		break;
	default:
		TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
	}

#ifdef __aarch64__
	if (vm->pa_bits != 40)
		vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
#endif

	vm_open(vm);

	/* Limit to VA-bit canonical virtual addresses. */
	vm->vpages_valid = sparsebit_alloc();
	vm_vaddr_populate_bitmap(vm);

	/* Limit physical addresses to PA-bits. */
	vm->max_gfn = vm_compute_max_gfn(vm);

	/* Allocate and setup memory for guest. */
	vm->vpages_mapped = sparsebit_alloc();

	return vm;
}

static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
				     uint32_t nr_runnable_vcpus,
				     uint64_t extra_mem_pages)
{
	uint64_t nr_pages;

	TEST_ASSERT(nr_runnable_vcpus,
		    "Use vm_create_barebones() for VMs that _never_ have vCPUs\n");

	TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
		    "nr_vcpus = %d too large for host, max-vcpus = %d",
		    nr_runnable_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));

	/*
	 * Arbitrarily allocate 512 pages (2MB when page size is 4KB) for the
	 * test code and other per-VM assets that will be loaded into memslot0.
	 */
	nr_pages = 512;

	/* Account for the per-vCPU stacks on behalf of the test. */
	nr_pages += nr_runnable_vcpus * DEFAULT_STACK_PGS;

	/*
	 * Account for the number of pages needed for the page tables.  The
	 * maximum page table size for a memory region will be when the
	 * smallest page size is used. Considering each page contains x page
	 * table descriptors, the total extra size for page tables (for extra
	 * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
	 * than N/x*2.
	 */
	nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2;

	return vm_adjust_num_guest_pages(mode, nr_pages);
}

struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
			   uint64_t nr_extra_pages)
{
	uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus,
						 nr_extra_pages);
	struct userspace_mem_region *slot0;
	struct kvm_vm *vm;
	int i;

	pr_debug("%s: mode='%s' pages='%ld'\n", __func__,
		 vm_guest_mode_string(mode), nr_pages);

	vm = ____vm_create(mode);

	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0);
	for (i = 0; i < NR_MEM_REGIONS; i++)
		vm->memslots[i] = 0;

	kvm_vm_elf_load(vm, program_invocation_name);

	/*
	 * TODO: Add proper defines to protect the library's memslots, and then
	 * carve out memslot1 for the ucall MMIO address.  KVM treats writes to
	 * read-only memslots as MMIO, and creating a read-only memslot for the
	 * MMIO region would prevent silently clobbering the MMIO region.
	 */
	slot0 = memslot2region(vm, 0);
	ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);

	kvm_arch_vm_post_create(vm);

	return vm;
}

/*
 * VM Create with customized parameters
 *
 * Input Args:
 *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
 *   nr_vcpus - VCPU count
 *   extra_mem_pages - Non-slot0 physical memory total size
 *   guest_code - Guest entry point
 *
 * Output Args:
 *   vcpus - Array populated with the created vCPUs
 *
 * Return:
 *   Pointer to opaque structure that describes the created VM.
 *
 * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
 * extra_mem_pages is only used to calculate the maximum page table size,
 * no real memory allocation for non-slot0 memory in this function.
 */
struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
				      uint64_t extra_mem_pages,
				      void *guest_code, struct kvm_vcpu *vcpus[])
{
	struct kvm_vm *vm;
	int i;

	TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array");

	vm = __vm_create(mode, nr_vcpus, extra_mem_pages);

	for (i = 0; i < nr_vcpus; ++i)
		vcpus[i] = vm_vcpu_add(vm, i, guest_code);

	return vm;
}

struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
					 uint64_t extra_mem_pages,
					 void *guest_code)
{
	struct kvm_vcpu *vcpus[1];
	struct kvm_vm *vm;

	vm = __vm_create_with_vcpus(VM_MODE_DEFAULT, 1, extra_mem_pages,
				    guest_code, vcpus);

	*vcpu = vcpus[0];
	return vm;
}

/*
 * VM Restart
 *
 * Input Args:
 *   vm - VM that has been released before
 *
 * Output Args: None
 *
 * Reopens the file descriptors associated with the VM and reinstates the
 * global state, such as the irqchip and the memory regions that are mapped
 * into the guest.
 */
void kvm_vm_restart(struct kvm_vm *vmp)
{
	int ctr;
	struct userspace_mem_region *region;

	vm_open(vmp);
	if (vmp->has_irqchip)
		vm_create_irqchip(vmp);

	hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
			    "  rc: %i errno: %i\n"
			    "  slot: %u flags: 0x%x\n"
			    "  guest_phys_addr: 0x%llx size: 0x%llx",
			    ret, errno, region->region.slot,
			    region->region.flags,
			    region->region.guest_phys_addr,
			    region->region.memory_size);
	}
}

__weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm,
					      uint32_t vcpu_id)
{
	return __vm_vcpu_add(vm, vcpu_id);
}

struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm)
{
	kvm_vm_restart(vm);

	return vm_vcpu_recreate(vm, 0);
}

void kvm_pin_this_task_to_pcpu(uint32_t pcpu)
{
	cpu_set_t mask;
	int r;

	CPU_ZERO(&mask);
	CPU_SET(pcpu, &mask);
	r = sched_setaffinity(0, sizeof(mask), &mask);
	TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.\n", pcpu);
}

static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
{
	uint32_t pcpu = atoi_non_negative("CPU number", cpu_str);

	TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask),
		    "Not allowed to run on pCPU '%d', check cgroups?\n", pcpu);
	return pcpu;
}

void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
			    int nr_vcpus)
{
	cpu_set_t allowed_mask;
	char *cpu, *cpu_list;
	char delim[2] = ",";
	int i, r;

	cpu_list = strdup(pcpus_string);
	TEST_ASSERT(cpu_list, "strdup() allocation failed.\n");

	r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
	TEST_ASSERT(!r, "sched_getaffinity() failed");

	cpu = strtok(cpu_list, delim);

	/* 1. Get all pcpus for vcpus. */
	for (i = 0; i < nr_vcpus; i++) {
		TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'\n", i);
		vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask);
		cpu = strtok(NULL, delim);
	}

	/* 2. Check if the main worker needs to be pinned. */
	if (cpu) {
		kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask));
		cpu = strtok(NULL, delim);
	}

	TEST_ASSERT(!cpu, "pCPU list contains trailing garbage characters '%s'", cpu);
	free(cpu_list);
}
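
/*
 * Illustrative usage (hypothetical test code): a test that accepts a
 * "-c 1,2,3" style option can translate it into per-vCPU pinning with
 * the helpers above, where NR_VCPUS and vcpu_idx stand in for the
 * test's own definitions:
 *
 *	uint32_t vcpu_to_pcpu[NR_VCPUS];
 *
 *	kvm_parse_vcpu_pinning(optarg, vcpu_to_pcpu, NR_VCPUS);
 *	...
 *	kvm_pin_this_task_to_pcpu(vcpu_to_pcpu[vcpu_idx]);
 */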

/*
 * Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Searches for a region with any physical memory that overlaps with
 * any portion of the guest physical addresses from start to end
 * inclusive.  If multiple overlapping regions exist, a pointer to any
 * of the regions is returned.  Null is returned only when no overlapping
 * region exists.
 */
static struct userspace_mem_region *
userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
	struct rb_node *node;

	for (node = vm->regions.gpa_tree.rb_node; node; ) {
		struct userspace_mem_region *region =
			container_of(node, struct userspace_mem_region, gpa_node);
		uint64_t existing_start = region->region.guest_phys_addr;
		uint64_t existing_end = region->region.guest_phys_addr
			+ region->region.memory_size - 1;
		if (start <= existing_end && end >= existing_start)
			return region;

		if (start < existing_start)
			node = node->rb_left;
		else
			node = node->rb_right;
	}

	return NULL;
}

/*
 * KVM Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Public interface to userspace_mem_region_find.  Allows tests to look up
 * the memslot data structure for a given range of guest physical memory.
 */
struct kvm_userspace_memory_region *
kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
				 uint64_t end)
{
	struct userspace_mem_region *region;

	region = userspace_mem_region_find(vm, start, end);
	if (!region)
		return NULL;

	return &region->region;
}

__weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
{

}

/*
 * VM VCPU Remove
 *
 * Input Args:
 *   vcpu - VCPU to remove
 *
 * Output Args: None
 *
 * Return: None, TEST_ASSERT failures for all error conditions
 *
 * Removes a vCPU from a VM and frees its resources.
 */
static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
	int ret;

	if (vcpu->dirty_gfns) {
		ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
		TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
		vcpu->dirty_gfns = NULL;
	}

	ret = munmap(vcpu->run, vcpu_mmap_sz());
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));

	ret = close(vcpu->fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));

	list_del(&vcpu->list);

	vcpu_arch_free(vcpu);
	free(vcpu);
}

void kvm_vm_release(struct kvm_vm *vmp)
{
	struct kvm_vcpu *vcpu, *tmp;
	int ret;

	list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
		vm_vcpu_rm(vmp, vcpu);

	ret = close(vmp->fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));

	ret = close(vmp->kvm_fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
}

static void __vm_mem_region_delete(struct kvm_vm *vm,
				   struct userspace_mem_region *region,
				   bool unlink)
{
	int ret;

	if (unlink) {
		rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
		rb_erase(&region->hva_node, &vm->regions.hva_tree);
		hash_del(&region->slot_node);
	}

	region->region.memory_size = 0;
	vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	sparsebit_free(&region->unused_phy_pages);
	ret = munmap(region->mmap_start, region->mmap_size);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
	if (region->fd >= 0) {
		/* There's an extra map when using shared memory. */
		ret = munmap(region->mmap_alias, region->mmap_size);
		TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
		close(region->fd);
	}

	free(region);
}

/*
 * Destroys and frees the VM pointed to by vmp.
 */
void kvm_vm_free(struct kvm_vm *vmp)
{
	int ctr;
	struct hlist_node *node;
	struct userspace_mem_region *region;

	if (vmp == NULL)
		return;

	/* Free cached stats metadata and close FD */
	if (vmp->stats_fd) {
		free(vmp->stats_desc);
		close(vmp->stats_fd);
	}

	/* Free userspace_mem_regions. */
	hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
		__vm_mem_region_delete(vmp, region, false);

	/* Free sparsebit arrays. */
	sparsebit_free(&vmp->vpages_valid);
	sparsebit_free(&vmp->vpages_mapped);

	kvm_vm_release(vmp);

	/* Free the structure describing the VM. */
	free(vmp);
}

int kvm_memfd_alloc(size_t size, bool hugepages)
{
	int memfd_flags = MFD_CLOEXEC;
	int fd, r;

	if (hugepages)
		memfd_flags |= MFD_HUGETLB;

	fd = memfd_create("kvm_selftest", memfd_flags);
	TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd));

	r = ftruncate(fd, size);
	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r));

	r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));

	return fd;
}

/*
 * Memory Compare, host virtual to guest virtual
 *
 * Input Args:
 *   hva - Starting host virtual address
 *   vm - Virtual Machine
 *   gva - Starting guest virtual address
 *   len - number of bytes to compare
 *
 * Output Args: None
 *
 * Input/Output Args: None
 *
 * Return:
 *   Returns 0 if the bytes starting at hva for a length of len
 *   are equal to the guest virtual bytes starting at gva.  Returns
 *   a value < 0, if bytes at hva are less than those at gva.
 *   Otherwise a value > 0 is returned.
 *
 * Compares the bytes starting at the host virtual address hva, for
 * a length of len, to the guest bytes starting at the guest virtual
 * address given by gva.
 */
int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
{
	size_t amt;

	/*
	 * Compare a batch of bytes until either a match is found
	 * or all the bytes have been compared.
	 */
	for (uintptr_t offset = 0; offset < len; offset += amt) {
		uintptr_t ptr1 = (uintptr_t)hva + offset;

		/*
		 * Determine host address for guest virtual address
		 * at offset.
		 */
		uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);

		/*
		 * Determine amount to compare on this pass.
		 * Don't allow the comparison to cross a page boundary.
		 */
		amt = len - offset;
		if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
			amt = vm->page_size - (ptr1 % vm->page_size);
		if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
			amt = vm->page_size - (ptr2 % vm->page_size);

		assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
		assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));

		/*
		 * Perform the comparison.  If there is a difference
		 * return that result to the caller, otherwise need
		 * to continue on looking for a mismatch.
		 */
		int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
		if (ret != 0)
			return ret;
	}

	/*
	 * No mismatch found.  Let the caller know the two memory
	 * areas are equal.
	 */
	return 0;
}

static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), gpa_node);
		parent = *cur;
		if (region->region.guest_phys_addr <
		    cregion->region.guest_phys_addr)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->region.guest_phys_addr !=
				    cregion->region.guest_phys_addr,
				    "Duplicate GPA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->gpa_node, parent, cur);
	rb_insert_color(&region->gpa_node, gpa_tree);
}

static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), hva_node);
		parent = *cur;
		if (region->host_mem < cregion->host_mem)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->host_mem !=
				    cregion->host_mem,
				    "Duplicate HVA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->hva_node, parent, cur);
	rb_insert_color(&region->hva_node, hva_tree);
}


int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
				uint64_t gpa, uint64_t size, void *hva)
{
	struct kvm_userspace_memory_region region = {
		.slot = slot,
		.flags = flags,
		.guest_phys_addr = gpa,
		.memory_size = size,
		.userspace_addr = (uintptr_t)hva,
	};

	return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region);
}
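
/*
 * Illustrative usage (hypothetical test code): the double-underscore
 * variant returns the raw ioctl() result so tests can verify that KVM
 * rejects an invalid region instead of asserting on failure, e.g.:
 *
 *	r = __vm_set_user_memory_region(vm, slot, 0, gpa, size, hva);
 *	TEST_ASSERT(r == -1 && errno == EINVAL, "Expected EINVAL");
 *
 * The EINVAL expectation depends on what the test passes in; it is only
 * an example here.
 */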

void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
			       uint64_t gpa, uint64_t size, void *hva)
{
	int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed, errno = %d (%s)",
		    errno, strerror(errno));
}

/*
 * VM Userspace Memory Region Add
 *
 * Input Args:
 *   vm - Virtual Machine
 *   src_type - Storage source for this region.
 *              NULL to use anonymous memory.
 *   guest_paddr - Starting guest physical address
 *   slot - KVM region slot
 *   npages - Number of physical pages
 *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
 *
 * Output Args: None
 *
 * Return: None
 *
 * Allocates a memory area of the number of pages specified by npages
 * and maps it to the VM specified by vm, at a starting physical address
 * given by guest_paddr.  The region is created with a KVM region slot
 * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM.  The
 * region is created with the flags given by flags.
 */
void vm_userspace_mem_region_add(struct kvm_vm *vm,
	enum vm_mem_backing_src_type src_type,
	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
	uint32_t flags)
{
	int ret;
	struct userspace_mem_region *region;
	size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
	size_t alignment;

	TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
		"Number of guest pages is not compatible with the host. "
		"Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));

	TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
		"address not on a page boundary.\n"
		"  guest_paddr: 0x%lx vm->page_size: 0x%x",
		guest_paddr, vm->page_size);
	TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
		<= vm->max_gfn, "Physical range beyond maximum "
		"supported physical address,\n"
		"  guest_paddr: 0x%lx npages: 0x%lx\n"
		"  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		guest_paddr, npages, vm->max_gfn, vm->page_size);

	/*
	 * Confirm a mem region with an overlapping address doesn't
	 * already exist.
	 */
	region = (struct userspace_mem_region *) userspace_mem_region_find(
		vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
	if (region != NULL)
		TEST_FAIL("overlapping userspace_mem_region already "
			"exists\n"
			"  requested guest_paddr: 0x%lx npages: 0x%lx "
			"page_size: 0x%x\n"
			"  existing guest_paddr: 0x%lx size: 0x%lx",
			guest_paddr, npages, vm->page_size,
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size);

	/* Confirm no region with the requested slot already exists. */
	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
			       slot) {
		if (region->region.slot != slot)
			continue;

		TEST_FAIL("A mem region with the requested slot "
			"already exists.\n"
			"  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
			"  existing slot: %u paddr: 0x%lx size: 0x%lx",
			slot, guest_paddr, npages,
			region->region.slot,
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size);
	}

	/* Allocate and initialize new mem region structure. */
	region = calloc(1, sizeof(*region));
	TEST_ASSERT(region != NULL, "Insufficient Memory");
	region->mmap_size = npages * vm->page_size;

#ifdef __s390x__
	/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
	alignment = 0x100000;
#else
	alignment = 1;
#endif

	/*
	 * When using THP mmap is not guaranteed to return a hugepage aligned
	 * address so we have to pad the mmap. Padding is not needed for HugeTLB
	 * because mmap will always return an address aligned to the HugeTLB
	 * page size.
	 */
	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
		alignment = max(backing_src_pagesz, alignment);

	ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));

	/* Add enough memory to align up if necessary */
	if (alignment > 1)
		region->mmap_size += alignment;

	region->fd = -1;
	if (backing_src_is_shared(src_type))
		region->fd = kvm_memfd_alloc(region->mmap_size,
					     src_type == VM_MEM_SRC_SHARED_HUGETLB);

	region->mmap_start = mmap(NULL, region->mmap_size,
				  PROT_READ | PROT_WRITE,
				  vm_mem_backing_src_alias(src_type)->flag,
				  region->fd, 0);
	TEST_ASSERT(region->mmap_start != MAP_FAILED,
		    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));

	TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
		    region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
		    "mmap_start %p is not aligned to HugeTLB page size 0x%lx",
		    region->mmap_start, backing_src_pagesz);

	/* Align host address */
	region->host_mem = align_ptr_up(region->mmap_start, alignment);

	/* As needed perform madvise */
	if ((src_type == VM_MEM_SRC_ANONYMOUS ||
	     src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
		ret = madvise(region->host_mem, npages * vm->page_size,
			      src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
		TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
			    region->host_mem, npages * vm->page_size,
			    vm_mem_backing_src_alias(src_type)->name);
	}

	region->backing_src_type = src_type;
	region->unused_phy_pages = sparsebit_alloc();
	sparsebit_set_num(region->unused_phy_pages,
		guest_paddr >> vm->page_shift, npages);
	region->region.slot = slot;
	region->region.flags = flags;
	region->region.guest_phys_addr = guest_paddr;
	region->region.memory_size = npages * vm->page_size;
	region->region.userspace_addr = (uintptr_t) region->host_mem;
	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);
	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
		"  rc: %i errno: %i\n"
		"  slot: %u flags: 0x%x\n"
		"  guest_phys_addr: 0x%lx size: 0x%lx",
		ret, errno, slot, flags,
		guest_paddr, (uint64_t) region->region.memory_size);

	/* Add to quick lookup data structures */
	vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
	vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
	hash_add(vm->regions.slot_hash, &region->slot_node, slot);

	/* If shared memory, create an alias. */
	if (region->fd >= 0) {
		region->mmap_alias = mmap(NULL, region->mmap_size,
					  PROT_READ | PROT_WRITE,
					  vm_mem_backing_src_alias(src_type)->flag,
					  region->fd, 0);
		TEST_ASSERT(region->mmap_alias != MAP_FAILED,
			    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));

		/* Align host alias address */
		region->host_alias = align_ptr_up(region->mmap_alias, alignment);
	}
}

/*
 * Memslot to region
 *
 * Input Args:
 *   vm - Virtual Machine
 *   memslot - KVM memory slot ID
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to memory region structure that describes the memory region
 *   using kvm memory slot ID given by memslot.  TEST_ASSERT failure
 *   on error (e.g. currently no memory region using memslot as a KVM
 *   memory slot ID).
 */
struct userspace_mem_region *
memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
	struct userspace_mem_region *region;

	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
			       memslot)
		if (region->region.slot == memslot)
			return region;

	fprintf(stderr, "No mem region with the requested slot found,\n"
		"  requested slot: %u\n", memslot);
	fputs("---- vm dump ----\n", stderr);
	vm_dump(stderr, vm, 2);
	TEST_FAIL("Mem region not found");
	return NULL;
}

/*
 * VM Memory Region Flags Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to modify
 *   flags - Flags to set for the memory region (e.g. KVM_MEM_LOG_DIRTY_PAGES)
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the flags of the memory region specified by the value of slot,
 * to the values given by flags.
 */
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
{
	int ret;
	struct userspace_mem_region *region;

	region = memslot2region(vm, slot);

	region->region.flags = flags;

	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
		"  rc: %i errno: %i slot: %u flags: 0x%x",
		ret, errno, slot, flags);
}

/*
 * VM Memory Region Move
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to move
 *   new_gpa - Starting guest physical address
 *
 * Output Args: None
 *
 * Return: None
 *
 * Change the gpa of a memory region.
 */
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
{
	struct userspace_mem_region *region;
	int ret;

	region = memslot2region(vm, slot);

	region->region.guest_phys_addr = new_gpa;

	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
		    "ret: %i errno: %i slot: %u new_gpa: 0x%lx",
		    ret, errno, slot, new_gpa);
}

/*
 * VM Memory Region Delete
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to delete
 *
 * Output Args: None
 *
 * Return: None
 *
 * Delete a memory region.
 */
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
	__vm_mem_region_delete(vm, memslot2region(vm, slot), true);
}
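
/*
 * Illustrative usage (hypothetical test code): once a region has been
 * added, tests manipulate it by slot ID, e.g. toggling dirty logging,
 * moving it, or deleting it.  TEST_GPA, TEST_SLOT, TEST_NPAGES and
 * NEW_GPA are placeholders:
 *
 *	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
 *				    TEST_GPA, TEST_SLOT, TEST_NPAGES, 0);
 *	vm_mem_region_set_flags(vm, TEST_SLOT, KVM_MEM_LOG_DIRTY_PAGES);
 *	vm_mem_region_move(vm, TEST_SLOT, NEW_GPA);
 *	vm_mem_region_delete(vm, TEST_SLOT);
 */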

/* Returns the size of a vCPU's kvm_run structure. */
static int vcpu_mmap_sz(void)
{
	int dev_fd, ret;

	dev_fd = open_kvm_dev_path_or_exit();

	ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
	TEST_ASSERT(ret >= sizeof(struct kvm_run),
		    KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret));

	close(dev_fd);

	return ret;
}

static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id)
{
	struct kvm_vcpu *vcpu;

	list_for_each_entry(vcpu, &vm->vcpus, list) {
		if (vcpu->id == vcpu_id)
			return true;
	}

	return false;
}

/*
 * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id.
 * No additional vCPU setup is done.  Returns the vCPU.
 */
struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
{
	struct kvm_vcpu *vcpu;

	/* Confirm a vcpu with the specified id doesn't already exist. */
	TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists\n", vcpu_id);

	/* Allocate and initialize new vcpu structure. */
	vcpu = calloc(1, sizeof(*vcpu));
	TEST_ASSERT(vcpu != NULL, "Insufficient Memory");

	vcpu->vm = vm;
	vcpu->id = vcpu_id;
	vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id);
	TEST_ASSERT(vcpu->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu->fd));

	TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size "
		"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
		vcpu_mmap_sz(), sizeof(*vcpu->run));
	vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
		PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
	TEST_ASSERT(vcpu->run != MAP_FAILED,
		    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));

	/* Add to linked-list of VCPUs. */
	list_add(&vcpu->list, &vm->vcpus);

	return vcpu;
}

/*
 * VM Virtual Address Unused Gap
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size (bytes)
 *   vaddr_min - Minimum Virtual Address
 *
 * Output Args: None
 *
 * Return:
 *   Lowest virtual address at or above vaddr_min, with at least
 *   sz unused bytes.  TEST_ASSERT failure if no area of at least
 *   size sz is available.
 *
 * Within the VM specified by vm, locates the lowest starting virtual
 * address >= vaddr_min, that has at least sz unallocated bytes.  A
 * TEST_ASSERT failure occurs for invalid input or no area of at least
 * sz unallocated bytes >= vaddr_min is available.
 */
vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
			       vm_vaddr_t vaddr_min)
{
	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;

	/* Determine lowest permitted virtual page index. */
	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
	if ((pgidx_start * vm->page_size) < vaddr_min)
		goto no_va_found;

	/* Loop over section with enough valid virtual page indexes. */
	if (!sparsebit_is_set_num(vm->vpages_valid,
				  pgidx_start, pages))
		pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
						     pgidx_start, pages);
	do {
		/*
		 * Are there enough unused virtual pages available at
		 * the currently proposed starting virtual page index.
		 * If not, adjust proposed starting index to next
		 * possible.
		 */
		if (sparsebit_is_clear_num(vm->vpages_mapped,
					   pgidx_start, pages))
			goto va_found;
		pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
						       pgidx_start, pages);
		if (pgidx_start == 0)
			goto no_va_found;

		/*
		 * If needed, adjust proposed starting virtual address,
		 * to next range of valid virtual addresses.
		 */
		if (!sparsebit_is_set_num(vm->vpages_valid,
					  pgidx_start, pages)) {
			pgidx_start = sparsebit_next_set_num(
				vm->vpages_valid, pgidx_start, pages);
			if (pgidx_start == 0)
				goto no_va_found;
		}
	} while (pgidx_start != 0);

no_va_found:
	TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages);

	/* NOT REACHED */
	return -1;

va_found:
	TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
					 pgidx_start, pages),
		    "Unexpected, invalid virtual page index range,\n"
		    "  pgidx_start: 0x%lx\n"
		    "  pages: 0x%lx",
		    pgidx_start, pages);
	TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
					   pgidx_start, pages),
		    "Unexpected, pages already mapped,\n"
		    "  pgidx_start: 0x%lx\n"
		    "  pages: 0x%lx",
		    pgidx_start, pages);

	return pgidx_start * vm->page_size;
}

vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
			    enum kvm_mem_region_type type)
{
	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);

	virt_pgd_alloc(vm);
	vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
					      KVM_UTIL_MIN_PFN * vm->page_size,
					      vm->memslots[type]);

	/*
	 * Find an unused range of virtual page addresses of at least
	 * pages in length.
	 */
	vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);

	/* Map the virtual pages. */
	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
		pages--, vaddr += vm->page_size, paddr += vm->page_size) {

		virt_pg_map(vm, vaddr, paddr);

		sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
	}

	return vaddr_start;
}

/*
 * VM Virtual Address Allocate
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size in bytes
 *   vaddr_min - Minimum starting virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least sz bytes within the virtual address space of the vm
 * given by vm.  The allocated bytes are mapped to a virtual address >=
 * the address given by vaddr_min.  Note that each allocation uses a
 * unique set of pages, with the minimum real allocation being at least
 * a page. The allocated physical space comes from the TEST_DATA memory region.
 */
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
{
	return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA);
}
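
/*
 * Illustrative usage (hypothetical test code): allocate a guest buffer
 * and initialize it from the host via the GVA=>HVA translation, where
 * "size" stands in for the test's buffer size:
 *
 *	vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR);
 *	void *hva = addr_gva2hva(vm, gva);
 *
 *	memset(hva, 0, size);
 */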

/*
 * VM Virtual Address Allocate Pages
 *
 * Input Args:
 *   vm - Virtual Machine
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least N system pages worth of bytes within the virtual address
 * space of the vm.
 */
vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
{
	return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
}

vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type)
{
	return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type);
}

/*
 * VM Virtual Address Allocate Page
 *
 * Input Args:
 *   vm - Virtual Machine
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least one system page worth of bytes within the virtual address
 * space of the vm.
 */
vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
{
	return vm_vaddr_alloc_pages(vm, 1);
}

/*
 * Map a range of VM virtual address to the VM's physical address
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vaddr - Virtual address to map
 *   paddr - VM Physical Address
 *   npages - The number of pages to map
 *
 * Output Args: None
 *
 * Return: None
 *
 * Within the VM given by @vm, creates a virtual translation for
 * @npages starting at @vaddr to the page range starting at @paddr.
 */
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
	      unsigned int npages)
{
	size_t page_size = vm->page_size;
	size_t size = npages * page_size;

	TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");

	while (npages--) {
		virt_pg_map(vm, vaddr, paddr);
		sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);

		vaddr += page_size;
		paddr += page_size;
	}
}

/*
 * Address VM Physical to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gpa - VM physical address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 *
 * Locates the memory region containing the VM physical address given
 * by gpa, within the VM given by vm.  When found, the host virtual
 * address providing the memory to the vm physical address is returned.
 * A TEST_ASSERT failure occurs if no region containing gpa exists.
 */
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
{
	struct userspace_mem_region *region;

	region = userspace_mem_region_find(vm, gpa, gpa);
	if (!region) {
		TEST_FAIL("No vm physical memory at 0x%lx", gpa);
		return NULL;
	}

	return (void *)((uintptr_t)region->host_mem
		+ (gpa - region->region.guest_phys_addr));
}

/*
 * Address Host Virtual to VM Physical
 *
 * Input Args:
 *   vm - Virtual Machine
 *   hva - Host virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent VM physical address
 *
 * Locates the memory region containing the host virtual address given
 * by hva, within the VM given by vm.  When found, the equivalent
 * VM physical address is returned.  A TEST_ASSERT failure occurs if no
 * region containing hva exists.
 */
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
{
	struct rb_node *node;

	for (node = vm->regions.hva_tree.rb_node; node; ) {
		struct userspace_mem_region *region =
			container_of(node, struct userspace_mem_region, hva_node);

		if (hva >= region->host_mem) {
			if (hva <= (region->host_mem
				+ region->region.memory_size - 1))
				return (vm_paddr_t)((uintptr_t)
					region->region.guest_phys_addr
					+ (hva - (uintptr_t)region->host_mem));

			node = node->rb_right;
		} else
			node = node->rb_left;
	}

	TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
	return -1;
}

/*
 * Address VM physical to Host Virtual *alias*.
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gpa - VM physical address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent address within the host virtual *alias* area, or NULL
 *   (without failing the test) if the guest memory is not shared (so
 *   no alias exists).
 *
 * Create a writable, shared virtual=>physical alias for the specific GPA.
 * The primary use case is to allow the host selftest to manipulate guest
 * memory without mapping said memory in the guest's address space. And, for
 * userfaultfd-based demand paging, to do so without triggering userfaults.
 */
void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
{
	struct userspace_mem_region *region;
	uintptr_t offset;

	region = userspace_mem_region_find(vm, gpa, gpa);
	if (!region)
		return NULL;

	if (!region->host_alias)
		return NULL;

	offset = gpa - region->region.guest_phys_addr;
	return (void *) ((uintptr_t) region->host_alias + offset);
}

/* Create an interrupt controller chip for the specified VM. */
void vm_create_irqchip(struct kvm_vm *vm)
{
	vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);

	vm->has_irqchip = true;
}

int _vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc;

	do {
		rc = __vcpu_run(vcpu);
	} while (rc == -1 && errno == EINTR);

	assert_on_unhandled_exception(vcpu);

	return rc;
}

/*
 * Invoke KVM_RUN on a vCPU until KVM returns something other than -EINTR.
 * Asserts if KVM returns an error (other than -EINTR).
 */
void vcpu_run(struct kvm_vcpu *vcpu)
{
	int ret = _vcpu_run(vcpu);

	TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_RUN, ret));
}

void vcpu_run_complete_io(struct kvm_vcpu *vcpu)
{
	int ret;

	vcpu->run->immediate_exit = 1;
	ret = __vcpu_run(vcpu);
	vcpu->run->immediate_exit = 0;

	TEST_ASSERT(ret == -1 && errno == EINTR,
		    "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
		    ret, errno);
}

/*
 * Get the list of guest registers which are supported for
 * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls.  Returns a kvm_reg_list pointer,
 * it is the caller's responsibility to free the list.
 */
struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu)
{
	struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
	int ret;

	ret = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &reg_list_n);
	TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");

	reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
	reg_list->n = reg_list_n.n;
	vcpu_ioctl(vcpu, KVM_GET_REG_LIST, reg_list);
	return reg_list;
}

void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
{
	uint32_t page_size = getpagesize();
	uint32_t size = vcpu->vm->dirty_ring_size;

	TEST_ASSERT(size > 0, "Should enable dirty ring first");

	if (!vcpu->dirty_gfns) {
		void *addr;

		addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");

		addr = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_PRIVATE, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");

		addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");

		vcpu->dirty_gfns = addr;
		vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
	}

	return vcpu->dirty_gfns;
}

/*
 * Device Ioctl
 */

int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
{
	struct kvm_device_attr attribute = {
		.group = group,
		.attr = attr,
		.flags = 0,
	};

	return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute);
}

int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type)
{
	struct kvm_create_device create_dev = {
		.type = type,
		.flags = KVM_CREATE_DEVICE_TEST,
	};

	return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
}

int __kvm_create_device(struct kvm_vm *vm, uint64_t type)
{
	struct kvm_create_device create_dev = {
		.type = type,
		.fd = -1,
		.flags = 0,
	};
	int err;

	err = __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
	TEST_ASSERT(err <= 0, "KVM_CREATE_DEVICE shouldn't return a positive value");
	return err ? : create_dev.fd;
}

int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
	struct kvm_device_attr kvmattr = {
		.group = group,
		.attr = attr,
		.flags = 0,
		.addr = (uintptr_t)val,
	};

	return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr);
}

int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
	struct kvm_device_attr kvmattr = {
		.group = group,
		.attr = attr,
		.flags = 0,
		.addr = (uintptr_t)val,
	};

	return __kvm_ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &kvmattr);
}
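
/*
 * Illustrative usage (hypothetical test code): device attributes are
 * typically probed before being read, with "dev_fd", "group", "attr"
 * and "val" as placeholders:
 *
 *	if (!__kvm_has_device_attr(dev_fd, group, attr)) {
 *		r = __kvm_device_attr_get(dev_fd, group, attr, &val);
 *		TEST_ASSERT(!r, "Failed to get device attribute");
 *	}
 */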

/*
 * IRQ related functions.
 */

int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
	struct kvm_irq_level irq_level = {
		.irq = irq,
		.level = level,
	};

	return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
}

void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
	int ret = _kvm_irq_line(vm, irq, level);

	TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
}

struct kvm_irq_routing *kvm_gsi_routing_create(void)
{
	struct kvm_irq_routing *routing;
	size_t size;

	size = sizeof(struct kvm_irq_routing);
	/* Allocate space for the max number of entries: this wastes 196 KBs. */
	size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry);
	routing = calloc(1, size);
	assert(routing);

	return routing;
}

void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
				 uint32_t gsi, uint32_t pin)
{
	int i;

	assert(routing);
	assert(routing->nr < KVM_MAX_IRQ_ROUTES);

	i = routing->nr;
	routing->entries[i].gsi = gsi;
	routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
	routing->entries[i].flags = 0;
	routing->entries[i].u.irqchip.irqchip = 0;
	routing->entries[i].u.irqchip.pin = pin;
	routing->nr++;
}

int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
	int ret;

	assert(routing);
	ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
	free(routing);

	return ret;
}

void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
	int ret;

	ret = _kvm_gsi_routing_write(vm, routing);
	TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_GSI_ROUTING, ret));
}
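
/*
 * Illustrative usage (hypothetical test code): build a GSI routing table
 * by adding irqchip entries and commit it with a single ioctl.  Note
 * that kvm_gsi_routing_write() frees the table on behalf of the caller;
 * "gsi" and "pin" are placeholders:
 *
 *	struct kvm_irq_routing *routing = kvm_gsi_routing_create();
 *
 *	kvm_gsi_routing_irqchip_add(routing, gsi, pin);
 *	kvm_gsi_routing_write(vm, routing);
 */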

/*
 * VM Dump
 *
 * Input Args:
 *   vm - Virtual Machine
 *   indent - Left margin indent amount
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the current state of the VM given by vm, to the FILE stream
 * given by stream.
 */
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
	int ctr;
	struct userspace_mem_region *region;
	struct kvm_vcpu *vcpu;

	fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
	fprintf(stream, "%*sMem Regions:\n", indent, "");
	hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
		fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
			"host_virt: %p\n", indent + 2, "",
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size,
			region->host_mem);
		fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
		sparsebit_dump(stream, region->unused_phy_pages, 0);
	}
	fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
	sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
	fprintf(stream, "%*spgd_created: %u\n", indent, "",
		vm->pgd_created);
	if (vm->pgd_created) {
		fprintf(stream, "%*sVirtual Translation Tables:\n",
			indent + 2, "");
		virt_dump(stream, vm, indent + 4);
	}
	fprintf(stream, "%*sVCPUs:\n", indent, "");

	list_for_each_entry(vcpu, &vm->vcpus, list)
		vcpu_dump(stream, vcpu, indent + 2);
}

/* Known KVM exit reasons */
static struct exit_reason {
	unsigned int reason;
	const char *name;
} exit_reasons_known[] = {
	{KVM_EXIT_UNKNOWN, "UNKNOWN"},
	{KVM_EXIT_EXCEPTION, "EXCEPTION"},
	{KVM_EXIT_IO, "IO"},
	{KVM_EXIT_HYPERCALL, "HYPERCALL"},
	{KVM_EXIT_DEBUG, "DEBUG"},
	{KVM_EXIT_HLT, "HLT"},
	{KVM_EXIT_MMIO, "MMIO"},
	{KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
	{KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
	{KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
	{KVM_EXIT_INTR, "INTR"},
	{KVM_EXIT_SET_TPR, "SET_TPR"},
	{KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
	{KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
	{KVM_EXIT_S390_RESET, "S390_RESET"},
	{KVM_EXIT_DCR, "DCR"},
	{KVM_EXIT_NMI, "NMI"},
	{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
	{KVM_EXIT_OSI, "OSI"},
	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
	{KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
	{KVM_EXIT_X86_RDMSR, "RDMSR"},
	{KVM_EXIT_X86_WRMSR, "WRMSR"},
	{KVM_EXIT_XEN, "XEN"},
	{KVM_EXIT_HYPERV, "HYPERV"},
#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
#endif
};

/*
 * Exit Reason String
 *
 * Input Args:
 *   exit_reason - Exit reason
 *
 * Output Args: None
 *
 * Return:
 *   Constant string pointer describing the exit reason.
 *
 * Locates and returns a constant string that describes the KVM exit
 * reason given by exit_reason.  If no such string is found, a constant
 * string of "Unknown" is returned.
 */
const char *exit_reason_str(unsigned int exit_reason)
{
	unsigned int n1;

	for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
		if (exit_reason == exit_reasons_known[n1].reason)
			return exit_reasons_known[n1].name;
	}

	return "Unknown";
}

/*
 * Physical Contiguous Page Allocator
 *
 * Input Args:
 *   vm - Virtual Machine
 *   num - number of pages
 *   paddr_min - Physical address minimum
 *   memslot - Memory region to allocate page from
 *
 * Output Args: None
 *
 * Return:
 *   Starting physical address
 *
 * Within the VM specified by vm, locates a range of available physical
 * pages at or above paddr_min.  If found, the pages are marked as in use
 * and their base address is returned.  A TEST_ASSERT failure occurs if
 * not enough pages are available at or above paddr_min.
 */
vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
			      vm_paddr_t paddr_min, uint32_t memslot)
{
	struct userspace_mem_region *region;
	sparsebit_idx_t pg, base;

	TEST_ASSERT(num > 0, "Must allocate at least one page");

	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
		"not divisible by page size.\n"
		"  paddr_min: 0x%lx page_size: 0x%x",
		paddr_min, vm->page_size);

	region = memslot2region(vm, memslot);
	base = pg = paddr_min >> vm->page_shift;

	do {
		for (; pg < base + num; ++pg) {
			if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
				base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
				break;
			}
		}
	} while (pg && pg != base + num);

	if (pg == 0) {
		fprintf(stderr, "No guest physical page available, "
			"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
			paddr_min, vm->page_size, memslot);
		fputs("---- vm dump ----\n", stderr);
		vm_dump(stderr, vm, 2);
		abort();
	}

	for (pg = base; pg < base + num; ++pg)
		sparsebit_clear(region->unused_phy_pages, pg);

	return base * vm->page_size;
}

vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
			     uint32_t memslot)
{
	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
}

vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
{
	return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR,
				 vm->memslots[MEM_REGION_PT]);
}

/*
 * Address Guest Virtual to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gva - VM virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 */
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
{
	return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
}

unsigned long __weak vm_compute_max_gfn(struct kvm_vm *vm)
{
	return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
}

static unsigned int vm_calc_num_pages(unsigned int num_pages,
				      unsigned int page_shift,
				      unsigned int new_page_shift,
				      bool ceil)
{
	unsigned int n = 1 << (new_page_shift - page_shift);

	if (page_shift >= new_page_shift)
		return num_pages * (1 << (page_shift - new_page_shift));

	return num_pages / n + !!(ceil && num_pages % n);
}

static inline int getpageshift(void)
{
	return __builtin_ffs(getpagesize()) - 1;
}

unsigned int
/*
 * Address Guest Virtual to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gva - VM virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 */
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
{
	return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
}

unsigned long __weak vm_compute_max_gfn(struct kvm_vm *vm)
{
	return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
}

static unsigned int vm_calc_num_pages(unsigned int num_pages,
				      unsigned int page_shift,
				      unsigned int new_page_shift,
				      bool ceil)
{
	unsigned int n;

	if (page_shift >= new_page_shift)
		return num_pages * (1 << (page_shift - new_page_shift));

	/*
	 * Compute the divisor only on this path; when page_shift is
	 * larger than new_page_shift the unsigned subtraction would
	 * wrap and the shift count would exceed the width of int.
	 */
	n = 1 << (new_page_shift - page_shift);
	return num_pages / n + !!(ceil && num_pages % n);
}

static inline int getpageshift(void)
{
	return __builtin_ffs(getpagesize()) - 1;
}

unsigned int
vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
{
	return vm_calc_num_pages(num_guest_pages,
				 vm_guest_mode_params[mode].page_shift,
				 getpageshift(), true);
}

unsigned int
vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages)
{
	return vm_calc_num_pages(num_host_pages, getpageshift(),
				 vm_guest_mode_params[mode].page_shift, false);
}

unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
{
	unsigned int n;
	n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
	return vm_adjust_num_guest_pages(mode, n);
}

/*
 * Read binary stats descriptors
 *
 * Input Args:
 *   stats_fd - the file descriptor for the binary stats file from which to read
 *   header - the binary stats metadata header corresponding to the given FD
 *
 * Output Args: None
 *
 * Return:
 *   A pointer to a newly allocated series of stat descriptors.
 *   Caller is responsible for freeing the returned kvm_stats_desc.
 *
 * Read the stats descriptors from the binary stats interface.
 */
struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
					      struct kvm_stats_header *header)
{
	struct kvm_stats_desc *stats_desc;
	ssize_t desc_size, total_size, ret;

	desc_size = get_stats_descriptor_size(header);
	total_size = header->num_desc * desc_size;

	stats_desc = calloc(header->num_desc, desc_size);
	TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors");

	ret = pread(stats_fd, stats_desc, total_size, header->desc_offset);
	TEST_ASSERT(ret == total_size, "Read KVM stats descriptors");

	return stats_desc;
}
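/*
 * Illustrative usage sketch (assumed test code, not part of this
 * library; similar in spirit to kvm_binary_stats_test): walk every
 * VM-scope stat descriptor by name.  Error handling is elided.
 *
 *	struct kvm_stats_header header;
 *	struct kvm_stats_desc *descs, *desc;
 *	int i, stats_fd = vm_get_stats_fd(vm);
 *
 *	read_stats_header(stats_fd, &header);
 *	descs = read_stats_descriptors(stats_fd, &header);
 *	for (i = 0; i < header.num_desc; i++) {
 *		desc = (void *)descs + i * get_stats_descriptor_size(&header);
 *		pr_info("stat: %s\n", desc->name);
 *	}
 *	free(descs);
 *	close(stats_fd);
 */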
/*
 * Read stat data for a particular stat
 *
 * Input Args:
 *   stats_fd - the file descriptor for the binary stats file from which to read
 *   header - the binary stats metadata header corresponding to the given FD
 *   desc - the binary stat metadata for the particular stat to be read
 *   max_elements - the maximum number of 8-byte values to read into data
 *
 * Output Args:
 *   data - the buffer into which stat data should be read
 *
 * Read the data values of a specified stat from the binary stats interface.
 */
void read_stat_data(int stats_fd, struct kvm_stats_header *header,
		    struct kvm_stats_desc *desc, uint64_t *data,
		    size_t max_elements)
{
	size_t nr_elements = min_t(ssize_t, desc->size, max_elements);
	size_t size = nr_elements * sizeof(*data);
	ssize_t ret;

	TEST_ASSERT(desc->size, "No elements in stat '%s'", desc->name);
	TEST_ASSERT(max_elements, "Zero elements requested for stat '%s'", desc->name);

	ret = pread(stats_fd, data, size,
		    header->data_offset + desc->offset);

	TEST_ASSERT(ret >= 0, "pread() failed on stat '%s', errno: %i (%s)",
		    desc->name, errno, strerror(errno));
	TEST_ASSERT(ret == size,
		    "pread() on stat '%s' read %ld bytes, wanted %lu bytes",
		    desc->name, ret, size);
}

/*
 * Read the data of the named stat
 *
 * Input Args:
 *   vm - the VM for which the stat should be read
 *   stat_name - the name of the stat to read
 *   max_elements - the maximum number of 8-byte values to read into data
 *
 * Output Args:
 *   data - the buffer into which stat data should be read
 *
 * Read the data values of a specified stat from the binary stats interface.
 */
void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
		   size_t max_elements)
{
	struct kvm_stats_desc *desc;
	size_t size_desc;
	int i;

	if (!vm->stats_fd) {
		vm->stats_fd = vm_get_stats_fd(vm);
		read_stats_header(vm->stats_fd, &vm->stats_header);
		vm->stats_desc = read_stats_descriptors(vm->stats_fd,
							&vm->stats_header);
	}

	size_desc = get_stats_descriptor_size(&vm->stats_header);

	for (i = 0; i < vm->stats_header.num_desc; ++i) {
		desc = (void *)vm->stats_desc + (i * size_desc);

		if (strcmp(desc->name, stat_name))
			continue;

		read_stat_data(vm->stats_fd, &vm->stats_header, desc,
			       data, max_elements);

		break;
	}
}

__weak void kvm_arch_vm_post_create(struct kvm_vm *vm)
{
}

__weak void kvm_selftest_arch_init(void)
{
}

void __attribute((constructor)) kvm_selftest_init(void)
{
	/* Tell stdout not to buffer its content. */
	setbuf(stdout, NULL);

	kvm_selftest_arch_init();
}
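/*
 * Illustrative usage sketch for the stats helpers above (assumed test
 * code, not part of this library): fetch a single named VM stat.  The
 * stat name is an assumption; available names vary by architecture and
 * kernel version.
 *
 *	uint64_t val;
 *
 *	__vm_get_stat(vm, "remote_tlb_flush", &val, 1);
 *	pr_info("remote_tlb_flush = %lu\n", val);
 *
 * The first call lazily opens the VM's stats fd and caches the header
 * and descriptors in the struct kvm_vm, so repeated lookups are cheap.
 */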