// SPDX-License-Identifier: GPL-2.0-only
/*
 * tools/testing/selftests/kvm/lib/kvm_util.c
 *
 * Copyright (C) 2018, Google LLC.
 */

#define _GNU_SOURCE /* for program_invocation_name */
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"

#include <assert.h>
#include <sched.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <linux/kernel.h>

#define KVM_UTIL_MIN_PFN	2

static int vcpu_mmap_sz(void);

int open_path_or_exit(const char *path, int flags)
{
	int fd;

	fd = open(path, flags);
	__TEST_REQUIRE(fd >= 0, "%s not available (errno: %d)", path, errno);

	return fd;
}

/*
 * Open KVM_DEV_PATH if available, otherwise exit the entire program.
 *
 * Input Args:
 *   flags - The flags to pass when opening KVM_DEV_PATH.
 *
 * Return:
 *   The opened file descriptor of /dev/kvm.
 */
static int _open_kvm_dev_path_or_exit(int flags)
{
	return open_path_or_exit(KVM_DEV_PATH, flags);
}

int open_kvm_dev_path_or_exit(void)
{
	return _open_kvm_dev_path_or_exit(O_RDONLY);
}

static bool get_module_param_bool(const char *module_name, const char *param)
{
	const int path_size = 128;
	char path[path_size];
	char value;
	ssize_t r;
	int fd;

	r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
		     module_name, param);
	TEST_ASSERT(r < path_size,
		    "Failed to construct sysfs path in %d bytes.", path_size);

	fd = open_path_or_exit(path, O_RDONLY);

	r = read(fd, &value, 1);
	TEST_ASSERT(r == 1, "read(%s) failed", path);

	r = close(fd);
	TEST_ASSERT(!r, "close(%s) failed", path);

	if (value == 'Y')
		return true;
	else if (value == 'N')
		return false;

	TEST_FAIL("Unrecognized value '%c' for boolean module param", value);
}

bool get_kvm_param_bool(const char *param)
{
	return get_module_param_bool("kvm", param);
}

bool get_kvm_intel_param_bool(const char *param)
{
	return get_module_param_bool("kvm_intel", param);
}

bool get_kvm_amd_param_bool(const char *param)
{
	return get_module_param_bool("kvm_amd", param);
}
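/*
 * Example (illustrative sketch, not part of the library): a test that only
 * makes sense when a given module parameter is enabled can gate itself on
 * these helpers.  The parameter name below is just an example of the
 * boolean parameters exposed under /sys/module/<name>/parameters/:
 *
 *	__TEST_REQUIRE(get_kvm_intel_param_bool("ept"),
 *		       "EPT is disabled, skipping test");
 */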
/*
 * Capability
 *
 * Input Args:
 *   cap - Capability
 *
 * Output Args: None
 *
 * Return:
 *   On success, the value corresponding to the capability (KVM_CAP_*)
 *   specified by cap.  On failure a TEST_ASSERT failure is produced.
 *
 * Looks up and returns the value corresponding to the capability
 * (KVM_CAP_*) given by cap.
 */
unsigned int kvm_check_cap(long cap)
{
	int ret;
	int kvm_fd;

	kvm_fd = open_kvm_dev_path_or_exit();
	ret = __kvm_ioctl(kvm_fd, KVM_CHECK_EXTENSION, (void *)cap);
	TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret));

	close(kvm_fd);

	return (unsigned int)ret;
}

void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
{
	if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL))
		vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size);
	else
		vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size);
	vm->dirty_ring_size = ring_size;
}

static void vm_open(struct kvm_vm *vm)
{
	vm->kvm_fd = _open_kvm_dev_path_or_exit(O_RDWR);

	TEST_REQUIRE(kvm_has_cap(KVM_CAP_IMMEDIATE_EXIT));

	vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type);
	TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd));
}

const char *vm_guest_mode_string(uint32_t i)
{
	static const char * const strings[] = {
		[VM_MODE_P52V48_4K]	= "PA-bits:52, VA-bits:48, 4K pages",
		[VM_MODE_P52V48_64K]	= "PA-bits:52, VA-bits:48, 64K pages",
		[VM_MODE_P48V48_4K]	= "PA-bits:48, VA-bits:48, 4K pages",
		[VM_MODE_P48V48_16K]	= "PA-bits:48, VA-bits:48, 16K pages",
		[VM_MODE_P48V48_64K]	= "PA-bits:48, VA-bits:48, 64K pages",
		[VM_MODE_P40V48_4K]	= "PA-bits:40, VA-bits:48, 4K pages",
		[VM_MODE_P40V48_16K]	= "PA-bits:40, VA-bits:48, 16K pages",
		[VM_MODE_P40V48_64K]	= "PA-bits:40, VA-bits:48, 64K pages",
		[VM_MODE_PXXV48_4K]	= "PA-bits:ANY, VA-bits:48, 4K pages",
		[VM_MODE_P47V64_4K]	= "PA-bits:47, VA-bits:64, 4K pages",
		[VM_MODE_P44V64_4K]	= "PA-bits:44, VA-bits:64, 4K pages",
		[VM_MODE_P36V48_4K]	= "PA-bits:36, VA-bits:48, 4K pages",
		[VM_MODE_P36V48_16K]	= "PA-bits:36, VA-bits:48, 16K pages",
		[VM_MODE_P36V48_64K]	= "PA-bits:36, VA-bits:48, 64K pages",
		[VM_MODE_P36V47_16K]	= "PA-bits:36, VA-bits:47, 16K pages",
	};
	_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
		       "Missing new mode strings?");

	TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i);

	return strings[i];
}

const struct vm_guest_mode_params vm_guest_mode_params[] = {
	[VM_MODE_P52V48_4K]	= { 52, 48, 0x1000, 12 },
	[VM_MODE_P52V48_64K]	= { 52, 48, 0x10000, 16 },
	[VM_MODE_P48V48_4K]	= { 48, 48, 0x1000, 12 },
	[VM_MODE_P48V48_16K]	= { 48, 48, 0x4000, 14 },
	[VM_MODE_P48V48_64K]	= { 48, 48, 0x10000, 16 },
	[VM_MODE_P40V48_4K]	= { 40, 48, 0x1000, 12 },
	[VM_MODE_P40V48_16K]	= { 40, 48, 0x4000, 14 },
	[VM_MODE_P40V48_64K]	= { 40, 48, 0x10000, 16 },
	[VM_MODE_PXXV48_4K]	= { 0, 0, 0x1000, 12 },
	[VM_MODE_P47V64_4K]	= { 47, 64, 0x1000, 12 },
	[VM_MODE_P44V64_4K]	= { 44, 64, 0x1000, 12 },
	[VM_MODE_P36V48_4K]	= { 36, 48, 0x1000, 12 },
	[VM_MODE_P36V48_16K]	= { 36, 48, 0x4000, 14 },
	[VM_MODE_P36V48_64K]	= { 36, 48, 0x10000, 16 },
	[VM_MODE_P36V47_16K]	= { 36, 47, 0x4000, 14 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
	       "Missing new mode params?");
/*
 * Initializes vm->vpages_valid to match the canonical VA space of the
 * architecture.
 *
 * The default implementation is valid for architectures which split the
 * range addressed by a single page table into a low and high region
 * based on the MSB of the VA.  On architectures with this behavior
 * the VA region spans [0, 2^(va_bits - 1)), [-(2^(va_bits - 1)), -1].
 */
__weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
{
	sparsebit_set_num(vm->vpages_valid,
		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
	sparsebit_set_num(vm->vpages_valid,
		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
}
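/*
 * Worked example of the split above (illustration only): with va_bits = 48
 * and 4K pages (page_shift = 12), the low half covers GVAs
 * [0, 0x00007fffffffffff], i.e. page indexes [0, 2^35), and the high half
 * covers GVAs [0xffff800000000000, ~0], i.e. another 2^35 page indexes
 * starting at index 0xffff800000000000 >> 12.
 */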
struct kvm_vm *____vm_create(enum vm_guest_mode mode)
{
	struct kvm_vm *vm;

	vm = calloc(1, sizeof(*vm));
	TEST_ASSERT(vm != NULL, "Insufficient Memory");

	INIT_LIST_HEAD(&vm->vcpus);
	vm->regions.gpa_tree = RB_ROOT;
	vm->regions.hva_tree = RB_ROOT;
	hash_init(vm->regions.slot_hash);

	vm->mode = mode;
	vm->type = 0;

	vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
	vm->va_bits = vm_guest_mode_params[mode].va_bits;
	vm->page_size = vm_guest_mode_params[mode].page_size;
	vm->page_shift = vm_guest_mode_params[mode].page_shift;

	/* Setup mode specific traits. */
	switch (vm->mode) {
	case VM_MODE_P52V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P52V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P48V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P48V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P40V48_4K:
	case VM_MODE_P36V48_4K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P40V48_64K:
	case VM_MODE_P36V48_64K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_P48V48_16K:
	case VM_MODE_P40V48_16K:
	case VM_MODE_P36V48_16K:
		vm->pgtable_levels = 4;
		break;
	case VM_MODE_P36V47_16K:
		vm->pgtable_levels = 3;
		break;
	case VM_MODE_PXXV48_4K:
#ifdef __x86_64__
		kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
		/*
		 * Ignore KVM support for 5-level paging (vm->va_bits == 57),
		 * it doesn't take effect unless CR4.LA57 is set, which it
		 * isn't for this VM_MODE.
		 */
		TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
			    "Linear address width (%d bits) not supported",
			    vm->va_bits);
		pr_debug("Guest physical address width detected: %d\n",
			 vm->pa_bits);
		vm->pgtable_levels = 4;
		vm->va_bits = 48;
#else
		TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
#endif
		break;
	case VM_MODE_P47V64_4K:
		vm->pgtable_levels = 5;
		break;
	case VM_MODE_P44V64_4K:
		vm->pgtable_levels = 5;
		break;
	default:
		TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
	}

#ifdef __aarch64__
	if (vm->pa_bits != 40)
		vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
#endif

	vm_open(vm);

	/* Limit to VA-bit canonical virtual addresses. */
	vm->vpages_valid = sparsebit_alloc();
	vm_vaddr_populate_bitmap(vm);

	/* Limit physical addresses to PA-bits. */
	vm->max_gfn = vm_compute_max_gfn(vm);

	/* Allocate and setup memory for guest. */
	vm->vpages_mapped = sparsebit_alloc();

	return vm;
}

static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
				     uint32_t nr_runnable_vcpus,
				     uint64_t extra_mem_pages)
{
	uint64_t nr_pages;

	TEST_ASSERT(nr_runnable_vcpus,
		    "Use vm_create_barebones() for VMs that _never_ have vCPUs\n");

	TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
		    "nr_vcpus = %d too large for host, max-vcpus = %d",
		    nr_runnable_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));

	/*
	 * Arbitrarily allocate 512 pages (2MB when the page size is 4KB) for
	 * the test code and other per-VM assets that will be loaded into
	 * memslot0.
	 */
	nr_pages = 512;

	/* Account for the per-vCPU stacks on behalf of the test. */
	nr_pages += nr_runnable_vcpus * DEFAULT_STACK_PGS;

	/*
	 * Account for the number of pages needed for the page tables.  The
	 * maximum page table size for a memory region will be when the
	 * smallest page size is used.  Considering each page contains x page
	 * table descriptors, the total extra size for page tables (for extra
	 * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
	 * than N/x*2.
	 */
	nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2;

	return vm_adjust_num_guest_pages(mode, nr_pages);
}
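/*
 * Worked example of the bound above (illustration only, ignoring per-level
 * rounding): mapping N pages needs roughly N/x last-level page-table pages,
 * N/x^2 pages one level up, and so on, which sums to N/(x-1) and is below
 * N/x * 2 for any x >= 2.  E.g. with x = 512 descriptors per page and
 * N = 4096 pages, the estimate reserves 2 * 4096 / 512 = 16 page-table
 * pages, comfortably more than the geometric sum needs.
 */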
struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
			   uint64_t nr_extra_pages)
{
	uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus,
						 nr_extra_pages);
	struct userspace_mem_region *slot0;
	struct kvm_vm *vm;
	int i;

	pr_debug("%s: mode='%s' pages='%ld'\n", __func__,
		 vm_guest_mode_string(mode), nr_pages);

	vm = ____vm_create(mode);

	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0);
	for (i = 0; i < NR_MEM_REGIONS; i++)
		vm->memslots[i] = 0;

	kvm_vm_elf_load(vm, program_invocation_name);

	/*
	 * TODO: Add proper defines to protect the library's memslots, and then
	 * carve out memslot1 for the ucall MMIO address.  KVM treats writes to
	 * read-only memslots as MMIO, and creating a read-only memslot for the
	 * MMIO region would prevent silently clobbering the MMIO region.
	 */
	slot0 = memslot2region(vm, 0);
	ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);

	kvm_arch_vm_post_create(vm);

	return vm;
}

/*
 * VM Create with customized parameters
 *
 * Input Args:
 *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
 *   nr_vcpus - VCPU count
 *   extra_mem_pages - Non-slot0 physical memory total size
 *   guest_code - Guest entry point
 *   vcpus - Array in which to return the created vCPUs
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to opaque structure that describes the created VM.
 *
 * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
 * extra_mem_pages is only used to calculate the maximum page table size;
 * no memory is actually allocated for non-slot0 memory in this function.
 */
struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
				      uint64_t extra_mem_pages,
				      void *guest_code, struct kvm_vcpu *vcpus[])
{
	struct kvm_vm *vm;
	int i;

	TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array");

	vm = __vm_create(mode, nr_vcpus, extra_mem_pages);

	for (i = 0; i < nr_vcpus; ++i)
		vcpus[i] = vm_vcpu_add(vm, i, guest_code);

	return vm;
}

struct kvm_vcpu *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
					   uint64_t extra_mem_pages,
					   void *guest_code)
{
	struct kvm_vcpu *vcpus[1];
	struct kvm_vm *vm;

	vm = __vm_create_with_vcpus(VM_MODE_DEFAULT, 1, extra_mem_pages,
				    guest_code, vcpus);

	*vcpu = vcpus[0];
	return vm;
}
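/*
 * Typical usage from a test (a minimal sketch; guest_main stands in for the
 * test's own guest entry point):
 *
 *	struct kvm_vcpu *vcpu;
 *	struct kvm_vm *vm;
 *
 *	vm = __vm_create_with_one_vcpu(&vcpu, 0, guest_main);
 *	...
 *	kvm_vm_free(vm);
 */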
/*
 * VM Restart
 *
 * Input Args:
 *   vm - VM that has been released before
 *
 * Output Args: None
 *
 * Reopens the file descriptors associated with the VM and reinstates the
 * global state, such as the irqchip and the memory regions that are mapped
 * into the guest.
 */
void kvm_vm_restart(struct kvm_vm *vmp)
{
	int ctr;
	struct userspace_mem_region *region;

	vm_open(vmp);
	if (vmp->has_irqchip)
		vm_create_irqchip(vmp);

	hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
			    "  rc: %i errno: %i\n"
			    "  slot: %u flags: 0x%x\n"
			    "  guest_phys_addr: 0x%llx size: 0x%llx",
			    ret, errno, region->region.slot,
			    region->region.flags,
			    region->region.guest_phys_addr,
			    region->region.memory_size);
	}
}

__weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm,
					      uint32_t vcpu_id)
{
	return __vm_vcpu_add(vm, vcpu_id);
}

struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm)
{
	kvm_vm_restart(vm);

	return vm_vcpu_recreate(vm, 0);
}

void kvm_pin_this_task_to_pcpu(uint32_t pcpu)
{
	cpu_set_t mask;
	int r;

	CPU_ZERO(&mask);
	CPU_SET(pcpu, &mask);
	r = sched_setaffinity(0, sizeof(mask), &mask);
	TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.\n", pcpu);
}

static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
{
	uint32_t pcpu = atoi_non_negative("CPU number", cpu_str);

	TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask),
		    "Not allowed to run on pCPU '%d', check cgroups?\n", pcpu);
	return pcpu;
}

void kvm_print_vcpu_pinning_help(void)
{
	const char *name = program_invocation_name;

	printf(" -c: Pin tasks to physical CPUs.  Takes a list of comma separated\n"
	       "     values (target pCPU), one for each vCPU, plus an optional\n"
	       "     entry for the main application task (specified via entry\n"
	       "     <nr_vcpus + 1>).  If used, entries must be provided for all\n"
	       "     vCPUs, i.e. pinning vCPUs is all or nothing.\n\n"
	       "     E.g. to create 3 vCPUs, pin vCPU0=>pCPU22, vCPU1=>pCPU23,\n"
	       "     vCPU2=>pCPU24, and pin the application task to pCPU50:\n\n"
	       "         %s -v 3 -c 22,23,24,50\n\n"
	       "     To leave the application task unpinned, drop the final entry:\n\n"
	       "         %s -v 3 -c 22,23,24\n\n"
	       "     (default: no pinning)\n", name, name);
}

void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
			    int nr_vcpus)
{
	cpu_set_t allowed_mask;
	char *cpu, *cpu_list;
	char delim[2] = ",";
	int i, r;

	cpu_list = strdup(pcpus_string);
	TEST_ASSERT(cpu_list, "strdup() allocation failed.\n");

	r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
	TEST_ASSERT(!r, "sched_getaffinity() failed");

	cpu = strtok(cpu_list, delim);

	/* 1. Get all pcpus for vcpus. */
	for (i = 0; i < nr_vcpus; i++) {
		TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'\n", i);
		vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask);
		cpu = strtok(NULL, delim);
	}

	/* 2. Check if the main worker needs to be pinned. */
	if (cpu) {
		kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask));
		cpu = strtok(NULL, delim);
	}

	TEST_ASSERT(!cpu, "pCPU list contains trailing garbage characters '%s'", cpu);
	free(cpu_list);
}
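/*
 * Example flow (sketch only): for "-c 22,23,24,50" and nr_vcpus = 3, the
 * parser above fills vcpu_to_pcpu[] = {22, 23, 24} and immediately pins the
 * main task to pCPU 50.  Each vCPU worker is then expected to pin itself,
 * e.g. (vcpu_idx is a hypothetical per-worker index in the test):
 *
 *	kvm_pin_this_task_to_pcpu(vcpu_to_pcpu[vcpu_idx]);
 */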
/*
 * Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Searches for a region with any physical memory that overlaps with
 * any portion of the guest physical addresses from start to end
 * inclusive.  If multiple overlapping regions exist, a pointer to any
 * of the regions is returned.  Null is returned only when no overlapping
 * region exists.
 */
static struct userspace_mem_region *
userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
	struct rb_node *node;

	for (node = vm->regions.gpa_tree.rb_node; node; ) {
		struct userspace_mem_region *region =
			container_of(node, struct userspace_mem_region, gpa_node);
		uint64_t existing_start = region->region.guest_phys_addr;
		uint64_t existing_end = region->region.guest_phys_addr
			+ region->region.memory_size - 1;
		if (start <= existing_end && end >= existing_start)
			return region;

		if (start < existing_start)
			node = node->rb_left;
		else
			node = node->rb_right;
	}

	return NULL;
}

/*
 * KVM Userspace Memory Region Find
 *
 * Input Args:
 *   vm - Virtual Machine
 *   start - Starting VM physical address
 *   end - Ending VM physical address, inclusive.
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to overlapping region, NULL if no such region.
 *
 * Public interface to userspace_mem_region_find.  Allows tests to look up
 * the memslot data structure for a given range of guest physical memory.
 */
struct kvm_userspace_memory_region *
kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
				 uint64_t end)
{
	struct userspace_mem_region *region;

	region = userspace_mem_region_find(vm, start, end);
	if (!region)
		return NULL;

	return &region->region;
}
__weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
{

}

/*
 * VM VCPU Remove
 *
 * Input Args:
 *   vcpu - VCPU to remove
 *
 * Output Args: None
 *
 * Return: None, TEST_ASSERT failures for all error conditions
 *
 * Removes a vCPU from a VM and frees its resources.
 */
static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
	int ret;

	if (vcpu->dirty_gfns) {
		ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
		TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
		vcpu->dirty_gfns = NULL;
	}

	ret = munmap(vcpu->run, vcpu_mmap_sz());
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));

	ret = close(vcpu->fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));

	list_del(&vcpu->list);

	vcpu_arch_free(vcpu);
	free(vcpu);
}

void kvm_vm_release(struct kvm_vm *vmp)
{
	struct kvm_vcpu *vcpu, *tmp;
	int ret;

	list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
		vm_vcpu_rm(vmp, vcpu);

	ret = close(vmp->fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));

	ret = close(vmp->kvm_fd);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
}

static void __vm_mem_region_delete(struct kvm_vm *vm,
				   struct userspace_mem_region *region,
				   bool unlink)
{
	int ret;

	if (unlink) {
		rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
		rb_erase(&region->hva_node, &vm->regions.hva_tree);
		hash_del(&region->slot_node);
	}

	region->region.memory_size = 0;
	vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	sparsebit_free(&region->unused_phy_pages);
	ret = munmap(region->mmap_start, region->mmap_size);
	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
	if (region->fd >= 0) {
		/* There's an extra map when using shared memory. */
		ret = munmap(region->mmap_alias, region->mmap_size);
		TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
		close(region->fd);
	}

	free(region);
}

/*
 * Destroys and frees the VM pointed to by vmp.
 */
void kvm_vm_free(struct kvm_vm *vmp)
{
	int ctr;
	struct hlist_node *node;
	struct userspace_mem_region *region;

	if (vmp == NULL)
		return;

	/* Free cached stats metadata and close FD */
	if (vmp->stats_fd) {
		free(vmp->stats_desc);
		close(vmp->stats_fd);
	}

	/* Free userspace_mem_regions. */
	hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
		__vm_mem_region_delete(vmp, region, false);

	/* Free sparsebit arrays. */
	sparsebit_free(&vmp->vpages_valid);
	sparsebit_free(&vmp->vpages_mapped);

	kvm_vm_release(vmp);

	/* Free the structure describing the VM. */
	free(vmp);
}

int kvm_memfd_alloc(size_t size, bool hugepages)
{
	int memfd_flags = MFD_CLOEXEC;
	int fd, r;

	if (hugepages)
		memfd_flags |= MFD_HUGETLB;

	fd = memfd_create("kvm_selftest", memfd_flags);
	TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd));

	r = ftruncate(fd, size);
	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r));

	r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));

	return fd;
}
/*
 * Memory Compare, host virtual to guest virtual
 *
 * Input Args:
 *   hva - Starting host virtual address
 *   vm - Virtual Machine
 *   gva - Starting guest virtual address
 *   len - number of bytes to compare
 *
 * Output Args: None
 *
 * Input/Output Args: None
 *
 * Return:
 *   Returns 0 if the bytes starting at hva for a length of len
 *   are equal to the guest virtual bytes starting at gva.  Returns
 *   a value < 0, if bytes at hva are less than those at gva.
 *   Otherwise a value > 0 is returned.
 *
 * Compares the bytes starting at the host virtual address hva, for
 * a length of len, to the guest bytes starting at the guest virtual
 * address given by gva.
 */
int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
{
	size_t amt;

	/*
	 * Compare a batch of bytes until either a mismatch is found
	 * or all the bytes have been compared.
	 */
	for (uintptr_t offset = 0; offset < len; offset += amt) {
		uintptr_t ptr1 = (uintptr_t)hva + offset;

		/*
		 * Determine host address for guest virtual address
		 * at offset.
		 */
		uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);

		/*
		 * Determine amount to compare on this pass.
		 * Don't allow the comparison to cross a page boundary.
		 */
		amt = len - offset;
		if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
			amt = vm->page_size - (ptr1 % vm->page_size);
		if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
			amt = vm->page_size - (ptr2 % vm->page_size);

		assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
		assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));

		/*
		 * Perform the comparison.  If there is a difference
		 * return that result to the caller, otherwise need
		 * to continue on looking for a mismatch.
		 */
		int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
		if (ret != 0)
			return ret;
	}

	/*
	 * No mismatch found.  Let the caller know the two memory
	 * areas are equal.
	 */
	return 0;
}

static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), gpa_node);
		parent = *cur;
		if (region->region.guest_phys_addr <
		    cregion->region.guest_phys_addr)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->region.guest_phys_addr !=
				    cregion->region.guest_phys_addr,
				    "Duplicate GPA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->gpa_node, parent, cur);
	rb_insert_color(&region->gpa_node, gpa_tree);
}

static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
					       struct userspace_mem_region *region)
{
	struct rb_node **cur, *parent;

	for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
		struct userspace_mem_region *cregion;

		cregion = container_of(*cur, typeof(*cregion), hva_node);
		parent = *cur;
		if (region->host_mem < cregion->host_mem)
			cur = &(*cur)->rb_left;
		else {
			TEST_ASSERT(region->host_mem !=
				    cregion->host_mem,
				    "Duplicate HVA in region tree");

			cur = &(*cur)->rb_right;
		}
	}

	rb_link_node(&region->hva_node, parent, cur);
	rb_insert_color(&region->hva_node, hva_tree);
}


int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
				uint64_t gpa, uint64_t size, void *hva)
{
	struct kvm_userspace_memory_region region = {
		.slot = slot,
		.flags = flags,
		.guest_phys_addr = gpa,
		.memory_size = size,
		.userspace_addr = (uintptr_t)hva,
	};

	return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region);
}

void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
			       uint64_t gpa, uint64_t size, void *hva)
{
	int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed, errno = %d (%s)",
		    errno, strerror(errno));
}
/*
 * VM Userspace Memory Region Add
 *
 * Input Args:
 *   vm - Virtual Machine
 *   src_type - Storage source for this region
 *              (e.g. VM_MEM_SRC_ANONYMOUS for anonymous memory)
 *   guest_paddr - Starting guest physical address
 *   slot - KVM region slot
 *   npages - Number of physical pages
 *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
 *
 * Output Args: None
 *
 * Return: None
 *
 * Allocates a memory area of the number of pages specified by npages
 * and maps it to the VM specified by vm, at a starting physical address
 * given by guest_paddr.  The region is created with a KVM region slot
 * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM.  The
 * region is created with the flags given by flags.
 */
void vm_userspace_mem_region_add(struct kvm_vm *vm,
	enum vm_mem_backing_src_type src_type,
	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
	uint32_t flags)
{
	int ret;
	struct userspace_mem_region *region;
	size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
	size_t alignment;

	TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
		    "Number of guest pages is not compatible with the host.  "
		    "Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));

	TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
		"address not on a page boundary.\n"
		"  guest_paddr: 0x%lx vm->page_size: 0x%x",
		guest_paddr, vm->page_size);
	TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
		<= vm->max_gfn, "Physical range beyond maximum "
		"supported physical address,\n"
		"  guest_paddr: 0x%lx npages: 0x%lx\n"
		"  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		guest_paddr, npages, vm->max_gfn, vm->page_size);

	/*
	 * Confirm a mem region with an overlapping address doesn't
	 * already exist.
	 */
	region = (struct userspace_mem_region *) userspace_mem_region_find(
		vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
	if (region != NULL)
		TEST_FAIL("overlapping userspace_mem_region already "
			"exists\n"
			"  requested guest_paddr: 0x%lx npages: 0x%lx "
			"page_size: 0x%x\n"
			"  existing guest_paddr: 0x%lx size: 0x%lx",
			guest_paddr, npages, vm->page_size,
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size);

	/* Confirm no region with the requested slot already exists. */
	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
			       slot) {
		if (region->region.slot != slot)
			continue;

		TEST_FAIL("A mem region with the requested slot "
			"already exists.\n"
			"  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
			"  existing slot: %u paddr: 0x%lx size: 0x%lx",
			slot, guest_paddr, npages,
			region->region.slot,
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size);
	}

	/* Allocate and initialize new mem region structure. */
	region = calloc(1, sizeof(*region));
	TEST_ASSERT(region != NULL, "Insufficient Memory");
	region->mmap_size = npages * vm->page_size;

#ifdef __s390x__
	/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
	alignment = 0x100000;
#else
	alignment = 1;
#endif

	/*
	 * When using THP, mmap is not guaranteed to return a hugepage aligned
	 * address so we have to pad the mmap.  Padding is not needed for HugeTLB
	 * because mmap will always return an address aligned to the HugeTLB
	 * page size.
	 */
	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
		alignment = max(backing_src_pagesz, alignment);

	ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));

	/* Add enough memory to align up if necessary */
	if (alignment > 1)
		region->mmap_size += alignment;

	region->fd = -1;
	if (backing_src_is_shared(src_type))
		region->fd = kvm_memfd_alloc(region->mmap_size,
					     src_type == VM_MEM_SRC_SHARED_HUGETLB);

	region->mmap_start = mmap(NULL, region->mmap_size,
				  PROT_READ | PROT_WRITE,
				  vm_mem_backing_src_alias(src_type)->flag,
				  region->fd, 0);
	TEST_ASSERT(region->mmap_start != MAP_FAILED,
		    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));

	TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
		    region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
		    "mmap_start %p is not aligned to HugeTLB page size 0x%lx",
		    region->mmap_start, backing_src_pagesz);

	/* Align host address */
	region->host_mem = align_ptr_up(region->mmap_start, alignment);

	/* As needed perform madvise */
	if ((src_type == VM_MEM_SRC_ANONYMOUS ||
	     src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
		ret = madvise(region->host_mem, npages * vm->page_size,
			      src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
		TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
			    region->host_mem, npages * vm->page_size,
			    vm_mem_backing_src_alias(src_type)->name);
	}

	region->backing_src_type = src_type;
	region->unused_phy_pages = sparsebit_alloc();
	sparsebit_set_num(region->unused_phy_pages,
		guest_paddr >> vm->page_shift, npages);
	region->region.slot = slot;
	region->region.flags = flags;
	region->region.guest_phys_addr = guest_paddr;
	region->region.memory_size = npages * vm->page_size;
	region->region.userspace_addr = (uintptr_t) region->host_mem;
	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);
	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
		"  rc: %i errno: %i\n"
		"  slot: %u flags: 0x%x\n"
		"  guest_phys_addr: 0x%lx size: 0x%lx",
		ret, errno, slot, flags,
		guest_paddr, (uint64_t) region->region.memory_size);

	/* Add to quick lookup data structures */
	vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
	vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
	hash_add(vm->regions.slot_hash, &region->slot_node, slot);

	/* If shared memory, create an alias. */
	if (region->fd >= 0) {
		region->mmap_alias = mmap(NULL, region->mmap_size,
					  PROT_READ | PROT_WRITE,
					  vm_mem_backing_src_alias(src_type)->flag,
					  region->fd, 0);
		TEST_ASSERT(region->mmap_alias != MAP_FAILED,
			    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));

		/* Align host alias address */
		region->host_alias = align_ptr_up(region->mmap_alias, alignment);
	}
}
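/*
 * Example (illustrative sketch): adding a 64MB anonymous memslot with dirty
 * logging enabled.  The GPA (0x10000000) and slot number (1) are arbitrary
 * values a test might choose, not anything this library mandates:
 *
 *	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
 *				    0x10000000, 1,
 *				    (64 << 20) / vm->page_size,
 *				    KVM_MEM_LOG_DIRTY_PAGES);
 */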
/*
 * Memslot to region
 *
 * Input Args:
 *   vm - Virtual Machine
 *   memslot - KVM memory slot ID
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to memory region structure that describes the memory region
 *   using kvm memory slot ID given by memslot.  TEST_ASSERT failure
 *   on error (e.g. currently no memory region using memslot as a KVM
 *   memory slot ID).
 */
struct userspace_mem_region *
memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
	struct userspace_mem_region *region;

	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
			       memslot)
		if (region->region.slot == memslot)
			return region;

	fprintf(stderr, "No mem region with the requested slot found,\n"
		"  requested slot: %u\n", memslot);
	fputs("---- vm dump ----\n", stderr);
	vm_dump(stderr, vm, 2);
	TEST_FAIL("Mem region not found");
	return NULL;
}

/*
 * VM Memory Region Flags Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region whose flags are to be set
 *   flags - Flags to set for the memory region
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the flags of the memory region specified by the value of slot,
 * to the values given by flags.
 */
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
{
	int ret;
	struct userspace_mem_region *region;

	region = memslot2region(vm, slot);

	region->region.flags = flags;

	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
		"  rc: %i errno: %i slot: %u flags: 0x%x",
		ret, errno, slot, flags);
}

/*
 * VM Memory Region Move
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to move
 *   new_gpa - Starting guest physical address
 *
 * Output Args: None
 *
 * Return: None
 *
 * Change the gpa of a memory region.
 */
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
{
	struct userspace_mem_region *region;
	int ret;

	region = memslot2region(vm, slot);

	region->region.guest_phys_addr = new_gpa;

	ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);

	TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
		    "ret: %i errno: %i slot: %u new_gpa: 0x%lx",
		    ret, errno, slot, new_gpa);
}

/*
 * VM Memory Region Delete
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to delete
 *
 * Output Args: None
 *
 * Return: None
 *
 * Delete a memory region.
 */
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
	__vm_mem_region_delete(vm, memslot2region(vm, slot), true);
}

/* Returns the size of a vCPU's kvm_run structure. */
static int vcpu_mmap_sz(void)
{
	int dev_fd, ret;

	dev_fd = open_kvm_dev_path_or_exit();

	ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
	TEST_ASSERT(ret >= sizeof(struct kvm_run),
		    KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret));

	close(dev_fd);

	return ret;
}

static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id)
{
	struct kvm_vcpu *vcpu;

	list_for_each_entry(vcpu, &vm->vcpus, list) {
		if (vcpu->id == vcpu_id)
			return true;
	}

	return false;
}

/*
 * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id.
 * No additional vCPU setup is done.  Returns the vCPU.
 */
struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
{
	struct kvm_vcpu *vcpu;

	/* Confirm a vcpu with the specified id doesn't already exist. */
	TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists\n", vcpu_id);

	/* Allocate and initialize new vcpu structure. */
	vcpu = calloc(1, sizeof(*vcpu));
	TEST_ASSERT(vcpu != NULL, "Insufficient Memory");

	vcpu->vm = vm;
	vcpu->id = vcpu_id;
	vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id);
	TEST_ASSERT(vcpu->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu->fd));

	TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size "
		"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
		vcpu_mmap_sz(), sizeof(*vcpu->run));
	vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
		PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
	TEST_ASSERT(vcpu->run != MAP_FAILED,
		    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));

	/* Add to linked-list of VCPUs. */
	list_add(&vcpu->list, &vm->vcpus);

	return vcpu;
}
/*
 * VM Virtual Address Unused Gap
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size (bytes)
 *   vaddr_min - Minimum Virtual Address
 *
 * Output Args: None
 *
 * Return:
 *   Lowest virtual address at or above vaddr_min, with at least
 *   sz unused bytes.  TEST_ASSERT failure if no area of at least
 *   size sz is available.
 *
 * Within the VM specified by vm, locates the lowest starting virtual
 * address >= vaddr_min, that has at least sz unallocated bytes.  A
 * TEST_ASSERT failure occurs for invalid input or no area of at least
 * sz unallocated bytes >= vaddr_min is available.
 */
vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
			       vm_vaddr_t vaddr_min)
{
	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;

	/* Determine lowest permitted virtual page index. */
	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
	if ((pgidx_start * vm->page_size) < vaddr_min)
		goto no_va_found;

	/* Loop over section with enough valid virtual page indexes. */
	if (!sparsebit_is_set_num(vm->vpages_valid,
		pgidx_start, pages))
		pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
			pgidx_start, pages);
	do {
		/*
		 * Are there enough unused virtual pages available at
		 * the currently proposed starting virtual page index?
		 * If not, adjust proposed starting index to next
		 * possible.
		 */
		if (sparsebit_is_clear_num(vm->vpages_mapped,
			pgidx_start, pages))
			goto va_found;
		pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
			pgidx_start, pages);
		if (pgidx_start == 0)
			goto no_va_found;

		/*
		 * If needed, adjust proposed starting virtual address,
		 * to next range of valid virtual addresses.
		 */
		if (!sparsebit_is_set_num(vm->vpages_valid,
			pgidx_start, pages)) {
			pgidx_start = sparsebit_next_set_num(
				vm->vpages_valid, pgidx_start, pages);
			if (pgidx_start == 0)
				goto no_va_found;
		}
	} while (pgidx_start != 0);

no_va_found:
	TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages);

	/* NOT REACHED */
	return -1;

va_found:
	TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
		pgidx_start, pages),
		"Unexpected, invalid virtual page index range,\n"
		"  pgidx_start: 0x%lx\n"
		"  pages: 0x%lx",
		pgidx_start, pages);
	TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
		pgidx_start, pages),
		"Unexpected, pages already mapped,\n"
		"  pgidx_start: 0x%lx\n"
		"  pages: 0x%lx",
		pgidx_start, pages);

	return pgidx_start * vm->page_size;
}
vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
			    enum kvm_mem_region_type type)
{
	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);

	virt_pgd_alloc(vm);
	vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
					      KVM_UTIL_MIN_PFN * vm->page_size,
					      vm->memslots[type]);

	/*
	 * Find an unused range of virtual page addresses of at least
	 * pages in length.
	 */
	vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);

	/* Map the virtual pages. */
	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
	     pages--, vaddr += vm->page_size, paddr += vm->page_size) {

		virt_pg_map(vm, vaddr, paddr);

		sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
	}

	return vaddr_start;
}
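/*
 * Example (sketch): allocating guest-virtual scratch space and initializing
 * it from the host via the GVA->HVA translation helper defined later in
 * this file:
 *
 *	vm_vaddr_t gva = vm_vaddr_alloc_page(vm);
 *
 *	memset(addr_gva2hva(vm, gva), 0xaa, vm->page_size);
 */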
/*
 * VM Virtual Address Allocate
 *
 * Input Args:
 *   vm - Virtual Machine
 *   sz - Size in bytes
 *   vaddr_min - Minimum starting virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least sz bytes within the virtual address space of the vm
 * given by vm.  The allocated bytes are mapped to a virtual address >=
 * the address given by vaddr_min.  Note that each allocation uses a
 * unique set of pages, with the minimum real allocation being at least
 * a page.  The allocated physical space comes from the TEST_DATA memory region.
 */
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
{
	return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA);
}

/*
 * VM Virtual Address Allocate Pages
 *
 * Input Args:
 *   vm - Virtual Machine
 *   nr_pages - Number of system pages to allocate
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least N system pages worth of bytes within the virtual address
 * space of the vm.
 */
vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
{
	return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
}

vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type)
{
	return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type);
}

/*
 * VM Virtual Address Allocate Page
 *
 * Input Args:
 *   vm - Virtual Machine
 *
 * Output Args: None
 *
 * Return:
 *   Starting guest virtual address
 *
 * Allocates at least one system page worth of bytes within the virtual address
 * space of the vm.
 */
vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
{
	return vm_vaddr_alloc_pages(vm, 1);
}

/*
 * Map a range of VM virtual address to the VM's physical address
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vaddr - Virtual address to map
 *   paddr - VM Physical Address
 *   npages - The number of pages to map
 *
 * Output Args: None
 *
 * Return: None
 *
 * Within the VM given by @vm, creates a virtual translation for
 * @npages starting at @vaddr to the page range starting at @paddr.
 */
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
	      unsigned int npages)
{
	size_t page_size = vm->page_size;
	size_t size = npages * page_size;

	TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");

	while (npages--) {
		virt_pg_map(vm, vaddr, paddr);
		sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);

		vaddr += page_size;
		paddr += page_size;
	}
}

/*
 * Address VM Physical to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gpa - VM physical address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 *
 * Locates the memory region containing the VM physical address given
 * by gpa, within the VM given by vm.  When found, the host virtual
 * address providing the memory to the vm physical address is returned.
 * A TEST_ASSERT failure occurs if no region containing gpa exists.
 */
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
{
	struct userspace_mem_region *region;

	region = userspace_mem_region_find(vm, gpa, gpa);
	if (!region) {
		TEST_FAIL("No vm physical memory at 0x%lx", gpa);
		return NULL;
	}

	return (void *)((uintptr_t)region->host_mem
		+ (gpa - region->region.guest_phys_addr));
}

/*
 * Address Host Virtual to VM Physical
 *
 * Input Args:
 *   vm - Virtual Machine
 *   hva - Host virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent VM physical address
 *
 * Locates the memory region containing the host virtual address given
 * by hva, within the VM given by vm.  When found, the equivalent
 * VM physical address is returned.  A TEST_ASSERT failure occurs if no
 * region containing hva exists.
 */
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
{
	struct rb_node *node;

	for (node = vm->regions.hva_tree.rb_node; node; ) {
		struct userspace_mem_region *region =
			container_of(node, struct userspace_mem_region, hva_node);

		if (hva >= region->host_mem) {
			if (hva <= (region->host_mem
				+ region->region.memory_size - 1))
				return (vm_paddr_t)((uintptr_t)
					region->region.guest_phys_addr
					+ (hva - (uintptr_t)region->host_mem));

			node = node->rb_right;
		} else
			node = node->rb_left;
	}

	TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
	return -1;
}

/*
 * Address VM physical to Host Virtual *alias*.
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gpa - VM physical address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent address within the host virtual *alias* area, or NULL
 *   (without failing the test) if the guest memory is not shared (so
 *   no alias exists).
 *
 * Create a writable, shared virtual=>physical alias for the specific GPA.
 * The primary use case is to allow the host selftest to manipulate guest
 * memory without mapping said memory in the guest's address space.  And, for
 * userfaultfd-based demand paging, to do so without triggering userfaults.
 */
void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
{
	struct userspace_mem_region *region;
	uintptr_t offset;

	region = userspace_mem_region_find(vm, gpa, gpa);
	if (!region)
		return NULL;

	if (!region->host_alias)
		return NULL;

	offset = gpa - region->region.guest_phys_addr;
	return (void *) ((uintptr_t) region->host_alias + offset);
}

/* Create an interrupt controller chip for the specified VM. */
void vm_create_irqchip(struct kvm_vm *vm)
{
	vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);

	vm->has_irqchip = true;
}

int _vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc;

	do {
		rc = __vcpu_run(vcpu);
	} while (rc == -1 && errno == EINTR);

	assert_on_unhandled_exception(vcpu);

	return rc;
}

/*
 * Invoke KVM_RUN on a vCPU until KVM returns something other than -EINTR.
 * Assert if KVM returns an error (other than -EINTR).
 */
void vcpu_run(struct kvm_vcpu *vcpu)
{
	int ret = _vcpu_run(vcpu);

	TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_RUN, ret));
}

void vcpu_run_complete_io(struct kvm_vcpu *vcpu)
{
	int ret;

	vcpu->run->immediate_exit = 1;
	ret = __vcpu_run(vcpu);
	vcpu->run->immediate_exit = 0;

	TEST_ASSERT(ret == -1 && errno == EINTR,
		    "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
		    ret, errno);
}
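/*
 * Typical run loop in a test (sketch only; the exit-reason check shown is
 * one common pattern, many tests dispatch on ucalls instead):
 *
 *	vcpu_run(vcpu);
 *	TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_IO,
 *		    "Unexpected exit reason: %u (%s)",
 *		    vcpu->run->exit_reason,
 *		    exit_reason_str(vcpu->run->exit_reason));
 */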
/*
 * Get the list of guest registers which are supported for
 * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls.  Returns a kvm_reg_list pointer;
 * it is the caller's responsibility to free the list.
 */
struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu)
{
	struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
	int ret;

	ret = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &reg_list_n);
	TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");

	reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
	reg_list->n = reg_list_n.n;
	vcpu_ioctl(vcpu, KVM_GET_REG_LIST, reg_list);
	return reg_list;
}

void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
{
	uint32_t page_size = getpagesize();
	uint32_t size = vcpu->vm->dirty_ring_size;

	TEST_ASSERT(size > 0, "Should enable dirty ring first");

	if (!vcpu->dirty_gfns) {
		void *addr;

		/* Private and executable mappings of the ring must be rejected. */
		addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");

		addr = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_PRIVATE, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");

		addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
		TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");

		vcpu->dirty_gfns = addr;
		vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
	}

	return vcpu->dirty_gfns;
}

/*
 * Device Ioctl
 */

int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
{
	struct kvm_device_attr attribute = {
		.group = group,
		.attr = attr,
		.flags = 0,
	};

	return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute);
}

int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type)
{
	struct kvm_create_device create_dev = {
		.type = type,
		.flags = KVM_CREATE_DEVICE_TEST,
	};

	return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
}

int __kvm_create_device(struct kvm_vm *vm, uint64_t type)
{
	struct kvm_create_device create_dev = {
		.type = type,
		.fd = -1,
		.flags = 0,
	};
	int err;

	err = __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
	TEST_ASSERT(err <= 0, "KVM_CREATE_DEVICE shouldn't return a positive value");
	return err ? : create_dev.fd;
}

int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
	struct kvm_device_attr kvmattr = {
		.group = group,
		.attr = attr,
		.flags = 0,
		.addr = (uintptr_t)val,
	};

	return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr);
}

int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
	struct kvm_device_attr kvmattr = {
		.group = group,
		.attr = attr,
		.flags = 0,
		.addr = (uintptr_t)val,
	};

	return __kvm_ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &kvmattr);
}
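/*
 * Example (sketch): probing for a device attribute before reading it.  The
 * dev_fd, group, attr and val names below are placeholders; real tests pass
 * the architecture specific constants for the device they created:
 *
 *	if (!__kvm_has_device_attr(dev_fd, group, attr)) {
 *		int r = __kvm_device_attr_get(dev_fd, group, attr, &val);
 *
 *		TEST_ASSERT(!r, "Failed to get device attribute");
 *	}
 */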
/*
 * IRQ related functions.
 */

int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
	struct kvm_irq_level irq_level = {
		.irq    = irq,
		.level  = level,
	};

	return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
}

void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
	int ret = _kvm_irq_line(vm, irq, level);

	TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
}

struct kvm_irq_routing *kvm_gsi_routing_create(void)
{
	struct kvm_irq_routing *routing;
	size_t size;

	size = sizeof(struct kvm_irq_routing);
	/* Allocate space for the max number of entries: this wastes 196 KBs. */
	size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry);
	routing = calloc(1, size);
	assert(routing);

	return routing;
}

void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
				 uint32_t gsi, uint32_t pin)
{
	int i;

	assert(routing);
	assert(routing->nr < KVM_MAX_IRQ_ROUTES);

	i = routing->nr;
	routing->entries[i].gsi = gsi;
	routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
	routing->entries[i].flags = 0;
	routing->entries[i].u.irqchip.irqchip = 0;
	routing->entries[i].u.irqchip.pin = pin;
	routing->nr++;
}

int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
	int ret;

	assert(routing);
	ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
	free(routing);

	return ret;
}

void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
	int ret;

	ret = _kvm_gsi_routing_write(vm, routing);
	TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_GSI_ROUTING, ret));
}
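/*
 * Example (sketch): building a routing table that wires the first nr_gsis
 * GSIs straight through to the matching irqchip pins and handing it to KVM.
 * Note that the write helpers free the table, so it must not be reused:
 *
 *	struct kvm_irq_routing *routing = kvm_gsi_routing_create();
 *
 *	for (i = 0; i < nr_gsis; i++)
 *		kvm_gsi_routing_irqchip_add(routing, i, i);
 *	kvm_gsi_routing_write(vm, routing);
 */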
/*
 * VM Dump
 *
 * Input Args:
 *   vm - Virtual Machine
 *   indent - Left margin indent amount
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the current state of the VM given by vm, to the FILE stream
 * given by stream.
 */
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
	int ctr;
	struct userspace_mem_region *region;
	struct kvm_vcpu *vcpu;

	fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
	fprintf(stream, "%*sMem Regions:\n", indent, "");
	hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
		fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
			"host_virt: %p\n", indent + 2, "",
			(uint64_t) region->region.guest_phys_addr,
			(uint64_t) region->region.memory_size,
			region->host_mem);
		fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
		sparsebit_dump(stream, region->unused_phy_pages, 0);
	}
	fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
	sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
	fprintf(stream, "%*spgd_created: %u\n", indent, "",
		vm->pgd_created);
	if (vm->pgd_created) {
		fprintf(stream, "%*sVirtual Translation Tables:\n",
			indent + 2, "");
		virt_dump(stream, vm, indent + 4);
	}
	fprintf(stream, "%*sVCPUs:\n", indent, "");

	list_for_each_entry(vcpu, &vm->vcpus, list)
		vcpu_dump(stream, vcpu, indent + 2);
}

#define KVM_EXIT_STRING(x) {KVM_EXIT_##x, #x}

/* Known KVM exit reasons */
static struct exit_reason {
	unsigned int reason;
	const char *name;
} exit_reasons_known[] = {
	KVM_EXIT_STRING(UNKNOWN),
	KVM_EXIT_STRING(EXCEPTION),
	KVM_EXIT_STRING(IO),
	KVM_EXIT_STRING(HYPERCALL),
	KVM_EXIT_STRING(DEBUG),
	KVM_EXIT_STRING(HLT),
	KVM_EXIT_STRING(MMIO),
	KVM_EXIT_STRING(IRQ_WINDOW_OPEN),
	KVM_EXIT_STRING(SHUTDOWN),
	KVM_EXIT_STRING(FAIL_ENTRY),
	KVM_EXIT_STRING(INTR),
	KVM_EXIT_STRING(SET_TPR),
	KVM_EXIT_STRING(TPR_ACCESS),
	KVM_EXIT_STRING(S390_SIEIC),
	KVM_EXIT_STRING(S390_RESET),
	KVM_EXIT_STRING(DCR),
	KVM_EXIT_STRING(NMI),
	KVM_EXIT_STRING(INTERNAL_ERROR),
	KVM_EXIT_STRING(OSI),
	KVM_EXIT_STRING(PAPR_HCALL),
	KVM_EXIT_STRING(S390_UCONTROL),
	KVM_EXIT_STRING(WATCHDOG),
	KVM_EXIT_STRING(S390_TSCH),
	KVM_EXIT_STRING(EPR),
	KVM_EXIT_STRING(SYSTEM_EVENT),
	KVM_EXIT_STRING(S390_STSI),
	KVM_EXIT_STRING(IOAPIC_EOI),
	KVM_EXIT_STRING(HYPERV),
	KVM_EXIT_STRING(ARM_NISV),
	KVM_EXIT_STRING(X86_RDMSR),
	KVM_EXIT_STRING(X86_WRMSR),
	KVM_EXIT_STRING(DIRTY_RING_FULL),
	KVM_EXIT_STRING(AP_RESET_HOLD),
	KVM_EXIT_STRING(X86_BUS_LOCK),
	KVM_EXIT_STRING(XEN),
	KVM_EXIT_STRING(RISCV_SBI),
	KVM_EXIT_STRING(RISCV_CSR),
	KVM_EXIT_STRING(NOTIFY),
#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
	KVM_EXIT_STRING(MEMORY_NOT_PRESENT),
#endif
};

/*
 * Exit Reason String
 *
 * Input Args:
 *   exit_reason - Exit reason
 *
 * Output Args: None
 *
 * Return:
 *   Constant string pointer describing the exit reason.
 *
 * Locates and returns a constant string that describes the KVM exit
 * reason given by exit_reason.  If no such string is found, a constant
 * string of "Unknown" is returned.
 */
/*
 * Physical Contiguous Page Allocator
 *
 * Input Args:
 *   vm - Virtual Machine
 *   num - number of pages
 *   paddr_min - Physical address minimum
 *   memslot - Memory region to allocate page from
 *
 * Output Args: None
 *
 * Return:
 *   Starting physical address
 *
 * Within the VM specified by vm, locates a range of available physical
 * pages at or above paddr_min. If found, the pages are marked as in use
 * and their base address is returned. A TEST_ASSERT failure occurs if
 * not enough pages are available at or above paddr_min.
 */
vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
			      vm_paddr_t paddr_min, uint32_t memslot)
{
	struct userspace_mem_region *region;
	sparsebit_idx_t pg, base;

	TEST_ASSERT(num > 0, "Must allocate at least one page");

	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
		"not divisible by page size.\n"
		"  paddr_min: 0x%lx page_size: 0x%x",
		paddr_min, vm->page_size);

	region = memslot2region(vm, memslot);
	base = pg = paddr_min >> vm->page_shift;

	do {
		for (; pg < base + num; ++pg) {
			if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
				base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
				break;
			}
		}
	} while (pg && pg != base + num);

	if (pg == 0) {
		fprintf(stderr, "No guest physical page available, "
			"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
			paddr_min, vm->page_size, memslot);
		fputs("---- vm dump ----\n", stderr);
		vm_dump(stderr, vm, 2);
		abort();
	}

	for (pg = base; pg < base + num; ++pg)
		sparsebit_clear(region->unused_phy_pages, pg);

	return base * vm->page_size;
}

vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
			     uint32_t memslot)
{
	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
}

vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
{
	return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR,
				 vm->memslots[MEM_REGION_PT]);
}

/*
 * Address Guest Virtual to Host Virtual
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gva - VM virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent host virtual address
 */
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
{
	return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
}

unsigned long __weak vm_compute_max_gfn(struct kvm_vm *vm)
{
	return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
}

static unsigned int vm_calc_num_pages(unsigned int num_pages,
				      unsigned int page_shift,
				      unsigned int new_page_shift,
				      bool ceil)
{
	unsigned int n;

	if (page_shift >= new_page_shift)
		return num_pages * (1 << (page_shift - new_page_shift));

	/* Converting to a larger page size: each new page holds n old pages. */
	n = 1 << (new_page_shift - page_shift);
	return num_pages / n + !!(ceil && num_pages % n);
}

static inline int getpageshift(void)
{
	return __builtin_ffs(getpagesize()) - 1;
}

unsigned int
vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
{
	return vm_calc_num_pages(num_guest_pages,
				 vm_guest_mode_params[mode].page_shift,
				 getpageshift(), true);
}

unsigned int
vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages)
{
	return vm_calc_num_pages(num_host_pages, getpageshift(),
				 vm_guest_mode_params[mode].page_shift, false);
}
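/*
 * Illustrative sketch (kept out of the build) of the conversion above,
 * assuming a 4K host page size: 16 guest pages in a 64K-page guest mode span
 * 16 * 64K = 1M, i.e. 256 host pages, and converting those 256 host pages
 * back rounds down to the original 16 guest pages.
 */
#if 0
static void example_page_count_conversion(void)
{
	unsigned int host_pages = vm_num_host_pages(VM_MODE_P40V48_64K, 16);
	unsigned int guest_pages = vm_num_guest_pages(VM_MODE_P40V48_64K,
						      host_pages);

	TEST_ASSERT(host_pages == 256, "Expected 256 host pages, got %u",
		    host_pages);
	TEST_ASSERT(guest_pages == 16, "Expected 16 guest pages, got %u",
		    guest_pages);
}
#endif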
unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
{
	unsigned int n;

	n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
	return vm_adjust_num_guest_pages(mode, n);
}

/*
 * Read binary stats descriptors
 *
 * Input Args:
 *   stats_fd - the file descriptor for the binary stats file from which to read
 *   header - the binary stats metadata header corresponding to the given FD
 *
 * Output Args: None
 *
 * Return:
 *   A pointer to a newly allocated series of stat descriptors.
 *   Caller is responsible for freeing the returned kvm_stats_desc.
 *
 * Read the stats descriptors from the binary stats interface.
 */
struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
					      struct kvm_stats_header *header)
{
	struct kvm_stats_desc *stats_desc;
	ssize_t desc_size, total_size, ret;

	desc_size = get_stats_descriptor_size(header);
	total_size = header->num_desc * desc_size;

	stats_desc = calloc(header->num_desc, desc_size);
	TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors");

	ret = pread(stats_fd, stats_desc, total_size, header->desc_offset);
	TEST_ASSERT(ret == total_size, "Read KVM stats descriptors");

	return stats_desc;
}
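/*
 * Illustrative sketch (kept out of the build): walk the VM's binary stats
 * descriptors and print their names. Descriptors are variable sized
 * (struct kvm_stats_desc plus header.name_size bytes of name), so the loop
 * steps by get_stats_descriptor_size() rather than sizeof(). The helper
 * name is hypothetical.
 */
#if 0
static void example_list_vm_stats(struct kvm_vm *vm)
{
	struct kvm_stats_desc *stats_desc, *desc;
	struct kvm_stats_header header;
	size_t size_desc;
	int stats_fd, i;

	stats_fd = vm_get_stats_fd(vm);
	read_stats_header(stats_fd, &header);
	stats_desc = read_stats_descriptors(stats_fd, &header);
	size_desc = get_stats_descriptor_size(&header);

	for (i = 0; i < header.num_desc; i++) {
		desc = (void *)stats_desc + (i * size_desc);
		pr_info("stat[%d]: %s\n", i, desc->name);
	}

	free(stats_desc);
	close(stats_fd);
}
#endif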
/*
 * Read stat data for a particular stat
 *
 * Input Args:
 *   stats_fd - the file descriptor for the binary stats file from which to read
 *   header - the binary stats metadata header corresponding to the given FD
 *   desc - the binary stat metadata for the particular stat to be read
 *   max_elements - the maximum number of 8-byte values to read into data
 *
 * Output Args:
 *   data - the buffer into which stat data should be read
 *
 * Read the data values of a specified stat from the binary stats interface.
 */
void read_stat_data(int stats_fd, struct kvm_stats_header *header,
		    struct kvm_stats_desc *desc, uint64_t *data,
		    size_t max_elements)
{
	size_t nr_elements = min_t(ssize_t, desc->size, max_elements);
	size_t size = nr_elements * sizeof(*data);
	ssize_t ret;

	TEST_ASSERT(desc->size, "No elements in stat '%s'", desc->name);
	TEST_ASSERT(max_elements, "Zero elements requested for stat '%s'", desc->name);

	ret = pread(stats_fd, data, size,
		    header->data_offset + desc->offset);

	TEST_ASSERT(ret >= 0, "pread() failed on stat '%s', errno: %i (%s)",
		    desc->name, errno, strerror(errno));
	TEST_ASSERT(ret == size,
		    "pread() on stat '%s' read %ld bytes, wanted %lu bytes",
		    desc->name, ret, size);
}

/*
 * Read the data of the named stat
 *
 * Input Args:
 *   vm - the VM for which the stat should be read
 *   stat_name - the name of the stat to read
 *   max_elements - the maximum number of 8-byte values to read into data
 *
 * Output Args:
 *   data - the buffer into which stat data should be read
 *
 * Read the data values of a specified stat from the binary stats interface.
 */
void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
		   size_t max_elements)
{
	struct kvm_stats_desc *desc;
	size_t size_desc;
	int i;

	if (!vm->stats_fd) {
		vm->stats_fd = vm_get_stats_fd(vm);
		read_stats_header(vm->stats_fd, &vm->stats_header);
		vm->stats_desc = read_stats_descriptors(vm->stats_fd,
							&vm->stats_header);
	}

	size_desc = get_stats_descriptor_size(&vm->stats_header);

	for (i = 0; i < vm->stats_header.num_desc; ++i) {
		desc = (void *)vm->stats_desc + (i * size_desc);

		if (strcmp(desc->name, stat_name))
			continue;

		read_stat_data(vm->stats_fd, &vm->stats_header, desc,
			       data, max_elements);

		break;
	}
}

__weak void kvm_arch_vm_post_create(struct kvm_vm *vm)
{
}

__weak void kvm_selftest_arch_init(void)
{
}

void __attribute((constructor)) kvm_selftest_init(void)
{
	/* Tell stdout not to buffer its content. */
	setbuf(stdout, NULL);

	kvm_selftest_arch_init();
}
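/*
 * Illustrative sketch (kept out of the build): pull a single named VM stat
 * through the cached binary stats path set up by __vm_get_stat() above.
 * "remote_tlb_flush" is only an example; available stat names depend on the
 * architecture and kernel configuration, and an unknown name leaves the
 * output value untouched.
 */
#if 0
static uint64_t example_read_named_stat(struct kvm_vm *vm)
{
	uint64_t value = 0;

	__vm_get_stat(vm, "remote_tlb_flush", &value, 1);
	return value;
}
#endif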