1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * KVM demand paging test 4 * Adapted from dirty_log_test.c 5 * 6 * Copyright (C) 2018, Red Hat, Inc. 7 * Copyright (C) 2019, Google, Inc. 8 */ 9 10 #define _GNU_SOURCE /* for program_invocation_name */ 11 12 #include <stdio.h> 13 #include <stdlib.h> 14 #include <sys/syscall.h> 15 #include <unistd.h> 16 #include <asm/unistd.h> 17 #include <time.h> 18 #include <poll.h> 19 #include <pthread.h> 20 #include <linux/bitmap.h> 21 #include <linux/bitops.h> 22 #include <linux/userfaultfd.h> 23 24 #include "perf_test_util.h" 25 #include "processor.h" 26 #include "test_util.h" 27 28 #ifdef __NR_userfaultfd 29 30 #ifdef PRINT_PER_PAGE_UPDATES 31 #define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) 32 #else 33 #define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__) 34 #endif 35 36 #ifdef PRINT_PER_VCPU_UPDATES 37 #define PER_VCPU_DEBUG(...) printf(__VA_ARGS__) 38 #else 39 #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) 40 #endif 41 42 static char *guest_data_prototype; 43 44 static void *vcpu_worker(void *data) 45 { 46 int ret; 47 struct vcpu_args *vcpu_args = (struct vcpu_args *)data; 48 int vcpu_id = vcpu_args->vcpu_id; 49 struct kvm_vm *vm = perf_test_args.vm; 50 struct kvm_run *run; 51 struct timespec start; 52 struct timespec ts_diff; 53 54 vcpu_args_set(vm, vcpu_id, 1, vcpu_id); 55 run = vcpu_state(vm, vcpu_id); 56 57 clock_gettime(CLOCK_MONOTONIC, &start); 58 59 /* Let the guest access its memory */ 60 ret = _vcpu_run(vm, vcpu_id); 61 TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); 62 if (get_ucall(vm, vcpu_id, NULL) != UCALL_SYNC) { 63 TEST_ASSERT(false, 64 "Invalid guest sync status: exit_reason=%s\n", 65 exit_reason_str(run->exit_reason)); 66 } 67 68 ts_diff = timespec_diff_now(start); 69 PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id, 70 ts_diff.tv_sec, ts_diff.tv_nsec); 71 72 return NULL; 73 } 74 75 static int handle_uffd_page_request(int uffd, uint64_t addr) 76 { 77 pid_t tid; 78 struct timespec start; 79 struct timespec ts_diff; 80 struct uffdio_copy copy; 81 int r; 82 83 tid = syscall(__NR_gettid); 84 85 copy.src = (uint64_t)guest_data_prototype; 86 copy.dst = addr; 87 copy.len = perf_test_args.host_page_size; 88 copy.mode = 0; 89 90 clock_gettime(CLOCK_MONOTONIC, &start); 91 92 r = ioctl(uffd, UFFDIO_COPY, ©); 93 if (r == -1) { 94 pr_info("Failed Paged in 0x%lx from thread %d with errno: %d\n", 95 addr, tid, errno); 96 return r; 97 } 98 99 ts_diff = timespec_diff_now(start); 100 101 PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid, 102 timespec_to_ns(ts_diff)); 103 PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n", 104 perf_test_args.host_page_size, addr, tid); 105 106 return 0; 107 } 108 109 bool quit_uffd_thread; 110 111 struct uffd_handler_args { 112 int uffd; 113 int pipefd; 114 useconds_t delay; 115 }; 116 117 static void *uffd_handler_thread_fn(void *arg) 118 { 119 struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg; 120 int uffd = uffd_args->uffd; 121 int pipefd = uffd_args->pipefd; 122 useconds_t delay = uffd_args->delay; 123 int64_t pages = 0; 124 struct timespec start; 125 struct timespec ts_diff; 126 127 clock_gettime(CLOCK_MONOTONIC, &start); 128 while (!quit_uffd_thread) { 129 struct uffd_msg msg; 130 struct pollfd pollfd[2]; 131 char tmp_chr; 132 int r; 133 uint64_t addr; 134 135 pollfd[0].fd = uffd; 136 pollfd[0].events = POLLIN; 137 pollfd[1].fd = pipefd; 138 pollfd[1].events = POLLIN; 139 140 r = poll(pollfd, 2, -1); 141 switch (r) { 142 case -1: 143 pr_info("poll err"); 144 continue; 145 case 0: 146 continue; 147 case 1: 148 break; 149 default: 150 pr_info("Polling uffd returned %d", r); 151 return NULL; 152 } 153 154 if (pollfd[0].revents & POLLERR) { 155 pr_info("uffd revents has POLLERR"); 156 return NULL; 157 } 158 159 if (pollfd[1].revents & POLLIN) { 160 r = read(pollfd[1].fd, &tmp_chr, 1); 161 TEST_ASSERT(r == 1, 162 "Error reading pipefd in UFFD thread\n"); 163 return NULL; 164 } 165 166 if (!pollfd[0].revents & POLLIN) 167 continue; 168 169 r = read(uffd, &msg, sizeof(msg)); 170 if (r == -1) { 171 if (errno == EAGAIN) 172 continue; 173 pr_info("Read of uffd gor errno %d", errno); 174 return NULL; 175 } 176 177 if (r != sizeof(msg)) { 178 pr_info("Read on uffd returned unexpected size: %d bytes", r); 179 return NULL; 180 } 181 182 if (!(msg.event & UFFD_EVENT_PAGEFAULT)) 183 continue; 184 185 if (delay) 186 usleep(delay); 187 addr = msg.arg.pagefault.address; 188 r = handle_uffd_page_request(uffd, addr); 189 if (r < 0) 190 return NULL; 191 pages++; 192 } 193 194 ts_diff = timespec_diff_now(start); 195 PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", 196 pages, ts_diff.tv_sec, ts_diff.tv_nsec, 197 pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); 198 199 return NULL; 200 } 201 202 static int setup_demand_paging(struct kvm_vm *vm, 203 pthread_t *uffd_handler_thread, int pipefd, 204 useconds_t uffd_delay, 205 struct uffd_handler_args *uffd_args, 206 void *hva, uint64_t len) 207 { 208 int uffd; 209 struct uffdio_api uffdio_api; 210 struct uffdio_register uffdio_register; 211 212 uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); 213 if (uffd == -1) { 214 pr_info("uffd creation failed\n"); 215 return -1; 216 } 217 218 uffdio_api.api = UFFD_API; 219 uffdio_api.features = 0; 220 if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) { 221 pr_info("ioctl uffdio_api failed\n"); 222 return -1; 223 } 224 225 uffdio_register.range.start = (uint64_t)hva; 226 uffdio_register.range.len = len; 227 uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; 228 if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) { 229 pr_info("ioctl uffdio_register failed\n"); 230 return -1; 231 } 232 233 if ((uffdio_register.ioctls & UFFD_API_RANGE_IOCTLS) != 234 UFFD_API_RANGE_IOCTLS) { 235 pr_info("unexpected userfaultfd ioctl set\n"); 236 return -1; 237 } 238 239 uffd_args->uffd = uffd; 240 uffd_args->pipefd = pipefd; 241 uffd_args->delay = uffd_delay; 242 pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn, 243 uffd_args); 244 245 PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", 246 hva, hva + len); 247 248 return 0; 249 } 250 251 static void run_test(enum vm_guest_mode mode, bool use_uffd, 252 useconds_t uffd_delay) 253 { 254 pthread_t *vcpu_threads; 255 pthread_t *uffd_handler_threads = NULL; 256 struct uffd_handler_args *uffd_args = NULL; 257 struct timespec start; 258 struct timespec ts_diff; 259 int *pipefds = NULL; 260 struct kvm_vm *vm; 261 int vcpu_id; 262 int r; 263 264 vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); 265 266 perf_test_args.wr_fract = 1; 267 268 guest_data_prototype = malloc(perf_test_args.host_page_size); 269 TEST_ASSERT(guest_data_prototype, 270 "Failed to allocate buffer for guest data pattern"); 271 memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size); 272 273 vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); 274 TEST_ASSERT(vcpu_threads, "Memory allocation failed"); 275 276 add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); 277 278 if (use_uffd) { 279 uffd_handler_threads = 280 malloc(nr_vcpus * sizeof(*uffd_handler_threads)); 281 TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); 282 283 uffd_args = malloc(nr_vcpus * sizeof(*uffd_args)); 284 TEST_ASSERT(uffd_args, "Memory allocation failed"); 285 286 pipefds = malloc(sizeof(int) * nr_vcpus * 2); 287 TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); 288 289 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 290 vm_paddr_t vcpu_gpa; 291 void *vcpu_hva; 292 293 vcpu_gpa = guest_test_phys_mem + (vcpu_id * guest_percpu_mem_size); 294 PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", 295 vcpu_id, vcpu_gpa, vcpu_gpa + guest_percpu_mem_size); 296 297 /* Cache the HVA pointer of the region */ 298 vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); 299 300 /* 301 * Set up user fault fd to handle demand paging 302 * requests. 303 */ 304 r = pipe2(&pipefds[vcpu_id * 2], 305 O_CLOEXEC | O_NONBLOCK); 306 TEST_ASSERT(!r, "Failed to set up pipefd"); 307 308 r = setup_demand_paging(vm, 309 &uffd_handler_threads[vcpu_id], 310 pipefds[vcpu_id * 2], 311 uffd_delay, &uffd_args[vcpu_id], 312 vcpu_hva, guest_percpu_mem_size); 313 if (r < 0) 314 exit(-r); 315 } 316 } 317 318 /* Export the shared variables to the guest */ 319 sync_global_to_guest(vm, perf_test_args); 320 321 pr_info("Finished creating vCPUs and starting uffd threads\n"); 322 323 clock_gettime(CLOCK_MONOTONIC, &start); 324 325 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 326 pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, 327 &perf_test_args.vcpu_args[vcpu_id]); 328 } 329 330 pr_info("Started all vCPUs\n"); 331 332 /* Wait for the vcpu threads to quit */ 333 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 334 pthread_join(vcpu_threads[vcpu_id], NULL); 335 PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id); 336 } 337 338 ts_diff = timespec_diff_now(start); 339 340 pr_info("All vCPU threads joined\n"); 341 342 if (use_uffd) { 343 char c; 344 345 /* Tell the user fault fd handler threads to quit */ 346 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 347 r = write(pipefds[vcpu_id * 2 + 1], &c, 1); 348 TEST_ASSERT(r == 1, "Unable to write to pipefd"); 349 350 pthread_join(uffd_handler_threads[vcpu_id], NULL); 351 } 352 } 353 354 pr_info("Total guest execution time: %ld.%.9lds\n", 355 ts_diff.tv_sec, ts_diff.tv_nsec); 356 pr_info("Overall demand paging rate: %f pgs/sec\n", 357 perf_test_args.vcpu_args[0].pages * nr_vcpus / 358 ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); 359 360 ucall_uninit(vm); 361 kvm_vm_free(vm); 362 363 free(guest_data_prototype); 364 free(vcpu_threads); 365 if (use_uffd) { 366 free(uffd_handler_threads); 367 free(uffd_args); 368 free(pipefds); 369 } 370 } 371 372 struct guest_mode { 373 bool supported; 374 bool enabled; 375 }; 376 static struct guest_mode guest_modes[NUM_VM_MODES]; 377 378 #define guest_mode_init(mode, supported, enabled) ({ \ 379 guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ 380 }) 381 382 static void help(char *name) 383 { 384 int i; 385 386 puts(""); 387 printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n" 388 " [-b memory] [-v vcpus]\n", name); 389 printf(" -m: specify the guest mode ID to test\n" 390 " (default: test all supported modes)\n" 391 " This option may be used multiple times.\n" 392 " Guest mode IDs:\n"); 393 for (i = 0; i < NUM_VM_MODES; ++i) { 394 printf(" %d: %s%s\n", i, vm_guest_mode_string(i), 395 guest_modes[i].supported ? " (supported)" : ""); 396 } 397 printf(" -u: use User Fault FD to handle vCPU page\n" 398 " faults.\n"); 399 printf(" -d: add a delay in usec to the User Fault\n" 400 " FD handler to simulate demand paging\n" 401 " overheads. Ignored without -u.\n"); 402 printf(" -b: specify the size of the memory region which should be\n" 403 " demand paged by each vCPU. e.g. 10M or 3G.\n" 404 " Default: 1G\n"); 405 printf(" -v: specify the number of vCPUs to run.\n"); 406 puts(""); 407 exit(0); 408 } 409 410 int main(int argc, char *argv[]) 411 { 412 int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); 413 bool mode_selected = false; 414 unsigned int mode; 415 int opt, i; 416 bool use_uffd = false; 417 useconds_t uffd_delay = 0; 418 419 #ifdef __x86_64__ 420 guest_mode_init(VM_MODE_PXXV48_4K, true, true); 421 #endif 422 #ifdef __aarch64__ 423 guest_mode_init(VM_MODE_P40V48_4K, true, true); 424 guest_mode_init(VM_MODE_P40V48_64K, true, true); 425 { 426 unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); 427 428 if (limit >= 52) 429 guest_mode_init(VM_MODE_P52V48_64K, true, true); 430 if (limit >= 48) { 431 guest_mode_init(VM_MODE_P48V48_4K, true, true); 432 guest_mode_init(VM_MODE_P48V48_64K, true, true); 433 } 434 } 435 #endif 436 #ifdef __s390x__ 437 guest_mode_init(VM_MODE_P40V48_4K, true, true); 438 #endif 439 440 while ((opt = getopt(argc, argv, "hm:ud:b:v:")) != -1) { 441 switch (opt) { 442 case 'm': 443 if (!mode_selected) { 444 for (i = 0; i < NUM_VM_MODES; ++i) 445 guest_modes[i].enabled = false; 446 mode_selected = true; 447 } 448 mode = strtoul(optarg, NULL, 10); 449 TEST_ASSERT(mode < NUM_VM_MODES, 450 "Guest mode ID %d too big", mode); 451 guest_modes[mode].enabled = true; 452 break; 453 case 'u': 454 use_uffd = true; 455 break; 456 case 'd': 457 uffd_delay = strtoul(optarg, NULL, 0); 458 TEST_ASSERT(uffd_delay >= 0, 459 "A negative UFFD delay is not supported."); 460 break; 461 case 'b': 462 guest_percpu_mem_size = parse_size(optarg); 463 break; 464 case 'v': 465 nr_vcpus = atoi(optarg); 466 TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, 467 "Invalid number of vcpus, must be between 1 and %d", max_vcpus); 468 break; 469 case 'h': 470 default: 471 help(argv[0]); 472 break; 473 } 474 } 475 476 for (i = 0; i < NUM_VM_MODES; ++i) { 477 if (!guest_modes[i].enabled) 478 continue; 479 TEST_ASSERT(guest_modes[i].supported, 480 "Guest mode ID %d (%s) not supported.", 481 i, vm_guest_mode_string(i)); 482 run_test(i, use_uffd, uffd_delay); 483 } 484 485 return 0; 486 } 487 488 #else /* __NR_userfaultfd */ 489 490 #warning "missing __NR_userfaultfd definition" 491 492 int main(void) 493 { 494 print_skip("__NR_userfaultfd must be present for userfaultfd test"); 495 return KSFT_SKIP; 496 } 497 498 #endif /* __NR_userfaultfd */ 499