1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * KVM demand paging test 4 * Adapted from dirty_log_test.c 5 * 6 * Copyright (C) 2018, Red Hat, Inc. 7 * Copyright (C) 2019, Google, Inc. 8 */ 9 10 #define _GNU_SOURCE /* for pipe2 */ 11 12 #include <inttypes.h> 13 #include <stdio.h> 14 #include <stdlib.h> 15 #include <time.h> 16 #include <poll.h> 17 #include <pthread.h> 18 #include <linux/userfaultfd.h> 19 #include <sys/syscall.h> 20 21 #include "kvm_util.h" 22 #include "test_util.h" 23 #include "perf_test_util.h" 24 #include "guest_modes.h" 25 26 #ifdef __NR_userfaultfd 27 28 #ifdef PRINT_PER_PAGE_UPDATES 29 #define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) 30 #else 31 #define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__) 32 #endif 33 34 #ifdef PRINT_PER_VCPU_UPDATES 35 #define PER_VCPU_DEBUG(...) printf(__VA_ARGS__) 36 #else 37 #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) 38 #endif 39 40 static int nr_vcpus = 1; 41 static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; 42 static size_t demand_paging_size; 43 static char *guest_data_prototype; 44 45 static void *vcpu_worker(void *data) 46 { 47 int ret; 48 struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data; 49 int vcpu_id = vcpu_args->vcpu_id; 50 struct kvm_vm *vm = perf_test_args.vm; 51 struct kvm_run *run; 52 struct timespec start; 53 struct timespec ts_diff; 54 55 run = vcpu_state(vm, vcpu_id); 56 57 clock_gettime(CLOCK_MONOTONIC, &start); 58 59 /* Let the guest access its memory */ 60 ret = _vcpu_run(vm, vcpu_id); 61 TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); 62 if (get_ucall(vm, vcpu_id, NULL) != UCALL_SYNC) { 63 TEST_ASSERT(false, 64 "Invalid guest sync status: exit_reason=%s\n", 65 exit_reason_str(run->exit_reason)); 66 } 67 68 ts_diff = timespec_elapsed(start); 69 PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id, 70 ts_diff.tv_sec, ts_diff.tv_nsec); 71 72 return NULL; 73 } 74 75 static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr) 76 { 77 pid_t tid = syscall(__NR_gettid); 78 struct timespec start; 79 struct timespec ts_diff; 80 int r; 81 82 clock_gettime(CLOCK_MONOTONIC, &start); 83 84 if (uffd_mode == UFFDIO_REGISTER_MODE_MISSING) { 85 struct uffdio_copy copy; 86 87 copy.src = (uint64_t)guest_data_prototype; 88 copy.dst = addr; 89 copy.len = demand_paging_size; 90 copy.mode = 0; 91 92 r = ioctl(uffd, UFFDIO_COPY, ©); 93 if (r == -1) { 94 pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n", 95 addr, tid, errno); 96 return r; 97 } 98 } else if (uffd_mode == UFFDIO_REGISTER_MODE_MINOR) { 99 struct uffdio_continue cont = {0}; 100 101 cont.range.start = addr; 102 cont.range.len = demand_paging_size; 103 104 r = ioctl(uffd, UFFDIO_CONTINUE, &cont); 105 if (r == -1) { 106 pr_info("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n", 107 addr, tid, errno); 108 return r; 109 } 110 } else { 111 TEST_FAIL("Invalid uffd mode %d", uffd_mode); 112 } 113 114 ts_diff = timespec_elapsed(start); 115 116 PER_PAGE_DEBUG("UFFD page-in %d \t%ld ns\n", tid, 117 timespec_to_ns(ts_diff)); 118 PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n", 119 demand_paging_size, addr, tid); 120 121 return 0; 122 } 123 124 bool quit_uffd_thread; 125 126 struct uffd_handler_args { 127 int uffd_mode; 128 int uffd; 129 int pipefd; 130 useconds_t delay; 131 }; 132 133 static void *uffd_handler_thread_fn(void *arg) 134 { 135 struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg; 136 int uffd = uffd_args->uffd; 137 int pipefd = uffd_args->pipefd; 138 useconds_t delay = uffd_args->delay; 139 int64_t pages = 0; 140 struct timespec start; 141 struct timespec ts_diff; 142 143 clock_gettime(CLOCK_MONOTONIC, &start); 144 while (!quit_uffd_thread) { 145 struct uffd_msg msg; 146 struct pollfd pollfd[2]; 147 char tmp_chr; 148 int r; 149 uint64_t addr; 150 151 pollfd[0].fd = uffd; 152 pollfd[0].events = POLLIN; 153 pollfd[1].fd = pipefd; 154 pollfd[1].events = POLLIN; 155 156 r = poll(pollfd, 2, -1); 157 switch (r) { 158 case -1: 159 pr_info("poll err"); 160 continue; 161 case 0: 162 continue; 163 case 1: 164 break; 165 default: 166 pr_info("Polling uffd returned %d", r); 167 return NULL; 168 } 169 170 if (pollfd[0].revents & POLLERR) { 171 pr_info("uffd revents has POLLERR"); 172 return NULL; 173 } 174 175 if (pollfd[1].revents & POLLIN) { 176 r = read(pollfd[1].fd, &tmp_chr, 1); 177 TEST_ASSERT(r == 1, 178 "Error reading pipefd in UFFD thread\n"); 179 return NULL; 180 } 181 182 if (!(pollfd[0].revents & POLLIN)) 183 continue; 184 185 r = read(uffd, &msg, sizeof(msg)); 186 if (r == -1) { 187 if (errno == EAGAIN) 188 continue; 189 pr_info("Read of uffd got errno %d\n", errno); 190 return NULL; 191 } 192 193 if (r != sizeof(msg)) { 194 pr_info("Read on uffd returned unexpected size: %d bytes", r); 195 return NULL; 196 } 197 198 if (!(msg.event & UFFD_EVENT_PAGEFAULT)) 199 continue; 200 201 if (delay) 202 usleep(delay); 203 addr = msg.arg.pagefault.address; 204 r = handle_uffd_page_request(uffd_args->uffd_mode, uffd, addr); 205 if (r < 0) 206 return NULL; 207 pages++; 208 } 209 210 ts_diff = timespec_elapsed(start); 211 PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", 212 pages, ts_diff.tv_sec, ts_diff.tv_nsec, 213 pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); 214 215 return NULL; 216 } 217 218 static void setup_demand_paging(struct kvm_vm *vm, 219 pthread_t *uffd_handler_thread, int pipefd, 220 int uffd_mode, useconds_t uffd_delay, 221 struct uffd_handler_args *uffd_args, 222 void *hva, void *alias, uint64_t len) 223 { 224 bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR); 225 int uffd; 226 struct uffdio_api uffdio_api; 227 struct uffdio_register uffdio_register; 228 uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; 229 230 PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", 231 is_minor ? "MINOR" : "MISSING", 232 is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); 233 234 /* In order to get minor faults, prefault via the alias. */ 235 if (is_minor) { 236 size_t p; 237 238 expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE; 239 240 TEST_ASSERT(alias != NULL, "Alias required for minor faults"); 241 for (p = 0; p < (len / demand_paging_size); ++p) { 242 memcpy(alias + (p * demand_paging_size), 243 guest_data_prototype, demand_paging_size); 244 } 245 } 246 247 uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); 248 TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno); 249 250 uffdio_api.api = UFFD_API; 251 uffdio_api.features = 0; 252 TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1, 253 "ioctl UFFDIO_API failed: %" PRIu64, 254 (uint64_t)uffdio_api.api); 255 256 uffdio_register.range.start = (uint64_t)hva; 257 uffdio_register.range.len = len; 258 uffdio_register.mode = uffd_mode; 259 TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1, 260 "ioctl UFFDIO_REGISTER failed"); 261 TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) == 262 expected_ioctls, "missing userfaultfd ioctls"); 263 264 uffd_args->uffd_mode = uffd_mode; 265 uffd_args->uffd = uffd; 266 uffd_args->pipefd = pipefd; 267 uffd_args->delay = uffd_delay; 268 pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn, 269 uffd_args); 270 271 PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", 272 hva, hva + len); 273 } 274 275 struct test_params { 276 int uffd_mode; 277 useconds_t uffd_delay; 278 enum vm_mem_backing_src_type src_type; 279 bool partition_vcpu_memory_access; 280 }; 281 282 static void run_test(enum vm_guest_mode mode, void *arg) 283 { 284 struct test_params *p = arg; 285 pthread_t *vcpu_threads; 286 pthread_t *uffd_handler_threads = NULL; 287 struct uffd_handler_args *uffd_args = NULL; 288 struct timespec start; 289 struct timespec ts_diff; 290 int *pipefds = NULL; 291 struct kvm_vm *vm; 292 int vcpu_id; 293 int r; 294 295 vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, 296 p->src_type); 297 298 perf_test_args.wr_fract = 1; 299 300 demand_paging_size = get_backing_src_pagesz(p->src_type); 301 302 guest_data_prototype = malloc(demand_paging_size); 303 TEST_ASSERT(guest_data_prototype, 304 "Failed to allocate buffer for guest data pattern"); 305 memset(guest_data_prototype, 0xAB, demand_paging_size); 306 307 vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); 308 TEST_ASSERT(vcpu_threads, "Memory allocation failed"); 309 310 perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size, 311 p->partition_vcpu_memory_access); 312 313 if (p->uffd_mode) { 314 uffd_handler_threads = 315 malloc(nr_vcpus * sizeof(*uffd_handler_threads)); 316 TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); 317 318 uffd_args = malloc(nr_vcpus * sizeof(*uffd_args)); 319 TEST_ASSERT(uffd_args, "Memory allocation failed"); 320 321 pipefds = malloc(sizeof(int) * nr_vcpus * 2); 322 TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); 323 324 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 325 vm_paddr_t vcpu_gpa; 326 void *vcpu_hva; 327 void *vcpu_alias; 328 uint64_t vcpu_mem_size; 329 330 331 if (p->partition_vcpu_memory_access) { 332 vcpu_gpa = guest_test_phys_mem + 333 (vcpu_id * guest_percpu_mem_size); 334 vcpu_mem_size = guest_percpu_mem_size; 335 } else { 336 vcpu_gpa = guest_test_phys_mem; 337 vcpu_mem_size = guest_percpu_mem_size * nr_vcpus; 338 } 339 PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", 340 vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size); 341 342 /* Cache the host addresses of the region */ 343 vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); 344 vcpu_alias = addr_gpa2alias(vm, vcpu_gpa); 345 346 /* 347 * Set up user fault fd to handle demand paging 348 * requests. 349 */ 350 r = pipe2(&pipefds[vcpu_id * 2], 351 O_CLOEXEC | O_NONBLOCK); 352 TEST_ASSERT(!r, "Failed to set up pipefd"); 353 354 setup_demand_paging(vm, &uffd_handler_threads[vcpu_id], 355 pipefds[vcpu_id * 2], p->uffd_mode, 356 p->uffd_delay, &uffd_args[vcpu_id], 357 vcpu_hva, vcpu_alias, 358 vcpu_mem_size); 359 } 360 } 361 362 /* Export the shared variables to the guest */ 363 sync_global_to_guest(vm, perf_test_args); 364 365 pr_info("Finished creating vCPUs and starting uffd threads\n"); 366 367 clock_gettime(CLOCK_MONOTONIC, &start); 368 369 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 370 pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, 371 &perf_test_args.vcpu_args[vcpu_id]); 372 } 373 374 pr_info("Started all vCPUs\n"); 375 376 /* Wait for the vcpu threads to quit */ 377 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 378 pthread_join(vcpu_threads[vcpu_id], NULL); 379 PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id); 380 } 381 382 ts_diff = timespec_elapsed(start); 383 384 pr_info("All vCPU threads joined\n"); 385 386 if (p->uffd_mode) { 387 char c; 388 389 /* Tell the user fault fd handler threads to quit */ 390 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 391 r = write(pipefds[vcpu_id * 2 + 1], &c, 1); 392 TEST_ASSERT(r == 1, "Unable to write to pipefd"); 393 394 pthread_join(uffd_handler_threads[vcpu_id], NULL); 395 } 396 } 397 398 pr_info("Total guest execution time: %ld.%.9lds\n", 399 ts_diff.tv_sec, ts_diff.tv_nsec); 400 pr_info("Overall demand paging rate: %f pgs/sec\n", 401 perf_test_args.vcpu_args[0].pages * nr_vcpus / 402 ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); 403 404 perf_test_destroy_vm(vm); 405 406 free(guest_data_prototype); 407 free(vcpu_threads); 408 if (p->uffd_mode) { 409 free(uffd_handler_threads); 410 free(uffd_args); 411 free(pipefds); 412 } 413 } 414 415 static void help(char *name) 416 { 417 puts(""); 418 printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n" 419 " [-b memory] [-s type] [-v vcpus] [-o]\n", name); 420 guest_modes_help(); 421 printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n" 422 " UFFD registration mode: 'MISSING' or 'MINOR'.\n"); 423 printf(" -d: add a delay in usec to the User Fault\n" 424 " FD handler to simulate demand paging\n" 425 " overheads. Ignored without -u.\n"); 426 printf(" -b: specify the size of the memory region which should be\n" 427 " demand paged by each vCPU. e.g. 10M or 3G.\n" 428 " Default: 1G\n"); 429 backing_src_help("-s"); 430 printf(" -v: specify the number of vCPUs to run.\n"); 431 printf(" -o: Overlap guest memory accesses instead of partitioning\n" 432 " them into a separate region of memory for each vCPU.\n"); 433 puts(""); 434 exit(0); 435 } 436 437 int main(int argc, char *argv[]) 438 { 439 int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); 440 struct test_params p = { 441 .src_type = DEFAULT_VM_MEM_SRC, 442 .partition_vcpu_memory_access = true, 443 }; 444 int opt; 445 446 guest_modes_append_default(); 447 448 while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:o")) != -1) { 449 switch (opt) { 450 case 'm': 451 guest_modes_cmdline(optarg); 452 break; 453 case 'u': 454 if (!strcmp("MISSING", optarg)) 455 p.uffd_mode = UFFDIO_REGISTER_MODE_MISSING; 456 else if (!strcmp("MINOR", optarg)) 457 p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR; 458 TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'."); 459 break; 460 case 'd': 461 p.uffd_delay = strtoul(optarg, NULL, 0); 462 TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported."); 463 break; 464 case 'b': 465 guest_percpu_mem_size = parse_size(optarg); 466 break; 467 case 's': 468 p.src_type = parse_backing_src_type(optarg); 469 break; 470 case 'v': 471 nr_vcpus = atoi(optarg); 472 TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, 473 "Invalid number of vcpus, must be between 1 and %d", max_vcpus); 474 break; 475 case 'o': 476 p.partition_vcpu_memory_access = false; 477 break; 478 case 'h': 479 default: 480 help(argv[0]); 481 break; 482 } 483 } 484 485 if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR && 486 !backing_src_is_shared(p.src_type)) { 487 TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s"); 488 } 489 490 for_each_guest_mode(run_test, &p); 491 492 return 0; 493 } 494 495 #else /* __NR_userfaultfd */ 496 497 #warning "missing __NR_userfaultfd definition" 498 499 int main(void) 500 { 501 print_skip("__NR_userfaultfd must be present for userfaultfd test"); 502 return KSFT_SKIP; 503 } 504 505 #endif /* __NR_userfaultfd */ 506