1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * KVM demand paging test 4 * Adapted from dirty_log_test.c 5 * 6 * Copyright (C) 2018, Red Hat, Inc. 7 * Copyright (C) 2019, Google, Inc. 8 */ 9 10 #define _GNU_SOURCE /* for pipe2 */ 11 12 #include <inttypes.h> 13 #include <stdio.h> 14 #include <stdlib.h> 15 #include <time.h> 16 #include <poll.h> 17 #include <pthread.h> 18 #include <linux/userfaultfd.h> 19 #include <sys/syscall.h> 20 21 #include "kvm_util.h" 22 #include "test_util.h" 23 #include "perf_test_util.h" 24 #include "guest_modes.h" 25 26 #ifdef __NR_userfaultfd 27 28 #ifdef PRINT_PER_PAGE_UPDATES 29 #define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) 30 #else 31 #define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__) 32 #endif 33 34 #ifdef PRINT_PER_VCPU_UPDATES 35 #define PER_VCPU_DEBUG(...) printf(__VA_ARGS__) 36 #else 37 #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) 38 #endif 39 40 static int nr_vcpus = 1; 41 static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; 42 static size_t demand_paging_size; 43 static char *guest_data_prototype; 44 45 static void *vcpu_worker(void *data) 46 { 47 int ret; 48 struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data; 49 int vcpu_id = vcpu_args->vcpu_id; 50 struct kvm_vm *vm = perf_test_args.vm; 51 struct kvm_run *run; 52 struct timespec start; 53 struct timespec ts_diff; 54 55 vcpu_args_set(vm, vcpu_id, 1, vcpu_id); 56 run = vcpu_state(vm, vcpu_id); 57 58 clock_gettime(CLOCK_MONOTONIC, &start); 59 60 /* Let the guest access its memory */ 61 ret = _vcpu_run(vm, vcpu_id); 62 TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); 63 if (get_ucall(vm, vcpu_id, NULL) != UCALL_SYNC) { 64 TEST_ASSERT(false, 65 "Invalid guest sync status: exit_reason=%s\n", 66 exit_reason_str(run->exit_reason)); 67 } 68 69 ts_diff = timespec_elapsed(start); 70 PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id, 71 ts_diff.tv_sec, ts_diff.tv_nsec); 72 73 return NULL; 74 } 75 76 static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr) 77 { 78 pid_t tid = syscall(__NR_gettid); 79 struct timespec start; 80 struct timespec ts_diff; 81 int r; 82 83 clock_gettime(CLOCK_MONOTONIC, &start); 84 85 if (uffd_mode == UFFDIO_REGISTER_MODE_MISSING) { 86 struct uffdio_copy copy; 87 88 copy.src = (uint64_t)guest_data_prototype; 89 copy.dst = addr; 90 copy.len = demand_paging_size; 91 copy.mode = 0; 92 93 r = ioctl(uffd, UFFDIO_COPY, ©); 94 if (r == -1) { 95 pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n", 96 addr, tid, errno); 97 return r; 98 } 99 } else if (uffd_mode == UFFDIO_REGISTER_MODE_MINOR) { 100 struct uffdio_continue cont = {0}; 101 102 cont.range.start = addr; 103 cont.range.len = demand_paging_size; 104 105 r = ioctl(uffd, UFFDIO_CONTINUE, &cont); 106 if (r == -1) { 107 pr_info("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n", 108 addr, tid, errno); 109 return r; 110 } 111 } else { 112 TEST_FAIL("Invalid uffd mode %d", uffd_mode); 113 } 114 115 ts_diff = timespec_elapsed(start); 116 117 PER_PAGE_DEBUG("UFFD page-in %d \t%ld ns\n", tid, 118 timespec_to_ns(ts_diff)); 119 PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n", 120 demand_paging_size, addr, tid); 121 122 return 0; 123 } 124 125 bool quit_uffd_thread; 126 127 struct uffd_handler_args { 128 int uffd_mode; 129 int uffd; 130 int pipefd; 131 useconds_t delay; 132 }; 133 134 static void *uffd_handler_thread_fn(void *arg) 135 { 136 struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg; 137 int uffd = uffd_args->uffd; 138 int pipefd = uffd_args->pipefd; 139 useconds_t delay = uffd_args->delay; 140 int64_t pages = 0; 141 struct timespec start; 142 struct timespec ts_diff; 143 144 clock_gettime(CLOCK_MONOTONIC, &start); 145 while (!quit_uffd_thread) { 146 struct uffd_msg msg; 147 struct pollfd pollfd[2]; 148 char tmp_chr; 149 int r; 150 uint64_t addr; 151 152 pollfd[0].fd = uffd; 153 pollfd[0].events = POLLIN; 154 pollfd[1].fd = pipefd; 155 pollfd[1].events = POLLIN; 156 157 r = poll(pollfd, 2, -1); 158 switch (r) { 159 case -1: 160 pr_info("poll err"); 161 continue; 162 case 0: 163 continue; 164 case 1: 165 break; 166 default: 167 pr_info("Polling uffd returned %d", r); 168 return NULL; 169 } 170 171 if (pollfd[0].revents & POLLERR) { 172 pr_info("uffd revents has POLLERR"); 173 return NULL; 174 } 175 176 if (pollfd[1].revents & POLLIN) { 177 r = read(pollfd[1].fd, &tmp_chr, 1); 178 TEST_ASSERT(r == 1, 179 "Error reading pipefd in UFFD thread\n"); 180 return NULL; 181 } 182 183 if (!pollfd[0].revents & POLLIN) 184 continue; 185 186 r = read(uffd, &msg, sizeof(msg)); 187 if (r == -1) { 188 if (errno == EAGAIN) 189 continue; 190 pr_info("Read of uffd got errno %d\n", errno); 191 return NULL; 192 } 193 194 if (r != sizeof(msg)) { 195 pr_info("Read on uffd returned unexpected size: %d bytes", r); 196 return NULL; 197 } 198 199 if (!(msg.event & UFFD_EVENT_PAGEFAULT)) 200 continue; 201 202 if (delay) 203 usleep(delay); 204 addr = msg.arg.pagefault.address; 205 r = handle_uffd_page_request(uffd_args->uffd_mode, uffd, addr); 206 if (r < 0) 207 return NULL; 208 pages++; 209 } 210 211 ts_diff = timespec_elapsed(start); 212 PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", 213 pages, ts_diff.tv_sec, ts_diff.tv_nsec, 214 pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); 215 216 return NULL; 217 } 218 219 static void setup_demand_paging(struct kvm_vm *vm, 220 pthread_t *uffd_handler_thread, int pipefd, 221 int uffd_mode, useconds_t uffd_delay, 222 struct uffd_handler_args *uffd_args, 223 void *hva, void *alias, uint64_t len) 224 { 225 bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR); 226 int uffd; 227 struct uffdio_api uffdio_api; 228 struct uffdio_register uffdio_register; 229 uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; 230 231 PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", 232 is_minor ? "MINOR" : "MISSING", 233 is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); 234 235 /* In order to get minor faults, prefault via the alias. */ 236 if (is_minor) { 237 size_t p; 238 239 expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE; 240 241 TEST_ASSERT(alias != NULL, "Alias required for minor faults"); 242 for (p = 0; p < (len / demand_paging_size); ++p) { 243 memcpy(alias + (p * demand_paging_size), 244 guest_data_prototype, demand_paging_size); 245 } 246 } 247 248 uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); 249 TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno); 250 251 uffdio_api.api = UFFD_API; 252 uffdio_api.features = 0; 253 TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1, 254 "ioctl UFFDIO_API failed: %" PRIu64, 255 (uint64_t)uffdio_api.api); 256 257 uffdio_register.range.start = (uint64_t)hva; 258 uffdio_register.range.len = len; 259 uffdio_register.mode = uffd_mode; 260 TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1, 261 "ioctl UFFDIO_REGISTER failed"); 262 TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) == 263 expected_ioctls, "missing userfaultfd ioctls"); 264 265 uffd_args->uffd_mode = uffd_mode; 266 uffd_args->uffd = uffd; 267 uffd_args->pipefd = pipefd; 268 uffd_args->delay = uffd_delay; 269 pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn, 270 uffd_args); 271 272 PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", 273 hva, hva + len); 274 } 275 276 struct test_params { 277 int uffd_mode; 278 useconds_t uffd_delay; 279 enum vm_mem_backing_src_type src_type; 280 bool partition_vcpu_memory_access; 281 }; 282 283 static void run_test(enum vm_guest_mode mode, void *arg) 284 { 285 struct test_params *p = arg; 286 pthread_t *vcpu_threads; 287 pthread_t *uffd_handler_threads = NULL; 288 struct uffd_handler_args *uffd_args = NULL; 289 struct timespec start; 290 struct timespec ts_diff; 291 int *pipefds = NULL; 292 struct kvm_vm *vm; 293 int vcpu_id; 294 int r; 295 296 vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 297 p->src_type); 298 299 perf_test_args.wr_fract = 1; 300 301 demand_paging_size = get_backing_src_pagesz(p->src_type); 302 303 guest_data_prototype = malloc(demand_paging_size); 304 TEST_ASSERT(guest_data_prototype, 305 "Failed to allocate buffer for guest data pattern"); 306 memset(guest_data_prototype, 0xAB, demand_paging_size); 307 308 vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); 309 TEST_ASSERT(vcpu_threads, "Memory allocation failed"); 310 311 perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size, 312 p->partition_vcpu_memory_access); 313 314 if (p->uffd_mode) { 315 uffd_handler_threads = 316 malloc(nr_vcpus * sizeof(*uffd_handler_threads)); 317 TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); 318 319 uffd_args = malloc(nr_vcpus * sizeof(*uffd_args)); 320 TEST_ASSERT(uffd_args, "Memory allocation failed"); 321 322 pipefds = malloc(sizeof(int) * nr_vcpus * 2); 323 TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); 324 325 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 326 vm_paddr_t vcpu_gpa; 327 void *vcpu_hva; 328 void *vcpu_alias; 329 uint64_t vcpu_mem_size; 330 331 332 if (p->partition_vcpu_memory_access) { 333 vcpu_gpa = guest_test_phys_mem + 334 (vcpu_id * guest_percpu_mem_size); 335 vcpu_mem_size = guest_percpu_mem_size; 336 } else { 337 vcpu_gpa = guest_test_phys_mem; 338 vcpu_mem_size = guest_percpu_mem_size * nr_vcpus; 339 } 340 PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", 341 vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size); 342 343 /* Cache the host addresses of the region */ 344 vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); 345 vcpu_alias = addr_gpa2alias(vm, vcpu_gpa); 346 347 /* 348 * Set up user fault fd to handle demand paging 349 * requests. 350 */ 351 r = pipe2(&pipefds[vcpu_id * 2], 352 O_CLOEXEC | O_NONBLOCK); 353 TEST_ASSERT(!r, "Failed to set up pipefd"); 354 355 setup_demand_paging(vm, &uffd_handler_threads[vcpu_id], 356 pipefds[vcpu_id * 2], p->uffd_mode, 357 p->uffd_delay, &uffd_args[vcpu_id], 358 vcpu_hva, vcpu_alias, 359 vcpu_mem_size); 360 } 361 } 362 363 /* Export the shared variables to the guest */ 364 sync_global_to_guest(vm, perf_test_args); 365 366 pr_info("Finished creating vCPUs and starting uffd threads\n"); 367 368 clock_gettime(CLOCK_MONOTONIC, &start); 369 370 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 371 pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, 372 &perf_test_args.vcpu_args[vcpu_id]); 373 } 374 375 pr_info("Started all vCPUs\n"); 376 377 /* Wait for the vcpu threads to quit */ 378 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 379 pthread_join(vcpu_threads[vcpu_id], NULL); 380 PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id); 381 } 382 383 ts_diff = timespec_elapsed(start); 384 385 pr_info("All vCPU threads joined\n"); 386 387 if (p->uffd_mode) { 388 char c; 389 390 /* Tell the user fault fd handler threads to quit */ 391 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 392 r = write(pipefds[vcpu_id * 2 + 1], &c, 1); 393 TEST_ASSERT(r == 1, "Unable to write to pipefd"); 394 395 pthread_join(uffd_handler_threads[vcpu_id], NULL); 396 } 397 } 398 399 pr_info("Total guest execution time: %ld.%.9lds\n", 400 ts_diff.tv_sec, ts_diff.tv_nsec); 401 pr_info("Overall demand paging rate: %f pgs/sec\n", 402 perf_test_args.vcpu_args[0].pages * nr_vcpus / 403 ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); 404 405 perf_test_destroy_vm(vm); 406 407 free(guest_data_prototype); 408 free(vcpu_threads); 409 if (p->uffd_mode) { 410 free(uffd_handler_threads); 411 free(uffd_args); 412 free(pipefds); 413 } 414 } 415 416 static void help(char *name) 417 { 418 puts(""); 419 printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n" 420 " [-b memory] [-t type] [-v vcpus] [-o]\n", name); 421 guest_modes_help(); 422 printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n" 423 " UFFD registration mode: 'MISSING' or 'MINOR'.\n"); 424 printf(" -d: add a delay in usec to the User Fault\n" 425 " FD handler to simulate demand paging\n" 426 " overheads. Ignored without -u.\n"); 427 printf(" -b: specify the size of the memory region which should be\n" 428 " demand paged by each vCPU. e.g. 10M or 3G.\n" 429 " Default: 1G\n"); 430 printf(" -t: The type of backing memory to use. Default: anonymous\n"); 431 backing_src_help(); 432 printf(" -v: specify the number of vCPUs to run.\n"); 433 printf(" -o: Overlap guest memory accesses instead of partitioning\n" 434 " them into a separate region of memory for each vCPU.\n"); 435 puts(""); 436 exit(0); 437 } 438 439 int main(int argc, char *argv[]) 440 { 441 int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); 442 struct test_params p = { 443 .src_type = VM_MEM_SRC_ANONYMOUS, 444 .partition_vcpu_memory_access = true, 445 }; 446 int opt; 447 448 guest_modes_append_default(); 449 450 while ((opt = getopt(argc, argv, "hm:u:d:b:t:v:o")) != -1) { 451 switch (opt) { 452 case 'm': 453 guest_modes_cmdline(optarg); 454 break; 455 case 'u': 456 if (!strcmp("MISSING", optarg)) 457 p.uffd_mode = UFFDIO_REGISTER_MODE_MISSING; 458 else if (!strcmp("MINOR", optarg)) 459 p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR; 460 TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'."); 461 break; 462 case 'd': 463 p.uffd_delay = strtoul(optarg, NULL, 0); 464 TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported."); 465 break; 466 case 'b': 467 guest_percpu_mem_size = parse_size(optarg); 468 break; 469 case 't': 470 p.src_type = parse_backing_src_type(optarg); 471 break; 472 case 'v': 473 nr_vcpus = atoi(optarg); 474 TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, 475 "Invalid number of vcpus, must be between 1 and %d", max_vcpus); 476 break; 477 case 'o': 478 p.partition_vcpu_memory_access = false; 479 break; 480 case 'h': 481 default: 482 help(argv[0]); 483 break; 484 } 485 } 486 487 if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR && 488 !backing_src_is_shared(p.src_type)) { 489 TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -t"); 490 } 491 492 for_each_guest_mode(run_test, &p); 493 494 return 0; 495 } 496 497 #else /* __NR_userfaultfd */ 498 499 #warning "missing __NR_userfaultfd definition" 500 501 int main(void) 502 { 503 print_skip("__NR_userfaultfd must be present for userfaultfd test"); 504 return KSFT_SKIP; 505 } 506 507 #endif /* __NR_userfaultfd */ 508