1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * KVM demand paging test 4 * Adapted from dirty_log_test.c 5 * 6 * Copyright (C) 2018, Red Hat, Inc. 7 * Copyright (C) 2019, Google, Inc. 8 */ 9 10 #define _GNU_SOURCE /* for pipe2 */ 11 12 #include <stdio.h> 13 #include <stdlib.h> 14 #include <time.h> 15 #include <poll.h> 16 #include <pthread.h> 17 #include <linux/userfaultfd.h> 18 #include <sys/syscall.h> 19 20 #include "kvm_util.h" 21 #include "test_util.h" 22 #include "perf_test_util.h" 23 #include "guest_modes.h" 24 25 #ifdef __NR_userfaultfd 26 27 #ifdef PRINT_PER_PAGE_UPDATES 28 #define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) 29 #else 30 #define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__) 31 #endif 32 33 #ifdef PRINT_PER_VCPU_UPDATES 34 #define PER_VCPU_DEBUG(...) printf(__VA_ARGS__) 35 #else 36 #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) 37 #endif 38 39 static int nr_vcpus = 1; 40 static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; 41 static char *guest_data_prototype; 42 43 static void *vcpu_worker(void *data) 44 { 45 int ret; 46 struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data; 47 int vcpu_id = vcpu_args->vcpu_id; 48 struct kvm_vm *vm = perf_test_args.vm; 49 struct kvm_run *run; 50 struct timespec start; 51 struct timespec ts_diff; 52 53 vcpu_args_set(vm, vcpu_id, 1, vcpu_id); 54 run = vcpu_state(vm, vcpu_id); 55 56 clock_gettime(CLOCK_MONOTONIC, &start); 57 58 /* Let the guest access its memory */ 59 ret = _vcpu_run(vm, vcpu_id); 60 TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); 61 if (get_ucall(vm, vcpu_id, NULL) != UCALL_SYNC) { 62 TEST_ASSERT(false, 63 "Invalid guest sync status: exit_reason=%s\n", 64 exit_reason_str(run->exit_reason)); 65 } 66 67 ts_diff = timespec_elapsed(start); 68 PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id, 69 ts_diff.tv_sec, ts_diff.tv_nsec); 70 71 return NULL; 72 } 73 74 static int handle_uffd_page_request(int uffd, uint64_t addr) 75 { 76 pid_t tid; 77 struct timespec start; 78 struct timespec ts_diff; 79 struct uffdio_copy copy; 80 int r; 81 82 tid = syscall(__NR_gettid); 83 84 copy.src = (uint64_t)guest_data_prototype; 85 copy.dst = addr; 86 copy.len = perf_test_args.host_page_size; 87 copy.mode = 0; 88 89 clock_gettime(CLOCK_MONOTONIC, &start); 90 91 r = ioctl(uffd, UFFDIO_COPY, ©); 92 if (r == -1) { 93 pr_info("Failed Paged in 0x%lx from thread %d with errno: %d\n", 94 addr, tid, errno); 95 return r; 96 } 97 98 ts_diff = timespec_elapsed(start); 99 100 PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid, 101 timespec_to_ns(ts_diff)); 102 PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n", 103 perf_test_args.host_page_size, addr, tid); 104 105 return 0; 106 } 107 108 bool quit_uffd_thread; 109 110 struct uffd_handler_args { 111 int uffd; 112 int pipefd; 113 useconds_t delay; 114 }; 115 116 static void *uffd_handler_thread_fn(void *arg) 117 { 118 struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg; 119 int uffd = uffd_args->uffd; 120 int pipefd = uffd_args->pipefd; 121 useconds_t delay = uffd_args->delay; 122 int64_t pages = 0; 123 struct timespec start; 124 struct timespec ts_diff; 125 126 clock_gettime(CLOCK_MONOTONIC, &start); 127 while (!quit_uffd_thread) { 128 struct uffd_msg msg; 129 struct pollfd pollfd[2]; 130 char tmp_chr; 131 int r; 132 uint64_t addr; 133 134 pollfd[0].fd = uffd; 135 pollfd[0].events = POLLIN; 136 pollfd[1].fd = pipefd; 137 pollfd[1].events = POLLIN; 138 139 r = poll(pollfd, 2, -1); 140 switch (r) { 141 case -1: 142 pr_info("poll err"); 143 continue; 144 case 0: 145 continue; 146 case 1: 147 break; 148 default: 149 pr_info("Polling uffd returned %d", r); 150 return NULL; 151 } 152 153 if (pollfd[0].revents & POLLERR) { 154 pr_info("uffd revents has POLLERR"); 155 return NULL; 156 } 157 158 if (pollfd[1].revents & POLLIN) { 159 r = read(pollfd[1].fd, &tmp_chr, 1); 160 TEST_ASSERT(r == 1, 161 "Error reading pipefd in UFFD thread\n"); 162 return NULL; 163 } 164 165 if (!pollfd[0].revents & POLLIN) 166 continue; 167 168 r = read(uffd, &msg, sizeof(msg)); 169 if (r == -1) { 170 if (errno == EAGAIN) 171 continue; 172 pr_info("Read of uffd gor errno %d", errno); 173 return NULL; 174 } 175 176 if (r != sizeof(msg)) { 177 pr_info("Read on uffd returned unexpected size: %d bytes", r); 178 return NULL; 179 } 180 181 if (!(msg.event & UFFD_EVENT_PAGEFAULT)) 182 continue; 183 184 if (delay) 185 usleep(delay); 186 addr = msg.arg.pagefault.address; 187 r = handle_uffd_page_request(uffd, addr); 188 if (r < 0) 189 return NULL; 190 pages++; 191 } 192 193 ts_diff = timespec_elapsed(start); 194 PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", 195 pages, ts_diff.tv_sec, ts_diff.tv_nsec, 196 pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); 197 198 return NULL; 199 } 200 201 static int setup_demand_paging(struct kvm_vm *vm, 202 pthread_t *uffd_handler_thread, int pipefd, 203 useconds_t uffd_delay, 204 struct uffd_handler_args *uffd_args, 205 void *hva, uint64_t len) 206 { 207 int uffd; 208 struct uffdio_api uffdio_api; 209 struct uffdio_register uffdio_register; 210 211 uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); 212 if (uffd == -1) { 213 pr_info("uffd creation failed\n"); 214 return -1; 215 } 216 217 uffdio_api.api = UFFD_API; 218 uffdio_api.features = 0; 219 if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) { 220 pr_info("ioctl uffdio_api failed\n"); 221 return -1; 222 } 223 224 uffdio_register.range.start = (uint64_t)hva; 225 uffdio_register.range.len = len; 226 uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; 227 if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) { 228 pr_info("ioctl uffdio_register failed\n"); 229 return -1; 230 } 231 232 if ((uffdio_register.ioctls & UFFD_API_RANGE_IOCTLS) != 233 UFFD_API_RANGE_IOCTLS) { 234 pr_info("unexpected userfaultfd ioctl set\n"); 235 return -1; 236 } 237 238 uffd_args->uffd = uffd; 239 uffd_args->pipefd = pipefd; 240 uffd_args->delay = uffd_delay; 241 pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn, 242 uffd_args); 243 244 PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", 245 hva, hva + len); 246 247 return 0; 248 } 249 250 struct test_params { 251 bool use_uffd; 252 useconds_t uffd_delay; 253 bool partition_vcpu_memory_access; 254 }; 255 256 static void run_test(enum vm_guest_mode mode, void *arg) 257 { 258 struct test_params *p = arg; 259 pthread_t *vcpu_threads; 260 pthread_t *uffd_handler_threads = NULL; 261 struct uffd_handler_args *uffd_args = NULL; 262 struct timespec start; 263 struct timespec ts_diff; 264 int *pipefds = NULL; 265 struct kvm_vm *vm; 266 int vcpu_id; 267 int r; 268 269 vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 270 VM_MEM_SRC_ANONYMOUS); 271 272 perf_test_args.wr_fract = 1; 273 274 guest_data_prototype = malloc(perf_test_args.host_page_size); 275 TEST_ASSERT(guest_data_prototype, 276 "Failed to allocate buffer for guest data pattern"); 277 memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size); 278 279 vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); 280 TEST_ASSERT(vcpu_threads, "Memory allocation failed"); 281 282 perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size, 283 p->partition_vcpu_memory_access); 284 285 if (p->use_uffd) { 286 uffd_handler_threads = 287 malloc(nr_vcpus * sizeof(*uffd_handler_threads)); 288 TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); 289 290 uffd_args = malloc(nr_vcpus * sizeof(*uffd_args)); 291 TEST_ASSERT(uffd_args, "Memory allocation failed"); 292 293 pipefds = malloc(sizeof(int) * nr_vcpus * 2); 294 TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); 295 296 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 297 vm_paddr_t vcpu_gpa; 298 void *vcpu_hva; 299 uint64_t vcpu_mem_size; 300 301 302 if (p->partition_vcpu_memory_access) { 303 vcpu_gpa = guest_test_phys_mem + 304 (vcpu_id * guest_percpu_mem_size); 305 vcpu_mem_size = guest_percpu_mem_size; 306 } else { 307 vcpu_gpa = guest_test_phys_mem; 308 vcpu_mem_size = guest_percpu_mem_size * nr_vcpus; 309 } 310 PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", 311 vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size); 312 313 /* Cache the HVA pointer of the region */ 314 vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); 315 316 /* 317 * Set up user fault fd to handle demand paging 318 * requests. 319 */ 320 r = pipe2(&pipefds[vcpu_id * 2], 321 O_CLOEXEC | O_NONBLOCK); 322 TEST_ASSERT(!r, "Failed to set up pipefd"); 323 324 r = setup_demand_paging(vm, 325 &uffd_handler_threads[vcpu_id], 326 pipefds[vcpu_id * 2], 327 p->uffd_delay, &uffd_args[vcpu_id], 328 vcpu_hva, vcpu_mem_size); 329 if (r < 0) 330 exit(-r); 331 } 332 } 333 334 /* Export the shared variables to the guest */ 335 sync_global_to_guest(vm, perf_test_args); 336 337 pr_info("Finished creating vCPUs and starting uffd threads\n"); 338 339 clock_gettime(CLOCK_MONOTONIC, &start); 340 341 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 342 pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, 343 &perf_test_args.vcpu_args[vcpu_id]); 344 } 345 346 pr_info("Started all vCPUs\n"); 347 348 /* Wait for the vcpu threads to quit */ 349 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 350 pthread_join(vcpu_threads[vcpu_id], NULL); 351 PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id); 352 } 353 354 ts_diff = timespec_elapsed(start); 355 356 pr_info("All vCPU threads joined\n"); 357 358 if (p->use_uffd) { 359 char c; 360 361 /* Tell the user fault fd handler threads to quit */ 362 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 363 r = write(pipefds[vcpu_id * 2 + 1], &c, 1); 364 TEST_ASSERT(r == 1, "Unable to write to pipefd"); 365 366 pthread_join(uffd_handler_threads[vcpu_id], NULL); 367 } 368 } 369 370 pr_info("Total guest execution time: %ld.%.9lds\n", 371 ts_diff.tv_sec, ts_diff.tv_nsec); 372 pr_info("Overall demand paging rate: %f pgs/sec\n", 373 perf_test_args.vcpu_args[0].pages * nr_vcpus / 374 ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); 375 376 perf_test_destroy_vm(vm); 377 378 free(guest_data_prototype); 379 free(vcpu_threads); 380 if (p->use_uffd) { 381 free(uffd_handler_threads); 382 free(uffd_args); 383 free(pipefds); 384 } 385 } 386 387 static void help(char *name) 388 { 389 puts(""); 390 printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n" 391 " [-b memory] [-v vcpus] [-o]\n", name); 392 guest_modes_help(); 393 printf(" -u: use User Fault FD to handle vCPU page\n" 394 " faults.\n"); 395 printf(" -d: add a delay in usec to the User Fault\n" 396 " FD handler to simulate demand paging\n" 397 " overheads. Ignored without -u.\n"); 398 printf(" -b: specify the size of the memory region which should be\n" 399 " demand paged by each vCPU. e.g. 10M or 3G.\n" 400 " Default: 1G\n"); 401 printf(" -v: specify the number of vCPUs to run.\n"); 402 printf(" -o: Overlap guest memory accesses instead of partitioning\n" 403 " them into a separate region of memory for each vCPU.\n"); 404 puts(""); 405 exit(0); 406 } 407 408 int main(int argc, char *argv[]) 409 { 410 int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); 411 struct test_params p = { 412 .partition_vcpu_memory_access = true, 413 }; 414 int opt; 415 416 guest_modes_append_default(); 417 418 while ((opt = getopt(argc, argv, "hm:ud:b:v:o")) != -1) { 419 switch (opt) { 420 case 'm': 421 guest_modes_cmdline(optarg); 422 break; 423 case 'u': 424 p.use_uffd = true; 425 break; 426 case 'd': 427 p.uffd_delay = strtoul(optarg, NULL, 0); 428 TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported."); 429 break; 430 case 'b': 431 guest_percpu_mem_size = parse_size(optarg); 432 break; 433 case 'v': 434 nr_vcpus = atoi(optarg); 435 TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, 436 "Invalid number of vcpus, must be between 1 and %d", max_vcpus); 437 break; 438 case 'o': 439 p.partition_vcpu_memory_access = false; 440 break; 441 case 'h': 442 default: 443 help(argv[0]); 444 break; 445 } 446 } 447 448 for_each_guest_mode(run_test, &p); 449 450 return 0; 451 } 452 453 #else /* __NR_userfaultfd */ 454 455 #warning "missing __NR_userfaultfd definition" 456 457 int main(void) 458 { 459 print_skip("__NR_userfaultfd must be present for userfaultfd test"); 460 return KSFT_SKIP; 461 } 462 463 #endif /* __NR_userfaultfd */ 464