// SPDX-License-Identifier: GPL-2.0
/*
 * KVM page table test
 *
 * Copyright (C) 2021, Huawei, Inc.
 *
 * Make sure that THP has been enabled or enough HUGETLB pages with specific
 * page size have been pre-allocated on your system, if you are planning to
 * use hugepages to back the guest memory for testing.
 */

#define _GNU_SOURCE /* for program_invocation_name */

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <pthread.h>
#include <semaphore.h>

#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
#include "guest_modes.h"

#define TEST_MEM_SLOT_INDEX		1

/* Default size (1GB) of the memory for testing */
#define DEFAULT_TEST_MEM_SIZE		(1 << 30)

/* Default guest test virtual memory offset */
#define DEFAULT_GUEST_TEST_MEM		0xc0000000

/* Different guest memory accessing stages */
enum test_stage {
	KVM_BEFORE_MAPPINGS,
	KVM_CREATE_MAPPINGS,
	KVM_UPDATE_MAPPINGS,
	KVM_ADJUST_MAPPINGS,
	NUM_TEST_STAGES,
};

static const char * const test_stage_string[] = {
	"KVM_BEFORE_MAPPINGS",
	"KVM_CREATE_MAPPINGS",
	"KVM_UPDATE_MAPPINGS",
	"KVM_ADJUST_MAPPINGS",
};

struct vcpu_args {
	int vcpu_id;
	bool vcpu_write;
};

struct test_args {
	struct kvm_vm *vm;
	uint64_t guest_test_virt_mem;
	uint64_t host_page_size;
	uint64_t host_num_pages;
	uint64_t large_page_size;
	uint64_t large_num_pages;
	uint64_t host_pages_per_lpage;
	enum vm_mem_backing_src_type src_type;
	struct vcpu_args vcpu_args[KVM_MAX_VCPUS];
};

/*
 * Guest variables. Use addr_gva2hva() if these variables need
 * to be changed in the host.
 */
static enum test_stage guest_test_stage;

/* Host variables */
static uint32_t nr_vcpus = 1;
static struct test_args test_args;
static enum test_stage *current_stage;
static bool host_quit;

/* Whether the test stage is updated, or completed */
static sem_t test_stage_updated;
static sem_t test_stage_completed;

/*
 * Guest physical memory offset of the testing memory slot.
 * This will be set to the topmost valid physical address minus
 * the test memory size.
 */
static uint64_t guest_test_phys_mem;

/*
 * Guest virtual memory offset of the testing memory slot.
 * Must not conflict with identity mapped test code.
 */
static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;

static void guest_code(int vcpu_id)
{
	struct test_args *p = &test_args;
	struct vcpu_args *vcpu_args = &p->vcpu_args[vcpu_id];
	enum test_stage *current_stage = &guest_test_stage;
	uint64_t addr;
	int i, j;

	/* Make sure the vCPU args data structure is not corrupt */
	GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id);

	while (true) {
		addr = p->guest_test_virt_mem;

		switch (READ_ONCE(*current_stage)) {
		/*
		 * All vCPU threads will be started in this stage,
		 * where guest code of each vCPU will do nothing.
		 */
		case KVM_BEFORE_MAPPINGS:
			break;

		/*
		 * Before dirty logging, vCPUs concurrently access the first
		 * 8 bytes of each page (host page/large page) within the same
		 * memory region with different accessing types (read/write).
		 * Then KVM will create normal page mappings or huge block
		 * mappings for them.
		 */
		case KVM_CREATE_MAPPINGS:
			for (i = 0; i < p->large_num_pages; i++) {
				if (vcpu_args->vcpu_write)
					*(uint64_t *)addr = 0x0123456789ABCDEF;
				else
					READ_ONCE(*(uint64_t *)addr);

				addr += p->large_page_size;
			}
			break;

		/*
		 * During dirty logging, KVM will only update attributes of the
		 * normal page mappings from RO to RW if the memory backing src
		 * type is anonymous. Otherwise, if the memory backing src type
		 * is THP or HUGETLB, KVM will split the huge block mappings
		 * into normal page mappings.
		 */
		case KVM_UPDATE_MAPPINGS:
			if (p->src_type == VM_MEM_SRC_ANONYMOUS) {
				for (i = 0; i < p->host_num_pages; i++) {
					*(uint64_t *)addr = 0x0123456789ABCDEF;
					addr += p->host_page_size;
				}
				break;
			}

			for (i = 0; i < p->large_num_pages; i++) {
				/*
				 * Write to the first host page in each large
				 * page region, and trigger splitting of the
				 * large pages.
				 */
				*(uint64_t *)addr = 0x0123456789ABCDEF;

				/*
				 * Access the middle host pages in each large
				 * page region. Since dirty logging is enabled,
				 * this will create new mappings at the smallest
				 * granularity.
				 */
				addr += p->large_page_size / 2;
				for (j = 0; j < p->host_pages_per_lpage / 2; j++) {
					READ_ONCE(*(uint64_t *)addr);
					addr += p->host_page_size;
				}
			}
			break;

		/*
		 * After dirty logging is stopped, vCPUs concurrently read
		 * from every single host page. Then KVM will coalesce the
		 * split page mappings back to block mappings. And a TLB
		 * conflict abort could occur here if TLB entries of the
		 * page mappings are not fully invalidated.
		 */
		case KVM_ADJUST_MAPPINGS:
			for (i = 0; i < p->host_num_pages; i++) {
				READ_ONCE(*(uint64_t *)addr);
				addr += p->host_page_size;
			}
			break;

		default:
			GUEST_ASSERT(0);
		}

		GUEST_SYNC(1);
	}
}

static void *vcpu_worker(void *data)
{
	int ret;
	struct vcpu_args *vcpu_args = data;
	struct kvm_vm *vm = test_args.vm;
	int vcpu_id = vcpu_args->vcpu_id;
	struct kvm_run *run;
	struct timespec start;
	struct timespec ts_diff;
	enum test_stage stage;

	vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
	run = vcpu_state(vm, vcpu_id);

	while (!READ_ONCE(host_quit)) {
		ret = sem_wait(&test_stage_updated);
		TEST_ASSERT(ret == 0, "Error in sem_wait");

		if (READ_ONCE(host_quit))
			return NULL;

		clock_gettime(CLOCK_MONOTONIC_RAW, &start);
		ret = _vcpu_run(vm, vcpu_id);
		ts_diff = timespec_elapsed(start);

		TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
		TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC,
			    "Invalid guest sync status: exit_reason=%s\n",
			    exit_reason_str(run->exit_reason));

		pr_debug("Got sync event from vCPU %d\n", vcpu_id);
		stage = READ_ONCE(*current_stage);

		/*
		 * Report the execution time of each individual vCPU in
		 * the different test stages.
		 */
		pr_debug("vCPU %d has completed stage %s\n"
			 "execution time is: %ld.%.9lds\n\n",
			 vcpu_id, test_stage_string[stage],
			 ts_diff.tv_sec, ts_diff.tv_nsec);

		ret = sem_post(&test_stage_completed);
		TEST_ASSERT(ret == 0, "Error in sem_post");
	}

	return NULL;
}

struct test_params {
	uint64_t phys_offset;
	uint64_t test_mem_size;
	enum vm_mem_backing_src_type src_type;
};

static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
{
	int ret;
	struct test_params *p = arg;
	struct vcpu_args *vcpu_args;
	enum vm_mem_backing_src_type src_type = p->src_type;
	uint64_t large_page_size = get_backing_src_pagesz(src_type);
	uint64_t guest_page_size = vm_guest_mode_params[mode].page_size;
	uint64_t host_page_size = getpagesize();
	uint64_t test_mem_size = p->test_mem_size;
	uint64_t guest_num_pages;
	uint64_t alignment;
	void *host_test_mem;
	struct kvm_vm *vm;
	int vcpu_id;

	/* Align up the test memory size */
	alignment = max(large_page_size, guest_page_size);
	test_mem_size = (test_mem_size + alignment - 1) & ~(alignment - 1);

	/* Create a VM with enough guest pages */
	guest_num_pages = test_mem_size / guest_page_size;
	vm = vm_create_with_vcpus(mode, nr_vcpus, DEFAULT_GUEST_PHY_PAGES,
				  guest_num_pages, 0, guest_code, NULL);

	/* Align down GPA of the testing memslot */
	if (!p->phys_offset)
		guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
				       guest_page_size;
	else
		guest_test_phys_mem = p->phys_offset;
#ifdef __s390x__
	alignment = max(0x100000, alignment);
#endif
	guest_test_phys_mem &= ~(alignment - 1);

	/* Set up the shared data structure test_args */
	test_args.vm = vm;
	test_args.guest_test_virt_mem = guest_test_virt_mem;
	test_args.host_page_size = host_page_size;
	test_args.host_num_pages = test_mem_size / host_page_size;
	test_args.large_page_size = large_page_size;
	test_args.large_num_pages = test_mem_size / large_page_size;
	test_args.host_pages_per_lpage = large_page_size / host_page_size;
	test_args.src_type = src_type;

	for (vcpu_id = 0; vcpu_id < KVM_MAX_VCPUS; vcpu_id++) {
		vcpu_args = &test_args.vcpu_args[vcpu_id];
		vcpu_args->vcpu_id = vcpu_id;
		vcpu_args->vcpu_write = !(vcpu_id % 2);
	}

	/* Add an extra memory slot with specified backing src type */
	vm_userspace_mem_region_add(vm, src_type, guest_test_phys_mem,
				    TEST_MEM_SLOT_INDEX, guest_num_pages, 0);

	/* Do mapping (GVA->GPA) for the testing memory slot */
	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);

	/* Cache the HVA pointer of the region */
	host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);

	/* Export shared structure test_args to guest */
	ucall_init(vm, NULL);
	sync_global_to_guest(vm, test_args);

	ret = sem_init(&test_stage_updated, 0, 0);
	TEST_ASSERT(ret == 0, "Error in sem_init");

	ret = sem_init(&test_stage_completed, 0, 0);
	TEST_ASSERT(ret == 0, "Error in sem_init");

	current_stage = addr_gva2hva(vm, (vm_vaddr_t)(&guest_test_stage));
	*current_stage = NUM_TEST_STAGES;

	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
	pr_info("Testing memory backing src type: %s\n",
		vm_mem_backing_src_alias(src_type)->name);
	pr_info("Testing memory backing src granularity: 0x%lx\n",
		large_page_size);
pr_info("Testing memory size(aligned): 0x%lx\n", test_mem_size); 330 pr_info("Guest physical test memory offset: 0x%lx\n", 331 guest_test_phys_mem); 332 pr_info("Host virtual test memory offset: 0x%lx\n", 333 (uint64_t)host_test_mem); 334 pr_info("Number of testing vCPUs: %d\n", nr_vcpus); 335 336 return vm; 337 } 338 339 static void vcpus_complete_new_stage(enum test_stage stage) 340 { 341 int ret; 342 int vcpus; 343 344 /* Wake up all the vcpus to run new test stage */ 345 for (vcpus = 0; vcpus < nr_vcpus; vcpus++) { 346 ret = sem_post(&test_stage_updated); 347 TEST_ASSERT(ret == 0, "Error in sem_post"); 348 } 349 pr_debug("All vcpus have been notified to continue\n"); 350 351 /* Wait for all the vcpus to complete new test stage */ 352 for (vcpus = 0; vcpus < nr_vcpus; vcpus++) { 353 ret = sem_wait(&test_stage_completed); 354 TEST_ASSERT(ret == 0, "Error in sem_wait"); 355 356 pr_debug("%d vcpus have completed stage %s\n", 357 vcpus + 1, test_stage_string[stage]); 358 } 359 360 pr_debug("All vcpus have completed stage %s\n", 361 test_stage_string[stage]); 362 } 363 364 static void run_test(enum vm_guest_mode mode, void *arg) 365 { 366 int ret; 367 pthread_t *vcpu_threads; 368 struct kvm_vm *vm; 369 int vcpu_id; 370 struct timespec start; 371 struct timespec ts_diff; 372 373 /* Create VM with vCPUs and make some pre-initialization */ 374 vm = pre_init_before_test(mode, arg); 375 376 vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); 377 TEST_ASSERT(vcpu_threads, "Memory allocation failed"); 378 379 host_quit = false; 380 *current_stage = KVM_BEFORE_MAPPINGS; 381 382 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 383 pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, 384 &test_args.vcpu_args[vcpu_id]); 385 } 386 387 vcpus_complete_new_stage(*current_stage); 388 pr_info("Started all vCPUs successfully\n"); 389 390 /* Test the stage of KVM creating mappings */ 391 *current_stage = KVM_CREATE_MAPPINGS; 392 393 clock_gettime(CLOCK_MONOTONIC_RAW, &start); 394 vcpus_complete_new_stage(*current_stage); 395 ts_diff = timespec_elapsed(start); 396 397 pr_info("KVM_CREATE_MAPPINGS: total execution time: %ld.%.9lds\n\n", 398 ts_diff.tv_sec, ts_diff.tv_nsec); 399 400 /* Test the stage of KVM updating mappings */ 401 vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 402 KVM_MEM_LOG_DIRTY_PAGES); 403 404 *current_stage = KVM_UPDATE_MAPPINGS; 405 406 clock_gettime(CLOCK_MONOTONIC_RAW, &start); 407 vcpus_complete_new_stage(*current_stage); 408 ts_diff = timespec_elapsed(start); 409 410 pr_info("KVM_UPDATE_MAPPINGS: total execution time: %ld.%.9lds\n\n", 411 ts_diff.tv_sec, ts_diff.tv_nsec); 412 413 /* Test the stage of KVM adjusting mappings */ 414 vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0); 415 416 *current_stage = KVM_ADJUST_MAPPINGS; 417 418 clock_gettime(CLOCK_MONOTONIC_RAW, &start); 419 vcpus_complete_new_stage(*current_stage); 420 ts_diff = timespec_elapsed(start); 421 422 pr_info("KVM_ADJUST_MAPPINGS: total execution time: %ld.%.9lds\n\n", 423 ts_diff.tv_sec, ts_diff.tv_nsec); 424 425 /* Tell the vcpu thread to quit */ 426 host_quit = true; 427 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 428 ret = sem_post(&test_stage_updated); 429 TEST_ASSERT(ret == 0, "Error in sem_post"); 430 } 431 432 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) 433 pthread_join(vcpu_threads[vcpu_id], NULL); 434 435 ret = sem_destroy(&test_stage_updated); 436 TEST_ASSERT(ret == 0, "Error in sem_destroy"); 437 438 ret = sem_destroy(&test_stage_completed); 439 TEST_ASSERT(ret == 0, "Error in 
sem_destroy"); 440 441 free(vcpu_threads); 442 ucall_uninit(vm); 443 kvm_vm_free(vm); 444 } 445 446 static void help(char *name) 447 { 448 puts(""); 449 printf("usage: %s [-h] [-p offset] [-m mode] " 450 "[-b mem-size] [-v vcpus] [-s mem-type]\n", name); 451 puts(""); 452 printf(" -p: specify guest physical test memory offset\n" 453 " Warning: a low offset can conflict with the loaded test code.\n"); 454 guest_modes_help(); 455 printf(" -b: specify size of the memory region for testing. e.g. 10M or 3G.\n" 456 " (default: 1G)\n"); 457 printf(" -v: specify the number of vCPUs to run\n" 458 " (default: 1)\n"); 459 backing_src_help("-s"); 460 puts(""); 461 } 462 463 int main(int argc, char *argv[]) 464 { 465 int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); 466 struct test_params p = { 467 .test_mem_size = DEFAULT_TEST_MEM_SIZE, 468 .src_type = DEFAULT_VM_MEM_SRC, 469 }; 470 int opt; 471 472 guest_modes_append_default(); 473 474 while ((opt = getopt(argc, argv, "hp:m:b:v:s:")) != -1) { 475 switch (opt) { 476 case 'p': 477 p.phys_offset = strtoull(optarg, NULL, 0); 478 break; 479 case 'm': 480 guest_modes_cmdline(optarg); 481 break; 482 case 'b': 483 p.test_mem_size = parse_size(optarg); 484 break; 485 case 'v': 486 nr_vcpus = atoi(optarg); 487 TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, 488 "Invalid number of vcpus, must be between 1 and %d", max_vcpus); 489 break; 490 case 's': 491 p.src_type = parse_backing_src_type(optarg); 492 break; 493 case 'h': 494 default: 495 help(argv[0]); 496 exit(0); 497 } 498 } 499 500 for_each_guest_mode(run_test, &p); 501 502 return 0; 503 } 504