1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * xapic_ipi_test 4 * 5 * Copyright (C) 2020, Google LLC. 6 * 7 * This work is licensed under the terms of the GNU GPL, version 2. 8 * 9 * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake 10 * another vCPU that is halted when KVM's backing page for the APIC access 11 * address has been moved by mm. 12 * 13 * The test starts two vCPUs: one that sends IPIs and one that continually 14 * executes HLT. The sender checks that the halter has woken from the HLT and 15 * has reentered HLT before sending the next IPI. While the vCPUs are running, 16 * the host continually calls migrate_pages to move all of the process' pages 17 * amongst the available numa nodes on the machine. 18 * 19 * Migration is a command line option. When used on non-numa machines will 20 * exit with error. Test is still usefull on non-numa for testing IPIs. 21 */ 22 23 #define _GNU_SOURCE /* for program_invocation_short_name */ 24 #include <getopt.h> 25 #include <pthread.h> 26 #include <inttypes.h> 27 #include <string.h> 28 #include <time.h> 29 30 #include "kvm_util.h" 31 #include "numaif.h" 32 #include "processor.h" 33 #include "test_util.h" 34 #include "vmx.h" 35 36 /* Default running time for the test */ 37 #define DEFAULT_RUN_SECS 3 38 39 /* Default delay between migrate_pages calls (microseconds) */ 40 #define DEFAULT_DELAY_USECS 500000 41 42 #define HALTER_VCPU_ID 0 43 #define SENDER_VCPU_ID 1 44 45 volatile uint32_t *apic_base = (volatile uint32_t *)APIC_DEFAULT_GPA; 46 47 /* 48 * Vector for IPI from sender vCPU to halting vCPU. 49 * Value is arbitrary and was chosen for the alternating bit pattern. Any 50 * value should work. 51 */ 52 #define IPI_VECTOR 0xa5 53 54 /* 55 * Incremented in the IPI handler. Provides evidence to the sender that the IPI 56 * arrived at the destination 57 */ 58 static volatile uint64_t ipis_rcvd; 59 60 /* Data struct shared between host main thread and vCPUs */ 61 struct test_data_page { 62 uint32_t halter_apic_id; 63 volatile uint64_t hlt_count; 64 volatile uint64_t wake_count; 65 uint64_t ipis_sent; 66 uint64_t migrations_attempted; 67 uint64_t migrations_completed; 68 uint32_t icr; 69 uint32_t icr2; 70 uint32_t halter_tpr; 71 uint32_t halter_ppr; 72 73 /* 74 * Record local version register as a cross-check that APIC access 75 * worked. Value should match what KVM reports (APIC_VERSION in 76 * arch/x86/kvm/lapic.c). If test is failing, check that values match 77 * to determine whether APIC access exits are working. 78 */ 79 uint32_t halter_lvr; 80 }; 81 82 struct thread_params { 83 struct test_data_page *data; 84 struct kvm_vm *vm; 85 uint32_t vcpu_id; 86 uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */ 87 }; 88 89 uint32_t read_apic_reg(uint reg) 90 { 91 return apic_base[reg >> 2]; 92 } 93 94 void write_apic_reg(uint reg, uint32_t val) 95 { 96 apic_base[reg >> 2] = val; 97 } 98 99 void disable_apic(void) 100 { 101 wrmsr(MSR_IA32_APICBASE, 102 rdmsr(MSR_IA32_APICBASE) & 103 ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD)); 104 } 105 106 void enable_xapic(void) 107 { 108 uint64_t val = rdmsr(MSR_IA32_APICBASE); 109 110 /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */ 111 if (val & MSR_IA32_APICBASE_EXTD) { 112 disable_apic(); 113 wrmsr(MSR_IA32_APICBASE, 114 rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE); 115 } else if (!(val & MSR_IA32_APICBASE_ENABLE)) { 116 wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE); 117 } 118 119 /* 120 * Per SDM: reset value of spurious interrupt vector register has the 121 * APIC software enabled bit=0. It must be enabled in addition to the 122 * enable bit in the MSR. 123 */ 124 val = read_apic_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED; 125 write_apic_reg(APIC_SPIV, val); 126 } 127 128 void verify_apic_base_addr(void) 129 { 130 uint64_t msr = rdmsr(MSR_IA32_APICBASE); 131 uint64_t base = GET_APIC_BASE(msr); 132 133 GUEST_ASSERT(base == APIC_DEFAULT_GPA); 134 } 135 136 static void halter_guest_code(struct test_data_page *data) 137 { 138 verify_apic_base_addr(); 139 enable_xapic(); 140 141 data->halter_apic_id = GET_APIC_ID_FIELD(read_apic_reg(APIC_ID)); 142 data->halter_lvr = read_apic_reg(APIC_LVR); 143 144 /* 145 * Loop forever HLTing and recording halts & wakes. Disable interrupts 146 * each time around to minimize window between signaling the pending 147 * halt to the sender vCPU and executing the halt. No need to disable on 148 * first run as this vCPU executes first and the host waits for it to 149 * signal going into first halt before starting the sender vCPU. Record 150 * TPR and PPR for diagnostic purposes in case the test fails. 151 */ 152 for (;;) { 153 data->halter_tpr = read_apic_reg(APIC_TASKPRI); 154 data->halter_ppr = read_apic_reg(APIC_PROCPRI); 155 data->hlt_count++; 156 asm volatile("sti; hlt; cli"); 157 data->wake_count++; 158 } 159 } 160 161 /* 162 * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to 163 * enable diagnosing errant writes to the APIC access address backing page in 164 * case of test failure. 165 */ 166 static void guest_ipi_handler(struct ex_regs *regs) 167 { 168 ipis_rcvd++; 169 write_apic_reg(APIC_EOI, 77); 170 } 171 172 static void sender_guest_code(struct test_data_page *data) 173 { 174 uint64_t last_wake_count; 175 uint64_t last_hlt_count; 176 uint64_t last_ipis_rcvd_count; 177 uint32_t icr_val; 178 uint32_t icr2_val; 179 uint64_t tsc_start; 180 181 verify_apic_base_addr(); 182 enable_xapic(); 183 184 /* 185 * Init interrupt command register for sending IPIs 186 * 187 * Delivery mode=fixed, per SDM: 188 * "Delivers the interrupt specified in the vector field to the target 189 * processor." 190 * 191 * Destination mode=physical i.e. specify target by its local APIC 192 * ID. This vCPU assumes that the halter vCPU has already started and 193 * set data->halter_apic_id. 194 */ 195 icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR); 196 icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id); 197 data->icr = icr_val; 198 data->icr2 = icr2_val; 199 200 last_wake_count = data->wake_count; 201 last_hlt_count = data->hlt_count; 202 last_ipis_rcvd_count = ipis_rcvd; 203 for (;;) { 204 /* 205 * Send IPI to halter vCPU. 206 * First IPI can be sent unconditionally because halter vCPU 207 * starts earlier. 208 */ 209 write_apic_reg(APIC_ICR2, icr2_val); 210 write_apic_reg(APIC_ICR, icr_val); 211 data->ipis_sent++; 212 213 /* 214 * Wait up to ~1 sec for halter to indicate that it has: 215 * 1. Received the IPI 216 * 2. Woken up from the halt 217 * 3. Gone back into halt 218 * Current CPUs typically run at 2.x Ghz which is ~2 219 * billion ticks per second. 220 */ 221 tsc_start = rdtsc(); 222 while (rdtsc() - tsc_start < 2000000000) { 223 if ((ipis_rcvd != last_ipis_rcvd_count) && 224 (data->wake_count != last_wake_count) && 225 (data->hlt_count != last_hlt_count)) 226 break; 227 } 228 229 GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) && 230 (data->wake_count != last_wake_count) && 231 (data->hlt_count != last_hlt_count)); 232 233 last_wake_count = data->wake_count; 234 last_hlt_count = data->hlt_count; 235 last_ipis_rcvd_count = ipis_rcvd; 236 } 237 } 238 239 static void *vcpu_thread(void *arg) 240 { 241 struct thread_params *params = (struct thread_params *)arg; 242 struct ucall uc; 243 int old; 244 int r; 245 unsigned int exit_reason; 246 247 r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); 248 TEST_ASSERT(r == 0, 249 "pthread_setcanceltype failed on vcpu_id=%u with errno=%d", 250 params->vcpu_id, r); 251 252 fprintf(stderr, "vCPU thread running vCPU %u\n", params->vcpu_id); 253 vcpu_run(params->vm, params->vcpu_id); 254 exit_reason = vcpu_state(params->vm, params->vcpu_id)->exit_reason; 255 256 TEST_ASSERT(exit_reason == KVM_EXIT_IO, 257 "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO", 258 params->vcpu_id, exit_reason, exit_reason_str(exit_reason)); 259 260 if (get_ucall(params->vm, params->vcpu_id, &uc) == UCALL_ABORT) { 261 TEST_ASSERT(false, 262 "vCPU %u exited with error: %s.\n" 263 "Sending vCPU sent %lu IPIs to halting vCPU\n" 264 "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n" 265 "Halter TPR=%#x PPR=%#x LVR=%#x\n" 266 "Migrations attempted: %lu\n" 267 "Migrations completed: %lu\n", 268 params->vcpu_id, (const char *)uc.args[0], 269 params->data->ipis_sent, params->data->hlt_count, 270 params->data->wake_count, 271 *params->pipis_rcvd, params->data->halter_tpr, 272 params->data->halter_ppr, params->data->halter_lvr, 273 params->data->migrations_attempted, 274 params->data->migrations_completed); 275 } 276 277 return NULL; 278 } 279 280 static void cancel_join_vcpu_thread(pthread_t thread, uint32_t vcpu_id) 281 { 282 void *retval; 283 int r; 284 285 r = pthread_cancel(thread); 286 TEST_ASSERT(r == 0, 287 "pthread_cancel on vcpu_id=%d failed with errno=%d", 288 vcpu_id, r); 289 290 r = pthread_join(thread, &retval); 291 TEST_ASSERT(r == 0, 292 "pthread_join on vcpu_id=%d failed with errno=%d", 293 vcpu_id, r); 294 TEST_ASSERT(retval == PTHREAD_CANCELED, 295 "expected retval=%p, got %p", PTHREAD_CANCELED, 296 retval); 297 } 298 299 void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, 300 uint64_t *pipis_rcvd) 301 { 302 long pages_not_moved; 303 unsigned long nodemask = 0; 304 unsigned long nodemasks[sizeof(nodemask) * 8]; 305 int nodes = 0; 306 time_t start_time, last_update, now; 307 time_t interval_secs = 1; 308 int i, r; 309 int from, to; 310 unsigned long bit; 311 uint64_t hlt_count; 312 uint64_t wake_count; 313 uint64_t ipis_sent; 314 315 fprintf(stderr, "Calling migrate_pages every %d microseconds\n", 316 delay_usecs); 317 318 /* Get set of first 64 numa nodes available */ 319 r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8, 320 0, MPOL_F_MEMS_ALLOWED); 321 TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno); 322 323 fprintf(stderr, "Numa nodes found amongst first %lu possible nodes " 324 "(each 1-bit indicates node is present): %#lx\n", 325 sizeof(nodemask) * 8, nodemask); 326 327 /* Init array of masks containing a single-bit in each, one for each 328 * available node. migrate_pages called below requires specifying nodes 329 * as bit masks. 330 */ 331 for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) { 332 if (nodemask & bit) { 333 nodemasks[nodes] = nodemask & bit; 334 nodes++; 335 } 336 } 337 338 TEST_ASSERT(nodes > 1, 339 "Did not find at least 2 numa nodes. Can't do migration\n"); 340 341 fprintf(stderr, "Migrating amongst %d nodes found\n", nodes); 342 343 from = 0; 344 to = 1; 345 start_time = time(NULL); 346 last_update = start_time; 347 348 ipis_sent = data->ipis_sent; 349 hlt_count = data->hlt_count; 350 wake_count = data->wake_count; 351 352 while ((int)(time(NULL) - start_time) < run_secs) { 353 data->migrations_attempted++; 354 355 /* 356 * migrate_pages with PID=0 will migrate all pages of this 357 * process between the nodes specified as bitmasks. The page 358 * backing the APIC access address belongs to this process 359 * because it is allocated by KVM in the context of the 360 * KVM_CREATE_VCPU ioctl. If that assumption ever changes this 361 * test may break or give a false positive signal. 362 */ 363 pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]), 364 &nodemasks[from], 365 &nodemasks[to]); 366 if (pages_not_moved < 0) 367 fprintf(stderr, 368 "migrate_pages failed, errno=%d\n", errno); 369 else if (pages_not_moved > 0) 370 fprintf(stderr, 371 "migrate_pages could not move %ld pages\n", 372 pages_not_moved); 373 else 374 data->migrations_completed++; 375 376 from = to; 377 to++; 378 if (to == nodes) 379 to = 0; 380 381 now = time(NULL); 382 if (((now - start_time) % interval_secs == 0) && 383 (now != last_update)) { 384 last_update = now; 385 fprintf(stderr, 386 "%lu seconds: Migrations attempted=%lu completed=%lu, " 387 "IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n", 388 now - start_time, data->migrations_attempted, 389 data->migrations_completed, 390 data->ipis_sent, *pipis_rcvd, 391 data->hlt_count, data->wake_count); 392 393 TEST_ASSERT(ipis_sent != data->ipis_sent && 394 hlt_count != data->hlt_count && 395 wake_count != data->wake_count, 396 "IPI, HLT and wake count have not increased " 397 "in the last %lu seconds. " 398 "HLTer is likely hung.\n", interval_secs); 399 400 ipis_sent = data->ipis_sent; 401 hlt_count = data->hlt_count; 402 wake_count = data->wake_count; 403 } 404 usleep(delay_usecs); 405 } 406 } 407 408 void get_cmdline_args(int argc, char *argv[], int *run_secs, 409 bool *migrate, int *delay_usecs) 410 { 411 for (;;) { 412 int opt = getopt(argc, argv, "s:d:m"); 413 414 if (opt == -1) 415 break; 416 switch (opt) { 417 case 's': 418 *run_secs = parse_size(optarg); 419 break; 420 case 'm': 421 *migrate = true; 422 break; 423 case 'd': 424 *delay_usecs = parse_size(optarg); 425 break; 426 default: 427 TEST_ASSERT(false, 428 "Usage: -s <runtime seconds>. Default is %d seconds.\n" 429 "-m adds calls to migrate_pages while vCPUs are running." 430 " Default is no migrations.\n" 431 "-d <delay microseconds> - delay between migrate_pages() calls." 432 " Default is %d microseconds.\n", 433 DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS); 434 } 435 } 436 } 437 438 int main(int argc, char *argv[]) 439 { 440 int r; 441 int wait_secs; 442 const int max_halter_wait = 10; 443 int run_secs = 0; 444 int delay_usecs = 0; 445 struct test_data_page *data; 446 vm_vaddr_t test_data_page_vaddr; 447 bool migrate = false; 448 pthread_t threads[2]; 449 struct thread_params params[2]; 450 struct kvm_vm *vm; 451 uint64_t *pipis_rcvd; 452 453 get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs); 454 if (run_secs <= 0) 455 run_secs = DEFAULT_RUN_SECS; 456 if (delay_usecs <= 0) 457 delay_usecs = DEFAULT_DELAY_USECS; 458 459 vm = vm_create_default(HALTER_VCPU_ID, 0, halter_guest_code); 460 params[0].vm = vm; 461 params[1].vm = vm; 462 463 vm_init_descriptor_tables(vm); 464 vcpu_init_descriptor_tables(vm, HALTER_VCPU_ID); 465 vm_handle_exception(vm, IPI_VECTOR, guest_ipi_handler); 466 467 virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA, 0); 468 469 vm_vcpu_add_default(vm, SENDER_VCPU_ID, sender_guest_code); 470 471 test_data_page_vaddr = vm_vaddr_alloc(vm, 0x1000, 0x1000, 0, 0); 472 data = 473 (struct test_data_page *)addr_gva2hva(vm, test_data_page_vaddr); 474 memset(data, 0, sizeof(*data)); 475 params[0].data = data; 476 params[1].data = data; 477 478 vcpu_args_set(vm, HALTER_VCPU_ID, 1, test_data_page_vaddr); 479 vcpu_args_set(vm, SENDER_VCPU_ID, 1, test_data_page_vaddr); 480 481 pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd); 482 params[0].pipis_rcvd = pipis_rcvd; 483 params[1].pipis_rcvd = pipis_rcvd; 484 485 /* Start halter vCPU thread and wait for it to execute first HLT. */ 486 params[0].vcpu_id = HALTER_VCPU_ID; 487 r = pthread_create(&threads[0], NULL, vcpu_thread, ¶ms[0]); 488 TEST_ASSERT(r == 0, 489 "pthread_create halter failed errno=%d", errno); 490 fprintf(stderr, "Halter vCPU thread started\n"); 491 492 wait_secs = 0; 493 while ((wait_secs < max_halter_wait) && !data->hlt_count) { 494 sleep(1); 495 wait_secs++; 496 } 497 498 TEST_ASSERT(data->hlt_count, 499 "Halter vCPU did not execute first HLT within %d seconds", 500 max_halter_wait); 501 502 fprintf(stderr, 503 "Halter vCPU thread reported its APIC ID: %u after %d seconds.\n", 504 data->halter_apic_id, wait_secs); 505 506 params[1].vcpu_id = SENDER_VCPU_ID; 507 r = pthread_create(&threads[1], NULL, vcpu_thread, ¶ms[1]); 508 TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno); 509 510 fprintf(stderr, 511 "IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n", 512 run_secs); 513 514 if (!migrate) 515 sleep(run_secs); 516 else 517 do_migrations(data, run_secs, delay_usecs, pipis_rcvd); 518 519 /* 520 * Cancel threads and wait for them to stop. 521 */ 522 cancel_join_vcpu_thread(threads[0], HALTER_VCPU_ID); 523 cancel_join_vcpu_thread(threads[1], SENDER_VCPU_ID); 524 525 fprintf(stderr, 526 "Test successful after running for %d seconds.\n" 527 "Sending vCPU sent %lu IPIs to halting vCPU\n" 528 "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n" 529 "Halter APIC ID=%#x\n" 530 "Sender ICR value=%#x ICR2 value=%#x\n" 531 "Halter TPR=%#x PPR=%#x LVR=%#x\n" 532 "Migrations attempted: %lu\n" 533 "Migrations completed: %lu\n", 534 run_secs, data->ipis_sent, 535 data->hlt_count, data->wake_count, *pipis_rcvd, 536 data->halter_apic_id, 537 data->icr, data->icr2, 538 data->halter_tpr, data->halter_ppr, data->halter_lvr, 539 data->migrations_attempted, data->migrations_completed); 540 541 kvm_vm_free(vm); 542 543 return 0; 544 } 545