// SPDX-License-Identifier: GPL-2.0
/*
 * xapic_ipi_test
 *
 * Copyright (C) 2020, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
 * another vCPU that is halted when KVM's backing page for the APIC access
 * address has been moved by mm.
 *
 * The test starts two vCPUs: one that sends IPIs and one that continually
 * executes HLT. The sender checks that the halter has woken from the HLT and
 * has reentered HLT before sending the next IPI. While the vCPUs are running,
 * the host continually calls migrate_pages to move all of the process' pages
 * amongst the available numa nodes on the machine.
 *
 * Migration is a command line option. When used on non-numa machines, the
 * test exits with an error. The test is still useful on non-numa machines for
 * testing IPIs.
 */

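/*
 * Example invocation (values are illustrative and the binary name assumes the
 * usual selftest build): run for 10 seconds with migrations enabled, calling
 * migrate_pages() every 1000 microseconds:
 *
 *   ./xapic_ipi_test -m -s 10 -d 1000
 */
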
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <getopt.h>
#include <pthread.h>
#include <inttypes.h>
#include <string.h>
#include <time.h>

#include "kvm_util.h"
#include "numaif.h"
#include "processor.h"
#include "test_util.h"
#include "vmx.h"

/* Default running time for the test */
#define DEFAULT_RUN_SECS 3

/* Default delay between migrate_pages calls (microseconds) */
#define DEFAULT_DELAY_USECS 500000

#define HALTER_VCPU_ID 0
#define SENDER_VCPU_ID 1

/*
 * Vector for IPI from sender vCPU to halting vCPU.
 * Value is arbitrary and was chosen for the alternating bit pattern. Any
 * value should work.
 */
#define IPI_VECTOR 0xa5

/*
 * Incremented in the IPI handler. Provides evidence to the sender that the IPI
 * arrived at the destination.
 */
static volatile uint64_t ipis_rcvd;

/* Data struct shared between host main thread and vCPUs */
struct test_data_page {
	uint32_t halter_apic_id;
	volatile uint64_t hlt_count;
	volatile uint64_t wake_count;
	uint64_t ipis_sent;
	uint64_t migrations_attempted;
	uint64_t migrations_completed;
	uint32_t icr;
	uint32_t icr2;
	uint32_t halter_tpr;
	uint32_t halter_ppr;

	/*
	 * Record local version register as a cross-check that APIC access
	 * worked. Value should match what KVM reports (APIC_VERSION in
	 * arch/x86/kvm/lapic.c). If the test is failing, check that the
	 * values match to determine whether APIC access exits are working.
	 */
	uint32_t halter_lvr;
};

struct thread_params {
	struct test_data_page *data;
	struct kvm_vm *vm;
	uint32_t vcpu_id;
	uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
};

void verify_apic_base_addr(void)
{
	uint64_t msr = rdmsr(MSR_IA32_APICBASE);
	uint64_t base = GET_APIC_BASE(msr);

	GUEST_ASSERT(base == APIC_DEFAULT_GPA);
}

static void halter_guest_code(struct test_data_page *data)
{
	verify_apic_base_addr();
	xapic_enable();

	data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
	data->halter_lvr = xapic_read_reg(APIC_LVR);

	/*
	 * Loop forever HLTing and recording halts & wakes. Disable interrupts
	 * each time around to minimize the window between signaling the
	 * pending halt to the sender vCPU and executing the halt. There is no
	 * need to disable on the first pass because this vCPU executes first
	 * and the host waits for it to signal going into its first halt
	 * before starting the sender vCPU. Record TPR and PPR for diagnostic
	 * purposes in case the test fails.
	 */
	for (;;) {
		data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
		data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
		data->hlt_count++;
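		/*
		 * STI leaves interrupts masked for one more instruction, so
		 * no IPI can be delivered in the gap between "sti" and "hlt";
		 * the IPI instead wakes this vCPU out of the halt, after
		 * which "cli" masks interrupts again.
		 */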
		asm volatile("sti; hlt; cli");
		data->wake_count++;
	}
}

/*
 * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to
 * enable diagnosing errant writes to the APIC access address backing page in
 * case of test failure.
 */
static void guest_ipi_handler(struct ex_regs *regs)
{
	ipis_rcvd++;
	xapic_write_reg(APIC_EOI, 77);
}

static void sender_guest_code(struct test_data_page *data)
{
	uint64_t last_wake_count;
	uint64_t last_hlt_count;
	uint64_t last_ipis_rcvd_count;
	uint32_t icr_val;
	uint32_t icr2_val;
	uint64_t tsc_start;

	verify_apic_base_addr();
	xapic_enable();

	/*
	 * Init interrupt command register for sending IPIs
	 *
	 * Delivery mode=fixed, per SDM:
	 *   "Delivers the interrupt specified in the vector field to the
	 *    target processor."
	 *
	 * Destination mode=physical i.e. specify target by its local APIC
	 * ID. This vCPU assumes that the halter vCPU has already started and
	 * set data->halter_apic_id.
	 */
	icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
	icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
	data->icr = icr_val;
	data->icr2 = icr2_val;

	last_wake_count = data->wake_count;
	last_hlt_count = data->hlt_count;
	last_ipis_rcvd_count = ipis_rcvd;
	for (;;) {
		/*
		 * Send IPI to halter vCPU.
		 * First IPI can be sent unconditionally because halter vCPU
		 * starts earlier.
		 */
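		/*
		 * In xAPIC mode, writing the low ICR dword is what triggers
		 * the IPI, so the destination must already be programmed into
		 * ICR2 (the high dword) when ICR is written.
		 */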
		xapic_write_reg(APIC_ICR2, icr2_val);
		xapic_write_reg(APIC_ICR, icr_val);
		data->ipis_sent++;

		/*
		 * Wait up to ~1 sec for the halter to indicate that it has:
		 * 1. Received the IPI
		 * 2. Woken up from the halt
		 * 3. Gone back into halt
		 * Current CPUs typically run at 2.x GHz which is ~2
		 * billion ticks per second.
		 */
		tsc_start = rdtsc();
		while (rdtsc() - tsc_start < 2000000000) {
			if ((ipis_rcvd != last_ipis_rcvd_count) &&
			    (data->wake_count != last_wake_count) &&
			    (data->hlt_count != last_hlt_count))
				break;
		}

		GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
			     (data->wake_count != last_wake_count) &&
			     (data->hlt_count != last_hlt_count));

		last_wake_count = data->wake_count;
		last_hlt_count = data->hlt_count;
		last_ipis_rcvd_count = ipis_rcvd;
	}
}

static void *vcpu_thread(void *arg)
{
	struct thread_params *params = (struct thread_params *)arg;
	struct ucall uc;
	int old;
	int r;
	unsigned int exit_reason;

	r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
	TEST_ASSERT(r == 0,
		    "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
		    params->vcpu_id, r);

	fprintf(stderr, "vCPU thread running vCPU %u\n", params->vcpu_id);
	vcpu_run(params->vm, params->vcpu_id);
	exit_reason = vcpu_state(params->vm, params->vcpu_id)->exit_reason;

	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
		    "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
		    params->vcpu_id, exit_reason, exit_reason_str(exit_reason));

	if (get_ucall(params->vm, params->vcpu_id, &uc) == UCALL_ABORT) {
		TEST_ASSERT(false,
			    "vCPU %u exited with error: %s.\n"
			    "Sending vCPU sent %lu IPIs to halting vCPU\n"
			    "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
			    "Halter TPR=%#x PPR=%#x LVR=%#x\n"
			    "Migrations attempted: %lu\n"
			    "Migrations completed: %lu\n",
			    params->vcpu_id, (const char *)uc.args[0],
			    params->data->ipis_sent, params->data->hlt_count,
			    params->data->wake_count,
			    *params->pipis_rcvd, params->data->halter_tpr,
			    params->data->halter_ppr, params->data->halter_lvr,
			    params->data->migrations_attempted,
			    params->data->migrations_completed);
	}

	return NULL;
}

static void cancel_join_vcpu_thread(pthread_t thread, uint32_t vcpu_id)
{
	void *retval;
	int r;

	r = pthread_cancel(thread);
	TEST_ASSERT(r == 0,
		    "pthread_cancel on vcpu_id=%d failed with errno=%d",
		    vcpu_id, r);

	r = pthread_join(thread, &retval);
	TEST_ASSERT(r == 0,
		    "pthread_join on vcpu_id=%d failed with errno=%d",
		    vcpu_id, r);
	TEST_ASSERT(retval == PTHREAD_CANCELED,
		    "expected retval=%p, got %p", PTHREAD_CANCELED,
		    retval);
}

void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
		   uint64_t *pipis_rcvd)
{
	long pages_not_moved;
	unsigned long nodemask = 0;
	unsigned long nodemasks[sizeof(nodemask) * 8];
	int nodes = 0;
	time_t start_time, last_update, now;
	time_t interval_secs = 1;
	int i, r;
	int from, to;
	unsigned long bit;
	uint64_t hlt_count;
	uint64_t wake_count;
	uint64_t ipis_sent;

	fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
		delay_usecs);

	/* Get set of first 64 numa nodes available */
	r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
			  0, MPOL_F_MEMS_ALLOWED);
	TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);

	fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
		"(each 1-bit indicates node is present): %#lx\n",
		sizeof(nodemask) * 8, nodemask);

	/* Init array of masks containing a single-bit in each, one for each
	 * available node. migrate_pages called below requires specifying nodes
	 * as bit masks.
	 */
	for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) {
		if (nodemask & bit) {
			nodemasks[nodes] = nodemask & bit;
			nodes++;
		}
	}

	TEST_ASSERT(nodes > 1,
		    "Did not find at least 2 numa nodes. Can't do migration\n");

	fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);

	from = 0;
	to = 1;
	start_time = time(NULL);
	last_update = start_time;

	ipis_sent = data->ipis_sent;
	hlt_count = data->hlt_count;
	wake_count = data->wake_count;

	while ((int)(time(NULL) - start_time) < run_secs) {
		data->migrations_attempted++;

		/*
		 * migrate_pages with PID=0 will migrate all pages of this
		 * process between the nodes specified as bitmasks. The page
		 * backing the APIC access address belongs to this process
		 * because it is allocated by KVM in the context of the
		 * KVM_CREATE_VCPU ioctl. If that assumption ever changes this
		 * test may break or give a false positive signal.
		 */
		pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]),
						&nodemasks[from],
						&nodemasks[to]);
		if (pages_not_moved < 0)
			fprintf(stderr,
				"migrate_pages failed, errno=%d\n", errno);
		else if (pages_not_moved > 0)
			fprintf(stderr,
				"migrate_pages could not move %ld pages\n",
				pages_not_moved);
		else
			data->migrations_completed++;

		from = to;
		to++;
		if (to == nodes)
			to = 0;

		now = time(NULL);
		if (((now - start_time) % interval_secs == 0) &&
		    (now != last_update)) {
			last_update = now;
			fprintf(stderr,
				"%lu seconds: Migrations attempted=%lu completed=%lu, "
				"IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
				now - start_time, data->migrations_attempted,
				data->migrations_completed,
				data->ipis_sent, *pipis_rcvd,
				data->hlt_count, data->wake_count);

			TEST_ASSERT(ipis_sent != data->ipis_sent &&
				    hlt_count != data->hlt_count &&
				    wake_count != data->wake_count,
				    "IPI, HLT and wake count have not increased "
				    "in the last %lu seconds. "
				    "HLTer is likely hung.\n", interval_secs);

			ipis_sent = data->ipis_sent;
			hlt_count = data->hlt_count;
			wake_count = data->wake_count;
		}
		usleep(delay_usecs);
	}
}

void get_cmdline_args(int argc, char *argv[], int *run_secs,
		      bool *migrate, int *delay_usecs)
{
	for (;;) {
		int opt = getopt(argc, argv, "s:d:m");

		if (opt == -1)
			break;
		switch (opt) {
		case 's':
			*run_secs = parse_size(optarg);
			break;
		case 'm':
			*migrate = true;
			break;
		case 'd':
			*delay_usecs = parse_size(optarg);
			break;
		default:
			TEST_ASSERT(false,
				    "Usage: -s <runtime seconds>. Default is %d seconds.\n"
				    "-m adds calls to migrate_pages while vCPUs are running."
				    " Default is no migrations.\n"
				    "-d <delay microseconds> - delay between migrate_pages() calls."
				    " Default is %d microseconds.\n",
				    DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
		}
	}
}

int main(int argc, char *argv[])
{
	int r;
	int wait_secs;
	const int max_halter_wait = 10;
	int run_secs = 0;
	int delay_usecs = 0;
	struct test_data_page *data;
	vm_vaddr_t test_data_page_vaddr;
	bool migrate = false;
	pthread_t threads[2];
	struct thread_params params[2];
	struct kvm_vm *vm;
	uint64_t *pipis_rcvd;

	get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
	if (run_secs <= 0)
		run_secs = DEFAULT_RUN_SECS;
	if (delay_usecs <= 0)
		delay_usecs = DEFAULT_DELAY_USECS;

	vm = vm_create_default(HALTER_VCPU_ID, 0, halter_guest_code);
	params[0].vm = vm;
	params[1].vm = vm;

	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vm, HALTER_VCPU_ID);
	vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);

	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);

	vm_vcpu_add_default(vm, SENDER_VCPU_ID, sender_guest_code);

	test_data_page_vaddr = vm_vaddr_alloc_page(vm);
	data =
		(struct test_data_page *)addr_gva2hva(vm, test_data_page_vaddr);
	memset(data, 0, sizeof(*data));
	params[0].data = data;
	params[1].data = data;

	vcpu_args_set(vm, HALTER_VCPU_ID, 1, test_data_page_vaddr);
	vcpu_args_set(vm, SENDER_VCPU_ID, 1, test_data_page_vaddr);

	pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
	params[0].pipis_rcvd = pipis_rcvd;
	params[1].pipis_rcvd = pipis_rcvd;

	/* Start halter vCPU thread and wait for it to execute first HLT. */
	params[0].vcpu_id = HALTER_VCPU_ID;
	r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
	TEST_ASSERT(r == 0,
		    "pthread_create halter failed errno=%d", errno);
	fprintf(stderr, "Halter vCPU thread started\n");

	wait_secs = 0;
	while ((wait_secs < max_halter_wait) && !data->hlt_count) {
		sleep(1);
		wait_secs++;
	}

	TEST_ASSERT(data->hlt_count,
		    "Halter vCPU did not execute first HLT within %d seconds",
		    max_halter_wait);

	fprintf(stderr,
		"Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
		data->halter_apic_id, wait_secs);

	params[1].vcpu_id = SENDER_VCPU_ID;
	r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
	TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno);

	fprintf(stderr,
		"IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n",
		run_secs);

	if (!migrate)
		sleep(run_secs);
	else
		do_migrations(data, run_secs, delay_usecs, pipis_rcvd);

	/*
	 * Cancel threads and wait for them to stop.
	 */
	cancel_join_vcpu_thread(threads[0], HALTER_VCPU_ID);
	cancel_join_vcpu_thread(threads[1], SENDER_VCPU_ID);

	fprintf(stderr,
		"Test successful after running for %d seconds.\n"
		"Sending vCPU sent %lu IPIs to halting vCPU\n"
		"Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
		"Halter APIC ID=%#x\n"
		"Sender ICR value=%#x ICR2 value=%#x\n"
		"Halter TPR=%#x PPR=%#x LVR=%#x\n"
		"Migrations attempted: %lu\n"
		"Migrations completed: %lu\n",
		run_secs, data->ipis_sent,
		data->hlt_count, data->wake_count, *pipis_rcvd,
		data->halter_apic_id,
		data->icr, data->icr2,
		data->halter_tpr, data->halter_ppr, data->halter_lvr,
		data->migrations_attempted, data->migrations_completed);

	kvm_vm_free(vm);

	return 0;
}