// SPDX-License-Identifier: GPL-2.0
/*
 * xapic_ipi_test
 *
 * Copyright (C) 2020, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
 * another vCPU that is halted when KVM's backing page for the APIC access
 * address has been moved by mm.
 *
 * The test starts two vCPUs: one that sends IPIs and one that continually
 * executes HLT. The sender checks that the halter has woken from the HLT and
 * has reentered HLT before sending the next IPI. While the vCPUs are running,
 * the host continually calls migrate_pages to move all of the process' pages
 * amongst the available numa nodes on the machine.
 *
 * Migration is a command line option. When it is used on a non-numa machine,
 * the test exits with an error. The test is still useful on non-numa machines
 * for testing IPIs.
 */

#define _GNU_SOURCE /* for program_invocation_short_name */
#include <getopt.h>
#include <pthread.h>
#include <inttypes.h>
#include <string.h>
#include <time.h>

#include "kvm_util.h"
#include "numaif.h"
#include "processor.h"
#include "test_util.h"
#include "vmx.h"

/* Default running time for the test */
#define DEFAULT_RUN_SECS 3

/* Default delay between migrate_pages calls (microseconds) */
#define DEFAULT_DELAY_USECS 500000

/*
 * Vector for IPI from sender vCPU to halting vCPU.
 * Value is arbitrary and was chosen for the alternating bit pattern. Any
 * value should work.
 */
#define IPI_VECTOR 0xa5

/*
 * Incremented in the IPI handler. Provides evidence to the sender that the
 * IPI arrived at the destination.
 */
static volatile uint64_t ipis_rcvd;

/* Data struct shared between host main thread and vCPUs */
struct test_data_page {
	uint32_t halter_apic_id;
	volatile uint64_t hlt_count;
	volatile uint64_t wake_count;
	uint64_t ipis_sent;
	uint64_t migrations_attempted;
	uint64_t migrations_completed;
	uint32_t icr;
	uint32_t icr2;
	uint32_t halter_tpr;
	uint32_t halter_ppr;

	/*
	 * Record local version register as a cross-check that APIC access
	 * worked. Value should match what KVM reports (APIC_VERSION in
	 * arch/x86/kvm/lapic.c). If the test is failing, check that the values
	 * match to determine whether APIC access exits are working.
	 */
	uint32_t halter_lvr;
};

struct thread_params {
	struct test_data_page *data;
	struct kvm_vcpu *vcpu;
	uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
};

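/*
 * Sanity check, run in the guest: the xAPIC must sit at the architectural
 * default base address (APIC_DEFAULT_GPA), which is where the test maps the
 * APIC access page.
 */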
void verify_apic_base_addr(void)
{
	uint64_t msr = rdmsr(MSR_IA32_APICBASE);
	uint64_t base = GET_APIC_BASE(msr);

	GUEST_ASSERT(base == APIC_DEFAULT_GPA);
}

static void halter_guest_code(struct test_data_page *data)
{
	verify_apic_base_addr();
	xapic_enable();

	data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
	data->halter_lvr = xapic_read_reg(APIC_LVR);

	/*
	 * Loop forever HLTing and recording halts & wakes. Disable interrupts
	 * each time around to minimize the window between signaling the
	 * pending halt to the sender vCPU and executing the halt. No need to
	 * disable on the first run as this vCPU executes first and the host
	 * waits for it to signal going into the first halt before starting
	 * the sender vCPU. Record TPR and PPR for diagnostic purposes in case
	 * the test fails.
	 */
	for (;;) {
		data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
		data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
		data->hlt_count++;
		asm volatile("sti; hlt; cli");
		data->wake_count++;
	}
}

/*
 * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to
 * enable diagnosing errant writes to the APIC access address backing page in
 * case of test failure.
 */
static void guest_ipi_handler(struct ex_regs *regs)
{
	ipis_rcvd++;
	xapic_write_reg(APIC_EOI, 77);
}

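/*
 * Guest code for the sending vCPU: program ICR/ICR2 once, then repeatedly
 * send a fixed IPI to the halter, waiting (bounded by a TSC-based timeout)
 * for evidence that the IPI was received and the halter went back into HLT.
 */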
static void sender_guest_code(struct test_data_page *data)
{
	uint64_t last_wake_count;
	uint64_t last_hlt_count;
	uint64_t last_ipis_rcvd_count;
	uint32_t icr_val;
	uint32_t icr2_val;
	uint64_t tsc_start;

	verify_apic_base_addr();
	xapic_enable();

	/*
	 * Init interrupt command register for sending IPIs
	 *
	 * Delivery mode=fixed, per SDM:
	 *   "Delivers the interrupt specified in the vector field to the
	 *    target processor."
	 *
	 * Destination mode=physical i.e. specify the target by its local APIC
	 * ID. This vCPU assumes that the halter vCPU has already started and
	 * set data->halter_apic_id.
	 */
	icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
	icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
	data->icr = icr_val;
	data->icr2 = icr2_val;

	last_wake_count = data->wake_count;
	last_hlt_count = data->hlt_count;
	last_ipis_rcvd_count = ipis_rcvd;
	for (;;) {
		/*
		 * Send IPI to halter vCPU.
		 * The first IPI can be sent unconditionally because the
		 * halter vCPU starts earlier.
		 */
		xapic_write_reg(APIC_ICR2, icr2_val);
		xapic_write_reg(APIC_ICR, icr_val);
		data->ipis_sent++;

		/*
		 * Wait up to ~1 sec for the halter to indicate that it has:
		 * 1. Received the IPI
		 * 2. Woken up from the halt
		 * 3. Gone back into halt
		 * Current CPUs typically run at 2.x GHz which is ~2
		 * billion ticks per second.
		 */
		tsc_start = rdtsc();
		while (rdtsc() - tsc_start < 2000000000) {
			if ((ipis_rcvd != last_ipis_rcvd_count) &&
			    (data->wake_count != last_wake_count) &&
			    (data->hlt_count != last_hlt_count))
				break;
		}

		GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
			     (data->wake_count != last_wake_count) &&
			     (data->hlt_count != last_hlt_count));

		last_wake_count = data->wake_count;
		last_hlt_count = data->hlt_count;
		last_ipis_rcvd_count = ipis_rcvd;
	}
}

static void *vcpu_thread(void *arg)
{
	struct thread_params *params = (struct thread_params *)arg;
	struct kvm_vcpu *vcpu = params->vcpu;
	struct ucall uc;
	int old;
	int r;
	unsigned int exit_reason;

	r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
	TEST_ASSERT(r == 0,
		    "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
		    vcpu->id, r);

	fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
	vcpu_run(vcpu);
	exit_reason = vcpu->run->exit_reason;

	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
		    "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
		    vcpu->id, exit_reason, exit_reason_str(exit_reason));

	if (get_ucall(vcpu, &uc) == UCALL_ABORT) {
		TEST_ASSERT(false,
			    "vCPU %u exited with error: %s.\n"
			    "Sending vCPU sent %lu IPIs to halting vCPU\n"
			    "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
			    "Halter TPR=%#x PPR=%#x LVR=%#x\n"
			    "Migrations attempted: %lu\n"
			    "Migrations completed: %lu\n",
			    vcpu->id, (const char *)uc.args[0],
			    params->data->ipis_sent, params->data->hlt_count,
			    params->data->wake_count,
			    *params->pipis_rcvd, params->data->halter_tpr,
			    params->data->halter_ppr, params->data->halter_lvr,
			    params->data->migrations_attempted,
			    params->data->migrations_completed);
	}

	return NULL;
}

static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
{
	void *retval;
	int r;

	r = pthread_cancel(thread);
	TEST_ASSERT(r == 0,
		    "pthread_cancel on vcpu_id=%d failed with errno=%d",
		    vcpu->id, r);

	r = pthread_join(thread, &retval);
	TEST_ASSERT(r == 0,
		    "pthread_join on vcpu_id=%d failed with errno=%d",
		    vcpu->id, r);
	TEST_ASSERT(retval == PTHREAD_CANCELED,
		    "expected retval=%p, got %p", PTHREAD_CANCELED,
		    retval);
}

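/*
 * Host-side loop: repeatedly call migrate_pages() on this process so that the
 * page backing the APIC access address keeps moving between numa nodes while
 * the vCPUs run. Once per second, report progress and verify that IPIs, HLTs
 * and wakes are still advancing.
 */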
void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
		   uint64_t *pipis_rcvd)
{
	long pages_not_moved;
	unsigned long nodemask = 0;
	unsigned long nodemasks[sizeof(nodemask) * 8];
	int nodes = 0;
	time_t start_time, last_update, now;
	time_t interval_secs = 1;
	int i, r;
	int from, to;
	unsigned long bit;
	uint64_t hlt_count;
	uint64_t wake_count;
	uint64_t ipis_sent;

	fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
		delay_usecs);

	/* Get set of first 64 numa nodes available */
	r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
			  0, MPOL_F_MEMS_ALLOWED);
	TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);

	fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
		"(each 1-bit indicates node is present): %#lx\n",
		sizeof(nodemask) * 8, nodemask);

	/*
	 * Init array of masks containing a single bit in each, one for each
	 * available node. migrate_pages called below requires specifying nodes
	 * as bit masks.
	 */
	for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) {
		if (nodemask & bit) {
			nodemasks[nodes] = nodemask & bit;
			nodes++;
		}
	}

	TEST_ASSERT(nodes > 1,
		    "Did not find at least 2 numa nodes. Can't do migration\n");

	fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);

	from = 0;
	to = 1;
	start_time = time(NULL);
	last_update = start_time;

	ipis_sent = data->ipis_sent;
	hlt_count = data->hlt_count;
	wake_count = data->wake_count;

	while ((int)(time(NULL) - start_time) < run_secs) {
		data->migrations_attempted++;

		/*
		 * migrate_pages with PID=0 will migrate all pages of this
		 * process between the nodes specified as bitmasks. The page
		 * backing the APIC access address belongs to this process
		 * because it is allocated by KVM in the context of the
		 * KVM_CREATE_VCPU ioctl. If that assumption ever changes this
		 * test may break or give a false positive signal.
		 */
		pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]),
						&nodemasks[from],
						&nodemasks[to]);
		if (pages_not_moved < 0)
			fprintf(stderr,
				"migrate_pages failed, errno=%d\n", errno);
		else if (pages_not_moved > 0)
			fprintf(stderr,
				"migrate_pages could not move %ld pages\n",
				pages_not_moved);
		else
			data->migrations_completed++;

		from = to;
		to++;
		if (to == nodes)
			to = 0;

		now = time(NULL);
		if (((now - start_time) % interval_secs == 0) &&
		    (now != last_update)) {
			last_update = now;
			fprintf(stderr,
				"%lu seconds: Migrations attempted=%lu completed=%lu, "
				"IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
				now - start_time, data->migrations_attempted,
				data->migrations_completed,
				data->ipis_sent, *pipis_rcvd,
				data->hlt_count, data->wake_count);

			TEST_ASSERT(ipis_sent != data->ipis_sent &&
				    hlt_count != data->hlt_count &&
				    wake_count != data->wake_count,
				    "IPI, HLT and wake count have not increased "
				    "in the last %lu seconds. "
				    "HLTer is likely hung.\n", interval_secs);

			ipis_sent = data->ipis_sent;
			hlt_count = data->hlt_count;
			wake_count = data->wake_count;
		}
		usleep(delay_usecs);
	}
}

void get_cmdline_args(int argc, char *argv[], int *run_secs,
		      bool *migrate, int *delay_usecs)
{
	for (;;) {
		int opt = getopt(argc, argv, "s:d:m");

		if (opt == -1)
			break;
		switch (opt) {
		case 's':
			*run_secs = parse_size(optarg);
			break;
		case 'm':
			*migrate = true;
			break;
		case 'd':
			*delay_usecs = parse_size(optarg);
			break;
		default:
			TEST_ASSERT(false,
				    "Usage: -s <runtime seconds>. Default is %d seconds.\n"
				    "-m adds calls to migrate_pages while vCPUs are running."
				    " Default is no migrations.\n"
				    "-d <delay microseconds> - delay between migrate_pages() calls."
				    " Default is %d microseconds.\n",
				    DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
		}
	}
}

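/*
 * Test flow: create the VM with the halter vCPU, install the IPI handler and
 * identity-map the xAPIC MMIO region, add the sender vCPU, and share a data
 * page between the host and both guests. Run both vCPU threads while the host
 * either sleeps or migrates the process' pages for the requested duration,
 * then report the final counters.
 */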
388 " Default is %d microseconds.\n", 389 DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS); 390 } 391 } 392 } 393 394 int main(int argc, char *argv[]) 395 { 396 int r; 397 int wait_secs; 398 const int max_halter_wait = 10; 399 int run_secs = 0; 400 int delay_usecs = 0; 401 struct test_data_page *data; 402 vm_vaddr_t test_data_page_vaddr; 403 bool migrate = false; 404 pthread_t threads[2]; 405 struct thread_params params[2]; 406 struct kvm_vm *vm; 407 uint64_t *pipis_rcvd; 408 409 get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs); 410 if (run_secs <= 0) 411 run_secs = DEFAULT_RUN_SECS; 412 if (delay_usecs <= 0) 413 delay_usecs = DEFAULT_DELAY_USECS; 414 415 vm = vm_create_with_one_vcpu(¶ms[0].vcpu, halter_guest_code); 416 417 vm_init_descriptor_tables(vm); 418 vcpu_init_descriptor_tables(params[0].vcpu); 419 vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler); 420 421 virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); 422 423 params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code); 424 425 test_data_page_vaddr = vm_vaddr_alloc_page(vm); 426 data = addr_gva2hva(vm, test_data_page_vaddr); 427 memset(data, 0, sizeof(*data)); 428 params[0].data = data; 429 params[1].data = data; 430 431 vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr); 432 vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr); 433 434 pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd); 435 params[0].pipis_rcvd = pipis_rcvd; 436 params[1].pipis_rcvd = pipis_rcvd; 437 438 /* Start halter vCPU thread and wait for it to execute first HLT. */ 439 r = pthread_create(&threads[0], NULL, vcpu_thread, ¶ms[0]); 440 TEST_ASSERT(r == 0, 441 "pthread_create halter failed errno=%d", errno); 442 fprintf(stderr, "Halter vCPU thread started\n"); 443 444 wait_secs = 0; 445 while ((wait_secs < max_halter_wait) && !data->hlt_count) { 446 sleep(1); 447 wait_secs++; 448 } 449 450 TEST_ASSERT(data->hlt_count, 451 "Halter vCPU did not execute first HLT within %d seconds", 452 max_halter_wait); 453 454 fprintf(stderr, 455 "Halter vCPU thread reported its APIC ID: %u after %d seconds.\n", 456 data->halter_apic_id, wait_secs); 457 458 r = pthread_create(&threads[1], NULL, vcpu_thread, ¶ms[1]); 459 TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno); 460 461 fprintf(stderr, 462 "IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n", 463 run_secs); 464 465 if (!migrate) 466 sleep(run_secs); 467 else 468 do_migrations(data, run_secs, delay_usecs, pipis_rcvd); 469 470 /* 471 * Cancel threads and wait for them to stop. 472 */ 473 cancel_join_vcpu_thread(threads[0], params[0].vcpu); 474 cancel_join_vcpu_thread(threads[1], params[1].vcpu); 475 476 fprintf(stderr, 477 "Test successful after running for %d seconds.\n" 478 "Sending vCPU sent %lu IPIs to halting vCPU\n" 479 "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n" 480 "Halter APIC ID=%#x\n" 481 "Sender ICR value=%#x ICR2 value=%#x\n" 482 "Halter TPR=%#x PPR=%#x LVR=%#x\n" 483 "Migrations attempted: %lu\n" 484 "Migrations completed: %lu\n", 485 run_secs, data->ipis_sent, 486 data->hlt_count, data->wake_count, *pipis_rcvd, 487 data->halter_apic_id, 488 data->icr, data->icr2, 489 data->halter_tpr, data->halter_ppr, data->halter_lvr, 490 data->migrations_attempted, data->migrations_completed); 491 492 kvm_vm_free(vm); 493 494 return 0; 495 } 496