1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * svm_vmcall_test 4 * 5 * Copyright © 2021 Amazon.com, Inc. or its affiliates. 6 * 7 * Xen shared_info / pvclock testing 8 */ 9 10 #include "test_util.h" 11 #include "kvm_util.h" 12 #include "processor.h" 13 14 #include <stdint.h> 15 #include <time.h> 16 #include <sched.h> 17 #include <signal.h> 18 #include <pthread.h> 19 20 #include <sys/eventfd.h> 21 22 /* Defined in include/linux/kvm_types.h */ 23 #define GPA_INVALID (~(ulong)0) 24 25 #define SHINFO_REGION_GVA 0xc0000000ULL 26 #define SHINFO_REGION_GPA 0xc0000000ULL 27 #define SHINFO_REGION_SLOT 10 28 29 #define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (2 * PAGE_SIZE)) 30 #define DUMMY_REGION_SLOT 11 31 32 #define SHINFO_ADDR (SHINFO_REGION_GPA) 33 #define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) 34 #define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20) 35 #define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40) 36 37 #define SHINFO_VADDR (SHINFO_REGION_GVA) 38 #define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20) 39 #define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40) 40 41 #define EVTCHN_VECTOR 0x10 42 43 #define EVTCHN_TEST1 15 44 #define EVTCHN_TEST2 66 45 #define EVTCHN_TIMER 13 46 47 #define XEN_HYPERCALL_MSR 0x40000000 48 49 #define MIN_STEAL_TIME 50000 50 51 #define SHINFO_RACE_TIMEOUT 2 /* seconds */ 52 53 #define __HYPERVISOR_set_timer_op 15 54 #define __HYPERVISOR_sched_op 29 55 #define __HYPERVISOR_event_channel_op 32 56 57 #define SCHEDOP_poll 3 58 59 #define EVTCHNOP_send 4 60 61 #define EVTCHNSTAT_interdomain 2 62 63 struct evtchn_send { 64 u32 port; 65 }; 66 67 struct sched_poll { 68 u32 *ports; 69 unsigned int nr_ports; 70 u64 timeout; 71 }; 72 73 struct pvclock_vcpu_time_info { 74 u32 version; 75 u32 pad0; 76 u64 tsc_timestamp; 77 u64 system_time; 78 u32 tsc_to_system_mul; 79 s8 tsc_shift; 80 u8 flags; 81 u8 pad[2]; 82 } __attribute__((__packed__)); /* 32 bytes */ 83 84 struct pvclock_wall_clock { 85 u32 version; 86 u32 sec; 87 u32 nsec; 88 } __attribute__((__packed__)); 89 90 struct vcpu_runstate_info { 91 uint32_t state; 92 uint64_t state_entry_time; 93 uint64_t time[4]; 94 }; 95 96 struct arch_vcpu_info { 97 unsigned long cr2; 98 unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ 99 }; 100 101 struct vcpu_info { 102 uint8_t evtchn_upcall_pending; 103 uint8_t evtchn_upcall_mask; 104 unsigned long evtchn_pending_sel; 105 struct arch_vcpu_info arch; 106 struct pvclock_vcpu_time_info time; 107 }; /* 64 bytes (x86) */ 108 109 struct shared_info { 110 struct vcpu_info vcpu_info[32]; 111 unsigned long evtchn_pending[64]; 112 unsigned long evtchn_mask[64]; 113 struct pvclock_wall_clock wc; 114 uint32_t wc_sec_hi; 115 /* arch_shared_info here */ 116 }; 117 118 #define RUNSTATE_running 0 119 #define RUNSTATE_runnable 1 120 #define RUNSTATE_blocked 2 121 #define RUNSTATE_offline 3 122 123 static const char *runstate_names[] = { 124 "running", 125 "runnable", 126 "blocked", 127 "offline" 128 }; 129 130 struct { 131 struct kvm_irq_routing info; 132 struct kvm_irq_routing_entry entries[2]; 133 } irq_routes; 134 135 static volatile bool guest_saw_irq; 136 137 static void evtchn_handler(struct ex_regs *regs) 138 { 139 struct vcpu_info *vi = (void *)VCPU_INFO_VADDR; 140 vi->evtchn_upcall_pending = 0; 141 vi->evtchn_pending_sel = 0; 142 guest_saw_irq = true; 143 144 GUEST_SYNC(0x20); 145 } 146 147 static void guest_wait_for_irq(void) 148 { 149 while (!guest_saw_irq) 150 __asm__ __volatile__ ("rep nop" : : : "memory"); 151 guest_saw_irq = false; 152 } 153 154 static void guest_code(void) 155 { 156 struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR; 157 int i; 158 159 __asm__ __volatile__( 160 "sti\n" 161 "nop\n" 162 ); 163 164 /* Trigger an interrupt injection */ 165 GUEST_SYNC(0); 166 167 guest_wait_for_irq(); 168 169 /* Test having the host set runstates manually */ 170 GUEST_SYNC(RUNSTATE_runnable); 171 GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0); 172 GUEST_ASSERT(rs->state == 0); 173 174 GUEST_SYNC(RUNSTATE_blocked); 175 GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0); 176 GUEST_ASSERT(rs->state == 0); 177 178 GUEST_SYNC(RUNSTATE_offline); 179 GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0); 180 GUEST_ASSERT(rs->state == 0); 181 182 /* Test runstate time adjust */ 183 GUEST_SYNC(4); 184 GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a); 185 GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b); 186 187 /* Test runstate time set */ 188 GUEST_SYNC(5); 189 GUEST_ASSERT(rs->state_entry_time >= 0x8000); 190 GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0); 191 GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b); 192 GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a); 193 194 /* sched_yield() should result in some 'runnable' time */ 195 GUEST_SYNC(6); 196 GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME); 197 198 /* Attempt to deliver a *masked* interrupt */ 199 GUEST_SYNC(7); 200 201 /* Wait until we see the bit set */ 202 struct shared_info *si = (void *)SHINFO_VADDR; 203 while (!si->evtchn_pending[0]) 204 __asm__ __volatile__ ("rep nop" : : : "memory"); 205 206 /* Now deliver an *unmasked* interrupt */ 207 GUEST_SYNC(8); 208 209 guest_wait_for_irq(); 210 211 /* Change memslots and deliver an interrupt */ 212 GUEST_SYNC(9); 213 214 guest_wait_for_irq(); 215 216 /* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */ 217 GUEST_SYNC(10); 218 219 guest_wait_for_irq(); 220 221 GUEST_SYNC(11); 222 223 /* Our turn. Deliver event channel (to ourselves) with 224 * EVTCHNOP_send hypercall. */ 225 unsigned long rax; 226 struct evtchn_send s = { .port = 127 }; 227 __asm__ __volatile__ ("vmcall" : 228 "=a" (rax) : 229 "a" (__HYPERVISOR_event_channel_op), 230 "D" (EVTCHNOP_send), 231 "S" (&s)); 232 233 GUEST_ASSERT(rax == 0); 234 235 guest_wait_for_irq(); 236 237 GUEST_SYNC(12); 238 239 /* Deliver "outbound" event channel to an eventfd which 240 * happens to be one of our own irqfds. */ 241 s.port = 197; 242 __asm__ __volatile__ ("vmcall" : 243 "=a" (rax) : 244 "a" (__HYPERVISOR_event_channel_op), 245 "D" (EVTCHNOP_send), 246 "S" (&s)); 247 248 GUEST_ASSERT(rax == 0); 249 250 guest_wait_for_irq(); 251 252 GUEST_SYNC(13); 253 254 /* Set a timer 100ms in the future. */ 255 __asm__ __volatile__ ("vmcall" : 256 "=a" (rax) : 257 "a" (__HYPERVISOR_set_timer_op), 258 "D" (rs->state_entry_time + 100000000)); 259 GUEST_ASSERT(rax == 0); 260 261 GUEST_SYNC(14); 262 263 /* Now wait for the timer */ 264 guest_wait_for_irq(); 265 266 GUEST_SYNC(15); 267 268 /* The host has 'restored' the timer. Just wait for it. */ 269 guest_wait_for_irq(); 270 271 GUEST_SYNC(16); 272 273 /* Poll for an event channel port which is already set */ 274 u32 ports[1] = { EVTCHN_TIMER }; 275 struct sched_poll p = { 276 .ports = ports, 277 .nr_ports = 1, 278 .timeout = 0, 279 }; 280 281 __asm__ __volatile__ ("vmcall" : 282 "=a" (rax) : 283 "a" (__HYPERVISOR_sched_op), 284 "D" (SCHEDOP_poll), 285 "S" (&p)); 286 287 GUEST_ASSERT(rax == 0); 288 289 GUEST_SYNC(17); 290 291 /* Poll for an unset port and wait for the timeout. */ 292 p.timeout = 100000000; 293 __asm__ __volatile__ ("vmcall" : 294 "=a" (rax) : 295 "a" (__HYPERVISOR_sched_op), 296 "D" (SCHEDOP_poll), 297 "S" (&p)); 298 299 GUEST_ASSERT(rax == 0); 300 301 GUEST_SYNC(18); 302 303 /* A timer will wake the masked port we're waiting on, while we poll */ 304 p.timeout = 0; 305 __asm__ __volatile__ ("vmcall" : 306 "=a" (rax) : 307 "a" (__HYPERVISOR_sched_op), 308 "D" (SCHEDOP_poll), 309 "S" (&p)); 310 311 GUEST_ASSERT(rax == 0); 312 313 GUEST_SYNC(19); 314 315 /* A timer wake an *unmasked* port which should wake us with an 316 * actual interrupt, while we're polling on a different port. */ 317 ports[0]++; 318 p.timeout = 0; 319 __asm__ __volatile__ ("vmcall" : 320 "=a" (rax) : 321 "a" (__HYPERVISOR_sched_op), 322 "D" (SCHEDOP_poll), 323 "S" (&p)); 324 325 GUEST_ASSERT(rax == 0); 326 327 guest_wait_for_irq(); 328 329 GUEST_SYNC(20); 330 331 /* Timer should have fired already */ 332 guest_wait_for_irq(); 333 334 GUEST_SYNC(21); 335 /* Racing host ioctls */ 336 337 guest_wait_for_irq(); 338 339 GUEST_SYNC(22); 340 /* Racing vmcall against host ioctl */ 341 342 ports[0] = 0; 343 344 p = (struct sched_poll) { 345 .ports = ports, 346 .nr_ports = 1, 347 .timeout = 0 348 }; 349 350 wait_for_timer: 351 /* 352 * Poll for a timer wake event while the worker thread is mucking with 353 * the shared info. KVM XEN drops timer IRQs if the shared info is 354 * invalid when the timer expires. Arbitrarily poll 100 times before 355 * giving up and asking the VMM to re-arm the timer. 100 polls should 356 * consume enough time to beat on KVM without taking too long if the 357 * timer IRQ is dropped due to an invalid event channel. 358 */ 359 for (i = 0; i < 100 && !guest_saw_irq; i++) 360 asm volatile("vmcall" 361 : "=a" (rax) 362 : "a" (__HYPERVISOR_sched_op), 363 "D" (SCHEDOP_poll), 364 "S" (&p) 365 : "memory"); 366 367 /* 368 * Re-send the timer IRQ if it was (likely) dropped due to the timer 369 * expiring while the event channel was invalid. 370 */ 371 if (!guest_saw_irq) { 372 GUEST_SYNC(23); 373 goto wait_for_timer; 374 } 375 guest_saw_irq = false; 376 377 GUEST_SYNC(24); 378 } 379 380 static int cmp_timespec(struct timespec *a, struct timespec *b) 381 { 382 if (a->tv_sec > b->tv_sec) 383 return 1; 384 else if (a->tv_sec < b->tv_sec) 385 return -1; 386 else if (a->tv_nsec > b->tv_nsec) 387 return 1; 388 else if (a->tv_nsec < b->tv_nsec) 389 return -1; 390 else 391 return 0; 392 } 393 394 static struct vcpu_info *vinfo; 395 static struct kvm_vcpu *vcpu; 396 397 static void handle_alrm(int sig) 398 { 399 if (vinfo) 400 printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending); 401 vcpu_dump(stdout, vcpu, 0); 402 TEST_FAIL("IRQ delivery timed out"); 403 } 404 405 static void *juggle_shinfo_state(void *arg) 406 { 407 struct kvm_vm *vm = (struct kvm_vm *)arg; 408 409 struct kvm_xen_hvm_attr cache_init = { 410 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, 411 .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE 412 }; 413 414 struct kvm_xen_hvm_attr cache_destroy = { 415 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, 416 .u.shared_info.gfn = GPA_INVALID 417 }; 418 419 for (;;) { 420 __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_init); 421 __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_destroy); 422 pthread_testcancel(); 423 }; 424 425 return NULL; 426 } 427 428 int main(int argc, char *argv[]) 429 { 430 struct timespec min_ts, max_ts, vm_ts; 431 struct kvm_vm *vm; 432 pthread_t thread; 433 bool verbose; 434 int ret; 435 436 verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) || 437 !strncmp(argv[1], "--verbose", 10)); 438 439 int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); 440 TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO); 441 442 bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE); 443 bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL); 444 bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND); 445 446 clock_gettime(CLOCK_REALTIME, &min_ts); 447 448 vm = vm_create_with_one_vcpu(&vcpu, guest_code); 449 450 /* Map a region for the shared_info page */ 451 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 452 SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0); 453 virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2); 454 455 struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR); 456 457 int zero_fd = open("/dev/zero", O_RDONLY); 458 TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero"); 459 460 struct kvm_xen_hvm_config hvmc = { 461 .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, 462 .msr = XEN_HYPERCALL_MSR, 463 }; 464 465 /* Let the kernel know that we *will* use it for sending all 466 * event channels, which lets it intercept SCHEDOP_poll */ 467 if (do_evtchn_tests) 468 hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND; 469 470 vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc); 471 472 struct kvm_xen_hvm_attr lm = { 473 .type = KVM_XEN_ATTR_TYPE_LONG_MODE, 474 .u.long_mode = 1, 475 }; 476 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); 477 478 struct kvm_xen_hvm_attr ha = { 479 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, 480 .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE, 481 }; 482 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha); 483 484 /* 485 * Test what happens when the HVA of the shinfo page is remapped after 486 * the kernel has a reference to it. But make sure we copy the clock 487 * info over since that's only set at setup time, and we test it later. 488 */ 489 struct pvclock_wall_clock wc_copy = shinfo->wc; 490 void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0); 491 TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info"); 492 shinfo->wc = wc_copy; 493 494 struct kvm_xen_vcpu_attr vi = { 495 .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, 496 .u.gpa = VCPU_INFO_ADDR, 497 }; 498 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi); 499 500 struct kvm_xen_vcpu_attr pvclock = { 501 .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO, 502 .u.gpa = PVTIME_ADDR, 503 }; 504 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock); 505 506 struct kvm_xen_hvm_attr vec = { 507 .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR, 508 .u.vector = EVTCHN_VECTOR, 509 }; 510 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec); 511 512 vm_init_descriptor_tables(vm); 513 vcpu_init_descriptor_tables(vcpu); 514 vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler); 515 516 if (do_runstate_tests) { 517 struct kvm_xen_vcpu_attr st = { 518 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, 519 .u.gpa = RUNSTATE_ADDR, 520 }; 521 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st); 522 } 523 524 int irq_fd[2] = { -1, -1 }; 525 526 if (do_eventfd_tests) { 527 irq_fd[0] = eventfd(0, 0); 528 irq_fd[1] = eventfd(0, 0); 529 530 /* Unexpected, but not a KVM failure */ 531 if (irq_fd[0] == -1 || irq_fd[1] == -1) 532 do_evtchn_tests = do_eventfd_tests = false; 533 } 534 535 if (do_eventfd_tests) { 536 irq_routes.info.nr = 2; 537 538 irq_routes.entries[0].gsi = 32; 539 irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN; 540 irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1; 541 irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id; 542 irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; 543 544 irq_routes.entries[1].gsi = 33; 545 irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN; 546 irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2; 547 irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id; 548 irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; 549 550 vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info); 551 552 struct kvm_irqfd ifd = { }; 553 554 ifd.fd = irq_fd[0]; 555 ifd.gsi = 32; 556 vm_ioctl(vm, KVM_IRQFD, &ifd); 557 558 ifd.fd = irq_fd[1]; 559 ifd.gsi = 33; 560 vm_ioctl(vm, KVM_IRQFD, &ifd); 561 562 struct sigaction sa = { }; 563 sa.sa_handler = handle_alrm; 564 sigaction(SIGALRM, &sa, NULL); 565 } 566 567 struct kvm_xen_vcpu_attr tmr = { 568 .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER, 569 .u.timer.port = EVTCHN_TIMER, 570 .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, 571 .u.timer.expires_ns = 0 572 }; 573 574 if (do_evtchn_tests) { 575 struct kvm_xen_hvm_attr inj = { 576 .type = KVM_XEN_ATTR_TYPE_EVTCHN, 577 .u.evtchn.send_port = 127, 578 .u.evtchn.type = EVTCHNSTAT_interdomain, 579 .u.evtchn.flags = 0, 580 .u.evtchn.deliver.port.port = EVTCHN_TEST1, 581 .u.evtchn.deliver.port.vcpu = vcpu->id + 1, 582 .u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, 583 }; 584 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); 585 586 /* Test migration to a different vCPU */ 587 inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE; 588 inj.u.evtchn.deliver.port.vcpu = vcpu->id; 589 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); 590 591 inj.u.evtchn.send_port = 197; 592 inj.u.evtchn.deliver.eventfd.port = 0; 593 inj.u.evtchn.deliver.eventfd.fd = irq_fd[1]; 594 inj.u.evtchn.flags = 0; 595 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); 596 597 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 598 } 599 vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR); 600 vinfo->evtchn_upcall_pending = 0; 601 602 struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR); 603 rs->state = 0x5a; 604 605 bool evtchn_irq_expected = false; 606 607 for (;;) { 608 volatile struct kvm_run *run = vcpu->run; 609 struct ucall uc; 610 611 vcpu_run(vcpu); 612 613 TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, 614 "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", 615 run->exit_reason, 616 exit_reason_str(run->exit_reason)); 617 618 switch (get_ucall(vcpu, &uc)) { 619 case UCALL_ABORT: 620 REPORT_GUEST_ASSERT(uc); 621 /* NOT REACHED */ 622 case UCALL_SYNC: { 623 struct kvm_xen_vcpu_attr rst; 624 long rundelay; 625 626 if (do_runstate_tests) 627 TEST_ASSERT(rs->state_entry_time == rs->time[0] + 628 rs->time[1] + rs->time[2] + rs->time[3], 629 "runstate times don't add up"); 630 631 switch (uc.args[1]) { 632 case 0: 633 if (verbose) 634 printf("Delivering evtchn upcall\n"); 635 evtchn_irq_expected = true; 636 vinfo->evtchn_upcall_pending = 1; 637 break; 638 639 case RUNSTATE_runnable...RUNSTATE_offline: 640 TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen"); 641 if (!do_runstate_tests) 642 goto done; 643 if (verbose) 644 printf("Testing runstate %s\n", runstate_names[uc.args[1]]); 645 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT; 646 rst.u.runstate.state = uc.args[1]; 647 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); 648 break; 649 650 case 4: 651 if (verbose) 652 printf("Testing RUNSTATE_ADJUST\n"); 653 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST; 654 memset(&rst.u, 0, sizeof(rst.u)); 655 rst.u.runstate.state = (uint64_t)-1; 656 rst.u.runstate.time_blocked = 657 0x5a - rs->time[RUNSTATE_blocked]; 658 rst.u.runstate.time_offline = 659 0x6b6b - rs->time[RUNSTATE_offline]; 660 rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked - 661 rst.u.runstate.time_offline; 662 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); 663 break; 664 665 case 5: 666 if (verbose) 667 printf("Testing RUNSTATE_DATA\n"); 668 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA; 669 memset(&rst.u, 0, sizeof(rst.u)); 670 rst.u.runstate.state = RUNSTATE_running; 671 rst.u.runstate.state_entry_time = 0x6b6b + 0x5a; 672 rst.u.runstate.time_blocked = 0x6b6b; 673 rst.u.runstate.time_offline = 0x5a; 674 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); 675 break; 676 677 case 6: 678 if (verbose) 679 printf("Testing steal time\n"); 680 /* Yield until scheduler delay exceeds target */ 681 rundelay = get_run_delay() + MIN_STEAL_TIME; 682 do { 683 sched_yield(); 684 } while (get_run_delay() < rundelay); 685 break; 686 687 case 7: 688 if (!do_eventfd_tests) 689 goto done; 690 if (verbose) 691 printf("Testing masked event channel\n"); 692 shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1; 693 eventfd_write(irq_fd[0], 1UL); 694 alarm(1); 695 break; 696 697 case 8: 698 if (verbose) 699 printf("Testing unmasked event channel\n"); 700 /* Unmask that, but deliver the other one */ 701 shinfo->evtchn_pending[0] = 0; 702 shinfo->evtchn_mask[0] = 0; 703 eventfd_write(irq_fd[1], 1UL); 704 evtchn_irq_expected = true; 705 alarm(1); 706 break; 707 708 case 9: 709 TEST_ASSERT(!evtchn_irq_expected, 710 "Expected event channel IRQ but it didn't happen"); 711 shinfo->evtchn_pending[1] = 0; 712 if (verbose) 713 printf("Testing event channel after memslot change\n"); 714 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 715 DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0); 716 eventfd_write(irq_fd[0], 1UL); 717 evtchn_irq_expected = true; 718 alarm(1); 719 break; 720 721 case 10: 722 TEST_ASSERT(!evtchn_irq_expected, 723 "Expected event channel IRQ but it didn't happen"); 724 if (!do_evtchn_tests) 725 goto done; 726 727 shinfo->evtchn_pending[0] = 0; 728 if (verbose) 729 printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n"); 730 731 struct kvm_irq_routing_xen_evtchn e; 732 e.port = EVTCHN_TEST2; 733 e.vcpu = vcpu->id; 734 e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; 735 736 vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e); 737 evtchn_irq_expected = true; 738 alarm(1); 739 break; 740 741 case 11: 742 TEST_ASSERT(!evtchn_irq_expected, 743 "Expected event channel IRQ but it didn't happen"); 744 shinfo->evtchn_pending[1] = 0; 745 746 if (verbose) 747 printf("Testing guest EVTCHNOP_send direct to evtchn\n"); 748 evtchn_irq_expected = true; 749 alarm(1); 750 break; 751 752 case 12: 753 TEST_ASSERT(!evtchn_irq_expected, 754 "Expected event channel IRQ but it didn't happen"); 755 shinfo->evtchn_pending[0] = 0; 756 757 if (verbose) 758 printf("Testing guest EVTCHNOP_send to eventfd\n"); 759 evtchn_irq_expected = true; 760 alarm(1); 761 break; 762 763 case 13: 764 TEST_ASSERT(!evtchn_irq_expected, 765 "Expected event channel IRQ but it didn't happen"); 766 shinfo->evtchn_pending[1] = 0; 767 768 if (verbose) 769 printf("Testing guest oneshot timer\n"); 770 break; 771 772 case 14: 773 memset(&tmr, 0, sizeof(tmr)); 774 tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER; 775 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); 776 TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER, 777 "Timer port not returned"); 778 TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, 779 "Timer priority not returned"); 780 TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time, 781 "Timer expiry not returned"); 782 evtchn_irq_expected = true; 783 alarm(1); 784 break; 785 786 case 15: 787 TEST_ASSERT(!evtchn_irq_expected, 788 "Expected event channel IRQ but it didn't happen"); 789 shinfo->evtchn_pending[0] = 0; 790 791 if (verbose) 792 printf("Testing restored oneshot timer\n"); 793 794 tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; 795 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 796 evtchn_irq_expected = true; 797 alarm(1); 798 break; 799 800 case 16: 801 TEST_ASSERT(!evtchn_irq_expected, 802 "Expected event channel IRQ but it didn't happen"); 803 804 if (verbose) 805 printf("Testing SCHEDOP_poll with already pending event\n"); 806 shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER; 807 alarm(1); 808 break; 809 810 case 17: 811 if (verbose) 812 printf("Testing SCHEDOP_poll timeout\n"); 813 shinfo->evtchn_pending[0] = 0; 814 alarm(1); 815 break; 816 817 case 18: 818 if (verbose) 819 printf("Testing SCHEDOP_poll wake on masked event\n"); 820 821 tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; 822 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 823 alarm(1); 824 break; 825 826 case 19: 827 shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0; 828 if (verbose) 829 printf("Testing SCHEDOP_poll wake on unmasked event\n"); 830 831 evtchn_irq_expected = true; 832 tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; 833 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 834 835 /* Read it back and check the pending time is reported correctly */ 836 tmr.u.timer.expires_ns = 0; 837 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); 838 TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000, 839 "Timer not reported pending"); 840 alarm(1); 841 break; 842 843 case 20: 844 TEST_ASSERT(!evtchn_irq_expected, 845 "Expected event channel IRQ but it didn't happen"); 846 /* Read timer and check it is no longer pending */ 847 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); 848 TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending"); 849 850 shinfo->evtchn_pending[0] = 0; 851 if (verbose) 852 printf("Testing timer in the past\n"); 853 854 evtchn_irq_expected = true; 855 tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL; 856 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 857 alarm(1); 858 break; 859 860 case 21: 861 TEST_ASSERT(!evtchn_irq_expected, 862 "Expected event channel IRQ but it didn't happen"); 863 alarm(0); 864 865 if (verbose) 866 printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n"); 867 868 ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm); 869 TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret)); 870 871 struct kvm_irq_routing_xen_evtchn uxe = { 872 .port = 1, 873 .vcpu = vcpu->id, 874 .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL 875 }; 876 877 evtchn_irq_expected = true; 878 for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;) 879 __vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe); 880 break; 881 882 case 22: 883 TEST_ASSERT(!evtchn_irq_expected, 884 "Expected event channel IRQ but it didn't happen"); 885 886 if (verbose) 887 printf("Testing shinfo lock corruption (SCHEDOP_poll)\n"); 888 889 shinfo->evtchn_pending[0] = 1; 890 891 evtchn_irq_expected = true; 892 tmr.u.timer.expires_ns = rs->state_entry_time + 893 SHINFO_RACE_TIMEOUT * 1000000000ULL; 894 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 895 break; 896 897 case 23: 898 /* 899 * Optional and possibly repeated sync point. 900 * Injecting the timer IRQ may fail if the 901 * shinfo is invalid when the timer expires. 902 * If the timer has expired but the IRQ hasn't 903 * been delivered, rearm the timer and retry. 904 */ 905 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); 906 907 /* Resume the guest if the timer is still pending. */ 908 if (tmr.u.timer.expires_ns) 909 break; 910 911 /* All done if the IRQ was delivered. */ 912 if (!evtchn_irq_expected) 913 break; 914 915 tmr.u.timer.expires_ns = rs->state_entry_time + 916 SHINFO_RACE_TIMEOUT * 1000000000ULL; 917 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 918 break; 919 case 24: 920 TEST_ASSERT(!evtchn_irq_expected, 921 "Expected event channel IRQ but it didn't happen"); 922 923 ret = pthread_cancel(thread); 924 TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret)); 925 926 ret = pthread_join(thread, 0); 927 TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret)); 928 goto done; 929 930 case 0x20: 931 TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ"); 932 evtchn_irq_expected = false; 933 break; 934 } 935 break; 936 } 937 case UCALL_DONE: 938 goto done; 939 default: 940 TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); 941 } 942 } 943 944 done: 945 alarm(0); 946 clock_gettime(CLOCK_REALTIME, &max_ts); 947 948 /* 949 * Just a *really* basic check that things are being put in the 950 * right place. The actual calculations are much the same for 951 * Xen as they are for the KVM variants, so no need to check. 952 */ 953 struct pvclock_wall_clock *wc; 954 struct pvclock_vcpu_time_info *ti, *ti2; 955 956 wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00); 957 ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20); 958 ti2 = addr_gpa2hva(vm, PVTIME_ADDR); 959 960 if (verbose) { 961 printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec); 962 printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n", 963 ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul, 964 ti->tsc_shift, ti->flags); 965 printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n", 966 ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul, 967 ti2->tsc_shift, ti2->flags); 968 } 969 970 vm_ts.tv_sec = wc->sec; 971 vm_ts.tv_nsec = wc->nsec; 972 TEST_ASSERT(wc->version && !(wc->version & 1), 973 "Bad wallclock version %x", wc->version); 974 TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old"); 975 TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new"); 976 977 TEST_ASSERT(ti->version && !(ti->version & 1), 978 "Bad time_info version %x", ti->version); 979 TEST_ASSERT(ti2->version && !(ti2->version & 1), 980 "Bad time_info version %x", ti->version); 981 982 if (do_runstate_tests) { 983 /* 984 * Fetch runstate and check sanity. Strictly speaking in the 985 * general case we might not expect the numbers to be identical 986 * but in this case we know we aren't running the vCPU any more. 987 */ 988 struct kvm_xen_vcpu_attr rst = { 989 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA, 990 }; 991 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst); 992 993 if (verbose) { 994 printf("Runstate: %s(%d), entry %" PRIu64 " ns\n", 995 rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown", 996 rs->state, rs->state_entry_time); 997 for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) { 998 printf("State %s: %" PRIu64 " ns\n", 999 runstate_names[i], rs->time[i]); 1000 } 1001 } 1002 TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch"); 1003 TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time, 1004 "State entry time mismatch"); 1005 TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running, 1006 "Running time mismatch"); 1007 TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, 1008 "Runnable time mismatch"); 1009 TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, 1010 "Blocked time mismatch"); 1011 TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, 1012 "Offline time mismatch"); 1013 1014 TEST_ASSERT(rs->state_entry_time == rs->time[0] + 1015 rs->time[1] + rs->time[2] + rs->time[3], 1016 "runstate times don't add up"); 1017 } 1018 kvm_vm_free(vm); 1019 return 0; 1020 } 1021