1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * svm_vmcall_test 4 * 5 * Copyright © 2021 Amazon.com, Inc. or its affiliates. 6 * 7 * Xen shared_info / pvclock testing 8 */ 9 10 #include "test_util.h" 11 #include "kvm_util.h" 12 #include "processor.h" 13 14 #include <stdint.h> 15 #include <time.h> 16 #include <sched.h> 17 #include <signal.h> 18 #include <pthread.h> 19 20 #include <sys/eventfd.h> 21 22 /* Defined in include/linux/kvm_types.h */ 23 #define GPA_INVALID (~(ulong)0) 24 25 #define SHINFO_REGION_GVA 0xc0000000ULL 26 #define SHINFO_REGION_GPA 0xc0000000ULL 27 #define SHINFO_REGION_SLOT 10 28 29 #define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (3 * PAGE_SIZE)) 30 #define DUMMY_REGION_SLOT 11 31 32 #define SHINFO_ADDR (SHINFO_REGION_GPA) 33 #define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40) 34 #define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) 35 #define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15) 36 37 #define SHINFO_VADDR (SHINFO_REGION_GVA) 38 #define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40) 39 #define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15) 40 41 #define EVTCHN_VECTOR 0x10 42 43 #define EVTCHN_TEST1 15 44 #define EVTCHN_TEST2 66 45 #define EVTCHN_TIMER 13 46 47 #define XEN_HYPERCALL_MSR 0x40000000 48 49 #define MIN_STEAL_TIME 50000 50 51 #define SHINFO_RACE_TIMEOUT 2 /* seconds */ 52 53 #define __HYPERVISOR_set_timer_op 15 54 #define __HYPERVISOR_sched_op 29 55 #define __HYPERVISOR_event_channel_op 32 56 57 #define SCHEDOP_poll 3 58 59 #define EVTCHNOP_send 4 60 61 #define EVTCHNSTAT_interdomain 2 62 63 struct evtchn_send { 64 u32 port; 65 }; 66 67 struct sched_poll { 68 u32 *ports; 69 unsigned int nr_ports; 70 u64 timeout; 71 }; 72 73 struct pvclock_vcpu_time_info { 74 u32 version; 75 u32 pad0; 76 u64 tsc_timestamp; 77 u64 system_time; 78 u32 tsc_to_system_mul; 79 s8 tsc_shift; 80 u8 flags; 81 u8 pad[2]; 82 } __attribute__((__packed__)); /* 32 bytes */ 83 84 struct pvclock_wall_clock { 85 u32 version; 86 u32 sec; 87 u32 nsec; 88 } __attribute__((__packed__)); 89 90 struct vcpu_runstate_info { 91 uint32_t state; 92 uint64_t state_entry_time; 93 uint64_t time[5]; /* Extra field for overrun check */ 94 }; 95 96 struct compat_vcpu_runstate_info { 97 uint32_t state; 98 uint64_t state_entry_time; 99 uint64_t time[5]; 100 } __attribute__((__packed__));; 101 102 struct arch_vcpu_info { 103 unsigned long cr2; 104 unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ 105 }; 106 107 struct vcpu_info { 108 uint8_t evtchn_upcall_pending; 109 uint8_t evtchn_upcall_mask; 110 unsigned long evtchn_pending_sel; 111 struct arch_vcpu_info arch; 112 struct pvclock_vcpu_time_info time; 113 }; /* 64 bytes (x86) */ 114 115 struct shared_info { 116 struct vcpu_info vcpu_info[32]; 117 unsigned long evtchn_pending[64]; 118 unsigned long evtchn_mask[64]; 119 struct pvclock_wall_clock wc; 120 uint32_t wc_sec_hi; 121 /* arch_shared_info here */ 122 }; 123 124 #define RUNSTATE_running 0 125 #define RUNSTATE_runnable 1 126 #define RUNSTATE_blocked 2 127 #define RUNSTATE_offline 3 128 129 static const char *runstate_names[] = { 130 "running", 131 "runnable", 132 "blocked", 133 "offline" 134 }; 135 136 struct { 137 struct kvm_irq_routing info; 138 struct kvm_irq_routing_entry entries[2]; 139 } irq_routes; 140 141 static volatile bool guest_saw_irq; 142 143 static void evtchn_handler(struct ex_regs *regs) 144 { 145 struct vcpu_info *vi = (void *)VCPU_INFO_VADDR; 146 vi->evtchn_upcall_pending = 0; 147 vi->evtchn_pending_sel = 0; 148 guest_saw_irq = true; 149 150 GUEST_SYNC(0x20); 151 } 152 153 static void guest_wait_for_irq(void) 154 { 155 while (!guest_saw_irq) 156 __asm__ __volatile__ ("rep nop" : : : "memory"); 157 guest_saw_irq = false; 158 } 159 160 static void guest_code(void) 161 { 162 struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR; 163 int i; 164 165 __asm__ __volatile__( 166 "sti\n" 167 "nop\n" 168 ); 169 170 /* Trigger an interrupt injection */ 171 GUEST_SYNC(0); 172 173 guest_wait_for_irq(); 174 175 /* Test having the host set runstates manually */ 176 GUEST_SYNC(RUNSTATE_runnable); 177 GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0); 178 GUEST_ASSERT(rs->state == 0); 179 180 GUEST_SYNC(RUNSTATE_blocked); 181 GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0); 182 GUEST_ASSERT(rs->state == 0); 183 184 GUEST_SYNC(RUNSTATE_offline); 185 GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0); 186 GUEST_ASSERT(rs->state == 0); 187 188 /* Test runstate time adjust */ 189 GUEST_SYNC(4); 190 GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a); 191 GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b); 192 193 /* Test runstate time set */ 194 GUEST_SYNC(5); 195 GUEST_ASSERT(rs->state_entry_time >= 0x8000); 196 GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0); 197 GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b); 198 GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a); 199 200 /* sched_yield() should result in some 'runnable' time */ 201 GUEST_SYNC(6); 202 GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME); 203 204 /* Attempt to deliver a *masked* interrupt */ 205 GUEST_SYNC(7); 206 207 /* Wait until we see the bit set */ 208 struct shared_info *si = (void *)SHINFO_VADDR; 209 while (!si->evtchn_pending[0]) 210 __asm__ __volatile__ ("rep nop" : : : "memory"); 211 212 /* Now deliver an *unmasked* interrupt */ 213 GUEST_SYNC(8); 214 215 guest_wait_for_irq(); 216 217 /* Change memslots and deliver an interrupt */ 218 GUEST_SYNC(9); 219 220 guest_wait_for_irq(); 221 222 /* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */ 223 GUEST_SYNC(10); 224 225 guest_wait_for_irq(); 226 227 GUEST_SYNC(11); 228 229 /* Our turn. Deliver event channel (to ourselves) with 230 * EVTCHNOP_send hypercall. */ 231 unsigned long rax; 232 struct evtchn_send s = { .port = 127 }; 233 __asm__ __volatile__ ("vmcall" : 234 "=a" (rax) : 235 "a" (__HYPERVISOR_event_channel_op), 236 "D" (EVTCHNOP_send), 237 "S" (&s)); 238 239 GUEST_ASSERT(rax == 0); 240 241 guest_wait_for_irq(); 242 243 GUEST_SYNC(12); 244 245 /* Deliver "outbound" event channel to an eventfd which 246 * happens to be one of our own irqfds. */ 247 s.port = 197; 248 __asm__ __volatile__ ("vmcall" : 249 "=a" (rax) : 250 "a" (__HYPERVISOR_event_channel_op), 251 "D" (EVTCHNOP_send), 252 "S" (&s)); 253 254 GUEST_ASSERT(rax == 0); 255 256 guest_wait_for_irq(); 257 258 GUEST_SYNC(13); 259 260 /* Set a timer 100ms in the future. */ 261 __asm__ __volatile__ ("vmcall" : 262 "=a" (rax) : 263 "a" (__HYPERVISOR_set_timer_op), 264 "D" (rs->state_entry_time + 100000000)); 265 GUEST_ASSERT(rax == 0); 266 267 GUEST_SYNC(14); 268 269 /* Now wait for the timer */ 270 guest_wait_for_irq(); 271 272 GUEST_SYNC(15); 273 274 /* The host has 'restored' the timer. Just wait for it. */ 275 guest_wait_for_irq(); 276 277 GUEST_SYNC(16); 278 279 /* Poll for an event channel port which is already set */ 280 u32 ports[1] = { EVTCHN_TIMER }; 281 struct sched_poll p = { 282 .ports = ports, 283 .nr_ports = 1, 284 .timeout = 0, 285 }; 286 287 __asm__ __volatile__ ("vmcall" : 288 "=a" (rax) : 289 "a" (__HYPERVISOR_sched_op), 290 "D" (SCHEDOP_poll), 291 "S" (&p)); 292 293 GUEST_ASSERT(rax == 0); 294 295 GUEST_SYNC(17); 296 297 /* Poll for an unset port and wait for the timeout. */ 298 p.timeout = 100000000; 299 __asm__ __volatile__ ("vmcall" : 300 "=a" (rax) : 301 "a" (__HYPERVISOR_sched_op), 302 "D" (SCHEDOP_poll), 303 "S" (&p)); 304 305 GUEST_ASSERT(rax == 0); 306 307 GUEST_SYNC(18); 308 309 /* A timer will wake the masked port we're waiting on, while we poll */ 310 p.timeout = 0; 311 __asm__ __volatile__ ("vmcall" : 312 "=a" (rax) : 313 "a" (__HYPERVISOR_sched_op), 314 "D" (SCHEDOP_poll), 315 "S" (&p)); 316 317 GUEST_ASSERT(rax == 0); 318 319 GUEST_SYNC(19); 320 321 /* A timer wake an *unmasked* port which should wake us with an 322 * actual interrupt, while we're polling on a different port. */ 323 ports[0]++; 324 p.timeout = 0; 325 __asm__ __volatile__ ("vmcall" : 326 "=a" (rax) : 327 "a" (__HYPERVISOR_sched_op), 328 "D" (SCHEDOP_poll), 329 "S" (&p)); 330 331 GUEST_ASSERT(rax == 0); 332 333 guest_wait_for_irq(); 334 335 GUEST_SYNC(20); 336 337 /* Timer should have fired already */ 338 guest_wait_for_irq(); 339 340 GUEST_SYNC(21); 341 /* Racing host ioctls */ 342 343 guest_wait_for_irq(); 344 345 GUEST_SYNC(22); 346 /* Racing vmcall against host ioctl */ 347 348 ports[0] = 0; 349 350 p = (struct sched_poll) { 351 .ports = ports, 352 .nr_ports = 1, 353 .timeout = 0 354 }; 355 356 wait_for_timer: 357 /* 358 * Poll for a timer wake event while the worker thread is mucking with 359 * the shared info. KVM XEN drops timer IRQs if the shared info is 360 * invalid when the timer expires. Arbitrarily poll 100 times before 361 * giving up and asking the VMM to re-arm the timer. 100 polls should 362 * consume enough time to beat on KVM without taking too long if the 363 * timer IRQ is dropped due to an invalid event channel. 364 */ 365 for (i = 0; i < 100 && !guest_saw_irq; i++) 366 asm volatile("vmcall" 367 : "=a" (rax) 368 : "a" (__HYPERVISOR_sched_op), 369 "D" (SCHEDOP_poll), 370 "S" (&p) 371 : "memory"); 372 373 /* 374 * Re-send the timer IRQ if it was (likely) dropped due to the timer 375 * expiring while the event channel was invalid. 376 */ 377 if (!guest_saw_irq) { 378 GUEST_SYNC(23); 379 goto wait_for_timer; 380 } 381 guest_saw_irq = false; 382 383 GUEST_SYNC(24); 384 } 385 386 static int cmp_timespec(struct timespec *a, struct timespec *b) 387 { 388 if (a->tv_sec > b->tv_sec) 389 return 1; 390 else if (a->tv_sec < b->tv_sec) 391 return -1; 392 else if (a->tv_nsec > b->tv_nsec) 393 return 1; 394 else if (a->tv_nsec < b->tv_nsec) 395 return -1; 396 else 397 return 0; 398 } 399 400 static struct vcpu_info *vinfo; 401 static struct kvm_vcpu *vcpu; 402 403 static void handle_alrm(int sig) 404 { 405 if (vinfo) 406 printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending); 407 vcpu_dump(stdout, vcpu, 0); 408 TEST_FAIL("IRQ delivery timed out"); 409 } 410 411 static void *juggle_shinfo_state(void *arg) 412 { 413 struct kvm_vm *vm = (struct kvm_vm *)arg; 414 415 struct kvm_xen_hvm_attr cache_init = { 416 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, 417 .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE 418 }; 419 420 struct kvm_xen_hvm_attr cache_destroy = { 421 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, 422 .u.shared_info.gfn = GPA_INVALID 423 }; 424 425 for (;;) { 426 __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_init); 427 __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_destroy); 428 pthread_testcancel(); 429 }; 430 431 return NULL; 432 } 433 434 int main(int argc, char *argv[]) 435 { 436 struct timespec min_ts, max_ts, vm_ts; 437 struct kvm_vm *vm; 438 pthread_t thread; 439 bool verbose; 440 int ret; 441 442 verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) || 443 !strncmp(argv[1], "--verbose", 10)); 444 445 int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); 446 TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO); 447 448 bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE); 449 bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG); 450 bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL); 451 bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND); 452 453 clock_gettime(CLOCK_REALTIME, &min_ts); 454 455 vm = vm_create_with_one_vcpu(&vcpu, guest_code); 456 457 /* Map a region for the shared_info page */ 458 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 459 SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0); 460 virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3); 461 462 struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR); 463 464 int zero_fd = open("/dev/zero", O_RDONLY); 465 TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero"); 466 467 struct kvm_xen_hvm_config hvmc = { 468 .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, 469 .msr = XEN_HYPERCALL_MSR, 470 }; 471 472 /* Let the kernel know that we *will* use it for sending all 473 * event channels, which lets it intercept SCHEDOP_poll */ 474 if (do_evtchn_tests) 475 hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND; 476 477 vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc); 478 479 struct kvm_xen_hvm_attr lm = { 480 .type = KVM_XEN_ATTR_TYPE_LONG_MODE, 481 .u.long_mode = 1, 482 }; 483 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); 484 485 if (do_runstate_flag) { 486 struct kvm_xen_hvm_attr ruf = { 487 .type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG, 488 .u.runstate_update_flag = 1, 489 }; 490 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf); 491 492 ruf.u.runstate_update_flag = 0; 493 vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf); 494 TEST_ASSERT(ruf.u.runstate_update_flag == 1, 495 "Failed to read back RUNSTATE_UPDATE_FLAG attr"); 496 } 497 498 struct kvm_xen_hvm_attr ha = { 499 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, 500 .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE, 501 }; 502 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha); 503 504 /* 505 * Test what happens when the HVA of the shinfo page is remapped after 506 * the kernel has a reference to it. But make sure we copy the clock 507 * info over since that's only set at setup time, and we test it later. 508 */ 509 struct pvclock_wall_clock wc_copy = shinfo->wc; 510 void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0); 511 TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info"); 512 shinfo->wc = wc_copy; 513 514 struct kvm_xen_vcpu_attr vi = { 515 .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, 516 .u.gpa = VCPU_INFO_ADDR, 517 }; 518 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi); 519 520 struct kvm_xen_vcpu_attr pvclock = { 521 .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO, 522 .u.gpa = PVTIME_ADDR, 523 }; 524 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock); 525 526 struct kvm_xen_hvm_attr vec = { 527 .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR, 528 .u.vector = EVTCHN_VECTOR, 529 }; 530 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec); 531 532 vm_init_descriptor_tables(vm); 533 vcpu_init_descriptor_tables(vcpu); 534 vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler); 535 536 if (do_runstate_tests) { 537 struct kvm_xen_vcpu_attr st = { 538 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, 539 .u.gpa = RUNSTATE_ADDR, 540 }; 541 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st); 542 } 543 544 int irq_fd[2] = { -1, -1 }; 545 546 if (do_eventfd_tests) { 547 irq_fd[0] = eventfd(0, 0); 548 irq_fd[1] = eventfd(0, 0); 549 550 /* Unexpected, but not a KVM failure */ 551 if (irq_fd[0] == -1 || irq_fd[1] == -1) 552 do_evtchn_tests = do_eventfd_tests = false; 553 } 554 555 if (do_eventfd_tests) { 556 irq_routes.info.nr = 2; 557 558 irq_routes.entries[0].gsi = 32; 559 irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN; 560 irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1; 561 irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id; 562 irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; 563 564 irq_routes.entries[1].gsi = 33; 565 irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN; 566 irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2; 567 irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id; 568 irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; 569 570 vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info); 571 572 struct kvm_irqfd ifd = { }; 573 574 ifd.fd = irq_fd[0]; 575 ifd.gsi = 32; 576 vm_ioctl(vm, KVM_IRQFD, &ifd); 577 578 ifd.fd = irq_fd[1]; 579 ifd.gsi = 33; 580 vm_ioctl(vm, KVM_IRQFD, &ifd); 581 582 struct sigaction sa = { }; 583 sa.sa_handler = handle_alrm; 584 sigaction(SIGALRM, &sa, NULL); 585 } 586 587 struct kvm_xen_vcpu_attr tmr = { 588 .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER, 589 .u.timer.port = EVTCHN_TIMER, 590 .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, 591 .u.timer.expires_ns = 0 592 }; 593 594 if (do_evtchn_tests) { 595 struct kvm_xen_hvm_attr inj = { 596 .type = KVM_XEN_ATTR_TYPE_EVTCHN, 597 .u.evtchn.send_port = 127, 598 .u.evtchn.type = EVTCHNSTAT_interdomain, 599 .u.evtchn.flags = 0, 600 .u.evtchn.deliver.port.port = EVTCHN_TEST1, 601 .u.evtchn.deliver.port.vcpu = vcpu->id + 1, 602 .u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, 603 }; 604 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); 605 606 /* Test migration to a different vCPU */ 607 inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE; 608 inj.u.evtchn.deliver.port.vcpu = vcpu->id; 609 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); 610 611 inj.u.evtchn.send_port = 197; 612 inj.u.evtchn.deliver.eventfd.port = 0; 613 inj.u.evtchn.deliver.eventfd.fd = irq_fd[1]; 614 inj.u.evtchn.flags = 0; 615 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); 616 617 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 618 } 619 vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR); 620 vinfo->evtchn_upcall_pending = 0; 621 622 struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR); 623 rs->state = 0x5a; 624 625 bool evtchn_irq_expected = false; 626 627 for (;;) { 628 volatile struct kvm_run *run = vcpu->run; 629 struct ucall uc; 630 631 vcpu_run(vcpu); 632 633 TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, 634 "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", 635 run->exit_reason, 636 exit_reason_str(run->exit_reason)); 637 638 switch (get_ucall(vcpu, &uc)) { 639 case UCALL_ABORT: 640 REPORT_GUEST_ASSERT(uc); 641 /* NOT REACHED */ 642 case UCALL_SYNC: { 643 struct kvm_xen_vcpu_attr rst; 644 long rundelay; 645 646 if (do_runstate_tests) 647 TEST_ASSERT(rs->state_entry_time == rs->time[0] + 648 rs->time[1] + rs->time[2] + rs->time[3], 649 "runstate times don't add up"); 650 651 switch (uc.args[1]) { 652 case 0: 653 if (verbose) 654 printf("Delivering evtchn upcall\n"); 655 evtchn_irq_expected = true; 656 vinfo->evtchn_upcall_pending = 1; 657 break; 658 659 case RUNSTATE_runnable...RUNSTATE_offline: 660 TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen"); 661 if (!do_runstate_tests) 662 goto done; 663 if (verbose) 664 printf("Testing runstate %s\n", runstate_names[uc.args[1]]); 665 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT; 666 rst.u.runstate.state = uc.args[1]; 667 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); 668 break; 669 670 case 4: 671 if (verbose) 672 printf("Testing RUNSTATE_ADJUST\n"); 673 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST; 674 memset(&rst.u, 0, sizeof(rst.u)); 675 rst.u.runstate.state = (uint64_t)-1; 676 rst.u.runstate.time_blocked = 677 0x5a - rs->time[RUNSTATE_blocked]; 678 rst.u.runstate.time_offline = 679 0x6b6b - rs->time[RUNSTATE_offline]; 680 rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked - 681 rst.u.runstate.time_offline; 682 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); 683 break; 684 685 case 5: 686 if (verbose) 687 printf("Testing RUNSTATE_DATA\n"); 688 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA; 689 memset(&rst.u, 0, sizeof(rst.u)); 690 rst.u.runstate.state = RUNSTATE_running; 691 rst.u.runstate.state_entry_time = 0x6b6b + 0x5a; 692 rst.u.runstate.time_blocked = 0x6b6b; 693 rst.u.runstate.time_offline = 0x5a; 694 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); 695 break; 696 697 case 6: 698 if (verbose) 699 printf("Testing steal time\n"); 700 /* Yield until scheduler delay exceeds target */ 701 rundelay = get_run_delay() + MIN_STEAL_TIME; 702 do { 703 sched_yield(); 704 } while (get_run_delay() < rundelay); 705 break; 706 707 case 7: 708 if (!do_eventfd_tests) 709 goto done; 710 if (verbose) 711 printf("Testing masked event channel\n"); 712 shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1; 713 eventfd_write(irq_fd[0], 1UL); 714 alarm(1); 715 break; 716 717 case 8: 718 if (verbose) 719 printf("Testing unmasked event channel\n"); 720 /* Unmask that, but deliver the other one */ 721 shinfo->evtchn_pending[0] = 0; 722 shinfo->evtchn_mask[0] = 0; 723 eventfd_write(irq_fd[1], 1UL); 724 evtchn_irq_expected = true; 725 alarm(1); 726 break; 727 728 case 9: 729 TEST_ASSERT(!evtchn_irq_expected, 730 "Expected event channel IRQ but it didn't happen"); 731 shinfo->evtchn_pending[1] = 0; 732 if (verbose) 733 printf("Testing event channel after memslot change\n"); 734 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 735 DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0); 736 eventfd_write(irq_fd[0], 1UL); 737 evtchn_irq_expected = true; 738 alarm(1); 739 break; 740 741 case 10: 742 TEST_ASSERT(!evtchn_irq_expected, 743 "Expected event channel IRQ but it didn't happen"); 744 if (!do_evtchn_tests) 745 goto done; 746 747 shinfo->evtchn_pending[0] = 0; 748 if (verbose) 749 printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n"); 750 751 struct kvm_irq_routing_xen_evtchn e; 752 e.port = EVTCHN_TEST2; 753 e.vcpu = vcpu->id; 754 e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; 755 756 vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e); 757 evtchn_irq_expected = true; 758 alarm(1); 759 break; 760 761 case 11: 762 TEST_ASSERT(!evtchn_irq_expected, 763 "Expected event channel IRQ but it didn't happen"); 764 shinfo->evtchn_pending[1] = 0; 765 766 if (verbose) 767 printf("Testing guest EVTCHNOP_send direct to evtchn\n"); 768 evtchn_irq_expected = true; 769 alarm(1); 770 break; 771 772 case 12: 773 TEST_ASSERT(!evtchn_irq_expected, 774 "Expected event channel IRQ but it didn't happen"); 775 shinfo->evtchn_pending[0] = 0; 776 777 if (verbose) 778 printf("Testing guest EVTCHNOP_send to eventfd\n"); 779 evtchn_irq_expected = true; 780 alarm(1); 781 break; 782 783 case 13: 784 TEST_ASSERT(!evtchn_irq_expected, 785 "Expected event channel IRQ but it didn't happen"); 786 shinfo->evtchn_pending[1] = 0; 787 788 if (verbose) 789 printf("Testing guest oneshot timer\n"); 790 break; 791 792 case 14: 793 memset(&tmr, 0, sizeof(tmr)); 794 tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER; 795 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); 796 TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER, 797 "Timer port not returned"); 798 TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, 799 "Timer priority not returned"); 800 TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time, 801 "Timer expiry not returned"); 802 evtchn_irq_expected = true; 803 alarm(1); 804 break; 805 806 case 15: 807 TEST_ASSERT(!evtchn_irq_expected, 808 "Expected event channel IRQ but it didn't happen"); 809 shinfo->evtchn_pending[0] = 0; 810 811 if (verbose) 812 printf("Testing restored oneshot timer\n"); 813 814 tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; 815 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 816 evtchn_irq_expected = true; 817 alarm(1); 818 break; 819 820 case 16: 821 TEST_ASSERT(!evtchn_irq_expected, 822 "Expected event channel IRQ but it didn't happen"); 823 824 if (verbose) 825 printf("Testing SCHEDOP_poll with already pending event\n"); 826 shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER; 827 alarm(1); 828 break; 829 830 case 17: 831 if (verbose) 832 printf("Testing SCHEDOP_poll timeout\n"); 833 shinfo->evtchn_pending[0] = 0; 834 alarm(1); 835 break; 836 837 case 18: 838 if (verbose) 839 printf("Testing SCHEDOP_poll wake on masked event\n"); 840 841 tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; 842 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 843 alarm(1); 844 break; 845 846 case 19: 847 shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0; 848 if (verbose) 849 printf("Testing SCHEDOP_poll wake on unmasked event\n"); 850 851 evtchn_irq_expected = true; 852 tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; 853 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 854 855 /* Read it back and check the pending time is reported correctly */ 856 tmr.u.timer.expires_ns = 0; 857 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); 858 TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000, 859 "Timer not reported pending"); 860 alarm(1); 861 break; 862 863 case 20: 864 TEST_ASSERT(!evtchn_irq_expected, 865 "Expected event channel IRQ but it didn't happen"); 866 /* Read timer and check it is no longer pending */ 867 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); 868 TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending"); 869 870 shinfo->evtchn_pending[0] = 0; 871 if (verbose) 872 printf("Testing timer in the past\n"); 873 874 evtchn_irq_expected = true; 875 tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL; 876 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 877 alarm(1); 878 break; 879 880 case 21: 881 TEST_ASSERT(!evtchn_irq_expected, 882 "Expected event channel IRQ but it didn't happen"); 883 alarm(0); 884 885 if (verbose) 886 printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n"); 887 888 ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm); 889 TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret)); 890 891 struct kvm_irq_routing_xen_evtchn uxe = { 892 .port = 1, 893 .vcpu = vcpu->id, 894 .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL 895 }; 896 897 evtchn_irq_expected = true; 898 for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;) 899 __vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe); 900 break; 901 902 case 22: 903 TEST_ASSERT(!evtchn_irq_expected, 904 "Expected event channel IRQ but it didn't happen"); 905 906 if (verbose) 907 printf("Testing shinfo lock corruption (SCHEDOP_poll)\n"); 908 909 shinfo->evtchn_pending[0] = 1; 910 911 evtchn_irq_expected = true; 912 tmr.u.timer.expires_ns = rs->state_entry_time + 913 SHINFO_RACE_TIMEOUT * 1000000000ULL; 914 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 915 break; 916 917 case 23: 918 /* 919 * Optional and possibly repeated sync point. 920 * Injecting the timer IRQ may fail if the 921 * shinfo is invalid when the timer expires. 922 * If the timer has expired but the IRQ hasn't 923 * been delivered, rearm the timer and retry. 924 */ 925 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); 926 927 /* Resume the guest if the timer is still pending. */ 928 if (tmr.u.timer.expires_ns) 929 break; 930 931 /* All done if the IRQ was delivered. */ 932 if (!evtchn_irq_expected) 933 break; 934 935 tmr.u.timer.expires_ns = rs->state_entry_time + 936 SHINFO_RACE_TIMEOUT * 1000000000ULL; 937 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 938 break; 939 case 24: 940 TEST_ASSERT(!evtchn_irq_expected, 941 "Expected event channel IRQ but it didn't happen"); 942 943 ret = pthread_cancel(thread); 944 TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret)); 945 946 ret = pthread_join(thread, 0); 947 TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret)); 948 goto done; 949 950 case 0x20: 951 TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ"); 952 evtchn_irq_expected = false; 953 break; 954 } 955 break; 956 } 957 case UCALL_DONE: 958 goto done; 959 default: 960 TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); 961 } 962 } 963 964 done: 965 struct kvm_xen_hvm_attr evt_reset = { 966 .type = KVM_XEN_ATTR_TYPE_EVTCHN, 967 .u.evtchn.flags = KVM_XEN_EVTCHN_RESET, 968 }; 969 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset); 970 971 alarm(0); 972 clock_gettime(CLOCK_REALTIME, &max_ts); 973 974 /* 975 * Just a *really* basic check that things are being put in the 976 * right place. The actual calculations are much the same for 977 * Xen as they are for the KVM variants, so no need to check. 978 */ 979 struct pvclock_wall_clock *wc; 980 struct pvclock_vcpu_time_info *ti, *ti2; 981 982 wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00); 983 ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20); 984 ti2 = addr_gpa2hva(vm, PVTIME_ADDR); 985 986 if (verbose) { 987 printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec); 988 printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n", 989 ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul, 990 ti->tsc_shift, ti->flags); 991 printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n", 992 ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul, 993 ti2->tsc_shift, ti2->flags); 994 } 995 996 vm_ts.tv_sec = wc->sec; 997 vm_ts.tv_nsec = wc->nsec; 998 TEST_ASSERT(wc->version && !(wc->version & 1), 999 "Bad wallclock version %x", wc->version); 1000 TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old"); 1001 TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new"); 1002 1003 TEST_ASSERT(ti->version && !(ti->version & 1), 1004 "Bad time_info version %x", ti->version); 1005 TEST_ASSERT(ti2->version && !(ti2->version & 1), 1006 "Bad time_info version %x", ti->version); 1007 1008 if (do_runstate_tests) { 1009 /* 1010 * Fetch runstate and check sanity. Strictly speaking in the 1011 * general case we might not expect the numbers to be identical 1012 * but in this case we know we aren't running the vCPU any more. 1013 */ 1014 struct kvm_xen_vcpu_attr rst = { 1015 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA, 1016 }; 1017 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst); 1018 1019 if (verbose) { 1020 printf("Runstate: %s(%d), entry %" PRIu64 " ns\n", 1021 rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown", 1022 rs->state, rs->state_entry_time); 1023 for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) { 1024 printf("State %s: %" PRIu64 " ns\n", 1025 runstate_names[i], rs->time[i]); 1026 } 1027 } 1028 1029 /* 1030 * Exercise runstate info at all points across the page boundary, in 1031 * 32-bit and 64-bit mode. In particular, test the case where it is 1032 * configured in 32-bit mode and then switched to 64-bit mode while 1033 * active, which takes it onto the second page. 1034 */ 1035 unsigned long runstate_addr; 1036 struct compat_vcpu_runstate_info *crs; 1037 for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4; 1038 runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) { 1039 1040 rs = addr_gpa2hva(vm, runstate_addr); 1041 crs = (void *)rs; 1042 1043 memset(rs, 0xa5, sizeof(*rs)); 1044 1045 /* Set to compatibility mode */ 1046 lm.u.long_mode = 0; 1047 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); 1048 1049 /* Set runstate to new address (kernel will write it) */ 1050 struct kvm_xen_vcpu_attr st = { 1051 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, 1052 .u.gpa = runstate_addr, 1053 }; 1054 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st); 1055 1056 if (verbose) 1057 printf("Compatibility runstate at %08lx\n", runstate_addr); 1058 1059 TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch"); 1060 TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time, 1061 "State entry time mismatch"); 1062 TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running, 1063 "Running time mismatch"); 1064 TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, 1065 "Runnable time mismatch"); 1066 TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, 1067 "Blocked time mismatch"); 1068 TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, 1069 "Offline time mismatch"); 1070 TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL, 1071 "Structure overrun"); 1072 TEST_ASSERT(crs->state_entry_time == crs->time[0] + 1073 crs->time[1] + crs->time[2] + crs->time[3], 1074 "runstate times don't add up"); 1075 1076 1077 /* Now switch to 64-bit mode */ 1078 lm.u.long_mode = 1; 1079 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); 1080 1081 memset(rs, 0xa5, sizeof(*rs)); 1082 1083 /* Don't change the address, just trigger a write */ 1084 struct kvm_xen_vcpu_attr adj = { 1085 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST, 1086 .u.runstate.state = (uint64_t)-1 1087 }; 1088 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj); 1089 1090 if (verbose) 1091 printf("64-bit runstate at %08lx\n", runstate_addr); 1092 1093 TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch"); 1094 TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time, 1095 "State entry time mismatch"); 1096 TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running, 1097 "Running time mismatch"); 1098 TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, 1099 "Runnable time mismatch"); 1100 TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, 1101 "Blocked time mismatch"); 1102 TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, 1103 "Offline time mismatch"); 1104 TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL, 1105 "Structure overrun"); 1106 1107 TEST_ASSERT(rs->state_entry_time == rs->time[0] + 1108 rs->time[1] + rs->time[2] + rs->time[3], 1109 "runstate times don't add up"); 1110 } 1111 } 1112 1113 kvm_vm_free(vm); 1114 return 0; 1115 } 1116