// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2021 Amazon.com, Inc. or its affiliates.
 */

#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"

#include <stdint.h>
#include <time.h>
#include <sched.h>
#include <signal.h>
#include <pthread.h>

#include <sys/eventfd.h>

#define SHINFO_REGION_GVA	0xc0000000ULL
#define SHINFO_REGION_GPA	0xc0000000ULL
#define SHINFO_REGION_SLOT	10

#define DUMMY_REGION_GPA	(SHINFO_REGION_GPA + (3 * PAGE_SIZE))
#define DUMMY_REGION_SLOT	11

#define DUMMY_REGION_GPA_2	(SHINFO_REGION_GPA + (4 * PAGE_SIZE))
#define DUMMY_REGION_SLOT_2	12

#define SHINFO_ADDR	(SHINFO_REGION_GPA)
#define VCPU_INFO_ADDR	(SHINFO_REGION_GPA + 0x40)
#define PVTIME_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE)
#define RUNSTATE_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15)

#define SHINFO_VADDR	(SHINFO_REGION_GVA)
#define VCPU_INFO_VADDR	(SHINFO_REGION_GVA + 0x40)
#define RUNSTATE_VADDR	(SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15)

#define EVTCHN_VECTOR	0x10

#define EVTCHN_TEST1 15
#define EVTCHN_TEST2 66
#define EVTCHN_TIMER 13

enum {
	TEST_INJECT_VECTOR = 0,
	TEST_RUNSTATE_runnable,
	TEST_RUNSTATE_blocked,
	TEST_RUNSTATE_offline,
	TEST_RUNSTATE_ADJUST,
	TEST_RUNSTATE_DATA,
	TEST_STEAL_TIME,
	TEST_EVTCHN_MASKED,
	TEST_EVTCHN_UNMASKED,
	TEST_EVTCHN_SLOWPATH,
	TEST_EVTCHN_SEND_IOCTL,
	TEST_EVTCHN_HCALL,
	TEST_EVTCHN_HCALL_SLOWPATH,
	TEST_EVTCHN_HCALL_EVENTFD,
	TEST_TIMER_SETUP,
	TEST_TIMER_WAIT,
	TEST_TIMER_RESTORE,
	TEST_POLL_READY,
	TEST_POLL_TIMEOUT,
	TEST_POLL_MASKED,
	TEST_POLL_WAKE,
	TEST_TIMER_PAST,
	TEST_LOCKING_SEND_RACE,
	TEST_LOCKING_POLL_RACE,
	TEST_LOCKING_POLL_TIMEOUT,
	TEST_DONE,

	TEST_GUEST_SAW_IRQ,
};

#define XEN_HYPERCALL_MSR	0x40000000

#define MIN_STEAL_TIME		50000

#define SHINFO_RACE_TIMEOUT	2	/* seconds */

#define __HYPERVISOR_set_timer_op	15
#define __HYPERVISOR_sched_op		29
#define __HYPERVISOR_event_channel_op	32

#define SCHEDOP_poll			3

#define EVTCHNOP_send			4

#define EVTCHNSTAT_interdomain		2

struct evtchn_send {
	u32 port;
};

struct sched_poll {
	u32 *ports;
	unsigned int nr_ports;
	u64 timeout;
};

struct pvclock_vcpu_time_info {
	u32 version;
	u32 pad0;
	u64 tsc_timestamp;
	u64 system_time;
	u32 tsc_to_system_mul;
	s8  tsc_shift;
	u8  flags;
	u8  pad[2];
} __attribute__((__packed__)); /* 32 bytes */

struct pvclock_wall_clock {
	u32 version;
	u32 sec;
	u32 nsec;
} __attribute__((__packed__));

struct vcpu_runstate_info {
	uint32_t state;
	uint64_t state_entry_time;
	uint64_t time[5]; /* Extra field for overrun check */
};

struct compat_vcpu_runstate_info {
	uint32_t state;
	uint64_t state_entry_time;
	uint64_t time[5];
} __attribute__((__packed__));

struct arch_vcpu_info {
	unsigned long cr2;
	unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
};

struct vcpu_info {
	uint8_t evtchn_upcall_pending;
	uint8_t evtchn_upcall_mask;
	unsigned long evtchn_pending_sel;
	struct arch_vcpu_info arch;
	struct pvclock_vcpu_time_info time;
}; /* 64 bytes (x86) */

struct shared_info {
	struct vcpu_info vcpu_info[32];
	unsigned long evtchn_pending[64];
	unsigned long evtchn_mask[64];
	struct pvclock_wall_clock wc;
	uint32_t wc_sec_hi;
	/* arch_shared_info here */
};

#define RUNSTATE_running  0
#define RUNSTATE_runnable 1
#define RUNSTATE_blocked  2
#define RUNSTATE_offline  3

static const char *runstate_names[] = {
	"running",
	"runnable",
	"blocked",
	"offline"
};

struct {
	struct kvm_irq_routing info;
	struct kvm_irq_routing_entry entries[2];
} irq_routes;

static volatile bool guest_saw_irq;

static void evtchn_handler(struct ex_regs *regs)
{
	struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
	vi->evtchn_upcall_pending = 0;
	vi->evtchn_pending_sel = 0;
	guest_saw_irq = true;

	GUEST_SYNC(TEST_GUEST_SAW_IRQ);
}

static void guest_wait_for_irq(void)
{
	while (!guest_saw_irq)
		__asm__ __volatile__ ("rep nop" : : : "memory");
	guest_saw_irq = false;
}

static void guest_code(void)
{
	struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
	int i;

	__asm__ __volatile__(
		"sti\n"
		"nop\n"
	);

	/* Trigger an interrupt injection */
	GUEST_SYNC(TEST_INJECT_VECTOR);

	guest_wait_for_irq();

	/* Test having the host set runstates manually */
	GUEST_SYNC(TEST_RUNSTATE_runnable);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
	GUEST_ASSERT(rs->state == 0);

	GUEST_SYNC(TEST_RUNSTATE_blocked);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
	GUEST_ASSERT(rs->state == 0);

	GUEST_SYNC(TEST_RUNSTATE_offline);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
	GUEST_ASSERT(rs->state == 0);

	/* Test runstate time adjust */
	GUEST_SYNC(TEST_RUNSTATE_ADJUST);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);

	/* Test runstate time set */
	GUEST_SYNC(TEST_RUNSTATE_DATA);
	GUEST_ASSERT(rs->state_entry_time >= 0x8000);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);

	/* sched_yield() should result in some 'runnable' time */
	GUEST_SYNC(TEST_STEAL_TIME);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);

	/* Attempt to deliver a *masked* interrupt */
	GUEST_SYNC(TEST_EVTCHN_MASKED);

	/* Wait until we see the bit set */
	struct shared_info *si = (void *)SHINFO_VADDR;
	while (!si->evtchn_pending[0])
		__asm__ __volatile__ ("rep nop" : : : "memory");

	/* Now deliver an *unmasked* interrupt */
	GUEST_SYNC(TEST_EVTCHN_UNMASKED);

	guest_wait_for_irq();

	/* Change memslots and deliver an interrupt */
	GUEST_SYNC(TEST_EVTCHN_SLOWPATH);

	guest_wait_for_irq();

	/* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */
	GUEST_SYNC(TEST_EVTCHN_SEND_IOCTL);

	guest_wait_for_irq();

	GUEST_SYNC(TEST_EVTCHN_HCALL);

	/* Our turn. Deliver event channel (to ourselves) with
	 * EVTCHNOP_send hypercall. */
	struct evtchn_send s = { .port = 127 };
	xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);

	guest_wait_for_irq();

	GUEST_SYNC(TEST_EVTCHN_HCALL_SLOWPATH);

	/*
	 * Same again, but this time the host has messed with memslots so it
	 * should take the slow path in kvm_xen_set_evtchn().
	 */
	xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);

	guest_wait_for_irq();

	GUEST_SYNC(TEST_EVTCHN_HCALL_EVENTFD);

	/* Deliver "outbound" event channel to an eventfd which
	 * happens to be one of our own irqfds. */
	s.port = 197;
	xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);

	guest_wait_for_irq();

	GUEST_SYNC(TEST_TIMER_SETUP);

	/* Set a timer 100ms in the future. */
	xen_hypercall(__HYPERVISOR_set_timer_op,
		      rs->state_entry_time + 100000000, NULL);

	GUEST_SYNC(TEST_TIMER_WAIT);

	/* Now wait for the timer */
	guest_wait_for_irq();

	GUEST_SYNC(TEST_TIMER_RESTORE);

	/* The host has 'restored' the timer. Just wait for it. */
	guest_wait_for_irq();

	GUEST_SYNC(TEST_POLL_READY);

	/* Poll for an event channel port which is already set */
	u32 ports[1] = { EVTCHN_TIMER };
	struct sched_poll p = {
		.ports = ports,
		.nr_ports = 1,
		.timeout = 0,
	};

	xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);

	GUEST_SYNC(TEST_POLL_TIMEOUT);

	/* Poll for an unset port and wait for the timeout. */
	p.timeout = 100000000;
	xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);

	GUEST_SYNC(TEST_POLL_MASKED);

	/* A timer will wake the masked port we're waiting on, while we poll */
	p.timeout = 0;
	xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);

	GUEST_SYNC(TEST_POLL_WAKE);

	/* A timer wakes an *unmasked* port which should wake us with an
	 * actual interrupt, while we're polling on a different port. */
	ports[0]++;
	p.timeout = 0;
	xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);

	guest_wait_for_irq();

	GUEST_SYNC(TEST_TIMER_PAST);

	/* Timer should have fired already */
	guest_wait_for_irq();

	GUEST_SYNC(TEST_LOCKING_SEND_RACE);
	/* Racing host ioctls */

	guest_wait_for_irq();

	GUEST_SYNC(TEST_LOCKING_POLL_RACE);
	/* Racing vmcall against host ioctl */

	ports[0] = 0;

	p = (struct sched_poll) {
		.ports = ports,
		.nr_ports = 1,
		.timeout = 0
	};

wait_for_timer:
	/*
	 * Poll for a timer wake event while the worker thread is mucking with
	 * the shared info. KVM XEN drops timer IRQs if the shared info is
	 * invalid when the timer expires. Arbitrarily poll 100 times before
	 * giving up and asking the VMM to re-arm the timer. 100 polls should
	 * consume enough time to beat on KVM without taking too long if the
	 * timer IRQ is dropped due to an invalid event channel.
	 */
	for (i = 0; i < 100 && !guest_saw_irq; i++)
		__xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);

	/*
	 * Re-send the timer IRQ if it was (likely) dropped due to the timer
	 * expiring while the event channel was invalid.
	 */
	if (!guest_saw_irq) {
		GUEST_SYNC(TEST_LOCKING_POLL_TIMEOUT);
		goto wait_for_timer;
	}
	guest_saw_irq = false;

	GUEST_SYNC(TEST_DONE);
}

static int cmp_timespec(struct timespec *a, struct timespec *b)
{
	if (a->tv_sec > b->tv_sec)
		return 1;
	else if (a->tv_sec < b->tv_sec)
		return -1;
	else if (a->tv_nsec > b->tv_nsec)
		return 1;
	else if (a->tv_nsec < b->tv_nsec)
		return -1;
	else
		return 0;
}

static struct vcpu_info *vinfo;
static struct kvm_vcpu *vcpu;

static void handle_alrm(int sig)
{
	if (vinfo)
		printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending);
	vcpu_dump(stdout, vcpu, 0);
	TEST_FAIL("IRQ delivery timed out");
}

static void *juggle_shinfo_state(void *arg)
{
	struct kvm_vm *vm = (struct kvm_vm *)arg;

	struct kvm_xen_hvm_attr cache_activate = {
		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
		.u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE
	};

	struct kvm_xen_hvm_attr cache_deactivate = {
		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
		.u.shared_info.gfn = KVM_XEN_INVALID_GFN
	};

	for (;;) {
		__vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate);
		__vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate);
		pthread_testcancel();
	}

	return NULL;
}

int main(int argc, char *argv[])
{
	struct timespec min_ts, max_ts, vm_ts;
	struct kvm_xen_hvm_attr evt_reset;
	struct kvm_vm *vm;
	pthread_t thread;
	bool verbose;
	int ret;

	verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
			       !strncmp(argv[1], "--verbose", 10));

	int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
	TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO);

	bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
	bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG);
	bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
	bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);

	clock_gettime(CLOCK_REALTIME, &min_ts);

	vm = vm_create_with_one_vcpu(&vcpu, guest_code);

	/* Map a region for the shared_info page */
	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
				    SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0);
	virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3);

	struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR);

	int zero_fd = open("/dev/zero", O_RDONLY);
	TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");

	struct kvm_xen_hvm_config hvmc = {
		.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
		.msr = XEN_HYPERCALL_MSR,
	};

	/* Let the kernel know that we *will* use it for sending all
	 * event channels, which lets it intercept SCHEDOP_poll */
	if (do_evtchn_tests)
		hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;

	vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);

	struct kvm_xen_hvm_attr lm = {
		.type = KVM_XEN_ATTR_TYPE_LONG_MODE,
		.u.long_mode = 1,
	};
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);

	if (do_runstate_flag) {
		struct kvm_xen_hvm_attr ruf = {
			.type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG,
			.u.runstate_update_flag = 1,
		};
		vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf);

		ruf.u.runstate_update_flag = 0;
		vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf);
		TEST_ASSERT(ruf.u.runstate_update_flag == 1,
			    "Failed to read back RUNSTATE_UPDATE_FLAG attr");
	}

	struct kvm_xen_hvm_attr ha = {
		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
		.u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE,
	};
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);

	/*
	 * Test what happens when the HVA of the shinfo page is remapped after
	 * the kernel has a reference to it. But make sure we copy the clock
	 * info over since that's only set at setup time, and we test it later.
	 */
	struct pvclock_wall_clock wc_copy = shinfo->wc;
	void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
	TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
	shinfo->wc = wc_copy;

	struct kvm_xen_vcpu_attr vi = {
		.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
		.u.gpa = VCPU_INFO_ADDR,
	};
	vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi);

	struct kvm_xen_vcpu_attr pvclock = {
		.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
		.u.gpa = PVTIME_ADDR,
	};
	vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock);

	struct kvm_xen_hvm_attr vec = {
		.type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
		.u.vector = EVTCHN_VECTOR,
	};
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);

	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vcpu);
	vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);

	if (do_runstate_tests) {
		struct kvm_xen_vcpu_attr st = {
			.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
			.u.gpa = RUNSTATE_ADDR,
		};
		vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
	}

	int irq_fd[2] = { -1, -1 };

	if (do_eventfd_tests) {
		irq_fd[0] = eventfd(0, 0);
		irq_fd[1] = eventfd(0, 0);

		/* Unexpected, but not a KVM failure */
		if (irq_fd[0] == -1 || irq_fd[1] == -1)
			do_evtchn_tests = do_eventfd_tests = false;
	}

	if (do_eventfd_tests) {
		irq_routes.info.nr = 2;

		irq_routes.entries[0].gsi = 32;
		irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
		irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1;
		irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id;
		irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

		irq_routes.entries[1].gsi = 33;
		irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
		irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2;
		irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id;
		irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

		vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);

		struct kvm_irqfd ifd = { };

		ifd.fd = irq_fd[0];
		ifd.gsi = 32;
		vm_ioctl(vm, KVM_IRQFD, &ifd);

		ifd.fd = irq_fd[1];
		ifd.gsi = 33;
		vm_ioctl(vm, KVM_IRQFD, &ifd);

		struct sigaction sa = { };
		sa.sa_handler = handle_alrm;
		sigaction(SIGALRM, &sa, NULL);
	}

	struct kvm_xen_vcpu_attr tmr = {
		.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
		.u.timer.port = EVTCHN_TIMER,
		.u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
		.u.timer.expires_ns = 0
	};

	if (do_evtchn_tests) {
		struct kvm_xen_hvm_attr inj = {
			.type = KVM_XEN_ATTR_TYPE_EVTCHN,
			.u.evtchn.send_port = 127,
			.u.evtchn.type = EVTCHNSTAT_interdomain,
			.u.evtchn.flags = 0,
			.u.evtchn.deliver.port.port = EVTCHN_TEST1,
			.u.evtchn.deliver.port.vcpu = vcpu->id + 1,
			.u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
		};
		vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);

		/* Test migration to a different vCPU */
		inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE;
		inj.u.evtchn.deliver.port.vcpu = vcpu->id;
		vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);

		inj.u.evtchn.send_port = 197;
		inj.u.evtchn.deliver.eventfd.port = 0;
		inj.u.evtchn.deliver.eventfd.fd = irq_fd[1];
		inj.u.evtchn.flags = 0;
		vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);

		vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
	}
	vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
	vinfo->evtchn_upcall_pending = 0;

	struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
	rs->state = 0x5a;

	bool evtchn_irq_expected = false;

	for (;;) {
		struct ucall uc;

		vcpu_run(vcpu);
		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			/* NOT REACHED */
		case UCALL_SYNC: {
			struct kvm_xen_vcpu_attr rst;
			long rundelay;

			if (do_runstate_tests)
				TEST_ASSERT(rs->state_entry_time == rs->time[0] +
					    rs->time[1] + rs->time[2] + rs->time[3],
					    "runstate times don't add up");

			switch (uc.args[1]) {
			case TEST_INJECT_VECTOR:
				if (verbose)
					printf("Delivering evtchn upcall\n");
				evtchn_irq_expected = true;
				vinfo->evtchn_upcall_pending = 1;
				break;

			case TEST_RUNSTATE_runnable ... TEST_RUNSTATE_offline:
				TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
				if (!do_runstate_tests)
					goto done;
				if (verbose)
					printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
				rst.u.runstate.state = uc.args[1] + RUNSTATE_runnable -
					TEST_RUNSTATE_runnable;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
				break;

			case TEST_RUNSTATE_ADJUST:
				if (verbose)
					printf("Testing RUNSTATE_ADJUST\n");
				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
				memset(&rst.u, 0, sizeof(rst.u));
				rst.u.runstate.state = (uint64_t)-1;
				rst.u.runstate.time_blocked =
					0x5a - rs->time[RUNSTATE_blocked];
				rst.u.runstate.time_offline =
					0x6b6b - rs->time[RUNSTATE_offline];
				rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
					rst.u.runstate.time_offline;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
				break;

			case TEST_RUNSTATE_DATA:
				if (verbose)
					printf("Testing RUNSTATE_DATA\n");
				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
				memset(&rst.u, 0, sizeof(rst.u));
				rst.u.runstate.state = RUNSTATE_running;
				rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
				rst.u.runstate.time_blocked = 0x6b6b;
				rst.u.runstate.time_offline = 0x5a;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
				break;

			case TEST_STEAL_TIME:
				if (verbose)
					printf("Testing steal time\n");
				/* Yield until scheduler delay exceeds target */
				rundelay = get_run_delay() + MIN_STEAL_TIME;
				do {
					sched_yield();
				} while (get_run_delay() < rundelay);
				break;

			case TEST_EVTCHN_MASKED:
				if (!do_eventfd_tests)
					goto done;
				if (verbose)
					printf("Testing masked event channel\n");
				shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1;
				eventfd_write(irq_fd[0], 1UL);
				alarm(1);
				break;

			case TEST_EVTCHN_UNMASKED:
				if (verbose)
					printf("Testing unmasked event channel\n");
				/* Unmask that, but deliver the other one */
				shinfo->evtchn_pending[0] = 0;
				shinfo->evtchn_mask[0] = 0;
				eventfd_write(irq_fd[1], 1UL);
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case TEST_EVTCHN_SLOWPATH:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[1] = 0;
				if (verbose)
					printf("Testing event channel after memslot change\n");
				vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
							    DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
				eventfd_write(irq_fd[0], 1UL);
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case TEST_EVTCHN_SEND_IOCTL:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				if (!do_evtchn_tests)
					goto done;

				shinfo->evtchn_pending[0] = 0;
				if (verbose)
					printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n");

				struct kvm_irq_routing_xen_evtchn e;
				e.port = EVTCHN_TEST2;
				e.vcpu = vcpu->id;
				e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

				vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e);
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case TEST_EVTCHN_HCALL:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[1] = 0;

				if (verbose)
					printf("Testing guest EVTCHNOP_send direct to evtchn\n");
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case TEST_EVTCHN_HCALL_SLOWPATH:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[0] = 0;

				if (verbose)
					printf("Testing guest EVTCHNOP_send direct to evtchn after memslot change\n");
				vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
							    DUMMY_REGION_GPA_2, DUMMY_REGION_SLOT_2, 1, 0);
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case TEST_EVTCHN_HCALL_EVENTFD:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[0] = 0;

				if (verbose)
					printf("Testing guest EVTCHNOP_send to eventfd\n");
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case TEST_TIMER_SETUP:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[1] = 0;

				if (verbose)
					printf("Testing guest oneshot timer\n");
				break;

			case TEST_TIMER_WAIT:
				memset(&tmr, 0, sizeof(tmr));
				tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
				TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER,
					    "Timer port not returned");
				TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
					    "Timer priority not returned");
				TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time,
					    "Timer expiry not returned");
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case TEST_TIMER_RESTORE:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[0] = 0;

				if (verbose)
					printf("Testing restored oneshot timer\n");

				tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case TEST_POLL_READY:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");

				if (verbose)
					printf("Testing SCHEDOP_poll with already pending event\n");
				shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER;
				alarm(1);
				break;

			case TEST_POLL_TIMEOUT:
				if (verbose)
					printf("Testing SCHEDOP_poll timeout\n");
				shinfo->evtchn_pending[0] = 0;
				alarm(1);
				break;

			case TEST_POLL_MASKED:
				if (verbose)
					printf("Testing SCHEDOP_poll wake on masked event\n");

				tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
				alarm(1);
				break;

			case TEST_POLL_WAKE:
				shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0;
				if (verbose)
					printf("Testing SCHEDOP_poll wake on unmasked event\n");

				evtchn_irq_expected = true;
				tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);

				/* Read it back and check the pending time is reported correctly */
				tmr.u.timer.expires_ns = 0;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
				TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000,
					    "Timer not reported pending");
				alarm(1);
				break;

			case TEST_TIMER_PAST:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				/* Read timer and check it is no longer pending */
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
				TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending");

				shinfo->evtchn_pending[0] = 0;
				if (verbose)
					printf("Testing timer in the past\n");

				evtchn_irq_expected = true;
				tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
				alarm(1);
				break;

			case TEST_LOCKING_SEND_RACE:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				alarm(0);

				if (verbose)
					printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n");

				ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm);
				TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret));

				struct kvm_irq_routing_xen_evtchn uxe = {
					.port = 1,
					.vcpu = vcpu->id,
					.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL
				};

				evtchn_irq_expected = true;
				for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;)
					__vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe);
				break;

			case TEST_LOCKING_POLL_RACE:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");

				if (verbose)
					printf("Testing shinfo lock corruption (SCHEDOP_poll)\n");

				shinfo->evtchn_pending[0] = 1;

				evtchn_irq_expected = true;
				tmr.u.timer.expires_ns = rs->state_entry_time +
							 SHINFO_RACE_TIMEOUT * 1000000000ULL;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
				break;

			case TEST_LOCKING_POLL_TIMEOUT:
				/*
				 * Optional and possibly repeated sync point.
				 * Injecting the timer IRQ may fail if the
				 * shinfo is invalid when the timer expires.
				 * If the timer has expired but the IRQ hasn't
				 * been delivered, rearm the timer and retry.
				 */
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);

				/* Resume the guest if the timer is still pending. */
				if (tmr.u.timer.expires_ns)
					break;

				/* All done if the IRQ was delivered. */
				if (!evtchn_irq_expected)
					break;

				tmr.u.timer.expires_ns = rs->state_entry_time +
							 SHINFO_RACE_TIMEOUT * 1000000000ULL;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
				break;

			case TEST_DONE:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");

				ret = pthread_cancel(thread);
				TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret));

				ret = pthread_join(thread, 0);
				TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret));
				goto done;

			case TEST_GUEST_SAW_IRQ:
				TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
				evtchn_irq_expected = false;
				break;
			}
			break;
		}
		case UCALL_DONE:
			goto done;
		default:
			TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
		}
	}

 done:
	evt_reset.type = KVM_XEN_ATTR_TYPE_EVTCHN;
	evt_reset.u.evtchn.flags = KVM_XEN_EVTCHN_RESET;
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset);

	alarm(0);
	clock_gettime(CLOCK_REALTIME, &max_ts);

	/*
	 * Just a *really* basic check that things are being put in the
	 * right place. The actual calculations are much the same for
	 * Xen as they are for the KVM variants, so no need to check.
	 */
	struct pvclock_wall_clock *wc;
	struct pvclock_vcpu_time_info *ti, *ti2;

	wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
	ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
	ti2 = addr_gpa2hva(vm, PVTIME_ADDR);

	if (verbose) {
		printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
		printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
		       ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
		       ti->tsc_shift, ti->flags);
		printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
		       ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
		       ti2->tsc_shift, ti2->flags);
	}

	vm_ts.tv_sec = wc->sec;
	vm_ts.tv_nsec = wc->nsec;
	TEST_ASSERT(wc->version && !(wc->version & 1),
		    "Bad wallclock version %x", wc->version);
	TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old");
	TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new");

	TEST_ASSERT(ti->version && !(ti->version & 1),
		    "Bad time_info version %x", ti->version);
	TEST_ASSERT(ti2->version && !(ti2->version & 1),
		    "Bad time_info version %x", ti2->version);

	if (do_runstate_tests) {
		/*
		 * Fetch runstate and check sanity. Strictly speaking in the
		 * general case we might not expect the numbers to be identical
		 * but in this case we know we aren't running the vCPU any more.
		 */
		struct kvm_xen_vcpu_attr rst = {
			.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
		};
		vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst);

		if (verbose) {
			printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
			       rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
			       rs->state, rs->state_entry_time);
			for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
				printf("State %s: %" PRIu64 " ns\n",
				       runstate_names[i], rs->time[i]);
			}
		}

		/*
		 * Exercise runstate info at all points across the page boundary, in
		 * 32-bit and 64-bit mode.
		 * In particular, test the case where it is
		 * configured in 32-bit mode and then switched to 64-bit mode while
		 * active, which takes it onto the second page.
		 */
		unsigned long runstate_addr;
		struct compat_vcpu_runstate_info *crs;
		for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4;
		     runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) {

			rs = addr_gpa2hva(vm, runstate_addr);
			crs = (void *)rs;

			memset(rs, 0xa5, sizeof(*rs));

			/* Set to compatibility mode */
			lm.u.long_mode = 0;
			vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);

			/* Set runstate to new address (kernel will write it) */
			struct kvm_xen_vcpu_attr st = {
				.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
				.u.gpa = runstate_addr,
			};
			vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);

			if (verbose)
				printf("Compatibility runstate at %08lx\n", runstate_addr);

			TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch");
			TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time,
				    "State entry time mismatch");
			TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running,
				    "Running time mismatch");
			TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
				    "Runnable time mismatch");
			TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
				    "Blocked time mismatch");
			TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
				    "Offline time mismatch");
			TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
				    "Structure overrun");
			TEST_ASSERT(crs->state_entry_time == crs->time[0] +
				    crs->time[1] + crs->time[2] + crs->time[3],
				    "runstate times don't add up");

			/* Now switch to 64-bit mode */
			lm.u.long_mode = 1;
			vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);

			memset(rs, 0xa5, sizeof(*rs));

			/* Don't change the address, just trigger a write */
			struct kvm_xen_vcpu_attr adj = {
				.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST,
				.u.runstate.state = (uint64_t)-1
			};
			vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj);

			if (verbose)
				printf("64-bit runstate at %08lx\n", runstate_addr);

			TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
			TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
				    "State entry time mismatch");
			TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
				    "Running time mismatch");
			TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
				    "Runnable time mismatch");
			TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
				    "Blocked time mismatch");
			TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
				    "Offline time mismatch");
			TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
				    "Structure overrun");

			TEST_ASSERT(rs->state_entry_time == rs->time[0] +
				    rs->time[1] + rs->time[2] + rs->time[3],
				    "runstate times don't add up");
		}
	}

	kvm_vm_free(vm);
	return 0;
}