1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * svm_vmcall_test 4 * 5 * Copyright © 2021 Amazon.com, Inc. or its affiliates. 6 * 7 * Xen shared_info / pvclock testing 8 */ 9 10 #include "test_util.h" 11 #include "kvm_util.h" 12 #include "processor.h" 13 14 #include <stdint.h> 15 #include <time.h> 16 #include <sched.h> 17 #include <signal.h> 18 #include <pthread.h> 19 20 #include <sys/eventfd.h> 21 22 #define SHINFO_REGION_GVA 0xc0000000ULL 23 #define SHINFO_REGION_GPA 0xc0000000ULL 24 #define SHINFO_REGION_SLOT 10 25 26 #define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (3 * PAGE_SIZE)) 27 #define DUMMY_REGION_SLOT 11 28 29 #define DUMMY_REGION_GPA_2 (SHINFO_REGION_GPA + (4 * PAGE_SIZE)) 30 #define DUMMY_REGION_SLOT_2 12 31 32 #define SHINFO_ADDR (SHINFO_REGION_GPA) 33 #define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40) 34 #define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) 35 #define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15) 36 37 #define SHINFO_VADDR (SHINFO_REGION_GVA) 38 #define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40) 39 #define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15) 40 41 #define EVTCHN_VECTOR 0x10 42 43 #define EVTCHN_TEST1 15 44 #define EVTCHN_TEST2 66 45 #define EVTCHN_TIMER 13 46 47 enum { 48 TEST_INJECT_VECTOR = 0, 49 TEST_RUNSTATE_runnable, 50 TEST_RUNSTATE_blocked, 51 TEST_RUNSTATE_offline, 52 TEST_RUNSTATE_ADJUST, 53 TEST_RUNSTATE_DATA, 54 TEST_STEAL_TIME, 55 TEST_EVTCHN_MASKED, 56 TEST_EVTCHN_UNMASKED, 57 TEST_EVTCHN_SLOWPATH, 58 TEST_EVTCHN_SEND_IOCTL, 59 TEST_EVTCHN_HCALL, 60 TEST_EVTCHN_HCALL_SLOWPATH, 61 TEST_EVTCHN_HCALL_EVENTFD, 62 TEST_TIMER_SETUP, 63 TEST_TIMER_WAIT, 64 TEST_TIMER_RESTORE, 65 TEST_POLL_READY, 66 TEST_POLL_TIMEOUT, 67 TEST_POLL_MASKED, 68 TEST_POLL_WAKE, 69 TEST_TIMER_PAST, 70 TEST_LOCKING_SEND_RACE, 71 TEST_LOCKING_POLL_RACE, 72 TEST_LOCKING_POLL_TIMEOUT, 73 TEST_DONE, 74 75 TEST_GUEST_SAW_IRQ, 76 }; 77 78 #define XEN_HYPERCALL_MSR 0x40000000 79 80 #define MIN_STEAL_TIME 50000 81 82 #define SHINFO_RACE_TIMEOUT 2 /* seconds */ 83 84 #define __HYPERVISOR_set_timer_op 15 85 #define __HYPERVISOR_sched_op 29 86 #define __HYPERVISOR_event_channel_op 32 87 88 #define SCHEDOP_poll 3 89 90 #define EVTCHNOP_send 4 91 92 #define EVTCHNSTAT_interdomain 2 93 94 struct evtchn_send { 95 u32 port; 96 }; 97 98 struct sched_poll { 99 u32 *ports; 100 unsigned int nr_ports; 101 u64 timeout; 102 }; 103 104 struct pvclock_vcpu_time_info { 105 u32 version; 106 u32 pad0; 107 u64 tsc_timestamp; 108 u64 system_time; 109 u32 tsc_to_system_mul; 110 s8 tsc_shift; 111 u8 flags; 112 u8 pad[2]; 113 } __attribute__((__packed__)); /* 32 bytes */ 114 115 struct pvclock_wall_clock { 116 u32 version; 117 u32 sec; 118 u32 nsec; 119 } __attribute__((__packed__)); 120 121 struct vcpu_runstate_info { 122 uint32_t state; 123 uint64_t state_entry_time; 124 uint64_t time[5]; /* Extra field for overrun check */ 125 }; 126 127 struct compat_vcpu_runstate_info { 128 uint32_t state; 129 uint64_t state_entry_time; 130 uint64_t time[5]; 131 } __attribute__((__packed__));; 132 133 struct arch_vcpu_info { 134 unsigned long cr2; 135 unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ 136 }; 137 138 struct vcpu_info { 139 uint8_t evtchn_upcall_pending; 140 uint8_t evtchn_upcall_mask; 141 unsigned long evtchn_pending_sel; 142 struct arch_vcpu_info arch; 143 struct pvclock_vcpu_time_info time; 144 }; /* 64 bytes (x86) */ 145 146 struct shared_info { 147 struct vcpu_info vcpu_info[32]; 148 unsigned long evtchn_pending[64]; 149 unsigned long evtchn_mask[64]; 150 struct pvclock_wall_clock wc; 151 uint32_t wc_sec_hi; 152 /* arch_shared_info here */ 153 }; 154 155 #define RUNSTATE_running 0 156 #define RUNSTATE_runnable 1 157 #define RUNSTATE_blocked 2 158 #define RUNSTATE_offline 3 159 160 static const char *runstate_names[] = { 161 "running", 162 "runnable", 163 "blocked", 164 "offline" 165 }; 166 167 struct { 168 struct kvm_irq_routing info; 169 struct kvm_irq_routing_entry entries[2]; 170 } irq_routes; 171 172 static volatile bool guest_saw_irq; 173 174 static void evtchn_handler(struct ex_regs *regs) 175 { 176 struct vcpu_info *vi = (void *)VCPU_INFO_VADDR; 177 vi->evtchn_upcall_pending = 0; 178 vi->evtchn_pending_sel = 0; 179 guest_saw_irq = true; 180 181 GUEST_SYNC(TEST_GUEST_SAW_IRQ); 182 } 183 184 static void guest_wait_for_irq(void) 185 { 186 while (!guest_saw_irq) 187 __asm__ __volatile__ ("rep nop" : : : "memory"); 188 guest_saw_irq = false; 189 } 190 191 static void guest_code(void) 192 { 193 struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR; 194 int i; 195 196 __asm__ __volatile__( 197 "sti\n" 198 "nop\n" 199 ); 200 201 /* Trigger an interrupt injection */ 202 GUEST_SYNC(TEST_INJECT_VECTOR); 203 204 guest_wait_for_irq(); 205 206 /* Test having the host set runstates manually */ 207 GUEST_SYNC(TEST_RUNSTATE_runnable); 208 GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0); 209 GUEST_ASSERT(rs->state == 0); 210 211 GUEST_SYNC(TEST_RUNSTATE_blocked); 212 GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0); 213 GUEST_ASSERT(rs->state == 0); 214 215 GUEST_SYNC(TEST_RUNSTATE_offline); 216 GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0); 217 GUEST_ASSERT(rs->state == 0); 218 219 /* Test runstate time adjust */ 220 GUEST_SYNC(TEST_RUNSTATE_ADJUST); 221 GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a); 222 GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b); 223 224 /* Test runstate time set */ 225 GUEST_SYNC(TEST_RUNSTATE_DATA); 226 GUEST_ASSERT(rs->state_entry_time >= 0x8000); 227 GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0); 228 GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b); 229 GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a); 230 231 /* sched_yield() should result in some 'runnable' time */ 232 GUEST_SYNC(TEST_STEAL_TIME); 233 GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME); 234 235 /* Attempt to deliver a *masked* interrupt */ 236 GUEST_SYNC(TEST_EVTCHN_MASKED); 237 238 /* Wait until we see the bit set */ 239 struct shared_info *si = (void *)SHINFO_VADDR; 240 while (!si->evtchn_pending[0]) 241 __asm__ __volatile__ ("rep nop" : : : "memory"); 242 243 /* Now deliver an *unmasked* interrupt */ 244 GUEST_SYNC(TEST_EVTCHN_UNMASKED); 245 246 guest_wait_for_irq(); 247 248 /* Change memslots and deliver an interrupt */ 249 GUEST_SYNC(TEST_EVTCHN_SLOWPATH); 250 251 guest_wait_for_irq(); 252 253 /* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */ 254 GUEST_SYNC(TEST_EVTCHN_SEND_IOCTL); 255 256 guest_wait_for_irq(); 257 258 GUEST_SYNC(TEST_EVTCHN_HCALL); 259 260 /* Our turn. Deliver event channel (to ourselves) with 261 * EVTCHNOP_send hypercall. */ 262 struct evtchn_send s = { .port = 127 }; 263 xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s); 264 265 guest_wait_for_irq(); 266 267 GUEST_SYNC(TEST_EVTCHN_HCALL_SLOWPATH); 268 269 /* 270 * Same again, but this time the host has messed with memslots so it 271 * should take the slow path in kvm_xen_set_evtchn(). 272 */ 273 xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s); 274 275 guest_wait_for_irq(); 276 277 GUEST_SYNC(TEST_EVTCHN_HCALL_EVENTFD); 278 279 /* Deliver "outbound" event channel to an eventfd which 280 * happens to be one of our own irqfds. */ 281 s.port = 197; 282 xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s); 283 284 guest_wait_for_irq(); 285 286 GUEST_SYNC(TEST_TIMER_SETUP); 287 288 /* Set a timer 100ms in the future. */ 289 xen_hypercall(__HYPERVISOR_set_timer_op, 290 rs->state_entry_time + 100000000, NULL); 291 292 GUEST_SYNC(TEST_TIMER_WAIT); 293 294 /* Now wait for the timer */ 295 guest_wait_for_irq(); 296 297 GUEST_SYNC(TEST_TIMER_RESTORE); 298 299 /* The host has 'restored' the timer. Just wait for it. */ 300 guest_wait_for_irq(); 301 302 GUEST_SYNC(TEST_POLL_READY); 303 304 /* Poll for an event channel port which is already set */ 305 u32 ports[1] = { EVTCHN_TIMER }; 306 struct sched_poll p = { 307 .ports = ports, 308 .nr_ports = 1, 309 .timeout = 0, 310 }; 311 312 xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p); 313 314 GUEST_SYNC(TEST_POLL_TIMEOUT); 315 316 /* Poll for an unset port and wait for the timeout. */ 317 p.timeout = 100000000; 318 xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p); 319 320 GUEST_SYNC(TEST_POLL_MASKED); 321 322 /* A timer will wake the masked port we're waiting on, while we poll */ 323 p.timeout = 0; 324 xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p); 325 326 GUEST_SYNC(TEST_POLL_WAKE); 327 328 /* A timer wake an *unmasked* port which should wake us with an 329 * actual interrupt, while we're polling on a different port. */ 330 ports[0]++; 331 p.timeout = 0; 332 xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p); 333 334 guest_wait_for_irq(); 335 336 GUEST_SYNC(TEST_TIMER_PAST); 337 338 /* Timer should have fired already */ 339 guest_wait_for_irq(); 340 341 GUEST_SYNC(TEST_LOCKING_SEND_RACE); 342 /* Racing host ioctls */ 343 344 guest_wait_for_irq(); 345 346 GUEST_SYNC(TEST_LOCKING_POLL_RACE); 347 /* Racing vmcall against host ioctl */ 348 349 ports[0] = 0; 350 351 p = (struct sched_poll) { 352 .ports = ports, 353 .nr_ports = 1, 354 .timeout = 0 355 }; 356 357 wait_for_timer: 358 /* 359 * Poll for a timer wake event while the worker thread is mucking with 360 * the shared info. KVM XEN drops timer IRQs if the shared info is 361 * invalid when the timer expires. Arbitrarily poll 100 times before 362 * giving up and asking the VMM to re-arm the timer. 100 polls should 363 * consume enough time to beat on KVM without taking too long if the 364 * timer IRQ is dropped due to an invalid event channel. 365 */ 366 for (i = 0; i < 100 && !guest_saw_irq; i++) 367 __xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p); 368 369 /* 370 * Re-send the timer IRQ if it was (likely) dropped due to the timer 371 * expiring while the event channel was invalid. 372 */ 373 if (!guest_saw_irq) { 374 GUEST_SYNC(TEST_LOCKING_POLL_TIMEOUT); 375 goto wait_for_timer; 376 } 377 guest_saw_irq = false; 378 379 GUEST_SYNC(TEST_DONE); 380 } 381 382 static int cmp_timespec(struct timespec *a, struct timespec *b) 383 { 384 if (a->tv_sec > b->tv_sec) 385 return 1; 386 else if (a->tv_sec < b->tv_sec) 387 return -1; 388 else if (a->tv_nsec > b->tv_nsec) 389 return 1; 390 else if (a->tv_nsec < b->tv_nsec) 391 return -1; 392 else 393 return 0; 394 } 395 396 static struct vcpu_info *vinfo; 397 static struct kvm_vcpu *vcpu; 398 399 static void handle_alrm(int sig) 400 { 401 if (vinfo) 402 printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending); 403 vcpu_dump(stdout, vcpu, 0); 404 TEST_FAIL("IRQ delivery timed out"); 405 } 406 407 static void *juggle_shinfo_state(void *arg) 408 { 409 struct kvm_vm *vm = (struct kvm_vm *)arg; 410 411 struct kvm_xen_hvm_attr cache_activate = { 412 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, 413 .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE 414 }; 415 416 struct kvm_xen_hvm_attr cache_deactivate = { 417 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, 418 .u.shared_info.gfn = KVM_XEN_INVALID_GFN 419 }; 420 421 for (;;) { 422 __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate); 423 __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate); 424 pthread_testcancel(); 425 } 426 427 return NULL; 428 } 429 430 int main(int argc, char *argv[]) 431 { 432 struct timespec min_ts, max_ts, vm_ts; 433 struct kvm_xen_hvm_attr evt_reset; 434 struct kvm_vm *vm; 435 pthread_t thread; 436 bool verbose; 437 int ret; 438 439 verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) || 440 !strncmp(argv[1], "--verbose", 10)); 441 442 int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); 443 TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO); 444 445 bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE); 446 bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG); 447 bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL); 448 bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND); 449 450 clock_gettime(CLOCK_REALTIME, &min_ts); 451 452 vm = vm_create_with_one_vcpu(&vcpu, guest_code); 453 454 /* Map a region for the shared_info page */ 455 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 456 SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0); 457 virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3); 458 459 struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR); 460 461 int zero_fd = open("/dev/zero", O_RDONLY); 462 TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero"); 463 464 struct kvm_xen_hvm_config hvmc = { 465 .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, 466 .msr = XEN_HYPERCALL_MSR, 467 }; 468 469 /* Let the kernel know that we *will* use it for sending all 470 * event channels, which lets it intercept SCHEDOP_poll */ 471 if (do_evtchn_tests) 472 hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND; 473 474 vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc); 475 476 struct kvm_xen_hvm_attr lm = { 477 .type = KVM_XEN_ATTR_TYPE_LONG_MODE, 478 .u.long_mode = 1, 479 }; 480 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); 481 482 if (do_runstate_flag) { 483 struct kvm_xen_hvm_attr ruf = { 484 .type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG, 485 .u.runstate_update_flag = 1, 486 }; 487 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf); 488 489 ruf.u.runstate_update_flag = 0; 490 vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf); 491 TEST_ASSERT(ruf.u.runstate_update_flag == 1, 492 "Failed to read back RUNSTATE_UPDATE_FLAG attr"); 493 } 494 495 struct kvm_xen_hvm_attr ha = { 496 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, 497 .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE, 498 }; 499 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha); 500 501 /* 502 * Test what happens when the HVA of the shinfo page is remapped after 503 * the kernel has a reference to it. But make sure we copy the clock 504 * info over since that's only set at setup time, and we test it later. 505 */ 506 struct pvclock_wall_clock wc_copy = shinfo->wc; 507 void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0); 508 TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info"); 509 shinfo->wc = wc_copy; 510 511 struct kvm_xen_vcpu_attr vi = { 512 .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, 513 .u.gpa = VCPU_INFO_ADDR, 514 }; 515 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi); 516 517 struct kvm_xen_vcpu_attr pvclock = { 518 .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO, 519 .u.gpa = PVTIME_ADDR, 520 }; 521 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock); 522 523 struct kvm_xen_hvm_attr vec = { 524 .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR, 525 .u.vector = EVTCHN_VECTOR, 526 }; 527 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec); 528 529 vm_init_descriptor_tables(vm); 530 vcpu_init_descriptor_tables(vcpu); 531 vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler); 532 533 if (do_runstate_tests) { 534 struct kvm_xen_vcpu_attr st = { 535 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, 536 .u.gpa = RUNSTATE_ADDR, 537 }; 538 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st); 539 } 540 541 int irq_fd[2] = { -1, -1 }; 542 543 if (do_eventfd_tests) { 544 irq_fd[0] = eventfd(0, 0); 545 irq_fd[1] = eventfd(0, 0); 546 547 /* Unexpected, but not a KVM failure */ 548 if (irq_fd[0] == -1 || irq_fd[1] == -1) 549 do_evtchn_tests = do_eventfd_tests = false; 550 } 551 552 if (do_eventfd_tests) { 553 irq_routes.info.nr = 2; 554 555 irq_routes.entries[0].gsi = 32; 556 irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN; 557 irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1; 558 irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id; 559 irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; 560 561 irq_routes.entries[1].gsi = 33; 562 irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN; 563 irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2; 564 irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id; 565 irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; 566 567 vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info); 568 569 struct kvm_irqfd ifd = { }; 570 571 ifd.fd = irq_fd[0]; 572 ifd.gsi = 32; 573 vm_ioctl(vm, KVM_IRQFD, &ifd); 574 575 ifd.fd = irq_fd[1]; 576 ifd.gsi = 33; 577 vm_ioctl(vm, KVM_IRQFD, &ifd); 578 579 struct sigaction sa = { }; 580 sa.sa_handler = handle_alrm; 581 sigaction(SIGALRM, &sa, NULL); 582 } 583 584 struct kvm_xen_vcpu_attr tmr = { 585 .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER, 586 .u.timer.port = EVTCHN_TIMER, 587 .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, 588 .u.timer.expires_ns = 0 589 }; 590 591 if (do_evtchn_tests) { 592 struct kvm_xen_hvm_attr inj = { 593 .type = KVM_XEN_ATTR_TYPE_EVTCHN, 594 .u.evtchn.send_port = 127, 595 .u.evtchn.type = EVTCHNSTAT_interdomain, 596 .u.evtchn.flags = 0, 597 .u.evtchn.deliver.port.port = EVTCHN_TEST1, 598 .u.evtchn.deliver.port.vcpu = vcpu->id + 1, 599 .u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, 600 }; 601 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); 602 603 /* Test migration to a different vCPU */ 604 inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE; 605 inj.u.evtchn.deliver.port.vcpu = vcpu->id; 606 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); 607 608 inj.u.evtchn.send_port = 197; 609 inj.u.evtchn.deliver.eventfd.port = 0; 610 inj.u.evtchn.deliver.eventfd.fd = irq_fd[1]; 611 inj.u.evtchn.flags = 0; 612 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); 613 614 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 615 } 616 vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR); 617 vinfo->evtchn_upcall_pending = 0; 618 619 struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR); 620 rs->state = 0x5a; 621 622 bool evtchn_irq_expected = false; 623 624 for (;;) { 625 struct ucall uc; 626 627 vcpu_run(vcpu); 628 TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); 629 630 switch (get_ucall(vcpu, &uc)) { 631 case UCALL_ABORT: 632 REPORT_GUEST_ASSERT(uc); 633 /* NOT REACHED */ 634 case UCALL_SYNC: { 635 struct kvm_xen_vcpu_attr rst; 636 long rundelay; 637 638 if (do_runstate_tests) 639 TEST_ASSERT(rs->state_entry_time == rs->time[0] + 640 rs->time[1] + rs->time[2] + rs->time[3], 641 "runstate times don't add up"); 642 643 switch (uc.args[1]) { 644 case TEST_INJECT_VECTOR: 645 if (verbose) 646 printf("Delivering evtchn upcall\n"); 647 evtchn_irq_expected = true; 648 vinfo->evtchn_upcall_pending = 1; 649 break; 650 651 case TEST_RUNSTATE_runnable...TEST_RUNSTATE_offline: 652 TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen"); 653 if (!do_runstate_tests) 654 goto done; 655 if (verbose) 656 printf("Testing runstate %s\n", runstate_names[uc.args[1]]); 657 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT; 658 rst.u.runstate.state = uc.args[1] + RUNSTATE_runnable - 659 TEST_RUNSTATE_runnable; 660 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); 661 break; 662 663 case TEST_RUNSTATE_ADJUST: 664 if (verbose) 665 printf("Testing RUNSTATE_ADJUST\n"); 666 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST; 667 memset(&rst.u, 0, sizeof(rst.u)); 668 rst.u.runstate.state = (uint64_t)-1; 669 rst.u.runstate.time_blocked = 670 0x5a - rs->time[RUNSTATE_blocked]; 671 rst.u.runstate.time_offline = 672 0x6b6b - rs->time[RUNSTATE_offline]; 673 rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked - 674 rst.u.runstate.time_offline; 675 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); 676 break; 677 678 case TEST_RUNSTATE_DATA: 679 if (verbose) 680 printf("Testing RUNSTATE_DATA\n"); 681 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA; 682 memset(&rst.u, 0, sizeof(rst.u)); 683 rst.u.runstate.state = RUNSTATE_running; 684 rst.u.runstate.state_entry_time = 0x6b6b + 0x5a; 685 rst.u.runstate.time_blocked = 0x6b6b; 686 rst.u.runstate.time_offline = 0x5a; 687 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); 688 break; 689 690 case TEST_STEAL_TIME: 691 if (verbose) 692 printf("Testing steal time\n"); 693 /* Yield until scheduler delay exceeds target */ 694 rundelay = get_run_delay() + MIN_STEAL_TIME; 695 do { 696 sched_yield(); 697 } while (get_run_delay() < rundelay); 698 break; 699 700 case TEST_EVTCHN_MASKED: 701 if (!do_eventfd_tests) 702 goto done; 703 if (verbose) 704 printf("Testing masked event channel\n"); 705 shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1; 706 eventfd_write(irq_fd[0], 1UL); 707 alarm(1); 708 break; 709 710 case TEST_EVTCHN_UNMASKED: 711 if (verbose) 712 printf("Testing unmasked event channel\n"); 713 /* Unmask that, but deliver the other one */ 714 shinfo->evtchn_pending[0] = 0; 715 shinfo->evtchn_mask[0] = 0; 716 eventfd_write(irq_fd[1], 1UL); 717 evtchn_irq_expected = true; 718 alarm(1); 719 break; 720 721 case TEST_EVTCHN_SLOWPATH: 722 TEST_ASSERT(!evtchn_irq_expected, 723 "Expected event channel IRQ but it didn't happen"); 724 shinfo->evtchn_pending[1] = 0; 725 if (verbose) 726 printf("Testing event channel after memslot change\n"); 727 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 728 DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0); 729 eventfd_write(irq_fd[0], 1UL); 730 evtchn_irq_expected = true; 731 alarm(1); 732 break; 733 734 case TEST_EVTCHN_SEND_IOCTL: 735 TEST_ASSERT(!evtchn_irq_expected, 736 "Expected event channel IRQ but it didn't happen"); 737 if (!do_evtchn_tests) 738 goto done; 739 740 shinfo->evtchn_pending[0] = 0; 741 if (verbose) 742 printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n"); 743 744 struct kvm_irq_routing_xen_evtchn e; 745 e.port = EVTCHN_TEST2; 746 e.vcpu = vcpu->id; 747 e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; 748 749 vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e); 750 evtchn_irq_expected = true; 751 alarm(1); 752 break; 753 754 case TEST_EVTCHN_HCALL: 755 TEST_ASSERT(!evtchn_irq_expected, 756 "Expected event channel IRQ but it didn't happen"); 757 shinfo->evtchn_pending[1] = 0; 758 759 if (verbose) 760 printf("Testing guest EVTCHNOP_send direct to evtchn\n"); 761 evtchn_irq_expected = true; 762 alarm(1); 763 break; 764 765 case TEST_EVTCHN_HCALL_SLOWPATH: 766 TEST_ASSERT(!evtchn_irq_expected, 767 "Expected event channel IRQ but it didn't happen"); 768 shinfo->evtchn_pending[0] = 0; 769 770 if (verbose) 771 printf("Testing guest EVTCHNOP_send direct to evtchn after memslot change\n"); 772 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 773 DUMMY_REGION_GPA_2, DUMMY_REGION_SLOT_2, 1, 0); 774 evtchn_irq_expected = true; 775 alarm(1); 776 break; 777 778 case TEST_EVTCHN_HCALL_EVENTFD: 779 TEST_ASSERT(!evtchn_irq_expected, 780 "Expected event channel IRQ but it didn't happen"); 781 shinfo->evtchn_pending[0] = 0; 782 783 if (verbose) 784 printf("Testing guest EVTCHNOP_send to eventfd\n"); 785 evtchn_irq_expected = true; 786 alarm(1); 787 break; 788 789 case TEST_TIMER_SETUP: 790 TEST_ASSERT(!evtchn_irq_expected, 791 "Expected event channel IRQ but it didn't happen"); 792 shinfo->evtchn_pending[1] = 0; 793 794 if (verbose) 795 printf("Testing guest oneshot timer\n"); 796 break; 797 798 case TEST_TIMER_WAIT: 799 memset(&tmr, 0, sizeof(tmr)); 800 tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER; 801 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); 802 TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER, 803 "Timer port not returned"); 804 TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, 805 "Timer priority not returned"); 806 TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time, 807 "Timer expiry not returned"); 808 evtchn_irq_expected = true; 809 alarm(1); 810 break; 811 812 case TEST_TIMER_RESTORE: 813 TEST_ASSERT(!evtchn_irq_expected, 814 "Expected event channel IRQ but it didn't happen"); 815 shinfo->evtchn_pending[0] = 0; 816 817 if (verbose) 818 printf("Testing restored oneshot timer\n"); 819 820 tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; 821 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 822 evtchn_irq_expected = true; 823 alarm(1); 824 break; 825 826 case TEST_POLL_READY: 827 TEST_ASSERT(!evtchn_irq_expected, 828 "Expected event channel IRQ but it didn't happen"); 829 830 if (verbose) 831 printf("Testing SCHEDOP_poll with already pending event\n"); 832 shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER; 833 alarm(1); 834 break; 835 836 case TEST_POLL_TIMEOUT: 837 if (verbose) 838 printf("Testing SCHEDOP_poll timeout\n"); 839 shinfo->evtchn_pending[0] = 0; 840 alarm(1); 841 break; 842 843 case TEST_POLL_MASKED: 844 if (verbose) 845 printf("Testing SCHEDOP_poll wake on masked event\n"); 846 847 tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; 848 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 849 alarm(1); 850 break; 851 852 case TEST_POLL_WAKE: 853 shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0; 854 if (verbose) 855 printf("Testing SCHEDOP_poll wake on unmasked event\n"); 856 857 evtchn_irq_expected = true; 858 tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; 859 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 860 861 /* Read it back and check the pending time is reported correctly */ 862 tmr.u.timer.expires_ns = 0; 863 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); 864 TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000, 865 "Timer not reported pending"); 866 alarm(1); 867 break; 868 869 case TEST_TIMER_PAST: 870 TEST_ASSERT(!evtchn_irq_expected, 871 "Expected event channel IRQ but it didn't happen"); 872 /* Read timer and check it is no longer pending */ 873 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); 874 TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending"); 875 876 shinfo->evtchn_pending[0] = 0; 877 if (verbose) 878 printf("Testing timer in the past\n"); 879 880 evtchn_irq_expected = true; 881 tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL; 882 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 883 alarm(1); 884 break; 885 886 case TEST_LOCKING_SEND_RACE: 887 TEST_ASSERT(!evtchn_irq_expected, 888 "Expected event channel IRQ but it didn't happen"); 889 alarm(0); 890 891 if (verbose) 892 printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n"); 893 894 ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm); 895 TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret)); 896 897 struct kvm_irq_routing_xen_evtchn uxe = { 898 .port = 1, 899 .vcpu = vcpu->id, 900 .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL 901 }; 902 903 evtchn_irq_expected = true; 904 for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;) 905 __vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe); 906 break; 907 908 case TEST_LOCKING_POLL_RACE: 909 TEST_ASSERT(!evtchn_irq_expected, 910 "Expected event channel IRQ but it didn't happen"); 911 912 if (verbose) 913 printf("Testing shinfo lock corruption (SCHEDOP_poll)\n"); 914 915 shinfo->evtchn_pending[0] = 1; 916 917 evtchn_irq_expected = true; 918 tmr.u.timer.expires_ns = rs->state_entry_time + 919 SHINFO_RACE_TIMEOUT * 1000000000ULL; 920 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 921 break; 922 923 case TEST_LOCKING_POLL_TIMEOUT: 924 /* 925 * Optional and possibly repeated sync point. 926 * Injecting the timer IRQ may fail if the 927 * shinfo is invalid when the timer expires. 928 * If the timer has expired but the IRQ hasn't 929 * been delivered, rearm the timer and retry. 930 */ 931 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); 932 933 /* Resume the guest if the timer is still pending. */ 934 if (tmr.u.timer.expires_ns) 935 break; 936 937 /* All done if the IRQ was delivered. */ 938 if (!evtchn_irq_expected) 939 break; 940 941 tmr.u.timer.expires_ns = rs->state_entry_time + 942 SHINFO_RACE_TIMEOUT * 1000000000ULL; 943 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); 944 break; 945 case TEST_DONE: 946 TEST_ASSERT(!evtchn_irq_expected, 947 "Expected event channel IRQ but it didn't happen"); 948 949 ret = pthread_cancel(thread); 950 TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret)); 951 952 ret = pthread_join(thread, 0); 953 TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret)); 954 goto done; 955 956 case TEST_GUEST_SAW_IRQ: 957 TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ"); 958 evtchn_irq_expected = false; 959 break; 960 } 961 break; 962 } 963 case UCALL_DONE: 964 goto done; 965 default: 966 TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); 967 } 968 } 969 970 done: 971 evt_reset.type = KVM_XEN_ATTR_TYPE_EVTCHN; 972 evt_reset.u.evtchn.flags = KVM_XEN_EVTCHN_RESET; 973 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset); 974 975 alarm(0); 976 clock_gettime(CLOCK_REALTIME, &max_ts); 977 978 /* 979 * Just a *really* basic check that things are being put in the 980 * right place. The actual calculations are much the same for 981 * Xen as they are for the KVM variants, so no need to check. 982 */ 983 struct pvclock_wall_clock *wc; 984 struct pvclock_vcpu_time_info *ti, *ti2; 985 986 wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00); 987 ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20); 988 ti2 = addr_gpa2hva(vm, PVTIME_ADDR); 989 990 if (verbose) { 991 printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec); 992 printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n", 993 ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul, 994 ti->tsc_shift, ti->flags); 995 printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n", 996 ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul, 997 ti2->tsc_shift, ti2->flags); 998 } 999 1000 vm_ts.tv_sec = wc->sec; 1001 vm_ts.tv_nsec = wc->nsec; 1002 TEST_ASSERT(wc->version && !(wc->version & 1), 1003 "Bad wallclock version %x", wc->version); 1004 TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old"); 1005 TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new"); 1006 1007 TEST_ASSERT(ti->version && !(ti->version & 1), 1008 "Bad time_info version %x", ti->version); 1009 TEST_ASSERT(ti2->version && !(ti2->version & 1), 1010 "Bad time_info version %x", ti->version); 1011 1012 if (do_runstate_tests) { 1013 /* 1014 * Fetch runstate and check sanity. Strictly speaking in the 1015 * general case we might not expect the numbers to be identical 1016 * but in this case we know we aren't running the vCPU any more. 1017 */ 1018 struct kvm_xen_vcpu_attr rst = { 1019 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA, 1020 }; 1021 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst); 1022 1023 if (verbose) { 1024 printf("Runstate: %s(%d), entry %" PRIu64 " ns\n", 1025 rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown", 1026 rs->state, rs->state_entry_time); 1027 for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) { 1028 printf("State %s: %" PRIu64 " ns\n", 1029 runstate_names[i], rs->time[i]); 1030 } 1031 } 1032 1033 /* 1034 * Exercise runstate info at all points across the page boundary, in 1035 * 32-bit and 64-bit mode. In particular, test the case where it is 1036 * configured in 32-bit mode and then switched to 64-bit mode while 1037 * active, which takes it onto the second page. 1038 */ 1039 unsigned long runstate_addr; 1040 struct compat_vcpu_runstate_info *crs; 1041 for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4; 1042 runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) { 1043 1044 rs = addr_gpa2hva(vm, runstate_addr); 1045 crs = (void *)rs; 1046 1047 memset(rs, 0xa5, sizeof(*rs)); 1048 1049 /* Set to compatibility mode */ 1050 lm.u.long_mode = 0; 1051 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); 1052 1053 /* Set runstate to new address (kernel will write it) */ 1054 struct kvm_xen_vcpu_attr st = { 1055 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, 1056 .u.gpa = runstate_addr, 1057 }; 1058 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st); 1059 1060 if (verbose) 1061 printf("Compatibility runstate at %08lx\n", runstate_addr); 1062 1063 TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch"); 1064 TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time, 1065 "State entry time mismatch"); 1066 TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running, 1067 "Running time mismatch"); 1068 TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, 1069 "Runnable time mismatch"); 1070 TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, 1071 "Blocked time mismatch"); 1072 TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, 1073 "Offline time mismatch"); 1074 TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL, 1075 "Structure overrun"); 1076 TEST_ASSERT(crs->state_entry_time == crs->time[0] + 1077 crs->time[1] + crs->time[2] + crs->time[3], 1078 "runstate times don't add up"); 1079 1080 1081 /* Now switch to 64-bit mode */ 1082 lm.u.long_mode = 1; 1083 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); 1084 1085 memset(rs, 0xa5, sizeof(*rs)); 1086 1087 /* Don't change the address, just trigger a write */ 1088 struct kvm_xen_vcpu_attr adj = { 1089 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST, 1090 .u.runstate.state = (uint64_t)-1 1091 }; 1092 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj); 1093 1094 if (verbose) 1095 printf("64-bit runstate at %08lx\n", runstate_addr); 1096 1097 TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch"); 1098 TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time, 1099 "State entry time mismatch"); 1100 TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running, 1101 "Running time mismatch"); 1102 TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, 1103 "Runnable time mismatch"); 1104 TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, 1105 "Blocked time mismatch"); 1106 TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, 1107 "Offline time mismatch"); 1108 TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL, 1109 "Structure overrun"); 1110 1111 TEST_ASSERT(rs->state_entry_time == rs->time[0] + 1112 rs->time[1] + rs->time[2] + rs->time[3], 1113 "runstate times don't add up"); 1114 } 1115 } 1116 1117 kvm_vm_free(vm); 1118 return 0; 1119 } 1120