1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * svm_vmcall_test 4 * 5 * Copyright © 2021 Amazon.com, Inc. or its affiliates. 6 * 7 * Xen shared_info / pvclock testing 8 */ 9 10 #include "test_util.h" 11 #include "kvm_util.h" 12 #include "processor.h" 13 14 #include <stdint.h> 15 #include <time.h> 16 #include <sched.h> 17 #include <signal.h> 18 19 #include <sys/eventfd.h> 20 21 #define VCPU_ID 5 22 23 #define SHINFO_REGION_GVA 0xc0000000ULL 24 #define SHINFO_REGION_GPA 0xc0000000ULL 25 #define SHINFO_REGION_SLOT 10 26 27 #define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (2 * PAGE_SIZE)) 28 #define DUMMY_REGION_SLOT 11 29 30 #define SHINFO_ADDR (SHINFO_REGION_GPA) 31 #define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) 32 #define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20) 33 #define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40) 34 35 #define SHINFO_VADDR (SHINFO_REGION_GVA) 36 #define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20) 37 #define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40) 38 39 #define EVTCHN_VECTOR 0x10 40 41 static struct kvm_vm *vm; 42 43 #define XEN_HYPERCALL_MSR 0x40000000 44 45 #define MIN_STEAL_TIME 50000 46 47 struct pvclock_vcpu_time_info { 48 u32 version; 49 u32 pad0; 50 u64 tsc_timestamp; 51 u64 system_time; 52 u32 tsc_to_system_mul; 53 s8 tsc_shift; 54 u8 flags; 55 u8 pad[2]; 56 } __attribute__((__packed__)); /* 32 bytes */ 57 58 struct pvclock_wall_clock { 59 u32 version; 60 u32 sec; 61 u32 nsec; 62 } __attribute__((__packed__)); 63 64 struct vcpu_runstate_info { 65 uint32_t state; 66 uint64_t state_entry_time; 67 uint64_t time[4]; 68 }; 69 70 struct arch_vcpu_info { 71 unsigned long cr2; 72 unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ 73 }; 74 75 struct vcpu_info { 76 uint8_t evtchn_upcall_pending; 77 uint8_t evtchn_upcall_mask; 78 unsigned long evtchn_pending_sel; 79 struct arch_vcpu_info arch; 80 struct pvclock_vcpu_time_info time; 81 }; /* 64 bytes (x86) */ 82 83 struct shared_info { 84 struct vcpu_info vcpu_info[32]; 85 unsigned long evtchn_pending[64]; 86 unsigned long evtchn_mask[64]; 87 struct pvclock_wall_clock wc; 88 uint32_t wc_sec_hi; 89 /* arch_shared_info here */ 90 }; 91 92 #define RUNSTATE_running 0 93 #define RUNSTATE_runnable 1 94 #define RUNSTATE_blocked 2 95 #define RUNSTATE_offline 3 96 97 static const char *runstate_names[] = { 98 "running", 99 "runnable", 100 "blocked", 101 "offline" 102 }; 103 104 struct { 105 struct kvm_irq_routing info; 106 struct kvm_irq_routing_entry entries[2]; 107 } irq_routes; 108 109 static void evtchn_handler(struct ex_regs *regs) 110 { 111 struct vcpu_info *vi = (void *)VCPU_INFO_VADDR; 112 vi->evtchn_upcall_pending = 0; 113 vi->evtchn_pending_sel = 0; 114 115 GUEST_SYNC(0x20); 116 } 117 118 static void guest_code(void) 119 { 120 struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR; 121 122 __asm__ __volatile__( 123 "sti\n" 124 "nop\n" 125 ); 126 127 /* Trigger an interrupt injection */ 128 GUEST_SYNC(0); 129 130 /* Test having the host set runstates manually */ 131 GUEST_SYNC(RUNSTATE_runnable); 132 GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0); 133 GUEST_ASSERT(rs->state == 0); 134 135 GUEST_SYNC(RUNSTATE_blocked); 136 GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0); 137 GUEST_ASSERT(rs->state == 0); 138 139 GUEST_SYNC(RUNSTATE_offline); 140 GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0); 141 GUEST_ASSERT(rs->state == 0); 142 143 /* Test runstate time adjust */ 144 GUEST_SYNC(4); 145 GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a); 146 GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b); 147 148 /* Test runstate time set */ 149 GUEST_SYNC(5); 150 GUEST_ASSERT(rs->state_entry_time >= 0x8000); 151 GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0); 152 GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b); 153 GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a); 154 155 /* sched_yield() should result in some 'runnable' time */ 156 GUEST_SYNC(6); 157 GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME); 158 159 /* Attempt to deliver a *masked* interrupt */ 160 GUEST_SYNC(7); 161 162 /* Wait until we see the bit set */ 163 struct shared_info *si = (void *)SHINFO_VADDR; 164 while (!si->evtchn_pending[0]) 165 __asm__ __volatile__ ("rep nop" : : : "memory"); 166 167 /* Now deliver an *unmasked* interrupt */ 168 GUEST_SYNC(8); 169 170 while (!si->evtchn_pending[1]) 171 __asm__ __volatile__ ("rep nop" : : : "memory"); 172 173 /* Change memslots and deliver an interrupt */ 174 GUEST_SYNC(9); 175 176 for (;;) 177 __asm__ __volatile__ ("rep nop" : : : "memory"); 178 } 179 180 static int cmp_timespec(struct timespec *a, struct timespec *b) 181 { 182 if (a->tv_sec > b->tv_sec) 183 return 1; 184 else if (a->tv_sec < b->tv_sec) 185 return -1; 186 else if (a->tv_nsec > b->tv_nsec) 187 return 1; 188 else if (a->tv_nsec < b->tv_nsec) 189 return -1; 190 else 191 return 0; 192 } 193 194 static void handle_alrm(int sig) 195 { 196 TEST_FAIL("IRQ delivery timed out"); 197 } 198 199 int main(int argc, char *argv[]) 200 { 201 struct timespec min_ts, max_ts, vm_ts; 202 bool verbose; 203 204 verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) || 205 !strncmp(argv[1], "--verbose", 10)); 206 207 int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); 208 if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO) ) { 209 print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available"); 210 exit(KSFT_SKIP); 211 } 212 213 bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE); 214 bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL); 215 216 clock_gettime(CLOCK_REALTIME, &min_ts); 217 218 vm = vm_create_default(VCPU_ID, 0, (void *) guest_code); 219 vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); 220 221 /* Map a region for the shared_info page */ 222 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 223 SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0); 224 virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2); 225 226 struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR); 227 228 int zero_fd = open("/dev/zero", O_RDONLY); 229 TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero"); 230 231 struct kvm_xen_hvm_config hvmc = { 232 .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, 233 .msr = XEN_HYPERCALL_MSR, 234 }; 235 vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc); 236 237 struct kvm_xen_hvm_attr lm = { 238 .type = KVM_XEN_ATTR_TYPE_LONG_MODE, 239 .u.long_mode = 1, 240 }; 241 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm); 242 243 struct kvm_xen_hvm_attr ha = { 244 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO, 245 .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE, 246 }; 247 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha); 248 249 /* 250 * Test what happens when the HVA of the shinfo page is remapped after 251 * the kernel has a reference to it. But make sure we copy the clock 252 * info over since that's only set at setup time, and we test it later. 253 */ 254 struct pvclock_wall_clock wc_copy = shinfo->wc; 255 void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0); 256 TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info"); 257 shinfo->wc = wc_copy; 258 259 struct kvm_xen_vcpu_attr vi = { 260 .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, 261 .u.gpa = VCPU_INFO_ADDR, 262 }; 263 vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &vi); 264 265 struct kvm_xen_vcpu_attr pvclock = { 266 .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO, 267 .u.gpa = PVTIME_ADDR, 268 }; 269 vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock); 270 271 struct kvm_xen_hvm_attr vec = { 272 .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR, 273 .u.vector = EVTCHN_VECTOR, 274 }; 275 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec); 276 277 vm_init_descriptor_tables(vm); 278 vcpu_init_descriptor_tables(vm, VCPU_ID); 279 vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler); 280 281 if (do_runstate_tests) { 282 struct kvm_xen_vcpu_attr st = { 283 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, 284 .u.gpa = RUNSTATE_ADDR, 285 }; 286 vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st); 287 } 288 289 int irq_fd[2] = { -1, -1 }; 290 291 if (do_eventfd_tests) { 292 irq_fd[0] = eventfd(0, 0); 293 irq_fd[1] = eventfd(0, 0); 294 295 /* Unexpected, but not a KVM failure */ 296 if (irq_fd[0] == -1 || irq_fd[1] == -1) 297 do_eventfd_tests = false; 298 } 299 300 if (do_eventfd_tests) { 301 irq_routes.info.nr = 2; 302 303 irq_routes.entries[0].gsi = 32; 304 irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN; 305 irq_routes.entries[0].u.xen_evtchn.port = 15; 306 irq_routes.entries[0].u.xen_evtchn.vcpu = VCPU_ID; 307 irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; 308 309 irq_routes.entries[1].gsi = 33; 310 irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN; 311 irq_routes.entries[1].u.xen_evtchn.port = 66; 312 irq_routes.entries[1].u.xen_evtchn.vcpu = VCPU_ID; 313 irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; 314 315 vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes); 316 317 struct kvm_irqfd ifd = { }; 318 319 ifd.fd = irq_fd[0]; 320 ifd.gsi = 32; 321 vm_ioctl(vm, KVM_IRQFD, &ifd); 322 323 ifd.fd = irq_fd[1]; 324 ifd.gsi = 33; 325 vm_ioctl(vm, KVM_IRQFD, &ifd); 326 327 struct sigaction sa = { }; 328 sa.sa_handler = handle_alrm; 329 sigaction(SIGALRM, &sa, NULL); 330 } 331 332 struct vcpu_info *vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR); 333 vinfo->evtchn_upcall_pending = 0; 334 335 struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR); 336 rs->state = 0x5a; 337 338 bool evtchn_irq_expected = false; 339 340 for (;;) { 341 volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); 342 struct ucall uc; 343 344 vcpu_run(vm, VCPU_ID); 345 346 TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, 347 "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", 348 run->exit_reason, 349 exit_reason_str(run->exit_reason)); 350 351 switch (get_ucall(vm, VCPU_ID, &uc)) { 352 case UCALL_ABORT: 353 TEST_FAIL("%s", (const char *)uc.args[0]); 354 /* NOT REACHED */ 355 case UCALL_SYNC: { 356 struct kvm_xen_vcpu_attr rst; 357 long rundelay; 358 359 if (do_runstate_tests) 360 TEST_ASSERT(rs->state_entry_time == rs->time[0] + 361 rs->time[1] + rs->time[2] + rs->time[3], 362 "runstate times don't add up"); 363 364 switch (uc.args[1]) { 365 case 0: 366 if (verbose) 367 printf("Delivering evtchn upcall\n"); 368 evtchn_irq_expected = true; 369 vinfo->evtchn_upcall_pending = 1; 370 break; 371 372 case RUNSTATE_runnable...RUNSTATE_offline: 373 TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen"); 374 if (!do_runstate_tests) 375 goto done; 376 if (verbose) 377 printf("Testing runstate %s\n", runstate_names[uc.args[1]]); 378 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT; 379 rst.u.runstate.state = uc.args[1]; 380 vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); 381 break; 382 383 case 4: 384 if (verbose) 385 printf("Testing RUNSTATE_ADJUST\n"); 386 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST; 387 memset(&rst.u, 0, sizeof(rst.u)); 388 rst.u.runstate.state = (uint64_t)-1; 389 rst.u.runstate.time_blocked = 390 0x5a - rs->time[RUNSTATE_blocked]; 391 rst.u.runstate.time_offline = 392 0x6b6b - rs->time[RUNSTATE_offline]; 393 rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked - 394 rst.u.runstate.time_offline; 395 vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); 396 break; 397 398 case 5: 399 if (verbose) 400 printf("Testing RUNSTATE_DATA\n"); 401 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA; 402 memset(&rst.u, 0, sizeof(rst.u)); 403 rst.u.runstate.state = RUNSTATE_running; 404 rst.u.runstate.state_entry_time = 0x6b6b + 0x5a; 405 rst.u.runstate.time_blocked = 0x6b6b; 406 rst.u.runstate.time_offline = 0x5a; 407 vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); 408 break; 409 410 case 6: 411 if (verbose) 412 printf("Testing steal time\n"); 413 /* Yield until scheduler delay exceeds target */ 414 rundelay = get_run_delay() + MIN_STEAL_TIME; 415 do { 416 sched_yield(); 417 } while (get_run_delay() < rundelay); 418 break; 419 420 case 7: 421 if (!do_eventfd_tests) 422 goto done; 423 if (verbose) 424 printf("Testing masked event channel\n"); 425 shinfo->evtchn_mask[0] = 0x8000; 426 eventfd_write(irq_fd[0], 1UL); 427 alarm(1); 428 break; 429 430 case 8: 431 if (verbose) 432 printf("Testing unmasked event channel\n"); 433 /* Unmask that, but deliver the other one */ 434 shinfo->evtchn_pending[0] = 0; 435 shinfo->evtchn_mask[0] = 0; 436 eventfd_write(irq_fd[1], 1UL); 437 evtchn_irq_expected = true; 438 alarm(1); 439 break; 440 441 case 9: 442 if (verbose) 443 printf("Testing event channel after memslot change\n"); 444 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 445 DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0); 446 eventfd_write(irq_fd[0], 1UL); 447 evtchn_irq_expected = true; 448 alarm(1); 449 break; 450 451 case 0x20: 452 TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ"); 453 evtchn_irq_expected = false; 454 if (shinfo->evtchn_pending[1] && 455 shinfo->evtchn_pending[0]) 456 goto done; 457 break; 458 } 459 break; 460 } 461 case UCALL_DONE: 462 goto done; 463 default: 464 TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); 465 } 466 } 467 468 done: 469 clock_gettime(CLOCK_REALTIME, &max_ts); 470 471 /* 472 * Just a *really* basic check that things are being put in the 473 * right place. The actual calculations are much the same for 474 * Xen as they are for the KVM variants, so no need to check. 475 */ 476 struct pvclock_wall_clock *wc; 477 struct pvclock_vcpu_time_info *ti, *ti2; 478 479 wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00); 480 ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20); 481 ti2 = addr_gpa2hva(vm, PVTIME_ADDR); 482 483 if (verbose) { 484 printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec); 485 printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n", 486 ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul, 487 ti->tsc_shift, ti->flags); 488 printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n", 489 ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul, 490 ti2->tsc_shift, ti2->flags); 491 } 492 493 vm_ts.tv_sec = wc->sec; 494 vm_ts.tv_nsec = wc->nsec; 495 TEST_ASSERT(wc->version && !(wc->version & 1), 496 "Bad wallclock version %x", wc->version); 497 TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old"); 498 TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new"); 499 500 TEST_ASSERT(ti->version && !(ti->version & 1), 501 "Bad time_info version %x", ti->version); 502 TEST_ASSERT(ti2->version && !(ti2->version & 1), 503 "Bad time_info version %x", ti->version); 504 505 if (do_runstate_tests) { 506 /* 507 * Fetch runstate and check sanity. Strictly speaking in the 508 * general case we might not expect the numbers to be identical 509 * but in this case we know we aren't running the vCPU any more. 510 */ 511 struct kvm_xen_vcpu_attr rst = { 512 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA, 513 }; 514 vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst); 515 516 if (verbose) { 517 printf("Runstate: %s(%d), entry %" PRIu64 " ns\n", 518 rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown", 519 rs->state, rs->state_entry_time); 520 for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) { 521 printf("State %s: %" PRIu64 " ns\n", 522 runstate_names[i], rs->time[i]); 523 } 524 } 525 TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch"); 526 TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time, 527 "State entry time mismatch"); 528 TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running, 529 "Running time mismatch"); 530 TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, 531 "Runnable time mismatch"); 532 TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, 533 "Blocked time mismatch"); 534 TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, 535 "Offline time mismatch"); 536 537 TEST_ASSERT(rs->state_entry_time == rs->time[0] + 538 rs->time[1] + rs->time[2] + rs->time[3], 539 "runstate times don't add up"); 540 } 541 kvm_vm_free(vm); 542 return 0; 543 } 544