// SPDX-License-Identifier: GPL-2.0-only
/*
 * xen_shinfo_test
 *
 * Copyright © 2021 Amazon.com, Inc. or its affiliates.
 *
 * Xen shared_info / pvclock testing
 */

#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"

#include <stdint.h>
#include <time.h>
#include <sched.h>
#include <signal.h>

#include <sys/eventfd.h>

#define VCPU_ID		5

#define SHINFO_REGION_GVA	0xc0000000ULL
#define SHINFO_REGION_GPA	0xc0000000ULL
#define SHINFO_REGION_SLOT	10
#define PAGE_SIZE		4096

#define DUMMY_REGION_GPA	(SHINFO_REGION_GPA + (2 * PAGE_SIZE))
#define DUMMY_REGION_SLOT	11

#define SHINFO_ADDR	(SHINFO_REGION_GPA)
#define PVTIME_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE)
#define RUNSTATE_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
#define VCPU_INFO_ADDR	(SHINFO_REGION_GPA + 0x40)

#define SHINFO_VADDR	(SHINFO_REGION_GVA)
#define RUNSTATE_VADDR	(SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
#define VCPU_INFO_VADDR	(SHINFO_REGION_GVA + 0x40)

#define EVTCHN_VECTOR	0x10

static struct kvm_vm *vm;

#define XEN_HYPERCALL_MSR	0x40000000

#define MIN_STEAL_TIME		50000

struct pvclock_vcpu_time_info {
	u32   version;
	u32   pad0;
	u64   tsc_timestamp;
	u64   system_time;
	u32   tsc_to_system_mul;
	s8    tsc_shift;
	u8    flags;
	u8    pad[2];
} __attribute__((__packed__)); /* 32 bytes */

struct pvclock_wall_clock {
	u32   version;
	u32   sec;
	u32   nsec;
} __attribute__((__packed__));

struct vcpu_runstate_info {
	uint32_t state;
	uint64_t state_entry_time;
	uint64_t time[4];
};

struct arch_vcpu_info {
	unsigned long cr2;
	unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
};

struct vcpu_info {
	uint8_t evtchn_upcall_pending;
	uint8_t evtchn_upcall_mask;
	unsigned long evtchn_pending_sel;
	struct arch_vcpu_info arch;
	struct pvclock_vcpu_time_info time;
}; /* 64 bytes (x86) */

struct shared_info {
	struct vcpu_info vcpu_info[32];
	unsigned long evtchn_pending[64];
	unsigned long evtchn_mask[64];
	struct pvclock_wall_clock wc;
	uint32_t wc_sec_hi;
	/* arch_shared_info here */
};

#define RUNSTATE_running  0
#define RUNSTATE_runnable 1
#define RUNSTATE_blocked  2
#define RUNSTATE_offline  3

static const char *runstate_names[] = {
	"running",
	"runnable",
	"blocked",
	"offline"
};

struct {
	struct kvm_irq_routing info;
	struct kvm_irq_routing_entry entries[2];
} irq_routes;

static void evtchn_handler(struct ex_regs *regs)
{
	struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
	vi->evtchn_upcall_pending = 0;
	vi->evtchn_pending_sel = 0;

	GUEST_SYNC(0x20);
}

static void guest_code(void)
{
	struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;

	__asm__ __volatile__(
		"sti\n"
		"nop\n"
	);

	/* Trigger an interrupt injection */
	GUEST_SYNC(0);

	/* Test having the host set runstates manually */
	GUEST_SYNC(RUNSTATE_runnable);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
	GUEST_ASSERT(rs->state == 0);

	GUEST_SYNC(RUNSTATE_blocked);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
	GUEST_ASSERT(rs->state == 0);

	GUEST_SYNC(RUNSTATE_offline);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
	GUEST_ASSERT(rs->state == 0);

	/* Test runstate time adjust */
	GUEST_SYNC(4);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);

	/* Test runstate time set */
	GUEST_SYNC(5);
	GUEST_ASSERT(rs->state_entry_time >= 0x8000);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);

	/* sched_yield() should result in some 'runnable' time */
	GUEST_SYNC(6);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);

	/* Attempt to deliver a *masked* interrupt */
	GUEST_SYNC(7);

	/* Wait until we see the bit set */
	struct shared_info *si = (void *)SHINFO_VADDR;
	while (!si->evtchn_pending[0])
		__asm__ __volatile__ ("rep nop" : : : "memory");

	/* Now deliver an *unmasked* interrupt */
	GUEST_SYNC(8);

	while (!si->evtchn_pending[1])
		__asm__ __volatile__ ("rep nop" : : : "memory");

	/* Change memslots and deliver an interrupt */
	GUEST_SYNC(9);

	for (;;)
		__asm__ __volatile__ ("rep nop" : : : "memory");
}

static int cmp_timespec(struct timespec *a, struct timespec *b)
{
	if (a->tv_sec > b->tv_sec)
		return 1;
	else if (a->tv_sec < b->tv_sec)
		return -1;
	else if (a->tv_nsec > b->tv_nsec)
		return 1;
	else if (a->tv_nsec < b->tv_nsec)
		return -1;
	else
		return 0;
}

static void handle_alrm(int sig)
{
	TEST_FAIL("IRQ delivery timed out");
}

int main(int argc, char *argv[])
{
	struct timespec min_ts, max_ts, vm_ts;
	bool verbose;

	verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
			       !strncmp(argv[1], "--verbose", 10));

	int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
	if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO)) {
		print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available");
		exit(KSFT_SKIP);
	}

	bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
	bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);

	clock_gettime(CLOCK_REALTIME, &min_ts);

	vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());

	/* Map a region for the shared_info page */
	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
				    SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
	virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2);

	struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR);

	int zero_fd = open("/dev/zero", O_RDONLY);
	TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");

	struct kvm_xen_hvm_config hvmc = {
		.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
		.msr = XEN_HYPERCALL_MSR,
	};
	vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);

	struct kvm_xen_hvm_attr lm = {
		.type = KVM_XEN_ATTR_TYPE_LONG_MODE,
		.u.long_mode = 1,
	};
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);

	struct kvm_xen_hvm_attr ha = {
		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
		.u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE,
	};
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);

	/*
	 * Test what happens when the HVA of the shinfo page is remapped after
	 * the kernel has a reference to it. But make sure we copy the clock
	 * info over since that's only set at setup time, and we test it later.
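	 * (This exercises how KVM copes with the shared_info page's host
	 * mapping changing underneath it after the GFN has been registered.)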
	 */
	struct pvclock_wall_clock wc_copy = shinfo->wc;
	void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
	TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
	shinfo->wc = wc_copy;

	struct kvm_xen_vcpu_attr vi = {
		.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
		.u.gpa = VCPU_INFO_ADDR,
	};
	vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &vi);

	struct kvm_xen_vcpu_attr pvclock = {
		.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
		.u.gpa = PVTIME_ADDR,
	};
	vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock);

	struct kvm_xen_hvm_attr vec = {
		.type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
		.u.vector = EVTCHN_VECTOR,
	};
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);

	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vm, VCPU_ID);
	vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);

	if (do_runstate_tests) {
		struct kvm_xen_vcpu_attr st = {
			.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
			.u.gpa = RUNSTATE_ADDR,
		};
		vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st);
	}

	int irq_fd[2] = { -1, -1 };

	if (do_eventfd_tests) {
		irq_fd[0] = eventfd(0, 0);
		irq_fd[1] = eventfd(0, 0);

		/* Unexpected, but not a KVM failure */
		if (irq_fd[0] == -1 || irq_fd[1] == -1)
			do_eventfd_tests = false;
	}

	if (do_eventfd_tests) {
		irq_routes.info.nr = 2;

		irq_routes.entries[0].gsi = 32;
		irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
		irq_routes.entries[0].u.xen_evtchn.port = 15;
		irq_routes.entries[0].u.xen_evtchn.vcpu = VCPU_ID;
		irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

		irq_routes.entries[1].gsi = 33;
		irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
		irq_routes.entries[1].u.xen_evtchn.port = 66;
		irq_routes.entries[1].u.xen_evtchn.vcpu = VCPU_ID;
		irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

		vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes);

		struct kvm_irqfd ifd = { };

		ifd.fd = irq_fd[0];
		ifd.gsi = 32;
		vm_ioctl(vm, KVM_IRQFD, &ifd);

		ifd.fd = irq_fd[1];
		ifd.gsi = 33;
		vm_ioctl(vm, KVM_IRQFD, &ifd);

		struct sigaction sa = { };
		sa.sa_handler = handle_alrm;
		sigaction(SIGALRM, &sa, NULL);
	}

	struct vcpu_info *vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
	vinfo->evtchn_upcall_pending = 0;

	struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
	rs->state = 0x5a;

	bool evtchn_irq_expected = false;

	for (;;) {
		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
		struct ucall uc;

		vcpu_run(vm, VCPU_ID);

		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
			    run->exit_reason,
			    exit_reason_str(run->exit_reason));

		switch (get_ucall(vm, VCPU_ID, &uc)) {
		case UCALL_ABORT:
			TEST_FAIL("%s", (const char *)uc.args[0]);
			/* NOT REACHED */
		case UCALL_SYNC: {
			struct kvm_xen_vcpu_attr rst;
			long rundelay;

			if (do_runstate_tests)
				TEST_ASSERT(rs->state_entry_time == rs->time[0] +
					    rs->time[1] + rs->time[2] + rs->time[3],
					    "runstate times don't add up");

			switch (uc.args[1]) {
			case 0:
				if (verbose)
					printf("Delivering evtchn upcall\n");
				evtchn_irq_expected = true;
				vinfo->evtchn_upcall_pending = 1;
				break;

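			/*
			 * Cases 1-3: have the host force each non-running
			 * state in turn via RUNSTATE_CURRENT; the guest then
			 * checks the accumulated runstate times.
			 */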
			case RUNSTATE_runnable...RUNSTATE_offline:
				TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
				if (!do_runstate_tests)
					goto done;
				if (verbose)
					printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
				rst.u.runstate.state = uc.args[1];
				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
				break;

			case 4:
				if (verbose)
					printf("Testing RUNSTATE_ADJUST\n");
				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
				memset(&rst.u, 0, sizeof(rst.u));
				rst.u.runstate.state = (uint64_t)-1;
				rst.u.runstate.time_blocked =
					0x5a - rs->time[RUNSTATE_blocked];
				rst.u.runstate.time_offline =
					0x6b6b - rs->time[RUNSTATE_offline];
				rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
					rst.u.runstate.time_offline;
				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
				break;

			case 5:
				if (verbose)
					printf("Testing RUNSTATE_DATA\n");
				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
				memset(&rst.u, 0, sizeof(rst.u));
				rst.u.runstate.state = RUNSTATE_running;
				rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
				rst.u.runstate.time_blocked = 0x6b6b;
				rst.u.runstate.time_offline = 0x5a;
				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
				break;

			case 6:
				if (verbose)
					printf("Testing steal time\n");
				/* Yield until scheduler delay exceeds target */
				rundelay = get_run_delay() + MIN_STEAL_TIME;
				do {
					sched_yield();
				} while (get_run_delay() < rundelay);
				break;

			case 7:
				if (!do_eventfd_tests)
					goto done;
				if (verbose)
					printf("Testing masked event channel\n");
				shinfo->evtchn_mask[0] = 0x8000;
				eventfd_write(irq_fd[0], 1UL);
				alarm(1);
				break;

			case 8:
				if (verbose)
					printf("Testing unmasked event channel\n");
				/* Unmask that, but deliver the other one */
				shinfo->evtchn_pending[0] = 0;
				shinfo->evtchn_mask[0] = 0;
				eventfd_write(irq_fd[1], 1UL);
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case 9:
				if (verbose)
					printf("Testing event channel after memslot change\n");
				vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
							    DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
				eventfd_write(irq_fd[0], 1UL);
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case 0x20:
				TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
				evtchn_irq_expected = false;
				if (shinfo->evtchn_pending[1] &&
				    shinfo->evtchn_pending[0])
					goto done;
				break;
			}
			break;
		}
		case UCALL_DONE:
			goto done;
		default:
			TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
		}
	}

 done:
	clock_gettime(CLOCK_REALTIME, &max_ts);

	/*
	 * Just a *really* basic check that things are being put in the
	 * right place. The actual calculations are much the same for
	 * Xen as they are for the KVM variants, so no need to check.
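	 * (For reference: 0xc00 is the offset of 'wc' within the struct
	 * shared_info defined above, and SHINFO_REGION_GPA + 0x40 + 0x20 is
	 * the 'time' field of the vcpu_info registered earlier.)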
	 */
	struct pvclock_wall_clock *wc;
	struct pvclock_vcpu_time_info *ti, *ti2;

	wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
	ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
	ti2 = addr_gpa2hva(vm, PVTIME_ADDR);

	if (verbose) {
		printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
		printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
		       ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
		       ti->tsc_shift, ti->flags);
		printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
		       ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
		       ti2->tsc_shift, ti2->flags);
	}

	vm_ts.tv_sec = wc->sec;
	vm_ts.tv_nsec = wc->nsec;
	TEST_ASSERT(wc->version && !(wc->version & 1),
		    "Bad wallclock version %x", wc->version);
	TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old");
	TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new");

	TEST_ASSERT(ti->version && !(ti->version & 1),
		    "Bad time_info version %x", ti->version);
	TEST_ASSERT(ti2->version && !(ti2->version & 1),
		    "Bad time_info version %x", ti2->version);

	if (do_runstate_tests) {
		/*
		 * Fetch runstate and check sanity. Strictly speaking in the
		 * general case we might not expect the numbers to be identical
		 * but in this case we know we aren't running the vCPU any more.
		 */
		struct kvm_xen_vcpu_attr rst = {
			.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
		};
		vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst);

		if (verbose) {
			printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
			       rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
			       rs->state, rs->state_entry_time);
			for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
				printf("State %s: %" PRIu64 " ns\n",
				       runstate_names[i], rs->time[i]);
			}
		}
		TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
		TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
			    "State entry time mismatch");
		TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
			    "Running time mismatch");
		TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
			    "Runnable time mismatch");
		TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
			    "Blocked time mismatch");
		TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
			    "Offline time mismatch");

		TEST_ASSERT(rs->state_entry_time == rs->time[0] +
			    rs->time[1] + rs->time[2] + rs->time[3],
			    "runstate times don't add up");
	}
	kvm_vm_free(vm);
	return 0;
}