1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright © 2021 Amazon.com, Inc. or its affiliates.
4 */
5
6 #include "test_util.h"
7 #include "kvm_util.h"
8 #include "processor.h"
9
10 #include <stdint.h>
11 #include <time.h>
12 #include <sched.h>
13 #include <signal.h>
14 #include <pthread.h>
15
16 #include <sys/eventfd.h>
17
/*
 * Guest memory layout used by the test. The shared_info region is mapped
 * by main() with identical guest-virtual and guest-physical addresses, so
 * the *_VADDR and *_ADDR flavours below have the same numeric value.
 */
#define SHINFO_REGION_GVA 0xc0000000ULL
#define SHINFO_REGION_GPA 0xc0000000ULL
#define SHINFO_REGION_SLOT 10

/* Extra memslots added mid-test by main() to force a memslot change. */
#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (3 * PAGE_SIZE))
#define DUMMY_REGION_SLOT 11

#define DUMMY_REGION_GPA_2 (SHINFO_REGION_GPA + (4 * PAGE_SIZE))
#define DUMMY_REGION_SLOT_2 12

/* Guest-physical addresses of the structures handed to KVM. */
#define SHINFO_ADDR (SHINFO_REGION_GPA)
#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40)
#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE)
/* 15 bytes before the end of the region's second page. */
#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15)

/* Guest-virtual addresses of the same structures, used by guest code. */
#define SHINFO_VADDR (SHINFO_REGION_GVA)
#define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40)
#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15)

/* Vector configured as the Xen upcall vector; handled by evtchn_handler(). */
#define EVTCHN_VECTOR 0x10

/* Event channel ports used for the two irqfd routes and the timer. */
#define EVTCHN_TEST1 15
#define EVTCHN_TEST2 66
#define EVTCHN_TIMER 13
42
/*
 * Test stages. The guest passes one of these to GUEST_SYNC() and main()
 * dispatches on the value to perform the host half of that stage.
 *
 * The TEST_RUNSTATE_{runnable,blocked,offline} values are also used by
 * main() as an index into runstate_names[] and are converted to the
 * matching RUNSTATE_* value, so their position right after
 * TEST_INJECT_VECTOR must be kept.
 */
enum {
	TEST_INJECT_VECTOR = 0,
	TEST_RUNSTATE_runnable,
	TEST_RUNSTATE_blocked,
	TEST_RUNSTATE_offline,
	TEST_RUNSTATE_ADJUST,
	TEST_RUNSTATE_DATA,
	TEST_STEAL_TIME,
	TEST_EVTCHN_MASKED,
	TEST_EVTCHN_UNMASKED,
	TEST_EVTCHN_SLOWPATH,
	TEST_EVTCHN_SEND_IOCTL,
	TEST_EVTCHN_HCALL,
	TEST_EVTCHN_HCALL_SLOWPATH,
	TEST_EVTCHN_HCALL_EVENTFD,
	TEST_TIMER_SETUP,
	TEST_TIMER_WAIT,
	TEST_TIMER_RESTORE,
	TEST_POLL_READY,
	TEST_POLL_TIMEOUT,
	TEST_POLL_MASKED,
	TEST_POLL_WAKE,
	TEST_TIMER_PAST,
	TEST_LOCKING_SEND_RACE,
	TEST_LOCKING_POLL_RACE,
	TEST_LOCKING_POLL_TIMEOUT,
	TEST_DONE,

	/* Not a stage: reported by evtchn_handler() when the upcall fires. */
	TEST_GUEST_SAW_IRQ,
};
73
/* MSR index passed to KVM_XEN_HVM_CONFIG as the hypercall MSR. */
#define XEN_HYPERCALL_MSR 0x40000000

/*
 * Minimum amount of 'runnable' time the guest expects to see after the
 * steal time stage (presumably nanoseconds -- TODO confirm).
 */
#define MIN_STEAL_TIME 50000

#define SHINFO_RACE_TIMEOUT 2 /* seconds */

/* Hypercall numbers used by guest_code(). */
#define __HYPERVISOR_set_timer_op 15
#define __HYPERVISOR_sched_op 29
#define __HYPERVISOR_event_channel_op 32

/* Sub-operation of __HYPERVISOR_sched_op. */
#define SCHEDOP_poll 3

/* Sub-operation of __HYPERVISOR_event_channel_op. */
#define EVTCHNOP_send 4

/* Event channel type used when configuring outbound ports in main(). */
#define EVTCHNSTAT_interdomain 2
89
/* Argument block for the EVTCHNOP_send hypercall issued by guest_code(). */
struct evtchn_send {
	u32 port;	/* event channel port to send on */
};
93
/* Argument block for the SCHEDOP_poll hypercall issued by guest_code(). */
struct sched_poll {
	u32 *ports;		/* array of ports to poll on */
	unsigned int nr_ports;	/* number of entries in ports[] */
	u64 timeout;		/* 0 is used by the guest for polls that are woken by an event */
};
99
/*
 * Per-vCPU time information read back by main(), which checks that
 * 'version' is non-zero and even.
 */
struct pvclock_vcpu_time_info {
	u32 version;
	u32 pad0;
	u64 tsc_timestamp;
	u64 system_time;
	u32 tsc_to_system_mul;
	s8 tsc_shift;
	u8 flags;
	u8 pad[2];
} __attribute__((__packed__)); /* 32 bytes */
110
/*
 * Wall clock record embedded in struct shared_info. main() checks that
 * 'version' is non-zero and even, and that sec/nsec fall between two host
 * CLOCK_REALTIME samples.
 */
struct pvclock_wall_clock {
	u32 version;
	u32 sec;
	u32 nsec;
} __attribute__((__packed__));
116
/*
 * Runstate area as seen in 64-bit mode: current state, the time it was
 * entered, and one accumulated time per RUNSTATE_* value. No packing
 * attribute, so the compiler's natural alignment applies.
 */
struct vcpu_runstate_info {
	uint32_t state;
	uint64_t state_entry_time;
	uint64_t time[5]; /* Extra field for overrun check */
};
122
/*
 * Runstate area as seen in compatibility (32-bit) mode. Same members as
 * struct vcpu_runstate_info, but packed so that state_entry_time directly
 * follows the 32-bit state field with no padding.
 */
struct compat_vcpu_runstate_info {
	uint32_t state;
	uint64_t state_entry_time;
	uint64_t time[5];	/* fifth entry is only used as an overrun canary */
} __attribute__((__packed__));
128
/* Architecture specific part of struct vcpu_info. */
struct arch_vcpu_info {
	unsigned long cr2;
	unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
};
133
/*
 * Per-vCPU event channel state. The guest's upcall handler clears
 * evtchn_upcall_pending and evtchn_pending_sel; main() sets
 * evtchn_upcall_pending directly to inject the first upcall.
 */
struct vcpu_info {
	uint8_t evtchn_upcall_pending;
	uint8_t evtchn_upcall_mask;
	unsigned long evtchn_pending_sel;
	struct arch_vcpu_info arch;
	struct pvclock_vcpu_time_info time;	/* main() reads this at offset 0x20 */
}; /* 64 bytes (x86) */
141
/*
 * Layout of the shared_info page. The test manipulates evtchn_pending[]
 * and evtchn_mask[] from the host, and main() reads the wall clock back
 * from offset 0xc00 of the page.
 */
struct shared_info {
	struct vcpu_info vcpu_info[32];
	unsigned long evtchn_pending[64];
	unsigned long evtchn_mask[64];
	struct pvclock_wall_clock wc;
	uint32_t wc_sec_hi;
	/* arch_shared_info here */
};
150
/* Runstate values; also the index of each state's slot in the time[] arrays. */
#define RUNSTATE_running 0
#define RUNSTATE_runnable 1
#define RUNSTATE_blocked 2
#define RUNSTATE_offline 3

/* Printable names for verbose output, indexed by RUNSTATE_* value. */
static const char *runstate_names[] = {
	"running",
	"runnable",
	"blocked",
	"offline"
};
162
/*
 * GSI routing table passed to KVM_SET_GSI_ROUTING by main(): the header
 * followed by the two Xen event channel routes it fills in.
 */
struct {
	struct kvm_irq_routing info;
	struct kvm_irq_routing_entry entries[2];
} irq_routes;
167
/* Set by evtchn_handler() in the guest; consumed by guest_wait_for_irq(). */
static volatile bool guest_saw_irq;
169
evtchn_handler(struct ex_regs * regs)170 static void evtchn_handler(struct ex_regs *regs)
171 {
172 struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
173 vi->evtchn_upcall_pending = 0;
174 vi->evtchn_pending_sel = 0;
175 guest_saw_irq = true;
176
177 GUEST_SYNC(TEST_GUEST_SAW_IRQ);
178 }
179
guest_wait_for_irq(void)180 static void guest_wait_for_irq(void)
181 {
182 while (!guest_saw_irq)
183 __asm__ __volatile__ ("rep nop" : : : "memory");
184 guest_saw_irq = false;
185 }
186
/*
 * Guest half of the test. Walks through the test stages in order: each
 * GUEST_SYNC(stage) hands control to the host, which performs the host
 * side of that stage in main(), after which the guest checks the result
 * (an assertion on the runstate area, or waiting for an event channel
 * upcall). The order of the statements is the protocol with main() and
 * must not be changed independently of it.
 */
static void guest_code(void)
{
	struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
	int i;

	/* Enable interrupts so that the event channel upcall can be taken. */
	__asm__ __volatile__(
		"sti\n"
		"nop\n"
	);

	/* Trigger an interrupt injection */
	GUEST_SYNC(TEST_INJECT_VECTOR);

	guest_wait_for_irq();

	/* Test having the host set runstates manually */
	GUEST_SYNC(TEST_RUNSTATE_runnable);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
	GUEST_ASSERT(rs->state == 0);

	GUEST_SYNC(TEST_RUNSTATE_blocked);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
	GUEST_ASSERT(rs->state == 0);

	GUEST_SYNC(TEST_RUNSTATE_offline);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
	GUEST_ASSERT(rs->state == 0);

	/* Test runstate time adjust */
	GUEST_SYNC(TEST_RUNSTATE_ADJUST);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);

	/* Test runstate time set */
	GUEST_SYNC(TEST_RUNSTATE_DATA);
	GUEST_ASSERT(rs->state_entry_time >= 0x8000);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);

	/* sched_yield() should result in some 'runnable' time */
	GUEST_SYNC(TEST_STEAL_TIME);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);

	/* Attempt to deliver a *masked* interrupt */
	GUEST_SYNC(TEST_EVTCHN_MASKED);

	/* Wait until we see the bit set */
	struct shared_info *si = (void *)SHINFO_VADDR;
	while (!si->evtchn_pending[0])
		__asm__ __volatile__ ("rep nop" : : : "memory");

	/* Now deliver an *unmasked* interrupt */
	GUEST_SYNC(TEST_EVTCHN_UNMASKED);

	guest_wait_for_irq();

	/* Change memslots and deliver an interrupt */
	GUEST_SYNC(TEST_EVTCHN_SLOWPATH);

	guest_wait_for_irq();

	/* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */
	GUEST_SYNC(TEST_EVTCHN_SEND_IOCTL);

	guest_wait_for_irq();

	GUEST_SYNC(TEST_EVTCHN_HCALL);

	/* Our turn. Deliver event channel (to ourselves) with
	 * EVTCHNOP_send hypercall. */
	struct evtchn_send s = { .port = 127 };
	xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);

	guest_wait_for_irq();

	GUEST_SYNC(TEST_EVTCHN_HCALL_SLOWPATH);

	/*
	 * Same again, but this time the host has messed with memslots so it
	 * should take the slow path in kvm_xen_set_evtchn().
	 */
	xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);

	guest_wait_for_irq();

	GUEST_SYNC(TEST_EVTCHN_HCALL_EVENTFD);

	/* Deliver "outbound" event channel to an eventfd which
	 * happens to be one of our own irqfds. */
	s.port = 197;
	xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);

	guest_wait_for_irq();

	GUEST_SYNC(TEST_TIMER_SETUP);

	/* Set a timer 100ms in the future. */
	xen_hypercall(__HYPERVISOR_set_timer_op,
		      rs->state_entry_time + 100000000, NULL);

	GUEST_SYNC(TEST_TIMER_WAIT);

	/* Now wait for the timer */
	guest_wait_for_irq();

	GUEST_SYNC(TEST_TIMER_RESTORE);

	/* The host has 'restored' the timer. Just wait for it. */
	guest_wait_for_irq();

	GUEST_SYNC(TEST_POLL_READY);

	/* Poll for an event channel port which is already set */
	u32 ports[1] = { EVTCHN_TIMER };
	struct sched_poll p = {
		.ports = ports,
		.nr_ports = 1,
		.timeout = 0,
	};

	xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);

	GUEST_SYNC(TEST_POLL_TIMEOUT);

	/* Poll for an unset port and wait for the timeout. */
	p.timeout = 100000000;
	xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);

	GUEST_SYNC(TEST_POLL_MASKED);

	/* A timer will wake the masked port we're waiting on, while we poll */
	p.timeout = 0;
	xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);

	GUEST_SYNC(TEST_POLL_WAKE);

	/* A timer will wake an *unmasked* port which should wake us with an
	 * actual interrupt, while we're polling on a different port. */
	ports[0]++;
	p.timeout = 0;
	xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);

	guest_wait_for_irq();

	GUEST_SYNC(TEST_TIMER_PAST);

	/* Timer should have fired already */
	guest_wait_for_irq();

	GUEST_SYNC(TEST_LOCKING_SEND_RACE);
	/* Racing host ioctls */

	guest_wait_for_irq();

	GUEST_SYNC(TEST_LOCKING_POLL_RACE);
	/* Racing vmcall against host ioctl */

	ports[0] = 0;

	p = (struct sched_poll) {
		.ports = ports,
		.nr_ports = 1,
		.timeout = 0
	};

wait_for_timer:
	/*
	 * Poll for a timer wake event while the worker thread is mucking with
	 * the shared info. KVM XEN drops timer IRQs if the shared info is
	 * invalid when the timer expires. Arbitrarily poll 100 times before
	 * giving up and asking the VMM to re-arm the timer. 100 polls should
	 * consume enough time to beat on KVM without taking too long if the
	 * timer IRQ is dropped due to an invalid event channel.
	 */
	for (i = 0; i < 100 && !guest_saw_irq; i++)
		__xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);

	/*
	 * Re-send the timer IRQ if it was (likely) dropped due to the timer
	 * expiring while the event channel was invalid.
	 */
	if (!guest_saw_irq) {
		GUEST_SYNC(TEST_LOCKING_POLL_TIMEOUT);
		goto wait_for_timer;
	}
	/* Consume the IRQ here rather than via guest_wait_for_irq(). */
	guest_saw_irq = false;

	GUEST_SYNC(TEST_DONE);
}
377
/*
 * Three-way comparison of two timespecs.
 *
 * Returns 1 if *a is later than *b, -1 if it is earlier, 0 if both the
 * seconds and nanoseconds fields are equal.
 */
static int cmp_timespec(struct timespec *a, struct timespec *b)
{
	/* Seconds dominate; nanoseconds only break ties. */
	if (a->tv_sec != b->tv_sec)
		return a->tv_sec > b->tv_sec ? 1 : -1;

	if (a->tv_nsec != b->tv_nsec)
		return a->tv_nsec > b->tv_nsec ? 1 : -1;

	return 0;
}
391
/*
 * Host-side state shared between main() and the SIGALRM handler:
 * the host mapping of the guest's vcpu_info and the vCPU under test.
 */
static struct vcpu_info *vinfo;
static struct kvm_vcpu *vcpu;
394
handle_alrm(int sig)395 static void handle_alrm(int sig)
396 {
397 if (vinfo)
398 printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending);
399 vcpu_dump(stdout, vcpu, 0);
400 TEST_FAIL("IRQ delivery timed out");
401 }
402
juggle_shinfo_state(void * arg)403 static void *juggle_shinfo_state(void *arg)
404 {
405 struct kvm_vm *vm = (struct kvm_vm *)arg;
406
407 struct kvm_xen_hvm_attr cache_activate = {
408 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
409 .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE
410 };
411
412 struct kvm_xen_hvm_attr cache_deactivate = {
413 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
414 .u.shared_info.gfn = KVM_XEN_INVALID_GFN
415 };
416
417 for (;;) {
418 __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate);
419 __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate);
420 pthread_testcancel();
421 }
422
423 return NULL;
424 }
425
main(int argc,char * argv[])426 int main(int argc, char *argv[])
427 {
428 struct timespec min_ts, max_ts, vm_ts;
429 struct kvm_xen_hvm_attr evt_reset;
430 struct kvm_vm *vm;
431 pthread_t thread;
432 bool verbose;
433 int ret;
434
435 verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
436 !strncmp(argv[1], "--verbose", 10));
437
438 int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
439 TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO);
440
441 bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
442 bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG);
443 bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
444 bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);
445
446 clock_gettime(CLOCK_REALTIME, &min_ts);
447
448 vm = vm_create_with_one_vcpu(&vcpu, guest_code);
449
450 /* Map a region for the shared_info page */
451 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
452 SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0);
453 virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3);
454
455 struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
456
457 int zero_fd = open("/dev/zero", O_RDONLY);
458 TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");
459
460 struct kvm_xen_hvm_config hvmc = {
461 .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
462 .msr = XEN_HYPERCALL_MSR,
463 };
464
465 /* Let the kernel know that we *will* use it for sending all
466 * event channels, which lets it intercept SCHEDOP_poll */
467 if (do_evtchn_tests)
468 hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
469
470 vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
471
472 struct kvm_xen_hvm_attr lm = {
473 .type = KVM_XEN_ATTR_TYPE_LONG_MODE,
474 .u.long_mode = 1,
475 };
476 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
477
478 if (do_runstate_flag) {
479 struct kvm_xen_hvm_attr ruf = {
480 .type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG,
481 .u.runstate_update_flag = 1,
482 };
483 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf);
484
485 ruf.u.runstate_update_flag = 0;
486 vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf);
487 TEST_ASSERT(ruf.u.runstate_update_flag == 1,
488 "Failed to read back RUNSTATE_UPDATE_FLAG attr");
489 }
490
491 struct kvm_xen_hvm_attr ha = {
492 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
493 .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE,
494 };
495 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
496
497 /*
498 * Test what happens when the HVA of the shinfo page is remapped after
499 * the kernel has a reference to it. But make sure we copy the clock
500 * info over since that's only set at setup time, and we test it later.
501 */
502 struct pvclock_wall_clock wc_copy = shinfo->wc;
503 void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
504 TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
505 shinfo->wc = wc_copy;
506
507 struct kvm_xen_vcpu_attr vi = {
508 .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
509 .u.gpa = VCPU_INFO_ADDR,
510 };
511 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi);
512
513 struct kvm_xen_vcpu_attr pvclock = {
514 .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
515 .u.gpa = PVTIME_ADDR,
516 };
517 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock);
518
519 struct kvm_xen_hvm_attr vec = {
520 .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
521 .u.vector = EVTCHN_VECTOR,
522 };
523 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);
524
525 vm_init_descriptor_tables(vm);
526 vcpu_init_descriptor_tables(vcpu);
527 vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);
528
529 if (do_runstate_tests) {
530 struct kvm_xen_vcpu_attr st = {
531 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
532 .u.gpa = RUNSTATE_ADDR,
533 };
534 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
535 }
536
537 int irq_fd[2] = { -1, -1 };
538
539 if (do_eventfd_tests) {
540 irq_fd[0] = eventfd(0, 0);
541 irq_fd[1] = eventfd(0, 0);
542
543 /* Unexpected, but not a KVM failure */
544 if (irq_fd[0] == -1 || irq_fd[1] == -1)
545 do_evtchn_tests = do_eventfd_tests = false;
546 }
547
548 if (do_eventfd_tests) {
549 irq_routes.info.nr = 2;
550
551 irq_routes.entries[0].gsi = 32;
552 irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
553 irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1;
554 irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id;
555 irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
556
557 irq_routes.entries[1].gsi = 33;
558 irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
559 irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2;
560 irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id;
561 irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
562
563 vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);
564
565 struct kvm_irqfd ifd = { };
566
567 ifd.fd = irq_fd[0];
568 ifd.gsi = 32;
569 vm_ioctl(vm, KVM_IRQFD, &ifd);
570
571 ifd.fd = irq_fd[1];
572 ifd.gsi = 33;
573 vm_ioctl(vm, KVM_IRQFD, &ifd);
574
575 struct sigaction sa = { };
576 sa.sa_handler = handle_alrm;
577 sigaction(SIGALRM, &sa, NULL);
578 }
579
580 struct kvm_xen_vcpu_attr tmr = {
581 .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
582 .u.timer.port = EVTCHN_TIMER,
583 .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
584 .u.timer.expires_ns = 0
585 };
586
587 if (do_evtchn_tests) {
588 struct kvm_xen_hvm_attr inj = {
589 .type = KVM_XEN_ATTR_TYPE_EVTCHN,
590 .u.evtchn.send_port = 127,
591 .u.evtchn.type = EVTCHNSTAT_interdomain,
592 .u.evtchn.flags = 0,
593 .u.evtchn.deliver.port.port = EVTCHN_TEST1,
594 .u.evtchn.deliver.port.vcpu = vcpu->id + 1,
595 .u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
596 };
597 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
598
599 /* Test migration to a different vCPU */
600 inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE;
601 inj.u.evtchn.deliver.port.vcpu = vcpu->id;
602 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
603
604 inj.u.evtchn.send_port = 197;
605 inj.u.evtchn.deliver.eventfd.port = 0;
606 inj.u.evtchn.deliver.eventfd.fd = irq_fd[1];
607 inj.u.evtchn.flags = 0;
608 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
609
610 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
611 }
612 vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
613 vinfo->evtchn_upcall_pending = 0;
614
615 struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
616 rs->state = 0x5a;
617
618 bool evtchn_irq_expected = false;
619
620 for (;;) {
621 struct ucall uc;
622
623 vcpu_run(vcpu);
624 TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
625
626 switch (get_ucall(vcpu, &uc)) {
627 case UCALL_ABORT:
628 REPORT_GUEST_ASSERT(uc);
629 /* NOT REACHED */
630 case UCALL_SYNC: {
631 struct kvm_xen_vcpu_attr rst;
632 long rundelay;
633
634 if (do_runstate_tests)
635 TEST_ASSERT(rs->state_entry_time == rs->time[0] +
636 rs->time[1] + rs->time[2] + rs->time[3],
637 "runstate times don't add up");
638
639 switch (uc.args[1]) {
640 case TEST_INJECT_VECTOR:
641 if (verbose)
642 printf("Delivering evtchn upcall\n");
643 evtchn_irq_expected = true;
644 vinfo->evtchn_upcall_pending = 1;
645 break;
646
647 case TEST_RUNSTATE_runnable...TEST_RUNSTATE_offline:
648 TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
649 if (!do_runstate_tests)
650 goto done;
651 if (verbose)
652 printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
653 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
654 rst.u.runstate.state = uc.args[1] + RUNSTATE_runnable -
655 TEST_RUNSTATE_runnable;
656 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
657 break;
658
659 case TEST_RUNSTATE_ADJUST:
660 if (verbose)
661 printf("Testing RUNSTATE_ADJUST\n");
662 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
663 memset(&rst.u, 0, sizeof(rst.u));
664 rst.u.runstate.state = (uint64_t)-1;
665 rst.u.runstate.time_blocked =
666 0x5a - rs->time[RUNSTATE_blocked];
667 rst.u.runstate.time_offline =
668 0x6b6b - rs->time[RUNSTATE_offline];
669 rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
670 rst.u.runstate.time_offline;
671 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
672 break;
673
674 case TEST_RUNSTATE_DATA:
675 if (verbose)
676 printf("Testing RUNSTATE_DATA\n");
677 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
678 memset(&rst.u, 0, sizeof(rst.u));
679 rst.u.runstate.state = RUNSTATE_running;
680 rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
681 rst.u.runstate.time_blocked = 0x6b6b;
682 rst.u.runstate.time_offline = 0x5a;
683 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
684 break;
685
686 case TEST_STEAL_TIME:
687 if (verbose)
688 printf("Testing steal time\n");
689 /* Yield until scheduler delay exceeds target */
690 rundelay = get_run_delay() + MIN_STEAL_TIME;
691 do {
692 sched_yield();
693 } while (get_run_delay() < rundelay);
694 break;
695
696 case TEST_EVTCHN_MASKED:
697 if (!do_eventfd_tests)
698 goto done;
699 if (verbose)
700 printf("Testing masked event channel\n");
701 shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1;
702 eventfd_write(irq_fd[0], 1UL);
703 alarm(1);
704 break;
705
706 case TEST_EVTCHN_UNMASKED:
707 if (verbose)
708 printf("Testing unmasked event channel\n");
709 /* Unmask that, but deliver the other one */
710 shinfo->evtchn_pending[0] = 0;
711 shinfo->evtchn_mask[0] = 0;
712 eventfd_write(irq_fd[1], 1UL);
713 evtchn_irq_expected = true;
714 alarm(1);
715 break;
716
717 case TEST_EVTCHN_SLOWPATH:
718 TEST_ASSERT(!evtchn_irq_expected,
719 "Expected event channel IRQ but it didn't happen");
720 shinfo->evtchn_pending[1] = 0;
721 if (verbose)
722 printf("Testing event channel after memslot change\n");
723 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
724 DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
725 eventfd_write(irq_fd[0], 1UL);
726 evtchn_irq_expected = true;
727 alarm(1);
728 break;
729
730 case TEST_EVTCHN_SEND_IOCTL:
731 TEST_ASSERT(!evtchn_irq_expected,
732 "Expected event channel IRQ but it didn't happen");
733 if (!do_evtchn_tests)
734 goto done;
735
736 shinfo->evtchn_pending[0] = 0;
737 if (verbose)
738 printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n");
739
740 struct kvm_irq_routing_xen_evtchn e;
741 e.port = EVTCHN_TEST2;
742 e.vcpu = vcpu->id;
743 e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
744
745 vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e);
746 evtchn_irq_expected = true;
747 alarm(1);
748 break;
749
750 case TEST_EVTCHN_HCALL:
751 TEST_ASSERT(!evtchn_irq_expected,
752 "Expected event channel IRQ but it didn't happen");
753 shinfo->evtchn_pending[1] = 0;
754
755 if (verbose)
756 printf("Testing guest EVTCHNOP_send direct to evtchn\n");
757 evtchn_irq_expected = true;
758 alarm(1);
759 break;
760
761 case TEST_EVTCHN_HCALL_SLOWPATH:
762 TEST_ASSERT(!evtchn_irq_expected,
763 "Expected event channel IRQ but it didn't happen");
764 shinfo->evtchn_pending[0] = 0;
765
766 if (verbose)
767 printf("Testing guest EVTCHNOP_send direct to evtchn after memslot change\n");
768 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
769 DUMMY_REGION_GPA_2, DUMMY_REGION_SLOT_2, 1, 0);
770 evtchn_irq_expected = true;
771 alarm(1);
772 break;
773
774 case TEST_EVTCHN_HCALL_EVENTFD:
775 TEST_ASSERT(!evtchn_irq_expected,
776 "Expected event channel IRQ but it didn't happen");
777 shinfo->evtchn_pending[0] = 0;
778
779 if (verbose)
780 printf("Testing guest EVTCHNOP_send to eventfd\n");
781 evtchn_irq_expected = true;
782 alarm(1);
783 break;
784
785 case TEST_TIMER_SETUP:
786 TEST_ASSERT(!evtchn_irq_expected,
787 "Expected event channel IRQ but it didn't happen");
788 shinfo->evtchn_pending[1] = 0;
789
790 if (verbose)
791 printf("Testing guest oneshot timer\n");
792 break;
793
794 case TEST_TIMER_WAIT:
795 memset(&tmr, 0, sizeof(tmr));
796 tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER;
797 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
798 TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER,
799 "Timer port not returned");
800 TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
801 "Timer priority not returned");
802 TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time,
803 "Timer expiry not returned");
804 evtchn_irq_expected = true;
805 alarm(1);
806 break;
807
808 case TEST_TIMER_RESTORE:
809 TEST_ASSERT(!evtchn_irq_expected,
810 "Expected event channel IRQ but it didn't happen");
811 shinfo->evtchn_pending[0] = 0;
812
813 if (verbose)
814 printf("Testing restored oneshot timer\n");
815
816 tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
817 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
818 evtchn_irq_expected = true;
819 alarm(1);
820 break;
821
822 case TEST_POLL_READY:
823 TEST_ASSERT(!evtchn_irq_expected,
824 "Expected event channel IRQ but it didn't happen");
825
826 if (verbose)
827 printf("Testing SCHEDOP_poll with already pending event\n");
828 shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER;
829 alarm(1);
830 break;
831
832 case TEST_POLL_TIMEOUT:
833 if (verbose)
834 printf("Testing SCHEDOP_poll timeout\n");
835 shinfo->evtchn_pending[0] = 0;
836 alarm(1);
837 break;
838
839 case TEST_POLL_MASKED:
840 if (verbose)
841 printf("Testing SCHEDOP_poll wake on masked event\n");
842
843 tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
844 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
845 alarm(1);
846 break;
847
848 case TEST_POLL_WAKE:
849 shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0;
850 if (verbose)
851 printf("Testing SCHEDOP_poll wake on unmasked event\n");
852
853 evtchn_irq_expected = true;
854 tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
855 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
856
857 /* Read it back and check the pending time is reported correctly */
858 tmr.u.timer.expires_ns = 0;
859 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
860 TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000,
861 "Timer not reported pending");
862 alarm(1);
863 break;
864
865 case TEST_TIMER_PAST:
866 TEST_ASSERT(!evtchn_irq_expected,
867 "Expected event channel IRQ but it didn't happen");
868 /* Read timer and check it is no longer pending */
869 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
870 TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending");
871
872 shinfo->evtchn_pending[0] = 0;
873 if (verbose)
874 printf("Testing timer in the past\n");
875
876 evtchn_irq_expected = true;
877 tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL;
878 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
879 alarm(1);
880 break;
881
882 case TEST_LOCKING_SEND_RACE:
883 TEST_ASSERT(!evtchn_irq_expected,
884 "Expected event channel IRQ but it didn't happen");
885 alarm(0);
886
887 if (verbose)
888 printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n");
889
890 ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm);
891 TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret));
892
893 struct kvm_irq_routing_xen_evtchn uxe = {
894 .port = 1,
895 .vcpu = vcpu->id,
896 .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL
897 };
898
899 evtchn_irq_expected = true;
900 for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;)
901 __vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe);
902 break;
903
904 case TEST_LOCKING_POLL_RACE:
905 TEST_ASSERT(!evtchn_irq_expected,
906 "Expected event channel IRQ but it didn't happen");
907
908 if (verbose)
909 printf("Testing shinfo lock corruption (SCHEDOP_poll)\n");
910
911 shinfo->evtchn_pending[0] = 1;
912
913 evtchn_irq_expected = true;
914 tmr.u.timer.expires_ns = rs->state_entry_time +
915 SHINFO_RACE_TIMEOUT * 1000000000ULL;
916 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
917 break;
918
919 case TEST_LOCKING_POLL_TIMEOUT:
920 /*
921 * Optional and possibly repeated sync point.
922 * Injecting the timer IRQ may fail if the
923 * shinfo is invalid when the timer expires.
924 * If the timer has expired but the IRQ hasn't
925 * been delivered, rearm the timer and retry.
926 */
927 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
928
929 /* Resume the guest if the timer is still pending. */
930 if (tmr.u.timer.expires_ns)
931 break;
932
933 /* All done if the IRQ was delivered. */
934 if (!evtchn_irq_expected)
935 break;
936
937 tmr.u.timer.expires_ns = rs->state_entry_time +
938 SHINFO_RACE_TIMEOUT * 1000000000ULL;
939 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
940 break;
941 case TEST_DONE:
942 TEST_ASSERT(!evtchn_irq_expected,
943 "Expected event channel IRQ but it didn't happen");
944
945 ret = pthread_cancel(thread);
946 TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret));
947
948 ret = pthread_join(thread, 0);
949 TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret));
950 goto done;
951
952 case TEST_GUEST_SAW_IRQ:
953 TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
954 evtchn_irq_expected = false;
955 break;
956 }
957 break;
958 }
959 case UCALL_DONE:
960 goto done;
961 default:
962 TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
963 }
964 }
965
966 done:
967 evt_reset.type = KVM_XEN_ATTR_TYPE_EVTCHN;
968 evt_reset.u.evtchn.flags = KVM_XEN_EVTCHN_RESET;
969 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset);
970
971 alarm(0);
972 clock_gettime(CLOCK_REALTIME, &max_ts);
973
974 /*
975 * Just a *really* basic check that things are being put in the
976 * right place. The actual calculations are much the same for
977 * Xen as they are for the KVM variants, so no need to check.
978 */
979 struct pvclock_wall_clock *wc;
980 struct pvclock_vcpu_time_info *ti, *ti2;
981
982 wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
983 ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
984 ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
985
986 if (verbose) {
987 printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
988 printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
989 ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
990 ti->tsc_shift, ti->flags);
991 printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
992 ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
993 ti2->tsc_shift, ti2->flags);
994 }
995
996 vm_ts.tv_sec = wc->sec;
997 vm_ts.tv_nsec = wc->nsec;
998 TEST_ASSERT(wc->version && !(wc->version & 1),
999 "Bad wallclock version %x", wc->version);
1000 TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old");
1001 TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new");
1002
1003 TEST_ASSERT(ti->version && !(ti->version & 1),
1004 "Bad time_info version %x", ti->version);
1005 TEST_ASSERT(ti2->version && !(ti2->version & 1),
1006 "Bad time_info version %x", ti->version);
1007
1008 if (do_runstate_tests) {
1009 /*
1010 * Fetch runstate and check sanity. Strictly speaking in the
1011 * general case we might not expect the numbers to be identical
1012 * but in this case we know we aren't running the vCPU any more.
1013 */
1014 struct kvm_xen_vcpu_attr rst = {
1015 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
1016 };
1017 vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst);
1018
1019 if (verbose) {
1020 printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
1021 rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
1022 rs->state, rs->state_entry_time);
1023 for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
1024 printf("State %s: %" PRIu64 " ns\n",
1025 runstate_names[i], rs->time[i]);
1026 }
1027 }
1028
1029 /*
1030 * Exercise runstate info at all points across the page boundary, in
1031 * 32-bit and 64-bit mode. In particular, test the case where it is
1032 * configured in 32-bit mode and then switched to 64-bit mode while
1033 * active, which takes it onto the second page.
1034 */
1035 unsigned long runstate_addr;
1036 struct compat_vcpu_runstate_info *crs;
1037 for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4;
1038 runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) {
1039
1040 rs = addr_gpa2hva(vm, runstate_addr);
1041 crs = (void *)rs;
1042
1043 memset(rs, 0xa5, sizeof(*rs));
1044
1045 /* Set to compatibility mode */
1046 lm.u.long_mode = 0;
1047 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
1048
1049 /* Set runstate to new address (kernel will write it) */
1050 struct kvm_xen_vcpu_attr st = {
1051 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
1052 .u.gpa = runstate_addr,
1053 };
1054 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
1055
1056 if (verbose)
1057 printf("Compatibility runstate at %08lx\n", runstate_addr);
1058
1059 TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch");
1060 TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time,
1061 "State entry time mismatch");
1062 TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running,
1063 "Running time mismatch");
1064 TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
1065 "Runnable time mismatch");
1066 TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
1067 "Blocked time mismatch");
1068 TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
1069 "Offline time mismatch");
1070 TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
1071 "Structure overrun");
1072 TEST_ASSERT(crs->state_entry_time == crs->time[0] +
1073 crs->time[1] + crs->time[2] + crs->time[3],
1074 "runstate times don't add up");
1075
1076
1077 /* Now switch to 64-bit mode */
1078 lm.u.long_mode = 1;
1079 vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
1080
1081 memset(rs, 0xa5, sizeof(*rs));
1082
1083 /* Don't change the address, just trigger a write */
1084 struct kvm_xen_vcpu_attr adj = {
1085 .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST,
1086 .u.runstate.state = (uint64_t)-1
1087 };
1088 vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj);
1089
1090 if (verbose)
1091 printf("64-bit runstate at %08lx\n", runstate_addr);
1092
1093 TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
1094 TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
1095 "State entry time mismatch");
1096 TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
1097 "Running time mismatch");
1098 TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
1099 "Runnable time mismatch");
1100 TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
1101 "Blocked time mismatch");
1102 TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
1103 "Offline time mismatch");
1104 TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
1105 "Structure overrun");
1106
1107 TEST_ASSERT(rs->state_entry_time == rs->time[0] +
1108 rs->time[1] + rs->time[2] + rs->time[3],
1109 "runstate times don't add up");
1110 }
1111 }
1112
1113 kvm_vm_free(vm);
1114 return 0;
1115 }
1116