// SPDX-License-Identifier: GPL-2.0-only
/*
 * xen_shinfo_test
 *
 * Copyright © 2021 Amazon.com, Inc. or its affiliates.
 *
 * Xen shared_info / pvclock testing
 */

#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"

#include <stdint.h>
#include <string.h>
#include <time.h>
#include <sched.h>
#include <signal.h>
#include <fcntl.h>

#include <sys/eventfd.h>
#include <sys/mman.h>
#define SHINFO_REGION_GVA	0xc0000000ULL
#define SHINFO_REGION_GPA	0xc0000000ULL
#define SHINFO_REGION_SLOT	10

#define DUMMY_REGION_GPA	(SHINFO_REGION_GPA + (2 * PAGE_SIZE))
#define DUMMY_REGION_SLOT	11

#define SHINFO_ADDR	(SHINFO_REGION_GPA)
#define PVTIME_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE)
#define RUNSTATE_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
#define VCPU_INFO_ADDR	(SHINFO_REGION_GPA + 0x40)

#define SHINFO_VADDR	(SHINFO_REGION_GVA)
#define RUNSTATE_VADDR	(SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
#define VCPU_INFO_VADDR	(SHINFO_REGION_GVA + 0x40)
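
/*
 * Layout of the two-page region at SHINFO_REGION_GPA: the shared_info
 * page itself is at +0, with a vcpu_info at +0x40; the second page
 * holds the pvclock time info at +0 and the runstate info at +0x20.
 * The region is identity-mapped, so the GVAs above equal the GPAs.
 */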

#define EVTCHN_VECTOR	0x10

#define EVTCHN_TEST1 15
#define EVTCHN_TEST2 66
#define EVTCHN_TIMER 13

#define XEN_HYPERCALL_MSR	0x40000000

#define MIN_STEAL_TIME		50000

#define __HYPERVISOR_set_timer_op	15
#define __HYPERVISOR_sched_op		29
#define __HYPERVISOR_event_channel_op	32

#define SCHEDOP_poll			3

#define EVTCHNOP_send			4

#define EVTCHNSTAT_interdomain		2

struct evtchn_send {
	u32 port;
};

struct sched_poll {
	u32 *ports;
	unsigned int nr_ports;
	u64 timeout;
};

struct pvclock_vcpu_time_info {
	u32   version;
	u32   pad0;
	u64   tsc_timestamp;
	u64   system_time;
	u32   tsc_to_system_mul;
	s8    tsc_shift;
	u8    flags;
	u8    pad[2];
} __attribute__((__packed__)); /* 32 bytes */

struct pvclock_wall_clock {
	u32   version;
	u32   sec;
	u32   nsec;
} __attribute__((__packed__));
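
/*
 * pvclock updates are guarded by the version field: the hypervisor
 * increments it before and after writing, so a nonzero *even* value
 * means the contents are stable. The assertions at the end of the
 * test rely on this convention.
 */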

struct vcpu_runstate_info {
	uint32_t state;
	uint64_t state_entry_time;
	uint64_t time[4];
};

struct arch_vcpu_info {
	unsigned long cr2;
	unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
};

struct vcpu_info {
	uint8_t evtchn_upcall_pending;
	uint8_t evtchn_upcall_mask;
	unsigned long evtchn_pending_sel;
	struct arch_vcpu_info arch;
	struct pvclock_vcpu_time_info time;
}; /* 64 bytes (x86) */

struct shared_info {
	struct vcpu_info vcpu_info[32];
	unsigned long evtchn_pending[64];
	unsigned long evtchn_mask[64];
	struct pvclock_wall_clock wc;
	uint32_t wc_sec_hi;
	/* arch_shared_info here */
};

#define RUNSTATE_running  0
#define RUNSTATE_runnable 1
#define RUNSTATE_blocked  2
#define RUNSTATE_offline  3

static const char *runstate_names[] = {
	"running",
	"runnable",
	"blocked",
	"offline"
};

struct {
	struct kvm_irq_routing info;
	struct kvm_irq_routing_entry entries[2];
} irq_routes;
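
/*
 * struct kvm_irq_routing ends in a flexible array member, so the
 * wrapper struct above ensures the two routing entries land directly
 * after the header that describes them.
 */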

bool guest_saw_irq;

static void evtchn_handler(struct ex_regs *regs)
{
	struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
	vi->evtchn_upcall_pending = 0;
	vi->evtchn_pending_sel = 0;
	guest_saw_irq = true;

	GUEST_SYNC(0x20);
}

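/*
 * Spin with PAUSE ("rep nop") until evtchn_handler() has run; the
 * "memory" clobber forces guest_saw_irq to be re-read on each pass.
 */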
static void guest_wait_for_irq(void)
{
	while (!guest_saw_irq)
		__asm__ __volatile__ ("rep nop" : : : "memory");
	guest_saw_irq = false;
}

static void guest_code(void)
{
	struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;

	__asm__ __volatile__(
		"sti\n"
		"nop\n"
	);

	/* Trigger an interrupt injection */
	GUEST_SYNC(0);

	guest_wait_for_irq();

	/* Test having the host set runstates manually */
	GUEST_SYNC(RUNSTATE_runnable);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
	GUEST_ASSERT(rs->state == 0);

	GUEST_SYNC(RUNSTATE_blocked);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
	GUEST_ASSERT(rs->state == 0);

	GUEST_SYNC(RUNSTATE_offline);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
	GUEST_ASSERT(rs->state == 0);

	/* Test runstate time adjust */
	GUEST_SYNC(4);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);

	/* Test runstate time set */
	GUEST_SYNC(5);
	GUEST_ASSERT(rs->state_entry_time >= 0x8000);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);

	/* sched_yield() should result in some 'runnable' time */
	GUEST_SYNC(6);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);

	/* Attempt to deliver a *masked* interrupt */
	GUEST_SYNC(7);

	/* Wait until we see the bit set */
	struct shared_info *si = (void *)SHINFO_VADDR;
	while (!si->evtchn_pending[0])
		__asm__ __volatile__ ("rep nop" : : : "memory");

	/* Now deliver an *unmasked* interrupt */
	GUEST_SYNC(8);

	guest_wait_for_irq();

	/* Change memslots and deliver an interrupt */
	GUEST_SYNC(9);

	guest_wait_for_irq();

	/* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */
	GUEST_SYNC(10);

	guest_wait_for_irq();

	GUEST_SYNC(11);

	/*
	 * Our turn. Deliver event channel (to ourselves) with
	 * EVTCHNOP_send hypercall.
	 */
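	/*
	 * 64-bit Xen hypercall ABI: the hypercall number goes in %rax and
	 * the first two arguments in %rdi and %rsi. The VMCALL is handled
	 * by KVM itself because the host set
	 * KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL.
	 */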
	unsigned long rax;
	struct evtchn_send s = { .port = 127 };
	__asm__ __volatile__ ("vmcall" :
			      "=a" (rax) :
			      "a" (__HYPERVISOR_event_channel_op),
			      "D" (EVTCHNOP_send),
			      "S" (&s));

	GUEST_ASSERT(rax == 0);

	guest_wait_for_irq();

	GUEST_SYNC(12);

	/*
	 * Deliver "outbound" event channel to an eventfd which
	 * happens to be one of our own irqfds.
	 */
	s.port = 197;
	__asm__ __volatile__ ("vmcall" :
			      "=a" (rax) :
			      "a" (__HYPERVISOR_event_channel_op),
			      "D" (EVTCHNOP_send),
			      "S" (&s));

	GUEST_ASSERT(rax == 0);

	guest_wait_for_irq();

	GUEST_SYNC(13);

	/* Set a timer 100ms in the future. */
	__asm__ __volatile__ ("vmcall" :
			      "=a" (rax) :
			      "a" (__HYPERVISOR_set_timer_op),
			      "D" (rs->state_entry_time + 100000000));
	GUEST_ASSERT(rax == 0);

	GUEST_SYNC(14);

	/* Now wait for the timer */
	guest_wait_for_irq();

	GUEST_SYNC(15);

	/* The host has 'restored' the timer. Just wait for it. */
	guest_wait_for_irq();

	GUEST_SYNC(16);

	/* Poll for an event channel port which is already set */
	u32 ports[1] = { EVTCHN_TIMER };
	struct sched_poll p = {
		.ports = ports,
		.nr_ports = 1,
		.timeout = 0,
	};
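
	/*
	 * SCHEDOP_poll completes immediately if any port in the list is
	 * already pending. Note that KVM only intercepts SCHEDOP_poll when
	 * KVM_XEN_HVM_CONFIG_EVTCHN_SEND is enabled, which is why the host
	 * side gates these polling tests on do_evtchn_tests.
	 */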

	__asm__ __volatile__ ("vmcall" :
			      "=a" (rax) :
			      "a" (__HYPERVISOR_sched_op),
			      "D" (SCHEDOP_poll),
			      "S" (&p));

	GUEST_ASSERT(rax == 0);

	GUEST_SYNC(17);

	/* Poll for an unset port and wait for the timeout. */
	p.timeout = 100000000;
	__asm__ __volatile__ ("vmcall" :
			      "=a" (rax) :
			      "a" (__HYPERVISOR_sched_op),
			      "D" (SCHEDOP_poll),
			      "S" (&p));

	GUEST_ASSERT(rax == 0);

	GUEST_SYNC(18);

	/* A timer will wake the masked port we're waiting on, while we poll */
	p.timeout = 0;
	__asm__ __volatile__ ("vmcall" :
			      "=a" (rax) :
			      "a" (__HYPERVISOR_sched_op),
			      "D" (SCHEDOP_poll),
			      "S" (&p));

	GUEST_ASSERT(rax == 0);

	GUEST_SYNC(19);

	/*
	 * A timer will wake an *unmasked* port, which should wake us with
	 * an actual interrupt while we're polling on a different port.
	 */
	ports[0]++;
	p.timeout = 0;
	__asm__ __volatile__ ("vmcall" :
			      "=a" (rax) :
			      "a" (__HYPERVISOR_sched_op),
			      "D" (SCHEDOP_poll),
			      "S" (&p));

	GUEST_ASSERT(rax == 0);

	guest_wait_for_irq();

	GUEST_SYNC(20);

	/* Timer should have fired already */
	guest_wait_for_irq();

	GUEST_SYNC(21);
}

static int cmp_timespec(struct timespec *a, struct timespec *b)
{
	if (a->tv_sec > b->tv_sec)
		return 1;
	else if (a->tv_sec < b->tv_sec)
		return -1;
	else if (a->tv_nsec > b->tv_nsec)
		return 1;
	else if (a->tv_nsec < b->tv_nsec)
		return -1;
	else
		return 0;
}

static struct vcpu_info *vinfo;
static struct kvm_vcpu *vcpu;

static void handle_alrm(int sig)
{
	if (vinfo)
		printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending);
	vcpu_dump(stdout, vcpu, 0);
	TEST_FAIL("IRQ delivery timed out");
}

int main(int argc, char *argv[])
{
	struct timespec min_ts, max_ts, vm_ts;
	struct kvm_vm *vm;
	bool verbose;

	verbose = argc > 1 && (!strcmp(argv[1], "-v") ||
			       !strcmp(argv[1], "--verbose"));

	int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
	TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO);

	bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
	bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
	bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);

	clock_gettime(CLOCK_REALTIME, &min_ts);

	vm = vm_create_with_one_vcpu(&vcpu, guest_code);

	/* Map a region for the shared_info page */
	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
				    SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
	virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2);

	struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_ADDR);

	int zero_fd = open("/dev/zero", O_RDONLY);
	TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");

	struct kvm_xen_hvm_config hvmc = {
		.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
		.msr = XEN_HYPERCALL_MSR,
	};

	/*
	 * Let the kernel know that we *will* use it for sending all
	 * event channels, which lets it intercept SCHEDOP_poll.
	 */
	if (do_evtchn_tests)
		hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;

	vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);

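	/*
	 * Tell KVM the guest ABI is 64-bit: the 32-bit and 64-bit Xen
	 * guest ABIs lay out shared_info and vcpu_info differently, and
	 * KVM needs to know which layout to write.
	 */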
	struct kvm_xen_hvm_attr lm = {
		.type = KVM_XEN_ATTR_TYPE_LONG_MODE,
		.u.long_mode = 1,
	};
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);

	struct kvm_xen_hvm_attr ha = {
		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
		.u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE,
	};
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);

	/*
	 * Test what happens when the HVA of the shinfo page is remapped after
	 * the kernel has a reference to it. But make sure we copy the clock
	 * info over since that's only set at setup time, and we test it later.
	 */
	struct pvclock_wall_clock wc_copy = shinfo->wc;
	void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
	TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
	shinfo->wc = wc_copy;

	struct kvm_xen_vcpu_attr vi = {
		.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
		.u.gpa = VCPU_INFO_ADDR,
	};
	vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi);

	struct kvm_xen_vcpu_attr pvclock = {
		.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
		.u.gpa = PVTIME_ADDR,
	};
	vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock);

	struct kvm_xen_hvm_attr vec = {
		.type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
		.u.vector = EVTCHN_VECTOR,
	};
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);

	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vcpu);
	vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);

	if (do_runstate_tests) {
		struct kvm_xen_vcpu_attr st = {
			.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
			.u.gpa = RUNSTATE_ADDR,
		};
		vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
	}

	int irq_fd[2] = { -1, -1 };

	if (do_eventfd_tests) {
		irq_fd[0] = eventfd(0, 0);
		irq_fd[1] = eventfd(0, 0);

		/* Unexpected, but not a KVM failure */
		if (irq_fd[0] == -1 || irq_fd[1] == -1)
			do_evtchn_tests = do_eventfd_tests = false;
	}

	if (do_eventfd_tests) {
		irq_routes.info.nr = 2;

		irq_routes.entries[0].gsi = 32;
		irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
		irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1;
		irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id;
		irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

		irq_routes.entries[1].gsi = 33;
		irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
		irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2;
		irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id;
		irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

		vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);

		struct kvm_irqfd ifd = { };

		ifd.fd = irq_fd[0];
		ifd.gsi = 32;
		vm_ioctl(vm, KVM_IRQFD, &ifd);

		ifd.fd = irq_fd[1];
		ifd.gsi = 33;
		vm_ioctl(vm, KVM_IRQFD, &ifd);
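
		/*
		 * From here on, a write to irq_fd[0] or irq_fd[1] kicks
		 * GSI 32 or 33 respectively, which the routing table above
		 * turns into a 2-level event channel upcall for
		 * EVTCHN_TEST1 or EVTCHN_TEST2 on the target vCPU.
		 */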

		struct sigaction sa = { };
		sa.sa_handler = handle_alrm;
		sigaction(SIGALRM, &sa, NULL);
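
		/*
		 * handle_alrm() is the watchdog: the delivery tests below
		 * each arm alarm(1), and the test fails if an expected IRQ
		 * hasn't arrived within a second.
		 */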
	}

	struct kvm_xen_vcpu_attr tmr = {
		.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
		.u.timer.port = EVTCHN_TIMER,
		.u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
		.u.timer.expires_ns = 0
	};

	if (do_evtchn_tests) {
		struct kvm_xen_hvm_attr inj = {
			.type = KVM_XEN_ATTR_TYPE_EVTCHN,
			.u.evtchn.send_port = 127,
			.u.evtchn.type = EVTCHNSTAT_interdomain,
			.u.evtchn.flags = 0,
			.u.evtchn.deliver.port.port = EVTCHN_TEST1,
			.u.evtchn.deliver.port.vcpu = vcpu->id + 1,
			.u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
		};
		vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);

		/* Test migration to a different vCPU */
		inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE;
		inj.u.evtchn.deliver.port.vcpu = vcpu->id;
		vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);

		inj.u.evtchn.send_port = 197;
		inj.u.evtchn.deliver.eventfd.port = 0;
		inj.u.evtchn.deliver.eventfd.fd = irq_fd[1];
		inj.u.evtchn.flags = 0;
		vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);

		vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
	}
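
	/*
	 * With the above in place (when do_evtchn_tests is set), a guest
	 * EVTCHNOP_send to port 127 loops back to local port EVTCHN_TEST1,
	 * a send to port 197 is delivered to the irq_fd[1] eventfd (which
	 * re-enters as EVTCHN_TEST2 via its irqfd routing), and the Xen
	 * timer fires EVTCHN_TIMER.
	 */
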
	vinfo = addr_gpa2hva(vm, VCPU_INFO_ADDR);
	vinfo->evtchn_upcall_pending = 0;

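	/*
	 * Poison the runstate with a nonsense value; KVM overwrites it once
	 * runstate reporting starts, and the guest asserts it reads back as
	 * RUNSTATE_running (0).
	 */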
	struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
	rs->state = 0x5a;

	bool evtchn_irq_expected = false;

	for (;;) {
		volatile struct kvm_run *run = vcpu->run;
		struct ucall uc;

		vcpu_run(vcpu);

		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)",
			    run->exit_reason,
			    exit_reason_str(run->exit_reason));

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			/* NOT REACHED */
		case UCALL_SYNC: {
			struct kvm_xen_vcpu_attr rst;
			long rundelay;

			if (do_runstate_tests)
				TEST_ASSERT(rs->state_entry_time == rs->time[0] +
					    rs->time[1] + rs->time[2] + rs->time[3],
					    "runstate times don't add up");

			switch (uc.args[1]) {
			case 0:
				if (verbose)
					printf("Delivering evtchn upcall\n");
				evtchn_irq_expected = true;
				vinfo->evtchn_upcall_pending = 1;
				break;

			case RUNSTATE_runnable...RUNSTATE_offline:
				TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
				if (!do_runstate_tests)
					goto done;
				if (verbose)
					printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
				rst.u.runstate.state = uc.args[1];
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
				break;

			case 4:
				if (verbose)
					printf("Testing RUNSTATE_ADJUST\n");
				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
				memset(&rst.u, 0, sizeof(rst.u));
				rst.u.runstate.state = (uint64_t)-1;
				rst.u.runstate.time_blocked =
					0x5a - rs->time[RUNSTATE_blocked];
				rst.u.runstate.time_offline =
					0x6b6b - rs->time[RUNSTATE_offline];
				rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
					rst.u.runstate.time_offline;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
				break;

			case 5:
				if (verbose)
					printf("Testing RUNSTATE_DATA\n");
				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
				memset(&rst.u, 0, sizeof(rst.u));
				rst.u.runstate.state = RUNSTATE_running;
				rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
				rst.u.runstate.time_blocked = 0x6b6b;
				rst.u.runstate.time_offline = 0x5a;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
				break;

			case 6:
				if (verbose)
					printf("Testing steal time\n");
				/* Yield until scheduler delay exceeds target */
				rundelay = get_run_delay() + MIN_STEAL_TIME;
				do {
					sched_yield();
				} while (get_run_delay() < rundelay);
				break;

			case 7:
				if (!do_eventfd_tests)
					goto done;
				if (verbose)
					printf("Testing masked event channel\n");
				shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1;
				eventfd_write(irq_fd[0], 1UL);
				alarm(1);
				break;

			case 8:
				if (verbose)
					printf("Testing unmasked event channel\n");
				/* Unmask that, but deliver the other one */
				shinfo->evtchn_pending[0] = 0;
				shinfo->evtchn_mask[0] = 0;
				eventfd_write(irq_fd[1], 1UL);
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case 9:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[1] = 0;
				if (verbose)
					printf("Testing event channel after memslot change\n");
				vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
							    DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
				eventfd_write(irq_fd[0], 1UL);
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case 10:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				if (!do_evtchn_tests)
					goto done;

				shinfo->evtchn_pending[0] = 0;
				if (verbose)
					printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n");

				struct kvm_irq_routing_xen_evtchn e;
				e.port = EVTCHN_TEST2;
				e.vcpu = vcpu->id;
				e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

				vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e);
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case 11:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[1] = 0;

				if (verbose)
					printf("Testing guest EVTCHNOP_send direct to evtchn\n");
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case 12:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[0] = 0;

				if (verbose)
					printf("Testing guest EVTCHNOP_send to eventfd\n");
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case 13:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[1] = 0;

				if (verbose)
					printf("Testing guest oneshot timer\n");
				break;

			case 14:
				memset(&tmr, 0, sizeof(tmr));
				tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
				TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER,
					    "Timer port not returned");
				TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
					    "Timer priority not returned");
				TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time,
					    "Timer expiry not returned");
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case 15:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[0] = 0;

				if (verbose)
					printf("Testing restored oneshot timer\n");

				tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case 16:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");

				if (verbose)
					printf("Testing SCHEDOP_poll with already pending event\n");
				shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER;
				alarm(1);
				break;

			case 17:
				if (verbose)
					printf("Testing SCHEDOP_poll timeout\n");
				shinfo->evtchn_pending[0] = 0;
				alarm(1);
				break;

			case 18:
				if (verbose)
					printf("Testing SCHEDOP_poll wake on masked event\n");

				tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
				alarm(1);
				break;

			case 19:
				shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0;
				if (verbose)
					printf("Testing SCHEDOP_poll wake on unmasked event\n");

				evtchn_irq_expected = true;
				tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);

				/* Read it back and check the pending time is reported correctly */
				tmr.u.timer.expires_ns = 0;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
				TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000,
					    "Timer not reported pending");
				alarm(1);
				break;

			case 20:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				/* Read timer and check it is no longer pending */
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
				TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending");

				shinfo->evtchn_pending[0] = 0;
				if (verbose)
					printf("Testing timer in the past\n");

				evtchn_irq_expected = true;
				tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL;
				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
				alarm(1);
				break;

			case 21:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				goto done;

			case 0x20:
				TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
				evtchn_irq_expected = false;
				break;
			}
			break;
		}
		case UCALL_DONE:
			goto done;
		default:
			TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
		}
	}

 done:
	alarm(0);
	clock_gettime(CLOCK_REALTIME, &max_ts);

	/*
	 * Just a *really* basic check that things are being put in the
	 * right place. The actual calculations are much the same for
	 * Xen as they are for the KVM variants, so no need to check.
	 */
	struct pvclock_wall_clock *wc;
	struct pvclock_vcpu_time_info *ti, *ti2;

	wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
	ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
	ti2 = addr_gpa2hva(vm, PVTIME_ADDR);

	if (verbose) {
		printf("Wall clock (v %u) %u.%09u\n", wc->version, wc->sec, wc->nsec);
		printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %d flags %x\n",
		       ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
		       ti->tsc_shift, ti->flags);
		printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %d flags %x\n",
		       ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
		       ti2->tsc_shift, ti2->flags);
	}

	vm_ts.tv_sec = wc->sec;
	vm_ts.tv_nsec = wc->nsec;
	TEST_ASSERT(wc->version && !(wc->version & 1),
		    "Bad wallclock version %x", wc->version);
	TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old");
	TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new");

	TEST_ASSERT(ti->version && !(ti->version & 1),
		    "Bad time_info version %x", ti->version);
	TEST_ASSERT(ti2->version && !(ti2->version & 1),
		    "Bad time_info version %x", ti2->version);

	if (do_runstate_tests) {
		/*
		 * Fetch runstate and check sanity. Strictly speaking in the
		 * general case we might not expect the numbers to be identical
		 * but in this case we know we aren't running the vCPU any more.
		 */
		struct kvm_xen_vcpu_attr rst = {
			.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
		};
		vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst);

		if (verbose) {
			printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
			       rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
			       rs->state, rs->state_entry_time);
			for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
				printf("State %s: %" PRIu64 " ns\n",
				       runstate_names[i], rs->time[i]);
			}
		}
		TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
		TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
			    "State entry time mismatch");
		TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
			    "Running time mismatch");
		TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
			    "Runnable time mismatch");
		TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
			    "Blocked time mismatch");
		TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
			    "Offline time mismatch");

		TEST_ASSERT(rs->state_entry_time == rs->time[0] +
			    rs->time[1] + rs->time[2] + rs->time[3],
			    "runstate times don't add up");
	}
	kvm_vm_free(vm);
	return 0;
}