// SPDX-License-Identifier: GPL-2.0-only
/*
 * xen_shinfo_test
 *
 * Copyright © 2021 Amazon.com, Inc. or its affiliates.
 *
 * Xen shared_info / pvclock testing
 */

#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"

#include <stdint.h>
#include <time.h>
#include <sched.h>
#include <signal.h>

#include <sys/eventfd.h>

#define VCPU_ID		5

#define SHINFO_REGION_GVA	0xc0000000ULL
#define SHINFO_REGION_GPA	0xc0000000ULL
#define SHINFO_REGION_SLOT	10

#define DUMMY_REGION_GPA	(SHINFO_REGION_GPA + (2 * PAGE_SIZE))
#define DUMMY_REGION_SLOT	11

#define SHINFO_ADDR	(SHINFO_REGION_GPA)
#define PVTIME_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE)
#define RUNSTATE_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
#define VCPU_INFO_ADDR	(SHINFO_REGION_GPA + 0x40)

#define SHINFO_VADDR	(SHINFO_REGION_GVA)
#define RUNSTATE_VADDR	(SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
#define VCPU_INFO_VADDR	(SHINFO_REGION_GVA + 0x40)

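/*
 * Vector used for event channel upcall injection, configured below via
 * KVM_XEN_ATTR_TYPE_UPCALL_VECTOR.
 */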
#define EVTCHN_VECTOR	0x10

#define EVTCHN_TEST1 15
#define EVTCHN_TEST2 66
#define EVTCHN_TIMER 13

static struct kvm_vm *vm;

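/*
 * MSR through which the guest would normally install the Xen hypercall page;
 * it is handed to KVM via KVM_XEN_HVM_CONFIG below.
 */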
#define XEN_HYPERCALL_MSR	0x40000000

#define MIN_STEAL_TIME		50000

#define __HYPERVISOR_set_timer_op	15
#define __HYPERVISOR_sched_op		29
#define __HYPERVISOR_event_channel_op	32

#define SCHEDOP_poll			3

#define EVTCHNOP_send			4

#define EVTCHNSTAT_interdomain		2

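/*
 * Minimal local copies of the Xen ABI structures and constants shared by the
 * guest and host sides of this test.
 */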
struct evtchn_send {
	u32 port;
};

struct sched_poll {
	u32 *ports;
	unsigned int nr_ports;
	u64 timeout;
};

struct pvclock_vcpu_time_info {
	u32   version;
	u32   pad0;
	u64   tsc_timestamp;
	u64   system_time;
	u32   tsc_to_system_mul;
	s8    tsc_shift;
	u8    flags;
	u8    pad[2];
} __attribute__((__packed__)); /* 32 bytes */

struct pvclock_wall_clock {
	u32   version;
	u32   sec;
	u32   nsec;
} __attribute__((__packed__));

struct vcpu_runstate_info {
	uint32_t state;
	uint64_t state_entry_time;
	uint64_t time[4];
};

struct arch_vcpu_info {
	unsigned long cr2;
	unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
};

struct vcpu_info {
	uint8_t evtchn_upcall_pending;
	uint8_t evtchn_upcall_mask;
	unsigned long evtchn_pending_sel;
	struct arch_vcpu_info arch;
	struct pvclock_vcpu_time_info time;
}; /* 64 bytes (x86) */

struct shared_info {
	struct vcpu_info vcpu_info[32];
	unsigned long evtchn_pending[64];
	unsigned long evtchn_mask[64];
	struct pvclock_wall_clock wc;
	uint32_t wc_sec_hi;
	/* arch_shared_info here */
};

#define RUNSTATE_running  0
#define RUNSTATE_runnable 1
#define RUNSTATE_blocked  2
#define RUNSTATE_offline  3

static const char *runstate_names[] = {
	"running",
	"runnable",
	"blocked",
	"offline"
};

struct {
	struct kvm_irq_routing info;
	struct kvm_irq_routing_entry entries[2];
} irq_routes;

bool guest_saw_irq;

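/*
 * Guest-side handler for the upcall vector: acknowledge the pending upcall in
 * vcpu_info, note that the IRQ arrived, and report back to the host.
 */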
static void evtchn_handler(struct ex_regs *regs)
{
	struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
	vi->evtchn_upcall_pending = 0;
	vi->evtchn_pending_sel = 0;
	guest_saw_irq = true;

	GUEST_SYNC(0x20);
}

static void guest_wait_for_irq(void)
{
	while (!guest_saw_irq)
		__asm__ __volatile__ ("rep nop" : : : "memory");
	guest_saw_irq = false;
}

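/*
 * Guest side of the test. Each GUEST_SYNC(n) hands control to the matching
 * case in the host's ucall loop in main(), which sets up the next stage
 * before re-entering the guest.
 */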
static void guest_code(void)
{
	struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;

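	/* Interrupts must be enabled for the evtchn upcall to be delivered */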
	__asm__ __volatile__(
		"sti\n"
		"nop\n"
	);

	/* Trigger an interrupt injection */
	GUEST_SYNC(0);

	guest_wait_for_irq();

	/* Test having the host set runstates manually */
	GUEST_SYNC(RUNSTATE_runnable);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
	GUEST_ASSERT(rs->state == 0);

	GUEST_SYNC(RUNSTATE_blocked);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
	GUEST_ASSERT(rs->state == 0);

	GUEST_SYNC(RUNSTATE_offline);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
	GUEST_ASSERT(rs->state == 0);

	/* Test runstate time adjust */
	GUEST_SYNC(4);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);

	/* Test runstate time set */
	GUEST_SYNC(5);
	GUEST_ASSERT(rs->state_entry_time >= 0x8000);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);

	/* sched_yield() should result in some 'runnable' time */
	GUEST_SYNC(6);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);

	/* Attempt to deliver a *masked* interrupt */
	GUEST_SYNC(7);

	/* Wait until we see the bit set */
	struct shared_info *si = (void *)SHINFO_VADDR;
	while (!si->evtchn_pending[0])
		__asm__ __volatile__ ("rep nop" : : : "memory");

	/* Now deliver an *unmasked* interrupt */
	GUEST_SYNC(8);

	guest_wait_for_irq();

	/* Change memslots and deliver an interrupt */
	GUEST_SYNC(9);

	guest_wait_for_irq();

	/* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */
	GUEST_SYNC(10);

	guest_wait_for_irq();

	GUEST_SYNC(11);

	/*
	 * Our turn. Deliver event channel (to ourselves) with
	 * EVTCHNOP_send hypercall.
	 */
	unsigned long rax;
	struct evtchn_send s = { .port = 127 };
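	/*
	 * Xen hypercall convention: RAX holds the hypercall number, RDI and
	 * RSI the first two arguments. No hypercall page is installed; the
	 * VMCALL is intercepted directly because main() sets
	 * KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL.
	 */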
	__asm__ __volatile__ ("vmcall" :
			      "=a" (rax) :
			      "a" (__HYPERVISOR_event_channel_op),
			      "D" (EVTCHNOP_send),
			      "S" (&s));

	GUEST_ASSERT(rax == 0);

	guest_wait_for_irq();

	GUEST_SYNC(12);

	/*
	 * Deliver "outbound" event channel to an eventfd which
	 * happens to be one of our own irqfds.
	 */
	s.port = 197;
	__asm__ __volatile__ ("vmcall" :
			      "=a" (rax) :
			      "a" (__HYPERVISOR_event_channel_op),
			      "D" (EVTCHNOP_send),
			      "S" (&s));

	GUEST_ASSERT(rax == 0);

	guest_wait_for_irq();

	GUEST_SYNC(13);

	/* Set a timer 100ms in the future. */
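	/*
	 * __HYPERVISOR_set_timer_op takes an absolute expiry time in
	 * nanoseconds; state_entry_time is used as a recent timestamp to
	 * anchor the 100ms deadline.
	 */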
	__asm__ __volatile__ ("vmcall" :
			      "=a" (rax) :
			      "a" (__HYPERVISOR_set_timer_op),
			      "D" (rs->state_entry_time + 100000000));
	GUEST_ASSERT(rax == 0);

	GUEST_SYNC(14);

	/* Now wait for the timer */
	guest_wait_for_irq();

	GUEST_SYNC(15);

	/* The host has 'restored' the timer. Just wait for it. */
	guest_wait_for_irq();

	GUEST_SYNC(16);

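	/*
	 * SCHEDOP_poll returns immediately if any of the polled ports is
	 * already pending; otherwise it blocks until one of them is signalled
	 * or the timeout expires.
	 */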
	/* Poll for an event channel port which is already set */
	u32 ports[1] = { EVTCHN_TIMER };
	struct sched_poll p = {
		.ports = ports,
		.nr_ports = 1,
		.timeout = 0,
	};

	__asm__ __volatile__ ("vmcall" :
			      "=a" (rax) :
			      "a" (__HYPERVISOR_sched_op),
			      "D" (SCHEDOP_poll),
			      "S" (&p));

	GUEST_ASSERT(rax == 0);

	GUEST_SYNC(17);

	/* Poll for an unset port and wait for the timeout. */
	p.timeout = 100000000;
	__asm__ __volatile__ ("vmcall" :
			      "=a" (rax) :
			      "a" (__HYPERVISOR_sched_op),
			      "D" (SCHEDOP_poll),
			      "S" (&p));

	GUEST_ASSERT(rax == 0);

	GUEST_SYNC(18);

	/* A timer will wake the masked port we're waiting on, while we poll */
	p.timeout = 0;
	__asm__ __volatile__ ("vmcall" :
			      "=a" (rax) :
			      "a" (__HYPERVISOR_sched_op),
			      "D" (SCHEDOP_poll),
			      "S" (&p));

	GUEST_ASSERT(rax == 0);

	GUEST_SYNC(19);

	/*
	 * A timer will wake an *unmasked* port which should wake us with an
	 * actual interrupt, while we're polling on a different port.
	 */
	ports[0]++;
	p.timeout = 0;
	__asm__ __volatile__ ("vmcall" :
			      "=a" (rax) :
			      "a" (__HYPERVISOR_sched_op),
			      "D" (SCHEDOP_poll),
			      "S" (&p));

	GUEST_ASSERT(rax == 0);

	guest_wait_for_irq();

	GUEST_SYNC(20);

	/* Timer should have fired already */
	guest_wait_for_irq();

	GUEST_SYNC(21);
}

static int cmp_timespec(struct timespec *a, struct timespec *b)
{
	if (a->tv_sec > b->tv_sec)
		return 1;
	else if (a->tv_sec < b->tv_sec)
		return -1;
	else if (a->tv_nsec > b->tv_nsec)
		return 1;
	else if (a->tv_nsec < b->tv_nsec)
		return -1;
	else
		return 0;
}

struct vcpu_info *vinfo;

static void handle_alrm(int sig)
{
	if (vinfo)
		printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending);
	vcpu_dump(stdout, vm, VCPU_ID, 0);
	TEST_FAIL("IRQ delivery timed out");
}

int main(int argc, char *argv[])
{
	struct timespec min_ts, max_ts, vm_ts;
	bool verbose;

	verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
			       !strncmp(argv[1], "--verbose", 10));

	int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
	if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO)) {
		print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available");
		exit(KSFT_SKIP);
	}

	bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
	bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
	bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);

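	/*
	 * Sample CLOCK_REALTIME before and after the run so the wallclock
	 * that KVM writes into the shared_info page can be bounded at the
	 * end of the test.
	 */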
	clock_gettime(CLOCK_REALTIME, &min_ts);

	vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());

	/* Map a region for the shared_info page */
	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
				    SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
	virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2);

	struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR);

	int zero_fd = open("/dev/zero", O_RDONLY);
	TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");

	struct kvm_xen_hvm_config hvmc = {
		.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
		.msr = XEN_HYPERCALL_MSR,
	};

	/*
	 * Let the kernel know that we *will* use it for sending all
	 * event channels, which lets it intercept SCHEDOP_poll.
	 */
	if (do_evtchn_tests)
		hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;

	vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);

	struct kvm_xen_hvm_attr lm = {
		.type = KVM_XEN_ATTR_TYPE_LONG_MODE,
		.u.long_mode = 1,
	};
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);

	struct kvm_xen_hvm_attr ha = {
		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
		.u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE,
	};
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);

	/*
	 * Test what happens when the HVA of the shinfo page is remapped after
	 * the kernel has a reference to it. But make sure we copy the clock
	 * info over since that's only set at setup time, and we test it later.
	 */
	struct pvclock_wall_clock wc_copy = shinfo->wc;
	void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
	TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
	shinfo->wc = wc_copy;

	struct kvm_xen_vcpu_attr vi = {
		.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
		.u.gpa = VCPU_INFO_ADDR,
	};
	vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &vi);

	struct kvm_xen_vcpu_attr pvclock = {
		.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
		.u.gpa = PVTIME_ADDR,
	};
	vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock);

	struct kvm_xen_hvm_attr vec = {
		.type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
		.u.vector = EVTCHN_VECTOR,
	};
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);

	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vm, VCPU_ID);
	vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);

	if (do_runstate_tests) {
		struct kvm_xen_vcpu_attr st = {
			.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
			.u.gpa = RUNSTATE_ADDR,
		};
		vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st);
	}

	int irq_fd[2] = { -1, -1 };

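	/*
	 * Wire two eventfds to event channel ports EVTCHN_TEST1 and
	 * EVTCHN_TEST2 via KVM_IRQ_ROUTING_XEN_EVTCHN routes and irqfds, so
	 * that a write to either eventfd injects the corresponding event
	 * channel interrupt into the guest.
	 */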
	if (do_eventfd_tests) {
		irq_fd[0] = eventfd(0, 0);
		irq_fd[1] = eventfd(0, 0);

		/* Unexpected, but not a KVM failure */
		if (irq_fd[0] == -1 || irq_fd[1] == -1)
			do_evtchn_tests = do_eventfd_tests = false;
	}

	if (do_eventfd_tests) {
		irq_routes.info.nr = 2;

		irq_routes.entries[0].gsi = 32;
		irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
		irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1;
		irq_routes.entries[0].u.xen_evtchn.vcpu = VCPU_ID;
		irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

		irq_routes.entries[1].gsi = 33;
		irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
		irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2;
		irq_routes.entries[1].u.xen_evtchn.vcpu = VCPU_ID;
		irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

		vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes);

		struct kvm_irqfd ifd = { };

		ifd.fd = irq_fd[0];
		ifd.gsi = 32;
		vm_ioctl(vm, KVM_IRQFD, &ifd);

		ifd.fd = irq_fd[1];
		ifd.gsi = 33;
		vm_ioctl(vm, KVM_IRQFD, &ifd);

		struct sigaction sa = { };
		sa.sa_handler = handle_alrm;
		sigaction(SIGALRM, &sa, NULL);
	}

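	/*
	 * Template for the per-vCPU Xen timer attribute: the timer delivers
	 * its expiry as event channel EVTCHN_TIMER, and the same structure
	 * is reused later to save, restore and re-arm the timer.
	 */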
	struct kvm_xen_vcpu_attr tmr = {
		.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
		.u.timer.port = EVTCHN_TIMER,
		.u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
		.u.timer.expires_ns = 0
	};

	if (do_evtchn_tests) {
		struct kvm_xen_hvm_attr inj = {
			.type = KVM_XEN_ATTR_TYPE_EVTCHN,
			.u.evtchn.send_port = 127,
			.u.evtchn.type = EVTCHNSTAT_interdomain,
			.u.evtchn.flags = 0,
			.u.evtchn.deliver.port.port = EVTCHN_TEST1,
			.u.evtchn.deliver.port.vcpu = VCPU_ID + 1,
			.u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
		};
		vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);

		/* Test migration to a different vCPU */
		inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE;
		inj.u.evtchn.deliver.port.vcpu = VCPU_ID;
		vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);

		inj.u.evtchn.send_port = 197;
		inj.u.evtchn.deliver.eventfd.port = 0;
		inj.u.evtchn.deliver.eventfd.fd = irq_fd[1];
		inj.u.evtchn.flags = 0;
		vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);

		vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr);
	}
	vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
	vinfo->evtchn_upcall_pending = 0;

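	/*
	 * Seed the runstate state field with a poison value so that KVM's
	 * updates to the runstate area are observable (the guest expects to
	 * find RUNSTATE_running there once the vCPU has run).
	 */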
	struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
	rs->state = 0x5a;

	bool evtchn_irq_expected = false;

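	/*
	 * Host side of the test: run the vCPU and service its GUEST_SYNC
	 * ucalls. uc.args[1] selects the stage to drive; 0x20 is the guest's
	 * notification that the evtchn upcall vector was delivered.
	 */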
	for (;;) {
		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
		struct ucall uc;

		vcpu_run(vm, VCPU_ID);

		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
			    run->exit_reason,
			    exit_reason_str(run->exit_reason));

		switch (get_ucall(vm, VCPU_ID, &uc)) {
		case UCALL_ABORT:
			TEST_FAIL("%s", (const char *)uc.args[0]);
			/* NOT REACHED */
		case UCALL_SYNC: {
			struct kvm_xen_vcpu_attr rst;
			long rundelay;

			if (do_runstate_tests)
				TEST_ASSERT(rs->state_entry_time == rs->time[0] +
					    rs->time[1] + rs->time[2] + rs->time[3],
					    "runstate times don't add up");

			switch (uc.args[1]) {
			case 0:
				if (verbose)
					printf("Delivering evtchn upcall\n");
				evtchn_irq_expected = true;
				vinfo->evtchn_upcall_pending = 1;
				break;

			case RUNSTATE_runnable...RUNSTATE_offline:
				TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
				if (!do_runstate_tests)
					goto done;
				if (verbose)
					printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
				rst.u.runstate.state = uc.args[1];
				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
				break;

			case 4:
				if (verbose)
					printf("Testing RUNSTATE_ADJUST\n");
				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
				memset(&rst.u, 0, sizeof(rst.u));
				rst.u.runstate.state = (uint64_t)-1;
				rst.u.runstate.time_blocked =
					0x5a - rs->time[RUNSTATE_blocked];
				rst.u.runstate.time_offline =
					0x6b6b - rs->time[RUNSTATE_offline];
				rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
					rst.u.runstate.time_offline;
				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
				break;

			case 5:
				if (verbose)
					printf("Testing RUNSTATE_DATA\n");
				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
				memset(&rst.u, 0, sizeof(rst.u));
				rst.u.runstate.state = RUNSTATE_running;
				rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
				rst.u.runstate.time_blocked = 0x6b6b;
				rst.u.runstate.time_offline = 0x5a;
				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
				break;

			case 6:
				if (verbose)
					printf("Testing steal time\n");
				/* Yield until scheduler delay exceeds target */
				rundelay = get_run_delay() + MIN_STEAL_TIME;
				do {
					sched_yield();
				} while (get_run_delay() < rundelay);
				break;

			case 7:
				if (!do_eventfd_tests)
					goto done;
				if (verbose)
					printf("Testing masked event channel\n");
				shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1;
				eventfd_write(irq_fd[0], 1UL);
				alarm(1);
				break;

			case 8:
				if (verbose)
					printf("Testing unmasked event channel\n");
				/* Unmask that, but deliver the other one */
				shinfo->evtchn_pending[0] = 0;
				shinfo->evtchn_mask[0] = 0;
				eventfd_write(irq_fd[1], 1UL);
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case 9:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[1] = 0;
				if (verbose)
					printf("Testing event channel after memslot change\n");
				vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
							    DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
				eventfd_write(irq_fd[0], 1UL);
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case 10:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				if (!do_evtchn_tests)
					goto done;

				shinfo->evtchn_pending[0] = 0;
				if (verbose)
					printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n");

				struct kvm_irq_routing_xen_evtchn e;
				e.port = EVTCHN_TEST2;
				e.vcpu = VCPU_ID;
				e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

				vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e);
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case 11:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[1] = 0;

				if (verbose)
					printf("Testing guest EVTCHNOP_send direct to evtchn\n");
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case 12:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[0] = 0;

				if (verbose)
					printf("Testing guest EVTCHNOP_send to eventfd\n");
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case 13:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[1] = 0;

				if (verbose)
					printf("Testing guest oneshot timer\n");
				break;

			case 14:
				memset(&tmr, 0, sizeof(tmr));
				tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER;
				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &tmr);
				TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER,
					    "Timer port not returned");
				TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
					    "Timer priority not returned");
				TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time,
					    "Timer expiry not returned");
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case 15:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				shinfo->evtchn_pending[0] = 0;

				if (verbose)
					printf("Testing restored oneshot timer\n");

				tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr);
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case 16:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");

				if (verbose)
					printf("Testing SCHEDOP_poll with already pending event\n");
				shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER;
				alarm(1);
				break;

			case 17:
				if (verbose)
					printf("Testing SCHEDOP_poll timeout\n");
				shinfo->evtchn_pending[0] = 0;
				alarm(1);
				break;

			case 18:
				if (verbose)
					printf("Testing SCHEDOP_poll wake on masked event\n");

				tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr);
				alarm(1);
				break;

			case 19:
				shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0;
				if (verbose)
					printf("Testing SCHEDOP_poll wake on unmasked event\n");

				evtchn_irq_expected = true;
				tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr);

				/* Read it back and check the pending time is reported correctly */
				tmr.u.timer.expires_ns = 0;
				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &tmr);
				TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000,
					    "Timer not reported pending");
				alarm(1);
				break;

			case 20:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				/* Read timer and check it is no longer pending */
				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &tmr);
				TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending");

				shinfo->evtchn_pending[0] = 0;
				if (verbose)
					printf("Testing timer in the past\n");

				evtchn_irq_expected = true;
				tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL;
				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr);
				alarm(1);
				break;

			case 21:
				TEST_ASSERT(!evtchn_irq_expected,
					    "Expected event channel IRQ but it didn't happen");
				goto done;

			case 0x20:
				TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
				evtchn_irq_expected = false;
				break;
			}
			break;
		}
		case UCALL_DONE:
			goto done;
		default:
			TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
		}
	}

 done:
	alarm(0);
	clock_gettime(CLOCK_REALTIME, &max_ts);

	/*
	 * Just a *really* basic check that things are being put in the
	 * right place. The actual calculations are much the same for
	 * Xen as they are for the KVM variants, so no need to check.
	 */
	struct pvclock_wall_clock *wc;
	struct pvclock_vcpu_time_info *ti, *ti2;

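	/*
	 * The wallclock lives at offset 0xc00 of the shared_info page (after
	 * the 32 vcpu_info slots and the two event channel bitmaps); the
	 * first time_info is the 'time' field of the vcpu_info placed at
	 * VCPU_INFO_ADDR (offset 0x40 + 0x20).
	 */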
	wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
	ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
	ti2 = addr_gpa2hva(vm, PVTIME_ADDR);

	if (verbose) {
		printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
		printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
		       ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
		       ti->tsc_shift, ti->flags);
		printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
		       ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
		       ti2->tsc_shift, ti2->flags);
	}

	vm_ts.tv_sec = wc->sec;
	vm_ts.tv_nsec = wc->nsec;
	TEST_ASSERT(wc->version && !(wc->version & 1),
		    "Bad wallclock version %x", wc->version);
	TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old");
	TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new");

	TEST_ASSERT(ti->version && !(ti->version & 1),
		    "Bad time_info version %x", ti->version);
	TEST_ASSERT(ti2->version && !(ti2->version & 1),
845 		    "Bad time_info version %x", ti->version);

	if (do_runstate_tests) {
		/*
		 * Fetch runstate and check sanity. Strictly speaking in the
		 * general case we might not expect the numbers to be identical
		 * but in this case we know we aren't running the vCPU any more.
		 */
		struct kvm_xen_vcpu_attr rst = {
			.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
		};
		vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst);

		if (verbose) {
			printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
			       rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
			       rs->state, rs->state_entry_time);
			for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
				printf("State %s: %" PRIu64 " ns\n",
				       runstate_names[i], rs->time[i]);
			}
		}
		TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
		TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
			    "State entry time mismatch");
		TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
			    "Running time mismatch");
		TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
			    "Runnable time mismatch");
		TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
			    "Blocked time mismatch");
		TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
			    "Offline time mismatch");

		TEST_ASSERT(rs->state_entry_time == rs->time[0] +
			    rs->time[1] + rs->time[2] + rs->time[3],
			    "runstate times don't add up");
	}
	kvm_vm_free(vm);
	return 0;
}