// SPDX-License-Identifier: GPL-2.0-only
/*
 * xen_shinfo_test
 *
 * Copyright © 2021 Amazon.com, Inc. or its affiliates.
 *
 * Xen shared_info / pvclock testing
 */

#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"

#include <stdint.h>
#include <time.h>
#include <sched.h>
#include <signal.h>

#include <sys/eventfd.h>

#define VCPU_ID		5

#define SHINFO_REGION_GVA	0xc0000000ULL
#define SHINFO_REGION_GPA	0xc0000000ULL
#define SHINFO_REGION_SLOT	10
#define PAGE_SIZE		4096

#define DUMMY_REGION_GPA	(SHINFO_REGION_GPA + (2 * PAGE_SIZE))
#define DUMMY_REGION_SLOT	11

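/*
 * Layout of the test region: the shared_info page sits at the start, the
 * pvclock time info page follows it, with the runstate area at offset 0x20
 * into that second page.  The vcpu_info is carved out of the shared_info
 * page itself at offset 0x40 (i.e. shared_info.vcpu_info[1]).
 */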
#define SHINFO_ADDR	(SHINFO_REGION_GPA)
#define PVTIME_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE)
#define RUNSTATE_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
#define VCPU_INFO_ADDR	(SHINFO_REGION_GPA + 0x40)

#define SHINFO_VADDR	(SHINFO_REGION_GVA)
#define RUNSTATE_VADDR	(SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
#define VCPU_INFO_VADDR	(SHINFO_REGION_GVA + 0x40)

#define EVTCHN_VECTOR	0x10

static struct kvm_vm *vm;

#define XEN_HYPERCALL_MSR	0x40000000

#define MIN_STEAL_TIME		50000

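/*
 * Local definitions of the Xen guest ABI structures that KVM writes into
 * the shared_info, vcpu_info, pvclock and runstate areas.
 */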
struct pvclock_vcpu_time_info {
	u32   version;
	u32   pad0;
	u64   tsc_timestamp;
	u64   system_time;
	u32   tsc_to_system_mul;
	s8    tsc_shift;
	u8    flags;
	u8    pad[2];
} __attribute__((__packed__)); /* 32 bytes */

struct pvclock_wall_clock {
	u32   version;
	u32   sec;
	u32   nsec;
} __attribute__((__packed__));

struct vcpu_runstate_info {
	uint32_t state;
	uint64_t state_entry_time;
	uint64_t time[4];
};

struct arch_vcpu_info {
	unsigned long cr2;
	unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
};

struct vcpu_info {
	uint8_t evtchn_upcall_pending;
	uint8_t evtchn_upcall_mask;
	unsigned long evtchn_pending_sel;
	struct arch_vcpu_info arch;
	struct pvclock_vcpu_time_info time;
}; /* 64 bytes (x86) */

struct shared_info {
	struct vcpu_info vcpu_info[32];
	unsigned long evtchn_pending[64];
	unsigned long evtchn_mask[64];
	struct pvclock_wall_clock wc;
	uint32_t wc_sec_hi;
	/* arch_shared_info here */
};

#define RUNSTATE_running  0
#define RUNSTATE_runnable 1
#define RUNSTATE_blocked  2
#define RUNSTATE_offline  3

static const char *runstate_names[] = {
	"running",
	"runnable",
	"blocked",
	"offline"
};

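/*
 * GSI routing table for the irqfd tests: two GSIs, each routed to a Xen
 * event channel port on the test vCPU.
 */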
struct {
	struct kvm_irq_routing info;
	struct kvm_irq_routing_entry entries[2];
} irq_routes;

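/*
 * Guest-side handler for the event channel upcall vector: acknowledge the
 * upcall by clearing the pending flags in vcpu_info, then report it to the
 * host via GUEST_SYNC(0x20).
 */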
static void evtchn_handler(struct ex_regs *regs)
{
	struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
	vi->evtchn_upcall_pending = 0;
	vi->evtchn_pending_sel = 0;

	GUEST_SYNC(0x20);
}

static void guest_code(void)
{
	struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;

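	/* Enable interrupts so the event channel upcall vector can be delivered */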
	__asm__ __volatile__(
		"sti\n"
		"nop\n"
	);

	/* Trigger an interrupt injection */
	GUEST_SYNC(0);

	/* Test having the host set runstates manually */
	GUEST_SYNC(RUNSTATE_runnable);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
	GUEST_ASSERT(rs->state == 0);

	GUEST_SYNC(RUNSTATE_blocked);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
	GUEST_ASSERT(rs->state == 0);

	GUEST_SYNC(RUNSTATE_offline);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
	GUEST_ASSERT(rs->state == 0);

	/* Test runstate time adjust */
	GUEST_SYNC(4);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);

	/* Test runstate time set */
	GUEST_SYNC(5);
	GUEST_ASSERT(rs->state_entry_time >= 0x8000);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);

	/* sched_yield() should result in some 'runnable' time */
	GUEST_SYNC(6);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);

	/* Attempt to deliver a *masked* interrupt */
	GUEST_SYNC(7);

	/* Wait until we see the bit set */
	struct shared_info *si = (void *)SHINFO_VADDR;
	while (!si->evtchn_pending[0])
		__asm__ __volatile__ ("rep nop" : : : "memory");

	/* Now deliver an *unmasked* interrupt */
	GUEST_SYNC(8);

	while (!si->evtchn_pending[1])
		__asm__ __volatile__ ("rep nop" : : : "memory");

	/* Change memslots and deliver an interrupt */
	GUEST_SYNC(9);

	for (;;)
		__asm__ __volatile__ ("rep nop" : : : "memory");
}

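/* Compare two timespecs, returning -1, 0 or 1 as a is before, equal to or after b */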
static int cmp_timespec(struct timespec *a, struct timespec *b)
{
	if (a->tv_sec > b->tv_sec)
		return 1;
	else if (a->tv_sec < b->tv_sec)
		return -1;
	else if (a->tv_nsec > b->tv_nsec)
		return 1;
	else if (a->tv_nsec < b->tv_nsec)
		return -1;
	else
		return 0;
}

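/* SIGALRM handler: an expected event channel IRQ failed to arrive in time */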
static void handle_alrm(int sig)
{
	TEST_FAIL("IRQ delivery timed out");
}

int main(int argc, char *argv[])
{
	struct timespec min_ts, max_ts, vm_ts;
	bool verbose;

	verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
			       !strncmp(argv[1], "--verbose", 10));

	int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
	if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO)) {
		print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available");
		exit(KSFT_SKIP);
	}

	bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
	bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);

	clock_gettime(CLOCK_REALTIME, &min_ts);

	vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());

	/* Map a region for the shared_info page */
	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
				    SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
	virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2);

	struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_ADDR);

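	/* Used later to map /dev/zero over the shared_info HVA */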
	int zero_fd = open("/dev/zero", O_RDONLY);
	TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");

	struct kvm_xen_hvm_config hvmc = {
		.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
		.msr = XEN_HYPERCALL_MSR,
	};
	vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);

	struct kvm_xen_hvm_attr lm = {
		.type = KVM_XEN_ATTR_TYPE_LONG_MODE,
		.u.long_mode = 1,
	};
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);

	struct kvm_xen_hvm_attr ha = {
		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
		.u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE,
	};
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);

	/*
	 * Test what happens when the HVA of the shinfo page is remapped after
	 * the kernel has a reference to it. But make sure we copy the clock
	 * info over since that's only set at setup time, and we test it later.
	 */
	struct pvclock_wall_clock wc_copy = shinfo->wc;
	void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
	TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
	shinfo->wc = wc_copy;

	struct kvm_xen_vcpu_attr vi = {
		.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
		.u.gpa = VCPU_INFO_ADDR,
	};
	vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &vi);

	struct kvm_xen_vcpu_attr pvclock = {
		.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
		.u.gpa = PVTIME_ADDR,
	};
	vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock);

	struct kvm_xen_hvm_attr vec = {
		.type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
		.u.vector = EVTCHN_VECTOR,
	};
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);

	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vm, VCPU_ID);
	vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);

	if (do_runstate_tests) {
		struct kvm_xen_vcpu_attr st = {
			.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
			.u.gpa = RUNSTATE_ADDR,
		};
		vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st);
	}

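	/* eventfds bound to GSIs 32 and 33 via KVM_IRQFD for the event channel tests */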
	int irq_fd[2] = { -1, -1 };

	if (do_eventfd_tests) {
		irq_fd[0] = eventfd(0, 0);
		irq_fd[1] = eventfd(0, 0);

		/* Unexpected, but not a KVM failure */
		if (irq_fd[0] == -1 || irq_fd[1] == -1)
			do_eventfd_tests = false;
	}

	if (do_eventfd_tests) {
		irq_routes.info.nr = 2;

		irq_routes.entries[0].gsi = 32;
		irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
		irq_routes.entries[0].u.xen_evtchn.port = 15;
		irq_routes.entries[0].u.xen_evtchn.vcpu = VCPU_ID;
		irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

		irq_routes.entries[1].gsi = 33;
		irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
		irq_routes.entries[1].u.xen_evtchn.port = 66;
		irq_routes.entries[1].u.xen_evtchn.vcpu = VCPU_ID;
		irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

		vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes);

		struct kvm_irqfd ifd = { };

		ifd.fd = irq_fd[0];
		ifd.gsi = 32;
		vm_ioctl(vm, KVM_IRQFD, &ifd);

		ifd.fd = irq_fd[1];
		ifd.gsi = 33;
		vm_ioctl(vm, KVM_IRQFD, &ifd);

		struct sigaction sa = { };
		sa.sa_handler = handle_alrm;
		sigaction(SIGALRM, &sa, NULL);
	}

	struct vcpu_info *vinfo = addr_gpa2hva(vm, VCPU_INFO_ADDR);
	vinfo->evtchn_upcall_pending = 0;

	struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
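	/* Poison rs->state; the guest's rs->state == 0 asserts only pass if KVM overwrites it */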
	rs->state = 0x5a;

	bool evtchn_irq_expected = false;

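	/* Main test loop: each GUEST_SYNC value from the guest selects the next host-side action */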
	for (;;) {
		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
		struct ucall uc;

		vcpu_run(vm, VCPU_ID);

		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
			    run->exit_reason,
			    exit_reason_str(run->exit_reason));

		switch (get_ucall(vm, VCPU_ID, &uc)) {
		case UCALL_ABORT:
			TEST_FAIL("%s", (const char *)uc.args[0]);
			/* NOT REACHED */
		case UCALL_SYNC: {
			struct kvm_xen_vcpu_attr rst;
			long rundelay;

			if (do_runstate_tests)
				TEST_ASSERT(rs->state_entry_time == rs->time[0] +
					    rs->time[1] + rs->time[2] + rs->time[3],
					    "runstate times don't add up");

			switch (uc.args[1]) {
			case 0:
				if (verbose)
					printf("Delivering evtchn upcall\n");
				evtchn_irq_expected = true;
				vinfo->evtchn_upcall_pending = 1;
				break;

			case RUNSTATE_runnable...RUNSTATE_offline:
				TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
				if (!do_runstate_tests)
					goto done;
				if (verbose)
					printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
				rst.u.runstate.state = uc.args[1];
				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
				break;

			case 4:
				if (verbose)
					printf("Testing RUNSTATE_ADJUST\n");
				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
				memset(&rst.u, 0, sizeof(rst.u));
				rst.u.runstate.state = (uint64_t)-1;
				rst.u.runstate.time_blocked =
					0x5a - rs->time[RUNSTATE_blocked];
				rst.u.runstate.time_offline =
					0x6b6b - rs->time[RUNSTATE_offline];
				rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
					rst.u.runstate.time_offline;
				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
				break;

			case 5:
				if (verbose)
					printf("Testing RUNSTATE_DATA\n");
				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
				memset(&rst.u, 0, sizeof(rst.u));
				rst.u.runstate.state = RUNSTATE_running;
				rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
				rst.u.runstate.time_blocked = 0x6b6b;
				rst.u.runstate.time_offline = 0x5a;
				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
				break;

			case 6:
				if (verbose)
					printf("Testing steal time\n");
				/* Yield until scheduler delay exceeds target */
				rundelay = get_run_delay() + MIN_STEAL_TIME;
				do {
					sched_yield();
				} while (get_run_delay() < rundelay);
				break;

			case 7:
				if (!do_eventfd_tests)
					goto done;
				if (verbose)
					printf("Testing masked event channel\n");
				shinfo->evtchn_mask[0] = 0x8000;
				eventfd_write(irq_fd[0], 1UL);
				alarm(1);
				break;

			case 8:
				if (verbose)
					printf("Testing unmasked event channel\n");
				/* Unmask that, but deliver the other one */
				shinfo->evtchn_pending[0] = 0;
				shinfo->evtchn_mask[0] = 0;
				eventfd_write(irq_fd[1], 1UL);
				evtchn_irq_expected = true;
				alarm(1);
				break;

			case 9:
				if (verbose)
					printf("Testing event channel after memslot change\n");
				vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
							    DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
				eventfd_write(irq_fd[0], 1UL);
				evtchn_irq_expected = true;
				alarm(1);
				break;

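			/* GUEST_SYNC(0x20) is issued from the guest's evtchn_handler */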
			case 0x20:
				TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
				evtchn_irq_expected = false;
				if (shinfo->evtchn_pending[1] &&
				    shinfo->evtchn_pending[0])
					goto done;
				break;
			}
			break;
		}
		case UCALL_DONE:
			goto done;
		default:
			TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
		}
	}

 done:
	clock_gettime(CLOCK_REALTIME, &max_ts);

	/*
	 * Just a *really* basic check that things are being put in the
	 * right place. The actual calculations are much the same for
	 * Xen as they are for the KVM variants, so no need to check.
	 */
	struct pvclock_wall_clock *wc;
	struct pvclock_vcpu_time_info *ti, *ti2;

	wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
	ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
	ti2 = addr_gpa2hva(vm, PVTIME_ADDR);

	if (verbose) {
		printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
		printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
		       ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
		       ti->tsc_shift, ti->flags);
		printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
		       ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
		       ti2->tsc_shift, ti2->flags);
	}

	vm_ts.tv_sec = wc->sec;
	vm_ts.tv_nsec = wc->nsec;
	TEST_ASSERT(wc->version && !(wc->version & 1),
		    "Bad wallclock version %x", wc->version);
	TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old");
	TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new");

	TEST_ASSERT(ti->version && !(ti->version & 1),
		    "Bad time_info version %x", ti->version);
	TEST_ASSERT(ti2->version && !(ti2->version & 1),
		    "Bad time_info version %x", ti2->version);

	if (do_runstate_tests) {
		/*
		 * Fetch runstate and check sanity. Strictly speaking in the
		 * general case we might not expect the numbers to be identical
		 * but in this case we know we aren't running the vCPU any more.
		 */
		struct kvm_xen_vcpu_attr rst = {
			.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
		};
		vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst);

		if (verbose) {
			printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
			       rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
			       rs->state, rs->state_entry_time);
			for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
				printf("State %s: %" PRIu64 " ns\n",
				       runstate_names[i], rs->time[i]);
			}
		}
		TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
		TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
			    "State entry time mismatch");
		TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
			    "Running time mismatch");
		TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
			    "Runnable time mismatch");
		TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
			    "Blocked time mismatch");
		TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
			    "Offline time mismatch");

		TEST_ASSERT(rs->state_entry_time == rs->time[0] +
			    rs->time[1] + rs->time[2] + rs->time[3],
			    "runstate times don't add up");
	}
	kvm_vm_free(vm);
	return 0;
}