1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
 * xen_shinfo_test
4  *
5  * Copyright © 2021 Amazon.com, Inc. or its affiliates.
6  *
7  * Xen shared_info / pvclock testing
8  */
9 
10 #include "test_util.h"
11 #include "kvm_util.h"
12 #include "processor.h"
13 
14 #include <stdint.h>
15 #include <time.h>
16 #include <sched.h>
17 #include <sys/syscall.h>
18 
/* vCPU id handed to vm_create_default() and every vcpu_*() call in main() */
#define VCPU_ID		5

/*
 * Guest memory region that main() adds as two pages in memslot
 * SHINFO_REGION_SLOT and maps at a guest-virtual address equal to its
 * guest-physical address.  The first page is registered as the Xen
 * shared_info page.
 */
#define SHINFO_REGION_GVA	0xc0000000ULL
#define SHINFO_REGION_GPA	0xc0000000ULL
#define SHINFO_REGION_SLOT	10
#define PAGE_SIZE		4096

/*
 * Second page of the region: the vCPU time info is registered at its start
 * and the runstate area 0x20 bytes in, i.e. right after the 32-byte
 * struct pvclock_vcpu_time_info.
 */
#define PVTIME_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE)
#define RUNSTATE_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE + 0x20)

/* Guest-virtual address of the runstate area, dereferenced by guest_code() */
#define RUNSTATE_VADDR	(SHINFO_REGION_GVA + PAGE_SIZE + 0x20)

/* The VM under test; created in main() */
static struct kvm_vm *vm;

/* MSR index passed to KVM_XEN_HVM_CONFIG in main() */
#define XEN_HYPERCALL_MSR	0x40000000

/*
 * Amount by which the host thread's scheduler run delay must grow in stage 6,
 * and the minimum 'runnable' time the guest then expects to see.
 * NOTE(review): units are presumably nanoseconds (schedstat) - confirm.
 */
#define MIN_STEAL_TIME		50000
36 
/*
 * Per-vCPU time record as laid out in guest memory.  main() looks at two
 * copies of it (one inside the vcpu_info area, one at PVTIME_ADDR) and only
 * checks that 'version' is non-zero and even; the remaining fields are not
 * interpreted by this test.
 */
struct pvclock_vcpu_time_info {
        u32   version;
        u32   pad0;
        u64   tsc_timestamp;
        u64   system_time;
        u32   tsc_to_system_mul;
        s8    tsc_shift;
        u8    flags;
        u8    pad[2];
} __attribute__((__packed__)); /* 32 bytes */
47 
/*
 * Wall clock record.  main() reads it at offset 0xc00 of the shared_info
 * page, checks that 'version' is non-zero and even, and compares sec/nsec
 * against host CLOCK_REALTIME samples taken before and after the run.
 */
struct pvclock_wall_clock {
        u32   version;
        u32   sec;
        u32   nsec;
} __attribute__((__packed__));
53 
/*
 * Runstate area registered at RUNSTATE_ADDR and read by both host and guest.
 * time[] is indexed by the RUNSTATE_* values; the host asserts that
 * state_entry_time equals the sum of all four time[] entries.
 */
struct vcpu_runstate_info {
    uint32_t state;
    uint64_t state_entry_time;
    uint64_t time[4];
};
59 
/*
 * Values for vcpu_runstate_info.state, also used as indices into
 * vcpu_runstate_info.time[] and as the first three GUEST_SYNC() stages.
 */
#define RUNSTATE_running  0
#define RUNSTATE_runnable 1
#define RUNSTATE_blocked  2
#define RUNSTATE_offline  3
64 
/*
 * Guest side of the test.
 *
 * Each GUEST_SYNC(n) hands control to the host, which performs the action for
 * stage n in main(); the assertions that follow check what the guest then
 * sees in the runstate area mapped at RUNSTATE_VADDR.
 */
static void guest_code(void)
{
	struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;

	/*
	 * Test having the host set runstates manually: stages 1-3 ask the
	 * host to set the current runstate to runnable/blocked/offline.  Back
	 * in the guest, the matching time[] slot must be non-zero and the
	 * state must read 0 (RUNSTATE_running) again.
	 */
	GUEST_SYNC(RUNSTATE_runnable);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
	GUEST_ASSERT(rs->state == 0);

	GUEST_SYNC(RUNSTATE_blocked);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
	GUEST_ASSERT(rs->state == 0);

	GUEST_SYNC(RUNSTATE_offline);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
	GUEST_ASSERT(rs->state == 0);

	/*
	 * Test runstate time adjust: the host applies deltas chosen so that
	 * blocked becomes 0x5a and offline becomes 0x6b6b.
	 */
	GUEST_SYNC(4);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);

	/*
	 * Test runstate time set: the host writes the whole record with
	 * runnable left at 0, blocked = 0x6b6b and offline = 0x5a.
	 * NOTE(review): the host writes state_entry_time = 0x6b6b + 0x5a,
	 * which is below 0x8000, so the first check presumably relies on KVM
	 * advancing state_entry_time when the vCPU resumes - confirm.
	 */
	GUEST_SYNC(5);
	GUEST_ASSERT(rs->state_entry_time >= 0x8000);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);

	/* sched_yield() should result in some 'runnable' time */
	GUEST_SYNC(6);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);

	GUEST_DONE();
}
100 
/*
 * Return the second field of /proc/<tid>/schedstat for the calling thread
 * (the scheduler run delay, going by the function name).
 *
 * The caller spins until this value has grown, so a read failure must not be
 * papered over: if the file cannot be opened or does not start with two
 * integers, the problem is reported on stderr and the process exits with
 * EXIT_FAILURE instead of returning a garbage value.
 */
static long get_run_delay(void)
{
	char path[64];
	long val[2];
	FILE *fp;

	snprintf(path, sizeof(path), "/proc/%ld/schedstat",
		 syscall(SYS_gettid));

	fp = fopen(path, "r");
	if (!fp) {
		perror(path);
		exit(EXIT_FAILURE);
	}

	/* Both fields must parse, otherwise val[1] would be uninitialized. */
	if (fscanf(fp, "%ld %ld ", &val[0], &val[1]) != 2) {
		fprintf(stderr, "%s: unexpected schedstat format\n", path);
		fclose(fp);
		exit(EXIT_FAILURE);
	}
	fclose(fp);

	return val[1];
}
114 
/*
 * Three-way comparison of two timespecs: tv_sec decides, tv_nsec breaks
 * ties.  Returns 1 if *a is greater than *b, -1 if it is smaller and 0 if
 * both fields are equal.
 */
static int cmp_timespec(struct timespec *a, struct timespec *b)
{
	if (a->tv_sec != b->tv_sec)
		return a->tv_sec > b->tv_sec ? 1 : -1;

	if (a->tv_nsec != b->tv_nsec)
		return a->tv_nsec > b->tv_nsec ? 1 : -1;

	return 0;
}
128 
129 int main(int argc, char *argv[])
130 {
131 	struct timespec min_ts, max_ts, vm_ts;
132 
133 	int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
134 	if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO) ) {
135 		print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available");
136 		exit(KSFT_SKIP);
137 	}
138 
139 	bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
140 
141 	clock_gettime(CLOCK_REALTIME, &min_ts);
142 
143 	vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
144 	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
145 
146 	/* Map a region for the shared_info page */
147 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
148 				    SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
149 	virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2, 0);
150 
151 	struct kvm_xen_hvm_config hvmc = {
152 		.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
153 		.msr = XEN_HYPERCALL_MSR,
154 	};
155 	vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
156 
157 	struct kvm_xen_hvm_attr lm = {
158 		.type = KVM_XEN_ATTR_TYPE_LONG_MODE,
159 		.u.long_mode = 1,
160 	};
161 	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
162 
163 	struct kvm_xen_hvm_attr ha = {
164 		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
165 		.u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE,
166 	};
167 	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
168 
169 	struct kvm_xen_vcpu_attr vi = {
170 		.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
171 		.u.gpa = SHINFO_REGION_GPA + 0x40,
172 	};
173 	vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &vi);
174 
175 	struct kvm_xen_vcpu_attr pvclock = {
176 		.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
177 		.u.gpa = PVTIME_ADDR,
178 	};
179 	vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock);
180 
181 	if (do_runstate_tests) {
182 		struct kvm_xen_vcpu_attr st = {
183 			.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
184 			.u.gpa = RUNSTATE_ADDR,
185 		};
186 		vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st);
187 	}
188 
189 	struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
190 	rs->state = 0x5a;
191 
192 	for (;;) {
193 		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
194 		struct ucall uc;
195 
196 		vcpu_run(vm, VCPU_ID);
197 
198 		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
199 			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
200 			    run->exit_reason,
201 			    exit_reason_str(run->exit_reason));
202 
203 		switch (get_ucall(vm, VCPU_ID, &uc)) {
204 		case UCALL_ABORT:
205 			TEST_FAIL("%s", (const char *)uc.args[0]);
206 			/* NOT REACHED */
207 		case UCALL_SYNC: {
208 			struct kvm_xen_vcpu_attr rst;
209 			long rundelay;
210 
211 			/* If no runstate support, bail out early */
212 			if (!do_runstate_tests)
213 				goto done;
214 
215 			TEST_ASSERT(rs->state_entry_time == rs->time[0] +
216 				    rs->time[1] + rs->time[2] + rs->time[3],
217 				    "runstate times don't add up");
218 
219 			switch (uc.args[1]) {
220 			case RUNSTATE_running...RUNSTATE_offline:
221 				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
222 				rst.u.runstate.state = uc.args[1];
223 				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
224 				break;
225 			case 4:
226 				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
227 				memset(&rst.u, 0, sizeof(rst.u));
228 				rst.u.runstate.state = (uint64_t)-1;
229 				rst.u.runstate.time_blocked =
230 					0x5a - rs->time[RUNSTATE_blocked];
231 				rst.u.runstate.time_offline =
232 					0x6b6b - rs->time[RUNSTATE_offline];
233 				rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
234 					rst.u.runstate.time_offline;
235 				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
236 				break;
237 
238 			case 5:
239 				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
240 				memset(&rst.u, 0, sizeof(rst.u));
241 				rst.u.runstate.state = RUNSTATE_running;
242 				rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
243 				rst.u.runstate.time_blocked = 0x6b6b;
244 				rst.u.runstate.time_offline = 0x5a;
245 				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
246 				break;
247 			case 6:
248 				/* Yield until scheduler delay exceeds target */
249 				rundelay = get_run_delay() + MIN_STEAL_TIME;
250 				do {
251 					sched_yield();
252 				} while (get_run_delay() < rundelay);
253 				break;
254 			}
255 			break;
256 		}
257 		case UCALL_DONE:
258 			goto done;
259 		default:
260 			TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
261 		}
262 	}
263 
264  done:
265 	clock_gettime(CLOCK_REALTIME, &max_ts);
266 
267 	/*
268 	 * Just a *really* basic check that things are being put in the
269 	 * right place. The actual calculations are much the same for
270 	 * Xen as they are for the KVM variants, so no need to check.
271 	 */
272 	struct pvclock_wall_clock *wc;
273 	struct pvclock_vcpu_time_info *ti, *ti2;
274 
275 	wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
276 	ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
277 	ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
278 
279 	vm_ts.tv_sec = wc->sec;
280 	vm_ts.tv_nsec = wc->nsec;
281         TEST_ASSERT(wc->version && !(wc->version & 1),
282 		    "Bad wallclock version %x", wc->version);
283 	TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old");
284 	TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new");
285 
286 	TEST_ASSERT(ti->version && !(ti->version & 1),
287 		    "Bad time_info version %x", ti->version);
288 	TEST_ASSERT(ti2->version && !(ti2->version & 1),
289 		    "Bad time_info version %x", ti->version);
290 
291 	if (do_runstate_tests) {
292 		/*
293 		 * Fetch runstate and check sanity. Strictly speaking in the
294 		 * general case we might not expect the numbers to be identical
295 		 * but in this case we know we aren't running the vCPU any more.
296 		 */
297 		struct kvm_xen_vcpu_attr rst = {
298 			.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
299 		};
300 		vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst);
301 
302 		TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
303 		TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
304 			    "State entry time mismatch");
305 		TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
306 			    "Running time mismatch");
307 		TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
308 			    "Runnable time mismatch");
309 		TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
310 			    "Blocked time mismatch");
311 		TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
312 			    "Offline time mismatch");
313 
314 		TEST_ASSERT(rs->state_entry_time == rs->time[0] +
315 			    rs->time[1] + rs->time[2] + rs->time[3],
316 			    "runstate times don't add up");
317 	}
318 	kvm_vm_free(vm);
319 	return 0;
320 }
321