// SPDX-License-Identifier: GPL-2.0
/*
 * xapic_ipi_test
 *
 * Copyright (C) 2020, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
 * another vCPU that is halted when KVM's backing page for the APIC access
 * address has been moved by mm.
 *
 * The test starts two vCPUs: one that sends IPIs and one that continually
 * executes HLT. The sender checks that the halter has woken from the HLT and
 * has reentered HLT before sending the next IPI. While the vCPUs are running,
 * the host continually calls migrate_pages to move all of the process' pages
 * amongst the available numa nodes on the machine.
 *
 * Migration is a command line option. When used on a non-numa machine, the
 * test will exit with an error. The test is still useful on non-numa machines
 * for testing IPIs.
 */

#define _GNU_SOURCE /* for program_invocation_short_name */
#include <getopt.h>
#include <pthread.h>
#include <inttypes.h>
#include <string.h>
#include <time.h>

#include "kvm_util.h"
#include "numaif.h"
#include "processor.h"
#include "test_util.h"
#include "vmx.h"

/* Default running time for the test */
#define DEFAULT_RUN_SECS 3

/* Default delay between migrate_pages calls (microseconds) */
#define DEFAULT_DELAY_USECS 500000

#define HALTER_VCPU_ID 0
#define SENDER_VCPU_ID 1

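/*
 * The xAPIC is accessed via MMIO at the default APIC base GPA. The guest
 * identity-maps this region, so a plain pointer works for register access.
 */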
volatile uint32_t *apic_base = (volatile uint32_t *)APIC_DEFAULT_GPA;

/*
 * Vector for IPI from sender vCPU to halting vCPU.
 * Value is arbitrary and was chosen for the alternating bit pattern. Any
 * value should work.
 */
#define IPI_VECTOR	 0xa5

/*
 * Incremented in the IPI handler. Provides evidence to the sender that the
 * IPI arrived at the destination.
 */
static volatile uint64_t ipis_rcvd;

/* Data struct shared between host main thread and vCPUs */
struct test_data_page {
	uint32_t halter_apic_id;
	volatile uint64_t hlt_count;
	volatile uint64_t wake_count;
	uint64_t ipis_sent;
	uint64_t migrations_attempted;
	uint64_t migrations_completed;
	uint32_t icr;
	uint32_t icr2;
	uint32_t halter_tpr;
	uint32_t halter_ppr;

	/*
	 *  Record local version register as a cross-check that APIC access
	 *  worked. Value should match what KVM reports (APIC_VERSION in
	 *  arch/x86/kvm/lapic.c). If test is failing, check that values match
	 *  to determine whether APIC access exits are working.
	 */
	uint32_t halter_lvr;
};

struct thread_params {
	struct test_data_page *data;
	struct kvm_vm *vm;
	uint32_t vcpu_id;
	uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
};

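/*
 * xAPIC registers live at 16-byte aligned byte offsets from the APIC base;
 * 'reg' is the byte offset, so index the 32-bit MMIO array with reg >> 2.
 */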
uint32_t read_apic_reg(uint reg)
{
	return apic_base[reg >> 2];
}

void write_apic_reg(uint reg, uint32_t val)
{
	apic_base[reg >> 2] = val;
}

void disable_apic(void)
{
	wrmsr(MSR_IA32_APICBASE,
	      rdmsr(MSR_IA32_APICBASE) &
		~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD));
}

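/*
 * Put the local APIC into xAPIC mode (from x2APIC or disabled) and then
 * software-enable it via the spurious interrupt vector register.
 */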
void enable_xapic(void)
{
	uint64_t val = rdmsr(MSR_IA32_APICBASE);

	/* Per SDM: to enable xAPIC from x2APIC mode, first disable the APIC */
	if (val & MSR_IA32_APICBASE_EXTD) {
		disable_apic();
		wrmsr(MSR_IA32_APICBASE,
		      rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE);
	} else if (!(val & MSR_IA32_APICBASE_ENABLE)) {
		wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE);
	}

	/*
	 * Per SDM: reset value of spurious interrupt vector register has the
	 * APIC software enabled bit=0. It must be enabled in addition to the
	 * enable bit in the MSR.
	 */
	val = read_apic_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED;
	write_apic_reg(APIC_SPIV, val);
}

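/* Sanity check that the APIC base MSR points at the GPA the test maps. */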
void verify_apic_base_addr(void)
{
	uint64_t msr = rdmsr(MSR_IA32_APICBASE);
	uint64_t base = GET_APIC_BASE(msr);

	GUEST_ASSERT(base == APIC_DEFAULT_GPA);
}

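/*
 * Guest code for the halter vCPU: report its APIC ID and version to the
 * host, then loop in HLT forever, counting halts and wakeups.
 */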
static void halter_guest_code(struct test_data_page *data)
{
	verify_apic_base_addr();
	enable_xapic();

	data->halter_apic_id = GET_APIC_ID_FIELD(read_apic_reg(APIC_ID));
	data->halter_lvr = read_apic_reg(APIC_LVR);

	/*
	 * Loop forever HLTing and recording halts & wakes. Disable interrupts
	 * each time around to minimize window between signaling the pending
	 * halt to the sender vCPU and executing the halt. No need to disable on
	 * first run as this vCPU executes first and the host waits for it to
	 * signal going into first halt before starting the sender vCPU. Record
	 * TPR and PPR for diagnostic purposes in case the test fails.
	 */
	for (;;) {
		data->halter_tpr = read_apic_reg(APIC_TASKPRI);
		data->halter_ppr = read_apic_reg(APIC_PROCPRI);
		data->hlt_count++;
		asm volatile("sti; hlt; cli");
		data->wake_count++;
	}
}

/*
 * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to
 * enable diagnosing errant writes to the APIC access address backing page in
 * case of test failure.
 */
static void guest_ipi_handler(struct ex_regs *regs)
{
	ipis_rcvd++;
	write_apic_reg(APIC_EOI, 77);
}

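/*
 * Guest code for the sender vCPU: repeatedly IPI the halter and verify,
 * within roughly one second per iteration, that the halter received the IPI,
 * woke from HLT, and went back into HLT.
 */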
static void sender_guest_code(struct test_data_page *data)
{
	uint64_t last_wake_count;
	uint64_t last_hlt_count;
	uint64_t last_ipis_rcvd_count;
	uint32_t icr_val;
	uint32_t icr2_val;
	uint64_t tsc_start;

	verify_apic_base_addr();
	enable_xapic();

	/*
	 * Init interrupt command register for sending IPIs
	 *
	 * Delivery mode=fixed, per SDM:
	 *   "Delivers the interrupt specified in the vector field to the target
	 *    processor."
	 *
	 * Destination mode=physical i.e. specify target by its local APIC
	 * ID. This vCPU assumes that the halter vCPU has already started and
	 * set data->halter_apic_id.
	 */
	icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
	icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
	data->icr = icr_val;
	data->icr2 = icr2_val;

	last_wake_count = data->wake_count;
	last_hlt_count = data->hlt_count;
	last_ipis_rcvd_count = ipis_rcvd;
	for (;;) {
		/*
		 * Send IPI to halter vCPU.
		 * First IPI can be sent unconditionally because halter vCPU
		 * starts earlier.
		 */
		write_apic_reg(APIC_ICR2, icr2_val);
		write_apic_reg(APIC_ICR, icr_val);
		data->ipis_sent++;

		/*
		 * Wait up to ~1 sec for halter to indicate that it has:
		 * 1. Received the IPI
		 * 2. Woken up from the halt
		 * 3. Gone back into halt
		 * Current CPUs typically run at 2.x GHz which is ~2
		 * billion ticks per second.
		 */
		tsc_start = rdtsc();
		while (rdtsc() - tsc_start < 2000000000) {
			if ((ipis_rcvd != last_ipis_rcvd_count) &&
			    (data->wake_count != last_wake_count) &&
			    (data->hlt_count != last_hlt_count))
				break;
		}

		GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
			     (data->wake_count != last_wake_count) &&
			     (data->hlt_count != last_hlt_count));

		last_wake_count = data->wake_count;
		last_hlt_count = data->hlt_count;
		last_ipis_rcvd_count = ipis_rcvd;
	}
}

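/*
 * Thread body for each vCPU. In this test the guests never exit to userspace
 * on their own except to report a GUEST_ASSERT failure via ucall (seen as a
 * KVM_EXIT_IO), so the thread normally runs until main() cancels it.
 */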
static void *vcpu_thread(void *arg)
{
	struct thread_params *params = (struct thread_params *)arg;
	struct ucall uc;
	int old;
	int r;
	unsigned int exit_reason;

	r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
	TEST_ASSERT(r == 0,
		    "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
		    params->vcpu_id, r);

	fprintf(stderr, "vCPU thread running vCPU %u\n", params->vcpu_id);
	vcpu_run(params->vm, params->vcpu_id);
	exit_reason = vcpu_state(params->vm, params->vcpu_id)->exit_reason;

	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
		    "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
		    params->vcpu_id, exit_reason, exit_reason_str(exit_reason));

	if (get_ucall(params->vm, params->vcpu_id, &uc) == UCALL_ABORT) {
		TEST_ASSERT(false,
			    "vCPU %u exited with error: %s.\n"
			    "Sending vCPU sent %lu IPIs to halting vCPU\n"
			    "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
			    "Halter TPR=%#x PPR=%#x LVR=%#x\n"
			    "Migrations attempted: %lu\n"
			    "Migrations completed: %lu\n",
			    params->vcpu_id, (const char *)uc.args[0],
			    params->data->ipis_sent, params->data->hlt_count,
			    params->data->wake_count,
			    *params->pipis_rcvd, params->data->halter_tpr,
			    params->data->halter_ppr, params->data->halter_lvr,
			    params->data->migrations_attempted,
			    params->data->migrations_completed);
	}

	return NULL;
}

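/* Cancel a vCPU thread and reap it, verifying it actually exited via cancellation. */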
static void cancel_join_vcpu_thread(pthread_t thread, uint32_t vcpu_id)
{
	void *retval;
	int r;

	r = pthread_cancel(thread);
	TEST_ASSERT(r == 0,
		    "pthread_cancel on vcpu_id=%d failed with errno=%d",
		    vcpu_id, r);

	r = pthread_join(thread, &retval);
	TEST_ASSERT(r == 0,
		    "pthread_join on vcpu_id=%d failed with errno=%d",
		    vcpu_id, r);
	TEST_ASSERT(retval == PTHREAD_CANCELED,
		    "expected retval=%p, got %p", PTHREAD_CANCELED,
		    retval);
}

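/*
 * Repeatedly call migrate_pages() to move all of this process' pages
 * (including KVM's APIC access page) between NUMA nodes while the vCPUs run,
 * printing progress once per second and asserting that the guests are still
 * making forward progress.
 */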
void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
		   uint64_t *pipis_rcvd)
{
	long pages_not_moved;
	unsigned long nodemask = 0;
	unsigned long nodemasks[sizeof(nodemask) * 8];
	int nodes = 0;
	time_t start_time, last_update, now;
	time_t interval_secs = 1;
	int i, r;
	int from, to;
	unsigned long bit;
	uint64_t hlt_count;
	uint64_t wake_count;
	uint64_t ipis_sent;

	fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
		delay_usecs);

	/* Get set of first 64 numa nodes available */
	r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
			  0, MPOL_F_MEMS_ALLOWED);
	TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);

	fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
		"(each 1-bit indicates node is present): %#lx\n",
		sizeof(nodemask) * 8, nodemask);

	/* Init array of masks containing a single-bit in each, one for each
	 * available node. migrate_pages called below requires specifying nodes
	 * as bit masks.
	 */
	for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) {
		if (nodemask & bit) {
			nodemasks[nodes] = nodemask & bit;
			nodes++;
		}
	}

	TEST_ASSERT(nodes > 1,
		    "Did not find at least 2 numa nodes. Can't do migration\n");

	fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);

	from = 0;
	to = 1;
	start_time = time(NULL);
	last_update = start_time;

	ipis_sent = data->ipis_sent;
	hlt_count = data->hlt_count;
	wake_count = data->wake_count;

	while ((int)(time(NULL) - start_time) < run_secs) {
		data->migrations_attempted++;

		/*
		 * migrate_pages with PID=0 will migrate all pages of this
		 * process between the nodes specified as bitmasks. The page
		 * backing the APIC access address belongs to this process
		 * because it is allocated by KVM in the context of the
		 * KVM_CREATE_VCPU ioctl. If that assumption ever changes this
		 * test may break or give a false positive signal.
		 */
		pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]),
						&nodemasks[from],
						&nodemasks[to]);
		if (pages_not_moved < 0)
			fprintf(stderr,
				"migrate_pages failed, errno=%d\n", errno);
		else if (pages_not_moved > 0)
			fprintf(stderr,
				"migrate_pages could not move %ld pages\n",
				pages_not_moved);
		else
			data->migrations_completed++;

		from = to;
		to++;
		if (to == nodes)
			to = 0;

		now = time(NULL);
		if (((now - start_time) % interval_secs == 0) &&
		    (now != last_update)) {
			last_update = now;
			fprintf(stderr,
				"%lu seconds: Migrations attempted=%lu completed=%lu, "
				"IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
				now - start_time, data->migrations_attempted,
				data->migrations_completed,
				data->ipis_sent, *pipis_rcvd,
				data->hlt_count, data->wake_count);

			TEST_ASSERT(ipis_sent != data->ipis_sent &&
				    hlt_count != data->hlt_count &&
				    wake_count != data->wake_count,
				    "IPI, HLT and wake count have not increased "
				    "in the last %lu seconds. "
				    "HLTer is likely hung.\n", interval_secs);

			ipis_sent = data->ipis_sent;
			hlt_count = data->hlt_count;
			wake_count = data->wake_count;
		}
		usleep(delay_usecs);
	}
}

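/* Parse -s <run seconds>, -m (enable migrations), and -d <delay usecs>. */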
void get_cmdline_args(int argc, char *argv[], int *run_secs,
		      bool *migrate, int *delay_usecs)
{
	for (;;) {
		int opt = getopt(argc, argv, "s:d:m");

		if (opt == -1)
			break;
		switch (opt) {
		case 's':
			*run_secs = parse_size(optarg);
			break;
		case 'm':
			*migrate = true;
			break;
		case 'd':
			*delay_usecs = parse_size(optarg);
			break;
		default:
			TEST_ASSERT(false,
				    "Usage: -s <runtime seconds>. Default is %d seconds.\n"
				    "-m adds calls to migrate_pages while vCPUs are running."
				    " Default is no migrations.\n"
				    "-d <delay microseconds> - delay between migrate_pages() calls."
				    " Default is %d microseconds.\n",
				    DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
		}
	}
}

int main(int argc, char *argv[])
{
	int r;
	int wait_secs;
	const int max_halter_wait = 10;
	int run_secs = 0;
	int delay_usecs = 0;
	struct test_data_page *data;
	vm_vaddr_t test_data_page_vaddr;
	bool migrate = false;
	pthread_t threads[2];
	struct thread_params params[2];
	struct kvm_vm *vm;
	uint64_t *pipis_rcvd;

	get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
	if (run_secs <= 0)
		run_secs = DEFAULT_RUN_SECS;
	if (delay_usecs <= 0)
		delay_usecs = DEFAULT_DELAY_USECS;

	vm = vm_create_default(HALTER_VCPU_ID, 0, halter_guest_code);
	params[0].vm = vm;
	params[1].vm = vm;

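	/* Set up the IDT and register the guest IPI handler for the halter vCPU. */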
	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vm, HALTER_VCPU_ID);
	vm_handle_exception(vm, IPI_VECTOR, guest_ipi_handler);

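	/* Identity map the xAPIC MMIO region so the guests can access it directly. */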
	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA, 0);

	vm_vcpu_add_default(vm, SENDER_VCPU_ID, sender_guest_code);

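	/* Allocate the shared data page and pass its GVA to both vCPUs. */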
	test_data_page_vaddr = vm_vaddr_alloc(vm, 0x1000, 0x1000, 0, 0);
	data = (struct test_data_page *)addr_gva2hva(vm, test_data_page_vaddr);
	memset(data, 0, sizeof(*data));
	params[0].data = data;
	params[1].data = data;

	vcpu_args_set(vm, HALTER_VCPU_ID, 1, test_data_page_vaddr);
	vcpu_args_set(vm, SENDER_VCPU_ID, 1, test_data_page_vaddr);

	pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
	params[0].pipis_rcvd = pipis_rcvd;
	params[1].pipis_rcvd = pipis_rcvd;

	/* Start halter vCPU thread and wait for it to execute first HLT. */
	params[0].vcpu_id = HALTER_VCPU_ID;
	r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
	TEST_ASSERT(r == 0,
		    "pthread_create halter failed errno=%d", errno);
	fprintf(stderr, "Halter vCPU thread started\n");

	wait_secs = 0;
	while ((wait_secs < max_halter_wait) && !data->hlt_count) {
		sleep(1);
		wait_secs++;
	}

	TEST_ASSERT(data->hlt_count,
		    "Halter vCPU did not execute first HLT within %d seconds",
		    max_halter_wait);

	fprintf(stderr,
		"Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
		data->halter_apic_id, wait_secs);

	params[1].vcpu_id = SENDER_VCPU_ID;
	r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
	TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno);

	fprintf(stderr,
		"IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n",
		run_secs);

	if (!migrate)
		sleep(run_secs);
	else
		do_migrations(data, run_secs, delay_usecs, pipis_rcvd);

	/*
	 * Cancel threads and wait for them to stop.
	 */
	cancel_join_vcpu_thread(threads[0], HALTER_VCPU_ID);
	cancel_join_vcpu_thread(threads[1], SENDER_VCPU_ID);

	fprintf(stderr,
		"Test successful after running for %d seconds.\n"
		"Sending vCPU sent %lu IPIs to halting vCPU\n"
		"Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
		"Halter APIC ID=%#x\n"
		"Sender ICR value=%#x ICR2 value=%#x\n"
		"Halter TPR=%#x PPR=%#x LVR=%#x\n"
		"Migrations attempted: %lu\n"
		"Migrations completed: %lu\n",
		run_secs, data->ipis_sent,
		data->hlt_count, data->wake_count, *pipis_rcvd,
		data->halter_apic_id,
		data->icr, data->icr2,
		data->halter_tpr, data->halter_ppr, data->halter_lvr,
		data->migrations_attempted, data->migrations_completed);

	kvm_vm_free(vm);

	return 0;
}