1*eede2065SJue Wang // SPDX-License-Identifier: GPL-2.0
2*eede2065SJue Wang /*
3*eede2065SJue Wang  * ucna_injection_test
4*eede2065SJue Wang  *
5*eede2065SJue Wang  * Copyright (C) 2022, Google LLC.
6*eede2065SJue Wang  *
7*eede2065SJue Wang  * This work is licensed under the terms of the GNU GPL, version 2.
8*eede2065SJue Wang  *
9*eede2065SJue Wang  * Test that user space can inject UnCorrectable No Action required (UCNA)
10*eede2065SJue Wang  * memory errors to the guest.
11*eede2065SJue Wang  *
12*eede2065SJue Wang  * The test starts one vCPU with the MCG_CMCI_P enabled. It verifies that
13*eede2065SJue Wang  * proper UCNA errors can be injected to a vCPU with MCG_CMCI_P and
14*eede2065SJue Wang  * corresponding per-bank control register (MCI_CTL2) bit enabled.
 * The test also checks that the UCNA errors get recorded in the
 * Machine Check bank registers regardless of whether the error signal
 * interrupts get delivered into the guest.
18*eede2065SJue Wang  *
19*eede2065SJue Wang  */
20*eede2065SJue Wang 
21*eede2065SJue Wang #define _GNU_SOURCE /* for program_invocation_short_name */
22*eede2065SJue Wang #include <pthread.h>
23*eede2065SJue Wang #include <inttypes.h>
24*eede2065SJue Wang #include <string.h>
25*eede2065SJue Wang #include <time.h>
26*eede2065SJue Wang 
27*eede2065SJue Wang #include "kvm_util_base.h"
28*eede2065SJue Wang #include "kvm_util.h"
29*eede2065SJue Wang #include "mce.h"
30*eede2065SJue Wang #include "processor.h"
31*eede2065SJue Wang #include "test_util.h"
32*eede2065SJue Wang #include "apic.h"
33*eede2065SJue Wang 
34*eede2065SJue Wang #define SYNC_FIRST_UCNA 9
35*eede2065SJue Wang #define SYNC_SECOND_UCNA 10
36*eede2065SJue Wang #define SYNC_GP 11
37*eede2065SJue Wang #define FIRST_UCNA_ADDR 0xdeadbeef
38*eede2065SJue Wang #define SECOND_UCNA_ADDR 0xcafeb0ba
39*eede2065SJue Wang 
40*eede2065SJue Wang /*
41*eede2065SJue Wang  * Vector for the CMCI interrupt.
42*eede2065SJue Wang  * Value is arbitrary. Any value in 0x20-0xFF should work:
43*eede2065SJue Wang  * https://wiki.osdev.org/Interrupt_Vector_Table
44*eede2065SJue Wang  */
45*eede2065SJue Wang #define CMCI_VECTOR  0xa9
46*eede2065SJue Wang 
47*eede2065SJue Wang #define UCNA_BANK  0x7	// IMC0 bank
48*eede2065SJue Wang 
49*eede2065SJue Wang #define MCI_CTL2_RESERVED_BIT BIT_ULL(29)
50*eede2065SJue Wang 
51*eede2065SJue Wang static uint64_t supported_mcg_caps;
52*eede2065SJue Wang 
53*eede2065SJue Wang /*
54*eede2065SJue Wang  * Record states about the injected UCNA.
55*eede2065SJue Wang  * The variables started with the 'i_' prefixes are recorded in interrupt
56*eede2065SJue Wang  * handler. Variables without the 'i_' prefixes are recorded in guest main
57*eede2065SJue Wang  * execution thread.
58*eede2065SJue Wang  */
59*eede2065SJue Wang static volatile uint64_t i_ucna_rcvd;
60*eede2065SJue Wang static volatile uint64_t i_ucna_addr;
61*eede2065SJue Wang static volatile uint64_t ucna_addr;
62*eede2065SJue Wang static volatile uint64_t ucna_addr2;
63*eede2065SJue Wang 
64*eede2065SJue Wang struct thread_params {
65*eede2065SJue Wang 	struct kvm_vcpu *vcpu;
66*eede2065SJue Wang 	uint64_t *p_i_ucna_rcvd;
67*eede2065SJue Wang 	uint64_t *p_i_ucna_addr;
68*eede2065SJue Wang 	uint64_t *p_ucna_addr;
69*eede2065SJue Wang 	uint64_t *p_ucna_addr2;
70*eede2065SJue Wang };
71*eede2065SJue Wang 
72*eede2065SJue Wang static void verify_apic_base_addr(void)
73*eede2065SJue Wang {
74*eede2065SJue Wang 	uint64_t msr = rdmsr(MSR_IA32_APICBASE);
75*eede2065SJue Wang 	uint64_t base = GET_APIC_BASE(msr);
76*eede2065SJue Wang 
77*eede2065SJue Wang 	GUEST_ASSERT(base == APIC_DEFAULT_GPA);
78*eede2065SJue Wang }
79*eede2065SJue Wang 
80*eede2065SJue Wang static void ucna_injection_guest_code(void)
81*eede2065SJue Wang {
82*eede2065SJue Wang 	uint64_t ctl2;
83*eede2065SJue Wang 	verify_apic_base_addr();
84*eede2065SJue Wang 	xapic_enable();
85*eede2065SJue Wang 
86*eede2065SJue Wang 	/* Sets up the interrupt vector and enables per-bank CMCI sigaling. */
87*eede2065SJue Wang 	xapic_write_reg(APIC_LVTCMCI, CMCI_VECTOR | APIC_DM_FIXED);
88*eede2065SJue Wang 	ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
89*eede2065SJue Wang 	wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
90*eede2065SJue Wang 
91*eede2065SJue Wang 	/* Enables interrupt in guest. */
92*eede2065SJue Wang 	asm volatile("sti");
93*eede2065SJue Wang 
94*eede2065SJue Wang 	/* Let user space inject the first UCNA */
95*eede2065SJue Wang 	GUEST_SYNC(SYNC_FIRST_UCNA);
96*eede2065SJue Wang 
97*eede2065SJue Wang 	ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
98*eede2065SJue Wang 
99*eede2065SJue Wang 	/* Disables the per-bank CMCI signaling. */
100*eede2065SJue Wang 	ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
101*eede2065SJue Wang 	wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 & ~MCI_CTL2_CMCI_EN);
102*eede2065SJue Wang 
103*eede2065SJue Wang 	/* Let the user space inject the second UCNA */
104*eede2065SJue Wang 	GUEST_SYNC(SYNC_SECOND_UCNA);
105*eede2065SJue Wang 
106*eede2065SJue Wang 	ucna_addr2 = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
107*eede2065SJue Wang 	GUEST_DONE();
108*eede2065SJue Wang }
109*eede2065SJue Wang 
110*eede2065SJue Wang static void cmci_disabled_guest_code(void)
111*eede2065SJue Wang {
112*eede2065SJue Wang 	uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
113*eede2065SJue Wang 	wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
114*eede2065SJue Wang 
115*eede2065SJue Wang 	GUEST_DONE();
116*eede2065SJue Wang }
117*eede2065SJue Wang 
118*eede2065SJue Wang static void cmci_enabled_guest_code(void)
119*eede2065SJue Wang {
120*eede2065SJue Wang 	uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
121*eede2065SJue Wang 	wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_RESERVED_BIT);
122*eede2065SJue Wang 
123*eede2065SJue Wang 	GUEST_DONE();
124*eede2065SJue Wang }
125*eede2065SJue Wang 
126*eede2065SJue Wang static void guest_cmci_handler(struct ex_regs *regs)
127*eede2065SJue Wang {
128*eede2065SJue Wang 	i_ucna_rcvd++;
129*eede2065SJue Wang 	i_ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
130*eede2065SJue Wang 	xapic_write_reg(APIC_EOI, 0);
131*eede2065SJue Wang }
132*eede2065SJue Wang 
133*eede2065SJue Wang static void guest_gp_handler(struct ex_regs *regs)
134*eede2065SJue Wang {
135*eede2065SJue Wang 	GUEST_SYNC(SYNC_GP);
136*eede2065SJue Wang }
137*eede2065SJue Wang 
138*eede2065SJue Wang static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu)
139*eede2065SJue Wang {
140*eede2065SJue Wang 	unsigned int exit_reason;
141*eede2065SJue Wang 	struct ucall uc;
142*eede2065SJue Wang 
143*eede2065SJue Wang 	vcpu_run(vcpu);
144*eede2065SJue Wang 
145*eede2065SJue Wang 	exit_reason = vcpu->run->exit_reason;
146*eede2065SJue Wang 	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
147*eede2065SJue Wang 		    "exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
148*eede2065SJue Wang 		    exit_reason, exit_reason_str(exit_reason));
149*eede2065SJue Wang 	TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC,
150*eede2065SJue Wang 		    "Expect UCALL_SYNC\n");
151*eede2065SJue Wang 	TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected.");
152*eede2065SJue Wang 	printf("vCPU received GP in guest.\n");
153*eede2065SJue Wang }
154*eede2065SJue Wang 
155*eede2065SJue Wang static void inject_ucna(struct kvm_vcpu *vcpu, uint64_t addr) {
156*eede2065SJue Wang 	/*
157*eede2065SJue Wang 	 * A UCNA error is indicated with VAL=1, UC=1, PCC=0, S=0 and AR=0 in
158*eede2065SJue Wang 	 * the IA32_MCi_STATUS register.
159*eede2065SJue Wang 	 * MSCOD=1 (BIT[16] - MscodDataRdErr).
160*eede2065SJue Wang 	 * MCACOD=0x0090 (Memory controller error format, channel 0)
161*eede2065SJue Wang 	 */
162*eede2065SJue Wang 	uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
163*eede2065SJue Wang 			  MCI_STATUS_MISCV | MCI_STATUS_ADDRV | 0x10090;
164*eede2065SJue Wang 	struct kvm_x86_mce mce = {};
165*eede2065SJue Wang 	mce.status = status;
166*eede2065SJue Wang 	mce.mcg_status = 0;
167*eede2065SJue Wang 	/*
168*eede2065SJue Wang 	 * MCM_ADDR_PHYS indicates the reported address is a physical address.
169*eede2065SJue Wang 	 * Lowest 6 bits is the recoverable address LSB, i.e., the injected MCE
170*eede2065SJue Wang 	 * is at 4KB granularity.
171*eede2065SJue Wang 	 */
172*eede2065SJue Wang 	mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
173*eede2065SJue Wang 	mce.addr = addr;
174*eede2065SJue Wang 	mce.bank = UCNA_BANK;
175*eede2065SJue Wang 
176*eede2065SJue Wang 	vcpu_ioctl(vcpu, KVM_X86_SET_MCE, &mce);
177*eede2065SJue Wang }
178*eede2065SJue Wang 
179*eede2065SJue Wang static void *run_ucna_injection(void *arg)
180*eede2065SJue Wang {
181*eede2065SJue Wang 	struct thread_params *params = (struct thread_params *)arg;
182*eede2065SJue Wang 	struct ucall uc;
183*eede2065SJue Wang 	int old;
184*eede2065SJue Wang 	int r;
185*eede2065SJue Wang 	unsigned int exit_reason;
186*eede2065SJue Wang 
187*eede2065SJue Wang 	r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
188*eede2065SJue Wang 	TEST_ASSERT(r == 0,
189*eede2065SJue Wang 		    "pthread_setcanceltype failed with errno=%d",
190*eede2065SJue Wang 		    r);
191*eede2065SJue Wang 
192*eede2065SJue Wang 	vcpu_run(params->vcpu);
193*eede2065SJue Wang 
194*eede2065SJue Wang 	exit_reason = params->vcpu->run->exit_reason;
195*eede2065SJue Wang 	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
196*eede2065SJue Wang 		    "unexpected exit reason %u-%s, expected KVM_EXIT_IO",
197*eede2065SJue Wang 		    exit_reason, exit_reason_str(exit_reason));
198*eede2065SJue Wang 	TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
199*eede2065SJue Wang 		    "Expect UCALL_SYNC\n");
200*eede2065SJue Wang 	TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA.");
201*eede2065SJue Wang 
202*eede2065SJue Wang 	printf("Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR);
203*eede2065SJue Wang 
204*eede2065SJue Wang 	inject_ucna(params->vcpu, FIRST_UCNA_ADDR);
205*eede2065SJue Wang 	vcpu_run(params->vcpu);
206*eede2065SJue Wang 
207*eede2065SJue Wang 	exit_reason = params->vcpu->run->exit_reason;
208*eede2065SJue Wang 	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
209*eede2065SJue Wang 		    "unexpected exit reason %u-%s, expected KVM_EXIT_IO",
210*eede2065SJue Wang 		    exit_reason, exit_reason_str(exit_reason));
211*eede2065SJue Wang 	TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
212*eede2065SJue Wang 		    "Expect UCALL_SYNC\n");
213*eede2065SJue Wang 	TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA.");
214*eede2065SJue Wang 
215*eede2065SJue Wang 	printf("Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR);
216*eede2065SJue Wang 
217*eede2065SJue Wang 	inject_ucna(params->vcpu, SECOND_UCNA_ADDR);
218*eede2065SJue Wang 	vcpu_run(params->vcpu);
219*eede2065SJue Wang 
220*eede2065SJue Wang 	exit_reason = params->vcpu->run->exit_reason;
221*eede2065SJue Wang 	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
222*eede2065SJue Wang 		    "unexpected exit reason %u-%s, expected KVM_EXIT_IO",
223*eede2065SJue Wang 		    exit_reason, exit_reason_str(exit_reason));
224*eede2065SJue Wang 	if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) {
225*eede2065SJue Wang 		TEST_ASSERT(false, "vCPU assertion failure: %s.\n",
226*eede2065SJue Wang 			    (const char *)uc.args[0]);
227*eede2065SJue Wang 	}
228*eede2065SJue Wang 
229*eede2065SJue Wang 	return NULL;
230*eede2065SJue Wang }
231*eede2065SJue Wang 
232*eede2065SJue Wang static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *params)
233*eede2065SJue Wang {
234*eede2065SJue Wang 	struct kvm_vm *vm = vcpu->vm;
235*eede2065SJue Wang 	params->vcpu = vcpu;
236*eede2065SJue Wang 	params->p_i_ucna_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_rcvd);
237*eede2065SJue Wang 	params->p_i_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_addr);
238*eede2065SJue Wang 	params->p_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr);
239*eede2065SJue Wang 	params->p_ucna_addr2 = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr2);
240*eede2065SJue Wang 
241*eede2065SJue Wang 	run_ucna_injection(params);
242*eede2065SJue Wang 
243*eede2065SJue Wang 	TEST_ASSERT(*params->p_i_ucna_rcvd == 1, "Only first UCNA get signaled.");
244*eede2065SJue Wang 	TEST_ASSERT(*params->p_i_ucna_addr == FIRST_UCNA_ADDR,
245*eede2065SJue Wang 		    "Only first UCNA reported addr get recorded via interrupt.");
246*eede2065SJue Wang 	TEST_ASSERT(*params->p_ucna_addr == FIRST_UCNA_ADDR,
247*eede2065SJue Wang 		    "First injected UCNAs should get exposed via registers.");
248*eede2065SJue Wang 	TEST_ASSERT(*params->p_ucna_addr2 == SECOND_UCNA_ADDR,
249*eede2065SJue Wang 		    "Second injected UCNAs should get exposed via registers.");
250*eede2065SJue Wang 
251*eede2065SJue Wang 	printf("Test successful.\n"
252*eede2065SJue Wang 	       "UCNA CMCI interrupts received: %ld\n"
253*eede2065SJue Wang 	       "Last UCNA address received via CMCI: %lx\n"
254*eede2065SJue Wang 	       "First UCNA address in vCPU thread: %lx\n"
255*eede2065SJue Wang 	       "Second UCNA address in vCPU thread: %lx\n",
256*eede2065SJue Wang 	       *params->p_i_ucna_rcvd, *params->p_i_ucna_addr,
257*eede2065SJue Wang 	       *params->p_ucna_addr, *params->p_ucna_addr2);
258*eede2065SJue Wang }
259*eede2065SJue Wang 
260*eede2065SJue Wang static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p)
261*eede2065SJue Wang {
262*eede2065SJue Wang 	uint64_t mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS;
263*eede2065SJue Wang 	if (enable_cmci_p)
264*eede2065SJue Wang 		mcg_caps |= MCG_CMCI_P;
265*eede2065SJue Wang 
266*eede2065SJue Wang 	mcg_caps &= supported_mcg_caps | MCG_CAP_BANKS_MASK;
267*eede2065SJue Wang 	vcpu_ioctl(vcpu, KVM_X86_SETUP_MCE, &mcg_caps);
268*eede2065SJue Wang }
269*eede2065SJue Wang 
270*eede2065SJue Wang static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, uint32_t vcpuid,
271*eede2065SJue Wang 						 bool enable_cmci_p, void *guest_code)
272*eede2065SJue Wang {
273*eede2065SJue Wang 	struct kvm_vcpu *vcpu = vm_vcpu_add(vm, vcpuid, guest_code);
274*eede2065SJue Wang 	setup_mce_cap(vcpu, enable_cmci_p);
275*eede2065SJue Wang 	return vcpu;
276*eede2065SJue Wang }
277*eede2065SJue Wang 
278*eede2065SJue Wang int main(int argc, char *argv[])
279*eede2065SJue Wang {
280*eede2065SJue Wang 	struct thread_params params;
281*eede2065SJue Wang 	struct kvm_vm *vm;
282*eede2065SJue Wang 	struct kvm_vcpu *ucna_vcpu;
283*eede2065SJue Wang 	struct kvm_vcpu *cmcidis_vcpu;
284*eede2065SJue Wang 	struct kvm_vcpu *cmci_vcpu;
285*eede2065SJue Wang 
286*eede2065SJue Wang 	kvm_check_cap(KVM_CAP_MCE);
287*eede2065SJue Wang 
288*eede2065SJue Wang 	vm = __vm_create(VM_MODE_DEFAULT, 3, 0);
289*eede2065SJue Wang 
290*eede2065SJue Wang 	kvm_ioctl(vm->kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED,
291*eede2065SJue Wang 		  &supported_mcg_caps);
292*eede2065SJue Wang 
293*eede2065SJue Wang 	if (!(supported_mcg_caps & MCG_CMCI_P)) {
294*eede2065SJue Wang 		print_skip("MCG_CMCI_P is not supported");
295*eede2065SJue Wang 		exit(KSFT_SKIP);
296*eede2065SJue Wang 	}
297*eede2065SJue Wang 
298*eede2065SJue Wang 	ucna_vcpu = create_vcpu_with_mce_cap(vm, 0, true, ucna_injection_guest_code);
299*eede2065SJue Wang 	cmcidis_vcpu = create_vcpu_with_mce_cap(vm, 1, false, cmci_disabled_guest_code);
300*eede2065SJue Wang 	cmci_vcpu = create_vcpu_with_mce_cap(vm, 2, true, cmci_enabled_guest_code);
301*eede2065SJue Wang 
302*eede2065SJue Wang 	vm_init_descriptor_tables(vm);
303*eede2065SJue Wang 	vcpu_init_descriptor_tables(ucna_vcpu);
304*eede2065SJue Wang 	vcpu_init_descriptor_tables(cmcidis_vcpu);
305*eede2065SJue Wang 	vcpu_init_descriptor_tables(cmci_vcpu);
306*eede2065SJue Wang 	vm_install_exception_handler(vm, CMCI_VECTOR, guest_cmci_handler);
307*eede2065SJue Wang 	vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
308*eede2065SJue Wang 
309*eede2065SJue Wang 	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
310*eede2065SJue Wang 
311*eede2065SJue Wang 	test_ucna_injection(ucna_vcpu, &params);
312*eede2065SJue Wang 	run_vcpu_expect_gp(cmcidis_vcpu);
313*eede2065SJue Wang 	run_vcpu_expect_gp(cmci_vcpu);
314*eede2065SJue Wang 
315*eede2065SJue Wang 	kvm_vm_free(vm);
316*eede2065SJue Wang }
317