// SPDX-License-Identifier: GPL-2.0
/*
 * Test for x86 KVM_SET_PMU_EVENT_FILTER.
 *
 * Copyright (C) 2022, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Verifies the expected behavior of allow lists and deny lists for
 * virtual PMU events.
 */

#define _GNU_SOURCE /* for program_invocation_short_name */
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"

/*
 * In lieu of copying perf_event.h into tools...
 */
#define ARCH_PERFMON_EVENTSEL_OS			(1ULL << 17)
#define ARCH_PERFMON_EVENTSEL_ENABLE			(1ULL << 22)

union cpuid10_eax {
	struct {
		unsigned int version_id:8;
		unsigned int num_counters:8;
		unsigned int bit_width:8;
		unsigned int mask_length:8;
	} split;
	unsigned int full;
};

union cpuid10_ebx {
	struct {
		unsigned int no_unhalted_core_cycles:1;
		unsigned int no_instructions_retired:1;
		unsigned int no_unhalted_reference_cycles:1;
		unsigned int no_llc_reference:1;
		unsigned int no_llc_misses:1;
		unsigned int no_branch_instruction_retired:1;
		unsigned int no_branch_misses_retired:1;
	} split;
	unsigned int full;
};

/* End of stuff taken from perf_event.h. */

/* Oddly, this isn't in perf_event.h. */
#define ARCH_PERFMON_BRANCHES_RETIRED		5
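/*
 * ARCH_PERFMON_BRANCHES_RETIRED is the bit position of "branch
 * instructions retired" in CPUID.0AH:EBX (the
 * no_branch_instruction_retired field above). check_intel_pmu_leaf()
 * uses it to verify that the EBX bit vector is long enough to describe
 * the event and that the event is not flagged as unavailable.
 */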

#define VCPU_ID 0
#define NUM_BRANCHES 42

/*
 * This is how the event selector and unit mask are stored in an AMD
 * core performance event-select register. Intel's format is similar,
 * but the event selector is only 8 bits.
 */
#define EVENT(select, umask) ((select & 0xf00UL) << 24 | (select & 0xff) | \
			      (umask & 0xff) << 8)
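/*
 * For illustration only (a hypothetical 12-bit selector, not one used
 * by this test): EVENT(0x1c2, 0x01) places select bits [11:8] at bits
 * [35:32], select bits [7:0] at bits [7:0], and the unit mask at bits
 * [15:8], yielding 0x1000001c2.
 */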

/*
 * "Branch instructions retired", from the Intel SDM, volume 3,
 * "Pre-defined Architectural Performance Events."
 */

#define INTEL_BR_RETIRED EVENT(0xc4, 0)

/*
 * "Retired branch instructions", from Processor Programming Reference
 * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
 * Preliminary Processor Programming Reference (PPR) for AMD Family
 * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
 * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
 * B1 Processors Volume 1 of 2.
 */

#define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0)

/*
 * This event list comprises Intel's eight architectural events plus
 * AMD's "retired branch instructions" for Zen[123] (and possibly
 * other AMD CPUs).
 */
static const uint64_t event_list[] = {
	EVENT(0x3c, 0),
	EVENT(0xc0, 0),
	EVENT(0x3c, 1),
	EVENT(0x2e, 0x4f),
	EVENT(0x2e, 0x41),
	EVENT(0xc4, 0),
	EVENT(0xc5, 0),
	EVENT(0xa4, 1),
	AMD_ZEN_BR_RETIRED,
};
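/*
 * Per the Intel SDM's list of pre-defined architectural events, the
 * eight Intel entries above are, in order: unhalted core cycles,
 * instructions retired, unhalted reference cycles, LLC references,
 * LLC misses, branch instructions retired, branch misses retired, and
 * topdown slots.
 */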

/*
 * If we encounter a #GP during the guest PMU sanity check, then the guest
 * PMU is not functional. Inform the hypervisor via GUEST_SYNC(0).
 */
static void guest_gp_handler(struct ex_regs *regs)
{
	GUEST_SYNC(0);
}

/*
 * Check that we can write a new value to the given MSR and read it back.
 * The caller should provide a non-empty set of bits that are safe to flip.
 *
 * Return on success. GUEST_SYNC(0) on error.
 */
static void check_msr(uint32_t msr, uint64_t bits_to_flip)
{
	uint64_t v = rdmsr(msr) ^ bits_to_flip;

	wrmsr(msr, v);
	if (rdmsr(msr) != v)
		GUEST_SYNC(0);

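	/* Flip the bits back so the MSR is restored to its original value. */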
	v ^= bits_to_flip;
	wrmsr(msr, v);
	if (rdmsr(msr) != v)
		GUEST_SYNC(0);
}

static void intel_guest_code(void)
{
	check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
	check_msr(MSR_P6_EVNTSEL0, 0xffff);
	check_msr(MSR_IA32_PMC0, 0xffff);
	GUEST_SYNC(1);

	for (;;) {
		uint64_t br0, br1;

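		/*
		 * Program GP counter 0 to count branch instructions
		 * retired at CPL0, snapshot the counter, retire
		 * NUM_BRANCHES branches via the "loop" instruction, and
		 * report the delta to the host.
		 */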
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
		wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
		      ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED);
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
		br0 = rdmsr(MSR_IA32_PMC0);
		__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
		br1 = rdmsr(MSR_IA32_PMC0);
		GUEST_SYNC(br1 - br0);
	}
}

/*
 * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23],
 * this code uses the always-available, legacy K7 PMU MSRs, which alias to
 * the first four of the six extended core PMU MSRs.
 */
static void amd_guest_code(void)
{
	check_msr(MSR_K7_EVNTSEL0, 0xffff);
	check_msr(MSR_K7_PERFCTR0, 0xffff);
	GUEST_SYNC(1);

	for (;;) {
		uint64_t br0, br1;

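		/*
		 * Same measurement as the Intel flow, but the legacy K7
		 * PMU has no global-control MSR, so clearing and then
		 * re-writing EVNTSEL0 (with the enable bit set) is what
		 * stops and restarts the counter.
		 */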
		wrmsr(MSR_K7_EVNTSEL0, 0);
		wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
		      ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED);
		br0 = rdmsr(MSR_K7_PERFCTR0);
		__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
		br1 = rdmsr(MSR_K7_PERFCTR0);
		GUEST_SYNC(br1 - br0);
	}
}

/*
 * Run the VM to the next GUEST_SYNC(value), and return the value passed
 * to the sync. Any other exit from the guest is fatal.
 */
static uint64_t run_vm_to_sync(struct kvm_vm *vm)
{
	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
	struct ucall uc;

	vcpu_run(vm, VCPU_ID);
	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
		    "Exit_reason other than KVM_EXIT_IO: %u (%s)\n",
		    run->exit_reason,
		    exit_reason_str(run->exit_reason));
	get_ucall(vm, VCPU_ID, &uc);
	TEST_ASSERT(uc.cmd == UCALL_SYNC,
		    "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
	return uc.args[1];
}

/*
 * In a nested environment or if the vPMU is disabled, the guest PMU
 * might not work as architected (accessing the PMU MSRs may raise
 * #GP, or writes could simply be discarded). In those situations,
 * there is no point in running these tests. The guest code will perform
 * a sanity check and then GUEST_SYNC(success). In the case of failure,
 * the behavior of the guest on resumption is undefined.
 */
static bool sanity_check_pmu(struct kvm_vm *vm)
{
	bool success;

	vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
	success = run_vm_to_sync(vm);
	vm_install_exception_handler(vm, GP_VECTOR, NULL);

	return success;
}

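/*
 * struct kvm_pmu_event_filter ends in a flexible array of event codes,
 * so size the allocation for the header plus 'nevents' entries. Zeroing
 * the whole buffer leaves the remaining header fields and padding in a
 * known-clear state before the caller fills them in.
 */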
static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents)
{
	struct kvm_pmu_event_filter *f;
	int size = sizeof(*f) + nevents * sizeof(f->events[0]);

	f = malloc(size);
	TEST_ASSERT(f, "Out of memory");
	memset(f, 0, size);
	f->nevents = nevents;
	return f;
}

static struct kvm_pmu_event_filter *event_filter(uint32_t action)
{
	struct kvm_pmu_event_filter *f;
	int i;

	f = make_pmu_event_filter(ARRAY_SIZE(event_list));
	f->action = action;
	for (i = 0; i < ARRAY_SIZE(event_list); i++)
		f->events[i] = event_list[i];

	return f;
}

/*
 * Remove the first occurrence of 'event' (if any) from the filter's
 * event list.
 */
static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
						 uint64_t event)
{
	bool found = false;
	int i;

	for (i = 0; i < f->nevents; i++) {
		if (found)
			f->events[i - 1] = f->events[i];
		else
			found = f->events[i] == event;
	}
	if (found)
		f->nevents--;
	return f;
}

static void test_without_filter(struct kvm_vm *vm)
{
	uint64_t count = run_vm_to_sync(vm);

	if (count != NUM_BRANCHES)
		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
			__func__, count, NUM_BRANCHES);
	TEST_ASSERT(count, "Allowed PMU event is not counting");
}

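/*
 * Install the given filter via KVM_SET_PMU_EVENT_FILTER, then run one
 * iteration of the guest's branch-counting loop and return the count it
 * reports.
 */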
static uint64_t test_with_filter(struct kvm_vm *vm,
				 struct kvm_pmu_event_filter *f)
{
	vm_ioctl(vm, KVM_SET_PMU_EVENT_FILTER, (void *)f);
	return run_vm_to_sync(vm);
}

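/*
 * The next four tests cover the {member, non-member} x {deny, allow}
 * matrix: the branch event must not count when it is on a deny list or
 * missing from an allow list, and must count when it is on an allow
 * list or missing from a deny list.
 */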
static void test_member_deny_list(struct kvm_vm *vm)
{
	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
	uint64_t count = test_with_filter(vm, f);

	free(f);
	if (count)
		pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
			__func__, count);
	TEST_ASSERT(!count, "Disallowed PMU Event is counting");
}

static void test_member_allow_list(struct kvm_vm *vm)
{
	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
	uint64_t count = test_with_filter(vm, f);

	free(f);
	if (count != NUM_BRANCHES)
		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
			__func__, count, NUM_BRANCHES);
	TEST_ASSERT(count, "Allowed PMU event is not counting");
}

static void test_not_member_deny_list(struct kvm_vm *vm)
{
	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
	uint64_t count;

	remove_event(f, INTEL_BR_RETIRED);
	remove_event(f, AMD_ZEN_BR_RETIRED);
	count = test_with_filter(vm, f);
	free(f);
	if (count != NUM_BRANCHES)
		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
			__func__, count, NUM_BRANCHES);
	TEST_ASSERT(count, "Allowed PMU event is not counting");
}

static void test_not_member_allow_list(struct kvm_vm *vm)
{
	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
	uint64_t count;

	remove_event(f, INTEL_BR_RETIRED);
	remove_event(f, AMD_ZEN_BR_RETIRED);
	count = test_with_filter(vm, f);
	free(f);
	if (count)
		pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
			__func__, count);
	TEST_ASSERT(!count, "Disallowed PMU Event is counting");
}

/*
 * Check for a non-zero PMU version, at least one general-purpose
 * counter per logical processor, an EBX bit vector of length greater
 * than 5, and EBX[5] clear.
 */
static bool check_intel_pmu_leaf(struct kvm_cpuid_entry2 *entry)
{
	union cpuid10_eax eax = { .full = entry->eax };
	union cpuid10_ebx ebx = { .full = entry->ebx };

	return eax.split.version_id && eax.split.num_counters > 0 &&
		eax.split.mask_length > ARCH_PERFMON_BRANCHES_RETIRED &&
		!ebx.split.no_branch_instruction_retired;
}

/*
 * Note that CPUID leaf 0xa is Intel-specific. This leaf should be
 * clear on AMD hardware.
 */
static bool use_intel_pmu(void)
{
	struct kvm_cpuid_entry2 *entry;

	entry = kvm_get_supported_cpuid_index(0xa, 0);
	return is_intel_cpu() && entry && check_intel_pmu_leaf(entry);
}

static bool is_zen1(uint32_t eax)
{
	return x86_family(eax) == 0x17 && x86_model(eax) <= 0x0f;
}

static bool is_zen2(uint32_t eax)
{
	return x86_family(eax) == 0x17 &&
		x86_model(eax) >= 0x30 && x86_model(eax) <= 0x3f;
}

static bool is_zen3(uint32_t eax)
{
	return x86_family(eax) == 0x19 && x86_model(eax) <= 0x0f;
}

/*
 * Determining AMD support for a PMU event requires consulting the AMD
 * PPR for the CPU or reference material derived therefrom. The AMD
 * test code herein has been verified to work on Zen1, Zen2, and Zen3.
 *
 * Feel free to add more AMD CPUs that are documented to support event
 * select 0xc2 umask 0 as "retired branch instructions."
 */
static bool use_amd_pmu(void)
{
	struct kvm_cpuid_entry2 *entry;

	entry = kvm_get_supported_cpuid_index(1, 0);
	return is_amd_cpu() && entry &&
		(is_zen1(entry->eax) ||
		 is_zen2(entry->eax) ||
		 is_zen3(entry->eax));
}

int main(int argc, char *argv[])
{
	void (*guest_code)(void) = NULL;
	struct kvm_vm *vm;
	int r;

	/* Tell stdout not to buffer its content */
	setbuf(stdout, NULL);

	r = kvm_check_cap(KVM_CAP_PMU_EVENT_FILTER);
	if (!r) {
		print_skip("KVM_CAP_PMU_EVENT_FILTER not supported");
		exit(KSFT_SKIP);
	}

	if (use_intel_pmu())
		guest_code = intel_guest_code;
	else if (use_amd_pmu())
		guest_code = amd_guest_code;

	if (!guest_code) {
		print_skip("Don't know how to test this guest PMU");
		exit(KSFT_SKIP);
	}

	vm = vm_create_default(VCPU_ID, 0, guest_code);

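	/*
	 * Set up the guest descriptor tables so that the #GP handler
	 * installed by sanity_check_pmu() can actually be dispatched.
	 */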
	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vm, VCPU_ID);

	if (!sanity_check_pmu(vm)) {
		print_skip("Guest PMU is not functional");
		exit(KSFT_SKIP);
	}

	test_without_filter(vm);
	test_member_deny_list(vm);
	test_member_allow_list(vm);
	test_not_member_deny_list(vm);
	test_not_member_allow_list(vm);

	kvm_vm_free(vm);

	return 0;
}