// SPDX-License-Identifier: GPL-2.0
/*
 * Test for x86 KVM_SET_PMU_EVENT_FILTER.
 *
 * Copyright (C) 2022, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Verifies the expected behavior of allow lists and deny lists for
 * virtual PMU events.
 */

#define _GNU_SOURCE /* for program_invocation_short_name */
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"

/*
 * In lieu of copying perf_event.h into tools...
 */
#define ARCH_PERFMON_EVENTSEL_OS			(1ULL << 17)
#define ARCH_PERFMON_EVENTSEL_ENABLE			(1ULL << 22)

union cpuid10_eax {
	struct {
		unsigned int version_id:8;
		unsigned int num_counters:8;
		unsigned int bit_width:8;
		unsigned int mask_length:8;
	} split;
	unsigned int full;
};

union cpuid10_ebx {
	struct {
		unsigned int no_unhalted_core_cycles:1;
		unsigned int no_instructions_retired:1;
		unsigned int no_unhalted_reference_cycles:1;
		unsigned int no_llc_reference:1;
		unsigned int no_llc_misses:1;
		unsigned int no_branch_instruction_retired:1;
		unsigned int no_branch_misses_retired:1;
	} split;
	unsigned int full;
};

/* End of stuff taken from perf_event.h. */

/* Oddly, this isn't in perf_event.h. */
#define ARCH_PERFMON_BRANCHES_RETIRED		5

#define VCPU_ID 0
#define NUM_BRANCHES 42

/*
 * This is how the event selector and unit mask are stored in an AMD
 * core performance event-select register. Intel's format is similar,
 * but the event selector is only 8 bits.
 */
#define EVENT(select, umask) ((select & 0xf00UL) << 24 | (select & 0xff) | \
			      ((umask & 0xff) << 8))
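
/*
 * For example, EVENT(0x1C2, 0) evaluates to 0x1000000C2: the low byte of
 * the event selector lands in bits 7:0, the unit mask in bits 15:8, and
 * the high nibble of the event selector in bits 35:32.
 */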

/*
 * "Branch instructions retired", from the Intel SDM, volume 3,
 * "Pre-defined Architectural Performance Events."
 */

#define INTEL_BR_RETIRED EVENT(0xc4, 0)

/*
 * "Retired branch instructions", from Processor Programming Reference
 * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
 * Preliminary Processor Programming Reference (PPR) for AMD Family
 * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
 * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
 * B1 Processors Volume 1 of 2.
 */

#define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0)

/*
 * This event list comprises Intel's eight architectural events plus
 * AMD's "retired branch instructions" for Zen[123] (and possibly
 * other AMD CPUs).
 */
static const uint64_t event_list[] = {
	EVENT(0x3c, 0),		/* UnHalted Core Cycles */
	EVENT(0xc0, 0),		/* Instructions Retired */
	EVENT(0x3c, 1),		/* UnHalted Reference Cycles */
	EVENT(0x2e, 0x4f),	/* LLC References */
	EVENT(0x2e, 0x41),	/* LLC Misses */
	EVENT(0xc4, 0),		/* Branch Instructions Retired */
	EVENT(0xc5, 0),		/* Branch Misses Retired */
	EVENT(0xa4, 1),		/* Topdown Slots */
	AMD_ZEN_BR_RETIRED,
};

/*
 * If we encounter a #GP during the guest PMU sanity check, then the guest
 * PMU is not functional. Inform the hypervisor via GUEST_SYNC(0).
 */
static void guest_gp_handler(struct ex_regs *regs)
{
	GUEST_SYNC(0);
}

/*
 * Check that we can write a new value to the given MSR and read it back.
 * The caller should provide a non-empty set of bits that are safe to flip.
 *
 * Return on success. GUEST_SYNC(0) on error.
 */
static void check_msr(uint32_t msr, uint64_t bits_to_flip)
{
	uint64_t v = rdmsr(msr) ^ bits_to_flip;

	wrmsr(msr, v);
	if (rdmsr(msr) != v)
		GUEST_SYNC(0);

	v ^= bits_to_flip;
	wrmsr(msr, v);
	if (rdmsr(msr) != v)
		GUEST_SYNC(0);
}
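
/*
 * Verify that the Intel PMU MSRs are writable (check_msr() reports failure
 * via GUEST_SYNC(0)), signal success with GUEST_SYNC(1), then loop forever
 * counting branch instructions retired across NUM_BRANCHES iterations of a
 * tight loop, reporting each measured delta via GUEST_SYNC().
 */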
static void intel_guest_code(void)
{
	check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
	check_msr(MSR_P6_EVNTSEL0, 0xffff);
	check_msr(MSR_IA32_PMC0, 0xffff);
	GUEST_SYNC(1);

	for (;;) {
		uint64_t br0, br1;

		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
		wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
		      ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED);
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
		br0 = rdmsr(MSR_IA32_PMC0);
		__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
		br1 = rdmsr(MSR_IA32_PMC0);
		GUEST_SYNC(br1 - br0);
	}
}

/*
 * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23],
 * this code uses the always-available, legacy K7 PMU MSRs, which alias to
 * the first four of the six extended core PMU MSRs.
 */
static void amd_guest_code(void)
{
	check_msr(MSR_K7_EVNTSEL0, 0xffff);
	check_msr(MSR_K7_PERFCTR0, 0xffff);
	GUEST_SYNC(1);

	for (;;) {
		uint64_t br0, br1;

		wrmsr(MSR_K7_EVNTSEL0, 0);
		wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
		      ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED);
		br0 = rdmsr(MSR_K7_PERFCTR0);
		__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
		br1 = rdmsr(MSR_K7_PERFCTR0);
		GUEST_SYNC(br1 - br0);
	}
}

/*
 * Run the VM to the next GUEST_SYNC(value), and return the value passed
 * to the sync. Any other exit from the guest is fatal.
 */
static uint64_t run_vm_to_sync(struct kvm_vm *vm)
{
	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
	struct ucall uc;

	vcpu_run(vm, VCPU_ID);
	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
		    "Exit_reason other than KVM_EXIT_IO: %u (%s)\n",
		    run->exit_reason,
		    exit_reason_str(run->exit_reason));
	get_ucall(vm, VCPU_ID, &uc);
	TEST_ASSERT(uc.cmd == UCALL_SYNC,
		    "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
	return uc.args[1];
}

/*
 * In a nested environment or if the vPMU is disabled, the guest PMU
 * might not work as architected (accessing the PMU MSRs may raise
 * #GP, or writes could simply be discarded). In those situations,
 * there is no point in running these tests. The guest code will perform
 * a sanity check and then GUEST_SYNC(success). In the case of failure,
 * the behavior of the guest on resumption is undefined.
 */
static bool sanity_check_pmu(struct kvm_vm *vm)
{
	bool success;

	vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
	success = run_vm_to_sync(vm);
	vm_install_exception_handler(vm, GP_VECTOR, NULL);

	return success;
}
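
/*
 * Allocate a zeroed kvm_pmu_event_filter with room for 'nevents' events.
 * Asserts instead of returning NULL if the allocation fails.
 */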
static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents)
{
	struct kvm_pmu_event_filter *f;
	int size = sizeof(*f) + nevents * sizeof(f->events[0]);

	f = malloc(size);
	TEST_ASSERT(f, "Out of memory");
	memset(f, 0, size);
	f->nevents = nevents;
	return f;
}
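
/*
 * Create a filter with the given action whose event list is the first
 * 'nevents' entries of 'event_list'.
 */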
static struct kvm_pmu_event_filter *
create_pmu_event_filter(const uint64_t event_list[],
			int nevents, uint32_t action)
{
	struct kvm_pmu_event_filter *f;
	int i;

	f = alloc_pmu_event_filter(nevents);
	f->action = action;
	for (i = 0; i < nevents; i++)
		f->events[i] = event_list[i];

	return f;
}
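
/*
 * Create a filter with the given action that contains every event in the
 * global event_list[].
 */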
static struct kvm_pmu_event_filter *event_filter(uint32_t action)
{
	return create_pmu_event_filter(event_list,
				       ARRAY_SIZE(event_list),
				       action);
}

/*
 * Remove the first occurrence of 'event' (if any) from the filter's
 * event list.
 */
static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
						 uint64_t event)
{
	bool found = false;
	int i;

	for (i = 0; i < f->nevents; i++) {
		if (found)
			f->events[i - 1] = f->events[i];
		else
			found = f->events[i] == event;
	}
	if (found)
		f->nevents--;
	return f;
}
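
/*
 * With no filter installed, the branch-counting event programmed by the
 * guest should count normally; a zero count indicates a problem.
 */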
static void test_without_filter(struct kvm_vm *vm)
{
	uint64_t count = run_vm_to_sync(vm);

	if (count != NUM_BRANCHES)
		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
			__func__, count, NUM_BRANCHES);
	TEST_ASSERT(count, "Allowed PMU event is not counting");
}
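
/* Install the given PMU event filter and return the guest's branch count. */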
static uint64_t test_with_filter(struct kvm_vm *vm,
				 struct kvm_pmu_event_filter *f)
{
	vm_ioctl(vm, KVM_SET_PMU_EVENT_FILTER, (void *)f);
	return run_vm_to_sync(vm);
}
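
/*
 * Deny event select 0x1C2, which shares its low byte with
 * AMD_ZEN_BR_RETIRED's 0xC2 but differs in the extended select bits, and
 * verify that retired branches are still counted.
 */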
static void test_amd_deny_list(struct kvm_vm *vm)
{
	uint64_t event = EVENT(0x1C2, 0);
	struct kvm_pmu_event_filter *f;
	uint64_t count;

	f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY);
	count = test_with_filter(vm, f);

	free(f);
	if (count != NUM_BRANCHES)
		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
			__func__, count, NUM_BRANCHES);
	TEST_ASSERT(count, "Allowed PMU event is not counting");
}
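
/* The branch event is a member of the deny list, so it should not count. */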
static void test_member_deny_list(struct kvm_vm *vm)
{
	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
	uint64_t count = test_with_filter(vm, f);

	free(f);
	if (count)
		pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
			__func__, count);
	TEST_ASSERT(!count, "Disallowed PMU Event is counting");
}
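
/* The branch event is a member of the allow list, so it should count. */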
static void test_member_allow_list(struct kvm_vm *vm)
{
	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
	uint64_t count = test_with_filter(vm, f);

	free(f);
	if (count != NUM_BRANCHES)
		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
			__func__, count, NUM_BRANCHES);
	TEST_ASSERT(count, "Allowed PMU event is not counting");
}
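
/* The branch event is not in the deny list, so it should count. */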
static void test_not_member_deny_list(struct kvm_vm *vm)
{
	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
	uint64_t count;

	remove_event(f, INTEL_BR_RETIRED);
	remove_event(f, AMD_ZEN_BR_RETIRED);
	count = test_with_filter(vm, f);
	free(f);
	if (count != NUM_BRANCHES)
		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
			__func__, count, NUM_BRANCHES);
	TEST_ASSERT(count, "Allowed PMU event is not counting");
}
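
/* The branch event is not in the allow list, so it should not count. */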
static void test_not_member_allow_list(struct kvm_vm *vm)
{
	struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
	uint64_t count;

	remove_event(f, INTEL_BR_RETIRED);
	remove_event(f, AMD_ZEN_BR_RETIRED);
	count = test_with_filter(vm, f);
	free(f);
	if (count)
		pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
			__func__, count);
	TEST_ASSERT(!count, "Disallowed PMU Event is counting");
}

/*
 * Verify that setting KVM_PMU_CAP_DISABLE prevents the use of the PMU.
 *
 * Note that KVM_CAP_PMU_CAPABILITY must be enabled (via KVM_ENABLE_CAP)
 * before any vCPUs are created.
 */
static void test_pmu_config_disable(void (*guest_code)(void))
{
	int r;
	struct kvm_vm *vm;
	struct kvm_enable_cap cap = { 0 };

	r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY);
	if (!(r & KVM_PMU_CAP_DISABLE))
		return;

	vm = vm_create_without_vcpus(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES);

	cap.cap = KVM_CAP_PMU_CAPABILITY;
	cap.args[0] = KVM_PMU_CAP_DISABLE;
	TEST_ASSERT(!vm_enable_cap(vm, &cap), "Failed to set KVM_PMU_CAP_DISABLE.");

	vm_vcpu_add_default(vm, VCPU_ID, guest_code);
	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vm, VCPU_ID);

	TEST_ASSERT(!sanity_check_pmu(vm),
		    "Guest should not be able to use disabled PMU.");

	kvm_vm_free(vm);
}

/*
 * Check for a non-zero PMU version, at least one general-purpose
 * counter per logical processor, an EBX bit vector of length greater
 * than 5, and EBX[5] clear.
 */
static bool check_intel_pmu_leaf(struct kvm_cpuid_entry2 *entry)
{
	union cpuid10_eax eax = { .full = entry->eax };
	union cpuid10_ebx ebx = { .full = entry->ebx };

	return eax.split.version_id && eax.split.num_counters > 0 &&
		eax.split.mask_length > ARCH_PERFMON_BRANCHES_RETIRED &&
		!ebx.split.no_branch_instruction_retired;
}

/*
 * Note that CPUID leaf 0xa is Intel-specific. This leaf should be
 * clear on AMD hardware.
 */
static bool use_intel_pmu(void)
{
	struct kvm_cpuid_entry2 *entry;

	entry = kvm_get_supported_cpuid_index(0xa, 0);
	return is_intel_cpu() && entry && check_intel_pmu_leaf(entry);
}
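
/*
 * The family/model checks below correspond to the Zen 1, Zen 2, and Zen 3
 * parts covered by the PPRs cited above (family 17h models 00h-0Fh, family
 * 17h models 30h-3Fh, and family 19h models 00h-0Fh, respectively).
 */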
static bool is_zen1(uint32_t eax)
{
	return x86_family(eax) == 0x17 && x86_model(eax) <= 0x0f;
}

static bool is_zen2(uint32_t eax)
{
	return x86_family(eax) == 0x17 &&
		x86_model(eax) >= 0x30 && x86_model(eax) <= 0x3f;
}

static bool is_zen3(uint32_t eax)
{
	return x86_family(eax) == 0x19 && x86_model(eax) <= 0x0f;
}

/*
 * Determining AMD support for a PMU event requires consulting the AMD
 * PPR for the CPU or reference material derived therefrom. The AMD
 * test code herein has been verified to work on Zen1, Zen2, and Zen3.
 *
 * Feel free to add more AMD CPUs that are documented to support event
 * select 0xc2 umask 0 as "retired branch instructions."
 */
static bool use_amd_pmu(void)
{
	struct kvm_cpuid_entry2 *entry;

	entry = kvm_get_supported_cpuid_index(1, 0);
	return is_amd_cpu() && entry &&
		(is_zen1(entry->eax) ||
		 is_zen2(entry->eax) ||
		 is_zen3(entry->eax));
}

int main(int argc, char *argv[])
{
	void (*guest_code)(void) = NULL;
	struct kvm_vm *vm;
	int r;

	/* Tell stdout not to buffer its content */
	setbuf(stdout, NULL);

	r = kvm_check_cap(KVM_CAP_PMU_EVENT_FILTER);
	if (!r) {
		print_skip("KVM_CAP_PMU_EVENT_FILTER not supported");
		exit(KSFT_SKIP);
	}

	if (use_intel_pmu())
		guest_code = intel_guest_code;
	else if (use_amd_pmu())
		guest_code = amd_guest_code;

	if (!guest_code) {
		print_skip("Don't know how to test this guest PMU");
		exit(KSFT_SKIP);
	}

	vm = vm_create_default(VCPU_ID, 0, guest_code);

	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vm, VCPU_ID);

	if (!sanity_check_pmu(vm)) {
		print_skip("Guest PMU is not functional");
		exit(KSFT_SKIP);
	}

	if (use_amd_pmu())
		test_amd_deny_list(vm);

	test_without_filter(vm);
	test_member_deny_list(vm);
	test_member_allow_list(vm);
	test_not_member_deny_list(vm);
	test_not_member_allow_list(vm);

	kvm_vm_free(vm);

	test_pmu_config_disable(guest_code);

	return 0;
}