// SPDX-License-Identifier: GPL-2.0
/*
 * Test for x86 KVM_SET_PMU_EVENT_FILTER.
 *
 * Copyright (C) 2022, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 *
 * Verifies the expected behavior of allow lists and deny lists for
 * virtual PMU events.
 */

#define _GNU_SOURCE /* for program_invocation_short_name */
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"

/*
 * In lieu of copying perf_event.h into tools...
 */
#define ARCH_PERFMON_EVENTSEL_OS			(1ULL << 17)
#define ARCH_PERFMON_EVENTSEL_ENABLE			(1ULL << 22)

/* End of stuff taken from perf_event.h. */

/* Oddly, this isn't in perf_event.h. */
#define ARCH_PERFMON_BRANCHES_RETIRED		5

#define NUM_BRANCHES 42
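/*
 * Fixed-function counters occupy bits 32 and up of IA32_PERF_GLOBAL_CTRL,
 * so fixed counter N is enabled via bit (INTEL_PMC_IDX_FIXED + N).
 */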
#define INTEL_PMC_IDX_FIXED		32

/* Matches KVM_PMU_EVENT_FILTER_MAX_EVENTS in pmu.c */
#define MAX_FILTER_EVENTS		300
#define MAX_TEST_EVENTS		10

#define PMU_EVENT_FILTER_INVALID_ACTION		(KVM_PMU_EVENT_DENY + 1)
#define PMU_EVENT_FILTER_INVALID_FLAGS			(KVM_PMU_EVENT_FLAGS_VALID_MASK << 1)
#define PMU_EVENT_FILTER_INVALID_NEVENTS		(MAX_FILTER_EVENTS + 1)

/*
 * This is how the event selector and unit mask are stored in an AMD
 * core performance event-select register. Intel's format is similar,
 * but the event selector is only 8 bits.
 */
#define EVENT(select, umask) ((select & 0xf00UL) << 24 | (select & 0xff) | \
			      (umask & 0xff) << 8)
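/*
 * Bits [7:0] hold the low byte of the event select, bits [15:8] the unit
 * mask, and bits [35:32] the upper nibble of AMD's 12-bit event select.
 * For example, EVENT(0x1C2, 0) expands to 0x1000000C2.
 */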

/*
 * "Branch instructions retired", from the Intel SDM, volume 3,
 * "Pre-defined Architectural Performance Events."
 */

#define INTEL_BR_RETIRED EVENT(0xc4, 0)

/*
 * "Retired branch instructions", from Processor Programming Reference
 * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
 * Preliminary Processor Programming Reference (PPR) for AMD Family
 * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
 * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
 * B1 Processors Volume 1 of 2.
 */

#define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0)


/*
 * "Retired instructions", from Processor Programming Reference
 * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
 * Preliminary Processor Programming Reference (PPR) for AMD Family
 * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
 * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
 * B1 Processors Volume 1 of 2.
 *                      --- and ---
 * "Instructions retired", from the Intel SDM, volume 3,
 * "Pre-defined Architectural Performance Events."
 */

#define INST_RETIRED EVENT(0xc0, 0)

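/*
 * A fixed-size stand-in for the kvm_pmu_event_filter uAPI struct, whose
 * events[] member is a flexible array. Mirroring it with a bounded array
 * lets the tests build filters on the stack and copy them by assignment
 * before casting back to the uAPI type for the ioctl.
 */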
struct __kvm_pmu_event_filter {
	__u32 action;
	__u32 nevents;
	__u32 fixed_counter_bitmap;
	__u32 flags;
	__u32 pad[4];
	__u64 events[MAX_FILTER_EVENTS];
};

/*
 * This event list comprises Intel's eight architectural events plus
 * AMD's "retired branch instructions" for Zen[123] (and possibly
 * other AMD CPUs).
 */
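/*
 * In list order: unhalted core cycles, instructions retired, unhalted
 * reference cycles, LLC references, LLC misses, branches retired,
 * branch mispredictions retired, and topdown slots.
 */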
static const struct __kvm_pmu_event_filter base_event_filter = {
	.nevents = ARRAY_SIZE(base_event_filter.events),
	.events = {
		EVENT(0x3c, 0),
		INST_RETIRED,
		EVENT(0x3c, 1),
		EVENT(0x2e, 0x4f),
		EVENT(0x2e, 0x41),
		EVENT(0xc4, 0),
		EVENT(0xc5, 0),
		EVENT(0xa4, 1),
		AMD_ZEN_BR_RETIRED,
	},
};

struct {
	uint64_t loads;
	uint64_t stores;
	uint64_t loads_stores;
	uint64_t branches_retired;
	uint64_t instructions_retired;
} pmc_results;

/*
 * If we encounter a #GP during the guest PMU sanity check, then the guest
 * PMU is not functional. Inform the hypervisor via GUEST_SYNC(-EFAULT).
 */
static void guest_gp_handler(struct ex_regs *regs)
{
	GUEST_SYNC(-EFAULT);
}

/*
 * Check that we can write a new value to the given MSR and read it back.
 * The caller should provide a non-empty set of bits that are safe to flip.
 *
 * Return on success. GUEST_SYNC(-EIO) on error.
 */
static void check_msr(uint32_t msr, uint64_t bits_to_flip)
{
	uint64_t v = rdmsr(msr) ^ bits_to_flip;

	wrmsr(msr, v);
	if (rdmsr(msr) != v)
		GUEST_SYNC(-EIO);

	v ^= bits_to_flip;
	wrmsr(msr, v);
	if (rdmsr(msr) != v)
		GUEST_SYNC(-EIO);
}

static void run_and_measure_loop(uint32_t msr_base)
{
	const uint64_t branches_retired = rdmsr(msr_base + 0);
	const uint64_t insn_retired = rdmsr(msr_base + 1);

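	/*
	 * The LOOP instruction decrements ECX and branches back to itself
	 * until ECX reaches zero, so the measured window should retire
	 * NUM_BRANCHES branch instructions; the instructions-retired delta
	 * also includes the trailing RDMSRs, hence only non-zero counts
	 * are asserted.
	 */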
	__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));

	pmc_results.branches_retired = rdmsr(msr_base + 0) - branches_retired;
	pmc_results.instructions_retired = rdmsr(msr_base + 1) - insn_retired;
}

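/*
 * The guest bodies below loop forever: each time the host resumes the
 * vCPU, the guest reprograms its counters, runs the measured loop, and
 * reports back with GUEST_SYNC(0) so the host can inspect pmc_results.
 */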
static void intel_guest_code(void)
{
	check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
	check_msr(MSR_P6_EVNTSEL0, 0xffff);
	check_msr(MSR_IA32_PMC0, 0xffff);
	GUEST_SYNC(0);

	for (;;) {
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
		wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
		      ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED);
		wrmsr(MSR_P6_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
		      ARCH_PERFMON_EVENTSEL_OS | INST_RETIRED);
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);

		run_and_measure_loop(MSR_IA32_PMC0);
		GUEST_SYNC(0);
	}
}

/*
 * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23],
 * this code uses the always-available, legacy K7 PMU MSRs, which alias to
 * the first four of the six extended core PMU MSRs.
 */
static void amd_guest_code(void)
{
	check_msr(MSR_K7_EVNTSEL0, 0xffff);
	check_msr(MSR_K7_PERFCTR0, 0xffff);
	GUEST_SYNC(0);

	for (;;) {
		wrmsr(MSR_K7_EVNTSEL0, 0);
		wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
		      ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED);
		wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
		      ARCH_PERFMON_EVENTSEL_OS | INST_RETIRED);

		run_and_measure_loop(MSR_K7_PERFCTR0);
		GUEST_SYNC(0);
	}
}

/*
 * Run the VM to the next GUEST_SYNC(value), and return the value passed
 * to the sync. Any other exit from the guest is fatal.
 */
static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
{
	struct ucall uc;

	vcpu_run(vcpu);
	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
	get_ucall(vcpu, &uc);
	TEST_ASSERT(uc.cmd == UCALL_SYNC,
		    "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
	return uc.args[1];
}

static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu)
{
	uint64_t r;

	memset(&pmc_results, 0, sizeof(pmc_results));
	sync_global_to_guest(vcpu->vm, pmc_results);

	r = run_vcpu_to_sync(vcpu);
	TEST_ASSERT(!r, "Unexpected sync value: 0x%lx", r);

	sync_global_from_guest(vcpu->vm, pmc_results);
}

/*
 * In a nested environment or if the vPMU is disabled, the guest PMU
 * might not work as architected (accessing the PMU MSRs may raise
 * #GP, or writes could simply be discarded). In those situations,
 * there is no point in running these tests. The guest code will perform
 * a sanity check and then GUEST_SYNC(success). In the case of failure,
 * the behavior of the guest on resumption is undefined.
 */
static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
{
	uint64_t r;

	vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
	r = run_vcpu_to_sync(vcpu);
	vm_install_exception_handler(vcpu->vm, GP_VECTOR, NULL);

	return !r;
}

/*
 * Remove the first occurrence of 'event' (if any) from the filter's
 * event list.
 */
static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
{
	bool found = false;
	int i;

	for (i = 0; i < f->nevents; i++) {
		if (found)
			f->events[i - 1] = f->events[i];
		else
			found = f->events[i] == event;
	}
	if (found)
		f->nevents--;
}

#define ASSERT_PMC_COUNTING_INSTRUCTIONS()						\
do {											\
	uint64_t br = pmc_results.branches_retired;					\
	uint64_t ir = pmc_results.instructions_retired;					\
											\
	if (br && br != NUM_BRANCHES)							\
		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",	\
			__func__, br, NUM_BRANCHES);					\
	TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)",		\
		    __func__, br);							\
	TEST_ASSERT(ir, "%s: Instructions retired = %lu (expected > 0)",		\
		    __func__, ir);							\
} while (0)

#define ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS()						\
do {											\
	uint64_t br = pmc_results.branches_retired;					\
	uint64_t ir = pmc_results.instructions_retired;					\
											\
	TEST_ASSERT(!br, "%s: Branch instructions retired = %lu (expected 0)",		\
		    __func__, br);							\
	TEST_ASSERT(!ir, "%s: Instructions retired = %lu (expected 0)",			\
		    __func__, ir);							\
} while (0)

static void test_without_filter(struct kvm_vcpu *vcpu)
{
	run_vcpu_and_sync_pmc_results(vcpu);

	ASSERT_PMC_COUNTING_INSTRUCTIONS();
}

static void test_with_filter(struct kvm_vcpu *vcpu,
			     struct __kvm_pmu_event_filter *__f)
{
	struct kvm_pmu_event_filter *f = (void *)__f;

	vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
	run_vcpu_and_sync_pmc_results(vcpu);
}

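/*
 * Denying event select 0x1C2 should not affect AMD_ZEN_BR_RETIRED (0xC2):
 * the two differ only in the upper nibble of AMD's 12-bit event select,
 * so branches should still be counted, presumably verifying that the
 * filter compares all 12 bits rather than just the low byte.
 */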
static void test_amd_deny_list(struct kvm_vcpu *vcpu)
{
	struct __kvm_pmu_event_filter f = {
		.action = KVM_PMU_EVENT_DENY,
		.nevents = 1,
		.events = {
			EVENT(0x1C2, 0),
		},
	};

	test_with_filter(vcpu, &f);

	ASSERT_PMC_COUNTING_INSTRUCTIONS();
}

static void test_member_deny_list(struct kvm_vcpu *vcpu)
{
	struct __kvm_pmu_event_filter f = base_event_filter;

	f.action = KVM_PMU_EVENT_DENY;
	test_with_filter(vcpu, &f);

	ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
}

static void test_member_allow_list(struct kvm_vcpu *vcpu)
{
	struct __kvm_pmu_event_filter f = base_event_filter;

	f.action = KVM_PMU_EVENT_ALLOW;
	test_with_filter(vcpu, &f);

	ASSERT_PMC_COUNTING_INSTRUCTIONS();
}

static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
{
	struct __kvm_pmu_event_filter f = base_event_filter;

	f.action = KVM_PMU_EVENT_DENY;

	remove_event(&f, INST_RETIRED);
	remove_event(&f, INTEL_BR_RETIRED);
	remove_event(&f, AMD_ZEN_BR_RETIRED);
	test_with_filter(vcpu, &f);

	ASSERT_PMC_COUNTING_INSTRUCTIONS();
}

static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
{
	struct __kvm_pmu_event_filter f = base_event_filter;

	f.action = KVM_PMU_EVENT_ALLOW;

	remove_event(&f, INST_RETIRED);
	remove_event(&f, INTEL_BR_RETIRED);
	remove_event(&f, AMD_ZEN_BR_RETIRED);
	test_with_filter(vcpu, &f);

	ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
}

/*
 * Verify that setting KVM_PMU_CAP_DISABLE prevents the use of the PMU.
 *
 * Note that KVM_CAP_PMU_CAPABILITY must be invoked prior to creating VCPUs.
 */
static void test_pmu_config_disable(void (*guest_code)(void))
{
	struct kvm_vcpu *vcpu;
	int r;
	struct kvm_vm *vm;

	r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY);
	if (!(r & KVM_PMU_CAP_DISABLE))
		return;

	vm = vm_create(1);

	vm_enable_cap(vm, KVM_CAP_PMU_CAPABILITY, KVM_PMU_CAP_DISABLE);

	vcpu = vm_vcpu_add(vm, 0, guest_code);
	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vcpu);

	TEST_ASSERT(!sanity_check_pmu(vcpu),
		    "Guest should not be able to use disabled PMU.");

	kvm_vm_free(vm);
}

/*
 * On Intel, check for a non-zero PMU version, at least one general-purpose
 * counter per logical processor, and support for counting the number of branch
 * instructions retired.
 */
static bool use_intel_pmu(void)
{
	return host_cpu_is_intel &&
	       kvm_cpu_property(X86_PROPERTY_PMU_VERSION) &&
	       kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) &&
	       kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED);
}

static bool is_zen1(uint32_t family, uint32_t model)
{
	return family == 0x17 && model <= 0x0f;
}

static bool is_zen2(uint32_t family, uint32_t model)
{
	return family == 0x17 && model >= 0x30 && model <= 0x3f;
}

static bool is_zen3(uint32_t family, uint32_t model)
{
	return family == 0x19 && model <= 0x0f;
}

/*
 * Determining AMD support for a PMU event requires consulting the AMD
 * PPR for the CPU or reference material derived therefrom. The AMD
 * test code herein has been verified to work on Zen1, Zen2, and Zen3.
 *
 * Feel free to add more AMD CPUs that are documented to support event
 * select 0xc2 umask 0 as "retired branch instructions."
 */
static bool use_amd_pmu(void)
{
	uint32_t family = kvm_cpu_family();
	uint32_t model = kvm_cpu_model();

	return host_cpu_is_amd &&
		(is_zen1(family, model) ||
		 is_zen2(family, model) ||
		 is_zen3(family, model));
}

/*
 * "MEM_INST_RETIRED.ALL_LOADS", "MEM_INST_RETIRED.ALL_STORES", and
 * "MEM_INST_RETIRED.ANY" from https://perfmon-events.intel.com/
 * supported on Intel Xeon processors:
 *  - Sapphire Rapids, Ice Lake, Cascade Lake, Skylake.
 */
#define MEM_INST_RETIRED		0xD0
#define MEM_INST_RETIRED_LOAD		EVENT(MEM_INST_RETIRED, 0x81)
#define MEM_INST_RETIRED_STORE		EVENT(MEM_INST_RETIRED, 0x82)
#define MEM_INST_RETIRED_LOAD_STORE	EVENT(MEM_INST_RETIRED, 0x83)

static bool supports_event_mem_inst_retired(void)
{
	uint32_t eax, ebx, ecx, edx;

	cpuid(1, &eax, &ebx, &ecx, &edx);
	if (x86_family(eax) == 0x6) {
		switch (x86_model(eax)) {
		/* Sapphire Rapids */
		case 0x8F:
		/* Ice Lake */
		case 0x6A:
		/* Skylake */
		/* Cascade Lake */
		case 0x55:
			return true;
		}
	}

	return false;
}

/*
 * "LS Dispatch", from Processor Programming Reference
 * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
 * Preliminary Processor Programming Reference (PPR) for AMD Family
 * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
 * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
 * B1 Processors Volume 1 of 2.
 */
#define LS_DISPATCH		0x29
#define LS_DISPATCH_LOAD	EVENT(LS_DISPATCH, BIT(0))
#define LS_DISPATCH_STORE	EVENT(LS_DISPATCH, BIT(1))
#define LS_DISPATCH_LOAD_STORE	EVENT(LS_DISPATCH, BIT(2))

#define INCLUDE_MASKED_ENTRY(event_select, mask, match) \
	KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, false)
#define EXCLUDE_MASKED_ENTRY(event_select, mask, match) \
	KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, true)
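
/*
 * How KVM matches masked entries (roughly): an entry matches a guest
 * eventsel if the event selects are equal and (guest unit mask & mask)
 * == match; an entry with the exclude bit set removes matching unit
 * masks from an otherwise-matching include entry. For example,
 * INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0) matches umasks 0x81,
 * 0x82 and 0x83 (among others), and adding
 * EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83) carves 0x83 back out.
 */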

static void masked_events_guest_test(uint32_t msr_base)
{
	/*
	 * The actual values of the counters don't determine the outcome of
	 * the test.  All that matters is whether they are zero or non-zero.
	 */
	const uint64_t loads = rdmsr(msr_base + 0);
	const uint64_t stores = rdmsr(msr_base + 1);
	const uint64_t loads_stores = rdmsr(msr_base + 2);
	int val;


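	/*
	 * One store (mov imm to memory), one load (mov memory to eax), and
	 * one load + store (incl on memory), so every counter that the
	 * filter allows should see a non-zero delta.
	 */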
	__asm__ __volatile__("movl $0, %[v];"
			     "movl %[v], %%eax;"
			     "incl %[v];"
			     : [v]"+m"(val) :: "eax");

	pmc_results.loads = rdmsr(msr_base + 0) - loads;
	pmc_results.stores = rdmsr(msr_base + 1) - stores;
	pmc_results.loads_stores = rdmsr(msr_base + 2) - loads_stores;
}

static void intel_masked_events_guest_code(void)
{
	for (;;) {
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);

		wrmsr(MSR_P6_EVNTSEL0 + 0, ARCH_PERFMON_EVENTSEL_ENABLE |
		      ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD);
		wrmsr(MSR_P6_EVNTSEL0 + 1, ARCH_PERFMON_EVENTSEL_ENABLE |
		      ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_STORE);
		wrmsr(MSR_P6_EVNTSEL0 + 2, ARCH_PERFMON_EVENTSEL_ENABLE |
		      ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD_STORE);

		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x7);

		masked_events_guest_test(MSR_IA32_PMC0);
		GUEST_SYNC(0);
	}
}

static void amd_masked_events_guest_code(void)
{
	for (;;) {
		wrmsr(MSR_K7_EVNTSEL0, 0);
		wrmsr(MSR_K7_EVNTSEL1, 0);
		wrmsr(MSR_K7_EVNTSEL2, 0);

		wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
		      ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD);
		wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
		      ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_STORE);
		wrmsr(MSR_K7_EVNTSEL2, ARCH_PERFMON_EVENTSEL_ENABLE |
		      ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD_STORE);

		masked_events_guest_test(MSR_K7_PERFCTR0);
		GUEST_SYNC(0);
	}
}

static void run_masked_events_test(struct kvm_vcpu *vcpu,
				   const uint64_t masked_events[],
				   const int nmasked_events)
{
	struct __kvm_pmu_event_filter f = {
		.nevents = nmasked_events,
		.action = KVM_PMU_EVENT_ALLOW,
		.flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
	};

	memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events);
	test_with_filter(vcpu, &f);
}

#define ALLOW_LOADS		BIT(0)
#define ALLOW_STORES		BIT(1)
#define ALLOW_LOADS_STORES	BIT(2)

struct masked_events_test {
	uint64_t intel_events[MAX_TEST_EVENTS];
	uint64_t intel_event_end;
	uint64_t amd_events[MAX_TEST_EVENTS];
	uint64_t amd_event_end;
	const char *msg;
	uint32_t flags;
};

/*
 * These are the test cases for the masked events tests.
 *
 * For each test, the guest enables 3 PMU counters (loads, stores,
 * loads + stores).  The filter is then set in KVM with the masked events
 * provided.  The test then verifies that the counters agree with which
 * ones should be counting and which ones should be filtered.
 */
const struct masked_events_test test_cases[] = {
	{
		.intel_events = {
			INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x81),
		},
		.amd_events = {
			INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)),
		},
		.msg = "Only allow loads.",
		.flags = ALLOW_LOADS,
	}, {
		.intel_events = {
			INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82),
		},
		.amd_events = {
			INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)),
		},
		.msg = "Only allow stores.",
		.flags = ALLOW_STORES,
	}, {
		.intel_events = {
			INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83),
		},
		.amd_events = {
			INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(2)),
		},
		.msg = "Only allow loads + stores.",
		.flags = ALLOW_LOADS_STORES,
	}, {
		.intel_events = {
			INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
			EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83),
		},
		.amd_events = {
			INCLUDE_MASKED_ENTRY(LS_DISPATCH, ~(BIT(0) | BIT(1)), 0),
		},
		.msg = "Only allow loads and stores.",
		.flags = ALLOW_LOADS | ALLOW_STORES,
	}, {
		.intel_events = {
			INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
			EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82),
		},
		.amd_events = {
			INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
			EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)),
		},
		.msg = "Only allow loads and loads + stores.",
		.flags = ALLOW_LOADS | ALLOW_LOADS_STORES
	}, {
		.intel_events = {
			INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFE, 0x82),
		},
		.amd_events = {
			INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
			EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)),
		},
		.msg = "Only allow stores and loads + stores.",
		.flags = ALLOW_STORES | ALLOW_LOADS_STORES
	}, {
		.intel_events = {
			INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
		},
		.amd_events = {
			INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
		},
		.msg = "Only allow loads, stores, and loads + stores.",
		.flags = ALLOW_LOADS | ALLOW_STORES | ALLOW_LOADS_STORES
	},
};

static int append_test_events(const struct masked_events_test *test,
			      uint64_t *events, int nevents)
{
	const uint64_t *evts;
	int i;

	evts = use_intel_pmu() ? test->intel_events : test->amd_events;
	for (i = 0; i < MAX_TEST_EVENTS; i++) {
		if (evts[i] == 0)
			break;

		events[nevents + i] = evts[i];
	}

	return nevents + i;
}

static bool bool_eq(bool a, bool b)
{
	return a == b;
}

static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events,
				    int nevents)
{
	int ntests = ARRAY_SIZE(test_cases);
	int i, n;

	for (i = 0; i < ntests; i++) {
		const struct masked_events_test *test = &test_cases[i];

		/* Do any test case events overflow MAX_TEST_EVENTS? */
		assert(test->intel_event_end == 0);
		assert(test->amd_event_end == 0);

		n = append_test_events(test, events, nevents);

		run_masked_events_test(vcpu, events, n);

		TEST_ASSERT(bool_eq(pmc_results.loads, test->flags & ALLOW_LOADS) &&
			    bool_eq(pmc_results.stores, test->flags & ALLOW_STORES) &&
			    bool_eq(pmc_results.loads_stores,
				    test->flags & ALLOW_LOADS_STORES),
			    "%s  loads: %lu, stores: %lu, loads + stores: %lu",
			    test->msg, pmc_results.loads, pmc_results.stores,
			    pmc_results.loads_stores);
	}
}

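/*
 * Pad the filter with masked entries that can never match the events under
 * test (the MEM_INST_RETIRED and LS_DISPATCH event selects are skipped), so
 * the "dense" run exercises KVM's filter lookup without changing the
 * expected results.
 */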
static void add_dummy_events(uint64_t *events, int nevents)
{
	int i;

	for (i = 0; i < nevents; i++) {
		int event_select = i % 0xFF;
		bool exclude = ((i % 4) == 0);

		if (event_select == MEM_INST_RETIRED ||
		    event_select == LS_DISPATCH)
			event_select++;

		events[i] = KVM_PMU_ENCODE_MASKED_ENTRY(event_select, 0,
							0, exclude);
	}
}

static void test_masked_events(struct kvm_vcpu *vcpu)
{
	int nevents = MAX_FILTER_EVENTS - MAX_TEST_EVENTS;
	uint64_t events[MAX_FILTER_EVENTS];

	/* Run the test cases against a sparse PMU event filter. */
	run_masked_events_tests(vcpu, events, 0);

	/* Run the test cases against a dense PMU event filter. */
	add_dummy_events(events, MAX_FILTER_EVENTS);
	run_masked_events_tests(vcpu, events, nevents);
}

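/*
 * Unlike test_with_filter(), these helpers use __vm_ioctl(), which returns
 * the ioctl's return value instead of asserting success, so callers can
 * check that invalid filters are rejected.
 */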
static int set_pmu_event_filter(struct kvm_vcpu *vcpu,
				struct __kvm_pmu_event_filter *__f)
{
	struct kvm_pmu_event_filter *f = (void *)__f;

	return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
}

static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event,
				       uint32_t flags, uint32_t action)
{
	struct __kvm_pmu_event_filter f = {
		.nevents = 1,
		.flags = flags,
		.action = action,
		.events = {
			event,
		},
	};

	return set_pmu_event_filter(vcpu, &f);
}

static void test_filter_ioctl(struct kvm_vcpu *vcpu)
{
	uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
	struct __kvm_pmu_event_filter f;
	uint64_t e = ~0ul;
	int r;

	/*
	 * Unfortunately, an event with invalid bits set (anything other than
	 * eventsel+umask) is expected to be accepted when flags == 0.
	 */
	r = set_pmu_single_event_filter(vcpu, e, 0, KVM_PMU_EVENT_ALLOW);
	TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");

	r = set_pmu_single_event_filter(vcpu, e,
					KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
					KVM_PMU_EVENT_ALLOW);
	TEST_ASSERT(r != 0, "Invalid PMU Event Filter is expected to fail");

	e = KVM_PMU_ENCODE_MASKED_ENTRY(0xff, 0xff, 0xff, 0xf);
	r = set_pmu_single_event_filter(vcpu, e,
					KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
					KVM_PMU_EVENT_ALLOW);
	TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");

	f = base_event_filter;
	f.action = PMU_EVENT_FILTER_INVALID_ACTION;
	r = set_pmu_event_filter(vcpu, &f);
	TEST_ASSERT(r, "Set invalid action is expected to fail");

	f = base_event_filter;
	f.flags = PMU_EVENT_FILTER_INVALID_FLAGS;
	r = set_pmu_event_filter(vcpu, &f);
	TEST_ASSERT(r, "Set invalid flags is expected to fail");

	f = base_event_filter;
	f.nevents = PMU_EVENT_FILTER_INVALID_NEVENTS;
	r = set_pmu_event_filter(vcpu, &f);
	TEST_ASSERT(r, "Exceeding the max number of filter events should fail");

	f = base_event_filter;
	f.fixed_counter_bitmap = ~GENMASK_ULL(nr_fixed_counters, 0);
	r = set_pmu_event_filter(vcpu, &f);
	TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed");
}

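/*
 * Each fixed counter is controlled by a 4-bit field in
 * MSR_CORE_PERF_FIXED_CTR_CTRL (bit 0 of the field enables counting in
 * ring 0) and is enabled globally via bit (INTEL_PMC_IDX_FIXED + idx) of
 * MSR_CORE_PERF_GLOBAL_CTRL.
 */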
static void intel_run_fixed_counter_guest_code(uint8_t fixed_ctr_idx)
{
	for (;;) {
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
		wrmsr(MSR_CORE_PERF_FIXED_CTR0 + fixed_ctr_idx, 0);

		/* Only OS_EN bit is enabled for fixed counter[idx]. */
		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, BIT_ULL(4 * fixed_ctr_idx));
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL,
		      BIT_ULL(INTEL_PMC_IDX_FIXED + fixed_ctr_idx));
		__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);

		GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + fixed_ctr_idx));
	}
}

static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu,
					       uint32_t action, uint32_t bitmap)
{
	struct __kvm_pmu_event_filter f = {
		.action = action,
		.fixed_counter_bitmap = bitmap,
	};
	set_pmu_event_filter(vcpu, &f);

	return run_vcpu_to_sync(vcpu);
}

static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu,
						   uint32_t action,
						   uint32_t bitmap)
{
	struct __kvm_pmu_event_filter f = base_event_filter;

	f.action = action;
	f.fixed_counter_bitmap = bitmap;
	set_pmu_event_filter(vcpu, &f);

	return run_vcpu_to_sync(vcpu);
}

static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx,
					uint8_t nr_fixed_counters)
{
	unsigned int i;
	uint32_t bitmap;
	uint64_t count;

	TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8,
		    "Invalid nr_fixed_counters");

	/*
	 * Check that the fixed performance counter counts normally when
	 * KVM userspace hasn't set any PMU event filter.
	 */
	count = run_vcpu_to_sync(vcpu);
	TEST_ASSERT(count, "Unexpected count value: %ld\n", count);

	for (i = 0; i < BIT(nr_fixed_counters); i++) {
		bitmap = BIT(i);
		count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_ALLOW,
						       bitmap);
		TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));

		count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_DENY,
						       bitmap);
		TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));

		/*
		 * Check that fixed_counter_bitmap has higher priority than
		 * events[] when both are set.
		 */
		count = test_set_gp_and_fixed_event_filter(vcpu,
							   KVM_PMU_EVENT_ALLOW,
							   bitmap);
		TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));

		count = test_set_gp_and_fixed_event_filter(vcpu,
							   KVM_PMU_EVENT_DENY,
							   bitmap);
		TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
	}
}

static void test_fixed_counter_bitmap(void)
{
	uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
	struct kvm_vm *vm;
	struct kvm_vcpu *vcpu;
	uint8_t idx;

	/*
	 * Check that pmu_event_filter works as expected when it's applied to
	 * fixed performance counters.
	 */
	for (idx = 0; idx < nr_fixed_counters; idx++) {
		vm = vm_create_with_one_vcpu(&vcpu,
					     intel_run_fixed_counter_guest_code);
		vcpu_args_set(vcpu, 1, idx);
		__test_fixed_counter_bitmap(vcpu, idx, nr_fixed_counters);
		kvm_vm_free(vm);
	}
}

int main(int argc, char *argv[])
{
	void (*guest_code)(void);
	struct kvm_vcpu *vcpu, *vcpu2 = NULL;
	struct kvm_vm *vm;

	TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));
	TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER));
	TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_MASKED_EVENTS));

	TEST_REQUIRE(use_intel_pmu() || use_amd_pmu());
	guest_code = use_intel_pmu() ? intel_guest_code : amd_guest_code;

	vm = vm_create_with_one_vcpu(&vcpu, guest_code);

	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vcpu);

	TEST_REQUIRE(sanity_check_pmu(vcpu));

	if (use_amd_pmu())
		test_amd_deny_list(vcpu);

	test_without_filter(vcpu);
	test_member_deny_list(vcpu);
	test_member_allow_list(vcpu);
	test_not_member_deny_list(vcpu);
	test_not_member_allow_list(vcpu);

	if (use_intel_pmu() &&
	    supports_event_mem_inst_retired() &&
	    kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) >= 3)
		vcpu2 = vm_vcpu_add(vm, 2, intel_masked_events_guest_code);
	else if (use_amd_pmu())
		vcpu2 = vm_vcpu_add(vm, 2, amd_masked_events_guest_code);

	if (vcpu2)
		test_masked_events(vcpu2);
	test_filter_ioctl(vcpu);

	kvm_vm_free(vm);

	test_pmu_config_disable(guest_code);
	test_fixed_counter_bitmap();

	return 0;
}