// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine -- Performance Monitoring Unit support
 *
 * Copyright 2015 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@redhat.com>
 *   Gleb Natapov <gleb@redhat.com>
 *   Wei Huang    <wei@redhat.com>
 */

#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <asm/perf_event.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
#include "pmu.h"

/* This is enough to filter the vast majority of currently defined events. */
#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300

/* NOTE:
 * - Each perf counter is defined as "struct kvm_pmc";
 * - There are two types of perf counters: general purpose (gp) and fixed.
 *   gp counters are stored in gp_counters[] and fixed counters are stored
 *   in fixed_counters[] respectively. Both of them are part of "struct
 *   kvm_pmu";
 * - pmu.c understands the difference between gp counters and fixed counters.
 *   However, AMD doesn't support fixed counters;
 * - There are three types of index to access perf counters (PMC):
 *     1. MSR (named msr): For example Intel has MSR_IA32_PERFCTRn and AMD
 *        has MSR_K7_PERFCTRn.
 *     2. MSR Index (named idx): This is normally used by the RDPMC
 *        instruction. For instance, the AMD RDPMC instruction uses
 *        0000_0003h in ECX to access C001_0007h (MSR_K7_PERFCTR3). Intel
 *        has a similar mechanism, except that it also supports fixed
 *        counters. idx can be used as an index into the gp and fixed
 *        counters.
 *     3. Global PMC Index (named pmc): pmc is an index specific to PMU
 *        code. Each pmc, stored in the kvm_pmc.idx field, is unique across
 *        all perf counters (both gp and fixed). The mapping between pmc
 *        and the perf counters is as follows:
 *        * Intel: [0 .. INTEL_PMC_MAX_GENERIC-1] <=> gp counters
 *                 [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed
 *        * AMD:   [0 .. AMD64_NUM_COUNTERS-1] <=> gp counters
 */
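
/*
 * Illustrative sketch of the global PMC index ("pmc") mapping described
 * above, not built as part of KVM: classify an index as a gp or fixed
 * counter following the Intel-style layout.  The helper name and constants
 * below are inventions for the example only; KVM's real lookup is done by
 * the vendor pmc_idx_to_pmc() callback.
 */
#if 0
#include <stdbool.h>
#include <stdio.h>

#define EXAMPLE_PMC_MAX_GENERIC	8	/* plays the role of INTEL_PMC_MAX_GENERIC */
#define EXAMPLE_PMC_IDX_FIXED	32	/* plays the role of INTEL_PMC_IDX_FIXED */

/* Return true and the counter-array slot if @pmc_idx names a fixed counter. */
static bool example_pmc_is_fixed(int pmc_idx, int *slot)
{
	if (pmc_idx >= EXAMPLE_PMC_IDX_FIXED) {
		*slot = pmc_idx - EXAMPLE_PMC_IDX_FIXED;	/* fixed_counters[slot] */
		return true;
	}
	*slot = pmc_idx;					/* gp_counters[slot] */
	return false;
}

int main(void)
{
	int slot;
	bool fixed;

	fixed = example_pmc_is_fixed(1, &slot);
	printf("pmc 1:  fixed=%d slot=%d\n", fixed, slot);	/* gp_counters[1] */

	fixed = example_pmc_is_fixed(33, &slot);
	printf("pmc 33: fixed=%d slot=%d\n", fixed, slot);	/* fixed_counters[1] */
	return 0;
}
#endif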

static struct kvm_pmu_ops kvm_pmu_ops __read_mostly;

#define KVM_X86_PMU_OP(func)					     \
	DEFINE_STATIC_CALL_NULL(kvm_x86_pmu_##func,			     \
				*(((struct kvm_pmu_ops *)0)->func));
#define KVM_X86_PMU_OP_OPTIONAL KVM_X86_PMU_OP
#include <asm/kvm-x86-pmu-ops.h>

void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
{
	memcpy(&kvm_pmu_ops, pmu_ops, sizeof(kvm_pmu_ops));

#define __KVM_X86_PMU_OP(func) \
	static_call_update(kvm_x86_pmu_##func, kvm_pmu_ops.func);
#define KVM_X86_PMU_OP(func) \
	WARN_ON(!kvm_pmu_ops.func); __KVM_X86_PMU_OP(func)
#define KVM_X86_PMU_OP_OPTIONAL __KVM_X86_PMU_OP
#include <asm/kvm-x86-pmu-ops.h>
#undef __KVM_X86_PMU_OP
}
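
/*
 * Illustrative sketch of the pattern used above, not built as part of KVM:
 * an "X-macro" op list is expanded once to declare one slot per op and
 * again to fill the slots from a vendor ops table.  The real code expands
 * <asm/kvm-x86-pmu-ops.h> the same way to declare and update one static
 * call per kvm_pmu_ops member; every name below is made up for the example.
 */
#if 0
#include <stdio.h>

struct example_ops {
	int (*add)(int a, int b);
	int (*neg)(int a);
};

#define EXAMPLE_OP_LIST(OP)	OP(add) OP(neg)

/* First expansion: declare one function-pointer slot per op. */
#define DECLARE_OP(func) \
	static __typeof__(((struct example_ops *)0)->func) slot_##func;
EXAMPLE_OP_LIST(DECLARE_OP)
#undef DECLARE_OP

static int impl_add(int a, int b) { return a + b; }
static int impl_neg(int a) { return -a; }

static void example_ops_update(const struct example_ops *ops)
{
/* Second expansion: copy each member into its slot (static_call_update in KVM). */
#define UPDATE_OP(func)	slot_##func = ops->func;
	EXAMPLE_OP_LIST(UPDATE_OP)
#undef UPDATE_OP
}

int main(void)
{
	static const struct example_ops ops = { .add = impl_add, .neg = impl_neg };

	example_ops_update(&ops);
	printf("%d %d\n", slot_add(2, 3), slot_neg(7));	/* prints "5 -7" */
	return 0;
}
#endif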

static inline bool pmc_is_enabled(struct kvm_pmc *pmc)
{
	return static_call(kvm_x86_pmu_pmc_is_enabled)(pmc);
}

static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
{
	struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work);
	struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);

	kvm_pmu_deliver_pmi(vcpu);
}

static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
	bool skip_pmi = false;

	/* Ignore counters that have been reprogrammed already. */
	if (test_and_set_bit(pmc->idx, pmu->reprogram_pmi))
		return;

	if (pmc->perf_event && pmc->perf_event->attr.precise_ip) {
		/* Indicate PEBS overflow PMI to guest. */
		skip_pmi = __test_and_set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT,
					      (unsigned long *)&pmu->global_status);
	} else {
		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
	}
	kvm_make_request(KVM_REQ_PMU, pmc->vcpu);

	if (!pmc->intr || skip_pmi)
		return;

	/*
	 * Inject PMI. If the vcpu was in guest mode when the NMI arrived,
	 * the PMI can be injected on the next guest-mode re-entry.
	 * Otherwise we can't be sure the vcpu wasn't executing a hlt
	 * instruction at the time of the vmexit and won't re-enter guest
	 * mode until woken up, so it needs to be woken here; that is
	 * impossible from NMI context, so do it from irq work instead.
	 */
	if (in_pmi && !kvm_handling_nmi_from_guest(pmc->vcpu))
		irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
	else
		kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
}
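
/*
 * Illustrative sketch of the irq_work deferral used above, not built as
 * part of KVM: work that cannot be done from NMI context (such as waking
 * a halted vcpu) is queued as irq_work and runs later from a normal
 * interrupt context.  The callback and call sites below are invented for
 * the example; in KVM the callback is kvm_pmi_trigger_fn() and the work
 * item lives in struct kvm_pmu.
 */
#if 0
#include <linux/irq_work.h>
#include <linux/printk.h>

static void example_deferred_fn(struct irq_work *work)
{
	/* Runs in IRQ context, where it is safe to do the real work. */
	pr_info("deferred work ran\n");
}

static struct irq_work example_work;

static void example_setup(void)
{
	init_irq_work(&example_work, example_deferred_fn);
}

/* Called from NMI context: just queue, never do the real work here. */
static void example_nmi_notification(void)
{
	irq_work_queue(&example_work);
}
#endif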

static void kvm_perf_overflow(struct perf_event *perf_event,
			      struct perf_sample_data *data,
			      struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;

	__kvm_perf_overflow(pmc, true);
}

static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
				  u64 config, bool exclude_user,
				  bool exclude_kernel, bool intr)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
	struct perf_event *event;
	struct perf_event_attr attr = {
		.type = type,
		.size = sizeof(attr),
		.pinned = true,
		.exclude_idle = true,
		.exclude_host = 1,
		.exclude_user = exclude_user,
		.exclude_kernel = exclude_kernel,
		.config = config,
	};
	bool pebs = test_bit(pmc->idx, (unsigned long *)&pmu->pebs_enable);

	if (type == PERF_TYPE_HARDWARE && config >= PERF_COUNT_HW_MAX)
		return;

	attr.sample_period = get_sample_period(pmc, pmc->counter);

	if ((attr.config & HSW_IN_TX_CHECKPOINTED) &&
	    guest_cpuid_is_intel(pmc->vcpu)) {
		/*
		 * HSW_IN_TX_CHECKPOINTED is not supported with nonzero
		 * period. Just clear the sample period so at least
		 * allocating the counter doesn't fail.
		 */
		attr.sample_period = 0;
	}
	if (pebs) {
		/*
		 * A non-zero precision level turns an ordinary guest event
		 * into a guest PEBS event, which lets the host PEBS PMI
		 * handler determine whether a PEBS overflow PMI comes from
		 * the host counters or from the guest.
		 *
		 * For most PEBS hardware events, a difference between the
		 * guest and host software precision levels does not affect
		 * the accuracy of the PEBS profiling result, because the
		 * "event IP" in the PEBS record is calibrated on the guest
		 * side.
		 *
		 * On Ice Lake everything is fine. Other hardware (GLC+, TNT+)
		 * that could possibly care here is unsupported and would need
		 * changes.
		 */
		attr.precise_ip = 1;
		if (x86_match_cpu(vmx_icl_pebs_cpu) && pmc->idx == 32)
			attr.precise_ip = 3;
	}

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_perf_overflow, pmc);
	if (IS_ERR(event)) {
		pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
			    PTR_ERR(event), pmc->idx);
		return;
	}

	pmc->perf_event = event;
	pmc_to_pmu(pmc)->event_count++;
	clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
	pmc->is_paused = false;
	pmc->intr = intr || pebs;
}
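
/*
 * Illustrative sketch of the sample-period choice above, not built as
 * part of KVM: a vPMC counts upward and interrupts when it wraps, so the
 * perf sample period is the distance from the current guest-visible
 * counter value to the overflow point, computed modulo the counter width.
 * The helpers below are a standalone approximation of what
 * get_sample_period() and pmc_bitmask() do in pmu.h.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

/* Bitmask selecting the valid bits of a counter that is @width bits wide. */
static uint64_t example_bitmask(unsigned int width)
{
	return (width >= 64) ? ~0ULL : (1ULL << width) - 1;
}

static uint64_t example_sample_period(uint64_t counter, unsigned int width)
{
	uint64_t period = (-counter) & example_bitmask(width);

	/* A zero period is rejected by perf; use a full counter round. */
	if (!period)
		period = example_bitmask(width) + 1;
	return period;
}

int main(void)
{
	/* 48-bit counter programmed 1000 events below overflow. */
	uint64_t counter = example_bitmask(48) - 999;

	printf("period = %llu\n",
	       (unsigned long long)example_sample_period(counter, 48));
	return 0;
}
#endif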

static void pmc_pause_counter(struct kvm_pmc *pmc)
{
	u64 counter = pmc->counter;

	if (!pmc->perf_event || pmc->is_paused)
		return;

	/* update counter, reset event value to avoid redundant accumulation */
	counter += perf_event_pause(pmc->perf_event, true);
	pmc->counter = counter & pmc_bitmask(pmc);
	pmc->is_paused = true;
}

static bool pmc_resume_counter(struct kvm_pmc *pmc)
{
	if (!pmc->perf_event)
		return false;

	/* recalibrate sample period and check if it's accepted by perf core */
	if (perf_event_period(pmc->perf_event,
			      get_sample_period(pmc, pmc->counter)))
		return false;

	if (!test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) &&
	    pmc->perf_event->attr.precise_ip)
		return false;

	/* Reuse the existing perf_event, as pmc_reprogram_counter() would. */
	perf_event_enable(pmc->perf_event);
	pmc->is_paused = false;

	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
	return true;
}

static int cmp_u64(const void *pa, const void *pb)
{
	u64 a = *(u64 *)pa;
	u64 b = *(u64 *)pb;

	return (a > b) - (a < b);
}
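
/*
 * Illustrative sketch of the comparator idiom above, not built as part of
 * KVM: (a > b) - (a < b) yields -1/0/1 without the truncation and
 * overflow problems of returning "a - b" for 64-bit keys.  The sketch
 * uses the userspace qsort()/bsearch() counterparts of the kernel's
 * sort() and bsearch() to show the same sort-then-search flow used for
 * the PMU event filter.
 */
#if 0
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static int example_cmp_u64(const void *pa, const void *pb)
{
	uint64_t a = *(const uint64_t *)pa;
	uint64_t b = *(const uint64_t *)pb;

	return (a > b) - (a < b);	/* never overflows, unlike a - b */
}

int main(void)
{
	uint64_t events[] = { 0xc4, 0x3c, 0x10000000000c0ULL, 0x2e };
	size_t nevents = sizeof(events) / sizeof(events[0]);
	uint64_t key = 0xc4;

	qsort(events, nevents, sizeof(uint64_t), example_cmp_u64);
	printf("found: %s\n",
	       bsearch(&key, events, nevents, sizeof(uint64_t),
		       example_cmp_u64) ? "yes" : "no");
	return 0;
}
#endif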

void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
{
	u64 config;
	u32 type = PERF_TYPE_RAW;
	struct kvm *kvm = pmc->vcpu->kvm;
	struct kvm_pmu_event_filter *filter;
	struct kvm_pmu *pmu = vcpu_to_pmu(pmc->vcpu);
	bool allow_event = true;

	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
		printk_once("kvm pmu: pin control bit is ignored\n");

	pmc->eventsel = eventsel;

	pmc_pause_counter(pmc);

	if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
		return;

	filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
	if (filter) {
		__u64 key = eventsel & AMD64_RAW_EVENT_MASK_NB;

		if (bsearch(&key, filter->events, filter->nevents,
			    sizeof(__u64), cmp_u64))
			allow_event = filter->action == KVM_PMU_EVENT_ALLOW;
		else
			allow_event = filter->action == KVM_PMU_EVENT_DENY;
	}
	if (!allow_event)
		return;

	if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
			  ARCH_PERFMON_EVENTSEL_INV |
			  ARCH_PERFMON_EVENTSEL_CMASK |
			  HSW_IN_TX |
			  HSW_IN_TX_CHECKPOINTED))) {
		config = static_call(kvm_x86_pmu_pmc_perf_hw_id)(pmc);
		if (config != PERF_COUNT_HW_MAX)
			type = PERF_TYPE_HARDWARE;
	}

	if (type == PERF_TYPE_RAW)
		config = eventsel & pmu->raw_event_mask;

	if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
		return;

	pmc_release_perf_event(pmc);

	pmc->current_config = eventsel;
	pmc_reprogram_counter(pmc, type, config,
			      !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
			      !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
			      eventsel & ARCH_PERFMON_EVENTSEL_INT);
}
EXPORT_SYMBOL_GPL(reprogram_gp_counter);
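
/*
 * Illustrative sketch of the eventsel decoding above, not built as part
 * of KVM: the exclude_user/exclude_kernel/intr arguments passed to
 * pmc_reprogram_counter() come straight from bits of the guest's EVENTSEL
 * MSR value.  The constants below restate the architectural bit positions
 * (USR=16, OS=17, INT=20, ENABLE=22) purely for the example; KVM uses the
 * ARCH_PERFMON_EVENTSEL_* definitions.
 */
#if 0
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_EVENTSEL_USR	(1ULL << 16)
#define EXAMPLE_EVENTSEL_OS	(1ULL << 17)
#define EXAMPLE_EVENTSEL_INT	(1ULL << 20)
#define EXAMPLE_EVENTSEL_ENABLE	(1ULL << 22)

int main(void)
{
	/* Event 0xc4, umask 0x00, count user+kernel, interrupt on overflow. */
	uint64_t eventsel = 0xc4 | EXAMPLE_EVENTSEL_USR | EXAMPLE_EVENTSEL_OS |
			    EXAMPLE_EVENTSEL_INT | EXAMPLE_EVENTSEL_ENABLE;

	bool exclude_user   = !(eventsel & EXAMPLE_EVENTSEL_USR);
	bool exclude_kernel = !(eventsel & EXAMPLE_EVENTSEL_OS);
	bool intr           = eventsel & EXAMPLE_EVENTSEL_INT;

	printf("exclude_user=%d exclude_kernel=%d intr=%d\n",
	       exclude_user, exclude_kernel, intr);
	return 0;
}
#endif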

void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
{
	unsigned en_field = ctrl & 0x3;
	bool pmi = ctrl & 0x8;
	struct kvm_pmu_event_filter *filter;
	struct kvm *kvm = pmc->vcpu->kvm;

	pmc_pause_counter(pmc);

	if (!en_field || !pmc_is_enabled(pmc))
		return;

	filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
	if (filter) {
		if (filter->action == KVM_PMU_EVENT_DENY &&
		    test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
			return;
		if (filter->action == KVM_PMU_EVENT_ALLOW &&
		    !test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
			return;
	}

	if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc))
		return;

	pmc_release_perf_event(pmc);

	pmc->current_config = (u64)ctrl;
	pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
			      static_call(kvm_x86_pmu_pmc_perf_hw_id)(pmc),
			      !(en_field & 0x2), /* exclude user */
			      !(en_field & 0x1), /* exclude kernel */
			      pmi);
}
EXPORT_SYMBOL_GPL(reprogram_fixed_counter);
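
/*
 * Illustrative sketch of the fixed-counter control decoding above, not
 * built as part of KVM: IA32_FIXED_CTR_CTRL packs one 4-bit field per
 * fixed counter, where bit 0 enables counting in ring 0, bit 1 enables
 * counting in rings above 0 and bit 3 enables the overflow PMI.  The
 * helper mirrors what fixed_ctrl_field() extracts and how the en_field
 * and pmi values above are derived from it; all names are made up here.
 */
#if 0
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint8_t example_fixed_ctrl_field(uint64_t fixed_ctr_ctrl, int idx)
{
	return (fixed_ctr_ctrl >> (idx * 4)) & 0xf;
}

int main(void)
{
	/* Fixed counter 1: count kernel + user (0x3) and raise a PMI (0x8). */
	uint64_t fixed_ctr_ctrl = 0xbULL << 4;
	uint8_t ctrl = example_fixed_ctrl_field(fixed_ctr_ctrl, 1);

	unsigned int en_field = ctrl & 0x3;
	bool pmi = ctrl & 0x8;

	printf("en_field=%#x exclude_user=%d exclude_kernel=%d pmi=%d\n",
	       en_field, !(en_field & 0x2), !(en_field & 0x1), pmi);
	return 0;
}
#endif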

void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx)
{
	struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, pmc_idx);

	if (!pmc)
		return;

	if (pmc_is_gp(pmc))
		reprogram_gp_counter(pmc, pmc->eventsel);
	else {
		int idx = pmc_idx - INTEL_PMC_IDX_FIXED;
		u8 ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, idx);

		reprogram_fixed_counter(pmc, ctrl, idx);
	}
}
EXPORT_SYMBOL_GPL(reprogram_counter);

void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	int bit;

	for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
		struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);

		if (unlikely(!pmc || !pmc->perf_event)) {
			clear_bit(bit, pmu->reprogram_pmi);
			continue;
		}

		reprogram_counter(pmu, bit);
	}

	/*
	 * Unused perf_events are only released if the corresponding MSRs
	 * weren't accessed during the last vCPU time slice. kvm_arch_sched_in
	 * triggers KVM_REQ_PMU if cleanup is needed.
	 */
	if (unlikely(pmu->need_cleanup))
		kvm_pmu_cleanup(vcpu);
}

/* check if idx is a valid index to access PMU */
bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
	return static_call(kvm_x86_pmu_is_valid_rdpmc_ecx)(vcpu, idx);
}

bool is_vmware_backdoor_pmc(u32 pmc_idx)
{
	switch (pmc_idx) {
	case VMWARE_BACKDOOR_PMC_HOST_TSC:
	case VMWARE_BACKDOOR_PMC_REAL_TIME:
	case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
		return true;
	}
	return false;
}

static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
	u64 ctr_val;

	switch (idx) {
	case VMWARE_BACKDOOR_PMC_HOST_TSC:
		ctr_val = rdtsc();
		break;
	case VMWARE_BACKDOOR_PMC_REAL_TIME:
		ctr_val = ktime_get_boottime_ns();
		break;
	case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
		ctr_val = ktime_get_boottime_ns() +
			vcpu->kvm->arch.kvmclock_offset;
		break;
	default:
		return 1;
	}

	*data = ctr_val;
	return 0;
}
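
/*
 * Illustrative guest-side sketch of the VMware backdoor pseudo-counters
 * handled above, not built as part of KVM: a guest issues a plain RDPMC
 * with the backdoor index in ECX and gets the host TSC, boot time or
 * apparent time back instead of a real counter.  The 0x10000 index below
 * is an assumption restating the conventional VMWARE_BACKDOOR_PMC_HOST_TSC
 * value from x86.h, and the sketch assumes the guest context is allowed
 * to execute RDPMC at all (ring 0, or CR4.PCE set).
 */
#if 0
#include <stdint.h>
#include <stdio.h>

static inline uint64_t example_rdpmc(uint32_t ecx)
{
	uint32_t lo, hi;

	asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (ecx));
	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	/* VMWARE_BACKDOOR_PMC_HOST_TSC: returns the host's rdtsc() value. */
	printf("host tsc = %llu\n",
	       (unsigned long long)example_rdpmc(0x10000));
	return 0;
}
#endif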

int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
	bool fast_mode = idx & (1u << 31);
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u64 mask = fast_mode ? ~0u : ~0ull;

	if (!pmu->version)
		return 1;

	if (is_vmware_backdoor_pmc(idx))
		return kvm_pmu_rdpmc_vmware(vcpu, idx, data);

	pmc = static_call(kvm_x86_pmu_rdpmc_ecx_to_pmc)(vcpu, idx, &mask);
	if (!pmc)
		return 1;

	if (!(kvm_read_cr4(vcpu) & X86_CR4_PCE) &&
	    (static_call(kvm_x86_get_cpl)(vcpu) != 0) &&
	    (kvm_read_cr0(vcpu) & X86_CR0_PE))
		return 1;

	*data = pmc_read_counter(pmc) & mask;
	return 0;
}

void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{
	if (lapic_in_kernel(vcpu)) {
		static_call_cond(kvm_x86_pmu_deliver_pmi)(vcpu);
		kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
	}
}

bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
	return static_call(kvm_x86_pmu_msr_idx_to_pmc)(vcpu, msr) ||
		static_call(kvm_x86_pmu_is_valid_msr)(vcpu, msr);
}

static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = static_call(kvm_x86_pmu_msr_idx_to_pmc)(vcpu, msr);

	if (pmc)
		__set_bit(pmc->idx, pmu->pmc_in_use);
}

int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	return static_call(kvm_x86_pmu_get_msr)(vcpu, msr_info);
}

int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
	return static_call(kvm_x86_pmu_set_msr)(vcpu, msr_info);
}

/*
 * Refresh the PMU configuration.  This is generally called when the
 * underlying settings change (e.g. the guest's PMU-related CPUID is
 * updated), which should rarely happen.
 */
void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
{
	static_call(kvm_x86_pmu_refresh)(vcpu);
}

void kvm_pmu_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

	irq_work_sync(&pmu->irq_work);
	static_call(kvm_x86_pmu_reset)(vcpu);
}

void kvm_pmu_init(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

	memset(pmu, 0, sizeof(*pmu));
	static_call(kvm_x86_pmu_init)(vcpu);
	init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
	pmu->event_count = 0;
	pmu->need_cleanup = false;
	kvm_pmu_refresh(vcpu);
}

/* Release perf_events for vPMCs that have been unused for a full time slice. */
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = NULL;
	DECLARE_BITMAP(bitmask, X86_PMC_IDX_MAX);
	int i;

	pmu->need_cleanup = false;

	bitmap_andnot(bitmask, pmu->all_valid_pmc_idx,
		      pmu->pmc_in_use, X86_PMC_IDX_MAX);

	for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) {
		pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);

		if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc))
			pmc_stop_counter(pmc);
	}

	static_call_cond(kvm_x86_pmu_cleanup)(vcpu);

	bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX);
}

void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
	kvm_pmu_reset(vcpu);
}

static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
	u64 prev_count;

	prev_count = pmc->counter;
	pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);

	reprogram_counter(pmu, pmc->idx);
	if (pmc->counter < prev_count)
		__kvm_perf_overflow(pmc, false);
}
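
/*
 * Illustrative sketch of the wrap detection above, not built as part of
 * KVM: a software-incremented vPMC is truncated to the counter width, so
 * an overflow shows up as the new value being smaller than the old one.
 * The 48-bit width below is only an example value.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t mask = (1ULL << 48) - 1;	/* 48-bit counter */
	uint64_t counter = mask;		/* one event away from wrapping */
	uint64_t prev = counter;

	counter = (counter + 1) & mask;
	printf("counter=%llu overflowed=%d\n",
	       (unsigned long long)counter, counter < prev);
	return 0;
}
#endif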

static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
	unsigned int perf_hw_id)
{
	u64 old_eventsel = pmc->eventsel;
	unsigned int config;

	pmc->eventsel &= (ARCH_PERFMON_EVENTSEL_EVENT | ARCH_PERFMON_EVENTSEL_UMASK);
	config = static_call(kvm_x86_pmu_pmc_perf_hw_id)(pmc);
	pmc->eventsel = old_eventsel;
	return config == perf_hw_id;
}

static inline bool cpl_is_matched(struct kvm_pmc *pmc)
{
	bool select_os, select_user;
	u64 config = pmc->current_config;

	if (pmc_is_gp(pmc)) {
		select_os = config & ARCH_PERFMON_EVENTSEL_OS;
		select_user = config & ARCH_PERFMON_EVENTSEL_USR;
	} else {
		select_os = config & 0x1;
		select_user = config & 0x2;
	}

	return (static_call(kvm_x86_get_cpl)(pmc->vcpu) == 0) ? select_os : select_user;
}

void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int i;

	for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
		pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);

		if (!pmc || !pmc_is_enabled(pmc) || !pmc_speculative_in_use(pmc))
			continue;

		/* Ignore checks for edge detect, pin control, invert and CMASK bits */
		if (eventsel_match_perf_hw_id(pmc, perf_hw_id) && cpl_is_matched(pmc))
			kvm_pmu_incr_counter(pmc);
	}
}
EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);

int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
{
	struct kvm_pmu_event_filter tmp, *filter;
	size_t size;
	int r;

	if (copy_from_user(&tmp, argp, sizeof(tmp)))
		return -EFAULT;

	if (tmp.action != KVM_PMU_EVENT_ALLOW &&
	    tmp.action != KVM_PMU_EVENT_DENY)
		return -EINVAL;

	if (tmp.flags != 0)
		return -EINVAL;

	if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS)
		return -E2BIG;

	size = struct_size(filter, events, tmp.nevents);
	filter = kmalloc(size, GFP_KERNEL_ACCOUNT);
	if (!filter)
		return -ENOMEM;

	r = -EFAULT;
	if (copy_from_user(filter, argp, size))
		goto cleanup;

	/* Ensure nevents can't be changed between the user copies. */
	*filter = tmp;

	/*
	 * Sort the in-kernel list so that we can search it with bsearch.
	 */
	sort(&filter->events, filter->nevents, sizeof(__u64), cmp_u64, NULL);

	mutex_lock(&kvm->lock);
	filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
				     mutex_is_locked(&kvm->lock));
	mutex_unlock(&kvm->lock);

	synchronize_srcu_expedited(&kvm->srcu);
	r = 0;
cleanup:
	kfree(filter);
	return r;
}
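
/*
 * Illustrative userspace sketch of setting the filter handled above, not
 * built as part of KVM: the VMM builds a struct kvm_pmu_event_filter with
 * a small allow-list of raw event select/umask values and issues
 * KVM_SET_PMU_EVENT_FILTER on the VM file descriptor.  vm_fd and the
 * chosen event codes are assumptions made for the example; the structure
 * and ioctl name come from <linux/kvm.h>.
 */
#if 0
#include <linux/kvm.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

static int example_set_filter(int vm_fd)
{
	/* Example event codes, assumed here: core cycles and instructions. */
	const __u64 allowed[] = { 0x003c, 0x00c0 };
	size_t nevents = sizeof(allowed) / sizeof(allowed[0]);
	struct kvm_pmu_event_filter *filter;
	int ret;

	filter = calloc(1, sizeof(*filter) + sizeof(allowed));
	if (!filter)
		return -1;

	filter->action = KVM_PMU_EVENT_ALLOW;
	filter->nevents = nevents;
	memcpy(filter->events, allowed, sizeof(allowed));

	ret = ioctl(vm_fd, KVM_SET_PMU_EVENT_FILTER, filter);
	free(filter);
	return ret;
}
#endif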