xref: /openbmc/linux/arch/arm64/kvm/pmu-emul.c (revision b296a6d5)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>

static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);

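/*
 * Config1 bit 0 is interpreted by the arm64 PMU driver as a request for a
 * 64-bit (i.e. chained) hardware event.
 */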
#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1

/**
 * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (select_idx == ARMV8_PMU_CYCLE_IDX &&
		__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
}

static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu;
	struct kvm_vcpu_arch *vcpu_arch;

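	/*
	 * Counters live contiguously in pmu->pmc[] with pmc->idx equal to the
	 * array index, so stepping back by idx lands on pmc[0]; container_of()
	 * then walks back up to the enclosing vcpu.
	 */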
	pmc -= pmc->idx;
	pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
	vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
	return container_of(vcpu_arch, struct kvm_vcpu, arch);
}

/**
 * kvm_pmu_pmc_is_chained - determine if the pmc is chained
 * @pmc: The PMU counter pointer
 */
static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

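	/* The chained bitmap holds one bit per even/odd counter pair. */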
	return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_idx_is_high_counter - determine if select_idx is the high counter
 * of a pair
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
{
	return select_idx & 0x1;
}

/**
 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
 * @pmc: The PMU counter pointer
 *
 * When a pair of PMCs are chained together we use the low counter (canonical)
 * to hold the underlying perf event.
 */
static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;

	return pmc;
}

static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;
	else
		return pmc + 1;
}

/**
 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 eventsel, reg;

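	/* The CHAIN event type is always programmed on the odd (high) counter. */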
	select_idx |= 0x1;

	if (select_idx == ARMV8_PMU_CYCLE_IDX)
		return false;

	reg = PMEVTYPER0_EL0 + select_idx;
	eventsel = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_EVENT;

	return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
}

/**
 * kvm_pmu_get_pair_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 */
static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
					  struct kvm_pmc *pmc)
{
	u64 counter, counter_high, reg, enabled, running;

	if (kvm_pmu_pmc_is_chained(pmc)) {
		pmc = kvm_pmu_get_canonical_pmc(pmc);
		reg = PMEVCNTR0_EL0 + pmc->idx;

		counter = __vcpu_sys_reg(vcpu, reg);
		counter_high = __vcpu_sys_reg(vcpu, reg + 1);

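		/* Splice the two saved 32-bit halves into one 64-bit value. */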
		counter = lower_32_bits(counter) | (counter_high << 32);
	} else {
		reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
		counter = __vcpu_sys_reg(vcpu, reg);
	}

	/*
	 * The real counter value is equal to the value of the counter register
	 * plus the value the perf event has counted.
	 */
	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event, &enabled,
						 &running);

	return counter;
}

/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 counter;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx];

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(select_idx))
		counter = upper_32_bits(counter);
	else if (select_idx != ARMV8_PMU_CYCLE_IDX)
		counter = lower_32_bits(counter);

	return counter;
}

/**
 * kvm_pmu_set_counter_value - set PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 * @val: The counter value
 */
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
	u64 reg;

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
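	/*
	 * The saved register holds the delta against the backing perf event's
	 * count, so adjust it such that saved + perf count == val.
	 */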
	__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

/**
 * kvm_pmu_stop_counter - stop PMU counter
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 *
 * If this counter has been configured to monitor some event, release it here.
 */
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
{
	u64 counter, reg, val;

	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (!pmc->perf_event)
		return;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
		reg = PMCCNTR_EL0;
		val = counter;
	} else {
		reg = PMEVCNTR0_EL0 + pmc->idx;
		val = lower_32_bits(counter);
	}

	__vcpu_sys_reg(vcpu, reg) = val;

	if (kvm_pmu_pmc_is_chained(pmc))
		__vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);

	kvm_pmu_release_perf_event(pmc);
}

/**
 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		pmu->pmc[i].idx = i;
}

/**
 * kvm_pmu_vcpu_reset - reset pmu state for cpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	for_each_set_bit(i, &mask, 32)
		kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);

	bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
}

/**
 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		kvm_pmu_release_perf_event(&pmu->pmc[i]);
	irq_work_sync(&vcpu->arch.pmu.overflow_work);
}

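/*
 * PMCR_EL0.N advertises the number of implemented event counters; the cycle
 * counter (bit 31) is always present.
 */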
u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;

	val &= ARMV8_PMU_PMCR_N_MASK;
	if (val == 0)
		return BIT(ARMV8_PMU_CYCLE_IDX);
	else
		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
}

/**
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event
 */
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];

		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event) {
			perf_event_enable(pmc->perf_event);
			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
				kvm_debug("fail to enable perf event\n");
		}
	}
}

/**
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event
 */
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	if (!val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];

		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event)
			perf_event_disable(pmc->perf_event);
	}
}

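/*
 * An overflow interrupt is pending only while the PMU is globally enabled
 * and the overflowed counter is both enabled and unmasked.
 */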
static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
	u64 reg = 0;

	if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
		reg &= kvm_pmu_valid_counter_mask(vcpu);
	}

	return reg;
}

static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	bool overflow;

	if (!kvm_arm_pmu_v3_ready(vcpu))
		return;

	overflow = !!kvm_pmu_overflow_status(vcpu);
	if (pmu->irq_level == overflow)
		return;

	pmu->irq_level = overflow;

	if (likely(irqchip_in_kernel(vcpu->kvm))) {
		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
					      pmu->irq_num, overflow, pmu);
		WARN_ON(ret);
	}
}

bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	return pmu->irq_level != run_level;
}

/*
 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 */
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the PMU overflow bit for user space */
	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
	if (vcpu->arch.pmu.irq_level)
		regs->device_irq_level |= KVM_ARM_DEV_PMU;
}

/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and inject
 * an interrupt if that was the case.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/*
 * When the perf interrupt is an NMI, we cannot safely notify the vcpu
 * corresponding to the event. This is why we need a callback to do it once
 * outside of the NMI context.
 */
static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
{
	struct kvm_vcpu *vcpu;
	struct kvm_pmu *pmu;

	pmu = container_of(work, struct kvm_pmu, overflow_work);
	vcpu = kvm_pmc_to_vcpu(pmu->pmc);

	kvm_vcpu_kick(vcpu);
}

/*
 * When the perf event overflows, set the overflow status and inform the vcpu.
 */
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
				  struct perf_sample_data *data,
				  struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	int idx = pmc->idx;
	u64 period;

	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * Reset the sample period to the architectural limit,
	 * i.e. the point where the counter overflows.
	 */
	period = -(local64_read(&perf_event->count));

	if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
		period &= GENMASK(31, 0);

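	/*
	 * Zero period_left so that pmu.start() below reprograms the hardware
	 * with the full new sample period.
	 */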
	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);

	if (kvm_pmu_overflow_status(vcpu)) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);

		if (!in_nmi())
			kvm_vcpu_kick(vcpu);
		else
			irq_work_queue(&vcpu->arch.pmu.overflow_work);
	}

	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

/**
 * kvm_pmu_software_increment - do software increment
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMSWINC register
 */
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
		return;

	/* Weed out disabled counters */
	val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

	for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
		u64 type, reg;

		if (!(val & BIT(i)))
			continue;

		/* PMSWINC only applies to ... SW_INC! */
		type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
		type &= ARMV8_PMU_EVTYPE_EVENT;
		if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
			continue;

		/* increment this SW_INC counter */
		reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
		reg = lower_32_bits(reg);
		__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;

		if (reg) /* no overflow on the low part */
			continue;

		if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
			/* increment the high counter */
			reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
			reg = lower_32_bits(reg);
			__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
			if (!reg) /* mark overflow on the high counter */
				__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
		} else {
			/* mark overflow on the low counter */
			__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
		}
	}
}

/**
 * kvm_pmu_handle_pmcr - handle PMCR register
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCR register
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
	int i;

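	/*
	 * PMCR_EL0: E is the global enable, C resets the cycle counter and
	 * P resets the event counters.
	 */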
	if (val & ARMV8_PMU_PMCR_E) {
		kvm_pmu_enable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
	} else {
		kvm_pmu_disable_counter_mask(vcpu, mask);
	}

	if (val & ARMV8_PMU_PMCR_C)
		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

	if (val & ARMV8_PMU_PMCR_P) {
		for_each_set_bit(i, &mask, 32)
			kvm_pmu_set_counter_value(vcpu, i, 0);
	}
}

static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
}

/**
 * kvm_pmu_create_perf_event - create a perf event for a counter
 * @vcpu: The vcpu pointer
 * @select_idx: The index of the selected counter
 */
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;
	struct perf_event *event;
	struct perf_event_attr attr;
	u64 eventsel, counter, reg, data;

	/*
	 * For chained counters the event type and filtering attributes are
	 * obtained from the low/even counter. We also use this counter to
	 * determine if the event is enabled/disabled.
	 */
	pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);

	reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
	data = __vcpu_sys_reg(vcpu, reg);

	kvm_pmu_stop_counter(vcpu, pmc);
	eventsel = data & ARMV8_PMU_EVTYPE_EVENT;

	/* A software increment event doesn't need to be backed by a perf event */
	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
	    pmc->idx != ARMV8_PMU_CYCLE_IDX)
		return;

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
	attr.exclude_hv = 1; /* Don't count EL2 events */
	attr.exclude_host = 1; /* Don't count host events */
	attr.config = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ?
		ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

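	/*
	 * perf counts up and fires after attr.sample_period events, so seeding
	 * the period with (-counter) makes the perf event overflow exactly
	 * when the emulated counter would wrap.
	 */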
	if (kvm_pmu_pmc_is_chained(pmc)) {
		/*
		 * The initial sample period (overflow count) of an event. For
		 * chained counters we only support overflow interrupts on the
		 * high counter.
		 */
		attr.sample_period = (-counter) & GENMASK(63, 0);
		attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;

		event = perf_event_create_kernel_counter(&attr, -1, current,
							 kvm_pmu_perf_overflow,
							 pmc + 1);
	} else {
		/* The initial sample period (overflow count) of an event. */
		if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
			attr.sample_period = (-counter) & GENMASK(63, 0);
		else
			attr.sample_period = (-counter) & GENMASK(31, 0);

		event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, pmc);
	}

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return;
	}

	pmc->perf_event = event;
}

/**
 * kvm_pmu_update_pmc_chained - update chained bitmap
 * @vcpu: The vcpu pointer
 * @select_idx: The index of the selected counter
 *
 * Update the chained bitmap based on the event type written in the
 * typer register and the enable state of the odd register.
 */
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
	bool new_state, old_state;

	old_state = kvm_pmu_pmc_is_chained(pmc);
	new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
		    kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);

	if (old_state == new_state)
		return;

	canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
	kvm_pmu_stop_counter(vcpu, canonical_pmc);
	if (new_state) {
		/*
		 * During promotion from !chained to chained we must ensure
		 * the adjacent counter is stopped and its event destroyed
		 */
		kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
		set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
		return;
	}
	clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 * @vcpu: The vcpu pointer
 * @data: The data guest writes to PMXEVTYPER_EL0
 * @select_idx: The index of the selected counter
 *
 * When the guest OS accesses PMXEVTYPER_EL0, it wants to set a PMC to count an
 * event with the given hardware event number. Here we call the perf_event API
 * to emulate this action and create a kernel perf event for it.
 */
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
				    u64 select_idx)
{
	u64 reg, event_type = data & ARMV8_PMU_EVTYPE_MASK;

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;

	__vcpu_sys_reg(vcpu, reg) = event_type;

	kvm_pmu_update_pmc_chained(vcpu, select_idx);
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

bool kvm_arm_support_pmu_v3(void)
{
	/*
	 * Check if HW_PERF_EVENTS are supported by checking the number of
	 * hardware performance counters. This ensures the presence of a
	 * physical PMU and that CONFIG_PERF_EVENTS is selected.
	 */
	return (perf_num_counters() > 0);
}

int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
	if (!vcpu->arch.pmu.created)
		return 0;

	/*
	 * A valid interrupt configuration for the PMU is either to have a
	 * properly configured interrupt number and using an in-kernel
	 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
	 */
	if (irqchip_in_kernel(vcpu->kvm)) {
		int irq = vcpu->arch.pmu.irq_num;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -EINVAL;

		/*
		 * If we are using an in-kernel vgic, at this point we know
		 * the vgic will be initialized, so we can check the PMU irq
		 * number against the dimensions of the vgic and make sure
		 * it's valid.
		 */
		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
			return -EINVAL;
	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
		return -EINVAL;
	}

	kvm_pmu_vcpu_reset(vcpu);
	vcpu->arch.pmu.ready = true;

	return 0;
}

static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
	if (!kvm_arm_support_pmu_v3())
		return -ENODEV;

	if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
		return -ENXIO;

	if (vcpu->arch.pmu.created)
		return -EBUSY;

	if (irqchip_in_kernel(vcpu->kvm)) {
		int ret;

		/*
		 * If using the PMU with an in-kernel virtual GIC
		 * implementation, we require the GIC to be already
		 * initialized when initializing the PMU.
		 */
		if (!vgic_initialized(vcpu->kvm))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
					 &vcpu->arch.pmu);
		if (ret)
			return ret;
	}

	init_irq_work(&vcpu->arch.pmu.overflow_work,
		      kvm_pmu_perf_overflow_notify_vcpu);

	vcpu->arch.pmu.created = true;
	return 0;
}

/*
 * For one VM the interrupt type must be the same for each vcpu.
 * As a PPI, the interrupt number is the same for all vcpus,
 * while as an SPI it must be a separate number per vcpu.
 */
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
	int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			continue;

		if (irq_is_ppi(irq)) {
			if (vcpu->arch.pmu.irq_num != irq)
				return false;
		} else {
			if (vcpu->arch.pmu.irq_num == irq)
				return false;
		}
	}

	return true;
}

int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
			return -ENODEV;

		if (get_user(irq, uaddr))
			return -EFAULT;

		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
			return -EINVAL;

		if (!pmu_irq_is_valid(vcpu->kvm, irq))
			return -EINVAL;

		if (kvm_arm_pmu_irq_initialized(vcpu))
			return -EBUSY;

		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
		vcpu->arch.pmu.irq_num = irq;
		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_INIT:
		return kvm_arm_pmu_v3_init(vcpu);
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		irq = vcpu->arch.pmu.irq_num;
		return put_user(irq, uaddr);
	}
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ:
	case KVM_ARM_VCPU_PMU_V3_INIT:
		if (kvm_arm_support_pmu_v3() &&
		    test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
			return 0;
	}

	return -ENXIO;
}
894