// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine -- Performance Monitoring Unit support
 *
 * Copyright 2015 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@redhat.com>
 *   Gleb Natapov <gleb@redhat.com>
 *   Wei Huang    <wei@redhat.com>
 */

#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <asm/perf_event.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
#include "pmu.h"

/* This is enough to filter the vast majority of currently defined events. */
#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300

/* NOTE:
 * - Each perf counter is defined as "struct kvm_pmc";
 * - There are two types of perf counters: general purpose (gp) and fixed.
 *   gp counters are stored in gp_counters[] and fixed counters are stored
 *   in fixed_counters[] respectively. Both of them are part of "struct
 *   kvm_pmu";
 * - pmu.c understands the difference between gp counters and fixed counters.
 *   However AMD doesn't support fixed counters;
 * - There are three types of index to access perf counters (PMC):
 *   1. MSR (named msr): For example Intel has MSR_IA32_PERFCTRn and AMD
 *      has MSR_K7_PERFCTRn.
 *   2. MSR Index (named idx): This is normally used by the RDPMC
 *      instruction. For instance the AMD RDPMC instruction uses 0000_0003h
 *      in ECX to access C001_0007h (MSR_K7_PERFCTR3). Intel has a similar
 *      mechanism, except that it also supports fixed counters. idx can be
 *      used as an index into the gp and fixed counters.
 *   3. Global PMC Index (named pmc): pmc is an index specific to PMU
 *      code. Each pmc, stored in the kvm_pmc.idx field, is unique across
 *      all perf counters (both gp and fixed). The mapping relationship
 *      between pmc and perf counters is as follows:
 *      * Intel: [0 .. INTEL_PMC_MAX_GENERIC-1] <=> gp counters
 *               [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed
 *      * AMD:   [0 .. AMD64_NUM_COUNTERS-1] <=> gp counters
 */
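/*
 * Illustrative sketch only, not part of this file's upstream source: one
 * plausible way the global pmc index described above decodes back into a
 * counter array slot on Intel. The example_* name is hypothetical; the
 * real lookup (with bounds checks) lives in the vendor pmu_ops
 * ->pmc_idx_to_pmc() implementations.
 */
static inline struct kvm_pmc *example_intel_pmc_idx_to_pmc(struct kvm_pmu *pmu,
							   int pmc_idx)
{
	if (pmc_idx < INTEL_PMC_IDX_FIXED)
		/* [0 .. INTEL_PMC_MAX_GENERIC-1] is the gp counter range */
		return &pmu->gp_counters[pmc_idx];

	/* fixed counters start at global index INTEL_PMC_IDX_FIXED */
	return &pmu->fixed_counters[pmc_idx - INTEL_PMC_IDX_FIXED];
}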
static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
{
	struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work);
	struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);

	kvm_pmu_deliver_pmi(vcpu);
}

static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);

	/* Ignore counters that have been reprogrammed already. */
	if (test_and_set_bit(pmc->idx, pmu->reprogram_pmi))
		return;

	__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
	kvm_make_request(KVM_REQ_PMU, pmc->vcpu);

	if (!pmc->intr)
		return;

	/*
	 * Inject PMI. If the vcpu was in guest mode during the NMI, the PMI
	 * can be injected on guest-mode re-entry. Otherwise we can't be sure
	 * that the vcpu wasn't executing a hlt instruction at the time of
	 * the vmexit and is not going to re-enter guest mode until woken up.
	 * So we should wake it, but this is impossible from NMI context.
	 * Do it from irq work instead.
	 */
	if (in_pmi && !kvm_handling_nmi_from_guest(pmc->vcpu))
		irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
	else
		kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
}

static void kvm_perf_overflow(struct perf_event *perf_event,
			      struct perf_sample_data *data,
			      struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;

	__kvm_perf_overflow(pmc, true);
}
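/*
 * Background sketch (an assumption, mirroring get_sample_period() in pmu.h;
 * the example_* name is hypothetical): a vPMC counts upwards and overflows
 * when it wraps its bit width, so the perf sample period handed to the host
 * PMU below is the distance from the current counter value to that overflow
 * point.
 */
static inline u64 example_sample_period(struct kvm_pmc *pmc, u64 counter_value)
{
	u64 sample_period = (-counter_value) & pmc_bitmask(pmc);

	/* perf rejects a zero period; ask for a full counter wrap instead */
	if (!sample_period)
		sample_period = pmc_bitmask(pmc) + 1;

	return sample_period;
}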
static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
				  u64 config, bool exclude_user,
				  bool exclude_kernel, bool intr)
{
	struct perf_event *event;
	struct perf_event_attr attr = {
		.type = type,
		.size = sizeof(attr),
		.pinned = true,
		.exclude_idle = true,
		.exclude_host = 1,
		.exclude_user = exclude_user,
		.exclude_kernel = exclude_kernel,
		.config = config,
	};

	if (type == PERF_TYPE_HARDWARE && config >= PERF_COUNT_HW_MAX)
		return;

	attr.sample_period = get_sample_period(pmc, pmc->counter);

	if ((attr.config & HSW_IN_TX_CHECKPOINTED) &&
	    guest_cpuid_is_intel(pmc->vcpu)) {
		/*
		 * HSW_IN_TX_CHECKPOINTED is not supported with nonzero
		 * period. Just clear the sample period so at least
		 * allocating the counter doesn't fail.
		 */
		attr.sample_period = 0;
	}

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_perf_overflow, pmc);
	if (IS_ERR(event)) {
		pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
				     PTR_ERR(event), pmc->idx);
		return;
	}

	pmc->perf_event = event;
	pmc_to_pmu(pmc)->event_count++;
	clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
	pmc->is_paused = false;
	pmc->intr = intr;
}

static void pmc_pause_counter(struct kvm_pmc *pmc)
{
	u64 counter = pmc->counter;

	if (!pmc->perf_event || pmc->is_paused)
		return;

	/* update counter, reset event value to avoid redundant accumulation */
	counter += perf_event_pause(pmc->perf_event, true);
	pmc->counter = counter & pmc_bitmask(pmc);
	pmc->is_paused = true;
}

static bool pmc_resume_counter(struct kvm_pmc *pmc)
{
	if (!pmc->perf_event)
		return false;

	/* recalibrate sample period and check if it's accepted by perf core */
	if (perf_event_period(pmc->perf_event,
			      get_sample_period(pmc, pmc->counter)))
		return false;

	/* reuse perf_event to serve as pmc_reprogram_counter() does */
	perf_event_enable(pmc->perf_event);
	pmc->is_paused = false;

	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
	return true;
}

static int cmp_u64(const void *pa, const void *pb)
{
	u64 a = *(u64 *)pa;
	u64 b = *(u64 *)pb;

	/*
	 * Don't return the u64 difference: truncating it to int can flip the
	 * sign and misorder the array. Compare explicitly instead.
	 */
	return (a > b) - (a < b);
}
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
{
	u64 config;
	u32 type = PERF_TYPE_RAW;
	struct kvm *kvm = pmc->vcpu->kvm;
	struct kvm_pmu_event_filter *filter;
	struct kvm_pmu *pmu = vcpu_to_pmu(pmc->vcpu);
	bool allow_event = true;

	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
		printk_once("kvm pmu: pin control bit is ignored\n");

	pmc->eventsel = eventsel;

	pmc_pause_counter(pmc);

	if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
		return;

	filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
	if (filter) {
		__u64 key = eventsel & AMD64_RAW_EVENT_MASK_NB;

		if (bsearch(&key, filter->events, filter->nevents,
			    sizeof(__u64), cmp_u64))
			allow_event = filter->action == KVM_PMU_EVENT_ALLOW;
		else
			allow_event = filter->action == KVM_PMU_EVENT_DENY;
	}
	if (!allow_event)
		return;

	if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
			  ARCH_PERFMON_EVENTSEL_INV |
			  ARCH_PERFMON_EVENTSEL_CMASK |
			  HSW_IN_TX |
			  HSW_IN_TX_CHECKPOINTED))) {
		config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
		if (config != PERF_COUNT_HW_MAX)
			type = PERF_TYPE_HARDWARE;
	}

	if (type == PERF_TYPE_RAW)
		config = eventsel & pmu->raw_event_mask;

	if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
		return;

	pmc_release_perf_event(pmc);

	pmc->current_config = eventsel;
	pmc_reprogram_counter(pmc, type, config,
			      !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
			      !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
			      eventsel & ARCH_PERFMON_EVENTSEL_INT);
}
EXPORT_SYMBOL_GPL(reprogram_gp_counter);
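/*
 * Illustrative sketch (hypothetical helper, not upstream code): the filter
 * lookup above compares only the event-select and unit-mask fields of the
 * raw event selector, which is exactly what AMD64_RAW_EVENT_MASK_NB keeps.
 * Edge detect, invert, CMASK and the ring-level bits are deliberately
 * ignored when matching against the filter list.
 */
static inline u64 example_filter_key(u64 eventsel)
{
	/* AMD64_EVENTSEL_EVENT | ARCH_PERFMON_EVENTSEL_UMASK */
	return eventsel & AMD64_RAW_EVENT_MASK_NB;
}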
void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
{
	unsigned en_field = ctrl & 0x3;
	bool pmi = ctrl & 0x8;
	struct kvm_pmu_event_filter *filter;
	struct kvm *kvm = pmc->vcpu->kvm;

	pmc_pause_counter(pmc);

	if (!en_field || !pmc_is_enabled(pmc))
		return;

	filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
	if (filter) {
		if (filter->action == KVM_PMU_EVENT_DENY &&
		    test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
			return;
		if (filter->action == KVM_PMU_EVENT_ALLOW &&
		    !test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
			return;
	}

	if (pmc->current_config == (u64)ctrl && pmc_resume_counter(pmc))
		return;

	pmc_release_perf_event(pmc);

	pmc->current_config = (u64)ctrl;
	pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
			      kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc),
			      !(en_field & 0x2), /* exclude user */
			      !(en_field & 0x1), /* exclude kernel */
			      pmi);
}
EXPORT_SYMBOL_GPL(reprogram_fixed_counter);

void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx)
{
	struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, pmc_idx);

	if (!pmc)
		return;

	if (pmc_is_gp(pmc))
		reprogram_gp_counter(pmc, pmc->eventsel);
	else {
		int idx = pmc_idx - INTEL_PMC_IDX_FIXED;
		u8 ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, idx);

		reprogram_fixed_counter(pmc, ctrl, idx);
	}
}
EXPORT_SYMBOL_GPL(reprogram_counter);
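/*
 * Illustrative sketch (an assumption, mirroring the fixed_ctrl_field()
 * macro in pmu.h; the example_* name is hypothetical): how the 4-bit
 * per-counter control field consumed by reprogram_fixed_counter() is laid
 * out in IA32_FIXED_CTR_CTRL. Bits 1:0 select the enabled rings (0x1 = OS,
 * 0x2 = user, 0x3 = both) and bit 3 requests a PMI on overflow.
 */
static inline u8 example_fixed_ctrl_field(u64 fixed_ctr_ctrl, int idx)
{
	return (fixed_ctr_ctrl >> (idx * 4)) & 0xf;
}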
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	int bit;

	for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
		struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, bit);

		if (unlikely(!pmc || !pmc->perf_event)) {
			clear_bit(bit, pmu->reprogram_pmi);
			continue;
		}

		reprogram_counter(pmu, bit);
	}

	/*
	 * Unused perf_events are only released if the corresponding MSRs
	 * weren't accessed during the last vCPU time slice. kvm_arch_sched_in
	 * triggers KVM_REQ_PMU if cleanup is needed.
	 */
	if (unlikely(pmu->need_cleanup))
		kvm_pmu_cleanup(vcpu);
}

/* check if idx is a valid index to access PMU */
bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
	return kvm_x86_ops.pmu_ops->is_valid_rdpmc_ecx(vcpu, idx);
}

bool is_vmware_backdoor_pmc(u32 pmc_idx)
{
	switch (pmc_idx) {
	case VMWARE_BACKDOOR_PMC_HOST_TSC:
	case VMWARE_BACKDOOR_PMC_REAL_TIME:
	case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
		return true;
	}
	return false;
}

static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
	u64 ctr_val;

	switch (idx) {
	case VMWARE_BACKDOOR_PMC_HOST_TSC:
		ctr_val = rdtsc();
		break;
	case VMWARE_BACKDOOR_PMC_REAL_TIME:
		ctr_val = ktime_get_boottime_ns();
		break;
	case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
		ctr_val = ktime_get_boottime_ns() +
			vcpu->kvm->arch.kvmclock_offset;
		break;
	default:
		return 1;
	}

	*data = ctr_val;
	return 0;
}

int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
	bool fast_mode = idx & (1u << 31);
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u64 mask = fast_mode ? ~0u : ~0ull;

	if (!pmu->version)
		return 1;

	if (is_vmware_backdoor_pmc(idx))
		return kvm_pmu_rdpmc_vmware(vcpu, idx, data);

	pmc = kvm_x86_ops.pmu_ops->rdpmc_ecx_to_pmc(vcpu, idx, &mask);
	if (!pmc)
		return 1;

	if (!(kvm_read_cr4(vcpu) & X86_CR4_PCE) &&
	    (static_call(kvm_x86_get_cpl)(vcpu) != 0) &&
	    (kvm_read_cr0(vcpu) & X86_CR0_PE))
		return 1;

	*data = pmc_read_counter(pmc) & mask;
	return 0;
}
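/*
 * Illustrative guest-side sketch (not KVM code; the example_* name is
 * hypothetical): what arrives at kvm_pmu_rdpmc() above. ECX selects the
 * counter, and setting bit 31 is the architectural "fast" read mode that
 * returns only the low 32 bits, hence the fast_mode mask above.
 */
static inline u64 example_guest_rdpmc(u32 ecx)
{
	u32 lo, hi;

	asm volatile("rdpmc" : "=a"(lo), "=d"(hi) : "c"(ecx));
	return lo | ((u64)hi << 32);
}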
void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{
	if (lapic_in_kernel(vcpu)) {
		if (kvm_x86_ops.pmu_ops->deliver_pmi)
			kvm_x86_ops.pmu_ops->deliver_pmi(vcpu);
		kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
	}
}

bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
	return kvm_x86_ops.pmu_ops->msr_idx_to_pmc(vcpu, msr) ||
		kvm_x86_ops.pmu_ops->is_valid_msr(vcpu, msr);
}

static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->msr_idx_to_pmc(vcpu, msr);

	if (pmc)
		__set_bit(pmc->idx, pmu->pmc_in_use);
}

int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	return kvm_x86_ops.pmu_ops->get_msr(vcpu, msr_info);
}

int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
	return kvm_x86_ops.pmu_ops->set_msr(vcpu, msr_info);
}

/*
 * Refresh the PMU configuration. This function is generally called when the
 * underlying settings change (such as the guest's PMU CPUID being updated),
 * which should happen rarely.
 */
void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
{
	kvm_x86_ops.pmu_ops->refresh(vcpu);
}

void kvm_pmu_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

	irq_work_sync(&pmu->irq_work);
	kvm_x86_ops.pmu_ops->reset(vcpu);
}

void kvm_pmu_init(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

	memset(pmu, 0, sizeof(*pmu));
	kvm_x86_ops.pmu_ops->init(vcpu);
	init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
	pmu->event_count = 0;
	pmu->need_cleanup = false;
	kvm_pmu_refresh(vcpu);
}

static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);

	if (pmc_is_fixed(pmc))
		return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
					pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;

	return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
}
/* Release perf_events for vPMCs that have been unused for a full time slice. */
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = NULL;
	DECLARE_BITMAP(bitmask, X86_PMC_IDX_MAX);
	int i;

	pmu->need_cleanup = false;

	bitmap_andnot(bitmask, pmu->all_valid_pmc_idx,
		      pmu->pmc_in_use, X86_PMC_IDX_MAX);

	for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) {
		pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, i);

		if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc))
			pmc_stop_counter(pmc);
	}

	if (kvm_x86_ops.pmu_ops->cleanup)
		kvm_x86_ops.pmu_ops->cleanup(vcpu);

	bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX);
}

void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
	kvm_pmu_reset(vcpu);
}

static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
	u64 prev_count;

	prev_count = pmc->counter;
	pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);

	reprogram_counter(pmu, pmc->idx);
	if (pmc->counter < prev_count)
		__kvm_perf_overflow(pmc, false);
}

static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
					     unsigned int perf_hw_id)
{
	u64 old_eventsel = pmc->eventsel;
	unsigned int config;

	pmc->eventsel &= (ARCH_PERFMON_EVENTSEL_EVENT | ARCH_PERFMON_EVENTSEL_UMASK);
	config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc);
	pmc->eventsel = old_eventsel;
	return config == perf_hw_id;
}

static inline bool cpl_is_matched(struct kvm_pmc *pmc)
{
	bool select_os, select_user;
	u64 config = pmc->current_config;

	if (pmc_is_gp(pmc)) {
		select_os = config & ARCH_PERFMON_EVENTSEL_OS;
		select_user = config & ARCH_PERFMON_EVENTSEL_USR;
	} else {
		select_os = config & 0x1;
		select_user = config & 0x2;
	}

	return (static_call(kvm_x86_get_cpl)(pmc->vcpu) == 0) ? select_os : select_user;
}
void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int i;

	for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
		pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, i);

		if (!pmc || !pmc_is_enabled(pmc) || !pmc_speculative_in_use(pmc))
			continue;

		/* Ignore checks for edge detect, pin control, invert and CMASK bits */
		if (eventsel_match_perf_hw_id(pmc, perf_hw_id) && cpl_is_matched(pmc))
			kvm_pmu_incr_counter(pmc);
	}
}
EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);

int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
{
	struct kvm_pmu_event_filter tmp, *filter;
	size_t size;
	int r;

	if (copy_from_user(&tmp, argp, sizeof(tmp)))
		return -EFAULT;

	if (tmp.action != KVM_PMU_EVENT_ALLOW &&
	    tmp.action != KVM_PMU_EVENT_DENY)
		return -EINVAL;

	if (tmp.flags != 0)
		return -EINVAL;

	if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS)
		return -E2BIG;

	size = struct_size(filter, events, tmp.nevents);
	filter = kmalloc(size, GFP_KERNEL_ACCOUNT);
	if (!filter)
		return -ENOMEM;

	r = -EFAULT;
	if (copy_from_user(filter, argp, size))
		goto cleanup;

	/* Ensure nevents can't be changed between the user copies. */
	*filter = tmp;

	/*
	 * Sort the in-kernel list so that we can search it with bsearch.
	 */
	sort(&filter->events, filter->nevents, sizeof(__u64), cmp_u64, NULL);

	mutex_lock(&kvm->lock);
	filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
				     mutex_is_locked(&kvm->lock));
	mutex_unlock(&kvm->lock);

	synchronize_srcu_expedited(&kvm->srcu);
	r = 0;
cleanup:
	kfree(filter);
	return r;
}
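/*
 * Illustrative userspace sketch (not part of this file, kept out of the
 * kernel build): installing a one-entry allow-list via the
 * KVM_SET_PMU_EVENT_FILTER vm ioctl served by
 * kvm_vm_ioctl_set_pmu_event_filter() above. The vm_fd value and the
 * example_* name are placeholders.
 */
#if 0
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int example_set_allow_filter(int vm_fd)
{
	struct kvm_pmu_event_filter *f;
	int r;

	/* header plus one __u64 entry in the flexible events[] array */
	f = calloc(1, sizeof(*f) + sizeof(__u64));
	if (!f)
		return -1;

	f->action = KVM_PMU_EVENT_ALLOW;
	f->nevents = 1;
	/* event select 0xc0, umask 0x00: instructions retired */
	f->events[0] = 0xc0;

	r = ioctl(vm_fd, KVM_SET_PMU_EVENT_FILTER, f);
	free(f);
	return r;
}
#endif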