// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>

#define PERF_ATTR_CFG1_COUNTER_64BIT	BIT(0)

DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);

static LIST_HEAD(arm_pmus);
static DEFINE_MUTEX(arm_pmus_lock);

static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc);
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc);

static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc)
{
	return container_of(pmc, struct kvm_vcpu, arch.pmu.pmc[pmc->idx]);
}

static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx)
{
	return &vcpu->arch.pmu.pmc[cnt_idx];
}

static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
	unsigned int pmuver;

	pmuver = kvm->arch.arm_pmu->pmuver;

	switch (pmuver) {
	case ID_AA64DFR0_EL1_PMUVer_IMP:
		return GENMASK(9, 0);
	case ID_AA64DFR0_EL1_PMUVer_V3P1:
	case ID_AA64DFR0_EL1_PMUVer_V3P4:
	case ID_AA64DFR0_EL1_PMUVer_V3P5:
	case ID_AA64DFR0_EL1_PMUVer_V3P7:
		return GENMASK(15, 0);
	default:	/* Shouldn't be here, just for sanity */
		WARN_ONCE(1, "Unknown PMU version %d\n", pmuver);
		return 0;
	}
}

/**
 * kvm_pmc_is_64bit - determine if counter is 64bit
 * @pmc: counter context
 */
static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc)
{
	return (pmc->idx == ARMV8_PMU_CYCLE_IDX ||
		kvm_pmu_is_3p5(kvm_pmc_to_vcpu(pmc)));
}

static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc)
{
	u64 val = __vcpu_sys_reg(kvm_pmc_to_vcpu(pmc), PMCR_EL0);

	return (pmc->idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) ||
	       (pmc->idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC));
}

static bool kvm_pmu_counter_can_chain(struct kvm_pmc *pmc)
{
	return (!(pmc->idx & 1) && (pmc->idx + 1) < ARMV8_PMU_CYCLE_IDX &&
		!kvm_pmc_has_64bit_overflow(pmc));
}

static u32 counter_index_to_reg(u64 idx)
{
	return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + idx;
}

static u32 counter_index_to_evtreg(u64 idx)
{
	return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + idx;
}

static u64 kvm_pmu_get_pmc_value(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 counter, reg, enabled, running;

	reg = counter_index_to_reg(pmc->idx);
	counter = __vcpu_sys_reg(vcpu, reg);

	/*
	 * The real counter value is equal to the value of counter register plus
	 * the value perf event counts.
	 */
	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event, &enabled,
						 &running);

	if (!kvm_pmc_is_64bit(pmc))
		counter = lower_32_bits(counter);

	return counter;
}

/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	return kvm_pmu_get_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx));
}

static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 reg;

	kvm_pmu_release_perf_event(pmc);

	reg = counter_index_to_reg(pmc->idx);

	if (vcpu_mode_is_32bit(vcpu) && pmc->idx != ARMV8_PMU_CYCLE_IDX &&
	    !force) {
		/*
		 * Even with PMUv3p5, AArch32 cannot write to the top
		 * 32bit of the counters. The only possible course of
		 * action is to use PMCR.P, which will reset them to
		 * 0 (the only use of the 'force' parameter).
		 */
		val = lower_32_bits(val);
		val |= __vcpu_sys_reg(vcpu, reg) & GENMASK(63, 32);
	}

	__vcpu_sys_reg(vcpu, reg) = val;

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(pmc);
}

/**
 * kvm_pmu_set_counter_value - set PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 * @val: The counter value
 */
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx), val, false);
}

/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

/**
 * kvm_pmu_stop_counter - stop PMU counter
 * @pmc: The PMU counter pointer
 *
 * If this counter has been configured to monitor some event, release it here.
 */
static void kvm_pmu_stop_counter(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 reg, val;

	if (!pmc->perf_event)
		return;

	val = kvm_pmu_get_pmc_value(pmc);

	reg = counter_index_to_reg(pmc->idx);

	__vcpu_sys_reg(vcpu, reg) = val;

	kvm_pmu_release_perf_event(pmc);
}

/**
 * kvm_pmu_vcpu_init - assign pmu counter indices for this vcpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		pmu->pmc[i].idx = i;
}

/**
 * kvm_pmu_vcpu_reset - reset pmu state for this vcpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
	int i;

	for_each_set_bit(i, &mask, 32)
		kvm_pmu_stop_counter(kvm_vcpu_idx_to_pmc(vcpu, i));
}

/**
 * kvm_pmu_vcpu_destroy - free the perf events of the PMU for this vcpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	int i;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, i));
	irq_work_sync(&vcpu->arch.pmu.overflow_work);
}

u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;

	val &= ARMV8_PMU_PMCR_N_MASK;
	if (val == 0)
		return BIT(ARMV8_PMU_CYCLE_IDX);
	else
		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
}

/**
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event
 */
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		struct kvm_pmc *pmc;

		if (!(val & BIT(i)))
			continue;

		pmc = kvm_vcpu_idx_to_pmc(vcpu, i);

		if (!pmc->perf_event) {
			kvm_pmu_create_perf_event(pmc);
		} else {
			perf_event_enable(pmc->perf_event);
			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
				kvm_debug("failed to enable perf event\n");
		}
	}
}

/**
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event
 */
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		struct kvm_pmc *pmc;

		if (!(val & BIT(i)))
			continue;

		pmc = kvm_vcpu_idx_to_pmc(vcpu, i);

		if (pmc->perf_event)
			perf_event_disable(pmc->perf_event);
	}
}

static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
	u64 reg = 0;

	if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
	}

	return reg;
}

static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	bool overflow;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	overflow = !!kvm_pmu_overflow_status(vcpu);
	if (pmu->irq_level == overflow)
		return;

	pmu->irq_level = overflow;

	if (likely(irqchip_in_kernel(vcpu->kvm))) {
		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
					      pmu->irq_num, overflow, pmu);
		WARN_ON(ret);
	}
}

bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	return pmu->irq_level != run_level;
}

/*
 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 */
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the PMU bitmap for user space */
	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
	if (vcpu->arch.pmu.irq_level)
		regs->device_irq_level |= KVM_ARM_DEV_PMU;
}

/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and inject
 * an interrupt if that was the case.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/*
 * When the perf interrupt is an NMI, we cannot safely notify the vcpu
 * corresponding to the event. This is why we need a callback to do it
 * once outside of the NMI context.
 */
static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
{
	struct kvm_vcpu *vcpu;

	vcpu = container_of(work, struct kvm_vcpu, arch.pmu.overflow_work);
	kvm_vcpu_kick(vcpu);
}

/*
 * Perform an increment on any of the counters described in @mask,
 * generating the overflow if required, and propagate it as a chained
 * event if possible.
 */
static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
				      unsigned long mask, u32 event)
{
	int i;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
		return;

	/* Weed out disabled counters */
	mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

	for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) {
		struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
		u64 type, reg;

		/* Filter on event type */
		type = __vcpu_sys_reg(vcpu, counter_index_to_evtreg(i));
		type &= kvm_pmu_event_mask(vcpu->kvm);
		if (type != event)
			continue;

		/* Increment this counter */
		reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1;
		if (!kvm_pmc_is_64bit(pmc))
			reg = lower_32_bits(reg);
		__vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg;

		/* No overflow? move on */
		if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg))
			continue;

		/* Mark overflow */
		__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);

		if (kvm_pmu_counter_can_chain(pmc))
			kvm_pmu_counter_increment(vcpu, BIT(i + 1),
						  ARMV8_PMUV3_PERFCTR_CHAIN);
	}
}

/* Compute the sample period for a given counter value */
static u64 compute_period(struct kvm_pmc *pmc, u64 counter)
{
	u64 val;

	if (kvm_pmc_is_64bit(pmc) && kvm_pmc_has_64bit_overflow(pmc))
		val = (-counter) & GENMASK(63, 0);
	else
		val = (-counter) & GENMASK(31, 0);

	return val;
}

/*
 * When the perf event overflows, set the overflow status and inform the vcpu.
 */
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
				  struct perf_sample_data *data,
				  struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	int idx = pmc->idx;
	u64 period;

	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * Reset the sample period to the architectural limit,
	 * i.e. the point where the counter overflows.
	 */
	period = compute_period(pmc, local64_read(&perf_event->count));

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);

	if (kvm_pmu_counter_can_chain(pmc))
		kvm_pmu_counter_increment(vcpu, BIT(idx + 1),
					  ARMV8_PMUV3_PERFCTR_CHAIN);

	if (kvm_pmu_overflow_status(vcpu)) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);

		if (!in_nmi())
			kvm_vcpu_kick(vcpu);
		else
			irq_work_queue(&vcpu->arch.pmu.overflow_work);
	}

	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

/**
 * kvm_pmu_software_increment - do software increment
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMSWINC register
 */
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
	kvm_pmu_counter_increment(vcpu, val, ARMV8_PMUV3_PERFCTR_SW_INCR);
}

/**
 * kvm_pmu_handle_pmcr - handle PMCR register
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCR register
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	/* Fixup PMCR_EL0 to reconcile the PMU version and the LP bit */
	if (!kvm_pmu_is_3p5(vcpu))
		val &= ~ARMV8_PMU_PMCR_LP;

	/* The reset bits don't indicate any state, and shouldn't be saved. */
	__vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P);

	if (val & ARMV8_PMU_PMCR_E) {
		kvm_pmu_enable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	} else {
		kvm_pmu_disable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	}

	if (val & ARMV8_PMU_PMCR_C)
		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

	if (val & ARMV8_PMU_PMCR_P) {
		unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
		for_each_set_bit(i, &mask, 32)
			kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true);
	}
}

static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

	return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(pmc->idx));
}

/**
 * kvm_pmu_create_perf_event - create a perf event for a counter
 * @pmc: Counter context
 */
static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
	struct perf_event *event;
	struct perf_event_attr attr;
	u64 eventsel, reg, data;

	reg = counter_index_to_evtreg(pmc->idx);
	data = __vcpu_sys_reg(vcpu, reg);

	kvm_pmu_stop_counter(pmc);
	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	else
		eventsel = data & kvm_pmu_event_mask(vcpu->kvm);

	/*
	 * Neither SW increment nor chained events need to be backed
	 * by a perf event.
	 */
	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR ||
	    eventsel == ARMV8_PMUV3_PERFCTR_CHAIN)
		return;

	/*
	 * If we have a filter in place and the event isn't allowed, do
	 * not install a perf event either.
	 */
	if (vcpu->kvm->arch.pmu_filter &&
	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
		return;

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.type = arm_pmu->pmu.type;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = !kvm_pmu_counter_is_enabled(pmc);
	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
	attr.exclude_hv = 1; /* Don't count EL2 events */
	attr.exclude_host = 1; /* Don't count host events */
	attr.config = eventsel;

	/*
	 * If counting with a 64bit counter, advertise it to the perf
	 * code, carefully dealing with the initial sample period
	 * which also depends on the overflow.
	 */
	if (kvm_pmc_is_64bit(pmc))
		attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT;

	attr.sample_period = compute_period(pmc, kvm_pmu_get_pmc_value(pmc));

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, pmc);

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return;
	}

	pmc->perf_event = event;
}

/**
 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 * @vcpu: The vcpu pointer
 * @data: The data guest writes to PMXEVTYPER_EL0
 * @select_idx: The index of the selected counter
 *
 * When the guest accesses PMXEVTYPER_EL0, it wants to set a PMC to count an
 * event with the given hardware event number. Here we call the perf_event API
 * to emulate this action and create a kernel perf event for it.
 */
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
				    u64 select_idx)
{
	struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, select_idx);
	u64 reg, mask;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	mask = ARMV8_PMU_EVTYPE_MASK;
	mask &= ~ARMV8_PMU_EVTYPE_EVENT;
	mask |= kvm_pmu_event_mask(vcpu->kvm);

	reg = counter_index_to_evtreg(pmc->idx);

	__vcpu_sys_reg(vcpu, reg) = data & mask;

	kvm_pmu_create_perf_event(pmc);
}

void kvm_host_pmu_init(struct arm_pmu *pmu)
{
	struct arm_pmu_entry *entry;

	if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
	    pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
		return;

	mutex_lock(&arm_pmus_lock);

	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		goto out_unlock;

	entry->arm_pmu = pmu;
	list_add_tail(&entry->entry, &arm_pmus);

	if (list_is_singular(&arm_pmus))
		static_branch_enable(&kvm_arm_pmu_available);

out_unlock:
	mutex_unlock(&arm_pmus_lock);
}

static struct arm_pmu *kvm_pmu_probe_armpmu(void)
{
	struct perf_event_attr attr = { };
	struct perf_event *event;
	struct arm_pmu *pmu = NULL;

	/*
	 * Create a dummy event that only counts user cycles. As we'll never
	 * leave this function with the event being live, it will never
	 * count anything. But it allows us to probe some of the PMU
	 * details. Yes, this is terrible.
	 */
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = 0;
	attr.exclude_user = 0;
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;
	attr.exclude_host = 1;
	attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	attr.sample_period = GENMASK(63, 0);

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, &attr);

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return NULL;
	}

	if (event->pmu) {
		pmu = to_arm_pmu(event->pmu);
		if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
		    pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
			pmu = NULL;
	}

	perf_event_disable(event);
	perf_event_release_kernel(event);

	return pmu;
}

u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
	unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
	u64 val, mask = 0;
	int base, i, nr_events;

	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!pmceid1) {
		val = read_sysreg(pmceid0_el0);
		/* always support CHAIN */
		val |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN);
		base = 0;
	} else {
		val = read_sysreg(pmceid1_el0);
		/*
		 * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
		 * as RAZ
		 */
		if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4)
			val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
		base = 32;
	}

	if (!bmap)
		return val;

	nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

	for (i = 0; i < 32; i += 8) {
		u64 byte;

		byte = bitmap_get_value8(bmap, base + i);
		mask |= byte << i;
		if (nr_events >= (0x4000 + base + 32)) {
			byte = bitmap_get_value8(bmap, 0x4000 + base + i);
			mask |= byte << (32 + i);
		}
	}

	return val & mask;
}

int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!vcpu->arch.pmu.created)
		return -EINVAL;

	/*
	 * A valid interrupt configuration for the PMU is either to have a
	 * properly configured interrupt number and to use an in-kernel
	 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
	 */
	if (irqchip_in_kernel(vcpu->kvm)) {
		int irq = vcpu->arch.pmu.irq_num;
		/*
		 * If we are using an in-kernel vgic, at this point we know
		 * the vgic will be initialized, so we can check the PMU irq
		 * number against the dimensions of the vgic and make sure
		 * it's valid.
		 */
		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
			return -EINVAL;
	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
		return -EINVAL;
	}

	/* One-off reload of the PMU on first run */
	kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);

	return 0;
}

static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm)) {
		int ret;

		/*
		 * If using the PMU with an in-kernel virtual GIC
		 * implementation, we require the GIC to be already
		 * initialized when initializing the PMU.
		 */
		if (!vgic_initialized(vcpu->kvm))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
					 &vcpu->arch.pmu);
		if (ret)
			return ret;
	}

	init_irq_work(&vcpu->arch.pmu.overflow_work,
		      kvm_pmu_perf_overflow_notify_vcpu);

	vcpu->arch.pmu.created = true;
	return 0;
}

/*
 * For one VM the interrupt type must be the same for each vcpu.
 * As a PPI, the interrupt number is the same for all vcpus,
 * while as an SPI it must be a separate number per vcpu.
 */
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			continue;

		if (irq_is_ppi(irq)) {
			if (vcpu->arch.pmu.irq_num != irq)
				return false;
		} else {
			if (vcpu->arch.pmu.irq_num == irq)
				return false;
		}
	}

	return true;
}

static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
{
	struct kvm *kvm = vcpu->kvm;
	struct arm_pmu_entry *entry;
	struct arm_pmu *arm_pmu;
	int ret = -ENXIO;

	mutex_lock(&kvm->lock);
	mutex_lock(&arm_pmus_lock);

	list_for_each_entry(entry, &arm_pmus, entry) {
		arm_pmu = entry->arm_pmu;
		if (arm_pmu->pmu.type == pmu_id) {
			if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) ||
			    (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
				ret = -EBUSY;
				break;
			}

			kvm->arch.arm_pmu = arm_pmu;
			cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus);
			ret = 0;
			break;
		}
	}

	mutex_unlock(&arm_pmus_lock);
	mutex_unlock(&kvm->lock);
	return ret;
}

int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	struct kvm *kvm = vcpu->kvm;

	if (!kvm_vcpu_has_pmu(vcpu))
		return -ENODEV;

	if (vcpu->arch.pmu.created)
		return -EBUSY;

	mutex_lock(&kvm->lock);
	if (!kvm->arch.arm_pmu) {
		/* No PMU set, get the default one */
		kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
		if (!kvm->arch.arm_pmu) {
			mutex_unlock(&kvm->lock);
			return -ENODEV;
		}
	}
	mutex_unlock(&kvm->lock);

	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(kvm))
			return -EINVAL;

		if (get_user(irq, uaddr))
			return -EFAULT;

		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
			return -EINVAL;

		if (!pmu_irq_is_valid(kvm, irq))
			return -EINVAL;

		if (kvm_arm_pmu_irq_initialized(vcpu))
			return -EBUSY;

		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
		vcpu->arch.pmu.irq_num = irq;
		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_FILTER: {
		struct kvm_pmu_event_filter __user *uaddr;
		struct kvm_pmu_event_filter filter;
		int nr_events;

		nr_events = kvm_pmu_event_mask(kvm) + 1;

		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;

		if (copy_from_user(&filter, uaddr, sizeof(filter)))
			return -EFAULT;

		if (((u32)filter.base_event + filter.nevents) > nr_events ||
		    (filter.action != KVM_PMU_EVENT_ALLOW &&
		     filter.action != KVM_PMU_EVENT_DENY))
			return -EINVAL;

		mutex_lock(&kvm->lock);

		if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) {
			mutex_unlock(&kvm->lock);
			return -EBUSY;
		}

		if (!kvm->arch.pmu_filter) {
			kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
			if (!kvm->arch.pmu_filter) {
				mutex_unlock(&kvm->lock);
				return -ENOMEM;
			}

			/*
			 * The default depends on the first applied filter.
			 * If it allows events, the default is to deny.
			 * Conversely, if the first filter denies a set of
			 * events, the default is to allow.
			 */
			if (filter.action == KVM_PMU_EVENT_ALLOW)
				bitmap_zero(kvm->arch.pmu_filter, nr_events);
			else
				bitmap_fill(kvm->arch.pmu_filter, nr_events);
		}

		if (filter.action == KVM_PMU_EVENT_ALLOW)
			bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
		else
			bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);

		mutex_unlock(&kvm->lock);

		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int pmu_id;

		if (get_user(pmu_id, uaddr))
			return -EFAULT;

		return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
	}
	case KVM_ARM_VCPU_PMU_V3_INIT:
		return kvm_arm_pmu_v3_init(vcpu);
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (!kvm_vcpu_has_pmu(vcpu))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		irq = vcpu->arch.pmu.irq_num;
		return put_user(irq, uaddr);
	}
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ:
	case KVM_ARM_VCPU_PMU_V3_INIT:
	case KVM_ARM_VCPU_PMU_V3_FILTER:
	case KVM_ARM_VCPU_PMU_V3_SET_PMU:
		if (kvm_vcpu_has_pmu(vcpu))
			return 0;
	}

	return -ENXIO;
}

u8 kvm_arm_pmu_get_pmuver_limit(void)
{
	u64 tmp;

	tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
	tmp = cpuid_feature_cap_perfmon_field(tmp,
					      ID_AA64DFR0_EL1_PMUVer_SHIFT,
					      ID_AA64DFR0_EL1_PMUVer_V3P5);
	return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp);
}