// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>

DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);

static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);

#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1

static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
	switch (kvm->arch.pmuver) {
	case ID_AA64DFR0_PMUVER_8_0:
		return GENMASK(9, 0);
	case ID_AA64DFR0_PMUVER_8_1:
	case ID_AA64DFR0_PMUVER_8_4:
	case ID_AA64DFR0_PMUVER_8_5:
	case ID_AA64DFR0_PMUVER_8_7:
		return GENMASK(15, 0);
	default:		/* Shouldn't be here, just for sanity */
		WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
		return 0;
	}
}

/**
 * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (select_idx == ARMV8_PMU_CYCLE_IDX &&
		__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
}

static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu;
	struct kvm_vcpu_arch *vcpu_arch;

	pmc -= pmc->idx;
	pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
	vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
	return container_of(vcpu_arch, struct kvm_vcpu, arch);
}

/**
 * kvm_pmu_pmc_is_chained - determine if the pmc is chained
 * @pmc: The PMU counter pointer
 */
static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

	return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
{
	return select_idx & 0x1;
}

/**
 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
 * @pmc: The PMU counter pointer
 *
 * When a pair of PMCs are chained together we use the low counter (canonical)
 * to hold the underlying perf event.
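 * The odd/high counter of the pair keeps its perf_event pointer NULL.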
 */
static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;

	return pmc;
}
static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;
	else
		return pmc + 1;
}

/**
 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 eventsel, reg;

	select_idx |= 0x1;

	if (select_idx == ARMV8_PMU_CYCLE_IDX)
		return false;

	reg = PMEVTYPER0_EL0 + select_idx;
	eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);

	return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
}

/**
 * kvm_pmu_get_pair_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 */
static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
					  struct kvm_pmc *pmc)
{
	u64 counter, counter_high, reg, enabled, running;

	if (kvm_pmu_pmc_is_chained(pmc)) {
		pmc = kvm_pmu_get_canonical_pmc(pmc);
		reg = PMEVCNTR0_EL0 + pmc->idx;

		counter = __vcpu_sys_reg(vcpu, reg);
		counter_high = __vcpu_sys_reg(vcpu, reg + 1);

		counter = lower_32_bits(counter) | (counter_high << 32);
	} else {
		reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
		counter = __vcpu_sys_reg(vcpu, reg);
	}

	/*
	 * The real counter value is equal to the value of the counter register
	 * plus the value the perf event has counted.
	 */
	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event, &enabled,
						 &running);

	return counter;
}

/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 counter;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx];

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(select_idx))
		counter = upper_32_bits(counter);
	else if (select_idx != ARMV8_PMU_CYCLE_IDX)
		counter = lower_32_bits(counter);

	return counter;
}

/**
 * kvm_pmu_set_counter_value - set PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 * @val: The counter value
 */
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
	u64 reg;

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
	__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

/**
 * kvm_pmu_stop_counter - stop PMU counter
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 *
 * If this counter has been configured to monitor some event, release it here.
 */
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
{
	u64 counter, reg, val;

	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (!pmc->perf_event)
		return;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
		reg = PMCCNTR_EL0;
		val = counter;
	} else {
		reg = PMEVCNTR0_EL0 + pmc->idx;
		val = lower_32_bits(counter);
	}

	__vcpu_sys_reg(vcpu, reg) = val;

	if (kvm_pmu_pmc_is_chained(pmc))
		__vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);

	kvm_pmu_release_perf_event(pmc);
}

/**
 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		pmu->pmc[i].idx = i;
}

/**
 * kvm_pmu_vcpu_reset - reset pmu state for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	for_each_set_bit(i, &mask, 32)
		kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);

	bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
}

/**
 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		kvm_pmu_release_perf_event(&pmu->pmc[i]);
	irq_work_sync(&vcpu->arch.pmu.overflow_work);
}

u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;

	val &= ARMV8_PMU_PMCR_N_MASK;
	if (val == 0)
		return BIT(ARMV8_PMU_CYCLE_IDX);
	else
		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
}

/**
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event
 */
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];

		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event) {
			perf_event_enable(pmc->perf_event);
			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
				kvm_debug("fail to enable perf event\n");
		}
	}
}

/**
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event
 */
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	if (!val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];

		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event)
			perf_event_disable(pmc->perf_event);
	}
}

static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
	u64 reg = 0;

	if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
	}

	return reg;
}

static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	bool overflow;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	overflow = !!kvm_pmu_overflow_status(vcpu);
	if (pmu->irq_level == overflow)
		return;

	pmu->irq_level = overflow;

	if (likely(irqchip_in_kernel(vcpu->kvm))) {
		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
					      pmu->irq_num, overflow, pmu);
		WARN_ON(ret);
	}
}

bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	return pmu->irq_level != run_level;
}

/*
 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 */
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the PMU bit for user space */
	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
	if (vcpu->arch.pmu.irq_level)
		regs->device_irq_level |= KVM_ARM_DEV_PMU;
}

/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and inject
 * an interrupt if that was the case.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding
 * to the event.
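 * (kvm_vcpu_kick() is not NMI-safe, so kvm_pmu_perf_overflow() defers the kick
 * to this irq_work handler instead.)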
 * This is why we need a callback to do it once outside of the NMI context.
 */
static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
{
	struct kvm_vcpu *vcpu;
	struct kvm_pmu *pmu;

	pmu = container_of(work, struct kvm_pmu, overflow_work);
	vcpu = kvm_pmc_to_vcpu(pmu->pmc);

	kvm_vcpu_kick(vcpu);
}

/**
 * When the perf event overflows, set the overflow status and inform the vcpu.
 */
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
				  struct perf_sample_data *data,
				  struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	int idx = pmc->idx;
	u64 period;

	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * Reset the sample period to the architectural limit,
	 * i.e. the point where the counter overflows.
	 */
	period = -(local64_read(&perf_event->count));

	if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
		period &= GENMASK(31, 0);

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);

	if (kvm_pmu_overflow_status(vcpu)) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);

		if (!in_nmi())
			kvm_vcpu_kick(vcpu);
		else
			irq_work_queue(&vcpu->arch.pmu.overflow_work);
	}

	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

/**
 * kvm_pmu_software_increment - do software increment
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMSWINC register
 */
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
		return;

	/* Weed out disabled counters */
	val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

	for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
		u64 type, reg;

		if (!(val & BIT(i)))
			continue;

		/* PMSWINC only applies to ... SW_INC! */
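		/* (counters programmed with any other event type are skipped) */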
		type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
		type &= kvm_pmu_event_mask(vcpu->kvm);
		if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
			continue;

		/* increment this even SW_INC counter */
		reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
		reg = lower_32_bits(reg);
		__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;

		if (reg) /* no overflow on the low part */
			continue;

		if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
			/* increment the high counter */
			reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
			reg = lower_32_bits(reg);
			__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
			if (!reg) /* mark overflow on the high counter */
				__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
		} else {
			/* mark overflow on low counter */
			__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
		}
	}
}

/**
 * kvm_pmu_handle_pmcr - handle PMCR register
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCR register
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (val & ARMV8_PMU_PMCR_E) {
		kvm_pmu_enable_counter_mask(vcpu,
			__vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	} else {
		kvm_pmu_disable_counter_mask(vcpu,
			__vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	}

	if (val & ARMV8_PMU_PMCR_C)
		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

	if (val & ARMV8_PMU_PMCR_P) {
		unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
		for_each_set_bit(i, &mask, 32)
			kvm_pmu_set_counter_value(vcpu, i, 0);
	}
}

static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
}

/**
 * kvm_pmu_create_perf_event - create a perf event for a counter
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 */
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;
	struct perf_event *event;
	struct perf_event_attr attr;
	u64 eventsel, counter, reg, data;

	/*
	 * For chained counters the event type and filtering attributes are
	 * obtained from the low/even counter. We also use this counter to
	 * determine if the event is enabled/disabled.
	 */
	pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);

	reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
	data = __vcpu_sys_reg(vcpu, reg);

	kvm_pmu_stop_counter(vcpu, pmc);
	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	else
		eventsel = data & kvm_pmu_event_mask(vcpu->kvm);

	/* Software increment event doesn't need to be backed by a perf event */
	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
		return;

	/*
	 * If we have a filter in place and the event isn't allowed, do
	 * not install a perf event either.
	 */
	if (vcpu->kvm->arch.pmu_filter &&
	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
		return;

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
	attr.exclude_hv = 1; /* Don't count EL2 events */
	attr.exclude_host = 1; /* Don't count host events */
	attr.config = eventsel;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (kvm_pmu_pmc_is_chained(pmc)) {
		/**
		 * The initial sample period (overflow count) of an event. For
		 * chained counters we only support overflow interrupts on the
		 * high counter.
		 */
		attr.sample_period = (-counter) & GENMASK(63, 0);
		attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;

		event = perf_event_create_kernel_counter(&attr, -1, current,
							 kvm_pmu_perf_overflow,
							 pmc + 1);
	} else {
		/* The initial sample period (overflow count) of an event. */
		if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
			attr.sample_period = (-counter) & GENMASK(63, 0);
		else
			attr.sample_period = (-counter) & GENMASK(31, 0);

		event = perf_event_create_kernel_counter(&attr, -1, current,
							 kvm_pmu_perf_overflow, pmc);
	}

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return;
	}

	pmc->perf_event = event;
}

/**
 * kvm_pmu_update_pmc_chained - update chained bitmap
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 *
 * Update the chained bitmap based on the event type written in the
 * typer register and the enable state of the odd register.
 */
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
	bool new_state, old_state;

	old_state = kvm_pmu_pmc_is_chained(pmc);
	new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
		    kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);

	if (old_state == new_state)
		return;

	canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
	kvm_pmu_stop_counter(vcpu, canonical_pmc);
	if (new_state) {
		/*
		 * During promotion from !chained to chained we must ensure
		 * the adjacent counter is stopped and its event destroyed
		 */
		kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
		set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
		return;
	}
	clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 * @vcpu: The vcpu pointer
 * @data: The data guest writes to PMXEVTYPER_EL0
 * @select_idx: The number of selected counter
 *
 * When the guest OS accesses PMXEVTYPER_EL0, it wants to set a PMC to count an
 * event with the given hardware event number. Here we call the perf_event API
 * to emulate this action and create a kernel perf event for it.
 */
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
				    u64 select_idx)
{
	u64 reg, mask;

	mask = ARMV8_PMU_EVTYPE_MASK;
	mask &= ~ARMV8_PMU_EVTYPE_EVENT;
	mask |= kvm_pmu_event_mask(vcpu->kvm);

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;

	__vcpu_sys_reg(vcpu, reg) = data & mask;

	kvm_pmu_update_pmc_chained(vcpu, select_idx);
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

void kvm_host_pmu_init(struct arm_pmu *pmu)
{
	if (pmu->pmuver != 0 && pmu->pmuver != ID_AA64DFR0_PMUVER_IMP_DEF &&
	    !kvm_arm_support_pmu_v3() && !is_protected_kvm_enabled())
		static_branch_enable(&kvm_arm_pmu_available);
}

static int kvm_pmu_probe_pmuver(void)
{
	struct perf_event_attr attr = { };
	struct perf_event *event;
	struct arm_pmu *pmu;
	int pmuver = ID_AA64DFR0_PMUVER_IMP_DEF;

	/*
	 * Create a dummy event that only counts user cycles. As we'll never
	 * leave this function with the event being live, it will never
	 * count anything. But it allows us to probe some of the PMU
	 * details. Yes, this is terrible.
	 */
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = 0;
	attr.exclude_user = 0;
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;
	attr.exclude_host = 1;
	attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	attr.sample_period = GENMASK(63, 0);

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, &attr);

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return ID_AA64DFR0_PMUVER_IMP_DEF;
	}

	if (event->pmu) {
		pmu = to_arm_pmu(event->pmu);
		if (pmu->pmuver)
			pmuver = pmu->pmuver;
	}

	perf_event_disable(event);
	perf_event_release_kernel(event);

	return pmuver;
}

u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
	unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
	u64 val, mask = 0;
	int base, i, nr_events;

	if (!pmceid1) {
		val = read_sysreg(pmceid0_el0);
		base = 0;
	} else {
		val = read_sysreg(pmceid1_el0);
		/*
		 * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
		 * as RAZ
		 */
		if (vcpu->kvm->arch.pmuver >= ID_AA64DFR0_PMUVER_8_4)
			val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
		base = 32;
	}

	if (!bmap)
		return val;

	nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

	for (i = 0; i < 32; i += 8) {
		u64 byte;

		byte = bitmap_get_value8(bmap, base + i);
		mask |= byte << i;
		if (nr_events >= (0x4000 + base + 32)) {
			byte = bitmap_get_value8(bmap, 0x4000 + base + i);
			mask |= byte << (32 + i);
		}
	}

	return val & mask;
}

int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!vcpu->arch.pmu.created)
		return -EINVAL;

	/*
	 * A valid interrupt configuration for the PMU is either to have a
	 * properly configured interrupt number and using an in-kernel
	 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
	 */
	if (irqchip_in_kernel(vcpu->kvm)) {
		int irq = vcpu->arch.pmu.irq_num;
		/*
		 * If we are using an in-kernel vgic, at this point we know
		 * the vgic will be initialized, so we can check the PMU irq
		 * number against the dimensions of the vgic and make sure
		 * it's valid.
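		 * (A PPI is always in range; an SPI must be one the vgic
		 * actually provides.)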
		 */
		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
			return -EINVAL;
	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
		return -EINVAL;
	}

	/* One-off reload of the PMU on first run */
	kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);

	return 0;
}

static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm)) {
		int ret;

		/*
		 * If using the PMU with an in-kernel virtual GIC
		 * implementation, we require the GIC to be already
		 * initialized when initializing the PMU.
		 */
		if (!vgic_initialized(vcpu->kvm))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
					 &vcpu->arch.pmu);
		if (ret)
			return ret;
	}

	init_irq_work(&vcpu->arch.pmu.overflow_work,
		      kvm_pmu_perf_overflow_notify_vcpu);

	vcpu->arch.pmu.created = true;
	return 0;
}

/*
 * For one VM the interrupt type must be the same for each vcpu.
 * As a PPI, the interrupt number is the same for all vcpus,
 * while as an SPI it must be a separate number per vcpu.
 */
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			continue;

		if (irq_is_ppi(irq)) {
			if (vcpu->arch.pmu.irq_num != irq)
				return false;
		} else {
			if (vcpu->arch.pmu.irq_num == irq)
				return false;
		}
	}

	return true;
}

int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return -ENODEV;

	if (vcpu->arch.pmu.created)
		return -EBUSY;

	if (!vcpu->kvm->arch.pmuver)
		vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();

	if (vcpu->kvm->arch.pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
		return -ENODEV;

	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (get_user(irq, uaddr))
			return -EFAULT;

		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
			return -EINVAL;

		if (!pmu_irq_is_valid(vcpu->kvm, irq))
			return -EINVAL;

		if (kvm_arm_pmu_irq_initialized(vcpu))
			return -EBUSY;

		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
		vcpu->arch.pmu.irq_num = irq;
		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_FILTER: {
		struct kvm_pmu_event_filter __user *uaddr;
		struct kvm_pmu_event_filter filter;
		int nr_events;

		nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;

		if (copy_from_user(&filter, uaddr, sizeof(filter)))
			return -EFAULT;

		if (((u32)filter.base_event + filter.nevents) > nr_events ||
		    (filter.action != KVM_PMU_EVENT_ALLOW &&
		     filter.action != KVM_PMU_EVENT_DENY))
			return -EINVAL;

		mutex_lock(&vcpu->kvm->lock);

		if (!vcpu->kvm->arch.pmu_filter) {
			vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
			if (!vcpu->kvm->arch.pmu_filter) {
				mutex_unlock(&vcpu->kvm->lock);
				return -ENOMEM;
			}

			/*
			 * The default depends on the first applied filter.
			 * If it allows events, the default is to deny.
			 * Conversely, if the first filter denies a set of
			 * events, the default is to allow.
			 */
			if (filter.action == KVM_PMU_EVENT_ALLOW)
				bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events);
			else
				bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events);
		}

		if (filter.action == KVM_PMU_EVENT_ALLOW)
			bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
		else
			bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);

		mutex_unlock(&vcpu->kvm->lock);

		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_INIT:
		return kvm_arm_pmu_v3_init(vcpu);
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (!kvm_vcpu_has_pmu(vcpu))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		irq = vcpu->arch.pmu.irq_num;
		return put_user(irq, uaddr);
	}
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ:
	case KVM_ARM_VCPU_PMU_V3_INIT:
	case KVM_ARM_VCPU_PMU_V3_FILTER:
		if (kvm_vcpu_has_pmu(vcpu))
			return 0;
	}

	return -ENXIO;
}