// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>

DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);

static LIST_HEAD(arm_pmus);
static DEFINE_MUTEX(arm_pmus_lock);

static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);

#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1

static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
	unsigned int pmuver;

	pmuver = kvm->arch.arm_pmu->pmuver;

	switch (pmuver) {
	case ID_AA64DFR0_PMUVER_8_0:
		return GENMASK(9, 0);
	case ID_AA64DFR0_PMUVER_8_1:
	case ID_AA64DFR0_PMUVER_8_4:
	case ID_AA64DFR0_PMUVER_8_5:
	case ID_AA64DFR0_PMUVER_8_7:
		return GENMASK(15, 0);
	default:	/* Shouldn't be here, just for sanity */
		WARN_ONCE(1, "Unknown PMU version %d\n", pmuver);
		return 0;
	}
}

/**
 * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (select_idx == ARMV8_PMU_CYCLE_IDX &&
		__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
}

static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu;
	struct kvm_vcpu_arch *vcpu_arch;

	pmc -= pmc->idx;
	pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
	vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
	return container_of(vcpu_arch, struct kvm_vcpu, arch);
}

/**
 * kvm_pmu_pmc_is_chained - determine if the pmc is chained
 * @pmc: The PMU counter pointer
 */
static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

	return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_idx_is_high_counter - determine if select_idx is the high half of a counter pair
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
{
	return select_idx & 0x1;
}

/**
 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
 * @pmc: The PMU counter pointer
 *
 * When a pair of PMCs are chained together we use the low counter (canonical)
 * to hold the underlying perf event.
 */
static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;

	return pmc;
}

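/**
 * kvm_pmu_get_alternate_pmc - obtain the other half of a counter pair
 * @pmc: The PMU counter pointer
 *
 * Return the counter paired with @pmc: the odd (high) counter for an even
 * index, the even (low) counter for an odd index.
 */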
static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;
	else
		return pmc + 1;
}

/**
 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 eventsel, reg;

	select_idx |= 0x1;

	if (select_idx == ARMV8_PMU_CYCLE_IDX)
		return false;

	reg = PMEVTYPER0_EL0 + select_idx;
	eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);

	return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
}

/**
 * kvm_pmu_get_pair_counter_value - get the value of the counter (pair)
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 */
static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
					  struct kvm_pmc *pmc)
{
	u64 counter, counter_high, reg, enabled, running;

	if (kvm_pmu_pmc_is_chained(pmc)) {
		pmc = kvm_pmu_get_canonical_pmc(pmc);
		reg = PMEVCNTR0_EL0 + pmc->idx;

		counter = __vcpu_sys_reg(vcpu, reg);
		counter_high = __vcpu_sys_reg(vcpu, reg + 1);

		counter = lower_32_bits(counter) | (counter_high << 32);
	} else {
		reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
		counter = __vcpu_sys_reg(vcpu, reg);
	}

	/*
	 * The real counter value is equal to the value of the counter
	 * register plus the value the perf event counts.
	 */
	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event, &enabled,
						 &running);

	return counter;
}

/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 counter;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx];

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(select_idx))
		counter = upper_32_bits(counter);
	else if (select_idx != ARMV8_PMU_CYCLE_IDX)
		counter = lower_32_bits(counter);

	return counter;
}

/**
 * kvm_pmu_set_counter_value - set PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 * @val: The counter value
 */
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
	u64 reg;

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
	__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

/**
 * kvm_pmu_stop_counter - stop PMU counter
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 *
 * If this counter has been configured to monitor some event, release it here.
 */
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
{
	u64 counter, reg, val;

	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (!pmc->perf_event)
		return;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
		reg = PMCCNTR_EL0;
		val = counter;
	} else {
		reg = PMEVCNTR0_EL0 + pmc->idx;
		val = lower_32_bits(counter);
	}

	__vcpu_sys_reg(vcpu, reg) = val;

	if (kvm_pmu_pmc_is_chained(pmc))
		__vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);

	kvm_pmu_release_perf_event(pmc);
}

/**
 * kvm_pmu_vcpu_init - assign pmu counter idx for the vcpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		pmu->pmc[i].idx = i;
}

/**
 * kvm_pmu_vcpu_reset - reset pmu state for the vcpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	for_each_set_bit(i, &mask, 32)
		kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);

	bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
}

/**
 * kvm_pmu_vcpu_destroy - free the perf events of the PMU for the vcpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		kvm_pmu_release_perf_event(&pmu->pmc[i]);
	irq_work_sync(&vcpu->arch.pmu.overflow_work);
}

u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;

	val &= ARMV8_PMU_PMCR_N_MASK;
	if (val == 0)
		return BIT(ARMV8_PMU_CYCLE_IDX);
	else
		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
}

/**
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value the guest writes to the PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event
 */
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];

		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event) {
			perf_event_enable(pmc->perf_event);
			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
				kvm_debug("failed to enable perf event\n");
		}
	}
}

/**
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value the guest writes to the PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event
 */
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	if (!val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];

		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event)
			perf_event_disable(pmc->perf_event);
	}
}

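/**
 * kvm_pmu_overflow_status - compute the state of the overflow interrupt line
 * @vcpu: The vcpu pointer
 *
 * Return the set of counters that have overflowed, are enabled, and have
 * their overflow interrupt enabled, provided the PMU itself is enabled
 * (PMCR_EL0.E). A non-zero result means the overflow interrupt is pending.
 */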
static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
	u64 reg = 0;

	if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
	}

	return reg;
}

static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	bool overflow;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	overflow = !!kvm_pmu_overflow_status(vcpu);
	if (pmu->irq_level == overflow)
		return;

	pmu->irq_level = overflow;

	if (likely(irqchip_in_kernel(vcpu->kvm))) {
		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
					      pmu->irq_num, overflow, pmu);
		WARN_ON(ret);
	}
}

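/**
 * kvm_pmu_should_notify_user - check if userspace needs an irq level update
 * @vcpu: The vcpu pointer
 *
 * With a userspace irqchip the PMU overflow line is relayed through the
 * kvm_run structure; return true when the emulated level differs from the
 * level last reported to userspace.
 */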
bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	return pmu->irq_level != run_level;
}

/*
 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 */
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the PMU bit of the device irq bitmap for user space */
	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
	if (vcpu->arch.pmu.irq_level)
		regs->device_irq_level |= KVM_ARM_DEV_PMU;
}

/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/*
 * When the perf interrupt is an NMI, we cannot safely notify the vcpu
 * corresponding to the event. This is why we need a callback to do it
 * once outside of the NMI context.
 */
static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
{
	struct kvm_vcpu *vcpu;
	struct kvm_pmu *pmu;

	pmu = container_of(work, struct kvm_pmu, overflow_work);
	vcpu = kvm_pmc_to_vcpu(pmu->pmc);

	kvm_vcpu_kick(vcpu);
}

/*
 * When the perf event overflows, set the overflow status and inform the vcpu.
 */
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
				  struct perf_sample_data *data,
				  struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	int idx = pmc->idx;
	u64 period;

	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * Reset the sample period to the architectural limit,
	 * i.e. the point where the counter overflows.
	 */
	period = -(local64_read(&perf_event->count));

	if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
		period &= GENMASK(31, 0);

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);

	if (kvm_pmu_overflow_status(vcpu)) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);

		if (!in_nmi())
			kvm_vcpu_kick(vcpu);
		else
			irq_work_queue(&vcpu->arch.pmu.overflow_work);
	}

	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

/**
 * kvm_pmu_software_increment - do software increment
 * @vcpu: The vcpu pointer
 * @val: the value the guest writes to the PMSWINC register
 */
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
		return;

	/* Weed out disabled counters */
	val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

	for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
		u64 type, reg;

		if (!(val & BIT(i)))
			continue;

		/* PMSWINC only applies to ... SW_INC! */
		type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
		type &= kvm_pmu_event_mask(vcpu->kvm);
		if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
			continue;

		/* increment the SW_INC counter */
		reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
		reg = lower_32_bits(reg);
		__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;

		if (reg) /* no overflow on the low part */
			continue;

		if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
			/* increment the high counter */
			reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
			reg = lower_32_bits(reg);
			__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
			if (!reg) /* mark overflow on the high counter */
				__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
		} else {
			/* mark overflow on low counter */
			__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
		}
	}
}

/**
 * kvm_pmu_handle_pmcr - handle PMCR register
 * @vcpu: The vcpu pointer
 * @val: the value the guest writes to the PMCR register
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (val & ARMV8_PMU_PMCR_E) {
		kvm_pmu_enable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	} else {
		kvm_pmu_disable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	}

	if (val & ARMV8_PMU_PMCR_C)
		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

	if (val & ARMV8_PMU_PMCR_P) {
		unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);

		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
		for_each_set_bit(i, &mask, 32)
			kvm_pmu_set_counter_value(vcpu, i, 0);
	}
}

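/**
 * kvm_pmu_counter_is_enabled - check if a counter is currently enabled
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 *
 * A counter only counts when the PMU is globally enabled (PMCR_EL0.E) and
 * the counter's bit is set in PMCNTENSET_EL0.
 */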
static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
}

/**
 * kvm_pmu_create_perf_event - create a perf event for a counter
 * @vcpu: The vcpu pointer
 * @select_idx: The index of the selected counter
 */
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;
	struct perf_event *event;
	struct perf_event_attr attr;
	u64 eventsel, counter, reg, data;

	/*
	 * For chained counters the event type and filtering attributes are
	 * obtained from the low/even counter. We also use this counter to
	 * determine if the event is enabled/disabled.
	 */
	pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);

	reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
	data = __vcpu_sys_reg(vcpu, reg);

	kvm_pmu_stop_counter(vcpu, pmc);
	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	else
		eventsel = data & kvm_pmu_event_mask(vcpu->kvm);

	/* Software increment event doesn't need to be backed by a perf event */
	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
		return;

	/*
	 * If we have a filter in place and the event isn't allowed, do
	 * not install a perf event either.
	 */
	if (vcpu->kvm->arch.pmu_filter &&
	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
		return;

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.type = arm_pmu->pmu.type;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
	attr.exclude_hv = 1; /* Don't count EL2 events */
	attr.exclude_host = 1; /* Don't count host events */
	attr.config = eventsel;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (kvm_pmu_pmc_is_chained(pmc)) {
		/*
		 * The initial sample period (overflow count) of an event. For
		 * chained counters we only support overflow interrupts on the
		 * high counter.
		 */
		attr.sample_period = (-counter) & GENMASK(63, 0);
		attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;

		event = perf_event_create_kernel_counter(&attr, -1, current,
							 kvm_pmu_perf_overflow,
							 pmc + 1);
	} else {
		/* The initial sample period (overflow count) of an event. */
		if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
			attr.sample_period = (-counter) & GENMASK(63, 0);
		else
			attr.sample_period = (-counter) & GENMASK(31, 0);

		event = perf_event_create_kernel_counter(&attr, -1, current,
							 kvm_pmu_perf_overflow, pmc);
	}

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return;
	}

	pmc->perf_event = event;
}

/**
 * kvm_pmu_update_pmc_chained - update chained bitmap
 * @vcpu: The vcpu pointer
 * @select_idx: The index of the selected counter
 *
 * Update the chained bitmap based on the event type written in the
 * typer register and the enable state of the odd counter.
 */
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
	bool new_state, old_state;

	old_state = kvm_pmu_pmc_is_chained(pmc);
	new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
		    kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);

	if (old_state == new_state)
		return;

	canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
	kvm_pmu_stop_counter(vcpu, canonical_pmc);
	if (new_state) {
		/*
		 * During promotion from !chained to chained we must ensure
		 * the adjacent counter is stopped and its event destroyed
		 */
		kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
		set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
		return;
	}
	clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 * @vcpu: The vcpu pointer
 * @data: The data the guest writes to PMXEVTYPER_EL0
 * @select_idx: The index of the selected counter
 *
 * When the guest accesses PMXEVTYPER_EL0, it wants to set a PMC to count an
 * event with the given hardware event number. Here we call the perf_event API
 * to emulate this action and create a kernel perf event for it.
 */
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
				    u64 select_idx)
{
	u64 reg, mask;

	mask = ARMV8_PMU_EVTYPE_MASK;
	mask &= ~ARMV8_PMU_EVTYPE_EVENT;
	mask |= kvm_pmu_event_mask(vcpu->kvm);

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;

	__vcpu_sys_reg(vcpu, reg) = data & mask;

	kvm_pmu_update_pmc_chained(vcpu, select_idx);
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

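/**
 * kvm_host_pmu_init - register a host PMU with KVM
 * @pmu: The host PMU discovered by the arm_pmu driver
 *
 * Add the PMU to the list of PMUs that can be exposed to guests, and enable
 * the kvm_arm_pmu_available static key when the first usable PMU is
 * registered. PMUs with an unknown or IMP DEF PMU version are ignored, as is
 * everything when protected KVM is enabled.
 */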
void kvm_host_pmu_init(struct arm_pmu *pmu)
{
	struct arm_pmu_entry *entry;

	if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF ||
	    is_protected_kvm_enabled())
		return;

	mutex_lock(&arm_pmus_lock);

	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		goto out_unlock;

	entry->arm_pmu = pmu;
	list_add_tail(&entry->entry, &arm_pmus);

	if (list_is_singular(&arm_pmus))
		static_branch_enable(&kvm_arm_pmu_available);

out_unlock:
	mutex_unlock(&arm_pmus_lock);
}

static struct arm_pmu *kvm_pmu_probe_armpmu(void)
{
	struct perf_event_attr attr = { };
	struct perf_event *event;
	struct arm_pmu *pmu = NULL;

	/*
	 * Create a dummy event that only counts user cycles. As we'll never
	 * leave this function with the event being live, it will never
	 * count anything. But it allows us to probe some of the PMU
	 * details. Yes, this is terrible.
	 */
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = 0;
	attr.exclude_user = 0;
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;
	attr.exclude_host = 1;
	attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	attr.sample_period = GENMASK(63, 0);

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, &attr);

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return NULL;
	}

	if (event->pmu) {
		pmu = to_arm_pmu(event->pmu);
		if (pmu->pmuver == 0 ||
		    pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
			pmu = NULL;
	}

	perf_event_disable(event);
	perf_event_release_kernel(event);

	return pmu;
}

u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
	unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
	u64 val, mask = 0;
	int base, i, nr_events;

	if (!pmceid1) {
		val = read_sysreg(pmceid0_el0);
		base = 0;
	} else {
		val = read_sysreg(pmceid1_el0);
		/*
		 * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
		 * as RAZ
		 */
		if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_4)
			val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
		base = 32;
	}

	if (!bmap)
		return val;

	nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

	for (i = 0; i < 32; i += 8) {
		u64 byte;

		byte = bitmap_get_value8(bmap, base + i);
		mask |= byte << i;
		if (nr_events >= (0x4000 + base + 32)) {
			byte = bitmap_get_value8(bmap, 0x4000 + base + i);
			mask |= byte << (32 + i);
		}
	}

	return val & mask;
}

int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!vcpu->arch.pmu.created)
		return -EINVAL;

	/*
	 * A valid interrupt configuration for the PMU is either to have a
	 * properly configured interrupt number and use an in-kernel
	 * irqchip, or to have neither an in-kernel GIC nor an IRQ set.
	 */
	if (irqchip_in_kernel(vcpu->kvm)) {
		int irq = vcpu->arch.pmu.irq_num;

		/*
		 * If we are using an in-kernel vgic, at this point we know
		 * the vgic will be initialized, so we can check the PMU irq
		 * number against the dimensions of the vgic and make sure
		 * it's valid.
		 */
		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
			return -EINVAL;
	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
		return -EINVAL;
	}

	/* One-off reload of the PMU on first run */
	kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);

	return 0;
}

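/**
 * kvm_arm_pmu_v3_init - finalize PMU setup for a vcpu
 * @vcpu: The vcpu pointer
 *
 * With an in-kernel irqchip, require the vgic to be initialized and the PMU
 * interrupt to be configured, and register the PMU as the owner of that
 * interrupt. Also set up the irq_work used to kick the vcpu when an overflow
 * is taken in NMI context.
 */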
static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm)) {
		int ret;

		/*
		 * If using the PMU with an in-kernel virtual GIC
		 * implementation, we require the GIC to be already
		 * initialized when initializing the PMU.
		 */
		if (!vgic_initialized(vcpu->kvm))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
					 &vcpu->arch.pmu);
		if (ret)
			return ret;
	}

	init_irq_work(&vcpu->arch.pmu.overflow_work,
		      kvm_pmu_perf_overflow_notify_vcpu);

	vcpu->arch.pmu.created = true;
	return 0;
}

/*
 * For one VM the interrupt type must be the same for each vcpu.
 * As a PPI, the interrupt number is the same for all vcpus,
 * while as an SPI it must be a separate number per vcpu.
 */
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			continue;

		if (irq_is_ppi(irq)) {
			if (vcpu->arch.pmu.irq_num != irq)
				return false;
		} else {
			if (vcpu->arch.pmu.irq_num == irq)
				return false;
		}
	}

	return true;
}

static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
{
	struct kvm *kvm = vcpu->kvm;
	struct arm_pmu_entry *entry;
	struct arm_pmu *arm_pmu;
	int ret = -ENXIO;

	mutex_lock(&kvm->lock);
	mutex_lock(&arm_pmus_lock);

	list_for_each_entry(entry, &arm_pmus, entry) {
		arm_pmu = entry->arm_pmu;
		if (arm_pmu->pmu.type == pmu_id) {
			if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) ||
			    (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
				ret = -EBUSY;
				break;
			}

			kvm->arch.arm_pmu = arm_pmu;
			cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus);
			ret = 0;
			break;
		}
	}

	mutex_unlock(&arm_pmus_lock);
	mutex_unlock(&kvm->lock);
	return ret;
}

int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	struct kvm *kvm = vcpu->kvm;

	if (!kvm_vcpu_has_pmu(vcpu))
		return -ENODEV;

	if (vcpu->arch.pmu.created)
		return -EBUSY;

	mutex_lock(&kvm->lock);
	if (!kvm->arch.arm_pmu) {
		/* No PMU set, get the default one */
		kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
		if (!kvm->arch.arm_pmu) {
			mutex_unlock(&kvm->lock);
			return -ENODEV;
		}
	}
	mutex_unlock(&kvm->lock);

	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(kvm))
			return -EINVAL;

		if (get_user(irq, uaddr))
			return -EFAULT;

		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
			return -EINVAL;

		if (!pmu_irq_is_valid(kvm, irq))
			return -EINVAL;

		if (kvm_arm_pmu_irq_initialized(vcpu))
			return -EBUSY;

		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
		vcpu->arch.pmu.irq_num = irq;
		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_FILTER: {
		struct kvm_pmu_event_filter __user *uaddr;
		struct kvm_pmu_event_filter filter;
		int nr_events;

		nr_events = kvm_pmu_event_mask(kvm) + 1;

		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;

		if (copy_from_user(&filter, uaddr, sizeof(filter)))
			return -EFAULT;

		if (((u32)filter.base_event + filter.nevents) > nr_events ||
		    (filter.action != KVM_PMU_EVENT_ALLOW &&
		     filter.action != KVM_PMU_EVENT_DENY))
			return -EINVAL;

		mutex_lock(&kvm->lock);

		if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) {
			mutex_unlock(&kvm->lock);
			return -EBUSY;
		}

		if (!kvm->arch.pmu_filter) {
			kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
			if (!kvm->arch.pmu_filter) {
				mutex_unlock(&kvm->lock);
				return -ENOMEM;
			}

			/*
			 * The default depends on the first applied filter.
			 * If it allows events, the default is to deny.
			 * Conversely, if the first filter denies a set of
			 * events, the default is to allow.
			 */
			if (filter.action == KVM_PMU_EVENT_ALLOW)
				bitmap_zero(kvm->arch.pmu_filter, nr_events);
			else
				bitmap_fill(kvm->arch.pmu_filter, nr_events);
		}

		if (filter.action == KVM_PMU_EVENT_ALLOW)
			bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
		else
			bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);

		mutex_unlock(&kvm->lock);

		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int pmu_id;

		if (get_user(pmu_id, uaddr))
			return -EFAULT;

		return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
	}
	case KVM_ARM_VCPU_PMU_V3_INIT:
		return kvm_arm_pmu_v3_init(vcpu);
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (!kvm_vcpu_has_pmu(vcpu))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		irq = vcpu->arch.pmu.irq_num;
		return put_user(irq, uaddr);
	}
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ:
	case KVM_ARM_VCPU_PMU_V3_INIT:
	case KVM_ARM_VCPU_PMU_V3_FILTER:
	case KVM_ARM_VCPU_PMU_V3_SET_PMU:
		if (kvm_vcpu_has_pmu(vcpu))
			return 0;
	}

	return -ENXIO;
}