// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>

/* Enabled by kvm_host_pmu_init() once the first usable host PMU is listed. */
DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);

/* Host PMUs registered through kvm_host_pmu_init(); guarded by arm_pmus_lock. */
static LIST_HEAD(arm_pmus);
static DEFINE_MUTEX(arm_pmus_lock);

static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);

/* Set in attr.config1 of the perf event backing a chained counter pair. */
#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1

/**
 * kvm_pmu_event_mask - mask of the valid event number bits for a VM
 * @kvm: The kvm pointer; kvm->arch.arm_pmu is dereferenced unconditionally
 *
 * Return: GENMASK(9, 0) for PMUVER_8_0, GENMASK(15, 0) for PMUVER_8_1, 8_4,
 * 8_5 and 8_7. Any other version triggers a one-off warning and yields 0.
 */
static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
	unsigned int pmuver;

	pmuver = kvm->arch.arm_pmu->pmuver;

	switch (pmuver) {
	case ID_AA64DFR0_PMUVER_8_0:
		return GENMASK(9, 0);
	case ID_AA64DFR0_PMUVER_8_1:
	case ID_AA64DFR0_PMUVER_8_4:
	case ID_AA64DFR0_PMUVER_8_5:
	case ID_AA64DFR0_PMUVER_8_7:
		return GENMASK(15, 0);
	default:		/* Shouldn't be here, just for sanity */
		WARN_ONCE(1, "Unknown PMU version %d\n", pmuver);
		return 0;
	}
}

/**
 * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 *
 * Only the cycle counter can be 64bit, and only while PMCR_EL0.LC is set.
 */
static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (select_idx == ARMV8_PMU_CYCLE_IDX &&
		__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
}

/**
 * kvm_pmc_to_vcpu - find the vcpu that owns a counter
 * @pmc: The PMU counter pointer
 *
 * Relies on pmc->idx being the position of @pmc in the kvm_pmu::pmc[] array
 * (as set up by kvm_pmu_vcpu_init()): stepping back by idx yields pmc[0], from
 * which the enclosing kvm_pmu, kvm_vcpu_arch and kvm_vcpu are recovered.
 */
static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu;
	struct kvm_vcpu_arch *vcpu_arch;

	/* Rewind to the first element of the pmc[] array */
	pmc -= pmc->idx;
	pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
	vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
	return container_of(vcpu_arch, struct kvm_vcpu, arch);
}

/**
 * kvm_pmu_pmc_is_chained - determine if the pmc is chained
 * @pmc: The PMU counter pointer
 *
 * The chained bitmap holds one bit per counter pair, hence the idx >> 1.
 */
static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

	return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
 * @select_idx: The counter index
 *
 * Return: true for odd indices (high counter of a pair), false for even ones.
 */
static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
{
	return select_idx & 0x1;
}

/**
 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
 * @pmc: The PMU counter pointer
 *
 * When a pair of PMCs are chained together we use the low counter (canonical)
 * to hold the underlying perf event.
 *
 * Return: the low counter of the pair if @pmc is the high counter of a
 * chained pair, @pmc itself otherwise.
 */
static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;

	return pmc;
}

/**
 * kvm_pmu_get_alternate_pmc - obtain the other counter of a pair
 * @pmc: The PMU counter pointer
 *
 * Return: the low counter when @pmc is the high (odd) one, and the high
 * counter when @pmc is the low (even) one. The chained state is not consulted.
 */
static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;
	else
		return pmc + 1;
}

/**
 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 *
 * The event type is always read from the odd (high) counter of the pair that
 * @select_idx belongs to. The cycle counter is never reported as chained.
 */
static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 eventsel, reg;

	/* Look at the high counter of the pair */
	select_idx |= 0x1;

	if (select_idx == ARMV8_PMU_CYCLE_IDX)
		return false;

	reg = PMEVTYPER0_EL0 + select_idx;
	eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);

	return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
}

/**
 * kvm_pmu_get_pair_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 *
 * For a chained pair the result is the 64bit value built from the low
 * counter register (bits 31:0) and the high counter register (bits 63:32);
 * otherwise it is the value of the single counter register. In both cases
 * the count accumulated by the backing perf event, if any, is added.
 */
static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
					  struct kvm_pmc *pmc)
{
	u64 counter, counter_high, reg, enabled, running;

	if (kvm_pmu_pmc_is_chained(pmc)) {
		pmc = kvm_pmu_get_canonical_pmc(pmc);
		reg = PMEVCNTR0_EL0 + pmc->idx;

		counter = __vcpu_sys_reg(vcpu, reg);
		counter_high = __vcpu_sys_reg(vcpu, reg + 1);

		counter = lower_32_bits(counter) | (counter_high << 32);
	} else {
		reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
		counter = __vcpu_sys_reg(vcpu, reg);
	}

	/*
	 * The real counter value is equal to the value of counter register plus
	 * the value perf event counts.
	 */
	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event, &enabled,
						 &running);

	return counter;
}

/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 *
 * Return: 0 if the vcpu has no PMU. Otherwise the upper 32 bits of the pair
 * value for the high counter of a chained pair, the lower 32 bits for any
 * other event counter, and the full value for the cycle counter.
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 counter;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx];

	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(select_idx))
		counter = upper_32_bits(counter);
	else if (select_idx != ARMV8_PMU_CYCLE_IDX)
		counter = lower_32_bits(counter);

	return counter;
}

/**
 * kvm_pmu_set_counter_value - set PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 * @val: The counter value
 *
 * Does nothing if the vcpu has no PMU.
 */
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
	u64 reg;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
	/*
	 * Adjust the stored register by the difference between the requested
	 * value and the value currently reported for this counter.
	 */
	__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 *
 * Operates on the canonical counter of @pmc: disables and releases its perf
 * event, if there is one, and clears the pointer.
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

/**
 * kvm_pmu_stop_counter - stop PMU counter
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 *
 * If this counter has been configured to monitor some event, release it here.
 * The current count is written back to the vcpu's counter register(s) before
 * the perf event is released. Nothing is done if no perf event is attached.
 */
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
{
	u64 counter, reg, val;

	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (!pmc->perf_event)
		return;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
		reg = PMCCNTR_EL0;
		val = counter;
	} else {
		reg = PMEVCNTR0_EL0 + pmc->idx;
		val = lower_32_bits(counter);
	}

	__vcpu_sys_reg(vcpu, reg) = val;

	/* For a chained pair, the upper half goes to the high counter */
	if (kvm_pmu_pmc_is_chained(pmc))
		__vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);

	kvm_pmu_release_perf_event(pmc);
}

/**
 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
 * @vcpu: The vcpu pointer
 *
 * Each counter's idx is set to its position in the pmc[] array.
 */
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		pmu->pmc[i].idx = i;
}

/**
 * kvm_pmu_vcpu_reset - reset pmu state for cpu
 * @vcpu: The vcpu pointer
 *
 * Stops every counter in the valid counter mask and clears the chained
 * bitmap.
 */
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	for_each_set_bit(i, &mask, 32)
		kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);

	bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
}

/**
 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
 * @vcpu: The vcpu pointer
 *
 * Releases the perf event of every counter, then waits for any pending
 * overflow irq_work to complete.
 */
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		kvm_pmu_release_perf_event(&pmu->pmc[i]);
	irq_work_sync(&vcpu->arch.pmu.overflow_work);
}

/**
 * kvm_pmu_valid_counter_mask - bitmap of the counters implemented for a vcpu
 * @vcpu: The vcpu pointer
 *
 * Return: a mask with bits [N-1:0] set, where N is the PMCR_EL0.N field of
 * the vcpu, plus the cycle counter bit, which is always set.
 */
u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;

	val &= ARMV8_PMU_PMCR_N_MASK;
	if (val == 0)
		return BIT(ARMV8_PMU_CYCLE_IDX);
	else
		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
}

/**
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event
 *
 * Nothing is done if the vcpu has no PMU, if PMCR_EL0.E is clear or if @val
 * is zero.
 */
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];

		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event) {
			perf_event_enable(pmc->perf_event);
			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
				kvm_debug("fail to enable perf event\n");
		}
	}
}

/**
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event
 *
 * Nothing is done if the vcpu has no PMU or if @val is zero.
 */
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	if (!kvm_vcpu_has_pmu(vcpu) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];

		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event)
			perf_event_disable(pmc->perf_event);
	}
}

/*
 * Return the set of counters that have overflowed, are enabled and have their
 * overflow interrupt enabled (PMOVSSET & PMCNTENSET & PMINTENSET), or 0 when
 * PMCR_EL0.E is clear.
 */
static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
	u64 reg = 0;

	if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
	}

	return reg;
}

/*
 * Recompute the PMU interrupt line level from the overflow status. When the
 * level changes, record it in pmu->irq_level and, if the irqchip is in the
 * kernel, forward the new level to the vgic.
 */
static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	bool overflow;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	overflow = !!kvm_pmu_overflow_status(vcpu);
	if (pmu->irq_level == overflow)
		return;

	pmu->irq_level = overflow;

	if (likely(irqchip_in_kernel(vcpu->kvm))) {
		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
					      pmu->irq_num, overflow, pmu);
		WARN_ON(ret);
	}
}

/*
 * With an in-kernel irqchip user space never needs notifying. Otherwise,
 * report whether the PMU interrupt level differs from the KVM_ARM_DEV_PMU
 * bit currently exposed in the kvm_run structure.
 */
bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	return pmu->irq_level != run_level;
}

/*
 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 */
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the PMU bit of the device IRQ level bitmap for user space */
	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
	if (vcpu->arch.pmu.irq_level)
		regs->device_irq_level |= KVM_ARM_DEV_PMU;
}

/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and inject
 * an interrupt if that was the case.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/*
 * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding
 * to the event.
 * This is why we need a callback to do it once outside of the NMI context.
 *
 * This is the irq_work handler installed by kvm_arm_pmu_v3_init() and queued
 * from kvm_pmu_perf_overflow().
 */
static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
{
	struct kvm_vcpu *vcpu;
	struct kvm_pmu *pmu;

	pmu = container_of(work, struct kvm_pmu, overflow_work);
	vcpu = kvm_pmc_to_vcpu(pmu->pmc);

	kvm_vcpu_kick(vcpu);
}

/*
 * When the perf event overflows, set the overflow status and inform the vcpu.
 *
 * The counter to flag is taken from the event's overflow_handler_context,
 * which kvm_pmu_create_perf_event() sets to the high counter for a chained
 * pair and to the counter itself otherwise.
 */
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
				  struct perf_sample_data *data,
				  struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	int idx = pmc->idx;
	u64 period;

	/* The event is stopped while its period is reprogrammed */
	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * Reset the sample period to the architectural limit,
	 * i.e. the point where the counter overflows.
	 */
	period = -(local64_read(&perf_event->count));

	if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
		period &= GENMASK(31, 0);

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);

	if (kvm_pmu_overflow_status(vcpu)) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);

		/* In NMI context the kick is deferred to irq_work */
		if (!in_nmi())
			kvm_vcpu_kick(vcpu);
		else
			irq_work_queue(&vcpu->arch.pmu.overflow_work);
	}

	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

/**
 * kvm_pmu_software_increment - do software increment
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMSWINC register
 *
 * Nothing is done if the vcpu has no PMU or if PMCR_EL0.E is clear. Only
 * enabled event counters programmed with the SW_INCR event are incremented;
 * the cycle counter is never considered.
 */
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
		return;

	/* Weed out disabled counters */
	val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

	for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
		u64 type, reg;

		if (!(val & BIT(i)))
			continue;

		/* PMSWINC only applies to ... SW_INC! */
		type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
		type &= kvm_pmu_event_mask(vcpu->kvm);
		if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
			continue;

		/* increment this even SW_INC counter */
		reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
		reg = lower_32_bits(reg);
		__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;

		if (reg) /* no overflow on the low part */
			continue;

		if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
			/* increment the high counter */
			reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
			reg = lower_32_bits(reg);
			__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
			if (!reg) /* mark overflow on the high counter */
				__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
		} else {
			/* mark overflow on low counter */
			__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
		}
	}
}

/**
 * kvm_pmu_handle_pmcr - handle PMCR register
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCR register
 *
 * Depending on the E bit of @val, the counters set in PMCNTENSET_EL0 are
 * enabled or disabled. The C bit resets the cycle counter to 0 and the P bit
 * resets every valid counter other than the cycle counter to 0.
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	if (val & ARMV8_PMU_PMCR_E) {
		kvm_pmu_enable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	} else {
		kvm_pmu_disable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	}

	if (val & ARMV8_PMU_PMCR_C)
		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

	if (val & ARMV8_PMU_PMCR_P) {
		unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
		for_each_set_bit(i, &mask, 32)
			kvm_pmu_set_counter_value(vcpu, i, 0);
	}
}

/*
 * A counter is enabled when both PMCR_EL0.E and its own bit in PMCNTENSET_EL0
 * are set.
 */
static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
}

/**
 * kvm_pmu_create_perf_event - create a perf event for a counter
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 *
 * Any existing event for the counter is stopped first. No event is created
 * for the SW_INCR event or for an event rejected by the VM's PMU filter. On
 * creation failure an error is logged once and the counter is left without a
 * perf event.
 */
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;
	struct perf_event *event;
	struct perf_event_attr attr;
	u64 eventsel, counter, reg, data;

	/*
	 * For chained counters the event type and filtering attributes are
	 * obtained from the low/even counter. We also use this counter to
	 * determine if the event is enabled/disabled.
	 */
	pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);

	reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
	data = __vcpu_sys_reg(vcpu, reg);

	kvm_pmu_stop_counter(vcpu, pmc);
	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	else
		eventsel = data & kvm_pmu_event_mask(vcpu->kvm);

	/* Software increment event doesn't need to be backed by a perf event */
	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
		return;

	/*
	 * If we have a filter in place and that the event isn't allowed, do
	 * not install a perf event either.
	 */
	if (vcpu->kvm->arch.pmu_filter &&
	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
		return;

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.type = arm_pmu->pmu.type;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
	attr.exclude_hv = 1; /* Don't count EL2 events */
	attr.exclude_host = 1; /* Don't count host events */
	attr.config = eventsel;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (kvm_pmu_pmc_is_chained(pmc)) {
		/**
		 * The initial sample period (overflow count) of an event. For
		 * chained counters we only support overflow interrupts on the
		 * high counter.
		 */
		attr.sample_period = (-counter) & GENMASK(63, 0);
		attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;

		/* The overflow handler context is the high counter */
		event = perf_event_create_kernel_counter(&attr, -1, current,
							 kvm_pmu_perf_overflow,
							 pmc + 1);
	} else {
		/* The initial sample period (overflow count) of an event. */
		if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
			attr.sample_period = (-counter) & GENMASK(63, 0);
		else
			attr.sample_period = (-counter) & GENMASK(31, 0);

		event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, pmc);
	}

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return;
	}

	pmc->perf_event = event;
}

/**
 * kvm_pmu_update_pmc_chained - update chained bitmap
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 *
 * Update the chained bitmap based on the event type written in the
 * typer register and the enable state of the odd register.
 *
 * When the chained state changes, the affected counter(s) are stopped before
 * the bitmap is updated.
 */
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
	bool new_state, old_state;

	old_state = kvm_pmu_pmc_is_chained(pmc);
	new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
		    kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);

	if (old_state == new_state)
		return;

	canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
	kvm_pmu_stop_counter(vcpu, canonical_pmc);
	if (new_state) {
		/*
		 * During promotion from !chained to chained we must ensure
		 * the adjacent counter is stopped and its event destroyed
		 */
		kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
		set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
		return;
	}
	clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 * @vcpu: The vcpu pointer
 * @data: The data guest writes to PMXEVTYPER_EL0
 * @select_idx: The number of selected counter
 *
 * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
 * event with given hardware event number. Here we call perf_event API to
 * emulate this action and create a kernel perf event for it.
 *
 * Nothing is done if the vcpu has no PMU.
 */
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
				    u64 select_idx)
{
	u64 reg, mask;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	/* Restrict the event number field to what this VM's PMU supports */
	mask  =  ARMV8_PMU_EVTYPE_MASK;
	mask &= ~ARMV8_PMU_EVTYPE_EVENT;
	mask |= kvm_pmu_event_mask(vcpu->kvm);

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;

	__vcpu_sys_reg(vcpu, reg) = data & mask;

	kvm_pmu_update_pmc_chained(vcpu, select_idx);
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

/*
 * Add a host PMU to the arm_pmus list. PMUs with a pmuver of 0 or
 * ID_AA64DFR0_PMUVER_IMP_DEF are ignored, as is every PMU when protected KVM
 * is enabled. An allocation failure is silently ignored: the PMU is simply
 * not listed. The kvm_arm_pmu_available static key is enabled when the list
 * goes from empty to a single entry.
 */
void kvm_host_pmu_init(struct arm_pmu *pmu)
{
	struct arm_pmu_entry *entry;

	if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF ||
	    is_protected_kvm_enabled())
		return;

	mutex_lock(&arm_pmus_lock);

	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		goto out_unlock;

	entry->arm_pmu = pmu;
	list_add_tail(&entry->entry, &arm_pmus);

	if (list_is_singular(&arm_pmus))
		static_branch_enable(&kvm_arm_pmu_available);

out_unlock:
	mutex_unlock(&arm_pmus_lock);
}

/*
 * Find the arm_pmu that backs a raw perf event created for the current task.
 *
 * Return: the arm_pmu, or NULL if the event could not be created or if the
 * PMU's pmuver is 0 or ID_AA64DFR0_PMUVER_IMP_DEF.
 */
static struct arm_pmu *kvm_pmu_probe_armpmu(void)
{
	struct perf_event_attr attr = { };
	struct perf_event *event;
	struct arm_pmu *pmu = NULL;

	/*
	 * Create a dummy event that only counts user cycles. As we'll never
	 * leave this function with the event being live, it will never
	 * count anything. But it allows us to probe some of the PMU
	 * details. Yes, this is terrible.
	 */
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = 0;
	attr.exclude_user = 0;
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;
	attr.exclude_host = 1;
	attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	attr.sample_period = GENMASK(63, 0);

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, &attr);

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return NULL;
	}

	if (event->pmu) {
		pmu = to_arm_pmu(event->pmu);
		if (pmu->pmuver == 0 ||
		    pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
			pmu = NULL;
	}

	/* The event only served as a probe: tear it down again */
	perf_event_disable(event);
	perf_event_release_kernel(event);

	return pmu;
}

/*
 * Compute the guest view of PMCEID0_EL0 (@pmceid1 false) or PMCEID1_EL0
 * (@pmceid1 true): the host register value, restricted to the events allowed
 * by the VM's PMU filter if one is installed.
 *
 * Return: 0 if the vcpu has no PMU, the filtered register value otherwise.
 */
u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
	unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
	u64 val, mask = 0;
	int base, i, nr_events;

	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!pmceid1) {
		val = read_sysreg(pmceid0_el0);
		base = 0;
	} else {
		val = read_sysreg(pmceid1_el0);
		/*
		 * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
		 * as RAZ
		 */
		if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_4)
			val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
		base = 32;
	}

	if (!bmap)
		return val;

	nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

	/*
	 * Build the mask a byte at a time: the low 32 bits come from filter
	 * bits starting at @base, the high 32 bits from filter bits starting
	 * at 0x4000 + @base when the event space is large enough.
	 */
	for (i = 0; i < 32; i += 8) {
		u64 byte;

		byte = bitmap_get_value8(bmap, base + i);
		mask |= byte << i;
		if (nr_events >= (0x4000 + base + 32)) {
			byte = bitmap_get_value8(bmap, 0x4000 + base + i);
			mask |= byte << (32 + i);
		}
	}

	return val & mask;
}

/*
 * Validate the PMU configuration of a vcpu and request a PMU reload.
 *
 * Return: 0 if the vcpu has no PMU or the configuration is valid, -EINVAL if
 * the PMU has not been created or its interrupt configuration is invalid.
 */
int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!vcpu->arch.pmu.created)
		return -EINVAL;

	/*
	 * A valid interrupt configuration for the PMU is either to have a
	 * properly configured interrupt number and using an in-kernel
	 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
	 */
	if (irqchip_in_kernel(vcpu->kvm)) {
		int irq = vcpu->arch.pmu.irq_num;
		/*
		 * If we are using an in-kernel vgic, at this point we know
		 * the vgic will be initialized, so we can check the PMU irq
		 * number against the dimensions of the vgic and make sure
		 * it's valid.
		 */
		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
			return -EINVAL;
	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
		   return -EINVAL;
	}

	/* One-off reload of the PMU on first run */
	kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);

	return 0;
}

/*
 * Handle KVM_ARM_VCPU_PMU_V3_INIT: claim the PMU interrupt in the vgic (when
 * the irqchip is in the kernel), set up the overflow irq_work and mark the
 * PMU as created.
 *
 * Return: 0 on success, -ENODEV if the in-kernel vgic is not initialized,
 * -ENXIO if no PMU interrupt has been set, or the error returned by
 * kvm_vgic_set_owner().
 */
static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm)) {
		int ret;

		/*
		 * If using the PMU with an in-kernel virtual GIC
		 * implementation, we require the GIC to be already
		 * initialized when initializing the PMU.
		 */
		if (!vgic_initialized(vcpu->kvm))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
					 &vcpu->arch.pmu);
		if (ret)
			return ret;
	}

	init_irq_work(&vcpu->arch.pmu.overflow_work,
		      kvm_pmu_perf_overflow_notify_vcpu);

	vcpu->arch.pmu.created = true;
	return 0;
}

/*
 * For one VM the interrupt type must be same for each vcpu.
 * As a PPI, the interrupt number is the same for all vcpus,
 * while as an SPI it must be a separate number per vcpu.
 *
 * Only vcpus whose PMU interrupt is already set are checked against @irq.
 */
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			continue;

		if (irq_is_ppi(irq)) {
			if (vcpu->arch.pmu.irq_num != irq)
				return false;
		} else {
			if (vcpu->arch.pmu.irq_num == irq)
				return false;
		}
	}

	return true;
}

/*
 * Handle KVM_ARM_VCPU_PMU_V3_SET_PMU: select the host PMU whose perf type
 * matches @pmu_id as the VM's PMU and copy its supported CPU mask.
 *
 * Return: 0 on success, -ENXIO if no listed PMU matches @pmu_id, -EBUSY if
 * the VM has already run or if an event filter is installed and the request
 * would change the VM's PMU.
 */
static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
{
	struct kvm *kvm = vcpu->kvm;
	struct arm_pmu_entry *entry;
	struct arm_pmu *arm_pmu;
	int ret = -ENXIO;

	mutex_lock(&kvm->lock);
	mutex_lock(&arm_pmus_lock);

	list_for_each_entry(entry, &arm_pmus, entry) {
		arm_pmu = entry->arm_pmu;
		if (arm_pmu->pmu.type == pmu_id) {
			if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) ||
			    (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
				ret = -EBUSY;
				break;
			}

			kvm->arch.arm_pmu = arm_pmu;
			cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus);
			ret = 0;
			break;
		}
	}

	mutex_unlock(&arm_pmus_lock);
	mutex_unlock(&kvm->lock);
	return ret;
}

/*
 * Set a PMUv3 vcpu device attribute (IRQ, FILTER, SET_PMU or INIT).
 *
 * If the VM has no PMU yet, a default one is probed before the attribute is
 * handled.
 *
 * Return: 0 on success; -ENODEV if the vcpu has no PMU or no default PMU can
 * be found; -EBUSY if the PMU is already created (and in the per-attribute
 * cases below); -ENXIO for an unknown attribute; otherwise the error of the
 * attribute handler.
 */
int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	struct kvm *kvm = vcpu->kvm;

	if (!kvm_vcpu_has_pmu(vcpu))
		return -ENODEV;

	if (vcpu->arch.pmu.created)
		return -EBUSY;

	mutex_lock(&kvm->lock);
	if (!kvm->arch.arm_pmu) {
		/* No PMU set, get the default one */
		kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
		if (!kvm->arch.arm_pmu) {
			mutex_unlock(&kvm->lock);
			return -ENODEV;
		}
	}
	mutex_unlock(&kvm->lock);

	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(kvm))
			return -EINVAL;

		if (get_user(irq, uaddr))
			return -EFAULT;

		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
			return -EINVAL;

		if (!pmu_irq_is_valid(kvm, irq))
			return -EINVAL;

		/* The interrupt can only be set once per vcpu */
		if (kvm_arm_pmu_irq_initialized(vcpu))
			return -EBUSY;

		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
		vcpu->arch.pmu.irq_num = irq;
		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_FILTER: {
		struct kvm_pmu_event_filter __user *uaddr;
		struct kvm_pmu_event_filter filter;
		int nr_events;

		nr_events = kvm_pmu_event_mask(kvm) + 1;

		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;

		if (copy_from_user(&filter, uaddr, sizeof(filter)))
			return -EFAULT;

		/* Reject ranges beyond the event space and unknown actions */
		if (((u32)filter.base_event + filter.nevents) > nr_events ||
		    (filter.action != KVM_PMU_EVENT_ALLOW &&
		     filter.action != KVM_PMU_EVENT_DENY))
			return -EINVAL;

		mutex_lock(&kvm->lock);

		/* Filters cannot be changed once the VM has run */
		if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) {
			mutex_unlock(&kvm->lock);
			return -EBUSY;
		}

		if (!kvm->arch.pmu_filter) {
			kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
			if (!kvm->arch.pmu_filter) {
				mutex_unlock(&kvm->lock);
				return -ENOMEM;
			}

			/*
			 * The default depends on the first applied filter.
			 * If it allows events, the default is to deny.
			 * Conversely, if the first filter denies a set of
			 * events, the default is to allow.
			 */
			if (filter.action == KVM_PMU_EVENT_ALLOW)
				bitmap_zero(kvm->arch.pmu_filter, nr_events);
			else
				bitmap_fill(kvm->arch.pmu_filter, nr_events);
		}

		if (filter.action == KVM_PMU_EVENT_ALLOW)
			bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
		else
			bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);

		mutex_unlock(&kvm->lock);

		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int pmu_id;

		if (get_user(pmu_id, uaddr))
			return -EFAULT;

		return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
	}
	case KVM_ARM_VCPU_PMU_V3_INIT:
		return kvm_arm_pmu_v3_init(vcpu);
	}

	return -ENXIO;
}

/*
 * Read a PMUv3 vcpu device attribute. Only KVM_ARM_VCPU_PMU_V3_IRQ can be
 * read back.
 *
 * Return: the result of put_user() on success; -EINVAL without an in-kernel
 * irqchip; -ENODEV if the vcpu has no PMU; -ENXIO if the interrupt has not
 * been set or the attribute is not readable.
 */
int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (!kvm_vcpu_has_pmu(vcpu))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		irq = vcpu->arch.pmu.irq_num;
		return put_user(irq, uaddr);
	}
	}

	return -ENXIO;
}

/*
 * Report whether a PMUv3 vcpu device attribute is supported.
 *
 * Return: 0 for the IRQ, INIT, FILTER and SET_PMU attributes when the vcpu
 * has a PMU, -ENXIO otherwise.
 */
int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ:
	case KVM_ARM_VCPU_PMU_V3_INIT:
	case KVM_ARM_VCPU_PMU_V3_FILTER:
	case KVM_ARM_VCPU_PMU_V3_SET_PMU:
		if (kvm_vcpu_has_pmu(vcpu))
			return 0;
	}

	return -ENXIO;
}