1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2015 Linaro Ltd. 4 * Author: Shannon Zhao <shannon.zhao@linaro.org> 5 */ 6 7 #include <linux/cpu.h> 8 #include <linux/kvm.h> 9 #include <linux/kvm_host.h> 10 #include <linux/perf_event.h> 11 #include <linux/perf/arm_pmu.h> 12 #include <linux/uaccess.h> 13 #include <asm/kvm_emulate.h> 14 #include <kvm/arm_pmu.h> 15 #include <kvm/arm_vgic.h> 16 17 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); 18 static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx); 19 static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc); 20 21 #define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1 22 23 static u32 kvm_pmu_event_mask(struct kvm *kvm) 24 { 25 switch (kvm->arch.pmuver) { 26 case ID_AA64DFR0_PMUVER_8_0: 27 return GENMASK(9, 0); 28 case ID_AA64DFR0_PMUVER_8_1: 29 case ID_AA64DFR0_PMUVER_8_4: 30 case ID_AA64DFR0_PMUVER_8_5: 31 return GENMASK(15, 0); 32 default: /* Shouldn't be here, just for sanity */ 33 WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver); 34 return 0; 35 } 36 } 37 38 /** 39 * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter 40 * @vcpu: The vcpu pointer 41 * @select_idx: The counter index 42 */ 43 static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx) 44 { 45 return (select_idx == ARMV8_PMU_CYCLE_IDX && 46 __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC); 47 } 48 49 static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) 50 { 51 struct kvm_pmu *pmu; 52 struct kvm_vcpu_arch *vcpu_arch; 53 54 pmc -= pmc->idx; 55 pmu = container_of(pmc, struct kvm_pmu, pmc[0]); 56 vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu); 57 return container_of(vcpu_arch, struct kvm_vcpu, arch); 58 } 59 60 /** 61 * kvm_pmu_pmc_is_chained - determine if the pmc is chained 62 * @pmc: The PMU counter pointer 63 */ 64 static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc) 65 { 66 struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); 67 68 return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); 69 } 70 71 /** 72 * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter 73 * @select_idx: The counter index 74 */ 75 static bool kvm_pmu_idx_is_high_counter(u64 select_idx) 76 { 77 return select_idx & 0x1; 78 } 79 80 /** 81 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc 82 * @pmc: The PMU counter pointer 83 * 84 * When a pair of PMCs are chained together we use the low counter (canonical) 85 * to hold the underlying perf event. 86 */ 87 static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc) 88 { 89 if (kvm_pmu_pmc_is_chained(pmc) && 90 kvm_pmu_idx_is_high_counter(pmc->idx)) 91 return pmc - 1; 92 93 return pmc; 94 } 95 static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc) 96 { 97 if (kvm_pmu_idx_is_high_counter(pmc->idx)) 98 return pmc - 1; 99 else 100 return pmc + 1; 101 } 102 103 /** 104 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain 105 * @vcpu: The vcpu pointer 106 * @select_idx: The counter index 107 */ 108 static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx) 109 { 110 u64 eventsel, reg; 111 112 select_idx |= 0x1; 113 114 if (select_idx == ARMV8_PMU_CYCLE_IDX) 115 return false; 116 117 reg = PMEVTYPER0_EL0 + select_idx; 118 eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm); 119 120 return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN; 121 } 122 123 /** 124 * kvm_pmu_get_pair_counter_value - get PMU counter value 125 * @vcpu: The vcpu pointer 126 * @pmc: The PMU counter pointer 127 */ 128 static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu, 129 struct kvm_pmc *pmc) 130 { 131 u64 counter, counter_high, reg, enabled, running; 132 133 if (kvm_pmu_pmc_is_chained(pmc)) { 134 pmc = kvm_pmu_get_canonical_pmc(pmc); 135 reg = PMEVCNTR0_EL0 + pmc->idx; 136 137 counter = __vcpu_sys_reg(vcpu, reg); 138 counter_high = __vcpu_sys_reg(vcpu, reg + 1); 139 140 counter = lower_32_bits(counter) | (counter_high << 32); 141 } else { 142 reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) 143 ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; 144 counter = __vcpu_sys_reg(vcpu, reg); 145 } 146 147 /* 148 * The real counter value is equal to the value of counter register plus 149 * the value perf event counts. 150 */ 151 if (pmc->perf_event) 152 counter += perf_event_read_value(pmc->perf_event, &enabled, 153 &running); 154 155 return counter; 156 } 157 158 /** 159 * kvm_pmu_get_counter_value - get PMU counter value 160 * @vcpu: The vcpu pointer 161 * @select_idx: The counter index 162 */ 163 u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) 164 { 165 u64 counter; 166 struct kvm_pmu *pmu = &vcpu->arch.pmu; 167 struct kvm_pmc *pmc = &pmu->pmc[select_idx]; 168 169 counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); 170 171 if (kvm_pmu_pmc_is_chained(pmc) && 172 kvm_pmu_idx_is_high_counter(select_idx)) 173 counter = upper_32_bits(counter); 174 else if (select_idx != ARMV8_PMU_CYCLE_IDX) 175 counter = lower_32_bits(counter); 176 177 return counter; 178 } 179 180 /** 181 * kvm_pmu_set_counter_value - set PMU counter value 182 * @vcpu: The vcpu pointer 183 * @select_idx: The counter index 184 * @val: The counter value 185 */ 186 void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) 187 { 188 u64 reg; 189 190 reg = (select_idx == ARMV8_PMU_CYCLE_IDX) 191 ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; 192 __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); 193 194 /* Recreate the perf event to reflect the updated sample_period */ 195 kvm_pmu_create_perf_event(vcpu, select_idx); 196 } 197 198 /** 199 * kvm_pmu_release_perf_event - remove the perf event 200 * @pmc: The PMU counter pointer 201 */ 202 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) 203 { 204 pmc = kvm_pmu_get_canonical_pmc(pmc); 205 if (pmc->perf_event) { 206 perf_event_disable(pmc->perf_event); 207 perf_event_release_kernel(pmc->perf_event); 208 pmc->perf_event = NULL; 209 } 210 } 211 212 /** 213 * kvm_pmu_stop_counter - stop PMU counter 214 * @pmc: The PMU counter pointer 215 * 216 * If this counter has been configured to monitor some event, release it here. 217 */ 218 static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) 219 { 220 u64 counter, reg, val; 221 222 pmc = kvm_pmu_get_canonical_pmc(pmc); 223 if (!pmc->perf_event) 224 return; 225 226 counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); 227 228 if (pmc->idx == ARMV8_PMU_CYCLE_IDX) { 229 reg = PMCCNTR_EL0; 230 val = counter; 231 } else { 232 reg = PMEVCNTR0_EL0 + pmc->idx; 233 val = lower_32_bits(counter); 234 } 235 236 __vcpu_sys_reg(vcpu, reg) = val; 237 238 if (kvm_pmu_pmc_is_chained(pmc)) 239 __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter); 240 241 kvm_pmu_release_perf_event(pmc); 242 } 243 244 /** 245 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu 246 * @vcpu: The vcpu pointer 247 * 248 */ 249 void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) 250 { 251 int i; 252 struct kvm_pmu *pmu = &vcpu->arch.pmu; 253 254 for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) 255 pmu->pmc[i].idx = i; 256 } 257 258 /** 259 * kvm_pmu_vcpu_reset - reset pmu state for cpu 260 * @vcpu: The vcpu pointer 261 * 262 */ 263 void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) 264 { 265 unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); 266 struct kvm_pmu *pmu = &vcpu->arch.pmu; 267 int i; 268 269 for_each_set_bit(i, &mask, 32) 270 kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]); 271 272 bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS); 273 } 274 275 /** 276 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu 277 * @vcpu: The vcpu pointer 278 * 279 */ 280 void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) 281 { 282 int i; 283 struct kvm_pmu *pmu = &vcpu->arch.pmu; 284 285 for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) 286 kvm_pmu_release_perf_event(&pmu->pmc[i]); 287 irq_work_sync(&vcpu->arch.pmu.overflow_work); 288 } 289 290 u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) 291 { 292 u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT; 293 294 val &= ARMV8_PMU_PMCR_N_MASK; 295 if (val == 0) 296 return BIT(ARMV8_PMU_CYCLE_IDX); 297 else 298 return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX); 299 } 300 301 /** 302 * kvm_pmu_enable_counter_mask - enable selected PMU counters 303 * @vcpu: The vcpu pointer 304 * @val: the value guest writes to PMCNTENSET register 305 * 306 * Call perf_event_enable to start counting the perf event 307 */ 308 void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) 309 { 310 int i; 311 struct kvm_pmu *pmu = &vcpu->arch.pmu; 312 struct kvm_pmc *pmc; 313 314 if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val) 315 return; 316 317 for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { 318 if (!(val & BIT(i))) 319 continue; 320 321 pmc = &pmu->pmc[i]; 322 323 /* A change in the enable state may affect the chain state */ 324 kvm_pmu_update_pmc_chained(vcpu, i); 325 kvm_pmu_create_perf_event(vcpu, i); 326 327 /* At this point, pmc must be the canonical */ 328 if (pmc->perf_event) { 329 perf_event_enable(pmc->perf_event); 330 if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE) 331 kvm_debug("fail to enable perf event\n"); 332 } 333 } 334 } 335 336 /** 337 * kvm_pmu_disable_counter_mask - disable selected PMU counters 338 * @vcpu: The vcpu pointer 339 * @val: the value guest writes to PMCNTENCLR register 340 * 341 * Call perf_event_disable to stop counting the perf event 342 */ 343 void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) 344 { 345 int i; 346 struct kvm_pmu *pmu = &vcpu->arch.pmu; 347 struct kvm_pmc *pmc; 348 349 if (!val) 350 return; 351 352 for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { 353 if (!(val & BIT(i))) 354 continue; 355 356 pmc = &pmu->pmc[i]; 357 358 /* A change in the enable state may affect the chain state */ 359 kvm_pmu_update_pmc_chained(vcpu, i); 360 kvm_pmu_create_perf_event(vcpu, i); 361 362 /* At this point, pmc must be the canonical */ 363 if (pmc->perf_event) 364 perf_event_disable(pmc->perf_event); 365 } 366 } 367 368 static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) 369 { 370 u64 reg = 0; 371 372 if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) { 373 reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); 374 reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); 375 reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); 376 reg &= kvm_pmu_valid_counter_mask(vcpu); 377 } 378 379 return reg; 380 } 381 382 static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) 383 { 384 struct kvm_pmu *pmu = &vcpu->arch.pmu; 385 bool overflow; 386 387 if (!kvm_vcpu_has_pmu(vcpu)) 388 return; 389 390 overflow = !!kvm_pmu_overflow_status(vcpu); 391 if (pmu->irq_level == overflow) 392 return; 393 394 pmu->irq_level = overflow; 395 396 if (likely(irqchip_in_kernel(vcpu->kvm))) { 397 int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, 398 pmu->irq_num, overflow, pmu); 399 WARN_ON(ret); 400 } 401 } 402 403 bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu) 404 { 405 struct kvm_pmu *pmu = &vcpu->arch.pmu; 406 struct kvm_sync_regs *sregs = &vcpu->run->s.regs; 407 bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU; 408 409 if (likely(irqchip_in_kernel(vcpu->kvm))) 410 return false; 411 412 return pmu->irq_level != run_level; 413 } 414 415 /* 416 * Reflect the PMU overflow interrupt output level into the kvm_run structure 417 */ 418 void kvm_pmu_update_run(struct kvm_vcpu *vcpu) 419 { 420 struct kvm_sync_regs *regs = &vcpu->run->s.regs; 421 422 /* Populate the timer bitmap for user space */ 423 regs->device_irq_level &= ~KVM_ARM_DEV_PMU; 424 if (vcpu->arch.pmu.irq_level) 425 regs->device_irq_level |= KVM_ARM_DEV_PMU; 426 } 427 428 /** 429 * kvm_pmu_flush_hwstate - flush pmu state to cpu 430 * @vcpu: The vcpu pointer 431 * 432 * Check if the PMU has overflowed while we were running in the host, and inject 433 * an interrupt if that was the case. 434 */ 435 void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu) 436 { 437 kvm_pmu_update_state(vcpu); 438 } 439 440 /** 441 * kvm_pmu_sync_hwstate - sync pmu state from cpu 442 * @vcpu: The vcpu pointer 443 * 444 * Check if the PMU has overflowed while we were running in the guest, and 445 * inject an interrupt if that was the case. 446 */ 447 void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) 448 { 449 kvm_pmu_update_state(vcpu); 450 } 451 452 /** 453 * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding 454 * to the event. 455 * This is why we need a callback to do it once outside of the NMI context. 456 */ 457 static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work) 458 { 459 struct kvm_vcpu *vcpu; 460 struct kvm_pmu *pmu; 461 462 pmu = container_of(work, struct kvm_pmu, overflow_work); 463 vcpu = kvm_pmc_to_vcpu(pmu->pmc); 464 465 kvm_vcpu_kick(vcpu); 466 } 467 468 /** 469 * When the perf event overflows, set the overflow status and inform the vcpu. 470 */ 471 static void kvm_pmu_perf_overflow(struct perf_event *perf_event, 472 struct perf_sample_data *data, 473 struct pt_regs *regs) 474 { 475 struct kvm_pmc *pmc = perf_event->overflow_handler_context; 476 struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu); 477 struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); 478 int idx = pmc->idx; 479 u64 period; 480 481 cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE); 482 483 /* 484 * Reset the sample period to the architectural limit, 485 * i.e. the point where the counter overflows. 486 */ 487 period = -(local64_read(&perf_event->count)); 488 489 if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) 490 period &= GENMASK(31, 0); 491 492 local64_set(&perf_event->hw.period_left, 0); 493 perf_event->attr.sample_period = period; 494 perf_event->hw.sample_period = period; 495 496 __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); 497 498 if (kvm_pmu_overflow_status(vcpu)) { 499 kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); 500 501 if (!in_nmi()) 502 kvm_vcpu_kick(vcpu); 503 else 504 irq_work_queue(&vcpu->arch.pmu.overflow_work); 505 } 506 507 cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD); 508 } 509 510 /** 511 * kvm_pmu_software_increment - do software increment 512 * @vcpu: The vcpu pointer 513 * @val: the value guest writes to PMSWINC register 514 */ 515 void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) 516 { 517 struct kvm_pmu *pmu = &vcpu->arch.pmu; 518 int i; 519 520 if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) 521 return; 522 523 /* Weed out disabled counters */ 524 val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); 525 526 for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) { 527 u64 type, reg; 528 529 if (!(val & BIT(i))) 530 continue; 531 532 /* PMSWINC only applies to ... SW_INC! */ 533 type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i); 534 type &= kvm_pmu_event_mask(vcpu->kvm); 535 if (type != ARMV8_PMUV3_PERFCTR_SW_INCR) 536 continue; 537 538 /* increment this even SW_INC counter */ 539 reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; 540 reg = lower_32_bits(reg); 541 __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; 542 543 if (reg) /* no overflow on the low part */ 544 continue; 545 546 if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) { 547 /* increment the high counter */ 548 reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1; 549 reg = lower_32_bits(reg); 550 __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg; 551 if (!reg) /* mark overflow on the high counter */ 552 __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1); 553 } else { 554 /* mark overflow on low counter */ 555 __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); 556 } 557 } 558 } 559 560 /** 561 * kvm_pmu_handle_pmcr - handle PMCR register 562 * @vcpu: The vcpu pointer 563 * @val: the value guest writes to PMCR register 564 */ 565 void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) 566 { 567 unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); 568 int i; 569 570 if (val & ARMV8_PMU_PMCR_E) { 571 kvm_pmu_enable_counter_mask(vcpu, 572 __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask); 573 } else { 574 kvm_pmu_disable_counter_mask(vcpu, mask); 575 } 576 577 if (val & ARMV8_PMU_PMCR_C) 578 kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); 579 580 if (val & ARMV8_PMU_PMCR_P) { 581 for_each_set_bit(i, &mask, 32) 582 kvm_pmu_set_counter_value(vcpu, i, 0); 583 } 584 } 585 586 static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) 587 { 588 return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) && 589 (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx)); 590 } 591 592 /** 593 * kvm_pmu_create_perf_event - create a perf event for a counter 594 * @vcpu: The vcpu pointer 595 * @select_idx: The number of selected counter 596 */ 597 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) 598 { 599 struct kvm_pmu *pmu = &vcpu->arch.pmu; 600 struct kvm_pmc *pmc; 601 struct perf_event *event; 602 struct perf_event_attr attr; 603 u64 eventsel, counter, reg, data; 604 605 /* 606 * For chained counters the event type and filtering attributes are 607 * obtained from the low/even counter. We also use this counter to 608 * determine if the event is enabled/disabled. 609 */ 610 pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]); 611 612 reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) 613 ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx; 614 data = __vcpu_sys_reg(vcpu, reg); 615 616 kvm_pmu_stop_counter(vcpu, pmc); 617 if (pmc->idx == ARMV8_PMU_CYCLE_IDX) 618 eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES; 619 else 620 eventsel = data & kvm_pmu_event_mask(vcpu->kvm); 621 622 /* Software increment event doesn't need to be backed by a perf event */ 623 if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR) 624 return; 625 626 /* 627 * If we have a filter in place and that the event isn't allowed, do 628 * not install a perf event either. 629 */ 630 if (vcpu->kvm->arch.pmu_filter && 631 !test_bit(eventsel, vcpu->kvm->arch.pmu_filter)) 632 return; 633 634 memset(&attr, 0, sizeof(struct perf_event_attr)); 635 attr.type = PERF_TYPE_RAW; 636 attr.size = sizeof(attr); 637 attr.pinned = 1; 638 attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx); 639 attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0; 640 attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0; 641 attr.exclude_hv = 1; /* Don't count EL2 events */ 642 attr.exclude_host = 1; /* Don't count host events */ 643 attr.config = eventsel; 644 645 counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); 646 647 if (kvm_pmu_pmc_is_chained(pmc)) { 648 /** 649 * The initial sample period (overflow count) of an event. For 650 * chained counters we only support overflow interrupts on the 651 * high counter. 652 */ 653 attr.sample_period = (-counter) & GENMASK(63, 0); 654 attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED; 655 656 event = perf_event_create_kernel_counter(&attr, -1, current, 657 kvm_pmu_perf_overflow, 658 pmc + 1); 659 } else { 660 /* The initial sample period (overflow count) of an event. */ 661 if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) 662 attr.sample_period = (-counter) & GENMASK(63, 0); 663 else 664 attr.sample_period = (-counter) & GENMASK(31, 0); 665 666 event = perf_event_create_kernel_counter(&attr, -1, current, 667 kvm_pmu_perf_overflow, pmc); 668 } 669 670 if (IS_ERR(event)) { 671 pr_err_once("kvm: pmu event creation failed %ld\n", 672 PTR_ERR(event)); 673 return; 674 } 675 676 pmc->perf_event = event; 677 } 678 679 /** 680 * kvm_pmu_update_pmc_chained - update chained bitmap 681 * @vcpu: The vcpu pointer 682 * @select_idx: The number of selected counter 683 * 684 * Update the chained bitmap based on the event type written in the 685 * typer register and the enable state of the odd register. 686 */ 687 static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx) 688 { 689 struct kvm_pmu *pmu = &vcpu->arch.pmu; 690 struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc; 691 bool new_state, old_state; 692 693 old_state = kvm_pmu_pmc_is_chained(pmc); 694 new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) && 695 kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1); 696 697 if (old_state == new_state) 698 return; 699 700 canonical_pmc = kvm_pmu_get_canonical_pmc(pmc); 701 kvm_pmu_stop_counter(vcpu, canonical_pmc); 702 if (new_state) { 703 /* 704 * During promotion from !chained to chained we must ensure 705 * the adjacent counter is stopped and its event destroyed 706 */ 707 kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc)); 708 set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); 709 return; 710 } 711 clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); 712 } 713 714 /** 715 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event 716 * @vcpu: The vcpu pointer 717 * @data: The data guest writes to PMXEVTYPER_EL0 718 * @select_idx: The number of selected counter 719 * 720 * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an 721 * event with given hardware event number. Here we call perf_event API to 722 * emulate this action and create a kernel perf event for it. 723 */ 724 void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, 725 u64 select_idx) 726 { 727 u64 reg, mask; 728 729 mask = ARMV8_PMU_EVTYPE_MASK; 730 mask &= ~ARMV8_PMU_EVTYPE_EVENT; 731 mask |= kvm_pmu_event_mask(vcpu->kvm); 732 733 reg = (select_idx == ARMV8_PMU_CYCLE_IDX) 734 ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx; 735 736 __vcpu_sys_reg(vcpu, reg) = data & mask; 737 738 kvm_pmu_update_pmc_chained(vcpu, select_idx); 739 kvm_pmu_create_perf_event(vcpu, select_idx); 740 } 741 742 static int kvm_pmu_probe_pmuver(void) 743 { 744 struct perf_event_attr attr = { }; 745 struct perf_event *event; 746 struct arm_pmu *pmu; 747 int pmuver = 0xf; 748 749 /* 750 * Create a dummy event that only counts user cycles. As we'll never 751 * leave this function with the event being live, it will never 752 * count anything. But it allows us to probe some of the PMU 753 * details. Yes, this is terrible. 754 */ 755 attr.type = PERF_TYPE_RAW; 756 attr.size = sizeof(attr); 757 attr.pinned = 1; 758 attr.disabled = 0; 759 attr.exclude_user = 0; 760 attr.exclude_kernel = 1; 761 attr.exclude_hv = 1; 762 attr.exclude_host = 1; 763 attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES; 764 attr.sample_period = GENMASK(63, 0); 765 766 event = perf_event_create_kernel_counter(&attr, -1, current, 767 kvm_pmu_perf_overflow, &attr); 768 769 if (IS_ERR(event)) { 770 pr_err_once("kvm: pmu event creation failed %ld\n", 771 PTR_ERR(event)); 772 return 0xf; 773 } 774 775 if (event->pmu) { 776 pmu = to_arm_pmu(event->pmu); 777 if (pmu->pmuver) 778 pmuver = pmu->pmuver; 779 } 780 781 perf_event_disable(event); 782 perf_event_release_kernel(event); 783 784 return pmuver; 785 } 786 787 u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) 788 { 789 unsigned long *bmap = vcpu->kvm->arch.pmu_filter; 790 u64 val, mask = 0; 791 int base, i, nr_events; 792 793 if (!pmceid1) { 794 val = read_sysreg(pmceid0_el0); 795 base = 0; 796 } else { 797 val = read_sysreg(pmceid1_el0); 798 /* 799 * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled 800 * as RAZ 801 */ 802 if (vcpu->kvm->arch.pmuver >= ID_AA64DFR0_PMUVER_8_4) 803 val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32); 804 base = 32; 805 } 806 807 if (!bmap) 808 return val; 809 810 nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1; 811 812 for (i = 0; i < 32; i += 8) { 813 u64 byte; 814 815 byte = bitmap_get_value8(bmap, base + i); 816 mask |= byte << i; 817 if (nr_events >= (0x4000 + base + 32)) { 818 byte = bitmap_get_value8(bmap, 0x4000 + base + i); 819 mask |= byte << (32 + i); 820 } 821 } 822 823 return val & mask; 824 } 825 826 bool kvm_arm_support_pmu_v3(void) 827 { 828 /* 829 * Check if HW_PERF_EVENTS are supported by checking the number of 830 * hardware performance counters. This could ensure the presence of 831 * a physical PMU and CONFIG_PERF_EVENT is selected. 832 */ 833 return (perf_num_counters() > 0); 834 } 835 836 int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) 837 { 838 if (!kvm_vcpu_has_pmu(vcpu)) 839 return 0; 840 841 if (!vcpu->arch.pmu.created) 842 return -EINVAL; 843 844 /* 845 * A valid interrupt configuration for the PMU is either to have a 846 * properly configured interrupt number and using an in-kernel 847 * irqchip, or to not have an in-kernel GIC and not set an IRQ. 848 */ 849 if (irqchip_in_kernel(vcpu->kvm)) { 850 int irq = vcpu->arch.pmu.irq_num; 851 /* 852 * If we are using an in-kernel vgic, at this point we know 853 * the vgic will be initialized, so we can check the PMU irq 854 * number against the dimensions of the vgic and make sure 855 * it's valid. 856 */ 857 if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq)) 858 return -EINVAL; 859 } else if (kvm_arm_pmu_irq_initialized(vcpu)) { 860 return -EINVAL; 861 } 862 863 return 0; 864 } 865 866 static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) 867 { 868 if (irqchip_in_kernel(vcpu->kvm)) { 869 int ret; 870 871 /* 872 * If using the PMU with an in-kernel virtual GIC 873 * implementation, we require the GIC to be already 874 * initialized when initializing the PMU. 875 */ 876 if (!vgic_initialized(vcpu->kvm)) 877 return -ENODEV; 878 879 if (!kvm_arm_pmu_irq_initialized(vcpu)) 880 return -ENXIO; 881 882 ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num, 883 &vcpu->arch.pmu); 884 if (ret) 885 return ret; 886 } 887 888 init_irq_work(&vcpu->arch.pmu.overflow_work, 889 kvm_pmu_perf_overflow_notify_vcpu); 890 891 vcpu->arch.pmu.created = true; 892 return 0; 893 } 894 895 /* 896 * For one VM the interrupt type must be same for each vcpu. 897 * As a PPI, the interrupt number is the same for all vcpus, 898 * while as an SPI it must be a separate number per vcpu. 899 */ 900 static bool pmu_irq_is_valid(struct kvm *kvm, int irq) 901 { 902 int i; 903 struct kvm_vcpu *vcpu; 904 905 kvm_for_each_vcpu(i, vcpu, kvm) { 906 if (!kvm_arm_pmu_irq_initialized(vcpu)) 907 continue; 908 909 if (irq_is_ppi(irq)) { 910 if (vcpu->arch.pmu.irq_num != irq) 911 return false; 912 } else { 913 if (vcpu->arch.pmu.irq_num == irq) 914 return false; 915 } 916 } 917 918 return true; 919 } 920 921 int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) 922 { 923 if (!kvm_vcpu_has_pmu(vcpu)) 924 return -ENODEV; 925 926 if (vcpu->arch.pmu.created) 927 return -EBUSY; 928 929 if (!vcpu->kvm->arch.pmuver) 930 vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver(); 931 932 if (vcpu->kvm->arch.pmuver == 0xf) 933 return -ENODEV; 934 935 switch (attr->attr) { 936 case KVM_ARM_VCPU_PMU_V3_IRQ: { 937 int __user *uaddr = (int __user *)(long)attr->addr; 938 int irq; 939 940 if (!irqchip_in_kernel(vcpu->kvm)) 941 return -EINVAL; 942 943 if (get_user(irq, uaddr)) 944 return -EFAULT; 945 946 /* The PMU overflow interrupt can be a PPI or a valid SPI. */ 947 if (!(irq_is_ppi(irq) || irq_is_spi(irq))) 948 return -EINVAL; 949 950 if (!pmu_irq_is_valid(vcpu->kvm, irq)) 951 return -EINVAL; 952 953 if (kvm_arm_pmu_irq_initialized(vcpu)) 954 return -EBUSY; 955 956 kvm_debug("Set kvm ARM PMU irq: %d\n", irq); 957 vcpu->arch.pmu.irq_num = irq; 958 return 0; 959 } 960 case KVM_ARM_VCPU_PMU_V3_FILTER: { 961 struct kvm_pmu_event_filter __user *uaddr; 962 struct kvm_pmu_event_filter filter; 963 int nr_events; 964 965 nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1; 966 967 uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr; 968 969 if (copy_from_user(&filter, uaddr, sizeof(filter))) 970 return -EFAULT; 971 972 if (((u32)filter.base_event + filter.nevents) > nr_events || 973 (filter.action != KVM_PMU_EVENT_ALLOW && 974 filter.action != KVM_PMU_EVENT_DENY)) 975 return -EINVAL; 976 977 mutex_lock(&vcpu->kvm->lock); 978 979 if (!vcpu->kvm->arch.pmu_filter) { 980 vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL); 981 if (!vcpu->kvm->arch.pmu_filter) { 982 mutex_unlock(&vcpu->kvm->lock); 983 return -ENOMEM; 984 } 985 986 /* 987 * The default depends on the first applied filter. 988 * If it allows events, the default is to deny. 989 * Conversely, if the first filter denies a set of 990 * events, the default is to allow. 991 */ 992 if (filter.action == KVM_PMU_EVENT_ALLOW) 993 bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events); 994 else 995 bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events); 996 } 997 998 if (filter.action == KVM_PMU_EVENT_ALLOW) 999 bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents); 1000 else 1001 bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents); 1002 1003 mutex_unlock(&vcpu->kvm->lock); 1004 1005 return 0; 1006 } 1007 case KVM_ARM_VCPU_PMU_V3_INIT: 1008 return kvm_arm_pmu_v3_init(vcpu); 1009 } 1010 1011 return -ENXIO; 1012 } 1013 1014 int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) 1015 { 1016 switch (attr->attr) { 1017 case KVM_ARM_VCPU_PMU_V3_IRQ: { 1018 int __user *uaddr = (int __user *)(long)attr->addr; 1019 int irq; 1020 1021 if (!irqchip_in_kernel(vcpu->kvm)) 1022 return -EINVAL; 1023 1024 if (!kvm_vcpu_has_pmu(vcpu)) 1025 return -ENODEV; 1026 1027 if (!kvm_arm_pmu_irq_initialized(vcpu)) 1028 return -ENXIO; 1029 1030 irq = vcpu->arch.pmu.irq_num; 1031 return put_user(irq, uaddr); 1032 } 1033 } 1034 1035 return -ENXIO; 1036 } 1037 1038 int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) 1039 { 1040 switch (attr->attr) { 1041 case KVM_ARM_VCPU_PMU_V3_IRQ: 1042 case KVM_ARM_VCPU_PMU_V3_INIT: 1043 case KVM_ARM_VCPU_PMU_V3_FILTER: 1044 if (kvm_vcpu_has_pmu(vcpu)) 1045 return 0; 1046 } 1047 1048 return -ENXIO; 1049 } 1050