// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>

static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);

#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1

static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
	switch (kvm->arch.pmuver) {
	case ID_AA64DFR0_PMUVER_8_0:
		return GENMASK(9, 0);
	case ID_AA64DFR0_PMUVER_8_1:
	case ID_AA64DFR0_PMUVER_8_4:
	case ID_AA64DFR0_PMUVER_8_5:
		return GENMASK(15, 0);
	default:		/* Shouldn't be here, just for sanity */
		WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
		return 0;
	}
}

/**
 * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (select_idx == ARMV8_PMU_CYCLE_IDX &&
		__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
}

static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu;
	struct kvm_vcpu_arch *vcpu_arch;

	pmc -= pmc->idx;
	pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
	vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
	return container_of(vcpu_arch, struct kvm_vcpu, arch);
}

/**
 * kvm_pmu_pmc_is_chained - determine if the pmc is chained
 * @pmc: The PMU counter pointer
 */
static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

	return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
{
	return select_idx & 0x1;
}

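/*
 * Illustrative note on pair indexing (not from the original source): event
 * counters are paired as {0,1}, {2,3}, ... and the pair containing counter N
 * is tracked by bit (N >> 1) of the vcpu's 'chained' bitmap, so for example
 * counters 4 and 5 both map to chained bit 2. The even (low) counter of a
 * chained pair is the "canonical" one that owns the backing perf event.
 */
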
/**
 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
 * @pmc: The PMU counter pointer
 *
 * When a pair of PMCs are chained together we use the low counter (canonical)
 * to hold the underlying perf event.
 */
static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;

	return pmc;
}
static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;
	else
		return pmc + 1;
}

/**
 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 eventsel, reg;

	select_idx |= 0x1;

	if (select_idx == ARMV8_PMU_CYCLE_IDX)
		return false;

	reg = PMEVTYPER0_EL0 + select_idx;
	eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);

	return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
}

/**
 * kvm_pmu_get_pair_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 */
static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
					  struct kvm_pmc *pmc)
{
	u64 counter, counter_high, reg, enabled, running;

	if (kvm_pmu_pmc_is_chained(pmc)) {
		pmc = kvm_pmu_get_canonical_pmc(pmc);
		reg = PMEVCNTR0_EL0 + pmc->idx;

		counter = __vcpu_sys_reg(vcpu, reg);
		counter_high = __vcpu_sys_reg(vcpu, reg + 1);

		counter = lower_32_bits(counter) | (counter_high << 32);
	} else {
		reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
		counter = __vcpu_sys_reg(vcpu, reg);
	}

	/*
	 * The real counter value is equal to the value of counter register plus
	 * the value perf event counts.
	 */
	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event, &enabled,
						 &running);

	return counter;
}

/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 counter;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx];

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(select_idx))
		counter = upper_32_bits(counter);
	else if (select_idx != ARMV8_PMU_CYCLE_IDX)
		counter = lower_32_bits(counter);

	return counter;
}

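/*
 * Illustrative example (not from the original source): if counters 2 and 3
 * are chained and the combined 64-bit pair value works out to
 * 0x00000001fffffff0, a guest read of counter 2 returns 0xfffffff0 and a
 * read of counter 3 returns 0x1; reads of the cycle counter
 * (ARMV8_PMU_CYCLE_IDX) are not truncated on this path.
 */
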
/**
 * kvm_pmu_set_counter_value - set PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 * @val: The counter value
 */
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
	u64 reg;

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
	__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

/**
 * kvm_pmu_stop_counter - stop PMU counter
 * @pmc: The PMU counter pointer
 *
 * If this counter has been configured to monitor some event, release it here.
 */
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
{
	u64 counter, reg, val;

	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (!pmc->perf_event)
		return;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
		reg = PMCCNTR_EL0;
		val = counter;
	} else {
		reg = PMEVCNTR0_EL0 + pmc->idx;
		val = lower_32_bits(counter);
	}

	__vcpu_sys_reg(vcpu, reg) = val;

	if (kvm_pmu_pmc_is_chained(pmc))
		__vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);

	kvm_pmu_release_perf_event(pmc);
}

/**
 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		pmu->pmc[i].idx = i;
}

/**
 * kvm_pmu_vcpu_reset - reset pmu state for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	for_each_set_bit(i, &mask, 32)
		kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);

	bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
}

/**
 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		kvm_pmu_release_perf_event(&pmu->pmc[i]);
	irq_work_sync(&vcpu->arch.pmu.overflow_work);
}

u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;

	val &= ARMV8_PMU_PMCR_N_MASK;
	if (val == 0)
		return BIT(ARMV8_PMU_CYCLE_IDX);
	else
		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
}

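/*
 * Illustrative example (not from the original source): with PMCR_EL0.N == 6
 * the mask above is GENMASK(5, 0) | BIT(31) == 0x8000003f, i.e. event
 * counters 0..5 plus the cycle counter at index ARMV8_PMU_CYCLE_IDX (31).
 */
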
/**
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event
 */
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];

		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event) {
			perf_event_enable(pmc->perf_event);
			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
				kvm_debug("fail to enable perf event\n");
		}
	}
}

/**
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event
 */
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	if (!val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];

		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event)
			perf_event_disable(pmc->perf_event);
	}
}

static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
	u64 reg = 0;

	if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
	}

	return reg;
}

static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	bool overflow;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	overflow = !!kvm_pmu_overflow_status(vcpu);
	if (pmu->irq_level == overflow)
		return;

	pmu->irq_level = overflow;

	if (likely(irqchip_in_kernel(vcpu->kvm))) {
		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
					      pmu->irq_num, overflow, pmu);
		WARN_ON(ret);
	}
}

bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	return pmu->irq_level != run_level;
}

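/*
 * Illustrative note (not from the original source): with an in-kernel vgic
 * the overflow line is driven directly via kvm_vgic_inject_irq() above. With
 * a userspace irqchip, the level is instead mirrored into
 * run->s.regs.device_irq_level (KVM_ARM_DEV_PMU bit) below, and
 * kvm_pmu_should_notify_user() reports when that userspace-visible view has
 * become stale and needs to be refreshed.
 */
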
/*
 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 */
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the timer bitmap for user space */
	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
	if (vcpu->arch.pmu.irq_level)
		regs->device_irq_level |= KVM_ARM_DEV_PMU;
}

/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and inject
 * an interrupt if that was the case.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding
 * to the event.
 * This is why we need a callback to do it once outside of the NMI context.
 */
static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
{
	struct kvm_vcpu *vcpu;
	struct kvm_pmu *pmu;

	pmu = container_of(work, struct kvm_pmu, overflow_work);
	vcpu = kvm_pmc_to_vcpu(pmu->pmc);

	kvm_vcpu_kick(vcpu);
}

/**
 * When the perf event overflows, set the overflow status and inform the vcpu.
 */
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
				  struct perf_sample_data *data,
				  struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	int idx = pmc->idx;
	u64 period;

	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * Reset the sample period to the architectural limit,
	 * i.e. the point where the counter overflows.
	 */
	period = -(local64_read(&perf_event->count));

	if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
		period &= GENMASK(31, 0);

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);

	if (kvm_pmu_overflow_status(vcpu)) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);

		if (!in_nmi())
			kvm_vcpu_kick(vcpu);
		else
			irq_work_queue(&vcpu->arch.pmu.overflow_work);
	}

	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

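/*
 * Illustrative example (not from the original source): the sample period is
 * the two's complement of the emulated counter value. If a 32-bit event
 * counter currently reads 0xfffffff0, the period computed above is
 * (-0xfffffff0) & GENMASK(31, 0) == 0x10, so the next host-side overflow
 * fires after 16 more events, exactly when the guest counter would wrap.
 */
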
/**
 * kvm_pmu_software_increment - do software increment
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMSWINC register
 */
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
		return;

	/* Weed out disabled counters */
	val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

	for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
		u64 type, reg;

		if (!(val & BIT(i)))
			continue;

		/* PMSWINC only applies to ... SW_INC! */
		type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
		type &= kvm_pmu_event_mask(vcpu->kvm);
		if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
			continue;

		/* increment this even SW_INC counter */
		reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
		reg = lower_32_bits(reg);
		__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;

		if (reg) /* no overflow on the low part */
			continue;

		if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
			/* increment the high counter */
			reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
			reg = lower_32_bits(reg);
			__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
			if (!reg) /* mark overflow on the high counter */
				__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
		} else {
			/* mark overflow on low counter */
			__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
		}
	}
}

/**
 * kvm_pmu_handle_pmcr - handle PMCR register
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCR register
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (val & ARMV8_PMU_PMCR_E) {
		kvm_pmu_enable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	} else {
		kvm_pmu_disable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	}

	if (val & ARMV8_PMU_PMCR_C)
		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

	if (val & ARMV8_PMU_PMCR_P) {
		unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
		for_each_set_bit(i, &mask, 32)
			kvm_pmu_set_counter_value(vcpu, i, 0);
	}
}

static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
}

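/*
 * Illustrative note (not from the original source): kvm_pmu_create_perf_event()
 * below translates the guest's PMEVTYPERn_EL0/PMCCFILTR_EL0 programming into a
 * pinned PERF_TYPE_RAW perf_event_attr. For example, event number 0x11
 * (CPU_CYCLES) with the EXCLUDE_EL0 filter bit set becomes attr.config = 0x11
 * with attr.exclude_user = 1, and the sample period is chosen so the host
 * event overflows exactly when the emulated guest counter would.
 */
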
/**
 * kvm_pmu_create_perf_event - create a perf event for a counter
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 */
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;
	struct perf_event *event;
	struct perf_event_attr attr;
	u64 eventsel, counter, reg, data;

	/*
	 * For chained counters the event type and filtering attributes are
	 * obtained from the low/even counter. We also use this counter to
	 * determine if the event is enabled/disabled.
	 */
	pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);

	reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
	data = __vcpu_sys_reg(vcpu, reg);

	kvm_pmu_stop_counter(vcpu, pmc);
	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	else
		eventsel = data & kvm_pmu_event_mask(vcpu->kvm);

	/* Software increment event doesn't need to be backed by a perf event */
	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
		return;

	/*
	 * If we have a filter in place and that the event isn't allowed, do
	 * not install a perf event either.
	 */
	if (vcpu->kvm->arch.pmu_filter &&
	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
		return;

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
	attr.exclude_hv = 1; /* Don't count EL2 events */
	attr.exclude_host = 1; /* Don't count host events */
	attr.config = eventsel;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (kvm_pmu_pmc_is_chained(pmc)) {
		/**
		 * The initial sample period (overflow count) of an event. For
		 * chained counters we only support overflow interrupts on the
		 * high counter.
		 */
		attr.sample_period = (-counter) & GENMASK(63, 0);
		attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;

		event = perf_event_create_kernel_counter(&attr, -1, current,
							 kvm_pmu_perf_overflow,
							 pmc + 1);
	} else {
		/* The initial sample period (overflow count) of an event. */
		if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
			attr.sample_period = (-counter) & GENMASK(63, 0);
		else
			attr.sample_period = (-counter) & GENMASK(31, 0);

		event = perf_event_create_kernel_counter(&attr, -1, current,
							 kvm_pmu_perf_overflow,
							 pmc);
	}

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return;
	}

	pmc->perf_event = event;
}

/**
 * kvm_pmu_update_pmc_chained - update chained bitmap
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 *
 * Update the chained bitmap based on the event type written in the
 * typer register and the enable state of the odd register.
 */
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
	bool new_state, old_state;

	old_state = kvm_pmu_pmc_is_chained(pmc);
	new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
		    kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);

	if (old_state == new_state)
		return;

	canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
	kvm_pmu_stop_counter(vcpu, canonical_pmc);
	if (new_state) {
		/*
		 * During promotion from !chained to chained we must ensure
		 * the adjacent counter is stopped and its event destroyed
		 */
		kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
		set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
		return;
	}
	clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

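/*
 * Illustrative example (not from the original source): if the guest programs
 * counter 5 with the CHAIN event (0x1e) and enables it, the update above sets
 * chained bit 2, so counters 4 and 5 act as one 64-bit counter for the event
 * programmed on counter 4, backed by a single perf event owned by the
 * canonical (even) counter 4.
 */
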
/**
 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 * @vcpu: The vcpu pointer
 * @data: The data guest writes to PMXEVTYPER_EL0
 * @select_idx: The number of selected counter
 *
 * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
 * event with given hardware event number. Here we call perf_event API to
 * emulate this action and create a kernel perf event for it.
 */
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
				    u64 select_idx)
{
	u64 reg, mask;

	mask  =  ARMV8_PMU_EVTYPE_MASK;
	mask &= ~ARMV8_PMU_EVTYPE_EVENT;
	mask |= kvm_pmu_event_mask(vcpu->kvm);

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;

	__vcpu_sys_reg(vcpu, reg) = data & mask;

	kvm_pmu_update_pmc_chained(vcpu, select_idx);
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

void kvm_host_pmu_init(struct arm_pmu *pmu)
{
	if (pmu->pmuver != 0 && pmu->pmuver != ID_AA64DFR0_PMUVER_IMP_DEF &&
	    !kvm_arm_support_pmu_v3() && !is_protected_kvm_enabled())
		static_branch_enable(&kvm_arm_pmu_available);
}

static int kvm_pmu_probe_pmuver(void)
{
	struct perf_event_attr attr = { };
	struct perf_event *event;
	struct arm_pmu *pmu;
	int pmuver = ID_AA64DFR0_PMUVER_IMP_DEF;

	/*
	 * Create a dummy event that only counts user cycles. As we'll never
	 * leave this function with the event being live, it will never
	 * count anything. But it allows us to probe some of the PMU
	 * details. Yes, this is terrible.
	 */
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = 0;
	attr.exclude_user = 0;
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;
	attr.exclude_host = 1;
	attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	attr.sample_period = GENMASK(63, 0);

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, &attr);

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return ID_AA64DFR0_PMUVER_IMP_DEF;
	}

	if (event->pmu) {
		pmu = to_arm_pmu(event->pmu);
		if (pmu->pmuver)
			pmuver = pmu->pmuver;
	}

	perf_event_disable(event);
	perf_event_release_kernel(event);

	return pmuver;
}

u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
	unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
	u64 val, mask = 0;
	int base, i, nr_events;

	if (!pmceid1) {
		val = read_sysreg(pmceid0_el0);
		base = 0;
	} else {
		val = read_sysreg(pmceid1_el0);
		/*
		 * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
		 * as RAZ
		 */
		if (vcpu->kvm->arch.pmuver >= ID_AA64DFR0_PMUVER_8_4)
			val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
		base = 32;
	}

	if (!bmap)
		return val;

	nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

	for (i = 0; i < 32; i += 8) {
		u64 byte;

		byte = bitmap_get_value8(bmap, base + i);
		mask |= byte << i;
		if (nr_events >= (0x4000 + base + 32)) {
			byte = bitmap_get_value8(bmap, 0x4000 + base + i);
			mask |= byte << (32 + i);
		}
	}

	return val & mask;
}

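/*
 * Illustrative note (not from the original source): the loop above folds the
 * VM's event filter bitmap into the PMCEID value 8 bits at a time, so an
 * event whose bit is cleared in kvm->arch.pmu_filter is also hidden from the
 * guest's PMCEID0_EL0/PMCEID1_EL0 reads; the 0x4000 offset covers the
 * extended (high) event range advertised in the upper half of each register.
 */
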
int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!vcpu->arch.pmu.created)
		return -EINVAL;

	/*
	 * A valid interrupt configuration for the PMU is either to have a
	 * properly configured interrupt number and using an in-kernel
	 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
	 */
	if (irqchip_in_kernel(vcpu->kvm)) {
		int irq = vcpu->arch.pmu.irq_num;
		/*
		 * If we are using an in-kernel vgic, at this point we know
		 * the vgic will be initialized, so we can check the PMU irq
		 * number against the dimensions of the vgic and make sure
		 * it's valid.
		 */
		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
			return -EINVAL;
	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
		return -EINVAL;
	}

	/* One-off reload of the PMU on first run */
	kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);

	return 0;
}

static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm)) {
		int ret;

		/*
		 * If using the PMU with an in-kernel virtual GIC
		 * implementation, we require the GIC to be already
		 * initialized when initializing the PMU.
		 */
		if (!vgic_initialized(vcpu->kvm))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
					 &vcpu->arch.pmu);
		if (ret)
			return ret;
	}

	init_irq_work(&vcpu->arch.pmu.overflow_work,
		      kvm_pmu_perf_overflow_notify_vcpu);

	vcpu->arch.pmu.created = true;
	return 0;
}

/*
 * For one VM the interrupt type must be same for each vcpu.
 * As a PPI, the interrupt number is the same for all vcpus,
 * while as an SPI it must be a separate number per vcpu.
 */
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
	int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			continue;

		if (irq_is_ppi(irq)) {
			if (vcpu->arch.pmu.irq_num != irq)
				return false;
		} else {
			if (vcpu->arch.pmu.irq_num == irq)
				return false;
		}
	}

	return true;
}

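/*
 * Illustrative example (not from the original source): in the attribute
 * handler below, the first KVM_ARM_VCPU_PMU_V3_FILTER applied to a VM chooses
 * the default policy. A first ALLOW filter for events 0x0-0x3f leaves every
 * other event denied, while a first DENY filter for the same range leaves
 * every other event allowed; later filters only set or clear the bits they
 * cover.
 */
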
int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return -ENODEV;

	if (vcpu->arch.pmu.created)
		return -EBUSY;

	if (!vcpu->kvm->arch.pmuver)
		vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();

	if (vcpu->kvm->arch.pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
		return -ENODEV;

	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (get_user(irq, uaddr))
			return -EFAULT;

		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
			return -EINVAL;

		if (!pmu_irq_is_valid(vcpu->kvm, irq))
			return -EINVAL;

		if (kvm_arm_pmu_irq_initialized(vcpu))
			return -EBUSY;

		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
		vcpu->arch.pmu.irq_num = irq;
		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_FILTER: {
		struct kvm_pmu_event_filter __user *uaddr;
		struct kvm_pmu_event_filter filter;
		int nr_events;

		nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;

		if (copy_from_user(&filter, uaddr, sizeof(filter)))
			return -EFAULT;

		if (((u32)filter.base_event + filter.nevents) > nr_events ||
		    (filter.action != KVM_PMU_EVENT_ALLOW &&
		     filter.action != KVM_PMU_EVENT_DENY))
			return -EINVAL;

		mutex_lock(&vcpu->kvm->lock);

		if (!vcpu->kvm->arch.pmu_filter) {
			vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
			if (!vcpu->kvm->arch.pmu_filter) {
				mutex_unlock(&vcpu->kvm->lock);
				return -ENOMEM;
			}

			/*
			 * The default depends on the first applied filter.
			 * If it allows events, the default is to deny.
			 * Conversely, if the first filter denies a set of
			 * events, the default is to allow.
			 */
			if (filter.action == KVM_PMU_EVENT_ALLOW)
				bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events);
			else
				bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events);
		}

		if (filter.action == KVM_PMU_EVENT_ALLOW)
			bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
		else
			bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);

		mutex_unlock(&vcpu->kvm->lock);

		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_INIT:
		return kvm_arm_pmu_v3_init(vcpu);
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (!kvm_vcpu_has_pmu(vcpu))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		irq = vcpu->arch.pmu.irq_num;
		return put_user(irq, uaddr);
	}
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ:
	case KVM_ARM_VCPU_PMU_V3_INIT:
	case KVM_ARM_VCPU_PMU_V3_FILTER:
		if (kvm_vcpu_has_pmu(vcpu))
			return 0;
	}

	return -ENXIO;
}