// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>

static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);

/* attr.config1 bit flagging a perf event that backs a chained counter pair */
#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1

/*
 * kvm_pmu_event_mask - mask of the event-number field for this guest's PMU
 * @kvm: The VM pointer
 *
 * PMUv3 for v8.0 has 10-bit event numbers; v8.1 onwards widens the field
 * to 16 bits. Returns 0 (and warns once) for an unknown PMU version.
 */
static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
	switch (kvm->arch.pmuver) {
	case ID_AA64DFR0_PMUVER_8_0:
		return GENMASK(9, 0);
	case ID_AA64DFR0_PMUVER_8_1:
	case ID_AA64DFR0_PMUVER_8_4:
	case ID_AA64DFR0_PMUVER_8_5:
		return GENMASK(15, 0);
	default:		/* Shouldn't be here, just for sanity */
		WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
		return 0;
	}
}

/**
 * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 *
 * Only the cycle counter can be 64 bits, and only when the guest has set
 * PMCR_EL0.LC (long cycle count).
 */
static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (select_idx == ARMV8_PMU_CYCLE_IDX &&
		__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
}

/*
 * Walk back from an arbitrary counter to its owning vcpu: rewind to
 * pmc[0], then use container_of() through kvm_pmu and kvm_vcpu_arch.
 * Relies on pmc->idx matching the counter's position in the pmc[] array.
 */
static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu;
	struct kvm_vcpu_arch *vcpu_arch;

	pmc -= pmc->idx;
	pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
	vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
	return container_of(vcpu_arch, struct kvm_vcpu, arch);
}

/**
 * kvm_pmu_pmc_is_chained - determine if the pmc is chained
 * @pmc: The PMU counter pointer
 *
 * The chained bitmap has one bit per even/odd counter pair (idx >> 1).
 */
static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

	return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
 * @select_idx: The counter index
 *
 * Odd indices are the high halves of a chained pair.
 */
static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
{
	return select_idx & 0x1;
}

/**
 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
 * @pmc: The PMU counter pointer
 *
 * When a pair of PMCs are chained together we use the low counter (canonical)
 * to hold the underlying perf event.
 */
static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;

	return pmc;
}

/*
 * kvm_pmu_get_alternate_pmc - the other counter of this counter's even/odd
 * pair, regardless of chaining state.
 */
static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;
	else
		return pmc + 1;
}

/**
 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 *
 * True when the odd counter of this pair is programmed with the
 * architectural CHAIN event. The cycle counter can never chain.
 */
static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 eventsel, reg;

	/* Always look at the odd (high) counter of the pair */
	select_idx |= 0x1;

	if (select_idx == ARMV8_PMU_CYCLE_IDX)
		return false;

	reg = PMEVTYPER0_EL0 + select_idx;
	eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);

	return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
}

/**
 * kvm_pmu_get_pair_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 *
 * For a chained pair, returns the combined 64-bit value built from the
 * low/high counter registers; otherwise the single counter register.
 */
static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
					  struct kvm_pmc *pmc)
{
	u64 counter, counter_high, reg, enabled, running;

	if (kvm_pmu_pmc_is_chained(pmc)) {
		pmc = kvm_pmu_get_canonical_pmc(pmc);
		reg = PMEVCNTR0_EL0 + pmc->idx;

		counter = __vcpu_sys_reg(vcpu, reg);
		counter_high = __vcpu_sys_reg(vcpu, reg + 1);

		counter = lower_32_bits(counter) | (counter_high << 32);
	} else {
		reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
		counter = __vcpu_sys_reg(vcpu, reg);
	}

	/*
	 * The real counter value is equal to the value of counter register plus
	 * the value perf event counts.
	 */
	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event, &enabled,
						 &running);

	return counter;
}

/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 *
 * Returns the value the guest should see for this counter: the high or
 * low 32 bits of a chained pair, or the (possibly 64-bit) cycle counter.
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 counter;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx];

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(select_idx))
		counter = upper_32_bits(counter);
	else if (select_idx != ARMV8_PMU_CYCLE_IDX)
		counter = lower_32_bits(counter);

	return counter;
}

/**
 * kvm_pmu_set_counter_value - set PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 * @val: The counter value
 *
 * Adjusts the shadow register by the delta to the current (perf-inclusive)
 * value, then recreates the backing perf event so its sample_period
 * matches the new counter value.
 */
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
	u64 reg;

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
	__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 *
 * Operates on the canonical pmc, since that is where a chained pair's
 * perf event lives.
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

/**
 * kvm_pmu_stop_counter - stop PMU counter
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 *
 * If this counter has been configured to monitor some event, release it here.
 * The current counter value is folded back into the shadow register(s)
 * before the perf event is destroyed.
 */
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
{
	u64 counter, reg, val;

	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (!pmc->perf_event)
		return;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
		reg = PMCCNTR_EL0;
		val = counter;
	} else {
		reg = PMEVCNTR0_EL0 + pmc->idx;
		val = lower_32_bits(counter);
	}

	__vcpu_sys_reg(vcpu, reg) = val;

	/* A chained pair also stores the high half in the odd register */
	if (kvm_pmu_pmc_is_chained(pmc))
		__vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);

	kvm_pmu_release_perf_event(pmc);
}

/**
 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
 * @vcpu: The vcpu pointer
 *
 * Each pmc records its own index so kvm_pmc_to_vcpu() can walk back.
 */
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		pmu->pmc[i].idx = i;
}

/**
 * kvm_pmu_vcpu_reset - reset pmu state for cpu
 * @vcpu: The vcpu pointer
 *
 * Stops every implemented counter and clears all chaining state.
 */
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	for_each_set_bit(i, &mask, 32)
		kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);

	bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
}

/**
 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
 * @vcpu: The vcpu pointer
 *
 * Also waits for any pending overflow irq_work to finish.
 */
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		kvm_pmu_release_perf_event(&pmu->pmc[i]);
	irq_work_sync(&vcpu->arch.pmu.overflow_work);
}

/*
 * kvm_pmu_valid_counter_mask - bitmask of counters implemented for this
 * guest, derived from PMCR_EL0.N. The cycle counter is always present.
 */
u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;

	val &= ARMV8_PMU_PMCR_N_MASK;
	if (val == 0)
		return BIT(ARMV8_PMU_CYCLE_IDX);
	else
		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
}

/**
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event.
 * No-op unless the guest has globally enabled the PMU (PMCR_EL0.E).
 */
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];

		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event) {
			perf_event_enable(pmc->perf_event);
			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
				kvm_debug("fail to enable perf event\n");
		}
	}
}
/**
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event.
 */
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	if (!val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];

		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event)
			perf_event_disable(pmc->perf_event);
	}
}

/*
 * kvm_pmu_overflow_status - compute the set of counters that should raise
 * the overflow interrupt: overflowed (PMOVSSET), enabled (PMCNTENSET),
 * interrupt-enabled (PMINTENSET), and only when the PMU is globally on.
 */
static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
	u64 reg = 0;

	if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
	}

	return reg;
}

/*
 * kvm_pmu_update_state - recompute the PMU interrupt line level and, with
 * an in-kernel irqchip, propagate any edge to the vgic.
 */
static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	bool overflow;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	overflow = !!kvm_pmu_overflow_status(vcpu);
	if (pmu->irq_level == overflow)
		return;

	pmu->irq_level = overflow;

	if (likely(irqchip_in_kernel(vcpu->kvm))) {
		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
					      pmu->irq_num, overflow, pmu);
		WARN_ON(ret);
	}
}

/*
 * kvm_pmu_should_notify_user - with a userspace irqchip, report whether the
 * PMU irq level differs from what userspace last saw in kvm_run.
 */
bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	return pmu->irq_level != run_level;
}
/*
 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 */
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the PMU bit of the device irq bitmap for user space */
	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
	if (vcpu->arch.pmu.irq_level)
		regs->device_irq_level |= KVM_ARM_DEV_PMU;
}

/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and inject
 * an interrupt if that was the case.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/*
 * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding
 * to the event.
 * This is why we need a callback to do it once outside of the NMI context.
 */
static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
{
	struct kvm_vcpu *vcpu;
	struct kvm_pmu *pmu;

	pmu = container_of(work, struct kvm_pmu, overflow_work);
	vcpu = kvm_pmc_to_vcpu(pmu->pmc);

	kvm_vcpu_kick(vcpu);
}
/*
 * When the perf event overflows, set the overflow status and inform the vcpu.
 */
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
				  struct perf_sample_data *data,
				  struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	int idx = pmc->idx;
	u64 period;

	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * Reset the sample period to the architectural limit,
	 * i.e. the point where the counter overflows.
	 */
	period = -(local64_read(&perf_event->count));

	/* 32-bit counters wrap at 32 bits */
	if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
		period &= GENMASK(31, 0);

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);

	if (kvm_pmu_overflow_status(vcpu)) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);

		/* In NMI context, defer the kick to irq_work */
		if (!in_nmi())
			kvm_vcpu_kick(vcpu);
		else
			irq_work_queue(&vcpu->arch.pmu.overflow_work);
	}

	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

/**
 * kvm_pmu_software_increment - do software increment
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMSWINC register
 */
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
		return;

	/* Weed out disabled counters */
	val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

	for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
		u64 type, reg;

		if (!(val & BIT(i)))
			continue;

		/* PMSWINC only applies to ... SW_INC! */
		type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
		type &= kvm_pmu_event_mask(vcpu->kvm);
		if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
			continue;

		/* increment this event SW_INC counter */
		reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
		reg = lower_32_bits(reg);
		__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;

		if (reg) /* no overflow on the low part */
			continue;

		if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
			/* increment the high counter */
			reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
			reg = lower_32_bits(reg);
			__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
			if (!reg) /* mark overflow on the high counter */
				__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
		} else {
			/* mark overflow on low counter */
			__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
		}
	}
}

/**
 * kvm_pmu_handle_pmcr - handle PMCR register
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCR register
 *
 * Propagates the global enable bit, and handles the C (cycle counter
 * reset) and P (event counter reset) control bits.
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (val & ARMV8_PMU_PMCR_E) {
		kvm_pmu_enable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	} else {
		kvm_pmu_disable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	}

	if (val & ARMV8_PMU_PMCR_C)
		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

	if (val & ARMV8_PMU_PMCR_P) {
		/* PMCR_EL0.P does not reset the cycle counter */
		unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
		for_each_set_bit(i, &mask, 32)
			kvm_pmu_set_counter_value(vcpu, i, 0);
	}
}

/*
 * kvm_pmu_counter_is_enabled - counter counts only when the PMU is globally
 * enabled (PMCR_EL0.E) and the counter's PMCNTENSET bit is set.
 */
static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
}
/**
 * kvm_pmu_create_perf_event - create a perf event for a counter
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 *
 * Tears down any existing event on the counter (folding its value into the
 * shadow registers), then programs a new kernel counter matching the
 * guest's evtyper configuration. SW_INC and filtered events get no perf
 * event at all.
 */
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;
	struct perf_event *event;
	struct perf_event_attr attr;
	u64 eventsel, counter, reg, data;

	/*
	 * For chained counters the event type and filtering attributes are
	 * obtained from the low/even counter. We also use this counter to
	 * determine if the event is enabled/disabled.
	 */
	pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);

	reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
	data = __vcpu_sys_reg(vcpu, reg);

	kvm_pmu_stop_counter(vcpu, pmc);
	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	else
		eventsel = data & kvm_pmu_event_mask(vcpu->kvm);

	/* Software increment event doesn't need to be backed by a perf event */
	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
		return;

	/*
	 * If we have a filter in place and that the event isn't allowed, do
	 * not install a perf event either.
	 */
	if (vcpu->kvm->arch.pmu_filter &&
	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
		return;

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
	attr.exclude_hv = 1; /* Don't count EL2 events */
	attr.exclude_host = 1; /* Don't count host events */
	attr.config = eventsel;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (kvm_pmu_pmc_is_chained(pmc)) {
		/*
		 * The initial sample period (overflow count) of an event. For
		 * chained counters we only support overflow interrupts on the
		 * high counter.
		 */
		attr.sample_period = (-counter) & GENMASK(63, 0);
		attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;

		/* Overflow context is the high (odd) counter */
		event = perf_event_create_kernel_counter(&attr, -1, current,
							 kvm_pmu_perf_overflow,
							 pmc + 1);
	} else {
		/* The initial sample period (overflow count) of an event. */
		if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
			attr.sample_period = (-counter) & GENMASK(63, 0);
		else
			attr.sample_period = (-counter) & GENMASK(31, 0);

		event = perf_event_create_kernel_counter(&attr, -1, current,
							 kvm_pmu_perf_overflow, pmc);
	}

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return;
	}

	pmc->perf_event = event;
}
/**
 * kvm_pmu_update_pmc_chained - update chained bitmap
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 *
 * Update the chained bitmap based on the event type written in the
 * typer register and the enable state of the odd register.
 */
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
	bool new_state, old_state;

	old_state = kvm_pmu_pmc_is_chained(pmc);
	new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
		    kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);

	if (old_state == new_state)
		return;

	canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
	kvm_pmu_stop_counter(vcpu, canonical_pmc);
	if (new_state) {
		/*
		 * During promotion from !chained to chained we must ensure
		 * the adjacent counter is stopped and its event destroyed
		 */
		kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
		set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
		return;
	}
	clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 * @vcpu: The vcpu pointer
 * @data: The data guest writes to PMXEVTYPER_EL0
 * @select_idx: The number of selected counter
 *
 * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
 * event with given hardware event number. Here we call perf_event API to
 * emulate this action and create a kernel perf event for it.
 */
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
				    u64 select_idx)
{
	u64 reg, mask;

	/* Keep the filter bits, but clamp the event number to the PMU width */
	mask  =  ARMV8_PMU_EVTYPE_MASK;
	mask &= ~ARMV8_PMU_EVTYPE_EVENT;
	mask |= kvm_pmu_event_mask(vcpu->kvm);

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;

	__vcpu_sys_reg(vcpu, reg) = data & mask;

	kvm_pmu_update_pmc_chained(vcpu, select_idx);
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

/*
 * kvm_pmu_probe_pmuver - probe the host PMU version via a throwaway event
 *
 * Returns ID_AA64DFR0_PMUVER_IMP_DEF when no usable version can be found.
 */
int kvm_pmu_probe_pmuver(void)
{
	struct perf_event_attr attr = { };
	struct perf_event *event;
	struct arm_pmu *pmu;
	int pmuver = ID_AA64DFR0_PMUVER_IMP_DEF;

	/*
	 * Create a dummy event that only counts user cycles. As we'll never
	 * leave this function with the event being live, it will never
	 * count anything. But it allows us to probe some of the PMU
	 * details. Yes, this is terrible.
	 */
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = 0;
	attr.exclude_user = 0;
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;
	attr.exclude_host = 1;
	attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	attr.sample_period = GENMASK(63, 0);

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, &attr);

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return ID_AA64DFR0_PMUVER_IMP_DEF;
	}

	if (event->pmu) {
		pmu = to_arm_pmu(event->pmu);
		if (pmu->pmuver)
			pmuver = pmu->pmuver;
	}

	perf_event_disable(event);
	perf_event_release_kernel(event);

	return pmuver;
}

/*
 * kvm_pmu_get_pmceid - compute the PMCEID0/1 value exposed to the guest
 * @vcpu: The vcpu pointer
 * @pmceid1: false for PMCEID0_EL0 (events 0-31), true for PMCEID1_EL0
 *
 * Starts from the host's common-event bitmap and intersects it with the
 * VM's event filter (if any), including the high (0x4000-based) event range.
 */
u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
	unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
	u64 val, mask = 0;
	int base, i, nr_events;

	if (!pmceid1) {
		val = read_sysreg(pmceid0_el0);
		base = 0;
	} else {
		val = read_sysreg(pmceid1_el0);
		/*
		 * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
		 * as RAZ
		 */
		if (vcpu->kvm->arch.pmuver >= ID_AA64DFR0_PMUVER_8_4)
			val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
		base = 32;
	}

	if (!bmap)
		return val;

	nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

	/* Fold the filter bitmap into a mask, 8 event bits at a time */
	for (i = 0; i < 32; i += 8) {
		u64 byte;

		byte = bitmap_get_value8(bmap, base + i);
		mask |= byte << i;
		if (nr_events >= (0x4000 + base + 32)) {
			byte = bitmap_get_value8(bmap, 0x4000 + base + i);
			mask |= byte << (32 + i);
		}
	}

	return val & mask;
}

/*
 * kvm_arm_pmu_v3_enable - final validation before the vcpu first runs
 *
 * Checks that the PMU was initialized and that the irq configuration is
 * consistent with the irqchip model, then requests a one-off PMU reload.
 */
int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!vcpu->arch.pmu.created)
		return -EINVAL;

	/*
	 * A valid interrupt configuration for the PMU is either to have a
	 * properly configured interrupt number and using an in-kernel
	 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
	 */
	if (irqchip_in_kernel(vcpu->kvm)) {
		int irq = vcpu->arch.pmu.irq_num;
		/*
		 * If we are using an in-kernel vgic, at this point we know
		 * the vgic will be initialized, so we can check the PMU irq
		 * number against the dimensions of the vgic and make sure
		 * it's valid.
		 */
		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
			return -EINVAL;
	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
		return -EINVAL;
	}

	/* One-off reload of the PMU on first run */
	kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);

	return 0;
}

/*
 * kvm_arm_pmu_v3_init - wire up the PMU irq (in-kernel GIC only) and the
 * overflow irq_work, and mark the vcpu PMU as created.
 */
static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm)) {
		int ret;

		/*
		 * If using the PMU with an in-kernel virtual GIC
		 * implementation, we require the GIC to be already
		 * initialized when initializing the PMU.
		 */
		if (!vgic_initialized(vcpu->kvm))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
					 &vcpu->arch.pmu);
		if (ret)
			return ret;
	}

	init_irq_work(&vcpu->arch.pmu.overflow_work,
		      kvm_pmu_perf_overflow_notify_vcpu);

	vcpu->arch.pmu.created = true;
	return 0;
}

/*
 * For one VM the interrupt type must be same for each vcpu.
 * As a PPI, the interrupt number is the same for all vcpus,
 * while as an SPI it must be a separate number per vcpu.
 */
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
	int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			continue;

		if (irq_is_ppi(irq)) {
			if (vcpu->arch.pmu.irq_num != irq)
				return false;
		} else {
			if (vcpu->arch.pmu.irq_num == irq)
				return false;
		}
	}

	return true;
}

/*
 * kvm_arm_pmu_v3_set_attr - handle KVM_SET_DEVICE_ATTR for the vcpu PMU
 *
 * Supports setting the overflow irq, installing an event filter, and
 * finalizing the PMU (INIT). The first filter applied determines the
 * default policy (allow-by-default vs deny-by-default).
 */
int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return -ENODEV;

	if (vcpu->arch.pmu.created)
		return -EBUSY;

	if (!vcpu->kvm->arch.pmuver)
		vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver();

	if (vcpu->kvm->arch.pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
		return -ENODEV;

	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (get_user(irq, uaddr))
			return -EFAULT;

		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
			return -EINVAL;

		if (!pmu_irq_is_valid(vcpu->kvm, irq))
			return -EINVAL;

		if (kvm_arm_pmu_irq_initialized(vcpu))
			return -EBUSY;

		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
		vcpu->arch.pmu.irq_num = irq;
		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_FILTER: {
		struct kvm_pmu_event_filter __user *uaddr;
		struct kvm_pmu_event_filter filter;
		int nr_events;

		nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;

		if (copy_from_user(&filter, uaddr, sizeof(filter)))
			return -EFAULT;

		/* Reject out-of-range filters and unknown actions */
		if (((u32)filter.base_event + filter.nevents) > nr_events ||
		    (filter.action != KVM_PMU_EVENT_ALLOW &&
		     filter.action != KVM_PMU_EVENT_DENY))
			return -EINVAL;

		mutex_lock(&vcpu->kvm->lock);

		if (!vcpu->kvm->arch.pmu_filter) {
			vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL);
			if (!vcpu->kvm->arch.pmu_filter) {
				mutex_unlock(&vcpu->kvm->lock);
				return -ENOMEM;
			}

			/*
			 * The default depends on the first applied filter.
			 * If it allows events, the default is to deny.
			 * Conversely, if the first filter denies a set of
			 * events, the default is to allow.
			 */
			if (filter.action == KVM_PMU_EVENT_ALLOW)
				bitmap_zero(vcpu->kvm->arch.pmu_filter, nr_events);
			else
				bitmap_fill(vcpu->kvm->arch.pmu_filter, nr_events);
		}

		if (filter.action == KVM_PMU_EVENT_ALLOW)
			bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
		else
			bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);

		mutex_unlock(&vcpu->kvm->lock);

		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_INIT:
		return kvm_arm_pmu_v3_init(vcpu);
	}

	return -ENXIO;
}

/*
 * kvm_arm_pmu_v3_get_attr - handle KVM_GET_DEVICE_ATTR for the vcpu PMU
 * (only the overflow irq number can be read back).
 */
int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (!kvm_vcpu_has_pmu(vcpu))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		irq = vcpu->arch.pmu.irq_num;
		return put_user(irq, uaddr);
	}
	}

	return -ENXIO;
}

/*
 * kvm_arm_pmu_v3_has_attr - report which device attributes are supported;
 * all three are available iff the vcpu has a PMU.
 */
int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ:
	case KVM_ARM_VCPU_PMU_V3_INIT:
	case KVM_ARM_VCPU_PMU_V3_FILTER:
		if (kvm_vcpu_has_pmu(vcpu))
			return 0;
	}

	return -ENXIO;
}