// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>

#define PERF_ATTR_CFG1_COUNTER_64BIT	BIT(0)

DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);

static LIST_HEAD(arm_pmus);
static DEFINE_MUTEX(arm_pmus_lock);

static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc);
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc);

static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc)
{
	return container_of(pmc, struct kvm_vcpu, arch.pmu.pmc[pmc->idx]);
}

static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx)
{
	return &vcpu->arch.pmu.pmc[cnt_idx];
}

static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
	unsigned int pmuver;

	pmuver = kvm->arch.arm_pmu->pmuver;

	switch (pmuver) {
	case ID_AA64DFR0_EL1_PMUVer_IMP:
		return GENMASK(9, 0);
	case ID_AA64DFR0_EL1_PMUVer_V3P1:
	case ID_AA64DFR0_EL1_PMUVer_V3P4:
	case ID_AA64DFR0_EL1_PMUVer_V3P5:
	case ID_AA64DFR0_EL1_PMUVer_V3P7:
		return GENMASK(15, 0);
	default:		/* Shouldn't be here, just for sanity */
		WARN_ONCE(1, "Unknown PMU version %d\n", pmuver);
		return 0;
	}
}

/**
 * kvm_pmc_is_64bit - determine if counter is 64bit
 * @pmc: counter context
 */
static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc)
{
	return (pmc->idx == ARMV8_PMU_CYCLE_IDX ||
		kvm_pmu_is_3p5(kvm_pmc_to_vcpu(pmc)));
}

/*
 * A counter only overflows at 64 bits when PMCR_EL0.LP (event counters)
 * or PMCR_EL0.LC (cycle counter) is set; otherwise it overflows at 32 bits.
 */
static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc)
{
	u64 val = __vcpu_sys_reg(kvm_pmc_to_vcpu(pmc), PMCR_EL0);

	return (pmc->idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) ||
	       (pmc->idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC));
}

/*
 * Chaining is only possible from an even-numbered event counter that
 * overflows at 32 bits into the odd-numbered counter that follows it.
 */
static bool kvm_pmu_counter_can_chain(struct kvm_pmc *pmc)
{
	return (!(pmc->idx & 1) && (pmc->idx + 1) < ARMV8_PMU_CYCLE_IDX &&
		!kvm_pmc_has_64bit_overflow(pmc));
}

static u32 counter_index_to_reg(u64 idx)
{
	return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + idx;
}

static u32 counter_index_to_evtreg(u64 idx)
{
	return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + idx;
}

static u64 kvm_pmu_get_pmc_value(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 counter, reg, enabled, running;

	reg = counter_index_to_reg(pmc->idx);
	counter = __vcpu_sys_reg(vcpu, reg);

	/*
	 * The real counter value is equal to the value of counter register plus
	 * the value perf event counts.
	 */
	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event, &enabled,
						 &running);

	if (!kvm_pmc_is_64bit(pmc))
		counter = lower_32_bits(counter);

	return counter;
}

/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	return kvm_pmu_get_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx));
}

static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 reg;

	kvm_pmu_release_perf_event(pmc);

	reg = counter_index_to_reg(pmc->idx);

	if (vcpu_mode_is_32bit(vcpu) && pmc->idx != ARMV8_PMU_CYCLE_IDX &&
	    !force) {
		/*
		 * Even with PMUv3p5, AArch32 cannot write to the top
		 * 32bit of the counters. The only possible course of
		 * action is to use PMCR.P, which will reset them to
		 * 0 (the only use of the 'force' parameter).
		 *
		 * Narrow the write to the bottom 32 bits and preserve
		 * the current top 32 bits of the counter.
		 */
		val = lower_32_bits(val);
		val |= __vcpu_sys_reg(vcpu, reg) & GENMASK(63, 32);
	}

	__vcpu_sys_reg(vcpu, reg) = val;

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(pmc);
}

/**
 * kvm_pmu_set_counter_value - set PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 * @val: The counter value
 */
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx), val, false);
}

/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

/**
 * kvm_pmu_stop_counter - stop PMU counter
 * @pmc: The PMU counter pointer
 *
 * If this counter has been configured to monitor some event, release it here.
 */
static void kvm_pmu_stop_counter(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 reg, val;

	if (!pmc->perf_event)
		return;

	val = kvm_pmu_get_pmc_value(pmc);

	reg = counter_index_to_reg(pmc->idx);

	__vcpu_sys_reg(vcpu, reg) = val;

	kvm_pmu_release_perf_event(pmc);
}

/**
 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		pmu->pmc[i].idx = i;
}

/**
 * kvm_pmu_vcpu_reset - reset pmu state for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
	int i;

	for_each_set_bit(i, &mask, 32)
		kvm_pmu_stop_counter(kvm_vcpu_idx_to_pmc(vcpu, i));
}

/**
 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	int i;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, i));
	irq_work_sync(&vcpu->arch.pmu.overflow_work);
}

u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;

	val &= ARMV8_PMU_PMCR_N_MASK;
	if (val == 0)
		return BIT(ARMV8_PMU_CYCLE_IDX);
	else
		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
}

/**
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event
 */
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		struct kvm_pmc *pmc;

		if (!(val & BIT(i)))
			continue;

		pmc = kvm_vcpu_idx_to_pmc(vcpu, i);

		if (!pmc->perf_event) {
			kvm_pmu_create_perf_event(pmc);
		} else {
			perf_event_enable(pmc->perf_event);
			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
				kvm_debug("failed to enable perf event\n");
		}
	}
}

/**
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event
 */
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		struct kvm_pmc *pmc;

		if (!(val & BIT(i)))
			continue;

		pmc = kvm_vcpu_idx_to_pmc(vcpu, i);

		if (pmc->perf_event)
			perf_event_disable(pmc->perf_event);
	}
}

/*
 * An overflow only raises the interrupt line for counters that are
 * enabled (PMCNTENSET_EL0) and have their interrupt enabled
 * (PMINTENSET_EL1), and only while PMCR_EL0.E is set.
 */
static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
	u64 reg = 0;

	if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
	}

	return reg;
}

static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	bool overflow;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	overflow = !!kvm_pmu_overflow_status(vcpu);
	if (pmu->irq_level == overflow)
		return;

	pmu->irq_level = overflow;

	if (likely(irqchip_in_kernel(vcpu->kvm))) {
		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
					      pmu->irq_num, overflow, pmu);
		WARN_ON(ret);
	}
}

bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	return pmu->irq_level != run_level;
}

/*
 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 */
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the PMU overflow bit for user space */
	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
	if (vcpu->arch.pmu.irq_level)
		regs->device_irq_level |= KVM_ARM_DEV_PMU;
}

/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and inject
 * an interrupt if that was the case.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/*
 * When the perf interrupt is an NMI, we cannot safely notify the vcpu
 * corresponding to the event.
 * This is why we need a callback to do it once outside of the NMI context.
 */
static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
{
	struct kvm_vcpu *vcpu;

	vcpu = container_of(work, struct kvm_vcpu, arch.pmu.overflow_work);
	kvm_vcpu_kick(vcpu);
}

/*
 * Perform an increment on any of the counters described in @mask,
 * generating the overflow if required, and propagate it as a chained
 * event if possible.
 */
static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
				      unsigned long mask, u32 event)
{
	int i;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
		return;

	/* Weed out disabled counters */
	mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

	for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) {
		struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
		u64 type, reg;

		/* Filter on event type */
		type = __vcpu_sys_reg(vcpu, counter_index_to_evtreg(i));
		type &= kvm_pmu_event_mask(vcpu->kvm);
		if (type != event)
			continue;

		/* Increment this counter */
		reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1;
		if (!kvm_pmc_is_64bit(pmc))
			reg = lower_32_bits(reg);
		__vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg;

		/* No overflow? move on */
		if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg))
			continue;

		/* Mark overflow */
		__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);

		if (kvm_pmu_counter_can_chain(pmc))
			kvm_pmu_counter_increment(vcpu, BIT(i + 1),
						  ARMV8_PMUV3_PERFCTR_CHAIN);
	}
}

/* Compute the sample period for a given counter value */
static u64 compute_period(struct kvm_pmc *pmc, u64 counter)
{
	u64 val;

	if (kvm_pmc_is_64bit(pmc) && kvm_pmc_has_64bit_overflow(pmc))
		val = (-counter) & GENMASK(63, 0);
	else
		val = (-counter) & GENMASK(31, 0);

	return val;
}

/*
 * When the perf event overflows, set the overflow status and inform the vcpu.
 */
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
				  struct perf_sample_data *data,
				  struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	int idx = pmc->idx;
	u64 period;

	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * Reset the sample period to the architectural limit,
	 * i.e. the point where the counter overflows.
	 */
	period = compute_period(pmc, local64_read(&perf_event->count));

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);

	if (kvm_pmu_counter_can_chain(pmc))
		kvm_pmu_counter_increment(vcpu, BIT(idx + 1),
					  ARMV8_PMUV3_PERFCTR_CHAIN);

	if (kvm_pmu_overflow_status(vcpu)) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);

		if (!in_nmi())
			kvm_vcpu_kick(vcpu);
		else
			irq_work_queue(&vcpu->arch.pmu.overflow_work);
	}

	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

/**
 * kvm_pmu_software_increment - do software increment
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMSWINC register
 */
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
	kvm_pmu_counter_increment(vcpu, val, ARMV8_PMUV3_PERFCTR_SW_INCR);
}

/**
 * kvm_pmu_handle_pmcr - handle PMCR register
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCR register
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	/* Fixup PMCR_EL0 to reconcile the PMU version and the LP bit */
	if (!kvm_pmu_is_3p5(vcpu))
		val &= ~ARMV8_PMU_PMCR_LP;

	/* The reset bits don't indicate any state, and shouldn't be saved. */
	__vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P);

	if (val & ARMV8_PMU_PMCR_E) {
		kvm_pmu_enable_counter_mask(vcpu,
			__vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	} else {
		kvm_pmu_disable_counter_mask(vcpu,
			__vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	}

	if (val & ARMV8_PMU_PMCR_C)
		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

	if (val & ARMV8_PMU_PMCR_P) {
		unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
		for_each_set_bit(i, &mask, 32)
			kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true);
	}
	kvm_vcpu_pmu_restore_guest(vcpu);
}

static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

	return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(pmc->idx));
}

/**
 * kvm_pmu_create_perf_event - create a perf event for a counter
 * @pmc: Counter context
 */
static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
	struct perf_event *event;
	struct perf_event_attr attr;
	u64 eventsel, reg, data;

	reg = counter_index_to_evtreg(pmc->idx);
	data = __vcpu_sys_reg(vcpu, reg);

	kvm_pmu_stop_counter(pmc);
	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	else
		eventsel = data & kvm_pmu_event_mask(vcpu->kvm);

	/*
	 * Neither SW increment nor chained events need to be backed
	 * by a perf event.
	 */
	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR ||
	    eventsel == ARMV8_PMUV3_PERFCTR_CHAIN)
		return;

	/*
	 * If we have a filter in place and the event isn't allowed, do
	 * not install a perf event either.
	 */
	if (vcpu->kvm->arch.pmu_filter &&
	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
		return;

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.type = arm_pmu->pmu.type;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = !kvm_pmu_counter_is_enabled(pmc);
	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
	attr.exclude_hv = 1; /* Don't count EL2 events */
	attr.exclude_host = 1; /* Don't count host events */
	attr.config = eventsel;

	/*
	 * If counting with a 64bit counter, advertise it to the perf
	 * code, carefully dealing with the initial sample period
	 * which also depends on the overflow.
	 */
	if (kvm_pmc_is_64bit(pmc))
		attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT;

	attr.sample_period = compute_period(pmc, kvm_pmu_get_pmc_value(pmc));

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, pmc);

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return;
	}

	pmc->perf_event = event;
}

/**
 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 * @vcpu: The vcpu pointer
 * @data: The data guest writes to PMXEVTYPER_EL0
 * @select_idx: The number of selected counter
 *
 * When the guest accesses PMXEVTYPER_EL0, it wants to set a PMC to count an
 * event with the given hardware event number. Here we call the perf_event API
 * to emulate this action and create a kernel perf event for it.
 */
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
				    u64 select_idx)
{
	struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, select_idx);
	u64 reg, mask;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	mask = ARMV8_PMU_EVTYPE_MASK;
	mask &= ~ARMV8_PMU_EVTYPE_EVENT;
	mask |= kvm_pmu_event_mask(vcpu->kvm);

	reg = counter_index_to_evtreg(pmc->idx);

	__vcpu_sys_reg(vcpu, reg) = data & mask;

	kvm_pmu_create_perf_event(pmc);
}

void kvm_host_pmu_init(struct arm_pmu *pmu)
{
	struct arm_pmu_entry *entry;

	if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
	    pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
		return;

	mutex_lock(&arm_pmus_lock);

	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		goto out_unlock;

	entry->arm_pmu = pmu;
	list_add_tail(&entry->entry, &arm_pmus);

	if (list_is_singular(&arm_pmus))
		static_branch_enable(&kvm_arm_pmu_available);

out_unlock:
	mutex_unlock(&arm_pmus_lock);
}

static struct arm_pmu *kvm_pmu_probe_armpmu(void)
{
	struct arm_pmu *tmp, *pmu = NULL;
	struct arm_pmu_entry *entry;
	int cpu;

	mutex_lock(&arm_pmus_lock);

	cpu = smp_processor_id();
	list_for_each_entry(entry, &arm_pmus, entry) {
		tmp = entry->arm_pmu;

		if (cpumask_test_cpu(cpu, &tmp->supported_cpus)) {
			pmu = tmp;
			break;
		}
	}

	mutex_unlock(&arm_pmus_lock);

	return pmu;
}

u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
	unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
	u64 val, mask = 0;
	int base, i, nr_events;

	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!pmceid1) {
		val = read_sysreg(pmceid0_el0);
		/* always support CHAIN */
		val |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN);
		base = 0;
	} else {
		val = read_sysreg(pmceid1_el0);
		/*
		 * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
		 * as RAZ
		 */
		if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4)
			val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
		base = 32;
	}

	if (!bmap)
		return val;

	nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

	for (i = 0; i < 32; i += 8) {
		u64 byte;

		byte = bitmap_get_value8(bmap, base + i);
		mask |= byte << i;
		if (nr_events >= (0x4000 + base + 32)) {
			byte = bitmap_get_value8(bmap, 0x4000 + base + i);
			mask |= byte << (32 + i);
		}
	}

	return val & mask;
}

int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!vcpu->arch.pmu.created)
		return -EINVAL;

	/*
	 * A valid interrupt configuration for the PMU is either to have a
	 * properly configured interrupt number and use an in-kernel
	 * irqchip, or to have no in-kernel GIC and not set an IRQ.
	 */
	if (irqchip_in_kernel(vcpu->kvm)) {
		int irq = vcpu->arch.pmu.irq_num;
		/*
		 * If we are using an in-kernel vgic, at this point we know
		 * the vgic will be initialized, so we can check the PMU irq
		 * number against the dimensions of the vgic and make sure
		 * it's valid.
		 */
		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
			return -EINVAL;
	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
		return -EINVAL;
	}

	/* One-off reload of the PMU on first run */
	kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);

	return 0;
}

static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm)) {
		int ret;

		/*
		 * If using the PMU with an in-kernel virtual GIC
		 * implementation, we require the GIC to be already
		 * initialized when initializing the PMU.
		 */
		if (!vgic_initialized(vcpu->kvm))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
					 &vcpu->arch.pmu);
		if (ret)
			return ret;
	}

	init_irq_work(&vcpu->arch.pmu.overflow_work,
		      kvm_pmu_perf_overflow_notify_vcpu);

	vcpu->arch.pmu.created = true;
	return 0;
}

/*
 * For one VM the interrupt type must be the same for each vcpu.
 * As a PPI, the interrupt number is the same for all vcpus,
 * while as an SPI it must be a separate number per vcpu.
 */
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			continue;

		if (irq_is_ppi(irq)) {
			if (vcpu->arch.pmu.irq_num != irq)
				return false;
		} else {
			if (vcpu->arch.pmu.irq_num == irq)
				return false;
		}
	}

	return true;
}

static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
{
	struct kvm *kvm = vcpu->kvm;
	struct arm_pmu_entry *entry;
	struct arm_pmu *arm_pmu;
	int ret = -ENXIO;

	lockdep_assert_held(&kvm->arch.config_lock);
	mutex_lock(&arm_pmus_lock);

	list_for_each_entry(entry, &arm_pmus, entry) {
		arm_pmu = entry->arm_pmu;
		if (arm_pmu->pmu.type == pmu_id) {
			if (kvm_vm_has_ran_once(kvm) ||
			    (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
				ret = -EBUSY;
				break;
			}

			kvm->arch.arm_pmu = arm_pmu;
			cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus);
			ret = 0;
			break;
		}
	}

	mutex_unlock(&arm_pmus_lock);
	return ret;
}

int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	struct kvm *kvm = vcpu->kvm;

	lockdep_assert_held(&kvm->arch.config_lock);

	if (!kvm_vcpu_has_pmu(vcpu))
		return -ENODEV;

	if (vcpu->arch.pmu.created)
		return -EBUSY;

	if (!kvm->arch.arm_pmu) {
		/*
		 * No PMU set, get the default one.
		 *
		 * The observant among you will notice that the supported_cpus
		 * mask does not get updated for the default PMU even though it
		 * is quite possible the selected instance supports only a
		 * subset of cores in the system. This is intentional, and
		 * upholds the preexisting behavior on heterogeneous systems
		 * where vCPUs can be scheduled on any core but the guest
		 * counters could stop working.
		 */
		kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
		if (!kvm->arch.arm_pmu)
			return -ENODEV;
	}

	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(kvm))
			return -EINVAL;

		if (get_user(irq, uaddr))
			return -EFAULT;

		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
			return -EINVAL;

		if (!pmu_irq_is_valid(kvm, irq))
			return -EINVAL;

		if (kvm_arm_pmu_irq_initialized(vcpu))
			return -EBUSY;

		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
		vcpu->arch.pmu.irq_num = irq;
		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_FILTER: {
		struct kvm_pmu_event_filter __user *uaddr;
		struct kvm_pmu_event_filter filter;
		int nr_events;

		nr_events = kvm_pmu_event_mask(kvm) + 1;

		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;

		if (copy_from_user(&filter, uaddr, sizeof(filter)))
			return -EFAULT;

		if (((u32)filter.base_event + filter.nevents) > nr_events ||
		    (filter.action != KVM_PMU_EVENT_ALLOW &&
		     filter.action != KVM_PMU_EVENT_DENY))
			return -EINVAL;

		if (kvm_vm_has_ran_once(kvm))
			return -EBUSY;

		if (!kvm->arch.pmu_filter) {
			kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
			if (!kvm->arch.pmu_filter)
				return -ENOMEM;

			/*
			 * The default depends on the first applied filter.
			 * If it allows events, the default is to deny.
			 * Conversely, if the first filter denies a set of
			 * events, the default is to allow.
			 */
			if (filter.action == KVM_PMU_EVENT_ALLOW)
				bitmap_zero(kvm->arch.pmu_filter, nr_events);
			else
				bitmap_fill(kvm->arch.pmu_filter, nr_events);
		}

		if (filter.action == KVM_PMU_EVENT_ALLOW)
			bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
		else
			bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);

		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int pmu_id;

		if (get_user(pmu_id, uaddr))
			return -EFAULT;

		return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
	}
	case KVM_ARM_VCPU_PMU_V3_INIT:
		return kvm_arm_pmu_v3_init(vcpu);
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (!kvm_vcpu_has_pmu(vcpu))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		irq = vcpu->arch.pmu.irq_num;
		return put_user(irq, uaddr);
	}
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ:
	case KVM_ARM_VCPU_PMU_V3_INIT:
	case KVM_ARM_VCPU_PMU_V3_FILTER:
	case KVM_ARM_VCPU_PMU_V3_SET_PMU:
		if (kvm_vcpu_has_pmu(vcpu))
			return 0;
	}

	return -ENXIO;
}

u8 kvm_arm_pmu_get_pmuver_limit(void)
{
	u64 tmp;

	tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
	tmp = cpuid_feature_cap_perfmon_field(tmp,
					      ID_AA64DFR0_EL1_PMUVer_SHIFT,
					      ID_AA64DFR0_EL1_PMUVer_V3P5);
	return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp);
}