// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>

#define PERF_ATTR_CFG1_COUNTER_64BIT	BIT(0)

DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);

static LIST_HEAD(arm_pmus);
static DEFINE_MUTEX(arm_pmus_lock);

static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc);
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc);

static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc)
{
	return container_of(pmc, struct kvm_vcpu, arch.pmu.pmc[pmc->idx]);
}

static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx)
{
	return &vcpu->arch.pmu.pmc[cnt_idx];
}

static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
	unsigned int pmuver;

	pmuver = kvm->arch.arm_pmu->pmuver;

	switch (pmuver) {
	case ID_AA64DFR0_EL1_PMUVer_IMP:
		return GENMASK(9, 0);
	case ID_AA64DFR0_EL1_PMUVer_V3P1:
	case ID_AA64DFR0_EL1_PMUVer_V3P4:
	case ID_AA64DFR0_EL1_PMUVer_V3P5:
	case ID_AA64DFR0_EL1_PMUVer_V3P7:
		return GENMASK(15, 0);
	default:	/* Shouldn't be here, just for sanity */
		WARN_ONCE(1, "Unknown PMU version %d\n", pmuver);
		return 0;
	}
}

/**
 * kvm_pmc_is_64bit - determine if counter is 64bit
 * @pmc: counter context
 */
static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc)
{
	return (pmc->idx == ARMV8_PMU_CYCLE_IDX ||
		kvm_pmu_is_3p5(kvm_pmc_to_vcpu(pmc)));
}

static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc)
{
	u64 val = __vcpu_sys_reg(kvm_pmc_to_vcpu(pmc), PMCR_EL0);

	return (pmc->idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) ||
	       (pmc->idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC));
}

static bool kvm_pmu_counter_can_chain(struct kvm_pmc *pmc)
{
	return (!(pmc->idx & 1) && (pmc->idx + 1) < ARMV8_PMU_CYCLE_IDX &&
		!kvm_pmc_has_64bit_overflow(pmc));
}

static u32 counter_index_to_reg(u64 idx)
{
	return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + idx;
}

static u32 counter_index_to_evtreg(u64 idx)
{
	return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + idx;
}
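
/**
 * kvm_pmu_get_pmc_value - get the current value of a counter
 * @pmc: The PMU counter pointer
 *
 * Returns the value of the vcpu's counter register plus whatever the backing
 * perf event has counted so far, truncated to 32 bits unless the counter is
 * a 64bit one.
 */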
static u64 kvm_pmu_get_pmc_value(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 counter, reg, enabled, running;

	reg = counter_index_to_reg(pmc->idx);
	counter = __vcpu_sys_reg(vcpu, reg);

	/*
	 * The real counter value is equal to the value of the counter
	 * register plus the value the perf event has counted.
	 */
	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event, &enabled,
						 &running);

	if (!kvm_pmc_is_64bit(pmc))
		counter = lower_32_bits(counter);

	return counter;
}

/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	return kvm_pmu_get_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx));
}
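
/*
 * Update the counter register with @val and recreate the backing perf event
 * so that its sample period matches the new value. The @force parameter is
 * only used by the PMCR_EL0.P reset path, which must be able to zero the
 * whole counter even for an AArch32 guest.
 */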
static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 reg;

	kvm_pmu_release_perf_event(pmc);

	reg = counter_index_to_reg(pmc->idx);

	if (vcpu_mode_is_32bit(vcpu) && pmc->idx != ARMV8_PMU_CYCLE_IDX &&
	    !force) {
		/*
		 * Even with PMUv3p5, AArch32 cannot write to the top
		 * 32bit of the counters. The only possible course of
		 * action is to use PMCR.P, which will reset them to
		 * 0 (the only use of the 'force' parameter).
		 */
		val = (__vcpu_sys_reg(vcpu, reg) & GENMASK(63, 32)) |
		      lower_32_bits(val);
	}

	__vcpu_sys_reg(vcpu, reg) = val;

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(pmc);
}

/**
 * kvm_pmu_set_counter_value - set PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 * @val: The counter value
 */
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx), val, false);
}

/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

/**
 * kvm_pmu_stop_counter - stop PMU counter
 * @pmc: The PMU counter pointer
 *
 * If this counter has been configured to monitor some event, release it here.
 */
static void kvm_pmu_stop_counter(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 reg, val;

	if (!pmc->perf_event)
		return;

	val = kvm_pmu_get_pmc_value(pmc);

	reg = counter_index_to_reg(pmc->idx);

	__vcpu_sys_reg(vcpu, reg) = val;

	kvm_pmu_release_perf_event(pmc);
}

/**
 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		pmu->pmc[i].idx = i;
}

/**
 * kvm_pmu_vcpu_reset - reset pmu state for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
	int i;

	for_each_set_bit(i, &mask, 32)
		kvm_pmu_stop_counter(kvm_vcpu_idx_to_pmc(vcpu, i));
}

/**
 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	int i;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, i));
	irq_work_sync(&vcpu->arch.pmu.overflow_work);
}

u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;

	val &= ARMV8_PMU_PMCR_N_MASK;
	if (val == 0)
		return BIT(ARMV8_PMU_CYCLE_IDX);
	else
		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
}

/**
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event
 */
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		struct kvm_pmc *pmc;

		if (!(val & BIT(i)))
			continue;

		pmc = kvm_vcpu_idx_to_pmc(vcpu, i);

		if (!pmc->perf_event) {
			kvm_pmu_create_perf_event(pmc);
		} else {
			perf_event_enable(pmc->perf_event);
			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
				kvm_debug("fail to enable perf event\n");
		}
	}
}

/**
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event
 */
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		struct kvm_pmc *pmc;

		if (!(val & BIT(i)))
			continue;

		pmc = kvm_vcpu_idx_to_pmc(vcpu, i);

		if (pmc->perf_event)
			perf_event_disable(pmc->perf_event);
	}
}

static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
	u64 reg = 0;

	if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
	}

	return reg;
}
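
/*
 * Recompute the level of the PMU overflow interrupt line and, when an
 * in-kernel irqchip is in use, propagate any change to the vgic. With a
 * userspace irqchip, the updated level is reported through
 * kvm_pmu_update_run() instead.
 */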
static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	bool overflow;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	overflow = !!kvm_pmu_overflow_status(vcpu);
	if (pmu->irq_level == overflow)
		return;

	pmu->irq_level = overflow;

	if (likely(irqchip_in_kernel(vcpu->kvm))) {
		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
					      pmu->irq_num, overflow, pmu);
		WARN_ON(ret);
	}
}

bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	return pmu->irq_level != run_level;
}

/*
 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 */
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the timer bitmap for user space */
	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
	if (vcpu->arch.pmu.irq_level)
		regs->device_irq_level |= KVM_ARM_DEV_PMU;
}

/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and inject
 * an interrupt if that was the case.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * When the perf interrupt is an NMI, we cannot safely notify the vcpu
 * corresponding to the event.
 * This is why we need a callback to do it once outside of the NMI context.
 */
static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
{
	struct kvm_vcpu *vcpu;

	vcpu = container_of(work, struct kvm_vcpu, arch.pmu.overflow_work);
	kvm_vcpu_kick(vcpu);
}

/*
 * Perform an increment on any of the counters described in @mask,
 * generating the overflow if required, and propagate it as a chained
 * event if possible.
 */
static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
				      unsigned long mask, u32 event)
{
	int i;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
		return;

	/* Weed out disabled counters */
	mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

	for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) {
		struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
		u64 type, reg;

		/* Filter on event type */
		type = __vcpu_sys_reg(vcpu, counter_index_to_evtreg(i));
		type &= kvm_pmu_event_mask(vcpu->kvm);
		if (type != event)
			continue;

		/* Increment this counter */
		reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1;
		if (!kvm_pmc_is_64bit(pmc))
			reg = lower_32_bits(reg);
		__vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg;

		/* No overflow? move on */
		if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg))
			continue;

		/* Mark overflow */
		__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);

		if (kvm_pmu_counter_can_chain(pmc))
			kvm_pmu_counter_increment(vcpu, BIT(i + 1),
						  ARMV8_PMUV3_PERFCTR_CHAIN);
	}
}

/* Compute the sample period for a given counter value */
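/*
 * For instance, a 32bit counter holding 0xfffffff0 results in a sample
 * period of 0x10: the backing perf event fires after sixteen more counts,
 * which is exactly when the architectural counter overflows.
 */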
static u64 compute_period(struct kvm_pmc *pmc, u64 counter)
{
	u64 val;

	if (kvm_pmc_is_64bit(pmc) && kvm_pmc_has_64bit_overflow(pmc))
		val = (-counter) & GENMASK(63, 0);
	else
		val = (-counter) & GENMASK(31, 0);

	return val;
}

/**
 * When the perf event overflows, set the overflow status and inform the vcpu.
 */
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
				  struct perf_sample_data *data,
				  struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	int idx = pmc->idx;
	u64 period;

	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * Reset the sample period to the architectural limit,
	 * i.e. the point where the counter overflows.
	 */
	period = compute_period(pmc, local64_read(&perf_event->count));

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);

	if (kvm_pmu_counter_can_chain(pmc))
		kvm_pmu_counter_increment(vcpu, BIT(idx + 1),
					  ARMV8_PMUV3_PERFCTR_CHAIN);

	if (kvm_pmu_overflow_status(vcpu)) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);

		if (!in_nmi())
			kvm_vcpu_kick(vcpu);
		else
			irq_work_queue(&vcpu->arch.pmu.overflow_work);
	}

	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

/**
 * kvm_pmu_software_increment - do software increment
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMSWINC register
 */
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
	kvm_pmu_counter_increment(vcpu, val, ARMV8_PMUV3_PERFCTR_SW_INCR);
}

/**
 * kvm_pmu_handle_pmcr - handle PMCR register
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCR register
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	/* Fixup PMCR_EL0 to reconcile the PMU version and the LP bit */
	if (!kvm_pmu_is_3p5(vcpu))
		val &= ~ARMV8_PMU_PMCR_LP;

	__vcpu_sys_reg(vcpu, PMCR_EL0) = val;

	if (val & ARMV8_PMU_PMCR_E) {
		kvm_pmu_enable_counter_mask(vcpu,
				__vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	} else {
		kvm_pmu_disable_counter_mask(vcpu,
				__vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	}

	if (val & ARMV8_PMU_PMCR_C)
		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

	if (val & ARMV8_PMU_PMCR_P) {
		unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
		for_each_set_bit(i, &mask, 32)
			kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true);
	}
}
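
/*
 * A counter is enabled when both the global PMCR_EL0.E bit and the counter's
 * own bit in PMCNTENSET_EL0 are set.
 */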
static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

	return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(pmc->idx));
}

/**
 * kvm_pmu_create_perf_event - create a perf event for a counter
 * @pmc: Counter context
 */
static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
	struct perf_event *event;
	struct perf_event_attr attr;
	u64 eventsel, reg, data;

	reg = counter_index_to_evtreg(pmc->idx);
	data = __vcpu_sys_reg(vcpu, reg);

	kvm_pmu_stop_counter(pmc);
	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	else
		eventsel = data & kvm_pmu_event_mask(vcpu->kvm);

	/*
	 * Neither SW increment nor chained events need to be backed
	 * by a perf event.
	 */
	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR ||
	    eventsel == ARMV8_PMUV3_PERFCTR_CHAIN)
		return;

	/*
	 * If we have a filter in place and the event isn't allowed, do
	 * not install a perf event either.
	 */
	if (vcpu->kvm->arch.pmu_filter &&
	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
		return;

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.type = arm_pmu->pmu.type;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = !kvm_pmu_counter_is_enabled(pmc);
	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
	attr.exclude_hv = 1; /* Don't count EL2 events */
	attr.exclude_host = 1; /* Don't count host events */
	attr.config = eventsel;

	/*
	 * If counting with a 64bit counter, advertise it to the perf
	 * code, carefully dealing with the initial sample period
	 * which also depends on the overflow.
	 */
	if (kvm_pmc_is_64bit(pmc))
		attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT;

	attr.sample_period = compute_period(pmc, kvm_pmu_get_pmc_value(pmc));

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, pmc);

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return;
	}

	pmc->perf_event = event;
}

/**
 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 * @vcpu: The vcpu pointer
 * @data: The data guest writes to PMXEVTYPER_EL0
 * @select_idx: The number of selected counter
 *
 * When the guest OS accesses PMXEVTYPER_EL0, it wants to set a PMC to count an
 * event with the given hardware event number. Here we call the perf_event API
 * to emulate this action and create a kernel perf event for it.
 */
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
				    u64 select_idx)
{
	struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, select_idx);
	u64 reg, mask;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	mask = ARMV8_PMU_EVTYPE_MASK;
	mask &= ~ARMV8_PMU_EVTYPE_EVENT;
	mask |= kvm_pmu_event_mask(vcpu->kvm);

	reg = counter_index_to_evtreg(pmc->idx);

	__vcpu_sys_reg(vcpu, reg) = data & mask;

	kvm_pmu_create_perf_event(pmc);
}

void kvm_host_pmu_init(struct arm_pmu *pmu)
{
	struct arm_pmu_entry *entry;

	if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
	    pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
		return;

	mutex_lock(&arm_pmus_lock);

	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		goto out_unlock;

	entry->arm_pmu = pmu;
	list_add_tail(&entry->entry, &arm_pmus);

	if (list_is_singular(&arm_pmus))
		static_branch_enable(&kvm_arm_pmu_available);

out_unlock:
	mutex_unlock(&arm_pmus_lock);
}

static struct arm_pmu *kvm_pmu_probe_armpmu(void)
{
	struct perf_event_attr attr = { };
	struct perf_event *event;
	struct arm_pmu *pmu = NULL;

	/*
	 * Create a dummy event that only counts user cycles. As we'll never
	 * leave this function with the event being live, it will never
	 * count anything. But it allows us to probe some of the PMU
	 * details. Yes, this is terrible.
	 */
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = 0;
	attr.exclude_user = 0;
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;
	attr.exclude_host = 1;
	attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	attr.sample_period = GENMASK(63, 0);

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, &attr);

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return NULL;
	}

	if (event->pmu) {
		pmu = to_arm_pmu(event->pmu);
		if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
		    pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
			pmu = NULL;
	}

	perf_event_disable(event);
	perf_event_release_kernel(event);

	return pmu;
}

u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
	unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
	u64 val, mask = 0;
	int base, i, nr_events;

	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!pmceid1) {
		val = read_sysreg(pmceid0_el0);
		/* always support CHAIN */
		val |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN);
		base = 0;
	} else {
		val = read_sysreg(pmceid1_el0);
		/*
		 * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
		 * as RAZ
		 */
		if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4)
			val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
		base = 32;
	}

	if (!bmap)
		return val;

	nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

	for (i = 0; i < 32; i += 8) {
		u64 byte;

		byte = bitmap_get_value8(bmap, base + i);
		mask |= byte << i;
		if (nr_events >= (0x4000 + base + 32)) {
			byte = bitmap_get_value8(bmap, 0x4000 + base + i);
			mask |= byte << (32 + i);
		}
	}

	return val & mask;
}
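
/*
 * Final check of the PMU configuration, performed before the vcpu is allowed
 * to run: the PMU must have been initialized via KVM_ARM_VCPU_PMU_V3_INIT,
 * and its interrupt configuration must match the type of irqchip in use.
 */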
int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!vcpu->arch.pmu.created)
		return -EINVAL;

	/*
	 * A valid interrupt configuration for the PMU is either to have a
	 * properly configured interrupt number and to use an in-kernel
	 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
	 */
	if (irqchip_in_kernel(vcpu->kvm)) {
		int irq = vcpu->arch.pmu.irq_num;
		/*
		 * If we are using an in-kernel vgic, at this point we know
		 * the vgic will be initialized, so we can check the PMU irq
		 * number against the dimensions of the vgic and make sure
		 * it's valid.
		 */
		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
			return -EINVAL;
	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
		return -EINVAL;
	}

	/* One-off reload of the PMU on first run */
	kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);

	return 0;
}

static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm)) {
		int ret;

		/*
		 * If using the PMU with an in-kernel virtual GIC
		 * implementation, we require the GIC to be already
		 * initialized when initializing the PMU.
		 */
		if (!vgic_initialized(vcpu->kvm))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
					 &vcpu->arch.pmu);
		if (ret)
			return ret;
	}

	init_irq_work(&vcpu->arch.pmu.overflow_work,
		      kvm_pmu_perf_overflow_notify_vcpu);

	vcpu->arch.pmu.created = true;
	return 0;
}

/*
 * For one VM the interrupt type must be the same for each vcpu.
 * As a PPI, the interrupt number is the same for all vcpus,
 * while as an SPI it must be a separate number per vcpu.
 */
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			continue;

		if (irq_is_ppi(irq)) {
			if (vcpu->arch.pmu.irq_num != irq)
				return false;
		} else {
			if (vcpu->arch.pmu.irq_num == irq)
				return false;
		}
	}

	return true;
}

static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
{
	struct kvm *kvm = vcpu->kvm;
	struct arm_pmu_entry *entry;
	struct arm_pmu *arm_pmu;
	int ret = -ENXIO;

	mutex_lock(&kvm->lock);
	mutex_lock(&arm_pmus_lock);

	list_for_each_entry(entry, &arm_pmus, entry) {
		arm_pmu = entry->arm_pmu;
		if (arm_pmu->pmu.type == pmu_id) {
			if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) ||
			    (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
				ret = -EBUSY;
				break;
			}

			kvm->arch.arm_pmu = arm_pmu;
			cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus);
			ret = 0;
			break;
		}
	}

	mutex_unlock(&arm_pmus_lock);
	mutex_unlock(&kvm->lock);
	return ret;
}
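
/*
 * Handle writes to the KVM_ARM_VCPU_PMU_V3_CTRL attribute group: configure
 * the overflow interrupt, install an event filter, select a specific host
 * PMU, or finalise the vcpu's PMU via KVM_ARM_VCPU_PMU_V3_INIT.
 */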
int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	struct kvm *kvm = vcpu->kvm;

	if (!kvm_vcpu_has_pmu(vcpu))
		return -ENODEV;

	if (vcpu->arch.pmu.created)
		return -EBUSY;

	mutex_lock(&kvm->lock);
	if (!kvm->arch.arm_pmu) {
		/* No PMU set, get the default one */
		kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
		if (!kvm->arch.arm_pmu) {
			mutex_unlock(&kvm->lock);
			return -ENODEV;
		}
	}
	mutex_unlock(&kvm->lock);

	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(kvm))
			return -EINVAL;

		if (get_user(irq, uaddr))
			return -EFAULT;

		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
			return -EINVAL;

		if (!pmu_irq_is_valid(kvm, irq))
			return -EINVAL;

		if (kvm_arm_pmu_irq_initialized(vcpu))
			return -EBUSY;

		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
		vcpu->arch.pmu.irq_num = irq;
		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_FILTER: {
		struct kvm_pmu_event_filter __user *uaddr;
		struct kvm_pmu_event_filter filter;
		int nr_events;

		nr_events = kvm_pmu_event_mask(kvm) + 1;

		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;

		if (copy_from_user(&filter, uaddr, sizeof(filter)))
			return -EFAULT;

		if (((u32)filter.base_event + filter.nevents) > nr_events ||
		    (filter.action != KVM_PMU_EVENT_ALLOW &&
		     filter.action != KVM_PMU_EVENT_DENY))
			return -EINVAL;

		mutex_lock(&kvm->lock);

		if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) {
			mutex_unlock(&kvm->lock);
			return -EBUSY;
		}

		if (!kvm->arch.pmu_filter) {
			kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
			if (!kvm->arch.pmu_filter) {
				mutex_unlock(&kvm->lock);
				return -ENOMEM;
			}

			/*
			 * The default depends on the first applied filter.
			 * If it allows events, the default is to deny.
			 * Conversely, if the first filter denies a set of
			 * events, the default is to allow.
			 */
			if (filter.action == KVM_PMU_EVENT_ALLOW)
				bitmap_zero(kvm->arch.pmu_filter, nr_events);
			else
				bitmap_fill(kvm->arch.pmu_filter, nr_events);
		}

		if (filter.action == KVM_PMU_EVENT_ALLOW)
			bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
		else
			bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);

		mutex_unlock(&kvm->lock);

		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int pmu_id;

		if (get_user(pmu_id, uaddr))
			return -EFAULT;

		return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
	}
	case KVM_ARM_VCPU_PMU_V3_INIT:
		return kvm_arm_pmu_v3_init(vcpu);
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (!kvm_vcpu_has_pmu(vcpu))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		irq = vcpu->arch.pmu.irq_num;
		return put_user(irq, uaddr);
	}
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ:
	case KVM_ARM_VCPU_PMU_V3_INIT:
	case KVM_ARM_VCPU_PMU_V3_FILTER:
	case KVM_ARM_VCPU_PMU_V3_SET_PMU:
		if (kvm_vcpu_has_pmu(vcpu))
			return 0;
	}

	return -ENXIO;
}

u8 kvm_arm_pmu_get_pmuver_limit(void)
{
	u64 tmp;

	tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
	tmp = cpuid_feature_cap_perfmon_field(tmp,
					      ID_AA64DFR0_EL1_PMUVer_SHIFT,
					      ID_AA64DFR0_EL1_PMUVer_V3P5);
	return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp);
}