// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/interrupt.h>

#include <asm/xen/hypercall.h>
#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/xenpmu.h>

#include "xen-ops.h"
#include "pmu.h"

/* x86_pmu.handle_irq definition */
#include "../events/perf_event.h"

#define XENPMU_IRQ_PROCESSING    1
struct xenpmu {
	/* Shared page between hypervisor and domain */
	struct xen_pmu_data *xenpmu_data;

	uint8_t flags;
};
static DEFINE_PER_CPU(struct xenpmu, xenpmu_shared);
#define get_xenpmu_data()    (this_cpu_ptr(&xenpmu_shared)->xenpmu_data)
#define get_xenpmu_flags()   (this_cpu_ptr(&xenpmu_shared)->flags)

/* Macro for computing address of a PMU MSR bank */
#define field_offset(ctxt, field) ((void *)((uintptr_t)ctxt + \
					    (uintptr_t)ctxt->field))

/* AMD PMU */
#define F15H_NUM_COUNTERS   6
#define F10H_NUM_COUNTERS   4

static __read_mostly uint32_t amd_counters_base;
static __read_mostly uint32_t amd_ctrls_base;
static __read_mostly int amd_msr_step;
static __read_mostly int k7_counters_mirrored;
static __read_mostly int amd_num_counters;

/* Intel PMU */
#define MSR_TYPE_COUNTER            0
#define MSR_TYPE_CTRL               1
#define MSR_TYPE_GLOBAL             2
#define MSR_TYPE_ARCH_COUNTER       3
#define MSR_TYPE_ARCH_CTRL          4

/* Number of general pmu registers (CPUID.EAX[0xa].EAX[8..15]) */
#define PMU_GENERAL_NR_SHIFT        8
#define PMU_GENERAL_NR_BITS         8
#define PMU_GENERAL_NR_MASK         (((1 << PMU_GENERAL_NR_BITS) - 1) \
				     << PMU_GENERAL_NR_SHIFT)

/* Number of fixed pmu registers (CPUID.EDX[0xa].EDX[0..4]) */
#define PMU_FIXED_NR_SHIFT          0
#define PMU_FIXED_NR_BITS           5
#define PMU_FIXED_NR_MASK           (((1 << PMU_FIXED_NR_BITS) - 1) \
				     << PMU_FIXED_NR_SHIFT)

/* Alias registers (0x4c1) for full-width writes to PMCs */
#define MSR_PMC_ALIAS_MASK          (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_PMC0))

#define INTEL_PMC_TYPE_SHIFT        30

static __read_mostly int intel_num_arch_counters, intel_num_fixed_counters;


static void xen_pmu_arch_init(void)
{
	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {

		switch (boot_cpu_data.x86) {
		case 0x15:
			amd_num_counters = F15H_NUM_COUNTERS;
			amd_counters_base = MSR_F15H_PERF_CTR;
			amd_ctrls_base = MSR_F15H_PERF_CTL;
			amd_msr_step = 2;
			k7_counters_mirrored = 1;
			break;
		case 0x10:
		case 0x12:
		case 0x14:
		case 0x16:
		default:
			amd_num_counters = F10H_NUM_COUNTERS;
			amd_counters_base = MSR_K7_PERFCTR0;
			amd_ctrls_base = MSR_K7_EVNTSEL0;
			amd_msr_step = 1;
			k7_counters_mirrored = 0;
			break;
		}
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
		amd_num_counters = F10H_NUM_COUNTERS;
		amd_counters_base = MSR_K7_PERFCTR0;
		amd_ctrls_base = MSR_K7_EVNTSEL0;
		amd_msr_step = 1;
		k7_counters_mirrored = 0;
	} else {
		uint32_t eax, ebx, ecx, edx;

		cpuid(0xa, &eax, &ebx, &ecx, &edx);

		intel_num_arch_counters = (eax & PMU_GENERAL_NR_MASK) >>
			PMU_GENERAL_NR_SHIFT;
		intel_num_fixed_counters = (edx & PMU_FIXED_NR_MASK) >>
			PMU_FIXED_NR_SHIFT;
	}
}

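/*
 * On Fam15h the legacy K7 perf counter/control MSRs are mirrored onto the
 * F15H MSR ranges; translate a K7 address to its F15H equivalent.
 */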
static inline uint32_t get_fam15h_addr(u32 addr)
{
	switch (addr) {
	case MSR_K7_PERFCTR0:
	case MSR_K7_PERFCTR1:
	case MSR_K7_PERFCTR2:
	case MSR_K7_PERFCTR3:
		return MSR_F15H_PERF_CTR + (addr - MSR_K7_PERFCTR0);
	case MSR_K7_EVNTSEL0:
	case MSR_K7_EVNTSEL1:
	case MSR_K7_EVNTSEL2:
	case MSR_K7_EVNTSEL3:
		return MSR_F15H_PERF_CTL + (addr - MSR_K7_EVNTSEL0);
	default:
		break;
	}

	return addr;
}

static inline bool is_amd_pmu_msr(unsigned int msr)
{
	if ((msr >= MSR_F15H_PERF_CTL &&
	     msr < MSR_F15H_PERF_CTR + (amd_num_counters * 2)) ||
	    (msr >= MSR_K7_EVNTSEL0 &&
	     msr < MSR_K7_PERFCTR0 + amd_num_counters))
		return true;

	return false;
}

static int is_intel_pmu_msr(u32 msr_index, int *type, int *index)
{
	u32 msr_index_pmc;

	switch (msr_index) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
	case MSR_IA32_DS_AREA:
	case MSR_IA32_PEBS_ENABLE:
		*type = MSR_TYPE_CTRL;
		return true;

	case MSR_CORE_PERF_GLOBAL_CTRL:
	case MSR_CORE_PERF_GLOBAL_STATUS:
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		*type = MSR_TYPE_GLOBAL;
		return true;

	default:

		if ((msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
		    (msr_index < MSR_CORE_PERF_FIXED_CTR0 +
				 intel_num_fixed_counters)) {
			*index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
			*type = MSR_TYPE_COUNTER;
			return true;
		}

		if ((msr_index >= MSR_P6_EVNTSEL0) &&
		    (msr_index < MSR_P6_EVNTSEL0 + intel_num_arch_counters)) {
			*index = msr_index - MSR_P6_EVNTSEL0;
			*type = MSR_TYPE_ARCH_CTRL;
			return true;
		}

		msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
		if ((msr_index_pmc >= MSR_IA32_PERFCTR0) &&
		    (msr_index_pmc < MSR_IA32_PERFCTR0 +
				     intel_num_arch_counters)) {
			*type = MSR_TYPE_ARCH_COUNTER;
			*index = msr_index_pmc - MSR_IA32_PERFCTR0;
			return true;
		}
		return false;
	}
}

static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type,
				  int index, bool is_read)
{
	uint64_t *reg = NULL;
	struct xen_pmu_intel_ctxt *ctxt;
	uint64_t *fix_counters;
	struct xen_pmu_cntr_pair *arch_cntr_pair;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();


	if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
		return false;

	ctxt = &xenpmu_data->pmu.c.intel;

	switch (msr) {
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		reg = &ctxt->global_ovf_ctrl;
		break;
	case MSR_CORE_PERF_GLOBAL_STATUS:
		reg = &ctxt->global_status;
		break;
	case MSR_CORE_PERF_GLOBAL_CTRL:
		reg = &ctxt->global_ctrl;
		break;
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
		reg = &ctxt->fixed_ctrl;
		break;
	default:
		switch (type) {
		case MSR_TYPE_COUNTER:
			fix_counters = field_offset(ctxt, fixed_counters);
			reg = &fix_counters[index];
			break;
		case MSR_TYPE_ARCH_COUNTER:
			arch_cntr_pair = field_offset(ctxt, arch_counters);
			reg = &arch_cntr_pair[index].counter;
			break;
		case MSR_TYPE_ARCH_CTRL:
			arch_cntr_pair = field_offset(ctxt, arch_counters);
			reg = &arch_cntr_pair[index].control;
			break;
		default:
			return false;
		}
	}

	if (reg) {
		if (is_read)
			*val = *reg;
		else {
			*reg = *val;

			if (msr == MSR_CORE_PERF_GLOBAL_OVF_CTRL)
				ctxt->global_status &= (~(*val));
		}
		return true;
	}

	return false;
}

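/*
 * Emulate an AMD PMU MSR access against the vPMU context cached in the
 * shared page.  Returns true if the access was handled here; callers
 * fall back to a native (safe) MSR access otherwise.
 */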
static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read)
{
	uint64_t *reg = NULL;
	int i, off = 0;
	struct xen_pmu_amd_ctxt *ctxt;
	uint64_t *counter_regs, *ctrl_regs;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
		return false;

	if (k7_counters_mirrored &&
	    ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)))
		msr = get_fam15h_addr(msr);

	ctxt = &xenpmu_data->pmu.c.amd;
	for (i = 0; i < amd_num_counters; i++) {
		if (msr == amd_ctrls_base + off) {
			ctrl_regs = field_offset(ctxt, ctrls);
			reg = &ctrl_regs[i];
			break;
		} else if (msr == amd_counters_base + off) {
			counter_regs = field_offset(ctxt, counters);
			reg = &counter_regs[i];
			break;
		}
		off += amd_msr_step;
	}

	if (reg) {
		if (is_read)
			*val = *reg;
		else
			*reg = *val;

		return true;
	}
	return false;
}

bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
		if (is_amd_pmu_msr(msr)) {
			if (!xen_amd_pmu_emulate(msr, val, 1))
				*val = native_read_msr_safe(msr, err);
			return true;
		}
	} else {
		int type, index;

		if (is_intel_pmu_msr(msr, &type, &index)) {
			if (!xen_intel_pmu_emulate(msr, val, type, index, 1))
				*val = native_read_msr_safe(msr, err);
			return true;
		}
	}

	return false;
}

bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
{
	uint64_t val = ((uint64_t)high << 32) | low;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
		if (is_amd_pmu_msr(msr)) {
			if (!xen_amd_pmu_emulate(msr, &val, 0))
				*err = native_write_msr_safe(msr, low, high);
			return true;
		}
	} else {
		int type, index;

		if (is_intel_pmu_msr(msr, &type, &index)) {
			if (!xen_intel_pmu_emulate(msr, &val, type, index, 0))
				*err = native_write_msr_safe(msr, low, high);
			return true;
		}
	}

	return false;
}

static unsigned long long xen_amd_read_pmc(int counter)
{
	struct xen_pmu_amd_ctxt *ctxt;
	uint64_t *counter_regs;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
		uint32_t msr;
		int err;

		msr = amd_counters_base + (counter * amd_msr_step);
		return native_read_msr_safe(msr, &err);
	}

	ctxt = &xenpmu_data->pmu.c.amd;
	counter_regs = field_offset(ctxt, counters);
	return counter_regs[counter];
}

static unsigned long long xen_intel_read_pmc(int counter)
{
	struct xen_pmu_intel_ctxt *ctxt;
	uint64_t *fixed_counters;
	struct xen_pmu_cntr_pair *arch_cntr_pair;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
		uint32_t msr;
		int err;

		if (counter & (1 << INTEL_PMC_TYPE_SHIFT))
			msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff);
		else
			msr = MSR_IA32_PERFCTR0 + counter;

		return native_read_msr_safe(msr, &err);
	}

	ctxt = &xenpmu_data->pmu.c.intel;
	if (counter & (1 << INTEL_PMC_TYPE_SHIFT)) {
		fixed_counters = field_offset(ctxt, fixed_counters);
		return fixed_counters[counter & 0xffff];
	}

	arch_cntr_pair = field_offset(ctxt, arch_counters);
	return arch_cntr_pair[counter].counter;
}

unsigned long long xen_read_pmc(int counter)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return xen_amd_read_pmc(counter);
	else
		return xen_intel_read_pmc(counter);
}

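/*
 * Cache the LVTPC value in the shared page and, unless we are already
 * inside the PMU interrupt handler, ask the hypervisor to apply it.
 */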
int pmu_apic_update(uint32_t val)
{
	int ret;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

	if (!xenpmu_data) {
		pr_warn_once("%s: pmudata not initialized\n", __func__);
		return -EINVAL;
	}

	xenpmu_data->pmu.l.lapic_lvtpc = val;

	if (get_xenpmu_flags() & XENPMU_IRQ_PROCESSING)
		return 0;

	ret = HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL);

	return ret;
}

/* perf callbacks */
static unsigned int xen_guest_state(void)
{
	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	unsigned int state = 0;

	if (!xenpmu_data) {
		pr_warn_once("%s: pmudata not initialized\n", __func__);
		return state;
	}

	if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF))
		return state;

	state |= PERF_GUEST_ACTIVE;

	if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV) {
		if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER)
			state |= PERF_GUEST_USER;
	} else if (xenpmu_data->pmu.r.regs.cpl & 3) {
		state |= PERF_GUEST_USER;
	}

	return state;
}

static unsigned long xen_get_guest_ip(void)
{
	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

	if (!xenpmu_data) {
		pr_warn_once("%s: pmudata not initialized\n", __func__);
		return 0;
	}

	return xenpmu_data->pmu.r.regs.ip;
}

static struct perf_guest_info_callbacks xen_guest_cbs = {
	.state  = xen_guest_state,
	.get_ip = xen_get_guest_ip,
};

/* Convert registers from Xen's format to Linux' */
static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
			     struct pt_regs *regs, uint64_t pmu_flags)
{
	regs->ip = xen_regs->ip;
	regs->cs = xen_regs->cs;
	regs->sp = xen_regs->sp;

	if (pmu_flags & PMU_SAMPLE_PV) {
		if (pmu_flags & PMU_SAMPLE_USER)
			regs->cs |= 3;
		else
			regs->cs &= ~3;
	} else {
		if (xen_regs->cpl)
			regs->cs |= 3;
		else
			regs->cs &= ~3;
	}
}

irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
{
	int err, ret = IRQ_NONE;
	struct pt_regs regs = {0};
	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data) {
		pr_warn_once("%s: pmudata not initialized\n", __func__);
		return ret;
	}

	this_cpu_ptr(&xenpmu_shared)->flags =
		xenpmu_flags | XENPMU_IRQ_PROCESSING;
	xen_convert_regs(&xenpmu_data->pmu.r.regs, &regs,
			 xenpmu_data->pmu.pmu_flags);
	if (x86_pmu.handle_irq(&regs))
		ret = IRQ_HANDLED;

	/* Write out cached context to HW */
	err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL);
	this_cpu_ptr(&xenpmu_shared)->flags = xenpmu_flags;
	if (err) {
		pr_warn_once("%s: failed hypercall, err: %d\n", __func__, err);
		return IRQ_NONE;
	}

	return ret;
}

bool is_xen_pmu(int cpu)
{
	return (get_xenpmu_data() != NULL);
}

void xen_pmu_init(int cpu)
{
	int err;
	struct xen_pmu_params xp;
	unsigned long pfn;
	struct xen_pmu_data *xenpmu_data;

	BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);

	if (xen_hvm_domain())
		return;

	xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
	if (!xenpmu_data) {
		pr_err("VPMU init: No memory\n");
		return;
	}
	pfn = virt_to_pfn(xenpmu_data);

	xp.val = pfn_to_mfn(pfn);
	xp.vcpu = cpu;
	xp.version.maj = XENPMU_VER_MAJ;
	xp.version.min = XENPMU_VER_MIN;
	err = HYPERVISOR_xenpmu_op(XENPMU_init, &xp);
	if (err)
		goto fail;

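	/* Registration succeeded: publish the shared PMU page for this vCPU. */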
	per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data;
	per_cpu(xenpmu_shared, cpu).flags = 0;

	if (cpu == 0) {
		perf_register_guest_info_callbacks(&xen_guest_cbs);
		xen_pmu_arch_init();
	}

	return;

fail:
	if (err == -EOPNOTSUPP || err == -ENOSYS)
		pr_info_once("VPMU disabled by hypervisor.\n");
	else
		pr_info_once("Could not initialize VPMU for cpu %d, error %d\n",
			cpu, err);
	free_pages((unsigned long)xenpmu_data, 0);
}

void xen_pmu_finish(int cpu)
{
	struct xen_pmu_params xp;

	if (xen_hvm_domain())
		return;

	xp.vcpu = cpu;
	xp.version.maj = XENPMU_VER_MAJ;
	xp.version.min = XENPMU_VER_MIN;

	(void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);

	free_pages((unsigned long)per_cpu(xenpmu_shared, cpu).xenpmu_data, 0);
	per_cpu(xenpmu_shared, cpu).xenpmu_data = NULL;
}