// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/interrupt.h>

#include <asm/xen/hypercall.h>
#include <xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/xenpmu.h>

#include "xen-ops.h"
#include "pmu.h"

/* x86_pmu.handle_irq definition */
#include "../events/perf_event.h"

#define XENPMU_IRQ_PROCESSING 1
struct xenpmu {
        /* Shared page between hypervisor and domain */
        struct xen_pmu_data *xenpmu_data;

        uint8_t flags;
};
static DEFINE_PER_CPU(struct xenpmu, xenpmu_shared);
#define get_xenpmu_data()  (this_cpu_ptr(&xenpmu_shared)->xenpmu_data)
#define get_xenpmu_flags() (this_cpu_ptr(&xenpmu_shared)->flags)

/* Macro for computing address of a PMU MSR bank */
#define field_offset(ctxt, field) ((void *)((uintptr_t)ctxt + \
                                            (uintptr_t)ctxt->field))

/* AMD PMU */
#define F15H_NUM_COUNTERS 6
#define F10H_NUM_COUNTERS 4

static __read_mostly uint32_t amd_counters_base;
static __read_mostly uint32_t amd_ctrls_base;
static __read_mostly int amd_msr_step;
static __read_mostly int k7_counters_mirrored;
static __read_mostly int amd_num_counters;

/* Intel PMU */
#define MSR_TYPE_COUNTER      0
#define MSR_TYPE_CTRL         1
#define MSR_TYPE_GLOBAL       2
#define MSR_TYPE_ARCH_COUNTER 3
#define MSR_TYPE_ARCH_CTRL    4

/* Number of general pmu registers (CPUID.EAX[0xa].EAX[8..15]) */
#define PMU_GENERAL_NR_SHIFT  8
#define PMU_GENERAL_NR_BITS   8
#define PMU_GENERAL_NR_MASK   (((1 << PMU_GENERAL_NR_BITS) - 1) \
                               << PMU_GENERAL_NR_SHIFT)

/* Number of fixed pmu registers (CPUID.EDX[0xa].EDX[0..4]) */
#define PMU_FIXED_NR_SHIFT    0
#define PMU_FIXED_NR_BITS     5
#define PMU_FIXED_NR_MASK     (((1 << PMU_FIXED_NR_BITS) - 1) \
                               << PMU_FIXED_NR_SHIFT)

/* Alias registers (0x4c1) for full-width writes to PMCs */
#define MSR_PMC_ALIAS_MASK    (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_PMC0))

#define INTEL_PMC_TYPE_SHIFT  30

static __read_mostly int intel_num_arch_counters, intel_num_fixed_counters;

static void xen_pmu_arch_init(void)
{
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {

                switch (boot_cpu_data.x86) {
                case 0x15:
                        amd_num_counters = F15H_NUM_COUNTERS;
                        amd_counters_base = MSR_F15H_PERF_CTR;
                        amd_ctrls_base = MSR_F15H_PERF_CTL;
                        amd_msr_step = 2;
                        k7_counters_mirrored = 1;
                        break;
                case 0x10:
                case 0x12:
                case 0x14:
                case 0x16:
                default:
                        amd_num_counters = F10H_NUM_COUNTERS;
                        amd_counters_base = MSR_K7_PERFCTR0;
                        amd_ctrls_base = MSR_K7_EVNTSEL0;
                        amd_msr_step = 1;
                        k7_counters_mirrored = 0;
                        break;
                }
        } else {
                uint32_t eax, ebx, ecx, edx;

                cpuid(0xa, &eax, &ebx, &ecx, &edx);

                intel_num_arch_counters = (eax & PMU_GENERAL_NR_MASK) >>
                        PMU_GENERAL_NR_SHIFT;
                intel_num_fixed_counters = (edx & PMU_FIXED_NR_MASK) >>
                        PMU_FIXED_NR_SHIFT;
        }
}

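/*
 * On AMD family 0x15 the legacy K7 counter/event-select MSRs are mirrored
 * onto the F15H performance MSRs; translate a K7 address to its F15H
 * equivalent so both ranges reach the same emulated state.
 */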
static inline uint32_t get_fam15h_addr(u32 addr)
{
        switch (addr) {
        case MSR_K7_PERFCTR0:
        case MSR_K7_PERFCTR1:
        case MSR_K7_PERFCTR2:
        case MSR_K7_PERFCTR3:
                return MSR_F15H_PERF_CTR + (addr - MSR_K7_PERFCTR0);
        case MSR_K7_EVNTSEL0:
        case MSR_K7_EVNTSEL1:
        case MSR_K7_EVNTSEL2:
        case MSR_K7_EVNTSEL3:
                return MSR_F15H_PERF_CTL + (addr - MSR_K7_EVNTSEL0);
        default:
                break;
        }

        return addr;
}

static inline bool is_amd_pmu_msr(unsigned int msr)
{
        if ((msr >= MSR_F15H_PERF_CTL &&
             msr < MSR_F15H_PERF_CTR + (amd_num_counters * 2)) ||
            (msr >= MSR_K7_EVNTSEL0 &&
             msr < MSR_K7_PERFCTR0 + amd_num_counters))
                return true;

        return false;
}

static int is_intel_pmu_msr(u32 msr_index, int *type, int *index)
{
        u32 msr_index_pmc;

        switch (msr_index) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
        case MSR_IA32_DS_AREA:
        case MSR_IA32_PEBS_ENABLE:
                *type = MSR_TYPE_CTRL;
                return true;

        case MSR_CORE_PERF_GLOBAL_CTRL:
        case MSR_CORE_PERF_GLOBAL_STATUS:
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                *type = MSR_TYPE_GLOBAL;
                return true;

        default:

                if ((msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
                    (msr_index < MSR_CORE_PERF_FIXED_CTR0 +
                                 intel_num_fixed_counters)) {
                        *index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
                        *type = MSR_TYPE_COUNTER;
                        return true;
                }

                if ((msr_index >= MSR_P6_EVNTSEL0) &&
                    (msr_index < MSR_P6_EVNTSEL0 + intel_num_arch_counters)) {
                        *index = msr_index - MSR_P6_EVNTSEL0;
                        *type = MSR_TYPE_ARCH_CTRL;
                        return true;
                }

                msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
                if ((msr_index_pmc >= MSR_IA32_PERFCTR0) &&
                    (msr_index_pmc < MSR_IA32_PERFCTR0 +
                                     intel_num_arch_counters)) {
                        *type = MSR_TYPE_ARCH_COUNTER;
                        *index = msr_index_pmc - MSR_IA32_PERFCTR0;
                        return true;
                }
                return false;
        }
}

static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type,
                                  int index, bool is_read)
{
        uint64_t *reg = NULL;
        struct xen_pmu_intel_ctxt *ctxt;
        uint64_t *fix_counters;
        struct xen_pmu_cntr_pair *arch_cntr_pair;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
                return false;

        ctxt = &xenpmu_data->pmu.c.intel;

        switch (msr) {
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
                reg = &ctxt->global_ovf_ctrl;
                break;
        case MSR_CORE_PERF_GLOBAL_STATUS:
                reg = &ctxt->global_status;
                break;
        case MSR_CORE_PERF_GLOBAL_CTRL:
                reg = &ctxt->global_ctrl;
                break;
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
                reg = &ctxt->fixed_ctrl;
                break;
        default:
                switch (type) {
                case MSR_TYPE_COUNTER:
                        fix_counters = field_offset(ctxt, fixed_counters);
                        reg = &fix_counters[index];
                        break;
                case MSR_TYPE_ARCH_COUNTER:
                        arch_cntr_pair = field_offset(ctxt, arch_counters);
                        reg = &arch_cntr_pair[index].counter;
                        break;
                case MSR_TYPE_ARCH_CTRL:
                        arch_cntr_pair = field_offset(ctxt, arch_counters);
                        reg = &arch_cntr_pair[index].control;
                        break;
                default:
                        return false;
                }
        }

        if (reg) {
                if (is_read)
                        *val = *reg;
                else {
                        *reg = *val;

                        if (msr == MSR_CORE_PERF_GLOBAL_OVF_CTRL)
                                ctxt->global_status &= (~(*val));
                }
                return true;
        }

        return false;
}

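/*
 * Emulate an AMD PMU MSR access against the PMU context shared with the
 * hypervisor; only valid while a PMU interrupt is being processed.
 */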
static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read)
{
        uint64_t *reg = NULL;
        int i, off = 0;
        struct xen_pmu_amd_ctxt *ctxt;
        uint64_t *counter_regs, *ctrl_regs;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
                return false;

        if (k7_counters_mirrored &&
            ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)))
                msr = get_fam15h_addr(msr);

        ctxt = &xenpmu_data->pmu.c.amd;
        for (i = 0; i < amd_num_counters; i++) {
                if (msr == amd_ctrls_base + off) {
                        ctrl_regs = field_offset(ctxt, ctrls);
                        reg = &ctrl_regs[i];
                        break;
                } else if (msr == amd_counters_base + off) {
                        counter_regs = field_offset(ctxt, counters);
                        reg = &counter_regs[i];
                        break;
                }
                off += amd_msr_step;
        }

        if (reg) {
                if (is_read)
                        *val = *reg;
                else
                        *reg = *val;

                return true;
        }
        return false;
}

bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
{
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
                if (is_amd_pmu_msr(msr)) {
                        if (!xen_amd_pmu_emulate(msr, val, 1))
                                *val = native_read_msr_safe(msr, err);
                        return true;
                }
        } else {
                int type, index;

                if (is_intel_pmu_msr(msr, &type, &index)) {
                        if (!xen_intel_pmu_emulate(msr, val, type, index, 1))
                                *val = native_read_msr_safe(msr, err);
                        return true;
                }
        }

        return false;
}

bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
{
        uint64_t val = ((uint64_t)high << 32) | low;

        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
                if (is_amd_pmu_msr(msr)) {
                        if (!xen_amd_pmu_emulate(msr, &val, 0))
                                *err = native_write_msr_safe(msr, low, high);
                        return true;
                }
        } else {
                int type, index;

                if (is_intel_pmu_msr(msr, &type, &index)) {
                        if (!xen_intel_pmu_emulate(msr, &val, type, index, 0))
                                *err = native_write_msr_safe(msr, low, high);
                        return true;
                }
        }

        return false;
}

static unsigned long long xen_amd_read_pmc(int counter)
{
        struct xen_pmu_amd_ctxt *ctxt;
        uint64_t *counter_regs;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
                uint32_t msr;
                int err;

                msr = amd_counters_base + (counter * amd_msr_step);
                return native_read_msr_safe(msr, &err);
        }

        ctxt = &xenpmu_data->pmu.c.amd;
        counter_regs = field_offset(ctxt, counters);
        return counter_regs[counter];
}

static unsigned long long xen_intel_read_pmc(int counter)
{
        struct xen_pmu_intel_ctxt *ctxt;
        uint64_t *fixed_counters;
        struct xen_pmu_cntr_pair *arch_cntr_pair;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
                uint32_t msr;
                int err;

                if (counter & (1 << INTEL_PMC_TYPE_SHIFT))
                        msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff);
                else
                        msr = MSR_IA32_PERFCTR0 + counter;

                return native_read_msr_safe(msr, &err);
        }

        ctxt = &xenpmu_data->pmu.c.intel;
        if (counter & (1 << INTEL_PMC_TYPE_SHIFT)) {
                fixed_counters = field_offset(ctxt, fixed_counters);
                return fixed_counters[counter & 0xffff];
        }

        arch_cntr_pair = field_offset(ctxt, arch_counters);
        return arch_cntr_pair[counter].counter;
}

unsigned long long xen_read_pmc(int counter)
{
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
                return xen_amd_read_pmc(counter);
        else
                return xen_intel_read_pmc(counter);
}

int pmu_apic_update(uint32_t val)
{
        int ret;
        struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return -EINVAL;
        }

        xenpmu_data->pmu.l.lapic_lvtpc = val;

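        /*
         * While a PMU interrupt is being processed, the update is deferred:
         * the cached context (including the LVTPC value) is written back to
         * hardware via the XENPMU_flush hypercall at the end of
         * xen_pmu_irq_handler(), so no explicit hypercall is needed here.
         */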
        if (get_xenpmu_flags() & XENPMU_IRQ_PROCESSING)
                return 0;

        ret = HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL);

        return ret;
}

/* perf callbacks */
static int xen_is_in_guest(void)
{
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return 0;
        }

        if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF))
                return 0;

        return 1;
}

static int xen_is_user_mode(void)
{
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return 0;
        }

        if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV)
                return (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER);
        else
                return !!(xenpmu_data->pmu.r.regs.cpl & 3);
}

static unsigned long xen_get_guest_ip(void)
{
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return 0;
        }

        return xenpmu_data->pmu.r.regs.ip;
}

static struct perf_guest_info_callbacks xen_guest_cbs = {
        .is_in_guest  = xen_is_in_guest,
        .is_user_mode = xen_is_user_mode,
        .get_guest_ip = xen_get_guest_ip,
};

/* Convert registers from Xen's format to Linux' */
static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
                             struct pt_regs *regs, uint64_t pmu_flags)
{
        regs->ip = xen_regs->ip;
        regs->cs = xen_regs->cs;
        regs->sp = xen_regs->sp;

        if (pmu_flags & PMU_SAMPLE_PV) {
                if (pmu_flags & PMU_SAMPLE_USER)
                        regs->cs |= 3;
                else
                        regs->cs &= ~3;
        } else {
                if (xen_regs->cpl)
                        regs->cs |= 3;
                else
                        regs->cs &= ~3;
        }
}

irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
{
        int err, ret = IRQ_NONE;
        struct pt_regs regs;
        const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
        uint8_t xenpmu_flags = get_xenpmu_flags();

        if (!xenpmu_data) {
                pr_warn_once("%s: pmudata not initialized\n", __func__);
                return ret;
        }

        this_cpu_ptr(&xenpmu_shared)->flags =
                xenpmu_flags | XENPMU_IRQ_PROCESSING;
        xen_convert_regs(&xenpmu_data->pmu.r.regs, &regs,
                         xenpmu_data->pmu.pmu_flags);
        if (x86_pmu.handle_irq(&regs))
                ret = IRQ_HANDLED;

        /* Write out cached context to HW */
        err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL);
        this_cpu_ptr(&xenpmu_shared)->flags = xenpmu_flags;
        if (err) {
                pr_warn_once("%s: failed hypercall, err: %d\n", __func__, err);
                return IRQ_NONE;
        }

        return ret;
}

bool is_xen_pmu(int cpu)
{
        return (get_xenpmu_data() != NULL);
}

void xen_pmu_init(int cpu)
{
        int err;
        struct xen_pmu_params xp;
        unsigned long pfn;
        struct xen_pmu_data *xenpmu_data;

        BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);

        if (xen_hvm_domain())
                return;

        xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
        if (!xenpmu_data) {
                pr_err("VPMU init: No memory\n");
                return;
        }
        pfn = virt_to_pfn(xenpmu_data);

        xp.val = pfn_to_mfn(pfn);
        xp.vcpu = cpu;
        xp.version.maj = XENPMU_VER_MAJ;
        xp.version.min = XENPMU_VER_MIN;
        err = HYPERVISOR_xenpmu_op(XENPMU_init, &xp);
        if (err)
                goto fail;

        per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data;
        per_cpu(xenpmu_shared, cpu).flags = 0;

        if (cpu == 0) {
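                /*
                 * The perf callbacks and the PMU layout detection only need
                 * to be set up once, on the boot CPU.
                 */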
                perf_register_guest_info_callbacks(&xen_guest_cbs);
                xen_pmu_arch_init();
        }

        return;

fail:
        if (err == -EOPNOTSUPP || err == -ENOSYS)
                pr_info_once("VPMU disabled by hypervisor.\n");
        else
                pr_info_once("Could not initialize VPMU for cpu %d, error %d\n",
                        cpu, err);
        free_pages((unsigned long)xenpmu_data, 0);
}

void xen_pmu_finish(int cpu)
{
        struct xen_pmu_params xp;

        if (xen_hvm_domain())
                return;

        xp.vcpu = cpu;
        xp.version.maj = XENPMU_VER_MAJ;
        xp.version.min = XENPMU_VER_MIN;

        (void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);

        free_pages((unsigned long)per_cpu(xenpmu_shared, cpu).xenpmu_data, 0);
        per_cpu(xenpmu_shared, cpu).xenpmu_data = NULL;
}