// SPDX-License-Identifier: GPL-2.0
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/slab.h>

#include <asm/cpu_entry_area.h>
#include <asm/perf_event.h>
#include <asm/insn.h>

#include "../perf_event.h"

/* Waste a full page so it can be mapped into the cpu_entry_area */
DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);

/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE		24

#define PEBS_FIXUP_SIZE		PAGE_SIZE

/*
 * pebs_record_32 for p4 and core not supported

struct pebs_record_32 {
	u32 flags, ip;
	u32 ax, bx, cx, dx;
	u32 si, di, bp, sp;
};

 */

union intel_x86_pebs_dse {
	u64 val;
	struct {
		unsigned int ld_dse:4;
		unsigned int ld_stlb_miss:1;
		unsigned int ld_locked:1;
		unsigned int ld_reserved:26;
	};
	struct {
		unsigned int st_l1d_hit:1;
		unsigned int st_reserved1:3;
		unsigned int st_stlb_miss:1;
		unsigned int st_locked:1;
		unsigned int st_reserved2:26;
	};
};


/*
 * Map PEBS Load Latency Data Source encodings to generic
 * memory data source information
 */
#define P(a, b) PERF_MEM_S(a, b)
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
#define LEVEL(x) P(LVLNUM, x)
#define REM P(REMOTE, REMOTE)
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))

/* Version for Sandy Bridge and later */
static u64 pebs_data_source[] = {
	P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* 0x00: ukn L3 */
	OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),  /* 0x01: L1 local */
	OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),  /* 0x03: L2 hit */
	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, NONE),  /* 0x04: L3 hit */
	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, MISS),  /* 0x05: L3 hit, snoop miss */
	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HIT),   /* 0x06: L3 hit, snoop hit */
	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HITM),  /* 0x07: L3 hit, snoop hitm */
	OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
	OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm */
	OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | P(SNOOP, HIT),       /* 0x0a: L3 miss, shared */
	OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
	OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | SNOOP_NONE_MISS,     /* 0x0c: L3 miss, excl */
	OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */
	OP_LH | P(LVL, IO)  | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */
	OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
};

/* Patch up minor differences in the bits */
void __init intel_pmu_pebs_data_source_nhm(void)
{
	pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
	pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
	pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
}
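
/*
 * On Skylake the upper data source encodings change meaning: 0x08/0x09
 * become local/remote last-level hits (L4 eDRAM, or persistent memory when
 * @pmem is set), 0x0b becomes remote RAM with no snoop, and 0x0c/0x0d become
 * remote cache snoop-forward/snoop-HitM.
 */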
void __init intel_pmu_pebs_data_source_skl(bool pmem)
{
	u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);

	pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
	pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
	pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
	pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
	pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
}

static u64 precise_store_data(u64 status)
{
	union intel_x86_pebs_dse dse;
	u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);

	dse.val = status;

	/*
	 * bit 4: TLB access
	 * 1 = store missed 2nd level TLB
	 *
	 * so it either hit the walker or the OS
	 * otherwise hit 2nd level TLB
	 */
	if (dse.st_stlb_miss)
		val |= P(TLB, MISS);
	else
		val |= P(TLB, HIT);

	/*
	 * bit 0: hit L1 data cache
	 * if not set, then all we know is that
	 * it missed L1D
	 */
	if (dse.st_l1d_hit)
		val |= P(LVL, HIT);
	else
		val |= P(LVL, MISS);

	/*
	 * bit 5: Locked prefix
	 */
	if (dse.st_locked)
		val |= P(LOCK, LOCKED);

	return val;
}

static u64 precise_datala_hsw(struct perf_event *event, u64 status)
{
	union perf_mem_data_src dse;

	dse.val = PERF_MEM_NA;

	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
		dse.mem_op = PERF_MEM_OP_STORE;
	else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
		dse.mem_op = PERF_MEM_OP_LOAD;

	/*
	 * L1 info only valid for following events:
	 *
	 * MEM_UOPS_RETIRED.STLB_MISS_STORES
	 * MEM_UOPS_RETIRED.LOCK_STORES
	 * MEM_UOPS_RETIRED.SPLIT_STORES
	 * MEM_UOPS_RETIRED.ALL_STORES
	 */
	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
		if (status & 1)
			dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		else
			dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
	}
	return dse.val;
}

static u64 load_latency_data(u64 status)
{
	union intel_x86_pebs_dse dse;
	u64 val;

	dse.val = status;

	/*
	 * use the mapping table for bit 0-3
	 */
	val = pebs_data_source[dse.ld_dse];

	/*
	 * Nehalem models do not support TLB or lock info
	 */
	if (x86_pmu.pebs_no_tlb) {
		val |= P(TLB, NA) | P(LOCK, NA);
		return val;
	}
	/*
	 * bit 4: TLB access
	 * 0 = did not miss 2nd level TLB
	 * 1 = missed 2nd level TLB
	 */
	if (dse.ld_stlb_miss)
		val |= P(TLB, MISS) | P(TLB, L2);
	else
		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

	/*
	 * bit 5: locked prefix
	 */
	if (dse.ld_locked)
		val |= P(LOCK, LOCKED);

	return val;
}
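
/*
 * PEBS record layouts, one per hardware PEBS format as reported by
 * intel_cap.pebs_format and selected in intel_ds_init():
 *
 *  fmt0: pebs_record_core
 *  fmt1: pebs_record_nhm  (adds status/dla/dse/lat)
 *  fmt2: pebs_record_hsw  (adds real_ip/tsx_tuning)
 *  fmt3: pebs_record_skl  (adds tsc)
 */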
struct pebs_record_core {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8, r9, r10, r11;
	u64 r12, r13, r14, r15;
};

struct pebs_record_nhm {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8, r9, r10, r11;
	u64 r12, r13, r14, r15;
	u64 status, dla, dse, lat;
};

/*
 * Same as pebs_record_nhm, with two additional fields.
 */
struct pebs_record_hsw {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8, r9, r10, r11;
	u64 r12, r13, r14, r15;
	u64 status, dla, dse, lat;
	u64 real_ip, tsx_tuning;
};

union hsw_tsx_tuning {
	struct {
		u32 cycles_last_block     : 32,
		    hle_abort             : 1,
		    rtm_abort             : 1,
		    instruction_abort     : 1,
		    non_instruction_abort : 1,
		    retry                 : 1,
		    data_conflict         : 1,
		    capacity_writes       : 1,
		    capacity_reads        : 1;
	};
	u64 value;
};

#define PEBS_HSW_TSX_FLAGS	0xff00000000ULL

/* Same as HSW, plus TSC */

struct pebs_record_skl {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8, r9, r10, r11;
	u64 r12, r13, r14, r15;
	u64 status, dla, dse, lat;
	u64 real_ip, tsx_tuning;
	u64 tsc;
};

void init_debug_store_on_cpu(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
		     (u32)((u64)(unsigned long)ds),
		     (u32)((u64)(unsigned long)ds >> 32));
}

void fini_debug_store_on_cpu(int cpu)
{
	if (!per_cpu(cpu_hw_events, cpu).ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}

static DEFINE_PER_CPU(void *, insn_buffer);

static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
{
	phys_addr_t pa;
	size_t msz = 0;

	pa = virt_to_phys(addr);
	for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
		cea_set_pte(cea, pa, prot);
}

static void ds_clear_cea(void *cea, size_t size)
{
	size_t msz = 0;

	for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
		cea_set_pte(cea, 0, PAGE_NONE);
}

static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
{
	unsigned int order = get_order(size);
	int node = cpu_to_node(cpu);
	struct page *page;

	page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
	return page ? page_address(page) : NULL;
}

static void dsfree_pages(const void *buffer, size_t size)
{
	if (buffer)
		free_pages((unsigned long)buffer, get_order(size));
}
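
/*
 * The PEBS and BTS buffers are allocated from regular kernel memory
 * (dsalloc_pages()) and additionally mapped into the per-CPU cpu_entry_area
 * via ds_update_cea(); the debug_store base pointers handed to the hardware
 * use the cpu_entry_area addresses rather than the direct-map addresses.
 */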
static int alloc_pebs_buffer(int cpu)
{
	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
	struct debug_store *ds = hwev->ds;
	size_t bsiz = x86_pmu.pebs_buffer_size;
	int max, node = cpu_to_node(cpu);
	void *buffer, *ibuffer, *cea;

	if (!x86_pmu.pebs)
		return 0;

	buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
	if (unlikely(!buffer))
		return -ENOMEM;

	/*
	 * HSW+ already provides us the eventing ip; no need to allocate this
	 * buffer then.
	 */
	if (x86_pmu.intel_cap.pebs_format < 2) {
		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
		if (!ibuffer) {
			dsfree_pages(buffer, bsiz);
			return -ENOMEM;
		}
		per_cpu(insn_buffer, cpu) = ibuffer;
	}
	hwev->ds_pebs_vaddr = buffer;
	/* Update the cpu entry area mapping */
	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
	ds->pebs_buffer_base = (unsigned long) cea;
	ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
	ds->pebs_index = ds->pebs_buffer_base;
	max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
	ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
	return 0;
}

static void release_pebs_buffer(int cpu)
{
	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
	struct debug_store *ds = hwev->ds;
	void *cea;

	if (!ds || !x86_pmu.pebs)
		return;

	kfree(per_cpu(insn_buffer, cpu));
	per_cpu(insn_buffer, cpu) = NULL;

	/* Clear the fixmap */
	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
	ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
	ds->pebs_buffer_base = 0;
	dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
	hwev->ds_pebs_vaddr = NULL;
}

static int alloc_bts_buffer(int cpu)
{
	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
	struct debug_store *ds = hwev->ds;
	void *buffer, *cea;
	int max;

	if (!x86_pmu.bts)
		return 0;

	buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
	if (unlikely(!buffer)) {
		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
		return -ENOMEM;
	}
	hwev->ds_bts_vaddr = buffer;
	/* Update the fixmap */
	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
	ds->bts_buffer_base = (unsigned long) cea;
	ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
	ds->bts_index = ds->bts_buffer_base;
	max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
	ds->bts_absolute_maximum = ds->bts_buffer_base + max;
	ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
	return 0;
}

static void release_bts_buffer(int cpu)
{
	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
	struct debug_store *ds = hwev->ds;
	void *cea;

	if (!ds || !x86_pmu.bts)
		return;

	/* Clear the fixmap */
	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
	ds_clear_cea(cea, BTS_BUFFER_SIZE);
	ds->bts_buffer_base = 0;
	dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
	hwev->ds_bts_vaddr = NULL;
}

static int alloc_ds_buffer(int cpu)
{
	struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;

	memset(ds, 0, sizeof(*ds));
	per_cpu(cpu_hw_events, cpu).ds = ds;
	return 0;
}

static void release_ds_buffer(int cpu)
{
	per_cpu(cpu_hw_events, cpu).ds = NULL;
}

void release_ds_buffers(void)
{
	int cpu;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	get_online_cpus();
	for_each_online_cpu(cpu)
		fini_debug_store_on_cpu(cpu);

	for_each_possible_cpu(cpu) {
		release_pebs_buffer(cpu);
		release_bts_buffer(cpu);
		release_ds_buffer(cpu);
	}
	put_online_cpus();
}
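
/*
 * BTS and PEBS buffers are allocated independently: a failure to allocate
 * one of them only disables that particular feature.  The debug_store
 * pointers are only torn down again when both BTS and PEBS ended up
 * unusable.
 */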
void reserve_ds_buffers(void)
{
	int bts_err = 0, pebs_err = 0;
	int cpu;

	x86_pmu.bts_active = 0;
	x86_pmu.pebs_active = 0;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	if (!x86_pmu.bts)
		bts_err = 1;

	if (!x86_pmu.pebs)
		pebs_err = 1;

	get_online_cpus();

	for_each_possible_cpu(cpu) {
		if (alloc_ds_buffer(cpu)) {
			bts_err = 1;
			pebs_err = 1;
		}

		if (!bts_err && alloc_bts_buffer(cpu))
			bts_err = 1;

		if (!pebs_err && alloc_pebs_buffer(cpu))
			pebs_err = 1;

		if (bts_err && pebs_err)
			break;
	}

	if (bts_err) {
		for_each_possible_cpu(cpu)
			release_bts_buffer(cpu);
	}

	if (pebs_err) {
		for_each_possible_cpu(cpu)
			release_pebs_buffer(cpu);
	}

	if (bts_err && pebs_err) {
		for_each_possible_cpu(cpu)
			release_ds_buffer(cpu);
	} else {
		if (x86_pmu.bts && !bts_err)
			x86_pmu.bts_active = 1;

		if (x86_pmu.pebs && !pebs_err)
			x86_pmu.pebs_active = 1;

		for_each_online_cpu(cpu)
			init_debug_store_on_cpu(cpu);
	}

	put_online_cpus();
}

/*
 * BTS
 */

struct event_constraint bts_constraint =
	EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);

void intel_pmu_enable_bts(u64 config)
{
	unsigned long debugctlmsr;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr |= DEBUGCTLMSR_TR;
	debugctlmsr |= DEBUGCTLMSR_BTS;
	if (config & ARCH_PERFMON_EVENTSEL_INT)
		debugctlmsr |= DEBUGCTLMSR_BTINT;

	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
		debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;

	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
		debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;

	update_debugctlmsr(debugctlmsr);
}

void intel_pmu_disable_bts(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	unsigned long debugctlmsr;

	if (!cpuc->ds)
		return;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr &=
		~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
		  DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);

	update_debugctlmsr(debugctlmsr);
}
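
/*
 * Drain the BTS buffer: emit one perf sample per branch record between
 * bts_buffer_base and the current bts_index, then reset the index.
 * Returns 1 when there was an active BTS event whose buffer was processed,
 * 0 otherwise.
 */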
int intel_pmu_drain_bts_buffer(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct bts_record {
		u64 from;
		u64 to;
		u64 flags;
	};
	struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
	struct bts_record *at, *base, *top;
	struct perf_output_handle handle;
	struct perf_event_header header;
	struct perf_sample_data data;
	unsigned long skip = 0;
	struct pt_regs regs;

	if (!event)
		return 0;

	if (!x86_pmu.bts_active)
		return 0;

	base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
	top  = (struct bts_record *)(unsigned long)ds->bts_index;

	if (top <= base)
		return 0;

	memset(&regs, 0, sizeof(regs));

	ds->bts_index = ds->bts_buffer_base;

	perf_sample_data_init(&data, 0, event->hw.last_period);

	/*
	 * BTS leaks kernel addresses in branches across the cpl boundary,
	 * such as traps or system calls, so unless the user is asking for
	 * kernel tracing (and right now it's not possible), we'd need to
	 * filter them out. But first we need to count how many of those we
	 * have in the current batch. This is an extra O(n) pass, however,
	 * it's much faster than the other one especially considering that
	 * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
	 * alloc_bts_buffer()).
	 */
	for (at = base; at < top; at++) {
		/*
		 * Note that right now *this* BTS code only works if
		 * attr::exclude_kernel is set, but let's keep this extra
		 * check here in case that changes.
		 */
		if (event->attr.exclude_kernel &&
		    (kernel_ip(at->from) || kernel_ip(at->to)))
			skip++;
	}

	/*
	 * Prepare a generic sample, i.e. fill in the invariant fields.
	 * We will overwrite the from and to address before we output
	 * the sample.
	 */
	rcu_read_lock();
	perf_prepare_sample(&header, &data, event, &regs);

	if (perf_output_begin(&handle, event, header.size *
			      (top - base - skip)))
		goto unlock;

	for (at = base; at < top; at++) {
		/* Filter out any records that contain kernel addresses. */
		if (event->attr.exclude_kernel &&
		    (kernel_ip(at->from) || kernel_ip(at->to)))
			continue;

		data.ip   = at->from;
		data.addr = at->to;

		perf_output_sample(&handle, &header, &data, event);
	}

	perf_output_end(&handle);

	/* There's new data available. */
	event->hw.interrupts++;
	event->pending_kill = POLL_IN;
unlock:
	rcu_read_unlock();
	return 1;
}

static inline void intel_pmu_drain_pebs_buffer(void)
{
	struct pt_regs regs;

	x86_pmu.drain_pebs(&regs);
}

/*
 * PEBS
 */
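
/*
 * Per-microarchitecture tables describing which events may be used with
 * PEBS and on which counters; intel_pebs_constraints() below picks the
 * matching entry and copies its flags into the event.
 */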
struct event_constraint intel_core2_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_atom_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_slm_pebs_event_constraints[] = {
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1),
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_glm_pebs_event_constraints[] = {
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_glp_pebs_event_constraints[] = {
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_nehalem_pebs_event_constraints[] = {
	INTEL_PLD_CONSTRAINT(0x100b, 0xf),          /* MEM_INST_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),          /* UOPS_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_westmere_pebs_event_constraints[] = {
	INTEL_PLD_CONSTRAINT(0x100b, 0xf),          /* MEM_INST_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),          /* UOPS_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_snb_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),          /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
	INTEL_PST_CONSTRAINT(0x02cd, 0x8),          /* MEM_TRANS_RETIRED.PRECISE_STORES */
	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),        /* MEM_UOP_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),        /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),        /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),        /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_ivb_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),          /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
	INTEL_PST_CONSTRAINT(0x02cd, 0x8),          /* MEM_TRANS_RETIRED.PRECISE_STORES */
	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),        /* MEM_UOP_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),        /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),        /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),        /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_hsw_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
	INTEL_PLD_CONSTRAINT(0x01cd, 0xf),          /* MEM_TRANS_RETIRED.* */
	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf),  /* UOPS_RETIRED.ALL */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_bdw_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
	INTEL_PLD_CONSTRAINT(0x01cd, 0xf),          /* MEM_TRANS_RETIRED.* */
	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};


struct event_constraint intel_skl_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),  /* INST_RETIRED.PREC_DIST */
	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
	/* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
	INTEL_PLD_CONSTRAINT(0x1cd, 0xf),           /* MEM_TRANS_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_L3_HIT_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_L3_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};
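
/*
 * Look up the PEBS constraint for @event.  On a match the constraint's
 * flags (load latency, precise store, data-la, ...) are copied into the
 * event; a precise event with no matching entry gets the empty constraint
 * and will fail to schedule.
 */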
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
	struct event_constraint *c;

	if (!event->attr.precise_ip)
		return NULL;

	if (x86_pmu.pebs_constraints) {
		for_each_event_constraint(c, x86_pmu.pebs_constraints) {
			if ((event->hw.config & c->cmask) == c->code) {
				event->hw.flags |= c->flags;
				return c;
			}
		}
	}

	return &emptyconstraint;
}

/*
 * We need the sched_task callback even for per-cpu events when we use
 * the large interrupt threshold, such that we can provide PID and TID
 * to PEBS samples.
 */
static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
{
	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
}

void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!sched_in && pebs_needs_sched_cb(cpuc))
		intel_pmu_drain_pebs_buffer();
}

static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
{
	struct debug_store *ds = cpuc->ds;
	u64 threshold;

	if (cpuc->n_pebs == cpuc->n_large_pebs) {
		threshold = ds->pebs_absolute_maximum -
			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
	} else {
		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
	}

	ds->pebs_interrupt_threshold = threshold;
}

static void
pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
{
	/*
	 * Make sure we get updated with the first PEBS
	 * event. It will trigger also during removal, but
	 * that does not hurt:
	 */
	bool update = cpuc->n_pebs == 1;

	if (needed_cb != pebs_needs_sched_cb(cpuc)) {
		if (!needed_cb)
			perf_sched_cb_inc(pmu);
		else
			perf_sched_cb_dec(pmu);

		update = true;
	}

	if (update)
		pebs_update_threshold(cpuc);
}

void intel_pmu_pebs_add(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	bool needed_cb = pebs_needs_sched_cb(cpuc);

	cpuc->n_pebs++;
	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
		cpuc->n_large_pebs++;

	pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
}
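
/*
 * cpuc->pebs_enabled mirrors MSR_IA32_PEBS_ENABLE: bit 'idx' arms PEBS for
 * counter 'idx'; in addition the code below sets bit 'idx + 32' for load
 * latency events and bit 63 for precise store events.  The MSR itself is
 * written by intel_pmu_pebs_enable_all()/_disable_all(), and by
 * intel_pmu_pebs_disable() while the PMU is enabled.
 */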
void intel_pmu_pebs_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	struct debug_store *ds = cpuc->ds;

	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;

	cpuc->pebs_enabled |= 1ULL << hwc->idx;

	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
		cpuc->pebs_enabled |= 1ULL << 63;

	/*
	 * Use auto-reload if possible to save an MSR write in the PMI.
	 * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
	 */
	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
		ds->pebs_event_reset[hwc->idx] =
			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
	} else {
		ds->pebs_event_reset[hwc->idx] = 0;
	}
}

void intel_pmu_pebs_del(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	bool needed_cb = pebs_needs_sched_cb(cpuc);

	cpuc->n_pebs--;
	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
		cpuc->n_large_pebs--;

	pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
}

void intel_pmu_pebs_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;

	if (cpuc->n_pebs == cpuc->n_large_pebs)
		intel_pmu_drain_pebs_buffer();

	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);

	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
		cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
		cpuc->pebs_enabled &= ~(1ULL << 63);

	if (cpuc->enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);

	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
}

void intel_pmu_pebs_enable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->pebs_enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
}

void intel_pmu_pebs_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->pebs_enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}

static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	unsigned long from = cpuc->lbr_entries[0].from;
	unsigned long old_to, to = cpuc->lbr_entries[0].to;
	unsigned long ip = regs->ip;
	int is_64bit = 0;
	void *kaddr;
	int size;

	/*
	 * We don't need to fixup if the PEBS assist is fault like
	 */
	if (!x86_pmu.intel_cap.pebs_trap)
		return 1;

	/*
	 * No LBR entry, no basic block, no rewinding
	 */
	if (!cpuc->lbr_stack.nr || !from || !to)
		return 0;

	/*
	 * Basic blocks should never cross user/kernel boundaries
	 */
	if (kernel_ip(ip) != kernel_ip(to))
		return 0;

	/*
	 * unsigned math, either ip is before the start (impossible) or
	 * the basic block is larger than 1 page (sanity)
	 */
	if ((ip - to) > PEBS_FIXUP_SIZE)
		return 0;

	/*
	 * We sampled a branch insn, rewind using the LBR stack
	 */
	if (ip == to) {
		set_linear_ip(regs, from);
		return 1;
	}

	size = ip - to;
	if (!kernel_ip(ip)) {
		int bytes;
		u8 *buf = this_cpu_read(insn_buffer);

		/* 'size' must fit our buffer, see above */
		bytes = copy_from_user_nmi(buf, (void __user *)to, size);
		if (bytes != 0)
			return 0;

		kaddr = buf;
	} else {
		kaddr = (void *)to;
	}

	do {
		struct insn insn;

		old_to = to;

#ifdef CONFIG_X86_64
		is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
#endif
		insn_init(&insn, kaddr, size, is_64bit);
		insn_get_length(&insn);
		/*
		 * Make sure there was not a problem decoding the
		 * instruction and getting the length. This is
		 * doubly important because we have an infinite
		 * loop if insn.length=0.
		 */
		if (!insn.length)
			break;

		to += insn.length;
		kaddr += insn.length;
		size -= insn.length;
	} while (to < ip);

	if (to == ip) {
		set_linear_ip(regs, old_to);
		return 1;
	}

	/*
	 * Even though we decoded the basic block, the instruction stream
	 * never matched the given IP, either the TO or the IP got corrupted.
	 */
	return 0;
}

static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs)
{
	if (pebs->tsx_tuning) {
		union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
		return tsx.cycles_last_block;
	}
	return 0;
}

static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs)
{
	u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;

	/* For RTM XABORTs also log the abort code from AX */
	if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
		txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
	return txn;
}

static void setup_pebs_sample_data(struct perf_event *event,
				   struct pt_regs *iregs, void *__pebs,
				   struct perf_sample_data *data,
				   struct pt_regs *regs)
{
#define PERF_X86_EVENT_PEBS_HSW_PREC \
		(PERF_X86_EVENT_PEBS_ST_HSW | \
		 PERF_X86_EVENT_PEBS_LD_HSW | \
		 PERF_X86_EVENT_PEBS_NA_HSW)
	/*
	 * We cast to the biggest pebs_record but are careful not to
	 * unconditionally access the 'extra' entries.
	 */
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct pebs_record_skl *pebs = __pebs;
	u64 sample_type;
	int fll, fst, dsrc;
	int fl = event->hw.flags;

	if (pebs == NULL)
		return;

	sample_type = event->attr.sample_type;
	dsrc = sample_type & PERF_SAMPLE_DATA_SRC;

	fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
	fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);

	perf_sample_data_init(data, 0, event->hw.last_period);

	data->period = event->hw.last_period;

	/*
	 * Use latency for weight (only avail with PEBS-LL)
	 */
	if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
		data->weight = pebs->lat;

	/*
	 * data.data_src encodes the data source
	 */
	if (dsrc) {
		u64 val = PERF_MEM_NA;
		if (fll)
			val = load_latency_data(pebs->dse);
		else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
			val = precise_datala_hsw(event, pebs->dse);
		else if (fst)
			val = precise_store_data(pebs->dse);
		data->data_src.val = val;
	}

	/*
	 * We use the interrupt regs as a base because the PEBS record does not
	 * contain a full regs set, specifically it seems to lack segment
	 * descriptors, which get used by things like user_mode().
	 *
	 * In the simple case fix up only the IP for PERF_SAMPLE_IP.
	 *
	 * We must however always use BP,SP from iregs for the unwinder to stay
	 * sane; the record BP,SP can point into thin air when the record is
	 * from a previous PMI context or an (I)RET happened between the record
	 * and PMI.
	 */
	*regs = *iregs;
	regs->flags = pebs->flags;
	set_linear_ip(regs, pebs->ip);

	if (sample_type & PERF_SAMPLE_REGS_INTR) {
		regs->ax = pebs->ax;
		regs->bx = pebs->bx;
		regs->cx = pebs->cx;
		regs->dx = pebs->dx;
		regs->si = pebs->si;
		regs->di = pebs->di;

		/*
		 * Per the above; only set BP,SP if we don't need callchains.
		 *
		 * XXX: does this make sense?
		 */
		if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
			regs->bp = pebs->bp;
			regs->sp = pebs->sp;
		}

		/*
		 * Preserve PERF_EFLAGS_VM from set_linear_ip().
		 */
		regs->flags = pebs->flags | (regs->flags & PERF_EFLAGS_VM);
#ifndef CONFIG_X86_32
		regs->r8 = pebs->r8;
		regs->r9 = pebs->r9;
		regs->r10 = pebs->r10;
		regs->r11 = pebs->r11;
		regs->r12 = pebs->r12;
		regs->r13 = pebs->r13;
		regs->r14 = pebs->r14;
		regs->r15 = pebs->r15;
#endif
	}

	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
		regs->ip = pebs->real_ip;
		regs->flags |= PERF_EFLAGS_EXACT;
	} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs))
		regs->flags |= PERF_EFLAGS_EXACT;
	else
		regs->flags &= ~PERF_EFLAGS_EXACT;

	if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) &&
	    x86_pmu.intel_cap.pebs_format >= 1)
		data->addr = pebs->dla;

	if (x86_pmu.intel_cap.pebs_format >= 2) {
		/* Only set the TSX weight when no memory weight. */
		if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
			data->weight = intel_hsw_weight(pebs);

		if (sample_type & PERF_SAMPLE_TRANSACTION)
			data->txn = intel_hsw_transaction(pebs);
	}

	/*
	 * v3 supplies an accurate time stamp, so we use that
	 * for the time stamp.
	 *
	 * We can only do this for the default trace clock.
	 */
	if (x86_pmu.intel_cap.pebs_format >= 3 &&
	    event->attr.use_clockid == 0)
		data->time = native_sched_clock_from_tsc(pebs->tsc);

	if (has_branch_stack(event))
		data->br_stack = &cpuc->lbr_stack;
}

static inline void *
get_next_pebs_record_by_bit(void *base, void *top, int bit)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	void *at;
	u64 pebs_status;

	/*
	 * fmt0 does not have a status bitfield (does not use
	 * perf_record_nhm format)
	 */
	if (x86_pmu.intel_cap.pebs_format < 1)
		return base;

	if (base == NULL)
		return NULL;

	for (at = base; at < top; at += x86_pmu.pebs_record_size) {
		struct pebs_record_nhm *p = at;

		if (test_bit(bit, (unsigned long *)&p->status)) {
			/* PEBS v3 has accurate status bits */
			if (x86_pmu.intel_cap.pebs_format >= 3)
				return at;

			if (p->status == (1 << bit))
				return at;

			/* clear non-PEBS bit and re-check */
			pebs_status = p->status & cpuc->pebs_enabled;
			pebs_status &= PEBS_COUNTER_MASK;
			if (pebs_status == (1 << bit))
				return at;
		}
	}
	return NULL;
}
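
/*
 * Emit samples for the 'count' PEBS records in [base, top) that belong to
 * counter 'bit'.  All but the last record are output directly; the last one
 * goes through perf_event_overflow() so period accounting and throttling
 * still apply.
 */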
static void __intel_pmu_pebs_event(struct perf_event *event,
				   struct pt_regs *iregs,
				   void *base, void *top,
				   int bit, int count)
{
	struct perf_sample_data data;
	struct pt_regs regs;
	void *at = get_next_pebs_record_by_bit(base, top, bit);

	if (!intel_pmu_save_and_restart(event) &&
	    !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
		return;

	while (count > 1) {
		setup_pebs_sample_data(event, iregs, at, &data, &regs);
		perf_event_output(event, &data, &regs);
		at += x86_pmu.pebs_record_size;
		at = get_next_pebs_record_by_bit(at, top, bit);
		count--;
	}

	setup_pebs_sample_data(event, iregs, at, &data, &regs);

	/*
	 * All but the last records are processed.
	 * The last one is left to be able to call the overflow handler.
	 */
	if (perf_event_overflow(event, &data, &regs)) {
		x86_pmu_stop(event, 0);
		return;
	}

}

static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct perf_event *event = cpuc->events[0]; /* PMC0 only */
	struct pebs_record_core *at, *top;
	int n;

	if (!x86_pmu.pebs_active)
		return;

	at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;

	/*
	 * Whatever else happens, drain the thing
	 */
	ds->pebs_index = ds->pebs_buffer_base;

	if (!test_bit(0, cpuc->active_mask))
		return;

	WARN_ON_ONCE(!event);

	if (!event->attr.precise_ip)
		return;

	n = top - at;
	if (n <= 0)
		return;

	__intel_pmu_pebs_event(event, iregs, at, top, 0, n);
}

static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct perf_event *event;
	void *base, *at, *top;
	short counts[MAX_PEBS_EVENTS] = {};
	short error[MAX_PEBS_EVENTS] = {};
	int bit, i;

	if (!x86_pmu.pebs_active)
		return;

	base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;

	ds->pebs_index = ds->pebs_buffer_base;

	if (unlikely(base >= top))
		return;

	for (at = base; at < top; at += x86_pmu.pebs_record_size) {
		struct pebs_record_nhm *p = at;
		u64 pebs_status;

		pebs_status = p->status & cpuc->pebs_enabled;
		pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;

		/* PEBS v3 has more accurate status bits */
		if (x86_pmu.intel_cap.pebs_format >= 3) {
			for_each_set_bit(bit, (unsigned long *)&pebs_status,
					 x86_pmu.max_pebs_events)
				counts[bit]++;

			continue;
		}

		/*
		 * On some CPUs the PEBS status can be zero when PEBS is
		 * racing with clearing of GLOBAL_STATUS.
		 *
		 * Normally we would drop that record, but in the
		 * case when there is only a single active PEBS event
		 * we can assume it's for that event.
		 */
		if (!pebs_status && cpuc->pebs_enabled &&
			!(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
			pebs_status = cpuc->pebs_enabled;

		bit = find_first_bit((unsigned long *)&pebs_status,
					x86_pmu.max_pebs_events);
		if (bit >= x86_pmu.max_pebs_events)
			continue;

		/*
		 * The PEBS hardware does not deal well with the situation
		 * when events happen close to each other and multiple bits
		 * are set. But it should happen rarely.
		 *
		 * If these events include one PEBS and multiple non-PEBS
		 * events, it doesn't impact the PEBS record. The record will
		 * be handled normally. (slow path)
		 *
		 * If these events include two or more PEBS events, the
		 * records for the events can be collapsed into a single
		 * one, and it's not possible to reconstruct all events
		 * that caused the PEBS record. It's called collision.
		 * If collision happened, the record will be dropped.
		 */
		if (p->status != (1ULL << bit)) {
			for_each_set_bit(i, (unsigned long *)&pebs_status,
					 x86_pmu.max_pebs_events)
				error[i]++;
			continue;
		}

		counts[bit]++;
	}

	for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
		if ((counts[bit] == 0) && (error[bit] == 0))
			continue;

		event = cpuc->events[bit];
		if (WARN_ON_ONCE(!event))
			continue;

		if (WARN_ON_ONCE(!event->attr.precise_ip))
			continue;

		/* log dropped samples number */
		if (error[bit]) {
			perf_log_lost_samples(event, error[bit]);

			if (perf_event_account_interrupt(event))
				x86_pmu_stop(event, 0);
		}

		if (counts[bit]) {
			__intel_pmu_pebs_event(event, iregs, base,
					       top, bit, counts[bit]);
		}
	}
}

/*
 * BTS, PEBS probe and setup
 */

void __init intel_ds_init(void)
{
	/*
	 * No support for 32bit formats
	 */
	if (!boot_cpu_has(X86_FEATURE_DTES64))
		return;

	x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
	x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
	x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
	if (x86_pmu.pebs) {
		char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
		int format = x86_pmu.intel_cap.pebs_format;

		switch (format) {
		case 0:
			pr_cont("PEBS fmt0%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
			/*
			 * Using >PAGE_SIZE buffers makes the WRMSR to
			 * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
			 * mysteriously hang on Core2.
			 *
			 * As a workaround, we don't do this.
			 */
			x86_pmu.pebs_buffer_size = PAGE_SIZE;
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
			break;

		case 1:
			pr_cont("PEBS fmt1%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			break;

		case 2:
			pr_cont("PEBS fmt2%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			break;

		case 3:
			pr_cont("PEBS fmt3%c, ", pebs_type);
			x86_pmu.pebs_record_size =
						sizeof(struct pebs_record_skl);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			x86_pmu.free_running_flags |= PERF_SAMPLE_TIME;
			break;

		default:
			pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
			x86_pmu.pebs = 0;
		}
	}
}

void perf_restore_debug_store(void)
{
	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
}