// SPDX-License-Identifier: GPL-2.0
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/slab.h>

#include <asm/perf_event.h>
#include <asm/insn.h>

#include "../perf_event.h"

/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE		24

#define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
#define PEBS_BUFFER_SIZE	(PAGE_SIZE << 4)
#define PEBS_FIXUP_SIZE		PAGE_SIZE

/*
 * pebs_record_32 for p4 and core not supported

struct pebs_record_32 {
	u32 flags, ip;
	u32 ax, bx, cx, dx;
	u32 si, di, bp, sp;
};

 */

union intel_x86_pebs_dse {
	u64 val;
	struct {
		unsigned int ld_dse:4;
		unsigned int ld_stlb_miss:1;
		unsigned int ld_locked:1;
		unsigned int ld_reserved:26;
	};
	struct {
		unsigned int st_l1d_hit:1;
		unsigned int st_reserved1:3;
		unsigned int st_stlb_miss:1;
		unsigned int st_locked:1;
		unsigned int st_reserved2:26;
	};
};


/*
 * Map PEBS Load Latency Data Source encodings to generic
 * memory data source information
 */
#define P(a, b) PERF_MEM_S(a, b)
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
#define LEVEL(x) P(LVLNUM, x)
#define REM P(REMOTE, REMOTE)
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))

/* Version for Sandy Bridge and later */
static u64 pebs_data_source[] = {
	P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),		/* 0x00: ukn L3 */
	OP_LH | P(LVL, L1)  | LEVEL(L1)  | P(SNOOP, NONE),		/* 0x01: L1 local */
	OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE),		/* 0x02: LFB hit */
	OP_LH | P(LVL, L2)  | LEVEL(L2)  | P(SNOOP, NONE),		/* 0x03: L2 hit */
	OP_LH | P(LVL, L3)  | LEVEL(L3)  | P(SNOOP, NONE),		/* 0x04: L3 hit */
	OP_LH | P(LVL, L3)  | LEVEL(L3)  | P(SNOOP, MISS),		/* 0x05: L3 hit, snoop miss */
	OP_LH | P(LVL, L3)  | LEVEL(L3)  | P(SNOOP, HIT),		/* 0x06: L3 hit, snoop hit */
	OP_LH | P(LVL, L3)  | LEVEL(L3)  | P(SNOOP, HITM),		/* 0x07: L3 hit, snoop hitm */
	OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT),	/* 0x08: L3 miss snoop hit */
	OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM),	/* 0x09: L3 miss snoop hitm */
	OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | P(SNOOP, HIT),		/* 0x0a: L3 miss, shared */
	OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT),	/* 0x0b: L3 miss, shared */
	OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | SNOOP_NONE_MISS,	/* 0x0c: L3 miss, excl */
	OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS,	/* 0x0d: L3 miss, excl */
	OP_LH | P(LVL, IO)  | LEVEL(NA) | P(SNOOP, NONE),		/* 0x0e: I/O */
	OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE),		/* 0x0f: uncached */
};

/* Patch up minor differences in the bits */
void __init intel_pmu_pebs_data_source_nhm(void)
{
	pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
	pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
	pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
}

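/*
 * Skylake redefines several encodings: 0x08/0x09 become local/remote
 * L4 cache (or persistent memory) hits, 0x0b remote RAM, and 0x0c/0x0d
 * remote-cache forward/HITM; patch the table accordingly.
 */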
void __init intel_pmu_pebs_data_source_skl(bool pmem)
{
	u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);

	pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
	pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
	pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
	pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
	pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
}

static u64 precise_store_data(u64 status)
{
	union intel_x86_pebs_dse dse;
	u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);

	dse.val = status;

	/*
	 * bit 4: TLB access
	 * 1 = store missed 2nd level TLB
	 *
	 * so it either hit the walker or the OS
	 * otherwise hit 2nd level TLB
	 */
	if (dse.st_stlb_miss)
		val |= P(TLB, MISS);
	else
		val |= P(TLB, HIT);

	/*
	 * bit 0: hit L1 data cache
	 * if not set, then all we know is that
	 * it missed L1D
	 */
	if (dse.st_l1d_hit)
		val |= P(LVL, HIT);
	else
		val |= P(LVL, MISS);

	/*
	 * bit 5: Locked prefix
	 */
	if (dse.st_locked)
		val |= P(LOCK, LOCKED);

	return val;
}

static u64 precise_datala_hsw(struct perf_event *event, u64 status)
{
	union perf_mem_data_src dse;

	dse.val = PERF_MEM_NA;

	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
		dse.mem_op = PERF_MEM_OP_STORE;
	else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
		dse.mem_op = PERF_MEM_OP_LOAD;

	/*
	 * L1 info only valid for following events:
	 *
	 * MEM_UOPS_RETIRED.STLB_MISS_STORES
	 * MEM_UOPS_RETIRED.LOCK_STORES
	 * MEM_UOPS_RETIRED.SPLIT_STORES
	 * MEM_UOPS_RETIRED.ALL_STORES
	 */
	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
		if (status & 1)
			dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		else
			dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
	}
	return dse.val;
}

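/*
 * Example: a raw dse value of 0x25 decodes as ld_dse = 0x5 (L3 hit,
 * snoop miss), ld_stlb_miss = 0 and ld_locked = 1, so (on parts that
 * report TLB/lock info) the result is pebs_data_source[0x5] |
 * P(TLB, HIT) | P(TLB, L1) | P(TLB, L2) | P(LOCK, LOCKED).
 */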
static u64 load_latency_data(u64 status)
{
	union intel_x86_pebs_dse dse;
	u64 val;

	dse.val = status;

	/*
	 * use the mapping table for bit 0-3
	 */
	val = pebs_data_source[dse.ld_dse];

	/*
	 * Nehalem models do not support TLB or lock info
	 */
	if (x86_pmu.pebs_no_tlb) {
		val |= P(TLB, NA) | P(LOCK, NA);
		return val;
	}
	/*
	 * bit 4: TLB access
	 * 0 = did not miss 2nd level TLB
	 * 1 = missed 2nd level TLB
	 */
	if (dse.ld_stlb_miss)
		val |= P(TLB, MISS) | P(TLB, L2);
	else
		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

	/*
	 * bit 5: locked prefix
	 */
	if (dse.ld_locked)
		val |= P(LOCK, LOCKED);

	return val;
}

struct pebs_record_core {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
};

struct pebs_record_nhm {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
	u64 status, dla, dse, lat;
};

/*
 * Same as pebs_record_nhm, with two additional fields.
 */
struct pebs_record_hsw {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
	u64 status, dla, dse, lat;
	u64 real_ip, tsx_tuning;
};

union hsw_tsx_tuning {
	struct {
		u32 cycles_last_block     : 32,
		    hle_abort             : 1,
		    rtm_abort             : 1,
		    instruction_abort     : 1,
		    non_instruction_abort : 1,
		    retry                 : 1,
		    data_conflict         : 1,
		    capacity_writes       : 1,
		    capacity_reads        : 1;
	};
	u64	value;
};

#define PEBS_HSW_TSX_FLAGS	0xff00000000ULL

/* Same as HSW, plus TSC */

struct pebs_record_skl {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
	u64 status, dla, dse, lat;
	u64 real_ip, tsx_tuning;
	u64 tsc;
};

void init_debug_store_on_cpu(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
		     (u32)((u64)(unsigned long)ds),
		     (u32)((u64)(unsigned long)ds >> 32));
}

void fini_debug_store_on_cpu(int cpu)
{
	if (!per_cpu(cpu_hw_events, cpu).ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}

static DEFINE_PER_CPU(void *, insn_buffer);

static int alloc_pebs_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
	int node = cpu_to_node(cpu);
	int max;
	void *buffer, *ibuffer;

	if (!x86_pmu.pebs)
		return 0;

	buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
	if (unlikely(!buffer))
		return -ENOMEM;

	/*
	 * HSW+ already provides us the eventing ip; no need to allocate this
	 * buffer then.
	 */
	if (x86_pmu.intel_cap.pebs_format < 2) {
		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
		if (!ibuffer) {
			kfree(buffer);
			return -ENOMEM;
		}
		per_cpu(insn_buffer, cpu) = ibuffer;
	}

	max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;

	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
	ds->pebs_index = ds->pebs_buffer_base;
	ds->pebs_absolute_maximum = ds->pebs_buffer_base +
		max * x86_pmu.pebs_record_size;

	return 0;
}

static void release_pebs_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds || !x86_pmu.pebs)
		return;

	kfree(per_cpu(insn_buffer, cpu));
	per_cpu(insn_buffer, cpu) = NULL;

	kfree((void *)(unsigned long)ds->pebs_buffer_base);
	ds->pebs_buffer_base = 0;
}

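/*
 * Allocate the per-cpu BTS buffer and fill in the DS-area fields; the
 * interrupt threshold is set one sixteenth of the buffer from its end.
 */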
static int alloc_bts_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
	int node = cpu_to_node(cpu);
	int max, thresh;
	void *buffer;

	if (!x86_pmu.bts)
		return 0;

	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
	if (unlikely(!buffer)) {
		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
		return -ENOMEM;
	}

	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
	thresh = max / 16;

	ds->bts_buffer_base = (u64)(unsigned long)buffer;
	ds->bts_index = ds->bts_buffer_base;
	ds->bts_absolute_maximum = ds->bts_buffer_base +
		max * BTS_RECORD_SIZE;
	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
		thresh * BTS_RECORD_SIZE;

	return 0;
}

static void release_bts_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds || !x86_pmu.bts)
		return;

	kfree((void *)(unsigned long)ds->bts_buffer_base);
	ds->bts_buffer_base = 0;
}

static int alloc_ds_buffer(int cpu)
{
	int node = cpu_to_node(cpu);
	struct debug_store *ds;

	ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
	if (unlikely(!ds))
		return -ENOMEM;

	per_cpu(cpu_hw_events, cpu).ds = ds;

	return 0;
}

static void release_ds_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds)
		return;

	per_cpu(cpu_hw_events, cpu).ds = NULL;
	kfree(ds);
}

void release_ds_buffers(void)
{
	int cpu;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	get_online_cpus();
	for_each_online_cpu(cpu)
		fini_debug_store_on_cpu(cpu);

	for_each_possible_cpu(cpu) {
		release_pebs_buffer(cpu);
		release_bts_buffer(cpu);
		release_ds_buffer(cpu);
	}
	put_online_cpus();
}

void reserve_ds_buffers(void)
{
	int bts_err = 0, pebs_err = 0;
	int cpu;

	x86_pmu.bts_active = 0;
	x86_pmu.pebs_active = 0;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	if (!x86_pmu.bts)
		bts_err = 1;

	if (!x86_pmu.pebs)
		pebs_err = 1;

	get_online_cpus();

	for_each_possible_cpu(cpu) {
		if (alloc_ds_buffer(cpu)) {
			bts_err = 1;
			pebs_err = 1;
		}

		if (!bts_err && alloc_bts_buffer(cpu))
			bts_err = 1;

		if (!pebs_err && alloc_pebs_buffer(cpu))
			pebs_err = 1;

		if (bts_err && pebs_err)
			break;
	}

	if (bts_err) {
		for_each_possible_cpu(cpu)
			release_bts_buffer(cpu);
	}

	if (pebs_err) {
		for_each_possible_cpu(cpu)
			release_pebs_buffer(cpu);
	}

	if (bts_err && pebs_err) {
		for_each_possible_cpu(cpu)
			release_ds_buffer(cpu);
	} else {
		if (x86_pmu.bts && !bts_err)
			x86_pmu.bts_active = 1;

		if (x86_pmu.pebs && !pebs_err)
			x86_pmu.pebs_active = 1;

		for_each_online_cpu(cpu)
			init_debug_store_on_cpu(cpu);
	}

	put_online_cpus();
}

/*
 * BTS
 */

struct event_constraint bts_constraint =
	EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);

void intel_pmu_enable_bts(u64 config)
{
	unsigned long debugctlmsr;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr |= DEBUGCTLMSR_TR;
	debugctlmsr |= DEBUGCTLMSR_BTS;
	if (config & ARCH_PERFMON_EVENTSEL_INT)
		debugctlmsr |= DEBUGCTLMSR_BTINT;

	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
		debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;

	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
		debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;

	update_debugctlmsr(debugctlmsr);
}

void intel_pmu_disable_bts(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	unsigned long debugctlmsr;

	if (!cpuc->ds)
		return;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr &=
		~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
		  DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);

	update_debugctlmsr(debugctlmsr);
}

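/*
 * Flush the BTS buffer into the perf sample stream: every record becomes
 * one sample with ip = branch-from and addr = branch-to.  Records that
 * touch kernel addresses are skipped when attr.exclude_kernel is set.
 */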
int intel_pmu_drain_bts_buffer(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct bts_record {
		u64	from;
		u64	to;
		u64	flags;
	};
	struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
	struct bts_record *at, *base, *top;
	struct perf_output_handle handle;
	struct perf_event_header header;
	struct perf_sample_data data;
	unsigned long skip = 0;
	struct pt_regs regs;

	if (!event)
		return 0;

	if (!x86_pmu.bts_active)
		return 0;

	base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
	top  = (struct bts_record *)(unsigned long)ds->bts_index;

	if (top <= base)
		return 0;

	memset(&regs, 0, sizeof(regs));

	ds->bts_index = ds->bts_buffer_base;

	perf_sample_data_init(&data, 0, event->hw.last_period);

	/*
	 * BTS leaks kernel addresses in branches across the cpl boundary,
	 * such as traps or system calls, so unless the user is asking for
	 * kernel tracing (and right now it's not possible), we'd need to
	 * filter them out. But first we need to count how many of those we
	 * have in the current batch. This is an extra O(n) pass, however,
	 * it's much faster than the other one especially considering that
	 * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
	 * alloc_bts_buffer()).
	 */
	for (at = base; at < top; at++) {
		/*
		 * Note that right now *this* BTS code only works if
		 * attr::exclude_kernel is set, but let's keep this extra
		 * check here in case that changes.
		 */
		if (event->attr.exclude_kernel &&
		    (kernel_ip(at->from) || kernel_ip(at->to)))
			skip++;
	}

	/*
	 * Prepare a generic sample, i.e. fill in the invariant fields.
	 * We will overwrite the from and to address before we output
	 * the sample.
	 */
	rcu_read_lock();
	perf_prepare_sample(&header, &data, event, &regs);

	if (perf_output_begin(&handle, event, header.size *
			      (top - base - skip)))
		goto unlock;

	for (at = base; at < top; at++) {
		/* Filter out any records that contain kernel addresses. */
		if (event->attr.exclude_kernel &&
		    (kernel_ip(at->from) || kernel_ip(at->to)))
			continue;

		data.ip		= at->from;
		data.addr	= at->to;

		perf_output_sample(&handle, &header, &data, event);
	}

	perf_output_end(&handle);

	/* There's new data available. */
	event->hw.interrupts++;
	event->pending_kill = POLL_IN;
unlock:
	rcu_read_unlock();
	return 1;
}

static inline void intel_pmu_drain_pebs_buffer(void)
{
	struct pt_regs regs;

	x86_pmu.drain_pebs(&regs);
}

/*
 * PEBS
 */
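/*
 * Per-model PEBS event constraint tables.  The first argument of each
 * entry is the event code (optionally including the umask), the second
 * is the bitmask of counters on which the event may be used as PEBS.
 */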
struct event_constraint intel_core2_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_atom_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_slm_pebs_event_constraints[] = {
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1),
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_glm_pebs_event_constraints[] = {
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_glp_pebs_event_constraints[] = {
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};

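/*
 * The PLD (load latency) and PST (precise store) constraints also tag
 * the event so that its PEBS data source field gets decoded through
 * load_latency_data() / precise_store_data() when building the sample.
 */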
struct event_constraint intel_nehalem_pebs_event_constraints[] = {
	INTEL_PLD_CONSTRAINT(0x100b, 0xf),	    /* MEM_INST_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),	    /* UOPS_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_westmere_pebs_event_constraints[] = {
	INTEL_PLD_CONSTRAINT(0x100b, 0xf),	    /* MEM_INST_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),	    /* UOPS_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_snb_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),	    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
	INTEL_PST_CONSTRAINT(0x02cd, 0x8),	    /* MEM_TRANS_RETIRED.PRECISE_STORES */
	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),	    /* MEM_UOP_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),	    /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),	    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),	    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_ivb_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),	    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
	INTEL_PST_CONSTRAINT(0x02cd, 0x8),	    /* MEM_TRANS_RETIRED.PRECISE_STORES */
	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),	    /* MEM_UOP_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),	    /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),	    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),	    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};

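/*
 * On Haswell and later the DATALA_* constraints tag load/store events
 * so that the HSW-style data source (precise_datala_hsw()) is used when
 * building the sample.
 */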
struct event_constraint intel_hsw_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
	INTEL_PLD_CONSTRAINT(0x01cd, 0xf),	    /* MEM_TRANS_RETIRED.* */
	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_bdw_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
	INTEL_PLD_CONSTRAINT(0x01cd, 0xf),	    /* MEM_TRANS_RETIRED.* */
	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};


struct event_constraint intel_skl_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),	/* INST_RETIRED.PREC_DIST */
	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
	/* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
	INTEL_PLD_CONSTRAINT(0x1cd, 0xf),		      /* MEM_TRANS_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_L3_HIT_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_L3_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};

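/*
 * Return the PEBS constraint matching @event (and copy its extra PEBS
 * flags into the event), &emptyconstraint if nothing matches, or NULL
 * when the event is not a precise_ip event at all.
 */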
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
	struct event_constraint *c;

	if (!event->attr.precise_ip)
		return NULL;

	if (x86_pmu.pebs_constraints) {
		for_each_event_constraint(c, x86_pmu.pebs_constraints) {
			if ((event->hw.config & c->cmask) == c->code) {
				event->hw.flags |= c->flags;
				return c;
			}
		}
	}

	return &emptyconstraint;
}

/*
 * We need the sched_task callback even for per-cpu events when we use
 * the large interrupt threshold, such that we can provide PID and TID
 * to PEBS samples.
 */
static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
{
	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
}

void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!sched_in && pebs_needs_sched_cb(cpuc))
		intel_pmu_drain_pebs_buffer();
}

static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
{
	struct debug_store *ds = cpuc->ds;
	u64 threshold;

	if (cpuc->n_pebs == cpuc->n_large_pebs) {
		threshold = ds->pebs_absolute_maximum -
			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
	} else {
		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
	}

	ds->pebs_interrupt_threshold = threshold;
}

static void
pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
{
	/*
	 * Make sure we get updated with the first PEBS
	 * event. It will trigger also during removal, but
	 * that does not hurt:
	 */
	bool update = cpuc->n_pebs == 1;

	if (needed_cb != pebs_needs_sched_cb(cpuc)) {
		if (!needed_cb)
			perf_sched_cb_inc(pmu);
		else
			perf_sched_cb_dec(pmu);

		update = true;
	}

	if (update)
		pebs_update_threshold(cpuc);
}

void intel_pmu_pebs_add(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	bool needed_cb = pebs_needs_sched_cb(cpuc);

	cpuc->n_pebs++;
	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
		cpuc->n_large_pebs++;

	pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
}

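/*
 * Enable PEBS for the event's counter: set its bit in the PEBS_ENABLE
 * shadow, plus the load-latency (bit idx + 32) or precise-store (bit 63)
 * enable bit when the event was tagged with the corresponding flag.
 */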
void intel_pmu_pebs_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	struct debug_store *ds = cpuc->ds;

	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;

	cpuc->pebs_enabled |= 1ULL << hwc->idx;

	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
		cpuc->pebs_enabled |= 1ULL << 63;

	/*
	 * Use auto-reload if possible to save an MSR write in the PMI.
	 * This must be done in pmu::start(), because of PERF_EVENT_IOC_PERIOD.
	 */
	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
		ds->pebs_event_reset[hwc->idx] =
			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
	} else {
		ds->pebs_event_reset[hwc->idx] = 0;
	}
}

void intel_pmu_pebs_del(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	bool needed_cb = pebs_needs_sched_cb(cpuc);

	cpuc->n_pebs--;
	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
		cpuc->n_large_pebs--;

	pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
}

void intel_pmu_pebs_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;

	if (cpuc->n_pebs == cpuc->n_large_pebs)
		intel_pmu_drain_pebs_buffer();

	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);

	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
		cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
		cpuc->pebs_enabled &= ~(1ULL << 63);

	if (cpuc->enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);

	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
}

void intel_pmu_pebs_enable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->pebs_enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
}

void intel_pmu_pebs_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->pebs_enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}

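/*
 * With a trap-like PEBS assist the reported IP is one instruction past
 * the one that caused the event.  Starting from the last LBR branch
 * target, decode forward until the instruction just before the reported
 * IP and report that as the precise IP.
 */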
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	unsigned long from = cpuc->lbr_entries[0].from;
	unsigned long old_to, to = cpuc->lbr_entries[0].to;
	unsigned long ip = regs->ip;
	int is_64bit = 0;
	void *kaddr;
	int size;

	/*
	 * We don't need to fixup if the PEBS assist is fault like
	 */
	if (!x86_pmu.intel_cap.pebs_trap)
		return 1;

	/*
	 * No LBR entry, no basic block, no rewinding
	 */
	if (!cpuc->lbr_stack.nr || !from || !to)
		return 0;

	/*
	 * Basic blocks should never cross user/kernel boundaries
	 */
	if (kernel_ip(ip) != kernel_ip(to))
		return 0;

	/*
	 * unsigned math, either ip is before the start (impossible) or
	 * the basic block is larger than 1 page (sanity)
	 */
	if ((ip - to) > PEBS_FIXUP_SIZE)
		return 0;

	/*
	 * We sampled a branch insn, rewind using the LBR stack
	 */
	if (ip == to) {
		set_linear_ip(regs, from);
		return 1;
	}

	size = ip - to;
	if (!kernel_ip(ip)) {
		int bytes;
		u8 *buf = this_cpu_read(insn_buffer);

		/* 'size' must fit our buffer, see above */
		bytes = copy_from_user_nmi(buf, (void __user *)to, size);
		if (bytes != 0)
			return 0;

		kaddr = buf;
	} else {
		kaddr = (void *)to;
	}

	do {
		struct insn insn;

		old_to = to;

#ifdef CONFIG_X86_64
		is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
#endif
		insn_init(&insn, kaddr, size, is_64bit);
		insn_get_length(&insn);
		/*
		 * Make sure there was not a problem decoding the
		 * instruction and getting the length.  This is
		 * doubly important because we have an infinite
		 * loop if insn.length=0.
		 */
		if (!insn.length)
			break;

		to += insn.length;
		kaddr += insn.length;
		size -= insn.length;
	} while (to < ip);

	if (to == ip) {
		set_linear_ip(regs, old_to);
		return 1;
	}

	/*
	 * Even though we decoded the basic block, the instruction stream
	 * never matched the given IP, either the TO or the IP got corrupted.
	 */
	return 0;
}

static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs)
{
	if (pebs->tsx_tuning) {
		union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
		return tsx.cycles_last_block;
	}
	return 0;
}

static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs)
{
	u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;

	/* For RTM XABORTs also log the abort code from AX */
	if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
		txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
	return txn;
}

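/*
 * Translate one raw PEBS record into the perf_sample_data/pt_regs pair
 * used for output, starting from the interrupt regs and overriding the
 * fields the record provides (IP, GP registers, data source, weight,
 * transaction info, timestamp).
 */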
static void setup_pebs_sample_data(struct perf_event *event,
				   struct pt_regs *iregs, void *__pebs,
				   struct perf_sample_data *data,
				   struct pt_regs *regs)
{
#define PERF_X86_EVENT_PEBS_HSW_PREC \
		(PERF_X86_EVENT_PEBS_ST_HSW | \
		 PERF_X86_EVENT_PEBS_LD_HSW | \
		 PERF_X86_EVENT_PEBS_NA_HSW)
	/*
	 * We cast to the biggest pebs_record but are careful not to
	 * unconditionally access the 'extra' entries.
	 */
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct pebs_record_skl *pebs = __pebs;
	u64 sample_type;
	int fll, fst, dsrc;
	int fl = event->hw.flags;

	if (pebs == NULL)
		return;

	sample_type = event->attr.sample_type;
	dsrc = sample_type & PERF_SAMPLE_DATA_SRC;

	fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
	fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);

	perf_sample_data_init(data, 0, event->hw.last_period);

	data->period = event->hw.last_period;

	/*
	 * Use latency for weight (only avail with PEBS-LL)
	 */
	if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
		data->weight = pebs->lat;

	/*
	 * data.data_src encodes the data source
	 */
	if (dsrc) {
		u64 val = PERF_MEM_NA;
		if (fll)
			val = load_latency_data(pebs->dse);
		else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
			val = precise_datala_hsw(event, pebs->dse);
		else if (fst)
			val = precise_store_data(pebs->dse);
		data->data_src.val = val;
	}

	/*
	 * We use the interrupt regs as a base because the PEBS record does not
	 * contain a full regs set, specifically it seems to lack segment
	 * descriptors, which get used by things like user_mode().
	 *
	 * In the simple case fix up only the IP for PERF_SAMPLE_IP.
	 *
	 * We must however always use BP,SP from iregs for the unwinder to stay
	 * sane; the record BP,SP can point into thin air when the record is
	 * from a previous PMI context or an (I)RET happened between the record
	 * and PMI.
	 */
	*regs = *iregs;
	regs->flags = pebs->flags;
	set_linear_ip(regs, pebs->ip);

	if (sample_type & PERF_SAMPLE_REGS_INTR) {
		regs->ax = pebs->ax;
		regs->bx = pebs->bx;
		regs->cx = pebs->cx;
		regs->dx = pebs->dx;
		regs->si = pebs->si;
		regs->di = pebs->di;

		/*
		 * Per the above; only set BP,SP if we don't need callchains.
		 *
		 * XXX: does this make sense?
		 */
		if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
			regs->bp = pebs->bp;
			regs->sp = pebs->sp;
		}

		/*
		 * Preserve PERF_EFLAGS_VM from set_linear_ip().
		 */
		regs->flags = pebs->flags | (regs->flags & PERF_EFLAGS_VM);
#ifndef CONFIG_X86_32
		regs->r8 = pebs->r8;
		regs->r9 = pebs->r9;
		regs->r10 = pebs->r10;
		regs->r11 = pebs->r11;
		regs->r12 = pebs->r12;
		regs->r13 = pebs->r13;
		regs->r14 = pebs->r14;
		regs->r15 = pebs->r15;
#endif
	}

	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
		regs->ip = pebs->real_ip;
		regs->flags |= PERF_EFLAGS_EXACT;
	} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs))
		regs->flags |= PERF_EFLAGS_EXACT;
	else
		regs->flags &= ~PERF_EFLAGS_EXACT;

	if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) &&
	    x86_pmu.intel_cap.pebs_format >= 1)
		data->addr = pebs->dla;

	if (x86_pmu.intel_cap.pebs_format >= 2) {
		/* Only set the TSX weight when no memory weight. */
		if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
			data->weight = intel_hsw_weight(pebs);

		if (sample_type & PERF_SAMPLE_TRANSACTION)
			data->txn = intel_hsw_transaction(pebs);
	}

	/*
	 * v3 supplies an accurate time stamp, so we use that
	 * for the time stamp.
	 *
	 * We can only do this for the default trace clock.
	 */
	if (x86_pmu.intel_cap.pebs_format >= 3 &&
	    event->attr.use_clockid == 0)
		data->time = native_sched_clock_from_tsc(pebs->tsc);

	if (has_branch_stack(event))
		data->br_stack = &cpuc->lbr_stack;
}

static inline void *
get_next_pebs_record_by_bit(void *base, void *top, int bit)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	void *at;
	u64 pebs_status;

	/*
	 * fmt0 does not have a status bitfield (does not use
	 * the pebs_record_nhm format)
	 */
	if (x86_pmu.intel_cap.pebs_format < 1)
		return base;

	if (base == NULL)
		return NULL;

	for (at = base; at < top; at += x86_pmu.pebs_record_size) {
		struct pebs_record_nhm *p = at;

		if (test_bit(bit, (unsigned long *)&p->status)) {
			/* PEBS v3 has accurate status bits */
			if (x86_pmu.intel_cap.pebs_format >= 3)
				return at;

			if (p->status == (1 << bit))
				return at;

			/* clear non-PEBS bit and re-check */
			pebs_status = p->status & cpuc->pebs_enabled;
			pebs_status &= PEBS_COUNTER_MASK;
			if (pebs_status == (1 << bit))
				return at;
		}
	}
	return NULL;
}

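/*
 * Emit 'count' PEBS records for 'event': all but the last go straight
 * through perf_event_output(), the final one through
 * perf_event_overflow() so period accounting and throttling still apply.
 */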
static void __intel_pmu_pebs_event(struct perf_event *event,
				   struct pt_regs *iregs,
				   void *base, void *top,
				   int bit, int count)
{
	struct perf_sample_data data;
	struct pt_regs regs;
	void *at = get_next_pebs_record_by_bit(base, top, bit);

	if (!intel_pmu_save_and_restart(event) &&
	    !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
		return;

	while (count > 1) {
		setup_pebs_sample_data(event, iregs, at, &data, &regs);
		perf_event_output(event, &data, &regs);
		at += x86_pmu.pebs_record_size;
		at = get_next_pebs_record_by_bit(at, top, bit);
		count--;
	}

	setup_pebs_sample_data(event, iregs, at, &data, &regs);

	/*
	 * All but the last records are processed.
	 * The last one is left to be able to call the overflow handler.
	 */
	if (perf_event_overflow(event, &data, &regs)) {
		x86_pmu_stop(event, 0);
		return;
	}

}

static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct perf_event *event = cpuc->events[0]; /* PMC0 only */
	struct pebs_record_core *at, *top;
	int n;

	if (!x86_pmu.pebs_active)
		return;

	at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;

	/*
	 * Whatever else happens, drain the thing
	 */
	ds->pebs_index = ds->pebs_buffer_base;

	if (!test_bit(0, cpuc->active_mask))
		return;

	WARN_ON_ONCE(!event);

	if (!event->attr.precise_ip)
		return;

	n = top - at;
	if (n <= 0)
		return;

	__intel_pmu_pebs_event(event, iregs, at, top, 0, n);
}

static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct perf_event *event;
	void *base, *at, *top;
	short counts[MAX_PEBS_EVENTS] = {};
	short error[MAX_PEBS_EVENTS] = {};
	int bit, i;

	if (!x86_pmu.pebs_active)
		return;

	base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;

	ds->pebs_index = ds->pebs_buffer_base;

	if (unlikely(base >= top))
		return;

	for (at = base; at < top; at += x86_pmu.pebs_record_size) {
		struct pebs_record_nhm *p = at;
		u64 pebs_status;

		pebs_status = p->status & cpuc->pebs_enabled;
		pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;

		/* PEBS v3 has more accurate status bits */
		if (x86_pmu.intel_cap.pebs_format >= 3) {
			for_each_set_bit(bit, (unsigned long *)&pebs_status,
					 x86_pmu.max_pebs_events)
				counts[bit]++;

			continue;
		}

		/*
		 * On some CPUs the PEBS status can be zero when PEBS is
		 * racing with clearing of GLOBAL_STATUS.
		 *
		 * Normally we would drop that record, but in the
		 * case when there is only a single active PEBS event
		 * we can assume it's for that event.
		 */
		if (!pebs_status && cpuc->pebs_enabled &&
		    !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
			pebs_status = cpuc->pebs_enabled;

		bit = find_first_bit((unsigned long *)&pebs_status,
				     x86_pmu.max_pebs_events);
		if (bit >= x86_pmu.max_pebs_events)
			continue;

		/*
		 * The PEBS hardware does not deal well with the situation
		 * when events happen near to each other and multiple bits
		 * are set. But it should happen rarely.
		 *
		 * If these events include one PEBS and multiple non-PEBS
		 * events, it doesn't impact PEBS record. The record will
		 * be handled normally. (slow path)
		 *
		 * If these events include two or more PEBS events, the
		 * records for the events can be collapsed into a single
		 * one, and it's not possible to reconstruct all events
		 * that caused the PEBS record. It's called collision.
		 * If collision happened, the record will be dropped.
		 */
		if (p->status != (1ULL << bit)) {
			for_each_set_bit(i, (unsigned long *)&pebs_status,
					 x86_pmu.max_pebs_events)
				error[i]++;
			continue;
		}

		counts[bit]++;
	}

	for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
		if ((counts[bit] == 0) && (error[bit] == 0))
			continue;

		event = cpuc->events[bit];
		if (WARN_ON_ONCE(!event))
			continue;

		if (WARN_ON_ONCE(!event->attr.precise_ip))
			continue;

		/* log dropped samples number */
		if (error[bit]) {
			perf_log_lost_samples(event, error[bit]);

			if (perf_event_account_interrupt(event))
				x86_pmu_stop(event, 0);
		}

		if (counts[bit]) {
			__intel_pmu_pebs_event(event, iregs, base,
					       top, bit, counts[bit]);
		}
	}
}

/*
 * BTS, PEBS probe and setup
 */

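/*
 * Pick the PEBS record layout and drain handler based on the PEBS
 * format reported in x86_pmu.intel_cap.
 */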
void __init intel_ds_init(void)
{
	/*
	 * No support for 32bit formats
	 */
	if (!boot_cpu_has(X86_FEATURE_DTES64))
		return;

	x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
	x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
	x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
	if (x86_pmu.pebs) {
		char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
		int format = x86_pmu.intel_cap.pebs_format;

		switch (format) {
		case 0:
			pr_cont("PEBS fmt0%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
			/*
			 * Using >PAGE_SIZE buffers makes the WRMSR to
			 * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
			 * mysteriously hang on Core2.
			 *
			 * As a workaround, we don't do this.
			 */
			x86_pmu.pebs_buffer_size = PAGE_SIZE;
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
			break;

		case 1:
			pr_cont("PEBS fmt1%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			break;

		case 2:
			pr_cont("PEBS fmt2%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			break;

		case 3:
			pr_cont("PEBS fmt3%c, ", pebs_type);
			x86_pmu.pebs_record_size =
						sizeof(struct pebs_record_skl);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			x86_pmu.free_running_flags |= PERF_SAMPLE_TIME;
			break;

		default:
			pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
			x86_pmu.pebs = 0;
		}
	}
}

void perf_restore_debug_store(void)
{
	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
}