#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/slab.h>

#include <asm/perf_event.h>
#include <asm/insn.h>

#include "../../kernel/cpu/perf_event.h"

/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE		24

#define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
#define PEBS_BUFFER_SIZE	(PAGE_SIZE << 4)
#define PEBS_FIXUP_SIZE		PAGE_SIZE

/*
 * pebs_record_32 for p4 and core not supported

struct pebs_record_32 {
	u32 flags, ip;
	u32 ax, bx, cx, dx;
	u32 si, di, bp, sp;
};

 */

union intel_x86_pebs_dse {
	u64 val;
	struct {
		unsigned int ld_dse:4;
		unsigned int ld_stlb_miss:1;
		unsigned int ld_locked:1;
		unsigned int ld_reserved:26;
	};
	struct {
		unsigned int st_l1d_hit:1;
		unsigned int st_reserved1:3;
		unsigned int st_stlb_miss:1;
		unsigned int st_locked:1;
		unsigned int st_reserved2:26;
	};
};


/*
 * Map PEBS Load Latency Data Source encodings to generic
 * memory data source information
 */
#define P(a, b) PERF_MEM_S(a, b)
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))

static const u64 pebs_data_source[] = {
	P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
	OP_LH | P(LVL, L1)  | P(SNOOP, NONE),	/* 0x01: L1 local */
	OP_LH | P(LVL, LFB) | P(SNOOP, NONE),	/* 0x02: LFB hit */
	OP_LH | P(LVL, L2)  | P(SNOOP, NONE),	/* 0x03: L2 hit */
	OP_LH | P(LVL, L3)  | P(SNOOP, NONE),	/* 0x04: L3 hit */
	OP_LH | P(LVL, L3)  | P(SNOOP, MISS),	/* 0x05: L3 hit, snoop miss */
	OP_LH | P(LVL, L3)  | P(SNOOP, HIT),	/* 0x06: L3 hit, snoop hit */
	OP_LH | P(LVL, L3)  | P(SNOOP, HITM),	/* 0x07: L3 hit, snoop hitm */
	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
	OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
	OP_LH | P(LVL, LOC_RAM)  | P(SNOOP, HIT),  /* 0x0a: L3 miss, shared */
	OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
	OP_LH | P(LVL, LOC_RAM)  | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
	OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
	OP_LH | P(LVL, IO)  | P(SNOOP, NONE),	/* 0x0e: I/O */
	OP_LH | P(LVL, UNC) | P(SNOOP, NONE),	/* 0x0f: uncached */
};
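
/*
 * The table above is indexed by the low four bits of the PEBS data
 * source field (ld_dse in the union above); load_latency_data() uses
 * it verbatim.  For example, an encoding of 0x05 is reported to user
 * space as a load that hit L3 but missed the snoop:
 * PERF_MEM_OP_LOAD | PERF_MEM_LVL_HIT | PERF_MEM_LVL_L3 |
 * PERF_MEM_SNOOP_MISS.
 */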

static u64 precise_store_data(u64 status)
{
	union intel_x86_pebs_dse dse;
	u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);

	dse.val = status;

	/*
	 * bit 4: TLB access
	 * 1 = store missed 2nd level TLB
	 *
	 * so it either hit the walker or the OS
	 * otherwise hit 2nd level TLB
	 */
	if (dse.st_stlb_miss)
		val |= P(TLB, MISS);
	else
		val |= P(TLB, HIT);

	/*
	 * bit 0: hit L1 data cache
	 * if not set, then all we know is that
	 * it missed L1D
	 */
	if (dse.st_l1d_hit)
		val |= P(LVL, HIT);
	else
		val |= P(LVL, MISS);

	/*
	 * bit 5: Locked prefix
	 */
	if (dse.st_locked)
		val |= P(LOCK, LOCKED);

	return val;
}
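
/*
 * Example: a status of 0x1 (st_l1d_hit set, no STLB miss, no lock)
 * decodes to a store that hit L1 and the second level TLB:
 * P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(LVL, HIT) |
 * P(TLB, L2) | P(TLB, HIT).
 */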

static u64 precise_datala_hsw(struct perf_event *event, u64 status)
{
	union perf_mem_data_src dse;

	dse.val = PERF_MEM_NA;

	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
		dse.mem_op = PERF_MEM_OP_STORE;
	else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
		dse.mem_op = PERF_MEM_OP_LOAD;

	/*
	 * L1 info only valid for following events:
	 *
	 * MEM_UOPS_RETIRED.STLB_MISS_STORES
	 * MEM_UOPS_RETIRED.LOCK_STORES
	 * MEM_UOPS_RETIRED.SPLIT_STORES
	 * MEM_UOPS_RETIRED.ALL_STORES
	 */
	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
		if (status & 1)
			dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
		else
			dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
	}
	return dse.val;
}

static u64 load_latency_data(u64 status)
{
	union intel_x86_pebs_dse dse;
	u64 val;
	int model = boot_cpu_data.x86_model;
	int fam = boot_cpu_data.x86;

	dse.val = status;

	/*
	 * use the mapping table for bit 0-3
	 */
	val = pebs_data_source[dse.ld_dse];

	/*
	 * Nehalem models do not support TLB, Lock infos
	 */
	if (fam == 0x6 && (model == 26 || model == 30
	    || model == 31 || model == 46)) {
		val |= P(TLB, NA) | P(LOCK, NA);
		return val;
	}
	/*
	 * bit 4: TLB access
	 * 0 = did not miss 2nd level TLB
	 * 1 = missed 2nd level TLB
	 */
	if (dse.ld_stlb_miss)
		val |= P(TLB, MISS) | P(TLB, L2);
	else
		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);

	/*
	 * bit 5: locked prefix
	 */
	if (dse.ld_locked)
		val |= P(LOCK, LOCKED);

	return val;
}
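
/*
 * The raw PEBS record layout grows with each format revision reported
 * in x86_pmu.intel_cap.pebs_format: format 0 is pebs_record_core,
 * format 1 adds the status/dla/dse/lat words (pebs_record_nhm),
 * format 2 adds real_ip/tsx_tuning (pebs_record_hsw) and format 3
 * appends a TSC (pebs_record_skl).
 */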
struct pebs_record_core {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
};

struct pebs_record_nhm {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
	u64 status, dla, dse, lat;
};

/*
 * Same as pebs_record_nhm, with two additional fields.
 */
struct pebs_record_hsw {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
	u64 status, dla, dse, lat;
	u64 real_ip, tsx_tuning;
};

union hsw_tsx_tuning {
	struct {
		u32 cycles_last_block     : 32,
		    hle_abort		  : 1,
		    rtm_abort		  : 1,
		    instruction_abort     : 1,
		    non_instruction_abort : 1,
		    retry		  : 1,
		    data_conflict	  : 1,
		    capacity_writes	  : 1,
		    capacity_reads	  : 1;
	};
	u64	    value;
};

#define PEBS_HSW_TSX_FLAGS	0xff00000000ULL

/* Same as HSW, plus TSC */

struct pebs_record_skl {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
	u64 status, dla, dse, lat;
	u64 real_ip, tsx_tuning;
	u64 tsc;
};

void init_debug_store_on_cpu(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
		     (u32)((u64)(unsigned long)ds),
		     (u32)((u64)(unsigned long)ds >> 32));
}

void fini_debug_store_on_cpu(int cpu)
{
	if (!per_cpu(cpu_hw_events, cpu).ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}
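
/*
 * Per-cpu scratch page used by intel_pmu_pebs_fixup_ip() to copy user
 * space text for instruction decoding.  Only allocated on pre-HSW
 * parts (pebs_format < 2) which do not report the eventing IP
 * themselves, see alloc_pebs_buffer().
 */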
static DEFINE_PER_CPU(void *, insn_buffer);

static int alloc_pebs_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
	int node = cpu_to_node(cpu);
	int max;
	void *buffer, *ibuffer;

	if (!x86_pmu.pebs)
		return 0;

	buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node);
	if (unlikely(!buffer))
		return -ENOMEM;

	/*
	 * HSW+ already provides us the eventing ip; no need to allocate this
	 * buffer then.
	 */
	if (x86_pmu.intel_cap.pebs_format < 2) {
		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
		if (!ibuffer) {
			kfree(buffer);
			return -ENOMEM;
		}
		per_cpu(insn_buffer, cpu) = ibuffer;
	}

	max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;

	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
	ds->pebs_index = ds->pebs_buffer_base;
	ds->pebs_absolute_maximum = ds->pebs_buffer_base +
		max * x86_pmu.pebs_record_size;

	return 0;
}

static void release_pebs_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds || !x86_pmu.pebs)
		return;

	kfree(per_cpu(insn_buffer, cpu));
	per_cpu(insn_buffer, cpu) = NULL;

	kfree((void *)(unsigned long)ds->pebs_buffer_base);
	ds->pebs_buffer_base = 0;
}
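
/*
 * The BTS buffer below holds BTS_BUFFER_SIZE / BTS_RECORD_SIZE records
 * (about 2730 with 4K pages) and the interrupt threshold is set one
 * sixteenth of the buffer before the end, so the PMI fires while there
 * is still room for new records.
 */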
static int alloc_bts_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
	int node = cpu_to_node(cpu);
	int max, thresh;
	void *buffer;

	if (!x86_pmu.bts)
		return 0;

	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
	if (unlikely(!buffer)) {
		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
		return -ENOMEM;
	}

	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
	thresh = max / 16;

	ds->bts_buffer_base = (u64)(unsigned long)buffer;
	ds->bts_index = ds->bts_buffer_base;
	ds->bts_absolute_maximum = ds->bts_buffer_base +
		max * BTS_RECORD_SIZE;
	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
		thresh * BTS_RECORD_SIZE;

	return 0;
}

static void release_bts_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds || !x86_pmu.bts)
		return;

	kfree((void *)(unsigned long)ds->bts_buffer_base);
	ds->bts_buffer_base = 0;
}

static int alloc_ds_buffer(int cpu)
{
	int node = cpu_to_node(cpu);
	struct debug_store *ds;

	ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
	if (unlikely(!ds))
		return -ENOMEM;

	per_cpu(cpu_hw_events, cpu).ds = ds;

	return 0;
}

static void release_ds_buffer(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds)
		return;

	per_cpu(cpu_hw_events, cpu).ds = NULL;
	kfree(ds);
}

void release_ds_buffers(void)
{
	int cpu;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	get_online_cpus();
	for_each_online_cpu(cpu)
		fini_debug_store_on_cpu(cpu);

	for_each_possible_cpu(cpu) {
		release_pebs_buffer(cpu);
		release_bts_buffer(cpu);
		release_ds_buffer(cpu);
	}
	put_online_cpus();
}
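
/*
 * Allocate DS, BTS and PEBS buffers for every possible CPU.  If
 * allocation fails for one of the two facilities, its buffers are
 * released again on all CPUs and only that facility stays inactive
 * (bts_active/pebs_active remain 0); the other one can still be used.
 */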
void reserve_ds_buffers(void)
{
	int bts_err = 0, pebs_err = 0;
	int cpu;

	x86_pmu.bts_active = 0;
	x86_pmu.pebs_active = 0;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	if (!x86_pmu.bts)
		bts_err = 1;

	if (!x86_pmu.pebs)
		pebs_err = 1;

	get_online_cpus();

	for_each_possible_cpu(cpu) {
		if (alloc_ds_buffer(cpu)) {
			bts_err = 1;
			pebs_err = 1;
		}

		if (!bts_err && alloc_bts_buffer(cpu))
			bts_err = 1;

		if (!pebs_err && alloc_pebs_buffer(cpu))
			pebs_err = 1;

		if (bts_err && pebs_err)
			break;
	}

	if (bts_err) {
		for_each_possible_cpu(cpu)
			release_bts_buffer(cpu);
	}

	if (pebs_err) {
		for_each_possible_cpu(cpu)
			release_pebs_buffer(cpu);
	}

	if (bts_err && pebs_err) {
		for_each_possible_cpu(cpu)
			release_ds_buffer(cpu);
	} else {
		if (x86_pmu.bts && !bts_err)
			x86_pmu.bts_active = 1;

		if (x86_pmu.pebs && !pebs_err)
			x86_pmu.pebs_active = 1;

		for_each_online_cpu(cpu)
			init_debug_store_on_cpu(cpu);
	}

	put_online_cpus();
}

/*
 * BTS
 */

struct event_constraint bts_constraint =
	EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
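
/*
 * BTS itself is switched on and off through MSR_IA32_DEBUGCTL (the
 * buffer location comes from the DS area programmed above): TR enables
 * branch trace messages, BTS routes them into the DS buffer, BTINT
 * raises an interrupt when the buffer threshold is reached, and
 * BTS_OFF_OS/BTS_OFF_USR suppress tracing of the respective privilege
 * levels.
 */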
void intel_pmu_enable_bts(u64 config)
{
	unsigned long debugctlmsr;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr |= DEBUGCTLMSR_TR;
	debugctlmsr |= DEBUGCTLMSR_BTS;
	if (config & ARCH_PERFMON_EVENTSEL_INT)
		debugctlmsr |= DEBUGCTLMSR_BTINT;

	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
		debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;

	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
		debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;

	update_debugctlmsr(debugctlmsr);
}

void intel_pmu_disable_bts(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	unsigned long debugctlmsr;

	if (!cpuc->ds)
		return;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr &=
		~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
		  DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);

	update_debugctlmsr(debugctlmsr);
}

int intel_pmu_drain_bts_buffer(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct bts_record {
		u64	from;
		u64	to;
		u64	flags;
	};
	struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
	struct bts_record *at, *base, *top;
	struct perf_output_handle handle;
	struct perf_event_header header;
	struct perf_sample_data data;
	unsigned long skip = 0;
	struct pt_regs regs;

	if (!event)
		return 0;

	if (!x86_pmu.bts_active)
		return 0;

	base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
	top  = (struct bts_record *)(unsigned long)ds->bts_index;

	if (top <= base)
		return 0;

	memset(&regs, 0, sizeof(regs));

	ds->bts_index = ds->bts_buffer_base;

	perf_sample_data_init(&data, 0, event->hw.last_period);

	/*
	 * BTS leaks kernel addresses in branches across the cpl boundary,
	 * such as traps or system calls, so unless the user is asking for
	 * kernel tracing (and right now it's not possible), we'd need to
	 * filter them out. But first we need to count how many of those we
	 * have in the current batch. This is an extra O(n) pass, however,
	 * it's much faster than the other one especially considering that
	 * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
	 * alloc_bts_buffer()).
	 */
	for (at = base; at < top; at++) {
		/*
		 * Note that right now *this* BTS code only works if
		 * attr::exclude_kernel is set, but let's keep this extra
		 * check here in case that changes.
		 */
		if (event->attr.exclude_kernel &&
		    (kernel_ip(at->from) || kernel_ip(at->to)))
			skip++;
	}

	/*
	 * Prepare a generic sample, i.e. fill in the invariant fields.
	 * We will overwrite the from and to address before we output
	 * the sample.
	 */
	perf_prepare_sample(&header, &data, event, &regs);

	if (perf_output_begin(&handle, event, header.size *
			      (top - base - skip)))
		return 1;

	for (at = base; at < top; at++) {
		/* Filter out any records that contain kernel addresses. */
		if (event->attr.exclude_kernel &&
		    (kernel_ip(at->from) || kernel_ip(at->to)))
			continue;

		data.ip		= at->from;
		data.addr	= at->to;

		perf_output_sample(&handle, &header, &data, event);
	}

	perf_output_end(&handle);

	/* There's new data available. */
	event->hw.interrupts++;
	event->pending_kill = POLL_IN;
	return 1;
}

static inline void intel_pmu_drain_pebs_buffer(void)
{
	struct pt_regs regs;

	x86_pmu.drain_pebs(&regs);
}
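
/*
 * A PEBS buffer that may hold multiple records (see the threshold
 * handling in intel_pmu_pebs_enable() below) is drained when the task
 * schedules out, so records buffered for the outgoing context are not
 * left sitting in the buffer.
 */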
void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	if (!sched_in)
		intel_pmu_drain_pebs_buffer();
}

/*
 * PEBS
 */
struct event_constraint intel_core2_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_atom_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_slm_pebs_event_constraints[] = {
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1),
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_nehalem_pebs_event_constraints[] = {
	INTEL_PLD_CONSTRAINT(0x100b, 0xf),	    /* MEM_INST_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),	    /* UOPS_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_westmere_pebs_event_constraints[] = {
	INTEL_PLD_CONSTRAINT(0x100b, 0xf),	    /* MEM_INST_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),	    /* UOPS_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_snb_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),	    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
	INTEL_PST_CONSTRAINT(0x02cd, 0x8),	    /* MEM_TRANS_RETIRED.PRECISE_STORES */
	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),	    /* MEM_UOP_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),	    /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),	    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),	    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_ivb_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),	    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
	INTEL_PST_CONSTRAINT(0x02cd, 0x8),	    /* MEM_TRANS_RETIRED.PRECISE_STORES */
	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),	    /* MEM_UOP_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),	    /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),	    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),	    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_hsw_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
	INTEL_PLD_CONSTRAINT(0x01cd, 0xf),	    /* MEM_TRANS_RETIRED.* */
	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_skl_pebs_event_constraints[] = {
	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),	/* INST_RETIRED.PREC_DIST */
	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
	/* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
	INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
	INTEL_PLD_CONSTRAINT(0x1cd, 0xf),		      /* MEM_TRANS_RETIRED.* */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_L3_HIT_RETIRED.* */
	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_L3_MISS_RETIRED.* */
	/* Allow all events as PEBS with no flags */
	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
	EVENT_CONSTRAINT_END
};

struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
	struct event_constraint *c;

	if (!event->attr.precise_ip)
		return NULL;

	if (x86_pmu.pebs_constraints) {
		for_each_event_constraint(c, x86_pmu.pebs_constraints) {
			if ((event->hw.config & c->cmask) == c->code) {
				event->hw.flags |= c->flags;
				return c;
			}
		}
	}

	return &emptyconstraint;
}

static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
{
	return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
}
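
/*
 * MSR_IA32_PEBS_ENABLE layout as used below: bit n arms PEBS for
 * general purpose counter n, bit n+32 additionally enables the load
 * latency facility for that counter, and bit 63 enables precise
 * stores.  The interrupt threshold decides how many records may
 * accumulate before a PMI: a single record for ordinary PEBS, or
 * nearly the whole buffer for free running events.
 */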
void intel_pmu_pebs_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	struct debug_store *ds = cpuc->ds;
	bool first_pebs;
	u64 threshold;

	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;

	first_pebs = !pebs_is_enabled(cpuc);
	cpuc->pebs_enabled |= 1ULL << hwc->idx;

	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
		cpuc->pebs_enabled |= 1ULL << 63;

	/*
	 * When the event is constrained enough we can use a larger
	 * threshold and run the event with less frequent PMI.
	 */
	if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
		threshold = ds->pebs_absolute_maximum -
			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;

		if (first_pebs)
			perf_sched_cb_inc(event->ctx->pmu);
	} else {
		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;

		/*
		 * If not all events can use larger buffer,
		 * roll back to threshold = 1
		 */
		if (!first_pebs &&
		    (ds->pebs_interrupt_threshold > threshold))
			perf_sched_cb_dec(event->ctx->pmu);
	}

	/* Use auto-reload if possible to save a MSR write in the PMI */
	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
		ds->pebs_event_reset[hwc->idx] =
			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
	}

	if (first_pebs || ds->pebs_interrupt_threshold > threshold)
		ds->pebs_interrupt_threshold = threshold;
}
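
/*
 * If a multi-record (large threshold) buffer is in use, drain it
 * before this event's enable bits are cleared so that records it has
 * already written still get processed.
 */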
void intel_pmu_pebs_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	struct debug_store *ds = cpuc->ds;
	bool large_pebs = ds->pebs_interrupt_threshold >
		ds->pebs_buffer_base + x86_pmu.pebs_record_size;

	if (large_pebs)
		intel_pmu_drain_pebs_buffer();

	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);

	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
		cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
		cpuc->pebs_enabled &= ~(1ULL << 63);

	if (large_pebs && !pebs_is_enabled(cpuc))
		perf_sched_cb_dec(event->ctx->pmu);

	if (cpuc->enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);

	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
}

void intel_pmu_pebs_enable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->pebs_enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
}

void intel_pmu_pebs_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->pebs_enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}
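
/*
 * With a trap-like PEBS assist the reported IP is the instruction
 * *after* the one that triggered the event.  Rewind by starting at the
 * last LBR branch target and decoding forward until the sampled IP is
 * reached; the previously decoded instruction is then the real
 * eventing IP and the sample is flagged PERF_EFLAGS_EXACT by the
 * caller.
 */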
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	unsigned long from = cpuc->lbr_entries[0].from;
	unsigned long old_to, to = cpuc->lbr_entries[0].to;
	unsigned long ip = regs->ip;
	int is_64bit = 0;
	void *kaddr;
	int size;

	/*
	 * We don't need to fixup if the PEBS assist is fault like
	 */
	if (!x86_pmu.intel_cap.pebs_trap)
		return 1;

	/*
	 * No LBR entry, no basic block, no rewinding
	 */
	if (!cpuc->lbr_stack.nr || !from || !to)
		return 0;

	/*
	 * Basic blocks should never cross user/kernel boundaries
	 */
	if (kernel_ip(ip) != kernel_ip(to))
		return 0;

	/*
	 * unsigned math, either ip is before the start (impossible) or
	 * the basic block is larger than 1 page (sanity)
	 */
	if ((ip - to) > PEBS_FIXUP_SIZE)
		return 0;

	/*
	 * We sampled a branch insn, rewind using the LBR stack
	 */
	if (ip == to) {
		set_linear_ip(regs, from);
		return 1;
	}

	size = ip - to;
	if (!kernel_ip(ip)) {
		int bytes;
		u8 *buf = this_cpu_read(insn_buffer);

		/* 'size' must fit our buffer, see above */
		bytes = copy_from_user_nmi(buf, (void __user *)to, size);
		if (bytes != 0)
			return 0;

		kaddr = buf;
	} else {
		kaddr = (void *)to;
	}

	do {
		struct insn insn;

		old_to = to;

#ifdef CONFIG_X86_64
		is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
#endif
		insn_init(&insn, kaddr, size, is_64bit);
		insn_get_length(&insn);
		/*
		 * Make sure there was not a problem decoding the
		 * instruction and getting the length.  This is
		 * doubly important because we have an infinite
		 * loop if insn.length=0.
		 */
		if (!insn.length)
			break;

		to += insn.length;
		kaddr += insn.length;
		size -= insn.length;
	} while (to < ip);

	if (to == ip) {
		set_linear_ip(regs, old_to);
		return 1;
	}

	/*
	 * Even though we decoded the basic block, the instruction stream
	 * never matched the given IP, either the TO or the IP got corrupted.
	 */
	return 0;
}

static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs)
{
	if (pebs->tsx_tuning) {
		union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
		return tsx.cycles_last_block;
	}
	return 0;
}

static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs)
{
	u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;

	/* For RTM XABORTs also log the abort code from AX */
	if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
		txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
	return txn;
}
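
/*
 * Convert a single raw PEBS record into the perf_sample_data and
 * pt_regs that perf_event_output() expects, picking up weight, data
 * source, transaction and timestamp information depending on the
 * record format and the event's sample_type.
 */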
static void setup_pebs_sample_data(struct perf_event *event,
				   struct pt_regs *iregs, void *__pebs,
				   struct perf_sample_data *data,
				   struct pt_regs *regs)
{
#define PERF_X86_EVENT_PEBS_HSW_PREC \
		(PERF_X86_EVENT_PEBS_ST_HSW | \
		 PERF_X86_EVENT_PEBS_LD_HSW | \
		 PERF_X86_EVENT_PEBS_NA_HSW)
	/*
	 * We cast to the biggest pebs_record but are careful not to
	 * unconditionally access the 'extra' entries.
	 */
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct pebs_record_skl *pebs = __pebs;
	u64 sample_type;
	int fll, fst, dsrc;
	int fl = event->hw.flags;

	if (pebs == NULL)
		return;

	sample_type = event->attr.sample_type;
	dsrc = sample_type & PERF_SAMPLE_DATA_SRC;

	fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
	fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);

	perf_sample_data_init(data, 0, event->hw.last_period);

	data->period = event->hw.last_period;

	/*
	 * Use latency for weight (only avail with PEBS-LL)
	 */
	if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
		data->weight = pebs->lat;

	/*
	 * data.data_src encodes the data source
	 */
	if (dsrc) {
		u64 val = PERF_MEM_NA;
		if (fll)
			val = load_latency_data(pebs->dse);
		else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
			val = precise_datala_hsw(event, pebs->dse);
		else if (fst)
			val = precise_store_data(pebs->dse);
		data->data_src.val = val;
	}

	/*
	 * We use the interrupt regs as a base because the PEBS record
	 * does not contain a full regs set, specifically it seems to
	 * lack segment descriptors, which get used by things like
	 * user_mode().
	 *
	 * In the simple case fix up only the IP and BP,SP regs, for
	 * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
	 * A possible PERF_SAMPLE_REGS will have to transfer all regs.
	 */
	*regs = *iregs;
	regs->flags = pebs->flags;
	set_linear_ip(regs, pebs->ip);
	regs->bp = pebs->bp;
	regs->sp = pebs->sp;

	if (sample_type & PERF_SAMPLE_REGS_INTR) {
		regs->ax = pebs->ax;
		regs->bx = pebs->bx;
		regs->cx = pebs->cx;
		regs->dx = pebs->dx;
		regs->si = pebs->si;
		regs->di = pebs->di;
		regs->bp = pebs->bp;
		regs->sp = pebs->sp;

		regs->flags = pebs->flags;
#ifndef CONFIG_X86_32
		regs->r8 = pebs->r8;
		regs->r9 = pebs->r9;
		regs->r10 = pebs->r10;
		regs->r11 = pebs->r11;
		regs->r12 = pebs->r12;
		regs->r13 = pebs->r13;
		regs->r14 = pebs->r14;
		regs->r15 = pebs->r15;
#endif
	}

	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
		regs->ip = pebs->real_ip;
		regs->flags |= PERF_EFLAGS_EXACT;
	} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs))
		regs->flags |= PERF_EFLAGS_EXACT;
	else
		regs->flags &= ~PERF_EFLAGS_EXACT;

	if ((sample_type & PERF_SAMPLE_ADDR) &&
	    x86_pmu.intel_cap.pebs_format >= 1)
		data->addr = pebs->dla;

	if (x86_pmu.intel_cap.pebs_format >= 2) {
		/* Only set the TSX weight when no memory weight. */
		if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
			data->weight = intel_hsw_weight(pebs);

		if (sample_type & PERF_SAMPLE_TRANSACTION)
			data->txn = intel_hsw_transaction(pebs);
	}

	/*
	 * v3 supplies an accurate time stamp, so we use that
	 * for the time stamp.
	 *
	 * We can only do this for the default trace clock.
	 */
	if (x86_pmu.intel_cap.pebs_format >= 3 &&
		event->attr.use_clockid == 0)
		data->time = native_sched_clock_from_tsc(pebs->tsc);

	if (has_branch_stack(event))
		data->br_stack = &cpuc->lbr_stack;
}
	*regs = *iregs;
	regs->flags = pebs->flags;
	set_linear_ip(regs, pebs->ip);
	regs->bp = pebs->bp;
	regs->sp = pebs->sp;

	if (sample_type & PERF_SAMPLE_REGS_INTR) {
		regs->ax = pebs->ax;
		regs->bx = pebs->bx;
		regs->cx = pebs->cx;
		regs->dx = pebs->dx;
		regs->si = pebs->si;
		regs->di = pebs->di;
		regs->bp = pebs->bp;
		regs->sp = pebs->sp;

		regs->flags = pebs->flags;
#ifndef CONFIG_X86_32
		regs->r8 = pebs->r8;
		regs->r9 = pebs->r9;
		regs->r10 = pebs->r10;
		regs->r11 = pebs->r11;
		regs->r12 = pebs->r12;
		regs->r13 = pebs->r13;
		regs->r14 = pebs->r14;
		regs->r15 = pebs->r15;
#endif
	}

	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
		regs->ip = pebs->real_ip;
		regs->flags |= PERF_EFLAGS_EXACT;
	} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs))
		regs->flags |= PERF_EFLAGS_EXACT;
	else
		regs->flags &= ~PERF_EFLAGS_EXACT;

	if ((sample_type & PERF_SAMPLE_ADDR) &&
	    x86_pmu.intel_cap.pebs_format >= 1)
		data->addr = pebs->dla;

	if (x86_pmu.intel_cap.pebs_format >= 2) {
		/* Only set the TSX weight when no memory weight. */
		if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
			data->weight = intel_hsw_weight(pebs);

		if (sample_type & PERF_SAMPLE_TRANSACTION)
			data->txn = intel_hsw_transaction(pebs);
	}

	/*
	 * v3 supplies an accurate time stamp, so we use that
	 * for the time stamp.
	 *
	 * We can only do this for the default trace clock.
	 */
	if (x86_pmu.intel_cap.pebs_format >= 3 &&
	    event->attr.use_clockid == 0)
		data->time = native_sched_clock_from_tsc(pebs->tsc);

	if (has_branch_stack(event))
		data->br_stack = &cpuc->lbr_stack;
}

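/*
 * Scan the DS buffer in [base, top) for the next record that carries
 * @bit in its status field.  For PEBS v3 the status bits are accurate;
 * for older formats a record may have extra bits set, so only accept
 * it if it can be attributed to @bit alone once the non-PEBS bits are
 * masked out.
 */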
static inline void *
get_next_pebs_record_by_bit(void *base, void *top, int bit)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	void *at;
	u64 pebs_status;

	/*
	 * fmt0 does not have a status bitfield (does not use
	 * perf_record_nhm format)
	 */
	if (x86_pmu.intel_cap.pebs_format < 1)
		return base;

	if (base == NULL)
		return NULL;

	for (at = base; at < top; at += x86_pmu.pebs_record_size) {
		struct pebs_record_nhm *p = at;

		if (test_bit(bit, (unsigned long *)&p->status)) {
			/* PEBS v3 has accurate status bits */
			if (x86_pmu.intel_cap.pebs_format >= 3)
				return at;

			if (p->status == (1 << bit))
				return at;

			/* clear non-PEBS bit and re-check */
			pebs_status = p->status & cpuc->pebs_enabled;
			pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
			if (pebs_status == (1 << bit))
				return at;
		}
	}
	return NULL;
}

static void __intel_pmu_pebs_event(struct perf_event *event,
				   struct pt_regs *iregs,
				   void *base, void *top,
				   int bit, int count)
{
	struct perf_sample_data data;
	struct pt_regs regs;
	void *at = get_next_pebs_record_by_bit(base, top, bit);

	if (!intel_pmu_save_and_restart(event) &&
	    !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
		return;

	while (count > 1) {
		setup_pebs_sample_data(event, iregs, at, &data, &regs);
		perf_event_output(event, &data, &regs);
		at += x86_pmu.pebs_record_size;
		at = get_next_pebs_record_by_bit(at, top, bit);
		count--;
	}

	setup_pebs_sample_data(event, iregs, at, &data, &regs);

	/*
	 * All but the last records are processed.
	 * The last one is left to be able to call the overflow handler.
	 */
	if (perf_event_overflow(event, &data, &regs)) {
		x86_pmu_stop(event, 0);
		return;
	}
}

static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct perf_event *event = cpuc->events[0]; /* PMC0 only */
	struct pebs_record_core *at, *top;
	int n;

	if (!x86_pmu.pebs_active)
		return;

	at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;

	/*
	 * Whatever else happens, drain the thing
	 */
	ds->pebs_index = ds->pebs_buffer_base;

	if (!test_bit(0, cpuc->active_mask))
		return;

	WARN_ON_ONCE(!event);

	if (!event->attr.precise_ip)
		return;

	n = top - at;
	if (n <= 0)
		return;

	__intel_pmu_pebs_event(event, iregs, at, top, 0, n);
}

static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct perf_event *event;
	void *base, *at, *top;
	short counts[MAX_PEBS_EVENTS] = {};
	short error[MAX_PEBS_EVENTS] = {};
	int bit, i;

	if (!x86_pmu.pebs_active)
		return;

	base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
	top  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;

	ds->pebs_index = ds->pebs_buffer_base;

	if (unlikely(base >= top))
		return;

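	/*
	 * First pass: walk the records and tally, per counter bit, how
	 * many records can be attributed to exactly one event (counts[])
	 * and how many are ambiguous and must be dropped (error[]).
	 * The second pass further down emits the samples per event.
	 */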
	for (at = base; at < top; at += x86_pmu.pebs_record_size) {
		struct pebs_record_nhm *p = at;
		u64 pebs_status;

		/* PEBS v3 has accurate status bits */
		if (x86_pmu.intel_cap.pebs_format >= 3) {
			for_each_set_bit(bit, (unsigned long *)&p->status,
					 MAX_PEBS_EVENTS)
				counts[bit]++;

			continue;
		}

		pebs_status = p->status & cpuc->pebs_enabled;
		pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;

		/*
		 * On some CPUs the PEBS status can be zero when PEBS is
		 * racing with clearing of GLOBAL_STATUS.
		 *
		 * Normally we would drop that record, but in the
		 * case when there is only a single active PEBS event
		 * we can assume it's for that event.
		 */
		if (!pebs_status && cpuc->pebs_enabled &&
		    !(cpuc->pebs_enabled & (cpuc->pebs_enabled - 1)))
			pebs_status = cpuc->pebs_enabled;

		bit = find_first_bit((unsigned long *)&pebs_status,
				     x86_pmu.max_pebs_events);
		if (bit >= x86_pmu.max_pebs_events)
			continue;

		/*
		 * The PEBS hardware does not deal well with the situation
		 * when events happen close to each other and multiple
		 * status bits are set. But this should happen rarely.
		 *
		 * If these events include one PEBS and multiple non-PEBS
		 * events, it doesn't impact the PEBS record. The record
		 * will be handled normally. (slow path)
		 *
		 * If these events include two or more PEBS events, the
		 * records for the events can be collapsed into a single
		 * one, and it's not possible to reconstruct all events
		 * that caused the PEBS record. This is called a collision.
		 * If a collision happens, the record is dropped.
		 */
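		/*
		 * Example: with PEBS events on counters 0 and 2, a record
		 * whose status reads 0x5 cannot be attributed to a single
		 * event; both error[0] and error[2] are bumped and the
		 * record is skipped.
		 */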
		if (p->status != (1ULL << bit)) {
			for_each_set_bit(i, (unsigned long *)&pebs_status,
					 x86_pmu.max_pebs_events)
				error[i]++;
			continue;
		}

		counts[bit]++;
	}

	for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
		if ((counts[bit] == 0) && (error[bit] == 0))
			continue;

		event = cpuc->events[bit];
		WARN_ON_ONCE(!event);
		WARN_ON_ONCE(!event->attr.precise_ip);

		/* Log the number of dropped samples */
		if (error[bit])
			perf_log_lost_samples(event, error[bit]);

		if (counts[bit]) {
			__intel_pmu_pebs_event(event, iregs, base,
					       top, bit, counts[bit]);
		}
	}
}

/*
 * BTS, PEBS probe and setup
 */

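/*
 * Probe the DS/BTS/PEBS capabilities at boot and, based on
 * x86_pmu.intel_cap.pebs_format, select the PEBS record size and the
 * matching drain routine.
 */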
void __init intel_ds_init(void)
{
	/*
	 * No support for 32bit formats
	 */
	if (!boot_cpu_has(X86_FEATURE_DTES64))
		return;

	x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
	x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
	if (x86_pmu.pebs) {
		char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
		int format = x86_pmu.intel_cap.pebs_format;

		switch (format) {
		case 0:
			pr_cont("PEBS fmt0%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
			break;

		case 1:
			pr_cont("PEBS fmt1%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			break;

		case 2:
			pr_cont("PEBS fmt2%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			break;

		case 3:
			pr_cont("PEBS fmt3%c, ", pebs_type);
			x86_pmu.pebs_record_size =
					sizeof(struct pebs_record_skl);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			x86_pmu.free_running_flags |= PERF_SAMPLE_TIME;
			break;

		default:
			pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
			x86_pmu.pebs = 0;
		}
	}
}

void perf_restore_debug_store(void)
{
	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
}