/*
 * Per core/cpu state
 *
 * Used to coordinate shared registers between HT threads or
 * among events on a single PMU.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/nmi.h>

#include <asm/cpufeature.h>
#include <asm/hardirq.h>
#include <asm/apic.h>

#include "../perf_event.h"

/*
 * Intel PerfMon, used on Core and later.
 */
static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
{
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
	[PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
	[PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
	[PERF_COUNT_HW_REF_CPU_CYCLES]		= 0x0300, /* pseudo-encoding */
};

static struct event_constraint intel_core_event_constraints[] __read_mostly =
{
	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
	INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_core2_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
	INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
	INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
	INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
	INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
	INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
	INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
	INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
	INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
	INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
	INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
	INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
	EVENT_CONSTRAINT_END
};

static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
{
	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
	EVENT_EXTRA_END
};
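/*
 * Editorial illustration (not part of the driver): user space reaches the
 * OFFCORE_RESPONSE facility declared above through this extra-reg mapping,
 * with the response bits carried in attr.config1. A hypothetical event
 * string would look like:
 *
 *	perf stat -e cpu/event=0xb7,umask=0x01,config1=0xffff/ -a sleep 1
 *
 * where 0xffff mirrors the valid-bits mask that intel_nehalem_extra_regs
 * declares for MSR_OFFCORE_RSP_0.
 */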
static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
	INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
	INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_snb_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
	INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
	INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
	INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
	INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
	INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */

	/*
	 * When HT is off these events can only run on the bottom 4 counters
	 * When HT is on, they are impacted by the HT bug and require EXCL access
	 */
	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */

	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
	INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */
	INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMPTY */
	INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */
	INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */
	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
	INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
	INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */
	INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
	INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */

	/*
	 * When HT is off these events can only run on the bottom 4 counters
	 * When HT is on, they are impacted by the HT bug and require EXCL access
	 */
	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */

	EVENT_CONSTRAINT_END
};

static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
{
	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
	EVENT_EXTRA_END
};

static struct event_constraint intel_v1_event_constraints[] __read_mostly =
{
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_gen_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_slm_event_constraints[] __read_mostly =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
	EVENT_CONSTRAINT_END
};

struct event_constraint intel_skl_event_constraints[] = {
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
	INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */

	/*
	 * when HT is off, these can only run on the bottom 4 counters
	 */
	INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xc6, 0xf), /* FRONTEND_RETIRED.* */

	EVENT_CONSTRAINT_END
};
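/*
 * Editorial note: the second argument of the constraint macros above is a
 * bitmask over the general-purpose counters. 0x2 pins an event to counter 1
 * (e.g. INST_RETIRED.PREC_DIST), 0x8 to counter 3, and 0xf allows any of
 * counters 0-3.
 */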
static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
	INTEL_UEVENT_EXTRA_REG(0x01b7,
			       MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x02b7,
			       MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1),
	EVENT_EXTRA_END
};

static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
	EVENT_EXTRA_END
};

static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
	EVENT_EXTRA_END
};

static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
	/*
	 * Note: the low 8 bits of the eventsel code are not a contiguous
	 * field; some of the bits #GP when set, so they are masked out here.
	 */
	INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
	EVENT_EXTRA_END
};

EVENT_ATTR_STR(mem-loads,	mem_ld_nhm,	"event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads,	mem_ld_snb,	"event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores,	mem_st_snb,	"event=0xcd,umask=0x2");

struct attribute *nhm_events_attrs[] = {
	EVENT_PTR(mem_ld_nhm),
	NULL,
};

struct attribute *snb_events_attrs[] = {
	EVENT_PTR(mem_ld_snb),
	EVENT_PTR(mem_st_snb),
	NULL,
};
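/*
 * Editorial illustration: the attribute strings above are what export the
 * named events through sysfs (e.g. a "mem-loads" entry under the cpu PMU's
 * events directory), so tooling can open "cpu/mem-loads/" and have it
 * resolve to event=0xcd,umask=0x1,ldlat=3 on SNB-class parts. The exact
 * sysfs layout is established by the common x86 PMU code, not here.
 */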
static struct event_constraint intel_hsw_event_constraints[] = {
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
	INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
	/* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
	INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4),
	/* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
	INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
	/* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf),

	/*
	 * When HT is off these events can only run on the bottom 4 counters
	 * When HT is on, they are impacted by the HT bug and require EXCL access
	 */
	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */

	EVENT_CONSTRAINT_END
};

struct event_constraint intel_bdw_event_constraints[] = {
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
	INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
	INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
	/*
	 * when HT is off, these can only run on the bottom 4 counters
	 */
	INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */
	INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */
	EVENT_CONSTRAINT_END
};

static u64 intel_pmu_event_map(int hw_event)
{
	return intel_perfmon_event_map[hw_event];
}
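/*
 * Editorial example: intel_pmu_event_map(PERF_COUNT_HW_CPU_CYCLES) returns
 * 0x003c from the table at the top of this file, i.e. event select 0x3c
 * with umask 0x00 (unhalted core cycles).
 */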
/*
 * Notes on the events:
 * - data reads do not include code reads (comparable to earlier tables)
 * - data counts include speculative execution (except L1 write, dtlb, bpu)
 * - remote node access includes remote memory, remote cache, remote mmio.
 * - prefetches are not included in the counts.
 * - icache miss does not include decoded icache
 */

#define SKL_DEMAND_DATA_RD		BIT_ULL(0)
#define SKL_DEMAND_RFO			BIT_ULL(1)
#define SKL_ANY_RESPONSE		BIT_ULL(16)
#define SKL_SUPPLIER_NONE		BIT_ULL(17)
#define SKL_L3_MISS_LOCAL_DRAM		BIT_ULL(26)
#define SKL_L3_MISS_REMOTE_HOP0_DRAM	BIT_ULL(27)
#define SKL_L3_MISS_REMOTE_HOP1_DRAM	BIT_ULL(28)
#define SKL_L3_MISS_REMOTE_HOP2P_DRAM	BIT_ULL(29)
#define SKL_L3_MISS			(SKL_L3_MISS_LOCAL_DRAM| \
					 SKL_L3_MISS_REMOTE_HOP0_DRAM| \
					 SKL_L3_MISS_REMOTE_HOP1_DRAM| \
					 SKL_L3_MISS_REMOTE_HOP2P_DRAM)
#define SKL_SPL_HIT			BIT_ULL(30)
#define SKL_SNOOP_NONE			BIT_ULL(31)
#define SKL_SNOOP_NOT_NEEDED		BIT_ULL(32)
#define SKL_SNOOP_MISS			BIT_ULL(33)
#define SKL_SNOOP_HIT_NO_FWD		BIT_ULL(34)
#define SKL_SNOOP_HIT_WITH_FWD		BIT_ULL(35)
#define SKL_SNOOP_HITM			BIT_ULL(36)
#define SKL_SNOOP_NON_DRAM		BIT_ULL(37)
#define SKL_ANY_SNOOP			(SKL_SPL_HIT|SKL_SNOOP_NONE| \
					 SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
					 SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
					 SKL_SNOOP_HITM|SKL_SNOOP_NON_DRAM)
#define SKL_DEMAND_READ			SKL_DEMAND_DATA_RD
#define SKL_SNOOP_DRAM			(SKL_SNOOP_NONE| \
					 SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
					 SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
					 SKL_SNOOP_HITM|SKL_SPL_HIT)
#define SKL_DEMAND_WRITE		SKL_DEMAND_RFO
#define SKL_LLC_ACCESS			SKL_ANY_RESPONSE
#define SKL_L3_MISS_REMOTE		(SKL_L3_MISS_REMOTE_HOP0_DRAM| \
					 SKL_L3_MISS_REMOTE_HOP1_DRAM| \
					 SKL_L3_MISS_REMOTE_HOP2P_DRAM)
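/*
 * Editorial worked example: the LL read-miss entry in the extra-regs table
 * further below is SKL_DEMAND_READ|SKL_L3_MISS|SKL_ANY_SNOOP|
 * SKL_SUPPLIER_NONE, i.e. "demand data reads that missed L3, to any DRAM
 * target, with any snoop response or no supplier at all", composed purely
 * from the bit definitions above.
 */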
static __initconst const u64 skl_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
		[ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
		[ C(RESULT_MISS) ] = 0x0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x283, /* ICACHE_64B.MISS */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
		[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
		[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */
		[ C(RESULT_MISS) ] = 0x608, /* DTLB_LOAD_MISSES.WALK_COMPLETED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */
		[ C(RESULT_MISS) ] = 0x649, /* DTLB_STORE_MISSES.WALK_COMPLETED */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2085, /* ITLB_MISSES.STLB_HIT */
		[ C(RESULT_MISS) ] = 0xe85, /* ITLB_MISSES.WALK_COMPLETED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
		[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
		[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
};
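/*
 * Editorial note on table conventions: in these cache tables a value of -1
 * marks an op/result combination the hardware cannot express (opening such
 * an event is rejected), 0 means "no suitable event" and simply counts
 * nothing, and 0x1b7 defers to the OFFCORE_RESPONSE extra-reg machinery.
 */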
static __initconst const u64 skl_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
				       SKL_LLC_ACCESS|SKL_ANY_SNOOP,
		[ C(RESULT_MISS) ] = SKL_DEMAND_READ|
				     SKL_L3_MISS|SKL_ANY_SNOOP|
				     SKL_SUPPLIER_NONE,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
				       SKL_LLC_ACCESS|SKL_ANY_SNOOP,
		[ C(RESULT_MISS) ] = SKL_DEMAND_WRITE|
				     SKL_L3_MISS|SKL_ANY_SNOOP|
				     SKL_SUPPLIER_NONE,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
				       SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
		[ C(RESULT_MISS) ] = SKL_DEMAND_READ|
				     SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
				       SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
		[ C(RESULT_MISS) ] = SKL_DEMAND_WRITE|
				     SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
};

#define SNB_DMND_DATA_RD	(1ULL << 0)
#define SNB_DMND_RFO		(1ULL << 1)
#define SNB_DMND_IFETCH		(1ULL << 2)
#define SNB_DMND_WB		(1ULL << 3)
#define SNB_PF_DATA_RD		(1ULL << 4)
#define SNB_PF_RFO		(1ULL << 5)
#define SNB_PF_IFETCH		(1ULL << 6)
#define SNB_LLC_DATA_RD		(1ULL << 7)
#define SNB_LLC_RFO		(1ULL << 8)
#define SNB_LLC_IFETCH		(1ULL << 9)
#define SNB_BUS_LOCKS		(1ULL << 10)
#define SNB_STRM_ST		(1ULL << 11)
#define SNB_OTHER		(1ULL << 15)
#define SNB_RESP_ANY		(1ULL << 16)
#define SNB_NO_SUPP		(1ULL << 17)
#define SNB_LLC_HITM		(1ULL << 18)
#define SNB_LLC_HITE		(1ULL << 19)
#define SNB_LLC_HITS		(1ULL << 20)
#define SNB_LLC_HITF		(1ULL << 21)
#define SNB_LOCAL		(1ULL << 22)
#define SNB_REMOTE		(0xffULL << 23)
#define SNB_SNP_NONE		(1ULL << 31)
#define SNB_SNP_NOT_NEEDED	(1ULL << 32)
#define SNB_SNP_MISS		(1ULL << 33)
#define SNB_NO_FWD		(1ULL << 34)
#define SNB_SNP_FWD		(1ULL << 35)
#define SNB_HITM		(1ULL << 36)
#define SNB_NON_DRAM		(1ULL << 37)

#define SNB_DMND_READ		(SNB_DMND_DATA_RD|SNB_LLC_DATA_RD)
#define SNB_DMND_WRITE		(SNB_DMND_RFO|SNB_LLC_RFO)
#define SNB_DMND_PREFETCH	(SNB_PF_DATA_RD|SNB_PF_RFO)

#define SNB_SNP_ANY		(SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \
				 SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \
				 SNB_HITM)

#define SNB_DRAM_ANY		(SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY)
#define SNB_DRAM_REMOTE		(SNB_REMOTE|SNB_SNP_ANY)

#define SNB_L3_ACCESS		SNB_RESP_ANY
#define SNB_L3_MISS		(SNB_DRAM_ANY|SNB_NON_DRAM)
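/*
 * Editorial worked example: SNB_DMND_READ|SNB_L3_MISS combines demand data
 * reads (plus the LLC-prefetch data reads in SNB_LLC_DATA_RD) with every
 * DRAM target and snoop type and with SNB_NON_DRAM, which is how the LL
 * read-miss value below is formed. Note that unlike the single-bit flags,
 * SNB_REMOTE is an 8-bit field (0xff << 23) covering all remote nodes.
 */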
static __initconst const u64 snb_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS,
		[ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_L3_MISS,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS,
		[ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_L3_MISS,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS,
		[ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_L3_MISS,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY,
		[ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_DRAM_REMOTE,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY,
		[ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY,
		[ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE,
	},
 },
};

static __initconst const u64 snb_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
		[ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
		[ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_WRITE) ] = {
		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
		[ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
		[ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
		[ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS) ] = 0x01b7,
	},
 },

};

/*
 * Notes on the events:
 * - data reads do not include code reads (comparable to earlier tables)
 * - data counts include speculative execution (except L1 write, dtlb, bpu)
 * - remote node access includes remote memory, remote cache, remote mmio.
 * - prefetches are not included in the counts because they are not
 *   reliably counted.
 */

#define HSW_DEMAND_DATA_RD		BIT_ULL(0)
#define HSW_DEMAND_RFO			BIT_ULL(1)
#define HSW_ANY_RESPONSE		BIT_ULL(16)
#define HSW_SUPPLIER_NONE		BIT_ULL(17)
#define HSW_L3_MISS_LOCAL_DRAM		BIT_ULL(22)
#define HSW_L3_MISS_REMOTE_HOP0		BIT_ULL(27)
#define HSW_L3_MISS_REMOTE_HOP1		BIT_ULL(28)
#define HSW_L3_MISS_REMOTE_HOP2P	BIT_ULL(29)
#define HSW_L3_MISS			(HSW_L3_MISS_LOCAL_DRAM| \
					 HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
					 HSW_L3_MISS_REMOTE_HOP2P)
#define HSW_SNOOP_NONE			BIT_ULL(31)
#define HSW_SNOOP_NOT_NEEDED		BIT_ULL(32)
#define HSW_SNOOP_MISS			BIT_ULL(33)
#define HSW_SNOOP_HIT_NO_FWD		BIT_ULL(34)
#define HSW_SNOOP_HIT_WITH_FWD		BIT_ULL(35)
#define HSW_SNOOP_HITM			BIT_ULL(36)
#define HSW_SNOOP_NON_DRAM		BIT_ULL(37)
#define HSW_ANY_SNOOP			(HSW_SNOOP_NONE| \
					 HSW_SNOOP_NOT_NEEDED|HSW_SNOOP_MISS| \
					 HSW_SNOOP_HIT_NO_FWD|HSW_SNOOP_HIT_WITH_FWD| \
					 HSW_SNOOP_HITM|HSW_SNOOP_NON_DRAM)
#define HSW_SNOOP_DRAM			(HSW_ANY_SNOOP & ~HSW_SNOOP_NON_DRAM)
#define HSW_DEMAND_READ			HSW_DEMAND_DATA_RD
#define HSW_DEMAND_WRITE		HSW_DEMAND_RFO
#define HSW_L3_MISS_REMOTE		(HSW_L3_MISS_REMOTE_HOP0|\
					 HSW_L3_MISS_REMOTE_HOP1|HSW_L3_MISS_REMOTE_HOP2P)
#define HSW_LLC_ACCESS			HSW_ANY_RESPONSE

#define BDW_L3_MISS_LOCAL		BIT(26)
#define BDW_L3_MISS			(BDW_L3_MISS_LOCAL| \
					 HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
					 HSW_L3_MISS_REMOTE_HOP2P)
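/*
 * Editorial note: Broadwell moved the local-DRAM L3-miss indication from
 * bit 22 to bit 26, so BDW_L3_MISS swaps only that bit and reuses the
 * Haswell remote-hop bits unchanged.
 */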
static __initconst const u64 hsw_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
		[ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
		[ C(RESULT_MISS) ] = 0x0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x280, /* ICACHE.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
		[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
		[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
		[ C(RESULT_MISS) ] = 0x108, /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
		[ C(RESULT_MISS) ] = 0x149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x6085, /* ITLB_MISSES.STLB_HIT */
		[ C(RESULT_MISS) ] = 0x185, /* ITLB_MISSES.MISS_CAUSES_A_WALK */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
		[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */
		[ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
};

static __initconst const u64 hsw_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
				       HSW_LLC_ACCESS,
		[ C(RESULT_MISS) ] = HSW_DEMAND_READ|
				     HSW_L3_MISS|HSW_ANY_SNOOP,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
				       HSW_LLC_ACCESS,
		[ C(RESULT_MISS) ] = HSW_DEMAND_WRITE|
				     HSW_L3_MISS|HSW_ANY_SNOOP,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
				       HSW_L3_MISS_LOCAL_DRAM|
				       HSW_SNOOP_DRAM,
		[ C(RESULT_MISS) ] = HSW_DEMAND_READ|
				     HSW_L3_MISS_REMOTE|
				     HSW_SNOOP_DRAM,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
				       HSW_L3_MISS_LOCAL_DRAM|
				       HSW_SNOOP_DRAM,
		[ C(RESULT_MISS) ] = HSW_DEMAND_WRITE|
				     HSW_L3_MISS_REMOTE|
				     HSW_SNOOP_DRAM,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
};
static __initconst const u64 westmere_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
		[ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
		[ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
		[ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
		[ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	/*
	 * Use RFO, not WRITEBACK, because a write miss would typically occur
	 * on RFO.
	 */
	[ C(OP_WRITE) ] = {
		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
		[ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
		[ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS) ] = 0x01b7,
	},
 },
};

/*
 * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
 * See IA32 SDM Vol 3B 30.6.1.3
 */

#define NHM_DMND_DATA_RD	(1 << 0)
#define NHM_DMND_RFO		(1 << 1)
#define NHM_DMND_IFETCH		(1 << 2)
#define NHM_DMND_WB		(1 << 3)
#define NHM_PF_DATA_RD		(1 << 4)
#define NHM_PF_DATA_RFO		(1 << 5)
#define NHM_PF_IFETCH		(1 << 6)
#define NHM_OFFCORE_OTHER	(1 << 7)
#define NHM_UNCORE_HIT		(1 << 8)
#define NHM_OTHER_CORE_HIT_SNP	(1 << 9)
#define NHM_OTHER_CORE_HITM	(1 << 10)
				/* reserved */
#define NHM_REMOTE_CACHE_FWD	(1 << 12)
#define NHM_REMOTE_DRAM		(1 << 13)
#define NHM_LOCAL_DRAM		(1 << 14)
#define NHM_NON_DRAM		(1 << 15)

#define NHM_LOCAL		(NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
#define NHM_REMOTE		(NHM_REMOTE_DRAM)

#define NHM_DMND_READ		(NHM_DMND_DATA_RD)
#define NHM_DMND_WRITE		(NHM_DMND_RFO|NHM_DMND_WB)
#define NHM_DMND_PREFETCH	(NHM_PF_DATA_RD|NHM_PF_DATA_RFO)

#define NHM_L3_HIT	(NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
#define NHM_L3_MISS	(NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
#define NHM_L3_ACCESS	(NHM_L3_HIT|NHM_L3_MISS)
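/*
 * Editorial arithmetic check: with the bits above, the LL read-access
 * value is NHM_DMND_READ|NHM_L3_ACCESS = bit 0 plus bits 8-10 and 12-15,
 * i.e. 0xf701, which is what ends up in MSR_OFFCORE_RSP_0 for that table
 * slot.
 */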
static __initconst const u64 nehalem_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
		[ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
		[ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
		[ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
		[ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
		[ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
		[ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE,
	},
 },
};

static __initconst const u64 nehalem_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
		[ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
		[ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
		[ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
		[ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	/*
	 * Use RFO, not WRITEBACK, because a write miss would typically occur
	 * on RFO.
	 */
	[ C(OP_WRITE) ] = {
		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
		[ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
		[ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS) ] = 0x01b7,
	},
 },
};
static __initconst const u64 core2_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
		[ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
		[ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
		[ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
		[ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
		[ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
		[ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
		[ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
		[ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};
static __initconst const u64 atom_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
		[ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
		[ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
		[ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
		[ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
		[ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
		[ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};
static struct extra_reg intel_slm_extra_regs[] __read_mostly =
{
	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x368005ffffull, RSP_1),
	EVENT_EXTRA_END
};

#define SLM_DMND_READ		SNB_DMND_DATA_RD
#define SLM_DMND_WRITE		SNB_DMND_RFO
#define SLM_DMND_PREFETCH	(SNB_PF_DATA_RD|SNB_PF_RFO)

#define SLM_SNP_ANY		(SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM)
#define SLM_LLC_ACCESS		SNB_RESP_ANY
#define SLM_LLC_MISS		(SLM_SNP_ANY|SNB_NON_DRAM)

static __initconst const u64 slm_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
		[ C(RESULT_MISS) ] = SLM_DMND_WRITE|SLM_LLC_MISS,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS,
		[ C(RESULT_MISS) ] = SLM_DMND_PREFETCH|SLM_LLC_MISS,
	},
 },
};
static __initconst const u64 slm_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0x0104, /* LD_DCU_MISS */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */
		[ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_WRITE) ] = {
		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS) ] = 0x01b7,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0x0804, /* LD_DTLB_MISS */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS) ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
		[ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};

static struct extra_reg intel_glm_extra_regs[] __read_mostly = {
	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x360005ffbfull, RSP_1),
	EVENT_EXTRA_END
};

#define GLM_DEMAND_DATA_RD		BIT_ULL(0)
#define GLM_DEMAND_RFO			BIT_ULL(1)
#define GLM_ANY_RESPONSE		BIT_ULL(16)
#define GLM_SNP_NONE_OR_MISS		BIT_ULL(33)
#define GLM_DEMAND_READ			GLM_DEMAND_DATA_RD
#define GLM_DEMAND_WRITE		GLM_DEMAND_RFO
#define GLM_DEMAND_PREFETCH		(SNB_PF_DATA_RD|SNB_PF_RFO)
#define GLM_LLC_ACCESS			GLM_ANY_RESPONSE
#define GLM_SNP_ANY			(GLM_SNP_NONE_OR_MISS|SNB_NO_FWD|SNB_HITM)
#define GLM_LLC_MISS			(GLM_SNP_ANY|SNB_NON_DRAM)
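/*
 * Editorial note: Goldmont reuses the Sandy Bridge bit definitions where
 * the offcore-response layout is compatible (the prefetch request bits and
 * SNB_NO_FWD/SNB_HITM/SNB_NON_DRAM), so GLM_LLC_MISS expands to
 * GLM_SNP_NONE_OR_MISS|SNB_NO_FWD|SNB_HITM|SNB_NON_DRAM.
 */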
static __initconst const u64 glm_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
			[C(RESULT_MISS)] = 0x0,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
			[C(RESULT_MISS)] = 0x0,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = 0x0,
			[C(RESULT_MISS)] = 0x0,
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x0380, /* ICACHE.ACCESSES */
			[C(RESULT_MISS)] = 0x0280, /* ICACHE.MISSES */
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = -1,
			[C(RESULT_MISS)] = -1,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = 0x0,
			[C(RESULT_MISS)] = 0x0,
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
			[C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
			[C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
			[C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
			[C(RESULT_MISS)] = 0x0,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
			[C(RESULT_MISS)] = 0x0,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = 0x0,
			[C(RESULT_MISS)] = 0x0,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x00c0, /* INST_RETIRED.ANY_P */
			[C(RESULT_MISS)] = 0x0481, /* ITLB.MISS */
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = -1,
			[C(RESULT_MISS)] = -1,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = -1,
			[C(RESULT_MISS)] = -1,
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
			[C(RESULT_MISS)] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = -1,
			[C(RESULT_MISS)] = -1,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = -1,
			[C(RESULT_MISS)] = -1,
		},
	},
};

static __initconst const u64 glm_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = GLM_DEMAND_READ|
					     GLM_LLC_ACCESS,
			[C(RESULT_MISS)] = GLM_DEMAND_READ|
					   GLM_LLC_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = GLM_DEMAND_WRITE|
					     GLM_LLC_ACCESS,
			[C(RESULT_MISS)] = GLM_DEMAND_WRITE|
					   GLM_LLC_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = GLM_DEMAND_PREFETCH|
					     GLM_LLC_ACCESS,
			[C(RESULT_MISS)] = GLM_DEMAND_PREFETCH|
					   GLM_LLC_MISS,
		},
	},
};

#define KNL_OT_L2_HITE		BIT_ULL(19) /* Other Tile L2 Hit */
#define KNL_OT_L2_HITF		BIT_ULL(20) /* Other Tile L2 Hit */
#define KNL_MCDRAM_LOCAL	BIT_ULL(21)
#define KNL_MCDRAM_FAR		BIT_ULL(22)
#define KNL_DDR_LOCAL		BIT_ULL(23)
#define KNL_DDR_FAR		BIT_ULL(24)
#define KNL_DRAM_ANY		(KNL_MCDRAM_LOCAL | KNL_MCDRAM_FAR | \
				 KNL_DDR_LOCAL | KNL_DDR_FAR)
#define KNL_L2_READ		SLM_DMND_READ
#define KNL_L2_WRITE		SLM_DMND_WRITE
#define KNL_L2_PREFETCH		SLM_DMND_PREFETCH
#define KNL_L2_ACCESS		SLM_LLC_ACCESS
#define KNL_L2_MISS		(KNL_OT_L2_HITE | KNL_OT_L2_HITF | \
				 KNL_DRAM_ANY | SNB_SNP_ANY | \
				 SNB_NON_DRAM)

static __initconst const u64 knl_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = KNL_L2_READ | KNL_L2_ACCESS,
			[C(RESULT_MISS)] = 0,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = KNL_L2_WRITE | KNL_L2_ACCESS,
			[C(RESULT_MISS)] = KNL_L2_WRITE | KNL_L2_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = KNL_L2_PREFETCH | KNL_L2_ACCESS,
			[C(RESULT_MISS)] = KNL_L2_PREFETCH | KNL_L2_MISS,
		},
	},
};
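/*
 * Editorial note: Knights Landing has no shared L3, so the LL table slots
 * above are mapped onto the tile-local L2; per the bit definitions, an
 * "LL miss" is anything supplied by MCDRAM or DDR (near or far) or by
 * another tile's L2 (KNL_OT_L2_HITE/HITF).
 */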
/*
 * Used from PMIs where the LBRs are already disabled.
 *
 * This function can be called multiple times in a row; it must leave the
 * PMU in the disabled state each time.
 *
 * During consecutive calls the same disable value is written to the
 * related registers, so the PMU state remains unchanged. hw.state in
 * intel_bts_disable_local() likewise stays PERF_HES_STOPPED across
 * consecutive calls.
 */
static void __intel_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);

	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
		intel_pmu_disable_bts();
	else
		intel_bts_disable_local();

	intel_pmu_pebs_disable_all();
}

static void intel_pmu_disable_all(void)
{
	__intel_pmu_disable_all();
	intel_pmu_lbr_disable_all();
}

static void __intel_pmu_enable_all(int added, bool pmi)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	intel_pmu_pebs_enable_all();
	intel_pmu_lbr_enable_all(pmi);
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
	       x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);

	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
		struct perf_event *event =
			cpuc->events[INTEL_PMC_IDX_FIXED_BTS];

		if (WARN_ON_ONCE(!event))
			return;

		intel_pmu_enable_bts(event->hw.config);
	} else
		intel_bts_enable_local();
}

static void intel_pmu_enable_all(int added)
{
	__intel_pmu_enable_all(added, false);
}
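/*
 * Editorial note on ordering: the disable path clears GLOBAL_CTRL before
 * touching BTS/PEBS so no counter can overflow while the auxiliary
 * facilities are being torn down; the enable path is the mirror image,
 * re-arming PEBS and the LBRs before counters are switched back on
 * through GLOBAL_CTRL.
 */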
1768 * D) Call x86_perf_event_set_period to restore PMCx; 1769 */ 1770 1771 /* We always operate on all 4 pairs of PERF counters */ 1772 for (i = 0; i < 4; i++) { 1773 event = cpuc->events[i]; 1774 if (event) 1775 x86_perf_event_update(event); 1776 } 1777 1778 for (i = 0; i < 4; i++) { 1779 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]); 1780 wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0); 1781 } 1782 1783 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf); 1784 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); 1785 1786 for (i = 0; i < 4; i++) { 1787 event = cpuc->events[i]; 1788 1789 if (event) { 1790 x86_perf_event_set_period(event); 1791 __x86_pmu_enable_event(&event->hw, 1792 ARCH_PERFMON_EVENTSEL_ENABLE); 1793 } else 1794 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0); 1795 } 1796 } 1797 1798 static void intel_pmu_nhm_enable_all(int added) 1799 { 1800 if (added) 1801 intel_pmu_nhm_workaround(); 1802 intel_pmu_enable_all(added); 1803 } 1804 1805 static inline u64 intel_pmu_get_status(void) 1806 { 1807 u64 status; 1808 1809 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); 1810 1811 return status; 1812 } 1813 1814 static inline void intel_pmu_ack_status(u64 ack) 1815 { 1816 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); 1817 } 1818 1819 static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) 1820 { 1821 int idx = hwc->idx - INTEL_PMC_IDX_FIXED; 1822 u64 ctrl_val, mask; 1823 1824 mask = 0xfULL << (idx * 4); 1825 1826 rdmsrl(hwc->config_base, ctrl_val); 1827 ctrl_val &= ~mask; 1828 wrmsrl(hwc->config_base, ctrl_val); 1829 } 1830 1831 static inline bool event_is_checkpointed(struct perf_event *event) 1832 { 1833 return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0; 1834 } 1835 1836 static void intel_pmu_disable_event(struct perf_event *event) 1837 { 1838 struct hw_perf_event *hwc = &event->hw; 1839 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 1840 1841 if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { 1842 intel_pmu_disable_bts(); 1843 intel_pmu_drain_bts_buffer(); 1844 return; 1845 } 1846 1847 cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); 1848 cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); 1849 cpuc->intel_cp_status &= ~(1ull << hwc->idx); 1850 1851 /* 1852 * LBR must be disabled before the actual event, 1853 * because any event may be combined with LBR 1854 */ 1855 if (needs_branch_stack(event)) 1856 intel_pmu_lbr_disable(event); 1857 1858 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 1859 intel_pmu_disable_fixed(hwc); 1860 return; 1861 } 1862 1863 x86_pmu_disable_event(event); 1864 1865 if (unlikely(event->attr.precise_ip)) 1866 intel_pmu_pebs_disable(event); 1867 } 1868 1869 static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) 1870 { 1871 int idx = hwc->idx - INTEL_PMC_IDX_FIXED; 1872 u64 ctrl_val, bits, mask; 1873 1874 /* 1875 * Enable IRQ generation (0x8), 1876 * and enable ring-3 counting (0x2) and ring-0 counting (0x1) 1877 * if requested: 1878 */ 1879 bits = 0x8ULL; 1880 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) 1881 bits |= 0x2; 1882 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) 1883 bits |= 0x1; 1884 1885 /* 1886 * ANY bit is supported in v3 and up 1887 */ 1888 if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) 1889 bits |= 0x4; 1890 1891 bits <<= (idx * 4); 1892 mask = 0xfULL << (idx * 4); 1893 1894 rdmsrl(hwc->config_base, ctrl_val); 1895 ctrl_val &= ~mask; 1896 ctrl_val |= bits; 1897 wrmsrl(hwc->config_base, ctrl_val); 1898 } 1899 1900 static void intel_pmu_enable_event(struct perf_event *event) 1901 { 1902 struct hw_perf_event *hwc =
&event->hw; 1903 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 1904 1905 if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { 1906 if (!__this_cpu_read(cpu_hw_events.enabled)) 1907 return; 1908 1909 intel_pmu_enable_bts(hwc->config); 1910 return; 1911 } 1912 /* 1913 * LBR must be enabled before the actual event, 1914 * because any event may be combined with LBR 1915 */ 1916 if (needs_branch_stack(event)) 1917 intel_pmu_lbr_enable(event); 1918 1919 if (event->attr.exclude_host) 1920 cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); 1921 if (event->attr.exclude_guest) 1922 cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx); 1923 1924 if (unlikely(event_is_checkpointed(event))) 1925 cpuc->intel_cp_status |= (1ull << hwc->idx); 1926 1927 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 1928 intel_pmu_enable_fixed(hwc); 1929 return; 1930 } 1931 1932 if (unlikely(event->attr.precise_ip)) 1933 intel_pmu_pebs_enable(event); 1934 1935 __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); 1936 } 1937 1938 /* 1939 * Save and restart an expired event. Called by NMI contexts, 1940 * so it has to be careful about preempting normal event ops: 1941 */ 1942 int intel_pmu_save_and_restart(struct perf_event *event) 1943 { 1944 x86_perf_event_update(event); 1945 /* 1946 * For a checkpointed counter always reset back to 0. This 1947 * avoids a situation where the counter overflows, aborts the 1948 * transaction and is then set back to shortly before the 1949 * overflow, and overflows and aborts again. 1950 */ 1951 if (unlikely(event_is_checkpointed(event))) { 1952 /* No race with NMIs because the counter should not be armed */ 1953 wrmsrl(event->hw.event_base, 0); 1954 local64_set(&event->hw.prev_count, 0); 1955 } 1956 return x86_perf_event_set_period(event); 1957 } 1958 1959 static void intel_pmu_reset(void) 1960 { 1961 struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); 1962 unsigned long flags; 1963 int idx; 1964 1965 if (!x86_pmu.num_counters) 1966 return; 1967 1968 local_irq_save(flags); 1969 1970 pr_info("clearing PMU state on CPU#%d\n", smp_processor_id()); 1971 1972 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1973 wrmsrl_safe(x86_pmu_config_addr(idx), 0ull); 1974 wrmsrl_safe(x86_pmu_event_addr(idx), 0ull); 1975 } 1976 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) 1977 wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); 1978 1979 if (ds) 1980 ds->bts_index = ds->bts_buffer_base; 1981 1982 /* Ack all overflows and disable all counters */ 1983 if (x86_pmu.version >= 2) { 1984 intel_pmu_ack_status(intel_pmu_get_status()); 1985 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); 1986 } 1987 1988 /* Reset LBRs and LBR freezing */ 1989 if (x86_pmu.lbr_nr) { 1990 update_debugctlmsr(get_debugctlmsr() & 1991 ~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR)); 1992 } 1993 1994 local_irq_restore(flags); 1995 } 1996 1997 /* 1998 * This handler is triggered by the local APIC, so the APIC IRQ handling 1999 * rules apply: 2000 */ 2001 static int intel_pmu_handle_irq(struct pt_regs *regs) 2002 { 2003 struct perf_sample_data data; 2004 struct cpu_hw_events *cpuc; 2005 int bit, loops; 2006 u64 status; 2007 int handled; 2008 2009 cpuc = this_cpu_ptr(&cpu_hw_events); 2010 2011 /* 2012 * There is no known reason to not always do late ACK, 2013 * but just in case, make it opt-in.
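 *
 * As a sketch of the two orderings (both apic_write() calls exist in
 * this function; which one runs depends on x86_pmu.late_ack):
 *
 *	early ack: apic_write(APIC_LVTPC, APIC_DM_NMI); ...handle PMI...
 *	late ack:  ...handle PMI...; apic_write(APIC_LVTPC, APIC_DM_NMI);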
2014 */ 2015 if (!x86_pmu.late_ack) 2016 apic_write(APIC_LVTPC, APIC_DM_NMI); 2017 __intel_pmu_disable_all(); 2018 handled = intel_pmu_drain_bts_buffer(); 2019 handled += intel_bts_interrupt(); 2020 status = intel_pmu_get_status(); 2021 if (!status) 2022 goto done; 2023 2024 loops = 0; 2025 again: 2026 intel_pmu_lbr_read(); 2027 intel_pmu_ack_status(status); 2028 if (++loops > 100) { 2029 static bool warned = false; 2030 if (!warned) { 2031 WARN(1, "perfevents: irq loop stuck!\n"); 2032 perf_event_print_debug(); 2033 warned = true; 2034 } 2035 intel_pmu_reset(); 2036 goto done; 2037 } 2038 2039 inc_irq_stat(apic_perf_irqs); 2040 2041 2042 /* 2043 * Ignore a range of extra bits in status that do not indicate 2044 * overflow by themselves. 2045 */ 2046 status &= ~(GLOBAL_STATUS_COND_CHG | 2047 GLOBAL_STATUS_ASIF | 2048 GLOBAL_STATUS_LBRS_FROZEN); 2049 if (!status) 2050 goto done; 2051 2052 /* 2053 * PEBS overflow sets bit 62 in the global status register 2054 */ 2055 if (__test_and_clear_bit(62, (unsigned long *)&status)) { 2056 handled++; 2057 x86_pmu.drain_pebs(regs); 2058 /* 2059 * There are cases where, even though the PEBS ovfl bit is set 2060 * in GLOBAL_OVF_STATUS, the PEBS events may also have their 2061 * overflow bits set for their counters. We must clear them 2062 * here because they have been processed as exact samples in 2063 * the drain_pebs() routine. They must not be processed again 2064 * in the for_each_set_bit() loop for regular samples below. 2065 */ 2066 status &= ~cpuc->pebs_enabled; 2067 status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; 2068 } 2069 2070 /* 2071 * Intel PT 2072 */ 2073 if (__test_and_clear_bit(55, (unsigned long *)&status)) { 2074 handled++; 2075 intel_pt_interrupt(); 2076 } 2077 2078 /* 2079 * Checkpointed counters can lead to 'spurious' PMIs because the 2080 * rollback caused by the PMI will have cleared the overflow status 2081 * bit. Therefore always force probe these counters. 2082 */ 2083 status |= cpuc->intel_cp_status; 2084 2085 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { 2086 struct perf_event *event = cpuc->events[bit]; 2087 2088 handled++; 2089 2090 if (!test_bit(bit, cpuc->active_mask)) 2091 continue; 2092 2093 if (!intel_pmu_save_and_restart(event)) 2094 continue; 2095 2096 perf_sample_data_init(&data, 0, event->hw.last_period); 2097 2098 if (has_branch_stack(event)) 2099 data.br_stack = &cpuc->lbr_stack; 2100 2101 if (perf_event_overflow(event, &data, regs)) 2102 x86_pmu_stop(event, 0); 2103 } 2104 2105 /* 2106 * Repeat if there is more work to be done: 2107 */ 2108 status = intel_pmu_get_status(); 2109 if (status) 2110 goto again; 2111 2112 done: 2113 /* Only restore PMU state when it's active. See x86_pmu_disable(). */ 2114 if (cpuc->enabled) 2115 __intel_pmu_enable_all(0, true); 2116 2117 /* 2118 * Only unmask the NMI after the overflow counters 2119 * have been reset. This avoids spurious NMIs on 2120 * Haswell CPUs.
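 * (x86_pmu.late_ack is set per model in intel_pmu_init() below, e.g.
 * for the Haswell, Broadwell and Skylake cases.)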
2121 */ 2122 if (x86_pmu.late_ack) 2123 apic_write(APIC_LVTPC, APIC_DM_NMI); 2124 return handled; 2125 } 2126 2127 static struct event_constraint * 2128 intel_bts_constraints(struct perf_event *event) 2129 { 2130 struct hw_perf_event *hwc = &event->hw; 2131 unsigned int hw_event, bts_event; 2132 2133 if (event->attr.freq) 2134 return NULL; 2135 2136 hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; 2137 bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); 2138 2139 if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) 2140 return &bts_constraint; 2141 2142 return NULL; 2143 } 2144 2145 static int intel_alt_er(int idx, u64 config) 2146 { 2147 int alt_idx = idx; 2148 2149 if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1)) 2150 return idx; 2151 2152 if (idx == EXTRA_REG_RSP_0) 2153 alt_idx = EXTRA_REG_RSP_1; 2154 2155 if (idx == EXTRA_REG_RSP_1) 2156 alt_idx = EXTRA_REG_RSP_0; 2157 2158 if (config & ~x86_pmu.extra_regs[alt_idx].valid_mask) 2159 return idx; 2160 2161 return alt_idx; 2162 } 2163 2164 static void intel_fixup_er(struct perf_event *event, int idx) 2165 { 2166 event->hw.extra_reg.idx = idx; 2167 2168 if (idx == EXTRA_REG_RSP_0) { 2169 event->hw.config &= ~INTEL_ARCH_EVENT_MASK; 2170 event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event; 2171 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; 2172 } else if (idx == EXTRA_REG_RSP_1) { 2173 event->hw.config &= ~INTEL_ARCH_EVENT_MASK; 2174 event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event; 2175 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; 2176 } 2177 } 2178 2179 /* 2180 * manage allocation of shared extra MSRs for certain events 2181 * 2182 * sharing can be: 2183 * per-cpu: to be shared between the various events on a single PMU 2184 * per-core: per-cpu + shared by HT threads 2185 */ 2186 static struct event_constraint * 2187 __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, 2188 struct perf_event *event, 2189 struct hw_perf_event_extra *reg) 2190 { 2191 struct event_constraint *c = &emptyconstraint; 2192 struct er_account *era; 2193 unsigned long flags; 2194 int idx = reg->idx; 2195 2196 /* 2197 * reg->alloc can be set due to existing state, so for fake cpuc we 2198 * need to ignore this, otherwise we might fail to allocate proper fake 2199 * state for this extra reg constraint. Also see the comment below. 2200 */ 2201 if (reg->alloc && !cpuc->is_fake) 2202 return NULL; /* call x86_get_event_constraint() */ 2203 2204 again: 2205 era = &cpuc->shared_regs->regs[idx]; 2206 /* 2207 * we use spin_lock_irqsave() to avoid lockdep issues when 2208 * passing a fake cpuc 2209 */ 2210 raw_spin_lock_irqsave(&era->lock, flags); 2211 2212 if (!atomic_read(&era->ref) || era->config == reg->config) { 2213 2214 /* 2215 * If it's a fake cpuc -- as per validate_{group,event}() we 2216 * shouldn't touch event state, and we can avoid doing so 2217 * since both will only call get_event_constraints() once 2218 * on each event; this avoids the need for reg->alloc. 2219 * 2220 * Not doing the ER fixup will only result in era->reg being 2221 * wrong, but since we won't actually try to program the hardware 2222 * this isn't a problem either. 2223 */ 2224 if (!cpuc->is_fake) { 2225 if (idx != reg->idx) 2226 intel_fixup_er(event, idx); 2227 2228 /* 2229 * x86_schedule_events() can call get_event_constraints() 2230 * multiple times on events in the case of incremental 2231 * scheduling. reg->alloc ensures we only do the ER 2232 * allocation once.
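 *
 * Illustrative example (event names hypothetical): if two offcore
 * response events in a group both start out with idx == EXTRA_REG_RSP_0,
 * the first one locks era->config for RSP_0; the second fails the
 * era->config comparison, is re-routed by intel_alt_er() to
 * EXTRA_REG_RSP_1, and intel_fixup_er() rewrites its event code and
 * target MSR to OFFCORE_RSP_1.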
2233 */ 2234 reg->alloc = 1; 2235 } 2236 2237 /* lock in msr value */ 2238 era->config = reg->config; 2239 era->reg = reg->reg; 2240 2241 /* one more user */ 2242 atomic_inc(&era->ref); 2243 2244 /* 2245 * need to call x86_get_event_constraint() 2246 * to check if the associated event has constraints 2247 */ 2248 c = NULL; 2249 } else { 2250 idx = intel_alt_er(idx, reg->config); 2251 if (idx != reg->idx) { 2252 raw_spin_unlock_irqrestore(&era->lock, flags); 2253 goto again; 2254 } 2255 } 2256 raw_spin_unlock_irqrestore(&era->lock, flags); 2257 2258 return c; 2259 } 2260 2261 static void 2262 __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc, 2263 struct hw_perf_event_extra *reg) 2264 { 2265 struct er_account *era; 2266 2267 /* 2268 * Only put constraint if extra reg was actually allocated. Also takes 2269 * care of events which do not use an extra shared reg. 2270 * 2271 * Also, if this is a fake cpuc we shouldn't touch any event state 2272 * (reg->alloc) and we don't care about leaving inconsistent cpuc state 2273 * either since it'll be thrown out. 2274 */ 2275 if (!reg->alloc || cpuc->is_fake) 2276 return; 2277 2278 era = &cpuc->shared_regs->regs[reg->idx]; 2279 2280 /* one fewer user */ 2281 atomic_dec(&era->ref); 2282 2283 /* allocate again next time */ 2284 reg->alloc = 0; 2285 } 2286 2287 static struct event_constraint * 2288 intel_shared_regs_constraints(struct cpu_hw_events *cpuc, 2289 struct perf_event *event) 2290 { 2291 struct event_constraint *c = NULL, *d; 2292 struct hw_perf_event_extra *xreg, *breg; 2293 2294 xreg = &event->hw.extra_reg; 2295 if (xreg->idx != EXTRA_REG_NONE) { 2296 c = __intel_shared_reg_get_constraints(cpuc, event, xreg); 2297 if (c == &emptyconstraint) 2298 return c; 2299 } 2300 breg = &event->hw.branch_reg; 2301 if (breg->idx != EXTRA_REG_NONE) { 2302 d = __intel_shared_reg_get_constraints(cpuc, event, breg); 2303 if (d == &emptyconstraint) { 2304 __intel_shared_reg_put_constraints(cpuc, xreg); 2305 c = d; 2306 } 2307 } 2308 return c; 2309 } 2310 2311 struct event_constraint * 2312 x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx, 2313 struct perf_event *event) 2314 { 2315 struct event_constraint *c; 2316 2317 if (x86_pmu.event_constraints) { 2318 for_each_event_constraint(c, x86_pmu.event_constraints) { 2319 if ((event->hw.config & c->cmask) == c->code) { 2320 event->hw.flags |= c->flags; 2321 return c; 2322 } 2323 } 2324 } 2325 2326 return &unconstrained; 2327 } 2328 2329 static struct event_constraint * 2330 __intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, 2331 struct perf_event *event) 2332 { 2333 struct event_constraint *c; 2334 2335 c = intel_bts_constraints(event); 2336 if (c) 2337 return c; 2338 2339 c = intel_shared_regs_constraints(cpuc, event); 2340 if (c) 2341 return c; 2342 2343 c = intel_pebs_constraints(event); 2344 if (c) 2345 return c; 2346 2347 return x86_get_event_constraints(cpuc, idx, event); 2348 } 2349 2350 static void 2351 intel_start_scheduling(struct cpu_hw_events *cpuc) 2352 { 2353 struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; 2354 struct intel_excl_states *xl; 2355 int tid = cpuc->excl_thread_id; 2356 2357 /* 2358 * nothing needed if in group validation mode 2359 */ 2360 if (cpuc->is_fake || !is_ht_workaround_enabled()) 2361 return; 2362 2363 /* 2364 * no exclusion needed 2365 */ 2366 if (WARN_ON_ONCE(!excl_cntrs)) 2367 return; 2368 2369 xl = &excl_cntrs->states[tid]; 2370 2371 xl->sched_started = true; 2372 /* 2373 * Lock shared state until we are done scheduling; the lock is released 2374 * in
intel_stop_scheduling(). This 2375 * makes scheduling appear as a transaction. 2376 */ 2377 raw_spin_lock(&excl_cntrs->lock); 2378 } 2379 2380 static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr) 2381 { 2382 struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; 2383 struct event_constraint *c = cpuc->event_constraint[idx]; 2384 struct intel_excl_states *xl; 2385 int tid = cpuc->excl_thread_id; 2386 2387 if (cpuc->is_fake || !is_ht_workaround_enabled()) 2388 return; 2389 2390 if (WARN_ON_ONCE(!excl_cntrs)) 2391 return; 2392 2393 if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) 2394 return; 2395 2396 xl = &excl_cntrs->states[tid]; 2397 2398 lockdep_assert_held(&excl_cntrs->lock); 2399 2400 if (c->flags & PERF_X86_EVENT_EXCL) 2401 xl->state[cntr] = INTEL_EXCL_EXCLUSIVE; 2402 else 2403 xl->state[cntr] = INTEL_EXCL_SHARED; 2404 } 2405 2406 static void 2407 intel_stop_scheduling(struct cpu_hw_events *cpuc) 2408 { 2409 struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; 2410 struct intel_excl_states *xl; 2411 int tid = cpuc->excl_thread_id; 2412 2413 /* 2414 * nothing needed if in group validation mode 2415 */ 2416 if (cpuc->is_fake || !is_ht_workaround_enabled()) 2417 return; 2418 /* 2419 * no exclusion needed 2420 */ 2421 if (WARN_ON_ONCE(!excl_cntrs)) 2422 return; 2423 2424 xl = &excl_cntrs->states[tid]; 2425 2426 xl->sched_started = false; 2427 /* 2428 * release shared state lock (acquired in intel_start_scheduling()) 2429 */ 2430 raw_spin_unlock(&excl_cntrs->lock); 2431 } 2432 2433 static struct event_constraint * 2434 intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, 2435 int idx, struct event_constraint *c) 2436 { 2437 struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; 2438 struct intel_excl_states *xlo; 2439 int tid = cpuc->excl_thread_id; 2440 int is_excl, i; 2441 2442 /* 2443 * validating a group does not require 2444 * enforcing cross-thread exclusion 2445 */ 2446 if (cpuc->is_fake || !is_ht_workaround_enabled()) 2447 return c; 2448 2449 /* 2450 * no exclusion needed 2451 */ 2452 if (WARN_ON_ONCE(!excl_cntrs)) 2453 return c; 2454 2455 /* 2456 * because we modify the constraint, we need 2457 * to make a copy. Static constraints come 2458 * from static const tables. 2459 * 2460 * only needed when constraint has not yet 2461 * been cloned (marked dynamic) 2462 */ 2463 if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) { 2464 struct event_constraint *cx; 2465 2466 /* 2467 * grab pre-allocated constraint entry 2468 */ 2469 cx = &cpuc->constraint_list[idx]; 2470 2471 /* 2472 * initialize dynamic constraint 2473 * with static constraint 2474 */ 2475 *cx = *c; 2476 2477 /* 2478 * mark constraint as dynamic, so we 2479 * can free it later on 2480 */ 2481 cx->flags |= PERF_X86_EVENT_DYNAMIC; 2482 c = cx; 2483 } 2484 2485 /* 2486 * From here on, the constraint is dynamic.
2487 * Either it was just allocated above, or it 2488 * was allocated during an earlier invocation 2489 * of this function. 2490 */ 2491 2492 /* 2493 * state of sibling HT 2494 */ 2495 xlo = &excl_cntrs->states[tid ^ 1]; 2496 2497 /* 2498 * event requires exclusive counter access 2499 * across HT threads 2500 */ 2501 is_excl = c->flags & PERF_X86_EVENT_EXCL; 2502 if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) { 2503 event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT; 2504 if (!cpuc->n_excl++) 2505 WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1); 2506 } 2507 2508 /* 2509 * Modify static constraint with current dynamic 2510 * state of thread 2511 * 2512 * EXCLUSIVE: sibling counter measuring exclusive event 2513 * SHARED : sibling counter measuring non-exclusive event 2514 * UNUSED : sibling counter unused 2515 */ 2516 for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) { 2517 /* 2518 * exclusive event in sibling counter: 2519 * our corresponding counter cannot be used, 2520 * regardless of our event 2521 */ 2522 if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) 2523 __clear_bit(i, c->idxmsk); 2524 /* 2525 * if we are measuring an exclusive event and the 2526 * sibling is measuring a non-exclusive one, then 2527 * the counter cannot be used 2528 */ 2529 if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) 2530 __clear_bit(i, c->idxmsk); 2531 } 2532 2533 /* 2534 * recompute actual bit weight for scheduling algorithm 2535 */ 2536 c->weight = hweight64(c->idxmsk64); 2537 2538 /* 2539 * if we return an empty mask, then switch 2540 * back to static empty constraint to avoid 2541 * the cost of freeing later on 2542 */ 2543 if (c->weight == 0) 2544 c = &emptyconstraint; 2545 2546 return c; 2547 } 2548 2549 static struct event_constraint * 2550 intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, 2551 struct perf_event *event) 2552 { 2553 struct event_constraint *c1 = NULL; 2554 struct event_constraint *c2; 2555 2556 if (idx >= 0) /* fake cpuc passes idx < 0 */ 2557 c1 = cpuc->event_constraint[idx]; 2558 2559 /* 2560 * first time only 2561 * - static constraint: no change across incremental scheduling calls 2562 * - dynamic constraint: handled by intel_get_excl_constraints() 2563 */ 2564 c2 = __intel_get_event_constraints(cpuc, idx, event); 2565 if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) { 2566 bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX); 2567 c1->weight = c2->weight; 2568 c2 = c1; 2569 } 2570 2571 if (cpuc->excl_cntrs) 2572 return intel_get_excl_constraints(cpuc, event, idx, c2); 2573 2574 return c2; 2575 } 2576 2577 static void intel_put_excl_constraints(struct cpu_hw_events *cpuc, 2578 struct perf_event *event) 2579 { 2580 struct hw_perf_event *hwc = &event->hw; 2581 struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; 2582 int tid = cpuc->excl_thread_id; 2583 struct intel_excl_states *xl; 2584 2585 /* 2586 * nothing needed if in group validation mode 2587 */ 2588 if (cpuc->is_fake) 2589 return; 2590 2591 if (WARN_ON_ONCE(!excl_cntrs)) 2592 return; 2593 2594 if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) { 2595 hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT; 2596 if (!--cpuc->n_excl) 2597 WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0); 2598 } 2599 2600 /* 2601 * If the event was actually assigned, then mark the counter state as 2602 * unused now. 2603 */ 2604 if (hwc->idx >= 0) { 2605 xl = &excl_cntrs->states[tid]; 2606 2607 /* 2608 * put_constraint may be called from x86_schedule_events() 2609 * which already has the lock held, so make the locking 2610 * conditional here.
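 *
 * That is: when xl->sched_started is set, intel_start_scheduling()
 * already holds excl_cntrs->lock and we only flip the counter state;
 * otherwise we take and release the lock around the update ourselves,
 * exactly as the code below does.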
2611 */ 2612 if (!xl->sched_started) 2613 raw_spin_lock(&excl_cntrs->lock); 2614 2615 xl->state[hwc->idx] = INTEL_EXCL_UNUSED; 2616 2617 if (!xl->sched_started) 2618 raw_spin_unlock(&excl_cntrs->lock); 2619 } 2620 } 2621 2622 static void 2623 intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, 2624 struct perf_event *event) 2625 { 2626 struct hw_perf_event_extra *reg; 2627 2628 reg = &event->hw.extra_reg; 2629 if (reg->idx != EXTRA_REG_NONE) 2630 __intel_shared_reg_put_constraints(cpuc, reg); 2631 2632 reg = &event->hw.branch_reg; 2633 if (reg->idx != EXTRA_REG_NONE) 2634 __intel_shared_reg_put_constraints(cpuc, reg); 2635 } 2636 2637 static void intel_put_event_constraints(struct cpu_hw_events *cpuc, 2638 struct perf_event *event) 2639 { 2640 intel_put_shared_regs_event_constraints(cpuc, event); 2641 2642 /* 2643 * if the PMU has exclusive counter restrictions, then 2644 * all events are subject to them and must call the 2645 * put_excl_constraints() routine 2646 */ 2647 if (cpuc->excl_cntrs) 2648 intel_put_excl_constraints(cpuc, event); 2649 } 2650 2651 static void intel_pebs_aliases_core2(struct perf_event *event) 2652 { 2653 if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { 2654 /* 2655 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P 2656 * (0x003c) so that we can use it with PEBS. 2657 * 2658 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't 2659 * PEBS capable. However we can use INST_RETIRED.ANY_P 2660 * (0x00c0), which is a PEBS capable event, to get the same 2661 * count. 2662 * 2663 * INST_RETIRED.ANY_P counts the number of cycles that retire 2664 * CNTMASK instructions. By setting CNTMASK to a value (16) 2665 * larger than the maximum number of instructions that can be 2666 * retired per cycle (4) and then inverting the condition, we 2667 * count all cycles that retire 16 or fewer instructions, which 2668 * is every cycle. 2669 * 2670 * Thereby we gain a PEBS capable cycle counter. 2671 */ 2672 u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); 2673 2674 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); 2675 event->hw.config = alt_config; 2676 } 2677 } 2678 2679 static void intel_pebs_aliases_snb(struct perf_event *event) 2680 { 2681 if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { 2682 /* 2683 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P 2684 * (0x003c) so that we can use it with PEBS. 2685 * 2686 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't 2687 * PEBS capable. However we can use UOPS_RETIRED.ALL 2688 * (0x01c2), which is a PEBS capable event, to get the same 2689 * count. 2690 * 2691 * UOPS_RETIRED.ALL counts the number of cycles that retire 2692 * CNTMASK micro-ops. By setting CNTMASK to a value (16) 2693 * larger than the maximum number of micro-ops that can be 2694 * retired per cycle (4) and then inverting the condition, we 2695 * count all cycles that retire 16 or fewer micro-ops, which 2696 * is every cycle. 2697 * 2698 * Thereby we gain a PEBS capable cycle counter. 2699 */ 2700 u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16); 2701 2702 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); 2703 event->hw.config = alt_config; 2704 } 2705 } 2706 2707 static void intel_pebs_aliases_precdist(struct perf_event *event) 2708 { 2709 if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { 2710 /* 2711 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P 2712 * (0x003c) so that we can use it with PEBS.
2713 * 2714 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't 2715 * PEBS capable. However we can use INST_RETIRED.PREC_DIST 2716 * (0x01c0), which is a PEBS capable event, to get the same 2717 * count. 2718 * 2719 * The PREC_DIST event has special support to minimize sample 2720 * shadowing effects. One drawback is that it can 2721 * only be programmed on counter 1, but that seems like an 2722 * acceptable trade-off. 2723 */ 2724 u64 alt_config = X86_CONFIG(.event=0xc0, .umask=0x01, .inv=1, .cmask=16); 2725 2726 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); 2727 event->hw.config = alt_config; 2728 } 2729 } 2730 2731 static void intel_pebs_aliases_ivb(struct perf_event *event) 2732 { 2733 if (event->attr.precise_ip < 3) 2734 return intel_pebs_aliases_snb(event); 2735 return intel_pebs_aliases_precdist(event); 2736 } 2737 2738 static void intel_pebs_aliases_skl(struct perf_event *event) 2739 { 2740 if (event->attr.precise_ip < 3) 2741 return intel_pebs_aliases_core2(event); 2742 return intel_pebs_aliases_precdist(event); 2743 } 2744 2745 static unsigned long intel_pmu_free_running_flags(struct perf_event *event) 2746 { 2747 unsigned long flags = x86_pmu.free_running_flags; 2748 2749 if (event->attr.use_clockid) 2750 flags &= ~PERF_SAMPLE_TIME; 2751 return flags; 2752 } 2753 2754 static int intel_pmu_hw_config(struct perf_event *event) 2755 { 2756 int ret = x86_pmu_hw_config(event); 2757 2758 if (ret) 2759 return ret; 2760 2761 if (event->attr.precise_ip) { 2762 if (!event->attr.freq) { 2763 event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; 2764 if (!(event->attr.sample_type & 2765 ~intel_pmu_free_running_flags(event))) 2766 event->hw.flags |= PERF_X86_EVENT_FREERUNNING; 2767 } 2768 if (x86_pmu.pebs_aliases) 2769 x86_pmu.pebs_aliases(event); 2770 } 2771 2772 if (needs_branch_stack(event)) { 2773 ret = intel_pmu_setup_lbr_filter(event); 2774 if (ret) 2775 return ret; 2776 2777 /* 2778 * BTS is set up earlier in this path, so don't account twice 2779 */ 2780 if (!intel_pmu_has_bts(event)) { 2781 /* disallow lbr if conflicting events are present */ 2782 if (x86_add_exclusive(x86_lbr_exclusive_lbr)) 2783 return -EBUSY; 2784 2785 event->destroy = hw_perf_lbr_event_destroy; 2786 } 2787 } 2788 2789 if (event->attr.type != PERF_TYPE_RAW) 2790 return 0; 2791 2792 if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY)) 2793 return 0; 2794 2795 if (x86_pmu.version < 3) 2796 return -EINVAL; 2797 2798 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) 2799 return -EACCES; 2800 2801 event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY; 2802 2803 return 0; 2804 } 2805 2806 struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) 2807 { 2808 if (x86_pmu.guest_get_msrs) 2809 return x86_pmu.guest_get_msrs(nr); 2810 *nr = 0; 2811 return NULL; 2812 } 2813 EXPORT_SYMBOL_GPL(perf_guest_get_msrs); 2814 2815 static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) 2816 { 2817 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 2818 struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; 2819 2820 arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; 2821 arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask; 2822 arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask; 2823 /* 2824 * If a PMU counter has PEBS enabled, it is not enough to disable the 2825 * counter on guest entry, since the PEBS memory write can overshoot the 2826 * guest entry and corrupt guest memory. Disabling PEBS solves the problem.
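 *
 * The switch entry below therefore loads 0 into MSR_IA32_PEBS_ENABLE on
 * guest entry and restores cpuc->pebs_enabled on exit; the list is
 * consumed by callers of perf_guest_get_msrs(), e.g. KVM.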
2827 */ 2828 arr[1].msr = MSR_IA32_PEBS_ENABLE; 2829 arr[1].host = cpuc->pebs_enabled; 2830 arr[1].guest = 0; 2831 2832 *nr = 2; 2833 return arr; 2834 } 2835 2836 static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr) 2837 { 2838 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 2839 struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; 2840 int idx; 2841 2842 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 2843 struct perf_event *event = cpuc->events[idx]; 2844 2845 arr[idx].msr = x86_pmu_config_addr(idx); 2846 arr[idx].host = arr[idx].guest = 0; 2847 2848 if (!test_bit(idx, cpuc->active_mask)) 2849 continue; 2850 2851 arr[idx].host = arr[idx].guest = 2852 event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE; 2853 2854 if (event->attr.exclude_host) 2855 arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE; 2856 else if (event->attr.exclude_guest) 2857 arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE; 2858 } 2859 2860 *nr = x86_pmu.num_counters; 2861 return arr; 2862 } 2863 2864 static void core_pmu_enable_event(struct perf_event *event) 2865 { 2866 if (!event->attr.exclude_host) 2867 x86_pmu_enable_event(event); 2868 } 2869 2870 static void core_pmu_enable_all(int added) 2871 { 2872 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 2873 int idx; 2874 2875 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 2876 struct hw_perf_event *hwc = &cpuc->events[idx]->hw; 2877 2878 if (!test_bit(idx, cpuc->active_mask) || 2879 cpuc->events[idx]->attr.exclude_host) 2880 continue; 2881 2882 __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); 2883 } 2884 } 2885 2886 static int hsw_hw_config(struct perf_event *event) 2887 { 2888 int ret = intel_pmu_hw_config(event); 2889 2890 if (ret) 2891 return ret; 2892 if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE)) 2893 return 0; 2894 event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED); 2895 2896 /* 2897 * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with 2898 * PEBS or in ANY-thread mode. Since the results are nonsensical, forbid 2899 * this combination. 2900 */ 2901 if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) && 2902 ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) || 2903 event->attr.precise_ip > 0)) 2904 return -EOPNOTSUPP; 2905 2906 if (event_is_checkpointed(event)) { 2907 /* 2908 * Sampling of checkpointed events can cause situations where 2909 * the CPU constantly aborts because of an overflow, which is 2910 * then checkpointed back and ignored. Forbid checkpointing 2911 * for sampling. 2912 * 2913 * But still allow a long sampling period, so that perf stat 2914 * from KVM works.
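 *
 * Concretely (mirroring the check below): sample_period == 0 (pure
 * counting) or a huge period >= 0x7fffffff is accepted, while any
 * sampling period in between is rejected with -EOPNOTSUPP.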
2915 */ 2916 if (event->attr.sample_period > 0 && 2917 event->attr.sample_period < 0x7fffffff) 2918 return -EOPNOTSUPP; 2919 } 2920 return 0; 2921 } 2922 2923 static struct event_constraint counter2_constraint = 2924 EVENT_CONSTRAINT(0, 0x4, 0); 2925 2926 static struct event_constraint * 2927 hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx, 2928 struct perf_event *event) 2929 { 2930 struct event_constraint *c; 2931 2932 c = intel_get_event_constraints(cpuc, idx, event); 2933 2934 /* Handle special quirk on in_tx_checkpointed only in counter 2 */ 2935 if (event->hw.config & HSW_IN_TX_CHECKPOINTED) { 2936 if (c->idxmsk64 & (1U << 2)) 2937 return &counter2_constraint; 2938 return &emptyconstraint; 2939 } 2940 2941 return c; 2942 } 2943 2944 /* 2945 * Broadwell: 2946 * 2947 * The INST_RETIRED.ALL period always needs to have lowest 6 bits cleared 2948 * (BDM55) and it must not use a period smaller than 100 (BDM11). We combine 2949 * the two to enforce a minimum period of 128 (the smallest value that has bits 2950 * 0-5 cleared and >= 100). 2951 * 2952 * Because of how the code in x86_perf_event_set_period() works, the truncation 2953 * of the lower 6 bits is 'harmless' as we'll occasionally add a longer period 2954 * to make up for the 'lost' events due to carrying the 'error' in period_left. 2955 * 2956 * Therefore the effective (average) period matches the requested period, 2957 * despite coarser hardware granularity. 2958 */ 2959 static unsigned bdw_limit_period(struct perf_event *event, unsigned left) 2960 { 2961 if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == 2962 X86_CONFIG(.event=0xc0, .umask=0x01)) { 2963 if (left < 128) 2964 left = 128; 2965 left &= ~0x3fu; 2966 } 2967 return left; 2968 } 2969 2970 PMU_FORMAT_ATTR(event, "config:0-7" ); 2971 PMU_FORMAT_ATTR(umask, "config:8-15" ); 2972 PMU_FORMAT_ATTR(edge, "config:18" ); 2973 PMU_FORMAT_ATTR(pc, "config:19" ); 2974 PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */ 2975 PMU_FORMAT_ATTR(inv, "config:23" ); 2976 PMU_FORMAT_ATTR(cmask, "config:24-31" ); 2977 PMU_FORMAT_ATTR(in_tx, "config:32"); 2978 PMU_FORMAT_ATTR(in_tx_cp, "config:33"); 2979 2980 static struct attribute *intel_arch_formats_attr[] = { 2981 &format_attr_event.attr, 2982 &format_attr_umask.attr, 2983 &format_attr_edge.attr, 2984 &format_attr_pc.attr, 2985 &format_attr_inv.attr, 2986 &format_attr_cmask.attr, 2987 NULL, 2988 }; 2989 2990 ssize_t intel_event_sysfs_show(char *page, u64 config) 2991 { 2992 u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT); 2993 2994 return x86_event_sysfs_show(page, config, event); 2995 } 2996 2997 struct intel_shared_regs *allocate_shared_regs(int cpu) 2998 { 2999 struct intel_shared_regs *regs; 3000 int i; 3001 3002 regs = kzalloc_node(sizeof(struct intel_shared_regs), 3003 GFP_KERNEL, cpu_to_node(cpu)); 3004 if (regs) { 3005 /* 3006 * initialize the locks to keep lockdep happy 3007 */ 3008 for (i = 0; i < EXTRA_REG_MAX; i++) 3009 raw_spin_lock_init(®s->regs[i].lock); 3010 3011 regs->core_id = -1; 3012 } 3013 return regs; 3014 } 3015 3016 static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu) 3017 { 3018 struct intel_excl_cntrs *c; 3019 3020 c = kzalloc_node(sizeof(struct intel_excl_cntrs), 3021 GFP_KERNEL, cpu_to_node(cpu)); 3022 if (c) { 3023 raw_spin_lock_init(&c->lock); 3024 c->core_id = -1; 3025 } 3026 return c; 3027 } 3028 3029 static int intel_pmu_cpu_prepare(int cpu) 3030 { 3031 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 3032 3033 if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) { 3034 
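/* Per-core extra-register state; may end up shared with the HT sibling in intel_pmu_cpu_starting(). */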
cpuc->shared_regs = allocate_shared_regs(cpu); 3035 if (!cpuc->shared_regs) 3036 goto err; 3037 } 3038 3039 if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { 3040 size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint); 3041 3042 cpuc->constraint_list = kzalloc(sz, GFP_KERNEL); 3043 if (!cpuc->constraint_list) 3044 goto err_shared_regs; 3045 3046 cpuc->excl_cntrs = allocate_excl_cntrs(cpu); 3047 if (!cpuc->excl_cntrs) 3048 goto err_constraint_list; 3049 3050 cpuc->excl_thread_id = 0; 3051 } 3052 3053 return NOTIFY_OK; 3054 3055 err_constraint_list: 3056 kfree(cpuc->constraint_list); 3057 cpuc->constraint_list = NULL; 3058 3059 err_shared_regs: 3060 kfree(cpuc->shared_regs); 3061 cpuc->shared_regs = NULL; 3062 3063 err: 3064 return NOTIFY_BAD; 3065 } 3066 3067 static void intel_pmu_cpu_starting(int cpu) 3068 { 3069 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 3070 int core_id = topology_core_id(cpu); 3071 int i; 3072 3073 init_debug_store_on_cpu(cpu); 3074 /* 3075 * Deal with CPUs that don't clear their LBRs on power-up. 3076 */ 3077 intel_pmu_lbr_reset(); 3078 3079 cpuc->lbr_sel = NULL; 3080 3081 if (!cpuc->shared_regs) 3082 return; 3083 3084 if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) { 3085 for_each_cpu(i, topology_sibling_cpumask(cpu)) { 3086 struct intel_shared_regs *pc; 3087 3088 pc = per_cpu(cpu_hw_events, i).shared_regs; 3089 if (pc && pc->core_id == core_id) { 3090 cpuc->kfree_on_online[0] = cpuc->shared_regs; 3091 cpuc->shared_regs = pc; 3092 break; 3093 } 3094 } 3095 cpuc->shared_regs->core_id = core_id; 3096 cpuc->shared_regs->refcnt++; 3097 } 3098 3099 if (x86_pmu.lbr_sel_map) 3100 cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR]; 3101 3102 if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { 3103 for_each_cpu(i, topology_sibling_cpumask(cpu)) { 3104 struct intel_excl_cntrs *c; 3105 3106 c = per_cpu(cpu_hw_events, i).excl_cntrs; 3107 if (c && c->core_id == core_id) { 3108 cpuc->kfree_on_online[1] = cpuc->excl_cntrs; 3109 cpuc->excl_cntrs = c; 3110 cpuc->excl_thread_id = 1; 3111 break; 3112 } 3113 } 3114 cpuc->excl_cntrs->core_id = core_id; 3115 cpuc->excl_cntrs->refcnt++; 3116 } 3117 } 3118 3119 static void free_excl_cntrs(int cpu) 3120 { 3121 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 3122 struct intel_excl_cntrs *c; 3123 3124 c = cpuc->excl_cntrs; 3125 if (c) { 3126 if (c->core_id == -1 || --c->refcnt == 0) 3127 kfree(c); 3128 cpuc->excl_cntrs = NULL; 3129 kfree(cpuc->constraint_list); 3130 cpuc->constraint_list = NULL; 3131 } 3132 } 3133 3134 static void intel_pmu_cpu_dying(int cpu) 3135 { 3136 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 3137 struct intel_shared_regs *pc; 3138 3139 pc = cpuc->shared_regs; 3140 if (pc) { 3141 if (pc->core_id == -1 || --pc->refcnt == 0) 3142 kfree(pc); 3143 cpuc->shared_regs = NULL; 3144 } 3145 3146 free_excl_cntrs(cpu); 3147 3148 fini_debug_store_on_cpu(cpu); 3149 } 3150 3151 static void intel_pmu_sched_task(struct perf_event_context *ctx, 3152 bool sched_in) 3153 { 3154 if (x86_pmu.pebs_active) 3155 intel_pmu_pebs_sched_task(ctx, sched_in); 3156 if (x86_pmu.lbr_nr) 3157 intel_pmu_lbr_sched_task(ctx, sched_in); 3158 } 3159 3160 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); 3161 3162 PMU_FORMAT_ATTR(ldlat, "config1:0-15"); 3163 3164 PMU_FORMAT_ATTR(frontend, "config1:0-23"); 3165 3166 static struct attribute *intel_arch3_formats_attr[] = { 3167 &format_attr_event.attr, 3168 &format_attr_umask.attr, 3169 &format_attr_edge.attr, 3170 &format_attr_pc.attr, 3171 &format_attr_any.attr, 3172 
&format_attr_inv.attr, 3173 &format_attr_cmask.attr, 3174 &format_attr_in_tx.attr, 3175 &format_attr_in_tx_cp.attr, 3176 3177 &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */ 3178 &format_attr_ldlat.attr, /* PEBS load latency */ 3179 NULL, 3180 }; 3181 3182 static struct attribute *skl_format_attr[] = { 3183 &format_attr_frontend.attr, 3184 NULL, 3185 }; 3186 3187 static __initconst const struct x86_pmu core_pmu = { 3188 .name = "core", 3189 .handle_irq = x86_pmu_handle_irq, 3190 .disable_all = x86_pmu_disable_all, 3191 .enable_all = core_pmu_enable_all, 3192 .enable = core_pmu_enable_event, 3193 .disable = x86_pmu_disable_event, 3194 .hw_config = x86_pmu_hw_config, 3195 .schedule_events = x86_schedule_events, 3196 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, 3197 .perfctr = MSR_ARCH_PERFMON_PERFCTR0, 3198 .event_map = intel_pmu_event_map, 3199 .max_events = ARRAY_SIZE(intel_perfmon_event_map), 3200 .apic = 1, 3201 .free_running_flags = PEBS_FREERUNNING_FLAGS, 3202 3203 /* 3204 * Intel PMCs cannot be accessed sanely above 32-bit width, 3205 * so we install an artificial 1<<31 period regardless of 3206 * the generic event period: 3207 */ 3208 .max_period = (1ULL<<31) - 1, 3209 .get_event_constraints = intel_get_event_constraints, 3210 .put_event_constraints = intel_put_event_constraints, 3211 .event_constraints = intel_core_event_constraints, 3212 .guest_get_msrs = core_guest_get_msrs, 3213 .format_attrs = intel_arch_formats_attr, 3214 .events_sysfs_show = intel_event_sysfs_show, 3215 3216 /* 3217 * A virtual (or funny metal) CPU can define x86_pmu.extra_regs 3218 * together with PMU version 1 and thus be using core_pmu with 3219 * shared_regs. We need the following callbacks here to allocate 3220 * it properly. 3221 */ 3222 .cpu_prepare = intel_pmu_cpu_prepare, 3223 .cpu_starting = intel_pmu_cpu_starting, 3224 .cpu_dying = intel_pmu_cpu_dying, 3225 }; 3226 3227 static __initconst const struct x86_pmu intel_pmu = { 3228 .name = "Intel", 3229 .handle_irq = intel_pmu_handle_irq, 3230 .disable_all = intel_pmu_disable_all, 3231 .enable_all = intel_pmu_enable_all, 3232 .enable = intel_pmu_enable_event, 3233 .disable = intel_pmu_disable_event, 3234 .hw_config = intel_pmu_hw_config, 3235 .schedule_events = x86_schedule_events, 3236 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, 3237 .perfctr = MSR_ARCH_PERFMON_PERFCTR0, 3238 .event_map = intel_pmu_event_map, 3239 .max_events = ARRAY_SIZE(intel_perfmon_event_map), 3240 .apic = 1, 3241 .free_running_flags = PEBS_FREERUNNING_FLAGS, 3242 /* 3243 * Intel PMCs cannot be accessed sanely above 32-bit width, 3244 * so we install an artificial 1<<31 period regardless of 3245 * the generic event period: 3246 */ 3247 .max_period = (1ULL << 31) - 1, 3248 .get_event_constraints = intel_get_event_constraints, 3249 .put_event_constraints = intel_put_event_constraints, 3250 .pebs_aliases = intel_pebs_aliases_core2, 3251 3252 .format_attrs = intel_arch3_formats_attr, 3253 .events_sysfs_show = intel_event_sysfs_show, 3254 3255 .cpu_prepare = intel_pmu_cpu_prepare, 3256 .cpu_starting = intel_pmu_cpu_starting, 3257 .cpu_dying = intel_pmu_cpu_dying, 3258 .guest_get_msrs = intel_guest_get_msrs, 3259 .sched_task = intel_pmu_sched_task, 3260 }; 3261 3262 static __init void intel_clovertown_quirk(void) 3263 { 3264 /* 3265 * PEBS is unreliable due to: 3266 * 3267 * AJ67 - PEBS may experience CPL leaks 3268 * AJ68 - PEBS PMI may be delayed by one event 3269 * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12] 3270 * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in
combination with PEBS 3271 * 3272 * AJ67 could be worked around by restricting the OS/USR flags. 3273 * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI. 3274 * 3275 * AJ106 could possibly be worked around by not allowing LBR 3276 * usage from PEBS, including the fixup. 3277 * AJ68 could possibly be worked around by always programming 3278 * a pebs_event_reset[0] value and coping with the lost events. 3279 * 3280 * But taken together it might just make sense to not enable PEBS on 3281 * these chips. 3282 */ 3283 pr_warn("PEBS disabled due to CPU errata\n"); 3284 x86_pmu.pebs = 0; 3285 x86_pmu.pebs_constraints = NULL; 3286 } 3287 3288 static int intel_snb_pebs_broken(int cpu) 3289 { 3290 u32 rev = UINT_MAX; /* default to broken for unknown models */ 3291 3292 switch (cpu_data(cpu).x86_model) { 3293 case 42: /* SNB */ 3294 rev = 0x28; 3295 break; 3296 3297 case 45: /* SNB-EP */ 3298 switch (cpu_data(cpu).x86_mask) { 3299 case 6: rev = 0x618; break; 3300 case 7: rev = 0x70c; break; 3301 } 3302 } 3303 3304 return (cpu_data(cpu).microcode < rev); 3305 } 3306 3307 static void intel_snb_check_microcode(void) 3308 { 3309 int pebs_broken = 0; 3310 int cpu; 3311 3312 get_online_cpus(); 3313 for_each_online_cpu(cpu) { 3314 if ((pebs_broken = intel_snb_pebs_broken(cpu))) 3315 break; 3316 } 3317 put_online_cpus(); 3318 3319 if (pebs_broken == x86_pmu.pebs_broken) 3320 return; 3321 3322 /* 3323 * Serialized by the microcode lock. 3324 */ 3325 if (x86_pmu.pebs_broken) { 3326 pr_info("PEBS enabled due to microcode update\n"); 3327 x86_pmu.pebs_broken = 0; 3328 } else { 3329 pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n"); 3330 x86_pmu.pebs_broken = 1; 3331 } 3332 } 3333 3334 /* 3335 * Under certain circumstances, accessing certain MSRs may cause a #GP fault. 3336 * This function tests whether the given MSR can be safely accessed. 3337 */ 3338 static bool check_msr(unsigned long msr, u64 mask) 3339 { 3340 u64 val_old, val_new, val_tmp; 3341 3342 /* 3343 * Read the current value, change it and read it back to see if it 3344 * matches; this is needed to detect certain hardware emulators 3345 * (qemu/kvm) that don't trap on the MSR access and always return 0s. 3346 */ 3347 if (rdmsrl_safe(msr, &val_old)) 3348 return false; 3349 3350 /* 3351 * Only change the bits which can be updated by wrmsrl. 3352 */ 3353 val_tmp = val_old ^ mask; 3354 if (wrmsrl_safe(msr, val_tmp) || 3355 rdmsrl_safe(msr, &val_new)) 3356 return false; 3357 3358 if (val_new != val_tmp) 3359 return false; 3360 3361 /* At this point it is certain that the MSR can be safely accessed. 3362 * Restore the old value and return.
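 *
 * A hypothetical use (names only for illustration) would be probing an
 * LBR MSR before trusting it:
 *
 *	if (!check_msr(MSR_LBR_TOS, 0x3UL))
 *		x86_pmu.lbr_nr = 0;	// MSR bogus or emulated, disable LBR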
3363 */ 3364 wrmsrl(msr, val_old); 3365 3366 return true; 3367 } 3368 3369 static __init void intel_sandybridge_quirk(void) 3370 { 3371 x86_pmu.check_microcode = intel_snb_check_microcode; 3372 intel_snb_check_microcode(); 3373 } 3374 3375 static const struct { int id; char *name; } intel_arch_events_map[] __initconst = { 3376 { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" }, 3377 { PERF_COUNT_HW_INSTRUCTIONS, "instructions" }, 3378 { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" }, 3379 { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" }, 3380 { PERF_COUNT_HW_CACHE_MISSES, "cache misses" }, 3381 { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" }, 3382 { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" }, 3383 }; 3384 3385 static __init void intel_arch_events_quirk(void) 3386 { 3387 int bit; 3388 3389 /* disable events that CPUID reports as not present */ 3390 for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) { 3391 intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; 3392 pr_warn("CPUID marked event: \'%s\' unavailable\n", 3393 intel_arch_events_map[bit].name); 3394 } 3395 } 3396 3397 static __init void intel_nehalem_quirk(void) 3398 { 3399 union cpuid10_ebx ebx; 3400 3401 ebx.full = x86_pmu.events_maskl; 3402 if (ebx.split.no_branch_misses_retired) { 3403 /* 3404 * Erratum AAJ80 detected; we work around it by using 3405 * the BR_MISP_EXEC.ANY event. This will over-count 3406 * branch-misses, but it's still much better than the 3407 * architectural event which is often completely bogus: 3408 */ 3409 intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; 3410 ebx.split.no_branch_misses_retired = 0; 3411 x86_pmu.events_maskl = ebx.full; 3412 pr_info("CPU erratum AAJ80 worked around\n"); 3413 } 3414 } 3415 3416 /* 3417 * enable software workaround for errata: 3418 * SNB: BJ122 3419 * IVB: BV98 3420 * HSW: HSD29 3421 * 3422 * Only needed when HT is enabled. However, detecting 3423 * if HT is enabled is difficult (model specific).
So instead, 3424 * we enable the workaround in early boot, and verify whether 3425 * it is needed in a later initcall phase, once we have valid 3426 * topology information to check if HT is actually enabled. 3427 */ 3428 static __init void intel_ht_bug(void) 3429 { 3430 x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED; 3431 3432 x86_pmu.start_scheduling = intel_start_scheduling; 3433 x86_pmu.commit_scheduling = intel_commit_scheduling; 3434 x86_pmu.stop_scheduling = intel_stop_scheduling; 3435 } 3436 3437 EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3"); 3438 EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82"); 3439 3440 /* Haswell special events */ 3441 EVENT_ATTR_STR(tx-start, tx_start, "event=0xc9,umask=0x1"); 3442 EVENT_ATTR_STR(tx-commit, tx_commit, "event=0xc9,umask=0x2"); 3443 EVENT_ATTR_STR(tx-abort, tx_abort, "event=0xc9,umask=0x4"); 3444 EVENT_ATTR_STR(tx-capacity, tx_capacity, "event=0x54,umask=0x2"); 3445 EVENT_ATTR_STR(tx-conflict, tx_conflict, "event=0x54,umask=0x1"); 3446 EVENT_ATTR_STR(el-start, el_start, "event=0xc8,umask=0x1"); 3447 EVENT_ATTR_STR(el-commit, el_commit, "event=0xc8,umask=0x2"); 3448 EVENT_ATTR_STR(el-abort, el_abort, "event=0xc8,umask=0x4"); 3449 EVENT_ATTR_STR(el-capacity, el_capacity, "event=0x54,umask=0x2"); 3450 EVENT_ATTR_STR(el-conflict, el_conflict, "event=0x54,umask=0x1"); 3451 EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1"); 3452 EVENT_ATTR_STR(cycles-ct, cycles_ct, "event=0x3c,in_tx=1,in_tx_cp=1"); 3453 3454 static struct attribute *hsw_events_attrs[] = { 3455 EVENT_PTR(tx_start), 3456 EVENT_PTR(tx_commit), 3457 EVENT_PTR(tx_abort), 3458 EVENT_PTR(tx_capacity), 3459 EVENT_PTR(tx_conflict), 3460 EVENT_PTR(el_start), 3461 EVENT_PTR(el_commit), 3462 EVENT_PTR(el_abort), 3463 EVENT_PTR(el_capacity), 3464 EVENT_PTR(el_conflict), 3465 EVENT_PTR(cycles_t), 3466 EVENT_PTR(cycles_ct), 3467 EVENT_PTR(mem_ld_hsw), 3468 EVENT_PTR(mem_st_hsw), 3469 NULL 3470 }; 3471 3472 __init int intel_pmu_init(void) 3473 { 3474 union cpuid10_edx edx; 3475 union cpuid10_eax eax; 3476 union cpuid10_ebx ebx; 3477 struct event_constraint *c; 3478 unsigned int unused; 3479 struct extra_reg *er; 3480 int version, i; 3481 3482 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { 3483 switch (boot_cpu_data.x86) { 3484 case 0x6: 3485 return p6_pmu_init(); 3486 case 0xb: 3487 return knc_pmu_init(); 3488 case 0xf: 3489 return p4_pmu_init(); 3490 } 3491 return -ENODEV; 3492 } 3493 3494 /* 3495 * Check whether the Architectural PerfMon supports 3496 * Branch Misses Retired hw_event or not.
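 *
 * For reference (per the SDM), CPUID leaf 0xA packs: EAX[7:0] = PMU
 * version, EAX[15:8] = number of GP counters, EAX[23:16] = counter bit
 * width, EAX[31:24] = length of the EBX bit vector; a set EBX bit means
 * the corresponding architectural event is NOT available.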
3497 */ 3498 cpuid(10, &eax.full, &ebx.full, &unused, &edx.full); 3499 if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT) 3500 return -ENODEV; 3501 3502 version = eax.split.version_id; 3503 if (version < 2) 3504 x86_pmu = core_pmu; 3505 else 3506 x86_pmu = intel_pmu; 3507 3508 x86_pmu.version = version; 3509 x86_pmu.num_counters = eax.split.num_counters; 3510 x86_pmu.cntval_bits = eax.split.bit_width; 3511 x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; 3512 3513 x86_pmu.events_maskl = ebx.full; 3514 x86_pmu.events_mask_len = eax.split.mask_length; 3515 3516 x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters); 3517 3518 /* 3519 * Quirk: v2 perfmon does not report fixed-purpose events, so 3520 * assume at least 3 events: 3521 */ 3522 if (version > 1) 3523 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); 3524 3525 if (boot_cpu_has(X86_FEATURE_PDCM)) { 3526 u64 capabilities; 3527 3528 rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); 3529 x86_pmu.intel_cap.capabilities = capabilities; 3530 } 3531 3532 intel_ds_init(); 3533 3534 x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ 3535 3536 /* 3537 * Install the hw-cache-events table: 3538 */ 3539 switch (boot_cpu_data.x86_model) { 3540 case 14: /* 65nm Core "Yonah" */ 3541 pr_cont("Core events, "); 3542 break; 3543 3544 case 15: /* 65nm Core2 "Merom" */ 3545 x86_add_quirk(intel_clovertown_quirk); /* fall through */ 3546 case 22: /* 65nm Core2 "Merom-L" */ 3547 case 23: /* 45nm Core2 "Penryn" */ 3548 case 29: /* 45nm Core2 "Dunnington" (MP) */ 3549 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, 3550 sizeof(hw_cache_event_ids)); 3551 3552 intel_pmu_lbr_init_core(); 3553 3554 x86_pmu.event_constraints = intel_core2_event_constraints; 3555 x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints; 3556 pr_cont("Core2 events, "); 3557 break; 3558 3559 case 30: /* 45nm Nehalem */ 3560 case 26: /* 45nm Nehalem-EP */ 3561 case 46: /* 45nm Nehalem-EX */ 3562 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, 3563 sizeof(hw_cache_event_ids)); 3564 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, 3565 sizeof(hw_cache_extra_regs)); 3566 3567 intel_pmu_lbr_init_nhm(); 3568 3569 x86_pmu.event_constraints = intel_nehalem_event_constraints; 3570 x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; 3571 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 3572 x86_pmu.extra_regs = intel_nehalem_extra_regs; 3573 3574 x86_pmu.cpu_events = nhm_events_attrs; 3575 3576 /* UOPS_ISSUED.STALLED_CYCLES */ 3577 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 3578 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); 3579 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ 3580 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 3581 X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); 3582 3583 intel_pmu_pebs_data_source_nhm(); 3584 x86_add_quirk(intel_nehalem_quirk); 3585 3586 pr_cont("Nehalem events, "); 3587 break; 3588 3589 case 28: /* 45nm Atom "Pineview" */ 3590 case 38: /* 45nm Atom "Lincroft" */ 3591 case 39: /* 32nm Atom "Penwell" */ 3592 case 53: /* 32nm Atom "Cloverview" */ 3593 case 54: /* 32nm Atom "Cedarview" */ 3594 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, 3595 sizeof(hw_cache_event_ids)); 3596 3597 intel_pmu_lbr_init_atom(); 3598 3599 x86_pmu.event_constraints = intel_gen_event_constraints; 3600 x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints; 3601 x86_pmu.pebs_aliases = intel_pebs_aliases_core2; 3602
pr_cont("Atom events, "); 3603 break; 3604 3605 case 55: /* 22nm Atom "Silvermont" */ 3606 case 76: /* 14nm Atom "Airmont" */ 3607 case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ 3608 memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, 3609 sizeof(hw_cache_event_ids)); 3610 memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, 3611 sizeof(hw_cache_extra_regs)); 3612 3613 intel_pmu_lbr_init_slm(); 3614 3615 x86_pmu.event_constraints = intel_slm_event_constraints; 3616 x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; 3617 x86_pmu.extra_regs = intel_slm_extra_regs; 3618 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 3619 pr_cont("Silvermont events, "); 3620 break; 3621 3622 case 92: /* 14nm Atom "Goldmont" */ 3623 case 95: /* 14nm Atom "Goldmont Denverton" */ 3624 memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, 3625 sizeof(hw_cache_event_ids)); 3626 memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, 3627 sizeof(hw_cache_extra_regs)); 3628 3629 intel_pmu_lbr_init_skl(); 3630 3631 x86_pmu.event_constraints = intel_slm_event_constraints; 3632 x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints; 3633 x86_pmu.extra_regs = intel_glm_extra_regs; 3634 /* 3635 * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS 3636 * for precise cycles. 3637 * :pp is identical to :ppp 3638 */ 3639 x86_pmu.pebs_aliases = NULL; 3640 x86_pmu.pebs_prec_dist = true; 3641 x86_pmu.lbr_pt_coexist = true; 3642 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 3643 pr_cont("Goldmont events, "); 3644 break; 3645 3646 case 37: /* 32nm Westmere */ 3647 case 44: /* 32nm Westmere-EP */ 3648 case 47: /* 32nm Westmere-EX */ 3649 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, 3650 sizeof(hw_cache_event_ids)); 3651 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, 3652 sizeof(hw_cache_extra_regs)); 3653 3654 intel_pmu_lbr_init_nhm(); 3655 3656 x86_pmu.event_constraints = intel_westmere_event_constraints; 3657 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 3658 x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; 3659 x86_pmu.extra_regs = intel_westmere_extra_regs; 3660 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 3661 3662 x86_pmu.cpu_events = nhm_events_attrs; 3663 3664 /* UOPS_ISSUED.STALLED_CYCLES */ 3665 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 3666 X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); 3667 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ 3668 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 3669 X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); 3670 3671 intel_pmu_pebs_data_source_nhm(); 3672 pr_cont("Westmere events, "); 3673 break; 3674 3675 case 42: /* 32nm SandyBridge */ 3676 case 45: /* 32nm SandyBridge-E/EN/EP */ 3677 x86_add_quirk(intel_sandybridge_quirk); 3678 x86_add_quirk(intel_ht_bug); 3679 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 3680 sizeof(hw_cache_event_ids)); 3681 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, 3682 sizeof(hw_cache_extra_regs)); 3683 3684 intel_pmu_lbr_init_snb(); 3685 3686 x86_pmu.event_constraints = intel_snb_event_constraints; 3687 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; 3688 x86_pmu.pebs_aliases = intel_pebs_aliases_snb; 3689 if (boot_cpu_data.x86_model == 45) 3690 x86_pmu.extra_regs = intel_snbep_extra_regs; 3691 else 3692 x86_pmu.extra_regs = intel_snb_extra_regs; 3693 3694 3695 /* all extra regs are per-cpu when HT is on */ 3696 x86_pmu.flags |= PMU_FL_HAS_RSP_1; 3697 x86_pmu.flags |= PMU_FL_NO_HT_SHARING; 3698 3699 x86_pmu.cpu_events = snb_events_attrs; 3700 3701 /* 
		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
		/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
			X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);

		pr_cont("SandyBridge events, ");
		break;

	case 58: /* 22nm IvyBridge */
	case 62: /* 22nm IvyBridge-EP/EX */
		x86_add_quirk(intel_ht_bug);
		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));
		/* dTLB-load-misses on IVB is different from SNB */
		hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */

		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
		       sizeof(hw_cache_extra_regs));

		intel_pmu_lbr_init_snb();

		x86_pmu.event_constraints = intel_ivb_event_constraints;
		x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
		x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
		x86_pmu.pebs_prec_dist = true;
		if (boot_cpu_data.x86_model == 62)
			x86_pmu.extra_regs = intel_snbep_extra_regs;
		else
			x86_pmu.extra_regs = intel_snb_extra_regs;
		/* all extra regs are per-cpu when HT is on */
		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;

		x86_pmu.cpu_events = snb_events_attrs;

		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);

		pr_cont("IvyBridge events, ");
		break;

	case 60: /* 22nm Haswell Core */
	case 63: /* 22nm Haswell Server */
	case 69: /* 22nm Haswell ULT */
	case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
		x86_add_quirk(intel_ht_bug);
		x86_pmu.late_ack = true;
		memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));

		intel_pmu_lbr_init_hsw();

		x86_pmu.event_constraints = intel_hsw_event_constraints;
		x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
		x86_pmu.extra_regs = intel_snbep_extra_regs;
		x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
		x86_pmu.pebs_prec_dist = true;
		/* all extra regs are per-cpu when HT is on */
		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;

		x86_pmu.hw_config = hsw_hw_config;
		x86_pmu.get_event_constraints = hsw_get_event_constraints;
		x86_pmu.cpu_events = hsw_events_attrs;
		x86_pmu.lbr_double_abort = true;
		pr_cont("Haswell events, ");
		break;

	case 61: /* 14nm Broadwell Core-M */
	case 86: /* 14nm Broadwell Xeon D */
	case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
	case 79: /* 14nm Broadwell Server */
		x86_pmu.late_ack = true;
		memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));

		/* L3_MISS_LOCAL_DRAM is BIT(26) in Broadwell */
		hw_cache_extra_regs[C(LL)][C(OP_READ)][C(RESULT_MISS)] = HSW_DEMAND_READ |
									 BDW_L3_MISS|HSW_SNOOP_DRAM;
		hw_cache_extra_regs[C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = HSW_DEMAND_WRITE|BDW_L3_MISS|
									  HSW_SNOOP_DRAM;
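		/*
		 * The NODE entries count accesses satisfied from local DRAM,
		 * hence they qualify on BDW_L3_MISS_LOCAL rather than the
		 * full BDW_L3_MISS mask used for the LL miss entries above.
		 */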
		hw_cache_extra_regs[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = HSW_DEMAND_READ|
									     BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
		hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE|
									      BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;

		intel_pmu_lbr_init_hsw();

		x86_pmu.event_constraints = intel_bdw_event_constraints;
		x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints;
		x86_pmu.extra_regs = intel_snbep_extra_regs;
		x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
		x86_pmu.pebs_prec_dist = true;
		/* all extra regs are per-cpu when HT is on */
		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;

		x86_pmu.hw_config = hsw_hw_config;
		x86_pmu.get_event_constraints = hsw_get_event_constraints;
		x86_pmu.cpu_events = hsw_events_attrs;
		x86_pmu.limit_period = bdw_limit_period;
		pr_cont("Broadwell events, ");
		break;

	case 87: /* Knights Landing Xeon Phi */
		memcpy(hw_cache_event_ids,
		       slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs,
		       knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
		intel_pmu_lbr_init_knl();

		x86_pmu.event_constraints = intel_slm_event_constraints;
		x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
		x86_pmu.extra_regs = intel_knl_extra_regs;

		/* all extra regs are per-cpu when HT is on */
		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;

		pr_cont("Knights Landing events, ");
		break;

	case 142: /* 14nm Kabylake Mobile */
	case 158: /* 14nm Kabylake Desktop */
	case 78: /* 14nm Skylake Mobile */
	case 94: /* 14nm Skylake Desktop */
	case 85: /* 14nm Skylake Server */
		x86_pmu.late_ack = true;
		memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
		intel_pmu_lbr_init_skl();

		x86_pmu.event_constraints = intel_skl_event_constraints;
		x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
		x86_pmu.extra_regs = intel_skl_extra_regs;
		x86_pmu.pebs_aliases = intel_pebs_aliases_skl;
		x86_pmu.pebs_prec_dist = true;
		/* all extra regs are per-cpu when HT is on */
		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;

		x86_pmu.hw_config = hsw_hw_config;
		x86_pmu.get_event_constraints = hsw_get_event_constraints;
		x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
						  skl_format_attr);
		WARN_ON(!x86_pmu.format_attrs);
		x86_pmu.cpu_events = hsw_events_attrs;
		pr_cont("Skylake events, ");
		break;

	default:
		switch (x86_pmu.version) {
		case 1:
			x86_pmu.event_constraints = intel_v1_event_constraints;
			pr_cont("generic architected perfmon v1, ");
			break;
		default:
			/*
			 * default constraints for v2 and up
			 */
			x86_pmu.event_constraints = intel_gen_event_constraints;
			pr_cont("generic architected perfmon, ");
			break;
		}
	}

	if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
		     x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
		x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
	}
	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;

	if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
		     x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED);
		x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
	}

	x86_pmu.intel_ctrl |=
		((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;

	if (x86_pmu.event_constraints) {
		/*
		 * event on fixed counter2 (REF_CYCLES) only works on this
		 * counter, so do not extend mask to generic counters
		 */
		for_each_event_constraint(c, x86_pmu.event_constraints) {
			if (c->cmask == FIXED_EVENT_FLAGS
			    && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
				c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
			}
			c->idxmsk64 &=
				~(~0ULL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed));
			c->weight = hweight64(c->idxmsk64);
		}
	}

	/*
	 * Accessing the LBR MSRs may cause a #GP under certain
	 * circumstances, e.g. KVM doesn't support the LBR MSRs.
	 * Check all LBR MSRs here and disable LBR access if any of
	 * them cannot be accessed.
	 */
	if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
		x86_pmu.lbr_nr = 0;
	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
		      check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
			x86_pmu.lbr_nr = 0;
	}

	/*
	 * Accessing an extra MSR may cause a #GP under certain
	 * circumstances, e.g. KVM doesn't support offcore events.
	 * Check all extra_regs here.
	 */
	if (x86_pmu.extra_regs) {
		for (er = x86_pmu.extra_regs; er->msr; er++) {
			er->extra_msr_access = check_msr(er->msr, 0x11UL);
			/* Disable LBR select mapping */
			if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
				x86_pmu.lbr_sel_map = NULL;
		}
	}

	/* Support full width counters using alternative MSR range */
	if (x86_pmu.intel_cap.full_width_write) {
		x86_pmu.max_period = x86_pmu.cntval_mask;
		x86_pmu.perfctr = MSR_IA32_PMC0;
		pr_cont("full-width counters, ");
	}

	return 0;
}

/*
 * HT bug: phase 2 init
 * Called once we have valid topology information to check
 * whether or not HT is enabled.
 * If HT is off, then we disable the workaround.
 */
static __init int fixup_ht_bug(void)
{
	int cpu = smp_processor_id();
	int w, c;
	/*
	 * problem not present on this CPU model, nothing to do
	 */
	if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
		return 0;

	w = cpumask_weight(topology_sibling_cpumask(cpu));
	if (w > 1) {
		pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
		return 0;
	}

	if (lockup_detector_suspend() != 0) {
		pr_debug("failed to disable PMU erratum BJ122, BV98, HSD29 workaround\n");
		return 0;
	}

	x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);

	x86_pmu.start_scheduling = NULL;
	x86_pmu.commit_scheduling = NULL;
	x86_pmu.stop_scheduling = NULL;

	lockup_detector_resume();

	get_online_cpus();

	for_each_online_cpu(c) {
		free_excl_cntrs(c);
	}

	put_online_cpus();
	pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
	return 0;
}
subsys_initcall(fixup_ht_bug);
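/*
 * A subsys_initcall runs after SMP bring-up, so by the time fixup_ht_bug()
 * executes, topology_sibling_cpumask() reflects the real sibling topology
 * and the HT on/off check above is meaningful.
 */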