xref: /openbmc/linux/arch/x86/events/intel/ds.c (revision 16f6ccde74a6f8538c62f127f17207c75f4dba7a)
1  // SPDX-License-Identifier: GPL-2.0
2  #include <linux/bitops.h>
3  #include <linux/types.h>
4  #include <linux/slab.h>
5  #include <linux/sched/clock.h>
6  
7  #include <asm/cpu_entry_area.h>
8  #include <asm/perf_event.h>
9  #include <asm/tlbflush.h>
10  #include <asm/insn.h>
11  #include <asm/io.h>
12  #include <asm/timer.h>
13  
14  #include "../perf_event.h"
15  
16  /* Waste a full page so it can be mapped into the cpu_entry_area */
17  DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
18  
19  /* The size of a BTS record in bytes: */
20  #define BTS_RECORD_SIZE		24
21  
22  #define PEBS_FIXUP_SIZE		PAGE_SIZE
23  
24  /*
25   * pebs_record_32 for p4 and core not supported
26  
27  struct pebs_record_32 {
28  	u32 flags, ip;
29  	u32 ax, bc, cx, dx;
30  	u32 si, di, bp, sp;
31  };
32  
33   */
34  
35  union intel_x86_pebs_dse {
36  	u64 val;
37  	struct {
38  		unsigned int ld_dse:4;
39  		unsigned int ld_stlb_miss:1;
40  		unsigned int ld_locked:1;
41  		unsigned int ld_data_blk:1;
42  		unsigned int ld_addr_blk:1;
43  		unsigned int ld_reserved:24;
44  	};
45  	struct {
46  		unsigned int st_l1d_hit:1;
47  		unsigned int st_reserved1:3;
48  		unsigned int st_stlb_miss:1;
49  		unsigned int st_locked:1;
50  		unsigned int st_reserved2:26;
51  	};
52  	struct {
53  		unsigned int st_lat_dse:4;
54  		unsigned int st_lat_stlb_miss:1;
55  		unsigned int st_lat_locked:1;
56  		unsigned int ld_reserved3:26;
57  	};
58  	struct {
59  		unsigned int mtl_dse:5;
60  		unsigned int mtl_locked:1;
61  		unsigned int mtl_stlb_miss:1;
62  		unsigned int mtl_fwd_blk:1;
63  		unsigned int ld_reserved4:24;
64  	};
65  };
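
/*
 * All of the bit-field views above alias the same 64-bit PEBS data-source
 * word; which view applies depends on the event type (load latency, precise
 * store, store latency) and on the microarchitecture (the mtl_* view is for
 * Meteor Lake e-cores). A rough decoding sketch for the load-latency view,
 * using a purely illustrative value:
 *
 *	union intel_x86_pebs_dse dse = { .val = 0x21 };
 *	// dse.ld_dse       == 0x1  (bits 3:0 -> pebs_data_source[1], L1 hit)
 *	// dse.ld_stlb_miss == 0    (bit 4)
 *	// dse.ld_locked    == 1    (bit 5)
 */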
66  
67  
68  /*
69   * Map PEBS Load Latency Data Source encodings to generic
70   * memory data source information
71   */
72  #define P(a, b) PERF_MEM_S(a, b)
73  #define OP_LH (P(OP, LOAD) | P(LVL, HIT))
74  #define LEVEL(x) P(LVLNUM, x)
75  #define REM P(REMOTE, REMOTE)
76  #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
77  
78  /* Version for Sandy Bridge and later */
79  static u64 pebs_data_source[] = {
80  	P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
81  	OP_LH | P(LVL, L1)  | LEVEL(L1) | P(SNOOP, NONE),  /* 0x01: L1 local */
82  	OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
83  	OP_LH | P(LVL, L2)  | LEVEL(L2) | P(SNOOP, NONE),  /* 0x03: L2 hit */
84  	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, NONE),  /* 0x04: L3 hit */
85  	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, MISS),  /* 0x05: L3 hit, snoop miss */
86  	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HIT),   /* 0x06: L3 hit, snoop hit */
87  	OP_LH | P(LVL, L3)  | LEVEL(L3) | P(SNOOP, HITM),  /* 0x07: L3 hit, snoop hitm */
88  	OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
89  	OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
90  	OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | P(SNOOP, HIT),       /* 0x0a: L3 miss, shared */
91  	OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
92  	OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | SNOOP_NONE_MISS,     /* 0x0c: L3 miss, excl */
93  	OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */
94  	OP_LH | P(LVL, IO)  | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */
95  	OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
96  };
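
/*
 * The table above is indexed by the low four bits of the PEBS data-source
 * field (dse.ld_dse / dse.st_lat_dse); each entry is a pre-encoded
 * perf_mem_data_src value. For example, encoding 0x02 reports an L1-hit
 * load that was satisfied from the line fill buffer:
 *
 *	pebs_data_source[0x02] == OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE)
 *
 * The intel_pmu_pebs_data_source_*() helpers below patch individual entries
 * where a given generation deviates from this Sandy Bridge layout.
 */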
97  
98  /* Patch up minor differences in the bits */
99  void __init intel_pmu_pebs_data_source_nhm(void)
100  {
101  	pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
102  	pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
103  	pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
104  }
105  
106  static void __init __intel_pmu_pebs_data_source_skl(bool pmem, u64 *data_source)
107  {
108  	u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);
109  
110  	data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
111  	data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
112  	data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
113  	data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
114  	data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
115  }
116  
117  void __init intel_pmu_pebs_data_source_skl(bool pmem)
118  {
119  	__intel_pmu_pebs_data_source_skl(pmem, pebs_data_source);
120  }
121  
122  static void __init __intel_pmu_pebs_data_source_grt(u64 *data_source)
123  {
124  	data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
125  	data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
126  	data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
127  }
128  
129  void __init intel_pmu_pebs_data_source_grt(void)
130  {
131  	__intel_pmu_pebs_data_source_grt(pebs_data_source);
132  }
133  
134  void __init intel_pmu_pebs_data_source_adl(void)
135  {
136  	u64 *data_source;
137  
138  	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
139  	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
140  	__intel_pmu_pebs_data_source_skl(false, data_source);
141  
142  	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
143  	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
144  	__intel_pmu_pebs_data_source_grt(data_source);
145  }
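
/*
 * On hybrid parts each PMU carries its own copy of the table: the common
 * Sandy Bridge layout is copied into the per-PMU pebs_data_source[] and
 * then patched with the core-specific fixups (SKL-style for the big cores,
 * GRT-style for the Atom cores here, CMT-style on Meteor Lake below).
 * Non-hybrid callers such as intel_pmu_pebs_data_source_skl() patch the
 * global table directly.
 */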
146  
147  static void __init __intel_pmu_pebs_data_source_cmt(u64 *data_source)
148  {
149  	data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
150  	data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
151  	data_source[0x0a] = OP_LH | P(LVL, LOC_RAM)  | LEVEL(RAM) | P(SNOOP, NONE);
152  	data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
153  	data_source[0x0c] = OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD);
154  	data_source[0x0d] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM);
155  }
156  
157  void __init intel_pmu_pebs_data_source_mtl(void)
158  {
159  	u64 *data_source;
160  
161  	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
162  	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
163  	__intel_pmu_pebs_data_source_skl(false, data_source);
164  
165  	data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
166  	memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
167  	__intel_pmu_pebs_data_source_cmt(data_source);
168  }
169  
170  void __init intel_pmu_pebs_data_source_cmt(void)
171  {
172  	__intel_pmu_pebs_data_source_cmt(pebs_data_source);
173  }
174  
175  static u64 precise_store_data(u64 status)
176  {
177  	union intel_x86_pebs_dse dse;
178  	u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
179  
180  	dse.val = status;
181  
182  	/*
183  	 * bit 4: TLB access
184  	 * 1 = stored missed 2nd level TLB
185  	 *
186  	 * so it either hit the walker or the OS
187  	 * otherwise hit 2nd level TLB
188  	 */
189  	if (dse.st_stlb_miss)
190  		val |= P(TLB, MISS);
191  	else
192  		val |= P(TLB, HIT);
193  
194  	/*
195  	 * bit 0: hit L1 data cache
196  	 * if not set, then all we know is that
197  	 * it missed L1D
198  	 */
199  	if (dse.st_l1d_hit)
200  		val |= P(LVL, HIT);
201  	else
202  		val |= P(LVL, MISS);
203  
204  	/*
205  	 * bit 5: Locked prefix
206  	 */
207  	if (dse.st_locked)
208  		val |= P(LOCK, LOCKED);
209  
210  	return val;
211  }
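
/*
 * Example decode for precise_store_data(), with a purely illustrative
 * status of 0x11 (st_l1d_hit and st_stlb_miss set, st_locked clear); the
 * returned data source is roughly:
 *
 *	P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(LVL, HIT) |
 *		P(TLB, L2) | P(TLB, MISS)
 */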
212  
213  static u64 precise_datala_hsw(struct perf_event *event, u64 status)
214  {
215  	union perf_mem_data_src dse;
216  
217  	dse.val = PERF_MEM_NA;
218  
219  	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
220  		dse.mem_op = PERF_MEM_OP_STORE;
221  	else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
222  		dse.mem_op = PERF_MEM_OP_LOAD;
223  
224  	/*
225  	 * L1 info only valid for following events:
226  	 *
227  	 * MEM_UOPS_RETIRED.STLB_MISS_STORES
228  	 * MEM_UOPS_RETIRED.LOCK_STORES
229  	 * MEM_UOPS_RETIRED.SPLIT_STORES
230  	 * MEM_UOPS_RETIRED.ALL_STORES
231  	 */
232  	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
233  		if (status & 1)
234  			dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
235  		else
236  			dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
237  	}
238  	return dse.val;
239  }
240  
241  static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
242  {
243  	/*
244  	 * TLB access
245  	 * 0 = did not miss 2nd level TLB
246  	 * 1 = missed 2nd level TLB
247  	 */
248  	if (tlb)
249  		*val |= P(TLB, MISS) | P(TLB, L2);
250  	else
251  		*val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
252  
253  	/* locked prefix */
254  	if (lock)
255  		*val |= P(LOCK, LOCKED);
256  }
257  
258  /* Retrieve the latency data for e-core of ADL */
259  static u64 __adl_latency_data_small(struct perf_event *event, u64 status,
260  				     u8 dse, bool tlb, bool lock, bool blk)
261  {
262  	u64 val;
263  
264  	WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big);
265  
266  	dse &= PERF_PEBS_DATA_SOURCE_MASK;
267  	val = hybrid_var(event->pmu, pebs_data_source)[dse];
268  
269  	pebs_set_tlb_lock(&val, tlb, lock);
270  
271  	if (blk)
272  		val |= P(BLK, DATA);
273  	else
274  		val |= P(BLK, NA);
275  
276  	return val;
277  }
278  
279  u64 adl_latency_data_small(struct perf_event *event, u64 status)
280  {
281  	union intel_x86_pebs_dse dse;
282  
283  	dse.val = status;
284  
285  	return __adl_latency_data_small(event, status, dse.ld_dse,
286  					dse.ld_locked, dse.ld_stlb_miss,
287  					dse.ld_data_blk);
288  }
289  
290  /* Retrieve the latency data for e-core of MTL */
291  u64 mtl_latency_data_small(struct perf_event *event, u64 status)
292  {
293  	union intel_x86_pebs_dse dse;
294  
295  	dse.val = status;
296  
297  	return __adl_latency_data_small(event, status, dse.mtl_dse,
298  					dse.mtl_stlb_miss, dse.mtl_locked,
299  					dse.mtl_fwd_blk);
300  }
301  
302  static u64 load_latency_data(struct perf_event *event, u64 status)
303  {
304  	union intel_x86_pebs_dse dse;
305  	u64 val;
306  
307  	dse.val = status;
308  
309  	/*
310  	 * use the mapping table for bit 0-3
311  	 */
312  	val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse];
313  
314  	/*
315  	 * Nehalem models do not support TLB, Lock infos
316  	 */
317  	if (x86_pmu.pebs_no_tlb) {
318  		val |= P(TLB, NA) | P(LOCK, NA);
319  		return val;
320  	}
321  
322  	pebs_set_tlb_lock(&val, dse.ld_stlb_miss, dse.ld_locked);
323  
324  	/*
325  	 * Ice Lake and earlier models do not support block infos.
326  	 */
327  	if (!x86_pmu.pebs_block) {
328  		val |= P(BLK, NA);
329  		return val;
330  	}
331  	/*
332  	 * bit 6: load was blocked since its data could not be forwarded
333  	 *        from a preceding store
334  	 */
335  	if (dse.ld_data_blk)
336  		val |= P(BLK, DATA);
337  
338  	/*
339  	 * bit 7: load was blocked due to potential address conflict with
340  	 *        a preceding store
341  	 */
342  	if (dse.ld_addr_blk)
343  		val |= P(BLK, ADDR);
344  
345  	if (!dse.ld_data_blk && !dse.ld_addr_blk)
346  		val |= P(BLK, NA);
347  
348  	return val;
349  }
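
/*
 * The value returned by load_latency_data() combines the table lookup for
 * dse bits 3:0 with the STLB/lock bits and, on parts that report them, the
 * block bits. For an illustrative dse of 0x04 with no other bits set, on a
 * part with block info (and with TLB/lock info available), this yields:
 *
 *	pebs_data_source[0x04] | P(TLB, HIT) | P(TLB, L1) | P(TLB, L2) |
 *		P(BLK, NA)
 *
 * i.e. an L3 hit with no STLB miss, no lock and no blocking reported.
 */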
350  
351  static u64 store_latency_data(struct perf_event *event, u64 status)
352  {
353  	union intel_x86_pebs_dse dse;
354  	union perf_mem_data_src src;
355  	u64 val;
356  
357  	dse.val = status;
358  
359  	/*
360  	 * use the mapping table for bit 0-3
361  	 */
362  	val = hybrid_var(event->pmu, pebs_data_source)[dse.st_lat_dse];
363  
364  	pebs_set_tlb_lock(&val, dse.st_lat_stlb_miss, dse.st_lat_locked);
365  
366  	val |= P(BLK, NA);
367  
368  	/*
369  	 * the pebs_data_source table is only for loads
370  	 * so override the mem_op to say STORE instead
371  	 */
372  	src.val = val;
373  	src.mem_op = P(OP,STORE);
374  
375  	return src.val;
376  }
377  
378  struct pebs_record_core {
379  	u64 flags, ip;
380  	u64 ax, bx, cx, dx;
381  	u64 si, di, bp, sp;
382  	u64 r8,  r9,  r10, r11;
383  	u64 r12, r13, r14, r15;
384  };
385  
386  struct pebs_record_nhm {
387  	u64 flags, ip;
388  	u64 ax, bx, cx, dx;
389  	u64 si, di, bp, sp;
390  	u64 r8,  r9,  r10, r11;
391  	u64 r12, r13, r14, r15;
392  	u64 status, dla, dse, lat;
393  };
394  
395  /*
396   * Same as pebs_record_nhm, with two additional fields.
397   */
398  struct pebs_record_hsw {
399  	u64 flags, ip;
400  	u64 ax, bx, cx, dx;
401  	u64 si, di, bp, sp;
402  	u64 r8,  r9,  r10, r11;
403  	u64 r12, r13, r14, r15;
404  	u64 status, dla, dse, lat;
405  	u64 real_ip, tsx_tuning;
406  };
407  
408  union hsw_tsx_tuning {
409  	struct {
410  		u32 cycles_last_block     : 32,
411  		    hle_abort		  : 1,
412  		    rtm_abort		  : 1,
413  		    instruction_abort     : 1,
414  		    non_instruction_abort : 1,
415  		    retry		  : 1,
416  		    data_conflict	  : 1,
417  		    capacity_writes	  : 1,
418  		    capacity_reads	  : 1;
419  	};
420  	u64	    value;
421  };
422  
423  #define PEBS_HSW_TSX_FLAGS	0xff00000000ULL
424  
425  /* Same as HSW, plus TSC */
426  
427  struct pebs_record_skl {
428  	u64 flags, ip;
429  	u64 ax, bx, cx, dx;
430  	u64 si, di, bp, sp;
431  	u64 r8,  r9,  r10, r11;
432  	u64 r12, r13, r14, r15;
433  	u64 status, dla, dse, lat;
434  	u64 real_ip, tsx_tuning;
435  	u64 tsc;
436  };
437  
438  void init_debug_store_on_cpu(int cpu)
439  {
440  	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
441  
442  	if (!ds)
443  		return;
444  
445  	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
446  		     (u32)((u64)(unsigned long)ds),
447  		     (u32)((u64)(unsigned long)ds >> 32));
448  }
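
/*
 * MSR_IA32_DS_AREA takes the linear address of the debug store; wrmsr wants
 * it split into two 32-bit halves, which is what the casts above do. A
 * rough equivalent when running on the target CPU itself would simply be:
 *
 *	wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
 */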
449  
450  void fini_debug_store_on_cpu(int cpu)
451  {
452  	if (!per_cpu(cpu_hw_events, cpu).ds)
453  		return;
454  
455  	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
456  }
457  
458  static DEFINE_PER_CPU(void *, insn_buffer);
459  
460  static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
461  {
462  	unsigned long start = (unsigned long)cea;
463  	phys_addr_t pa;
464  	size_t msz = 0;
465  
466  	pa = virt_to_phys(addr);
467  
468  	preempt_disable();
469  	for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
470  		cea_set_pte(cea, pa, prot);
471  
472  	/*
473  	 * This is a cross-CPU update of the cpu_entry_area, we must shoot down
474  	 * all TLB entries for it.
475  	 */
476  	flush_tlb_kernel_range(start, start + size);
477  	preempt_enable();
478  }
479  
480  static void ds_clear_cea(void *cea, size_t size)
481  {
482  	unsigned long start = (unsigned long)cea;
483  	size_t msz = 0;
484  
485  	preempt_disable();
486  	for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
487  		cea_set_pte(cea, 0, PAGE_NONE);
488  
489  	flush_tlb_kernel_range(start, start + size);
490  	preempt_enable();
491  }
492  
493  static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
494  {
495  	unsigned int order = get_order(size);
496  	int node = cpu_to_node(cpu);
497  	struct page *page;
498  
499  	page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
500  	return page ? page_address(page) : NULL;
501  }
502  
503  static void dsfree_pages(const void *buffer, size_t size)
504  {
505  	if (buffer)
506  		free_pages((unsigned long)buffer, get_order(size));
507  }
508  
509  static int alloc_pebs_buffer(int cpu)
510  {
511  	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
512  	struct debug_store *ds = hwev->ds;
513  	size_t bsiz = x86_pmu.pebs_buffer_size;
514  	int max, node = cpu_to_node(cpu);
515  	void *buffer, *insn_buff, *cea;
516  
517  	if (!x86_pmu.pebs)
518  		return 0;
519  
520  	buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
521  	if (unlikely(!buffer))
522  		return -ENOMEM;
523  
524  	/*
525  	 * HSW+ already provides us the eventing ip; no need to allocate this
526  	 * buffer then.
527  	 */
528  	if (x86_pmu.intel_cap.pebs_format < 2) {
529  		insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
530  		if (!insn_buff) {
531  			dsfree_pages(buffer, bsiz);
532  			return -ENOMEM;
533  		}
534  		per_cpu(insn_buffer, cpu) = insn_buff;
535  	}
536  	hwev->ds_pebs_vaddr = buffer;
537  	/* Update the cpu entry area mapping */
538  	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
539  	ds->pebs_buffer_base = (unsigned long) cea;
540  	ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
541  	ds->pebs_index = ds->pebs_buffer_base;
542  	max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
543  	ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
544  	return 0;
545  }
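
/*
 * Note the threshold math above: 'max' is the buffer size rounded down to a
 * whole number of PEBS records, so pebs_absolute_maximum always lands on a
 * record boundary. E.g. with a 64K buffer and a (purely illustrative)
 * 192-byte record:
 *
 *	max = 192 * (65536 / 192) = 65472
 *
 * leaving the final 64 bytes of the mapping unused rather than holding a
 * truncated record.
 */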
546  
547  static void release_pebs_buffer(int cpu)
548  {
549  	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
550  	void *cea;
551  
552  	if (!x86_pmu.pebs)
553  		return;
554  
555  	kfree(per_cpu(insn_buffer, cpu));
556  	per_cpu(insn_buffer, cpu) = NULL;
557  
558  	/* Clear the fixmap */
559  	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
560  	ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
561  	dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
562  	hwev->ds_pebs_vaddr = NULL;
563  }
564  
565  static int alloc_bts_buffer(int cpu)
566  {
567  	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
568  	struct debug_store *ds = hwev->ds;
569  	void *buffer, *cea;
570  	int max;
571  
572  	if (!x86_pmu.bts)
573  		return 0;
574  
575  	buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
576  	if (unlikely(!buffer)) {
577  		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
578  		return -ENOMEM;
579  	}
580  	hwev->ds_bts_vaddr = buffer;
581  	/* Update the fixmap */
582  	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
583  	ds->bts_buffer_base = (unsigned long) cea;
584  	ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
585  	ds->bts_index = ds->bts_buffer_base;
586  	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
587  	ds->bts_absolute_maximum = ds->bts_buffer_base +
588  					max * BTS_RECORD_SIZE;
589  	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
590  					(max / 16) * BTS_RECORD_SIZE;
591  	return 0;
592  }
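
/*
 * Assuming the usual 64K BTS_BUFFER_SIZE and the 24-byte BTS_RECORD_SIZE
 * above, max = 65536 / 24 = 2730 records; the interrupt threshold is backed
 * off from the absolute maximum by max / 16 = 170 records (~4K) so the
 * overflow PMI has room to fire before the buffer is completely full.
 */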
593  
594  static void release_bts_buffer(int cpu)
595  {
596  	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
597  	void *cea;
598  
599  	if (!x86_pmu.bts)
600  		return;
601  
602  	/* Clear the fixmap */
603  	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
604  	ds_clear_cea(cea, BTS_BUFFER_SIZE);
605  	dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
606  	hwev->ds_bts_vaddr = NULL;
607  }
608  
609  static int alloc_ds_buffer(int cpu)
610  {
611  	struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
612  
613  	memset(ds, 0, sizeof(*ds));
614  	per_cpu(cpu_hw_events, cpu).ds = ds;
615  	return 0;
616  }
617  
618  static void release_ds_buffer(int cpu)
619  {
620  	per_cpu(cpu_hw_events, cpu).ds = NULL;
621  }
622  
623  void release_ds_buffers(void)
624  {
625  	int cpu;
626  
627  	if (!x86_pmu.bts && !x86_pmu.pebs)
628  		return;
629  
630  	for_each_possible_cpu(cpu)
631  		release_ds_buffer(cpu);
632  
633  	for_each_possible_cpu(cpu) {
634  		/*
635  		 * Again, ignore errors from offline CPUs, they will no longer
636  		 * observe cpu_hw_events.ds and not program the DS_AREA when
637  		 * they come up.
638  		 */
639  		fini_debug_store_on_cpu(cpu);
640  	}
641  
642  	for_each_possible_cpu(cpu) {
643  		release_pebs_buffer(cpu);
644  		release_bts_buffer(cpu);
645  	}
646  }
647  
648  void reserve_ds_buffers(void)
649  {
650  	int bts_err = 0, pebs_err = 0;
651  	int cpu;
652  
653  	x86_pmu.bts_active = 0;
654  	x86_pmu.pebs_active = 0;
655  
656  	if (!x86_pmu.bts && !x86_pmu.pebs)
657  		return;
658  
659  	if (!x86_pmu.bts)
660  		bts_err = 1;
661  
662  	if (!x86_pmu.pebs)
663  		pebs_err = 1;
664  
665  	for_each_possible_cpu(cpu) {
666  		if (alloc_ds_buffer(cpu)) {
667  			bts_err = 1;
668  			pebs_err = 1;
669  		}
670  
671  		if (!bts_err && alloc_bts_buffer(cpu))
672  			bts_err = 1;
673  
674  		if (!pebs_err && alloc_pebs_buffer(cpu))
675  			pebs_err = 1;
676  
677  		if (bts_err && pebs_err)
678  			break;
679  	}
680  
681  	if (bts_err) {
682  		for_each_possible_cpu(cpu)
683  			release_bts_buffer(cpu);
684  	}
685  
686  	if (pebs_err) {
687  		for_each_possible_cpu(cpu)
688  			release_pebs_buffer(cpu);
689  	}
690  
691  	if (bts_err && pebs_err) {
692  		for_each_possible_cpu(cpu)
693  			release_ds_buffer(cpu);
694  	} else {
695  		if (x86_pmu.bts && !bts_err)
696  			x86_pmu.bts_active = 1;
697  
698  		if (x86_pmu.pebs && !pebs_err)
699  			x86_pmu.pebs_active = 1;
700  
701  		for_each_possible_cpu(cpu) {
702  			/*
703  			 * Ignores wrmsr_on_cpu() errors for offline CPUs; they
704  			 * will get this call through intel_pmu_cpu_starting().
705  			 */
706  			init_debug_store_on_cpu(cpu);
707  		}
708  	}
709  }
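
/*
 * The loop above is deliberately "allocate everything, then roll back": a
 * failure for either facility only flags it, and once both have failed
 * there is no point allocating for the remaining CPUs. Whatever did get
 * allocated for a failed facility is released again afterwards, and
 * DS_AREA is only programmed when at least one of BTS/PEBS ended up usable.
 */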
710  
711  /*
712   * BTS
713   */
714  
715  struct event_constraint bts_constraint =
716  	EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
717  
718  void intel_pmu_enable_bts(u64 config)
719  {
720  	unsigned long debugctlmsr;
721  
722  	debugctlmsr = get_debugctlmsr();
723  
724  	debugctlmsr |= DEBUGCTLMSR_TR;
725  	debugctlmsr |= DEBUGCTLMSR_BTS;
726  	if (config & ARCH_PERFMON_EVENTSEL_INT)
727  		debugctlmsr |= DEBUGCTLMSR_BTINT;
728  
729  	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
730  		debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
731  
732  	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
733  		debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
734  
735  	update_debugctlmsr(debugctlmsr);
736  }
737  
738  void intel_pmu_disable_bts(void)
739  {
740  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
741  	unsigned long debugctlmsr;
742  
743  	if (!cpuc->ds)
744  		return;
745  
746  	debugctlmsr = get_debugctlmsr();
747  
748  	debugctlmsr &=
749  		~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
750  		  DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
751  
752  	update_debugctlmsr(debugctlmsr);
753  }
754  
755  int intel_pmu_drain_bts_buffer(void)
756  {
757  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
758  	struct debug_store *ds = cpuc->ds;
759  	struct bts_record {
760  		u64	from;
761  		u64	to;
762  		u64	flags;
763  	};
764  	struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
765  	struct bts_record *at, *base, *top;
766  	struct perf_output_handle handle;
767  	struct perf_event_header header;
768  	struct perf_sample_data data;
769  	unsigned long skip = 0;
770  	struct pt_regs regs;
771  
772  	if (!event)
773  		return 0;
774  
775  	if (!x86_pmu.bts_active)
776  		return 0;
777  
778  	base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
779  	top  = (struct bts_record *)(unsigned long)ds->bts_index;
780  
781  	if (top <= base)
782  		return 0;
783  
784  	memset(&regs, 0, sizeof(regs));
785  
786  	ds->bts_index = ds->bts_buffer_base;
787  
788  	perf_sample_data_init(&data, 0, event->hw.last_period);
789  
790  	/*
791  	 * BTS leaks kernel addresses in branches across the cpl boundary,
792  	 * such as traps or system calls, so unless the user is asking for
793  	 * kernel tracing (and right now it's not possible), we'd need to
794  	 * filter them out. But first we need to count how many of those we
795  	 * have in the current batch. This is an extra O(n) pass, however,
796  	 * it's much faster than the other one especially considering that
797  	 * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
798  	 * alloc_bts_buffer()).
799  	 */
800  	for (at = base; at < top; at++) {
801  		/*
802  		 * Note that right now *this* BTS code only works if
803  		 * attr::exclude_kernel is set, but let's keep this extra
804  		 * check here in case that changes.
805  		 */
806  		if (event->attr.exclude_kernel &&
807  		    (kernel_ip(at->from) || kernel_ip(at->to)))
808  			skip++;
809  	}
810  
811  	/*
812  	 * Prepare a generic sample, i.e. fill in the invariant fields.
813  	 * We will overwrite the from and to address before we output
814  	 * the sample.
815  	 */
816  	rcu_read_lock();
817  	perf_prepare_sample(&data, event, &regs);
818  	perf_prepare_header(&header, &data, event, &regs);
819  
820  	if (perf_output_begin(&handle, &data, event,
821  			      header.size * (top - base - skip)))
822  		goto unlock;
823  
824  	for (at = base; at < top; at++) {
825  		/* Filter out any records that contain kernel addresses. */
826  		if (event->attr.exclude_kernel &&
827  		    (kernel_ip(at->from) || kernel_ip(at->to)))
828  			continue;
829  
830  		data.ip		= at->from;
831  		data.addr	= at->to;
832  
833  		perf_output_sample(&handle, &header, &data, event);
834  	}
835  
836  	perf_output_end(&handle);
837  
838  	/* There's new data available. */
839  	event->hw.interrupts++;
840  	event->pending_kill = POLL_IN;
841  unlock:
842  	rcu_read_unlock();
843  	return 1;
844  }
845  
846  static inline void intel_pmu_drain_pebs_buffer(void)
847  {
848  	struct perf_sample_data data;
849  
850  	x86_pmu.drain_pebs(NULL, &data);
851  }
852  
853  /*
854   * PEBS
855   */
856  struct event_constraint intel_core2_pebs_event_constraints[] = {
857  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
858  	INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
859  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
860  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
861  	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
862  	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
863  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
864  	EVENT_CONSTRAINT_END
865  };
866  
867  struct event_constraint intel_atom_pebs_event_constraints[] = {
868  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
869  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
870  	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
871  	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
872  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
873  	/* Allow all events as PEBS with no flags */
874  	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
875  	EVENT_CONSTRAINT_END
876  };
877  
878  struct event_constraint intel_slm_pebs_event_constraints[] = {
879  	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
880  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x1),
881  	/* Allow all events as PEBS with no flags */
882  	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
883  	EVENT_CONSTRAINT_END
884  };
885  
886  struct event_constraint intel_glm_pebs_event_constraints[] = {
887  	/* Allow all events as PEBS with no flags */
888  	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
889  	EVENT_CONSTRAINT_END
890  };
891  
892  struct event_constraint intel_grt_pebs_event_constraints[] = {
893  	/* Allow all events as PEBS with no flags */
894  	INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0x3),
895  	INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf),
896  	EVENT_CONSTRAINT_END
897  };
898  
899  struct event_constraint intel_nehalem_pebs_event_constraints[] = {
900  	INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
901  	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
902  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
903  	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
904  	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
905  	INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
906  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
907  	INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
908  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
909  	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
910  	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
911  	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
912  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
913  	EVENT_CONSTRAINT_END
914  };
915  
916  struct event_constraint intel_westmere_pebs_event_constraints[] = {
917  	INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
918  	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
919  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
920  	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
921  	INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
922  	INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
923  	INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
924  	INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
925  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
926  	INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
927  	INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
928  	/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
929  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
930  	EVENT_CONSTRAINT_END
931  };
932  
933  struct event_constraint intel_snb_pebs_event_constraints[] = {
934  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
935  	INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
936  	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
937  	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
938  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
939          INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
940          INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
941          INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
942          INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
943  	/* Allow all events as PEBS with no flags */
944  	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
945  	EVENT_CONSTRAINT_END
946  };
947  
948  struct event_constraint intel_ivb_pebs_event_constraints[] = {
949          INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
950          INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
951  	INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
952  	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
953  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
954  	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
955  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
956  	INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
957  	INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
958  	INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
959  	INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
960  	/* Allow all events as PEBS with no flags */
961  	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
962          EVENT_CONSTRAINT_END
963  };
964  
965  struct event_constraint intel_hsw_pebs_event_constraints[] = {
966  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
967  	INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
968  	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
969  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
970  	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
971  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
972  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
973  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
974  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
975  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
976  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
977  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
978  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
979  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
980  	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
981  	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
982  	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
983  	/* Allow all events as PEBS with no flags */
984  	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
985  	EVENT_CONSTRAINT_END
986  };
987  
988  struct event_constraint intel_bdw_pebs_event_constraints[] = {
989  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
990  	INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
991  	/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
992  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
993  	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
994  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
995  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
996  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
997  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
998  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
999  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
1000  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
1001  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
1002  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
1003  	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
1004  	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
1005  	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
1006  	/* Allow all events as PEBS with no flags */
1007  	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
1008  	EVENT_CONSTRAINT_END
1009  };
1010  
1011  
1012  struct event_constraint intel_skl_pebs_event_constraints[] = {
1013  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),	/* INST_RETIRED.PREC_DIST */
1014  	/* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
1015  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
1016  	/* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
1017  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
1018  	INTEL_PLD_CONSTRAINT(0x1cd, 0xf),		      /* MEM_TRANS_RETIRED.* */
1019  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
1020  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
1021  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
1022  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
1023  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
1024  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
1025  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
1026  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
1027  	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_RETIRED.* */
1028  	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_L3_HIT_RETIRED.* */
1029  	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_L3_MISS_RETIRED.* */
1030  	/* Allow all events as PEBS with no flags */
1031  	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
1032  	EVENT_CONSTRAINT_END
1033  };
1034  
1035  struct event_constraint intel_icl_pebs_event_constraints[] = {
1036  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x100000000ULL),	/* old INST_RETIRED.PREC_DIST */
1037  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0100, 0x100000000ULL),	/* INST_RETIRED.PREC_DIST */
1038  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),	/* SLOTS */
1039  
1040  	INTEL_PLD_CONSTRAINT(0x1cd, 0xff),			/* MEM_TRANS_RETIRED.LOAD_LATENCY */
1041  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_LOADS */
1042  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_STORES */
1043  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf),	/* MEM_INST_RETIRED.LOCK_LOADS */
1044  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_LOADS */
1045  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_STORES */
1046  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf),	/* MEM_INST_RETIRED.ALL_LOADS */
1047  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf),	/* MEM_INST_RETIRED.ALL_STORES */
1048  
1049  	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */
1050  
1051  	INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),		/* MEM_INST_RETIRED.* */
1052  
1053  	/*
1054  	 * Everything else is handled by PMU_FL_PEBS_ALL, because we
1055  	 * need the full constraints from the main table.
1056  	 */
1057  
1058  	EVENT_CONSTRAINT_END
1059  };
1060  
1061  struct event_constraint intel_spr_pebs_event_constraints[] = {
1062  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL),	/* INST_RETIRED.PREC_DIST */
1063  	INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
1064  
1065  	INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),
1066  	INTEL_PLD_CONSTRAINT(0x1cd, 0xfe),
1067  	INTEL_PSD_CONSTRAINT(0x2cd, 0x1),
1068  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_LOADS */
1069  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf),	/* MEM_INST_RETIRED.STLB_MISS_STORES */
1070  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf),	/* MEM_INST_RETIRED.LOCK_LOADS */
1071  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_LOADS */
1072  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf),	/* MEM_INST_RETIRED.SPLIT_STORES */
1073  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf),	/* MEM_INST_RETIRED.ALL_LOADS */
1074  	INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf),	/* MEM_INST_RETIRED.ALL_STORES */
1075  
1076  	INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf),
1077  
1078  	INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),
1079  
1080  	/*
1081  	 * Everything else is handled by PMU_FL_PEBS_ALL, because we
1082  	 * need the full constraints from the main table.
1083  	 */
1084  
1085  	EVENT_CONSTRAINT_END
1086  };
1087  
1088  struct event_constraint *intel_pebs_constraints(struct perf_event *event)
1089  {
1090  	struct event_constraint *pebs_constraints = hybrid(event->pmu, pebs_constraints);
1091  	struct event_constraint *c;
1092  
1093  	if (!event->attr.precise_ip)
1094  		return NULL;
1095  
1096  	if (pebs_constraints) {
1097  		for_each_event_constraint(c, pebs_constraints) {
1098  			if (constraint_match(c, event->hw.config)) {
1099  				event->hw.flags |= c->flags;
1100  				return c;
1101  			}
1102  		}
1103  	}
1104  
1105  	/*
1106  	 * Extended PEBS support
1107  	 * Makes the PEBS code search the normal constraints.
1108  	 */
1109  	if (x86_pmu.flags & PMU_FL_PEBS_ALL)
1110  		return NULL;
1111  
1112  	return &emptyconstraint;
1113  }
1114  
1115  /*
1116   * We need the sched_task callback even for per-cpu events when we use
1117   * the large interrupt threshold, such that we can provide PID and TID
1118   * to PEBS samples.
1119   */
1120  static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
1121  {
1122  	if (cpuc->n_pebs == cpuc->n_pebs_via_pt)
1123  		return false;
1124  
1125  	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
1126  }
1127  
1128  void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
1129  {
1130  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1131  
1132  	if (!sched_in && pebs_needs_sched_cb(cpuc))
1133  		intel_pmu_drain_pebs_buffer();
1134  }
1135  
1136  static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
1137  {
1138  	struct debug_store *ds = cpuc->ds;
1139  	int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
1140  	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
1141  	u64 threshold;
1142  	int reserved;
1143  
1144  	if (cpuc->n_pebs_via_pt)
1145  		return;
1146  
1147  	if (x86_pmu.flags & PMU_FL_PEBS_ALL)
1148  		reserved = max_pebs_events + num_counters_fixed;
1149  	else
1150  		reserved = max_pebs_events;
1151  
1152  	if (cpuc->n_pebs == cpuc->n_large_pebs) {
1153  		threshold = ds->pebs_absolute_maximum -
1154  			reserved * cpuc->pebs_record_size;
1155  	} else {
1156  		threshold = ds->pebs_buffer_base + cpuc->pebs_record_size;
1157  	}
1158  
1159  	ds->pebs_interrupt_threshold = threshold;
1160  }
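
/*
 * Illustrative numbers, assuming 8 GP + 4 fixed counters with
 * PMU_FL_PEBS_ALL set: reserved = 12, so in large-PEBS mode the threshold
 * sits 12 * pebs_record_size below pebs_absolute_maximum, guaranteeing room
 * for one in-flight record per counter. Otherwise the threshold is
 * base + one record, i.e. a PMI is requested after every single record.
 */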
1161  
1162  static void adaptive_pebs_record_size_update(void)
1163  {
1164  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1165  	u64 pebs_data_cfg = cpuc->pebs_data_cfg;
1166  	int sz = sizeof(struct pebs_basic);
1167  
1168  	if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
1169  		sz += sizeof(struct pebs_meminfo);
1170  	if (pebs_data_cfg & PEBS_DATACFG_GP)
1171  		sz += sizeof(struct pebs_gprs);
1172  	if (pebs_data_cfg & PEBS_DATACFG_XMMS)
1173  		sz += sizeof(struct pebs_xmm);
1174  	if (pebs_data_cfg & PEBS_DATACFG_LBRS)
1175  		sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);
1176  
1177  	cpuc->pebs_record_size = sz;
1178  }
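
/*
 * E.g. requesting MEMINFO + GP (no XMMs, no LBRs) gives a record of roughly
 * sizeof(pebs_basic) + sizeof(pebs_meminfo) + sizeof(pebs_gprs) bytes;
 * asking for LBRs additionally adds lbr_nr * sizeof(struct lbr_entry).
 * The matching PEBS_DATACFG_* bits are what pebs_update_adaptive_cfg()
 * below derives from the event's sample_type.
 */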
1179  
1180  #define PERF_PEBS_MEMINFO_TYPE	(PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC |   \
1181  				PERF_SAMPLE_PHYS_ADDR |			     \
1182  				PERF_SAMPLE_WEIGHT_TYPE |		     \
1183  				PERF_SAMPLE_TRANSACTION |		     \
1184  				PERF_SAMPLE_DATA_PAGE_SIZE)
1185  
1186  static u64 pebs_update_adaptive_cfg(struct perf_event *event)
1187  {
1188  	struct perf_event_attr *attr = &event->attr;
1189  	u64 sample_type = attr->sample_type;
1190  	u64 pebs_data_cfg = 0;
1191  	bool gprs, tsx_weight;
1192  
1193  	if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) &&
1194  	    attr->precise_ip > 1)
1195  		return pebs_data_cfg;
1196  
1197  	if (sample_type & PERF_PEBS_MEMINFO_TYPE)
1198  		pebs_data_cfg |= PEBS_DATACFG_MEMINFO;
1199  
1200  	/*
1201  	 * We need GPRs when:
1202  	 * + user requested them
1203  	 * + precise_ip < 2 for the non event IP
1204  	 * + For RTM TSX weight we need GPRs for the abort code.
1205  	 */
1206  	gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
1207  	       (attr->sample_regs_intr & PEBS_GP_REGS);
1208  
1209  	tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
1210  		     ((attr->config & INTEL_ARCH_EVENT_MASK) ==
1211  		      x86_pmu.rtm_abort_event);
1212  
1213  	if (gprs || (attr->precise_ip < 2) || tsx_weight)
1214  		pebs_data_cfg |= PEBS_DATACFG_GP;
1215  
1216  	if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
1217  	    (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK))
1218  		pebs_data_cfg |= PEBS_DATACFG_XMMS;
1219  
1220  	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
1221  		/*
1222  		 * For now always log all LBRs. Could configure this
1223  		 * later.
1224  		 */
1225  		pebs_data_cfg |= PEBS_DATACFG_LBRS |
1226  			((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
1227  	}
1228  
1229  	return pebs_data_cfg;
1230  }
1231  
1232  static void
1233  pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
1234  		  struct perf_event *event, bool add)
1235  {
1236  	struct pmu *pmu = event->pmu;
1237  
1238  	/*
1239  	 * Make sure we get updated with the first PEBS event.
1240  	 * During removal, ->pebs_data_cfg is still valid for
1241  	 * the last PEBS event. Don't clear it.
1242  	 */
1243  	if ((cpuc->n_pebs == 1) && add)
1244  		cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;
1245  
1246  	if (needed_cb != pebs_needs_sched_cb(cpuc)) {
1247  		if (!needed_cb)
1248  			perf_sched_cb_inc(pmu);
1249  		else
1250  			perf_sched_cb_dec(pmu);
1251  
1252  		cpuc->pebs_data_cfg |= PEBS_UPDATE_DS_SW;
1253  	}
1254  
1255  	/*
1256  	 * The PEBS record doesn't shrink on pmu::del(). Doing so would require
1257  	 * iterating all remaining PEBS events to reconstruct the config.
1258  	 */
1259  	if (x86_pmu.intel_cap.pebs_baseline && add) {
1260  		u64 pebs_data_cfg;
1261  
1262  		pebs_data_cfg = pebs_update_adaptive_cfg(event);
1263  		/*
1264  		 * Be sure to update the thresholds when we change the record.
1265  		 */
1266  		if (pebs_data_cfg & ~cpuc->pebs_data_cfg)
1267  			cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW;
1268  	}
1269  }
1270  
1271  void intel_pmu_pebs_add(struct perf_event *event)
1272  {
1273  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1274  	struct hw_perf_event *hwc = &event->hw;
1275  	bool needed_cb = pebs_needs_sched_cb(cpuc);
1276  
1277  	cpuc->n_pebs++;
1278  	if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
1279  		cpuc->n_large_pebs++;
1280  	if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
1281  		cpuc->n_pebs_via_pt++;
1282  
1283  	pebs_update_state(needed_cb, cpuc, event, true);
1284  }
1285  
1286  static void intel_pmu_pebs_via_pt_disable(struct perf_event *event)
1287  {
1288  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1289  
1290  	if (!is_pebs_pt(event))
1291  		return;
1292  
1293  	if (!(cpuc->pebs_enabled & ~PEBS_VIA_PT_MASK))
1294  		cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK;
1295  }
1296  
1297  static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
1298  {
1299  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1300  	struct hw_perf_event *hwc = &event->hw;
1301  	struct debug_store *ds = cpuc->ds;
1302  	u64 value = ds->pebs_event_reset[hwc->idx];
1303  	u32 base = MSR_RELOAD_PMC0;
1304  	unsigned int idx = hwc->idx;
1305  
1306  	if (!is_pebs_pt(event))
1307  		return;
1308  
1309  	if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
1310  		cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD;
1311  
1312  	cpuc->pebs_enabled |= PEBS_OUTPUT_PT;
1313  
1314  	if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
1315  		base = MSR_RELOAD_FIXED_CTR0;
1316  		idx = hwc->idx - INTEL_PMC_IDX_FIXED;
1317  		if (x86_pmu.intel_cap.pebs_format < 5)
1318  			value = ds->pebs_event_reset[MAX_PEBS_EVENTS_FMT4 + idx];
1319  		else
1320  			value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx];
1321  	}
1322  	wrmsrl(base + idx, value);
1323  }
1324  
1325  static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc)
1326  {
1327  	if (cpuc->n_pebs == cpuc->n_large_pebs &&
1328  	    cpuc->n_pebs != cpuc->n_pebs_via_pt)
1329  		intel_pmu_drain_pebs_buffer();
1330  }
1331  
1332  void intel_pmu_pebs_enable(struct perf_event *event)
1333  {
1334  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1335  	u64 pebs_data_cfg = cpuc->pebs_data_cfg & ~PEBS_UPDATE_DS_SW;
1336  	struct hw_perf_event *hwc = &event->hw;
1337  	struct debug_store *ds = cpuc->ds;
1338  	unsigned int idx = hwc->idx;
1339  
1340  	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
1341  
1342  	cpuc->pebs_enabled |= 1ULL << hwc->idx;
1343  
1344  	if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
1345  		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
1346  	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
1347  		cpuc->pebs_enabled |= 1ULL << 63;
1348  
1349  	if (x86_pmu.intel_cap.pebs_baseline) {
1350  		hwc->config |= ICL_EVENTSEL_ADAPTIVE;
1351  		if (pebs_data_cfg != cpuc->active_pebs_data_cfg) {
1352  			/*
1353  			 * drain_pebs() assumes uniform record size;
1354  			 * hence we need to drain when changing said
1355  			 * size.
1356  			 */
1357  			intel_pmu_drain_pebs_buffer();
1358  			adaptive_pebs_record_size_update();
1359  			wrmsrl(MSR_PEBS_DATA_CFG, pebs_data_cfg);
1360  			cpuc->active_pebs_data_cfg = pebs_data_cfg;
1361  		}
1362  	}
1363  	if (cpuc->pebs_data_cfg & PEBS_UPDATE_DS_SW) {
1364  		cpuc->pebs_data_cfg = pebs_data_cfg;
1365  		pebs_update_threshold(cpuc);
1366  	}
1367  
1368  	if (idx >= INTEL_PMC_IDX_FIXED) {
1369  		if (x86_pmu.intel_cap.pebs_format < 5)
1370  			idx = MAX_PEBS_EVENTS_FMT4 + (idx - INTEL_PMC_IDX_FIXED);
1371  		else
1372  			idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
1373  	}
1374  
1375  	/*
1376  	 * Use auto-reload if possible to save a MSR write in the PMI.
1377  	 * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
1378  	 */
1379  	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
1380  		ds->pebs_event_reset[idx] =
1381  			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
1382  	} else {
1383  		ds->pebs_event_reset[idx] = 0;
1384  	}
1385  
1386  	intel_pmu_pebs_via_pt_enable(event);
1387  }
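
/*
 * Reset values for fixed counters live after the GP entries in
 * ds->pebs_event_reset[], hence the index remap above before the reset
 * value is stored. E.g. on a pre-format-5 part, fixed counter 0
 * (idx == INTEL_PMC_IDX_FIXED) keeps its reset value at
 * pebs_event_reset[MAX_PEBS_EVENTS_FMT4 + 0].
 */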
1388  
1389  void intel_pmu_pebs_del(struct perf_event *event)
1390  {
1391  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1392  	struct hw_perf_event *hwc = &event->hw;
1393  	bool needed_cb = pebs_needs_sched_cb(cpuc);
1394  
1395  	cpuc->n_pebs--;
1396  	if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
1397  		cpuc->n_large_pebs--;
1398  	if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
1399  		cpuc->n_pebs_via_pt--;
1400  
1401  	pebs_update_state(needed_cb, cpuc, event, false);
1402  }
1403  
1404  void intel_pmu_pebs_disable(struct perf_event *event)
1405  {
1406  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1407  	struct hw_perf_event *hwc = &event->hw;
1408  
1409  	intel_pmu_drain_large_pebs(cpuc);
1410  
1411  	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
1412  
1413  	if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
1414  	    (x86_pmu.version < 5))
1415  		cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
1416  	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
1417  		cpuc->pebs_enabled &= ~(1ULL << 63);
1418  
1419  	intel_pmu_pebs_via_pt_disable(event);
1420  
1421  	if (cpuc->enabled)
1422  		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
1423  
1424  	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
1425  }
1426  
1427  void intel_pmu_pebs_enable_all(void)
1428  {
1429  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1430  
1431  	if (cpuc->pebs_enabled)
1432  		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
1433  }
1434  
1435  void intel_pmu_pebs_disable_all(void)
1436  {
1437  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1438  
1439  	if (cpuc->pebs_enabled)
1440  		__intel_pmu_pebs_disable_all();
1441  }
1442  
1443  static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
1444  {
1445  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1446  	unsigned long from = cpuc->lbr_entries[0].from;
1447  	unsigned long old_to, to = cpuc->lbr_entries[0].to;
1448  	unsigned long ip = regs->ip;
1449  	int is_64bit = 0;
1450  	void *kaddr;
1451  	int size;
1452  
1453  	/*
1454  	 * We don't need to fixup if the PEBS assist is fault like
1455  	 */
1456  	if (!x86_pmu.intel_cap.pebs_trap)
1457  		return 1;
1458  
1459  	/*
1460  	 * No LBR entry, no basic block, no rewinding
1461  	 */
1462  	if (!cpuc->lbr_stack.nr || !from || !to)
1463  		return 0;
1464  
1465  	/*
1466  	 * Basic blocks should never cross user/kernel boundaries
1467  	 */
1468  	if (kernel_ip(ip) != kernel_ip(to))
1469  		return 0;
1470  
1471  	/*
1472  	 * unsigned math, either ip is before the start (impossible) or
1473  	 * the basic block is larger than 1 page (sanity)
1474  	 */
1475  	if ((ip - to) > PEBS_FIXUP_SIZE)
1476  		return 0;
1477  
1478  	/*
1479  	 * We sampled a branch insn, rewind using the LBR stack
1480  	 */
1481  	if (ip == to) {
1482  		set_linear_ip(regs, from);
1483  		return 1;
1484  	}
1485  
1486  	size = ip - to;
1487  	if (!kernel_ip(ip)) {
1488  		int bytes;
1489  		u8 *buf = this_cpu_read(insn_buffer);
1490  
1491  		/* 'size' must fit our buffer, see above */
1492  		bytes = copy_from_user_nmi(buf, (void __user *)to, size);
1493  		if (bytes != 0)
1494  			return 0;
1495  
1496  		kaddr = buf;
1497  	} else {
1498  		kaddr = (void *)to;
1499  	}
1500  
1501  	do {
1502  		struct insn insn;
1503  
1504  		old_to = to;
1505  
1506  #ifdef CONFIG_X86_64
1507  		is_64bit = kernel_ip(to) || any_64bit_mode(regs);
1508  #endif
1509  		insn_init(&insn, kaddr, size, is_64bit);
1510  
1511  		/*
1512  		 * Make sure there was not a problem decoding the instruction.
1513  		 * This is doubly important because we have an infinite loop if
1514  		 * insn.length=0.
1515  		 */
1516  		if (insn_get_length(&insn))
1517  			break;
1518  
1519  		to += insn.length;
1520  		kaddr += insn.length;
1521  		size -= insn.length;
1522  	} while (to < ip);
1523  
1524  	if (to == ip) {
1525  		set_linear_ip(regs, old_to);
1526  		return 1;
1527  	}
1528  
1529  	/*
1530  	 * Even though we decoded the basic block, the instruction stream
1531  	 * never matched the given IP, either the TO or the IP got corrupted.
1532  	 */
1533  	return 0;
1534  }
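
/*
 * Summary of the fixup above: with a trap-like PEBS assist
 * (intel_cap.pebs_trap set), the reported IP is the address of the
 * instruction *after* the one that triggered the sample. The last LBR
 * entry gives the branch into the sampled basic block, so decoding forward
 * from its 'to' address until the reported IP is reached leaves old_to
 * pointing at the preceding instruction -- the real eventing IP.
 * Returning 1 tells the caller the IP could be made exact; returning 0
 * leaves the off-by-one IP in place.
 */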
1535  
1536  static inline u64 intel_get_tsx_weight(u64 tsx_tuning)
1537  {
1538  	if (tsx_tuning) {
1539  		union hsw_tsx_tuning tsx = { .value = tsx_tuning };
1540  		return tsx.cycles_last_block;
1541  	}
1542  	return 0;
1543  }
1544  
1545  static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax)
1546  {
1547  	u64 txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
1548  
1549  	/* For RTM XABORTs also log the abort code from AX */
1550  	if ((txn & PERF_TXN_TRANSACTION) && (ax & 1))
1551  		txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
1552  	return txn;
1553  }
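
/*
 * Illustrative decode: for a record whose tsx_tuning flags mark an RTM
 * transaction and with ax == 0xdead0001, bit 0 of AX indicates an explicit
 * XABORT with a valid code, so bits 31:24 (0xde here) are folded into the
 * returned transaction word at PERF_TXN_ABORT_SHIFT.
 */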
1554  
1555  static inline u64 get_pebs_status(void *n)
1556  {
1557  	if (x86_pmu.intel_cap.pebs_format < 4)
1558  		return ((struct pebs_record_nhm *)n)->status;
1559  	return ((struct pebs_basic *)n)->applicable_counters;
1560  }
1561  
1562  #define PERF_X86_EVENT_PEBS_HSW_PREC \
1563  		(PERF_X86_EVENT_PEBS_ST_HSW | \
1564  		 PERF_X86_EVENT_PEBS_LD_HSW | \
1565  		 PERF_X86_EVENT_PEBS_NA_HSW)
1566  
1567  static u64 get_data_src(struct perf_event *event, u64 aux)
1568  {
1569  	u64 val = PERF_MEM_NA;
1570  	int fl = event->hw.flags;
1571  	bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
1572  
1573  	if (fl & PERF_X86_EVENT_PEBS_LDLAT)
1574  		val = load_latency_data(event, aux);
1575  	else if (fl & PERF_X86_EVENT_PEBS_STLAT)
1576  		val = store_latency_data(event, aux);
1577  	else if (fl & PERF_X86_EVENT_PEBS_LAT_HYBRID)
1578  		val = x86_pmu.pebs_latency_data(event, aux);
1579  	else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
1580  		val = precise_datala_hsw(event, aux);
1581  	else if (fst)
1582  		val = precise_store_data(aux);
1583  	return val;
1584  }
1585  
1586  static void setup_pebs_time(struct perf_event *event,
1587  			    struct perf_sample_data *data,
1588  			    u64 tsc)
1589  {
1590  	/* Converting to a user-defined clock is not supported yet. */
1591  	if (event->attr.use_clockid != 0)
1592  		return;
1593  
1594  	/*
1595  	 * The conversion is not supported when the TSC is unstable.
1596  	 * An unstable TSC is a corner case and very unlikely to
1597  	 * happen. If it does, the TSC in the PEBS record is dropped
1598  	 * and the time falls back to perf_event_clock().
1599  	 */
1600  	if (!using_native_sched_clock() || !sched_clock_stable())
1601  		return;
1602  
1603  	data->time = native_sched_clock_from_tsc(tsc) + __sched_clock_offset;
1604  	data->sample_flags |= PERF_SAMPLE_TIME;
1605  }
1606  
1607  #define PERF_SAMPLE_ADDR_TYPE	(PERF_SAMPLE_ADDR |		\
1608  				 PERF_SAMPLE_PHYS_ADDR |	\
1609  				 PERF_SAMPLE_DATA_PAGE_SIZE)
1610  
1611  static void setup_pebs_fixed_sample_data(struct perf_event *event,
1612  				   struct pt_regs *iregs, void *__pebs,
1613  				   struct perf_sample_data *data,
1614  				   struct pt_regs *regs)
1615  {
1616  	/*
1617  	 * We cast to the biggest pebs_record but are careful not to
1618  	 * unconditionally access the 'extra' entries.
1619  	 */
1620  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1621  	struct pebs_record_skl *pebs = __pebs;
1622  	u64 sample_type;
1623  	int fll;
1624  
1625  	if (pebs == NULL)
1626  		return;
1627  
1628  	sample_type = event->attr.sample_type;
1629  	fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
1630  
1631  	perf_sample_data_init(data, 0, event->hw.last_period);
1632  
1633  	data->period = event->hw.last_period;
1634  
1635  	/*
1636  	 * Use latency for weight (only avail with PEBS-LL)
1637  	 */
1638  	if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) {
1639  		data->weight.full = pebs->lat;
1640  		data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
1641  	}
1642  
1643  	/*
1644  	 * data.data_src encodes the data source
1645  	 */
1646  	if (sample_type & PERF_SAMPLE_DATA_SRC) {
1647  		data->data_src.val = get_data_src(event, pebs->dse);
1648  		data->sample_flags |= PERF_SAMPLE_DATA_SRC;
1649  	}
1650  
1651  	/*
1652  	 * We must however always use iregs for the unwinder to stay sane; the
1653  	 * record BP,SP,IP can point into thin air when the record is from a
1654  	 * previous PMI context or an (I)RET happened between the record and
1655  	 * PMI.
1656  	 */
1657  	if (sample_type & PERF_SAMPLE_CALLCHAIN)
1658  		perf_sample_save_callchain(data, event, iregs);
1659  
1660  	/*
1661  	 * We use the interrupt regs as a base because the PEBS record does not
1662  	 * contain a full regs set; specifically, it seems to lack segment
1663  	 * descriptors, which get used by things like user_mode().
1664  	 *
1665  	 * In the simple case fix up only the IP for PERF_SAMPLE_IP.
1666  	 */
1667  	*regs = *iregs;
1668  
1669  	/*
1670  	 * Initialize regs->flags from the PEBS record.
1671  	 * Clear the exact bit (which uses x86 EFLAGS Reserved bit 3),
1672  	 * i.e., do not rely on it being zero:
1673  	 */
1674  	regs->flags = pebs->flags & ~PERF_EFLAGS_EXACT;
1675  
1676  	if (sample_type & PERF_SAMPLE_REGS_INTR) {
1677  		regs->ax = pebs->ax;
1678  		regs->bx = pebs->bx;
1679  		regs->cx = pebs->cx;
1680  		regs->dx = pebs->dx;
1681  		regs->si = pebs->si;
1682  		regs->di = pebs->di;
1683  
1684  		regs->bp = pebs->bp;
1685  		regs->sp = pebs->sp;
1686  
1687  #ifndef CONFIG_X86_32
1688  		regs->r8 = pebs->r8;
1689  		regs->r9 = pebs->r9;
1690  		regs->r10 = pebs->r10;
1691  		regs->r11 = pebs->r11;
1692  		regs->r12 = pebs->r12;
1693  		regs->r13 = pebs->r13;
1694  		regs->r14 = pebs->r14;
1695  		regs->r15 = pebs->r15;
1696  #endif
1697  	}
1698  
1699  	if (event->attr.precise_ip > 1) {
1700  		/*
1701  		 * Haswell and later processors have an 'eventing IP'
1702  		 * (real IP) which fixes the off-by-1 skid in hardware.
1703  		 * Use it when precise_ip >= 2 :
1704  		 */
1705  		if (x86_pmu.intel_cap.pebs_format >= 2) {
1706  			set_linear_ip(regs, pebs->real_ip);
1707  			regs->flags |= PERF_EFLAGS_EXACT;
1708  		} else {
1709  			/* Otherwise, use PEBS off-by-1 IP: */
1710  			set_linear_ip(regs, pebs->ip);
1711  
1712  			/*
1713  			 * With precise_ip >= 2, try to fix up the off-by-1 IP
1714  			 * using the LBR. If successful, the fixup function
1715  			 * corrects regs->ip and calls set_linear_ip() on regs:
1716  			 */
1717  			if (intel_pmu_pebs_fixup_ip(regs))
1718  				regs->flags |= PERF_EFLAGS_EXACT;
1719  		}
1720  	} else {
1721  		/*
1722  		 * When precise_ip == 1, return the PEBS off-by-1 IP,
1723  		 * no fixup attempted:
1724  		 */
1725  		set_linear_ip(regs, pebs->ip);
1726  	}
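	/*
	 * Editor's note (assumption about the perf tool's modifier mapping):
	 * "perf record -e cycles:p" requests precise_ip == 1 and therefore
	 * takes the off-by-1 path above, while ":pp"/":ppp" request >= 2 and
	 * get either the eventing IP (PEBS fmt >= 2) or the LBR-based fixup.
	 */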
1727  
1728  
1729  	if ((sample_type & PERF_SAMPLE_ADDR_TYPE) &&
1730  	    x86_pmu.intel_cap.pebs_format >= 1) {
1731  		data->addr = pebs->dla;
1732  		data->sample_flags |= PERF_SAMPLE_ADDR;
1733  	}
1734  
1735  	if (x86_pmu.intel_cap.pebs_format >= 2) {
1736  		/* Only set the TSX weight when no memory weight. */
1737  		if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) {
1738  			data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
1739  			data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
1740  		}
1741  		if (sample_type & PERF_SAMPLE_TRANSACTION) {
1742  			data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
1743  							      pebs->ax);
1744  			data->sample_flags |= PERF_SAMPLE_TRANSACTION;
1745  		}
1746  	}
1747  
1748  	/*
1749  	 * PEBS format v3 supplies an accurate time stamp, so use
1750  	 * that as the sample time.
1751  	 *
1752  	 * We can only do this for the default trace clock.
1753  	 */
1754  	if (x86_pmu.intel_cap.pebs_format >= 3)
1755  		setup_pebs_time(event, data, pebs->tsc);
1756  
1757  	if (has_branch_stack(event))
1758  		perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
1759  }
1760  
1761  static void adaptive_pebs_save_regs(struct pt_regs *regs,
1762  				    struct pebs_gprs *gprs)
1763  {
1764  	regs->ax = gprs->ax;
1765  	regs->bx = gprs->bx;
1766  	regs->cx = gprs->cx;
1767  	regs->dx = gprs->dx;
1768  	regs->si = gprs->si;
1769  	regs->di = gprs->di;
1770  	regs->bp = gprs->bp;
1771  	regs->sp = gprs->sp;
1772  #ifndef CONFIG_X86_32
1773  	regs->r8 = gprs->r8;
1774  	regs->r9 = gprs->r9;
1775  	regs->r10 = gprs->r10;
1776  	regs->r11 = gprs->r11;
1777  	regs->r12 = gprs->r12;
1778  	regs->r13 = gprs->r13;
1779  	regs->r14 = gprs->r14;
1780  	regs->r15 = gprs->r15;
1781  #endif
1782  }
1783  
1784  #define PEBS_LATENCY_MASK			0xffff
1785  #define PEBS_CACHE_LATENCY_OFFSET		32
1786  #define PEBS_RETIRE_LATENCY_OFFSET		32
1787  
1788  /*
1789   * With adaptive PEBS the layout depends on what fields are configured.
1790   */
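/*
 * Illustrative layout sketch (editor's addition, derived from the parsing
 * code below): an adaptive PEBS record always starts with struct
 * pebs_basic, whose format_size mirrors the PEBS_DATACFG_* selection in
 * its low bits, carries the retire latency in bits 47:32 on
 * PMU_FL_RETIRE_LATENCY parts, and the total record size in bits 63:48.
 * The optional groups then follow in this order:
 *
 *	struct pebs_basic	basic;		always present
 *	struct pebs_meminfo	meminfo;	if PEBS_DATACFG_MEMINFO
 *	struct pebs_gprs	gprs;		if PEBS_DATACFG_GP
 *	struct pebs_xmm		xmm;		if PEBS_DATACFG_XMMS
 *	struct lbr_entry	lbr[n];		if PEBS_DATACFG_LBRS, where
 *						n = ((format_size >>
 *						PEBS_DATACFG_LBR_SHIFT) & 0xff) + 1
 *
 * setup_pebs_adaptive_sample_data() walks the groups in exactly this order
 * and checks the consumed size against format_size >> 48 at the end.
 */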
1791  
1792  static void setup_pebs_adaptive_sample_data(struct perf_event *event,
1793  					    struct pt_regs *iregs, void *__pebs,
1794  					    struct perf_sample_data *data,
1795  					    struct pt_regs *regs)
1796  {
1797  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1798  	struct pebs_basic *basic = __pebs;
1799  	void *next_record = basic + 1;
1800  	u64 sample_type;
1801  	u64 format_size;
1802  	struct pebs_meminfo *meminfo = NULL;
1803  	struct pebs_gprs *gprs = NULL;
1804  	struct x86_perf_regs *perf_regs;
1805  
1806  	if (basic == NULL)
1807  		return;
1808  
1809  	perf_regs = container_of(regs, struct x86_perf_regs, regs);
1810  	perf_regs->xmm_regs = NULL;
1811  
1812  	sample_type = event->attr.sample_type;
1813  	format_size = basic->format_size;
1814  	perf_sample_data_init(data, 0, event->hw.last_period);
1815  	data->period = event->hw.last_period;
1816  
1817  	setup_pebs_time(event, data, basic->tsc);
1818  
1819  	/*
1820  	 * We must however always use iregs for the unwinder to stay sane; the
1821  	 * record BP,SP,IP can point into thin air when the record is from a
1822  	 * previous PMI context or an (I)RET happened between the record and
1823  	 * PMI.
1824  	 */
1825  	if (sample_type & PERF_SAMPLE_CALLCHAIN)
1826  		perf_sample_save_callchain(data, event, iregs);
1827  
1828  	*regs = *iregs;
1829  	/* The ip in basic is EventingIP */
1830  	set_linear_ip(regs, basic->ip);
1831  	regs->flags = PERF_EFLAGS_EXACT;
1832  
1833  	if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
1834  		if (x86_pmu.flags & PMU_FL_RETIRE_LATENCY)
1835  			data->weight.var3_w = format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEBS_LATENCY_MASK;
1836  		else
1837  			data->weight.var3_w = 0;
1838  	}
1839  
1840  	/*
1841  	 * The MEMINFO block comes before the GP block in the record,
1842  	 * but PERF_SAMPLE_TRANSACTION needs gprs->ax.
1843  	 * Save the pointer here and process it later.
1844  	 */
1845  	if (format_size & PEBS_DATACFG_MEMINFO) {
1846  		meminfo = next_record;
1847  		next_record = meminfo + 1;
1848  	}
1849  
1850  	if (format_size & PEBS_DATACFG_GP) {
1851  		gprs = next_record;
1852  		next_record = gprs + 1;
1853  
1854  		if (event->attr.precise_ip < 2) {
1855  			set_linear_ip(regs, gprs->ip);
1856  			regs->flags &= ~PERF_EFLAGS_EXACT;
1857  		}
1858  
1859  		if (sample_type & PERF_SAMPLE_REGS_INTR)
1860  			adaptive_pebs_save_regs(regs, gprs);
1861  	}
1862  
1863  	if (format_size & PEBS_DATACFG_MEMINFO) {
1864  		if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
1865  			u64 weight = meminfo->latency;
1866  
1867  			if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) {
1868  				data->weight.var2_w = weight & PEBS_LATENCY_MASK;
1869  				weight >>= PEBS_CACHE_LATENCY_OFFSET;
1870  			}
1871  
1872  			/*
1873  			 * Although meminfo::latency is defined as a u64,
1874  			 * only the lower 32 bits include the valid data
1875  			 * in practice on Ice Lake and earlier platforms.
1876  			 */
1877  			if (sample_type & PERF_SAMPLE_WEIGHT) {
1878  				data->weight.full = weight ?:
1879  					intel_get_tsx_weight(meminfo->tsx_tuning);
1880  			} else {
1881  				data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
1882  					intel_get_tsx_weight(meminfo->tsx_tuning);
1883  			}
1884  			data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
1885  		}
1886  
1887  		if (sample_type & PERF_SAMPLE_DATA_SRC) {
1888  			data->data_src.val = get_data_src(event, meminfo->aux);
1889  			data->sample_flags |= PERF_SAMPLE_DATA_SRC;
1890  		}
1891  
1892  		if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
1893  			data->addr = meminfo->address;
1894  			data->sample_flags |= PERF_SAMPLE_ADDR;
1895  		}
1896  
1897  		if (sample_type & PERF_SAMPLE_TRANSACTION) {
1898  			data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
1899  							  gprs ? gprs->ax : 0);
1900  			data->sample_flags |= PERF_SAMPLE_TRANSACTION;
1901  		}
1902  	}
1903  
1904  	if (format_size & PEBS_DATACFG_XMMS) {
1905  		struct pebs_xmm *xmm = next_record;
1906  
1907  		next_record = xmm + 1;
1908  		perf_regs->xmm_regs = xmm->xmm;
1909  	}
1910  
1911  	if (format_size & PEBS_DATACFG_LBRS) {
1912  		struct lbr_entry *lbr = next_record;
1913  		int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
1914  					& 0xff) + 1;
1915  		next_record = next_record + num_lbr * sizeof(struct lbr_entry);
1916  
1917  		if (has_branch_stack(event)) {
1918  			intel_pmu_store_pebs_lbrs(lbr);
1919  			perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
1920  		}
1921  	}
1922  
1923  	WARN_ONCE(next_record != __pebs + (format_size >> 48),
1924  			"PEBS record size %llu, expected %llu, config %llx\n",
1925  			format_size >> 48,
1926  			(u64)(next_record - __pebs),
1927  			basic->format_size);
1928  }
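/*
 * Worked example (editor's sketch, values hypothetical): on a part with
 * PMU_FL_INSTR_LATENCY, a meminfo->latency of 0x0000002d00000007 is split
 * by the code above into data->weight.var2_w = 0x7 (instruction latency,
 * low 16 bits) and, after the 32-bit shift, a cache latency of 0x2d that
 * lands in weight.full or weight.var1_dw depending on whether
 * PERF_SAMPLE_WEIGHT or PERF_SAMPLE_WEIGHT_STRUCT was requested.
 */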
1929  
1930  static inline void *
1931  get_next_pebs_record_by_bit(void *base, void *top, int bit)
1932  {
1933  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1934  	void *at;
1935  	u64 pebs_status;
1936  
1937  	/*
1938  	 * fmt0 does not have a status bitfield (does not use
1939  	 * pebs_record_nhm format)
1940  	 */
1941  	if (x86_pmu.intel_cap.pebs_format < 1)
1942  		return base;
1943  
1944  	if (base == NULL)
1945  		return NULL;
1946  
1947  	for (at = base; at < top; at += cpuc->pebs_record_size) {
1948  		unsigned long status = get_pebs_status(at);
1949  
1950  		if (test_bit(bit, (unsigned long *)&status)) {
1951  			/* PEBS v3 has accurate status bits */
1952  			if (x86_pmu.intel_cap.pebs_format >= 3)
1953  				return at;
1954  
1955  			if (status == (1 << bit))
1956  				return at;
1957  
1958  			/* clear non-PEBS bit and re-check */
1959  			pebs_status = status & cpuc->pebs_enabled;
1960  			pebs_status &= PEBS_COUNTER_MASK;
1961  			if (pebs_status == (1 << bit))
1962  				return at;
1963  		}
1964  	}
1965  	return NULL;
1966  }
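/*
 * Worked example (editor's sketch): on a pre-v3 PEBS machine the status
 * field can carry stale bits. Suppose we search for bit 0 and a record
 * reports status == 0x5 while only counter 0 is set in cpuc->pebs_enabled:
 * the exact-match check fails, but masking with pebs_enabled and
 * PEBS_COUNTER_MASK reduces the status to 0x1 == (1 << 0), so the record
 * is still attributed to the event in counter 0.
 */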
1967  
1968  void intel_pmu_auto_reload_read(struct perf_event *event)
1969  {
1970  	WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));
1971  
1972  	perf_pmu_disable(event->pmu);
1973  	intel_pmu_drain_pebs_buffer();
1974  	perf_pmu_enable(event->pmu);
1975  }
1976  
1977  /*
1978   * Special variant of intel_pmu_save_and_restart() for auto-reload.
1979   */
1980  static int
1981  intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
1982  {
1983  	struct hw_perf_event *hwc = &event->hw;
1984  	int shift = 64 - x86_pmu.cntval_bits;
1985  	u64 period = hwc->sample_period;
1986  	u64 prev_raw_count, new_raw_count;
1987  	s64 new, old;
1988  
1989  	WARN_ON(!period);
1990  
1991  	/*
1992  	 * drain_pebs() only happens when the PMU is disabled.
1993  	 */
1994  	WARN_ON(this_cpu_read(cpu_hw_events.enabled));
1995  
1996  	prev_raw_count = local64_read(&hwc->prev_count);
1997  	rdpmcl(hwc->event_base_rdpmc, new_raw_count);
1998  	local64_set(&hwc->prev_count, new_raw_count);
1999  
2000  	/*
2001  	 * Since the counter increments a negative counter value and
2002  	 * overflows on the sign switch, giving the interval:
2003  	 *
2004  	 *   [-period, 0]
2005  	 *
2006  	 * the difference between two consecutive reads is:
2007  	 *
2008  	 *   A) value2 - value1;
2009  	 *      when no overflows have happened in between,
2010  	 *
2011  	 *   B) (0 - value1) + (value2 - (-period));
2012  	 *      when one overflow happened in between,
2013  	 *
2014  	 *   C) (0 - value1) + (n - 1) * (period) + (value2 - (-period));
2015  	 *      when @n overflows happened in between.
2016  	 *
2017  	 * Here A) is the obvious difference, B) is the extension to the
2018  	 * discrete interval, where the first term is to the top of the
2019  	 * interval and the second term is from the bottom of the next
2020  	 * interval and C) the extension to multiple intervals, where the
2021  	 * middle term is the whole intervals covered.
2022  	 *
2023  	 * An equivalent of C, by reduction, is:
2024  	 *
2025  	 *   value2 - value1 + n * period
2026  	 */
2027  	new = ((s64)(new_raw_count << shift) >> shift);
2028  	old = ((s64)(prev_raw_count << shift) >> shift);
2029  	local64_add(new - old + count * period, &event->count);
2030  
2031  	local64_set(&hwc->period_left, -new);
2032  
2033  	perf_event_update_userpage(event);
2034  
2035  	return 0;
2036  }
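/*
 * Worked example for case B above (editor's addition, numbers are
 * hypothetical): with period = 100000 the counter reloads to -100000.
 * If the previous read saw value1 = -1000 (99000 events counted) and the
 * current read sees value2 = -99500 after one reload (count = 1), then
 *
 *	value2 - value1 + count * period
 *		= -99500 - (-1000) + 1 * 100000 = 1500
 *
 * which matches the 1000 events counted up to the overflow plus the 500
 * events counted after the reload.
 */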
2037  
2038  static __always_inline void
2039  __intel_pmu_pebs_event(struct perf_event *event,
2040  		       struct pt_regs *iregs,
2041  		       struct perf_sample_data *data,
2042  		       void *base, void *top,
2043  		       int bit, int count,
2044  		       void (*setup_sample)(struct perf_event *,
2045  					    struct pt_regs *,
2046  					    void *,
2047  					    struct perf_sample_data *,
2048  					    struct pt_regs *))
2049  {
2050  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2051  	struct hw_perf_event *hwc = &event->hw;
2052  	struct x86_perf_regs perf_regs;
2053  	struct pt_regs *regs = &perf_regs.regs;
2054  	void *at = get_next_pebs_record_by_bit(base, top, bit);
2055  	static struct pt_regs dummy_iregs;
2056  
2057  	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
2058  		/*
2059  		 * Currently, auto-reload is only enabled in fixed period mode.
2060  		 * The reload value is always hwc->sample_period.
2061  		 * This may need to change if auto-reload is ever
2062  		 * enabled in freq mode.
2063  		 */
2064  		intel_pmu_save_and_restart_reload(event, count);
2065  	} else if (!intel_pmu_save_and_restart(event))
2066  		return;
2067  
2068  	if (!iregs)
2069  		iregs = &dummy_iregs;
2070  
2071  	while (count > 1) {
2072  		setup_sample(event, iregs, at, data, regs);
2073  		perf_event_output(event, data, regs);
2074  		at += cpuc->pebs_record_size;
2075  		at = get_next_pebs_record_by_bit(at, top, bit);
2076  		count--;
2077  	}
2078  
2079  	setup_sample(event, iregs, at, data, regs);
2080  	if (iregs == &dummy_iregs) {
2081  		/*
2082  		 * The PEBS records may be drained in the non-overflow context,
2083  		 * e.g., large PEBS + context switch. Perf should treat the
2084  		 * last record the same as the other PEBS records, and not
2085  		 * invoke the generic overflow handler.
2086  		 */
2087  		perf_event_output(event, data, regs);
2088  	} else {
2089  		/*
2090  		 * All but the last record have been processed above.
2091  		 * The last one is left so the overflow handler can be called.
2092  		 */
2093  		if (perf_event_overflow(event, data, regs))
2094  			x86_pmu_stop(event, 0);
2095  	}
2096  }
2097  
2098  static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
2099  {
2100  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2101  	struct debug_store *ds = cpuc->ds;
2102  	struct perf_event *event = cpuc->events[0]; /* PMC0 only */
2103  	struct pebs_record_core *at, *top;
2104  	int n;
2105  
2106  	if (!x86_pmu.pebs_active)
2107  		return;
2108  
2109  	at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
2110  	top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
2111  
2112  	/*
2113  	 * Whatever else happens, drain the thing
2114  	 */
2115  	ds->pebs_index = ds->pebs_buffer_base;
2116  
2117  	if (!test_bit(0, cpuc->active_mask))
2118  		return;
2119  
2120  	WARN_ON_ONCE(!event);
2121  
2122  	if (!event->attr.precise_ip)
2123  		return;
2124  
2125  	n = top - at;
2126  	if (n <= 0) {
2127  		if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
2128  			intel_pmu_save_and_restart_reload(event, 0);
2129  		return;
2130  	}
2131  
2132  	__intel_pmu_pebs_event(event, iregs, data, at, top, 0, n,
2133  			       setup_pebs_fixed_sample_data);
2134  }
2135  
2136  static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
2137  {
2138  	struct perf_event *event;
2139  	int bit;
2140  
2141  	/*
2142  	 * The drain_pebs() could be called twice in a short period
2143  	 * for an auto-reload event in pmu::read(). No overflows
2144  	 * have happened in between.
2145  	 * intel_pmu_save_and_restart_reload() still needs to be
2146  	 * called to update event->count for this case.
2147  	 */
2148  	for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) {
2149  		event = cpuc->events[bit];
2150  		if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
2151  			intel_pmu_save_and_restart_reload(event, 0);
2152  	}
2153  }
2154  
2155  static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data)
2156  {
2157  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2158  	struct debug_store *ds = cpuc->ds;
2159  	struct perf_event *event;
2160  	void *base, *at, *top;
2161  	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
2162  	short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
2163  	int bit, i, size;
2164  	u64 mask;
2165  
2166  	if (!x86_pmu.pebs_active)
2167  		return;
2168  
2169  	base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
2170  	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
2171  
2172  	ds->pebs_index = ds->pebs_buffer_base;
2173  
2174  	mask = (1ULL << x86_pmu.max_pebs_events) - 1;
2175  	size = x86_pmu.max_pebs_events;
2176  	if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
2177  		mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
2178  		size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
2179  	}
2180  
2181  	if (unlikely(base >= top)) {
2182  		intel_pmu_pebs_event_update_no_drain(cpuc, size);
2183  		return;
2184  	}
2185  
2186  	for (at = base; at < top; at += x86_pmu.pebs_record_size) {
2187  		struct pebs_record_nhm *p = at;
2188  		u64 pebs_status;
2189  
2190  		pebs_status = p->status & cpuc->pebs_enabled;
2191  		pebs_status &= mask;
2192  
2193  		/* PEBS v3 has more accurate status bits */
2194  		if (x86_pmu.intel_cap.pebs_format >= 3) {
2195  			for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
2196  				counts[bit]++;
2197  
2198  			continue;
2199  		}
2200  
2201  		/*
2202  		 * On some CPUs the PEBS status can be zero when PEBS is
2203  		 * racing with clearing of GLOBAL_STATUS.
2204  		 *
2205  		 * Normally we would drop that record, but in the
2206  		 * case when there is only a single active PEBS event
2207  		 * we can assume it's for that event.
2208  		 */
2209  		if (!pebs_status && cpuc->pebs_enabled &&
2210  			!(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
2211  			pebs_status = p->status = cpuc->pebs_enabled;
2212  
2213  		bit = find_first_bit((unsigned long *)&pebs_status,
2214  					x86_pmu.max_pebs_events);
2215  		if (bit >= x86_pmu.max_pebs_events)
2216  			continue;
2217  
2218  		/*
2219  		 * The PEBS hardware does not deal well with the situation
2220  		 * when events happen close to each other and multiple bits
2221  		 * are set. But this should happen rarely.
2222  		 *
2223  		 * If these events include one PEBS and multiple non-PEBS
2224  		 * events, it doesn't impact the PEBS record. The record will
2225  		 * be handled normally. (slow path)
2226  		 *
2227  		 * If these events include two or more PEBS events, the
2228  		 * records for those events can be collapsed into a single
2229  		 * one, and it's not possible to reconstruct all the events
2230  		 * that caused the PEBS record. This is called a collision.
2231  		 * If a collision happens, the record is dropped.
2232  		 */
2233  		if (pebs_status != (1ULL << bit)) {
2234  			for_each_set_bit(i, (unsigned long *)&pebs_status, size)
2235  				error[i]++;
2236  			continue;
2237  		}
2238  
2239  		counts[bit]++;
2240  	}
2241  
2242  	for_each_set_bit(bit, (unsigned long *)&mask, size) {
2243  		if ((counts[bit] == 0) && (error[bit] == 0))
2244  			continue;
2245  
2246  		event = cpuc->events[bit];
2247  		if (WARN_ON_ONCE(!event))
2248  			continue;
2249  
2250  		if (WARN_ON_ONCE(!event->attr.precise_ip))
2251  			continue;
2252  
2253  		/* log dropped samples number */
2254  		/* log the number of dropped samples */
2255  			perf_log_lost_samples(event, error[bit]);
2256  
2257  			if (iregs && perf_event_account_interrupt(event))
2258  				x86_pmu_stop(event, 0);
2259  		}
2260  
2261  		if (counts[bit]) {
2262  			__intel_pmu_pebs_event(event, iregs, data, base,
2263  					       top, bit, counts[bit],
2264  					       setup_pebs_fixed_sample_data);
2265  		}
2266  	}
2267  }
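/*
 * Worked example of the collision handling above (editor's sketch): if a
 * record's masked pebs_status comes out as 0x3, i.e. two PEBS events were
 * collapsed into one record, neither event can be credited reliably, so
 * error[0] and error[1] are bumped and the record is skipped. In the final
 * loop this surfaces as perf_log_lost_samples() for both events instead of
 * a sample.
 */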
2268  
2269  static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
2270  {
2271  	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
2272  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2273  	int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events);
2274  	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
2275  	struct debug_store *ds = cpuc->ds;
2276  	struct perf_event *event;
2277  	void *base, *at, *top;
2278  	int bit, size;
2279  	u64 mask;
2280  
2281  	if (!x86_pmu.pebs_active)
2282  		return;
2283  
2284  	base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base;
2285  	top = (struct pebs_basic *)(unsigned long)ds->pebs_index;
2286  
2287  	ds->pebs_index = ds->pebs_buffer_base;
2288  
2289  	mask = ((1ULL << max_pebs_events) - 1) |
2290  	       (((1ULL << num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
2291  	size = INTEL_PMC_IDX_FIXED + num_counters_fixed;
2292  
2293  	if (unlikely(base >= top)) {
2294  		intel_pmu_pebs_event_update_no_drain(cpuc, size);
2295  		return;
2296  	}
2297  
2298  	for (at = base; at < top; at += cpuc->pebs_record_size) {
2299  		u64 pebs_status;
2300  
2301  		pebs_status = get_pebs_status(at) & cpuc->pebs_enabled;
2302  		pebs_status &= mask;
2303  
2304  		for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
2305  			counts[bit]++;
2306  	}
2307  
2308  	for_each_set_bit(bit, (unsigned long *)&mask, size) {
2309  		if (counts[bit] == 0)
2310  			continue;
2311  
2312  		event = cpuc->events[bit];
2313  		if (WARN_ON_ONCE(!event))
2314  			continue;
2315  
2316  		if (WARN_ON_ONCE(!event->attr.precise_ip))
2317  			continue;
2318  
2319  		__intel_pmu_pebs_event(event, iregs, data, base,
2320  				       top, bit, counts[bit],
2321  				       setup_pebs_adaptive_sample_data);
2322  	}
2323  }
2324  
2325  /*
2326   * BTS, PEBS probe and setup
2327   */
2328  
2329  void __init intel_ds_init(void)
2330  {
2331  	/*
2332  	 * No support for 32bit formats
2333  	 */
2334  	if (!boot_cpu_has(X86_FEATURE_DTES64))
2335  		return;
2336  
2337  	x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
2338  	x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
2339  	x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
2340  	if (x86_pmu.version <= 4)
2341  		x86_pmu.pebs_no_isolation = 1;
2342  
2343  	if (x86_pmu.pebs) {
2344  		char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
2345  		char *pebs_qual = "";
2346  		int format = x86_pmu.intel_cap.pebs_format;
2347  
2348  		if (format < 4)
2349  			x86_pmu.intel_cap.pebs_baseline = 0;
2350  
2351  		switch (format) {
2352  		case 0:
2353  			pr_cont("PEBS fmt0%c, ", pebs_type);
2354  			x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
2355  			/*
2356  			 * Using >PAGE_SIZE buffers makes the WRMSR to
2357  			 * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
2358  			 * mysteriously hang on Core2.
2359  			 *
2360  			 * As a workaround, we don't do this.
2361  			 */
2362  			x86_pmu.pebs_buffer_size = PAGE_SIZE;
2363  			x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
2364  			break;
2365  
2366  		case 1:
2367  			pr_cont("PEBS fmt1%c, ", pebs_type);
2368  			x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
2369  			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
2370  			break;
2371  
2372  		case 2:
2373  			pr_cont("PEBS fmt2%c, ", pebs_type);
2374  			x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
2375  			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
2376  			break;
2377  
2378  		case 3:
2379  			pr_cont("PEBS fmt3%c, ", pebs_type);
2380  			x86_pmu.pebs_record_size =
2381  						sizeof(struct pebs_record_skl);
2382  			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
2383  			x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
2384  			break;
2385  
2386  		case 5:
2387  			x86_pmu.pebs_ept = 1;
2388  			fallthrough;
2389  		case 4:
2390  			x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
2391  			x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
2392  			if (x86_pmu.intel_cap.pebs_baseline) {
2393  				x86_pmu.large_pebs_flags |=
2394  					PERF_SAMPLE_BRANCH_STACK |
2395  					PERF_SAMPLE_TIME;
2396  				x86_pmu.flags |= PMU_FL_PEBS_ALL;
2397  				x86_pmu.pebs_capable = ~0ULL;
2398  				pebs_qual = "-baseline";
2399  				x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
2400  			} else {
2401  				/* Only basic record supported */
2402  				x86_pmu.large_pebs_flags &=
2403  					~(PERF_SAMPLE_ADDR |
2404  					  PERF_SAMPLE_TIME |
2405  					  PERF_SAMPLE_DATA_SRC |
2406  					  PERF_SAMPLE_TRANSACTION |
2407  					  PERF_SAMPLE_REGS_USER |
2408  					  PERF_SAMPLE_REGS_INTR);
2409  			}
2410  			pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
2411  
2412  			if (!is_hybrid() && x86_pmu.intel_cap.pebs_output_pt_available) {
2413  				pr_cont("PEBS-via-PT, ");
2414  				x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
2415  			}
2416  
2417  			break;
2418  
2419  		default:
2420  			pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
2421  			x86_pmu.pebs = 0;
2422  		}
2423  	}
2424  }
2425  
2426  void perf_restore_debug_store(void)
2427  {
2428  	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
2429  
2430  	if (!x86_pmu.bts && !x86_pmu.pebs)
2431  		return;
2432  
2433  	wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
2434  }
2435