// SPDX-License-Identifier: GPL-2.0
/*
 * Implement support for AMD Fam19h Branch Sampling feature
 * Based on specifications published in AMD PPR Fam19 Model 01
 *
 * Copyright 2021 Google LLC
 * Contributed by Stephane Eranian <eranian@google.com>
 */
#include <linux/kernel.h>
#include <linux/jump_label.h>
#include <asm/msr.h>
#include <asm/cpufeature.h>

#include "../perf_event.h"

#define BRS_POISON	0xFFFFFFFFFFFFFFFEULL /* mark limit of valid entries */

/* Debug Extension Configuration register layout */
union amd_debug_extn_cfg {
	__u64 val;
	struct {
		__u64	rsvd0:2,  /* reserved */
			brsmen:1, /* branch sample enable */
			rsvd4_3:2,/* reserved - must be 0x3 */
			vb:1,     /* valid branches recorded */
			rsvd2:10, /* reserved */
			msroff:4, /* index of next entry to write */
			rsvd3:4,  /* reserved */
			pmc:3,    /* #PMC holding the sampling event */
			rsvd4:37; /* reserved */
	};
};
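
/*
 * For reference, the bit positions implied by the bitfield above:
 * brsmen is bit 2, the always-11b field occupies bits 4:3, vb is bit 5,
 * msroff spans bits 19:16 and pmc spans bits 26:24; everything else is
 * reserved. The field widths add up to the full 64-bit register.
 */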

static inline unsigned int brs_from(int idx)
{
	return MSR_AMD_SAMP_BR_FROM + 2 * idx;
}

static inline unsigned int brs_to(int idx)
{
	return MSR_AMD_SAMP_BR_FROM + 2 * idx + 1;
}
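
/*
 * The FROM/TO MSRs are interleaved: entry 0 lives at MSR_AMD_SAMP_BR_FROM
 * and MSR_AMD_SAMP_BR_FROM + 1, entry 1 at + 2 and + 3, and so on, which
 * is what the "2 * idx" arithmetic above encodes.
 */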

static __always_inline void set_debug_extn_cfg(u64 val)
{
	/* bits[4:3] must always be set to 11b */
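	/*
	 * __wrmsr() takes the low and high 32 bits separately; the 11b
	 * requirement only touches the low half, hence the OR on the
	 * low-word argument below.
	 */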
	__wrmsr(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3, val >> 32);
}

static __always_inline u64 get_debug_extn_cfg(void)
{
	return __rdmsr(MSR_AMD_DBG_EXTN_CFG);
}

static bool __init amd_brs_detect(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_BRS))
		return false;

	switch (boot_cpu_data.x86) {
	case 0x19: /* AMD Fam19h (Zen3) */
		x86_pmu.lbr_nr = 16;

		/* No hardware filtering supported */
		x86_pmu.lbr_sel_map = NULL;
		x86_pmu.lbr_sel_mask = 0;
		break;
	default:
		return false;
	}

	return true;
}

/*
 * The current BRS implementation does not support branch type or privilege
 * level filtering, so this function simply enforces these limitations; no
 * br_sel_map is needed. Software filtering is not supported because it would
 * not correlate well with a sampling period.
 */
static int amd_brs_setup_filter(struct perf_event *event)
{
	u64 type = event->attr.branch_sample_type;

	/* No BRS support */
	if (!x86_pmu.lbr_nr)
		return -EOPNOTSUPP;

	/* Can only capture all branches, i.e., no filtering */
	if ((type & ~PERF_SAMPLE_BRANCH_PLM_ALL) != PERF_SAMPLE_BRANCH_ANY)
		return -EINVAL;

	return 0;
}
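
/*
 * Concretely, branch_sample_type = PERF_SAMPLE_BRANCH_ANY, optionally
 * combined with the privilege-level bits (PERF_SAMPLE_BRANCH_USER,
 * _KERNEL, _HV), is accepted; requesting any finer-grained type such as
 * PERF_SAMPLE_BRANCH_ANY_CALL is rejected with -EINVAL.
 */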

static inline int amd_is_brs_event(struct perf_event *e)
{
	return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
}

int amd_brs_hw_config(struct perf_event *event)
{
	int ret = 0;

	/*
	 * Because BRS holds off the interrupt until its buffer fills up,
	 * it is not recommended in counting mode.
	 */
	if (!is_sampling_event(event))
		return -EINVAL;

	/*
	 * Due to the way BRS operates by holding the interrupt until
	 * lbr_nr entries have been captured, it does not make sense
	 * to allow sampling on BRS with an event that does not match
	 * what BRS is capturing, i.e., retired taken branches.
	 * Otherwise the correlation with the event's period is even
	 * looser:
	 *
	 * With retired taken branch:
	 *   Effective P = P + 16 + X
	 * With any other event:
	 *   Effective P = P + Y + X
	 *
	 * Where X is the number of taken branches due to interrupt
	 * skid. Skid is large.
	 *
	 * Where Y is the occurrences of the event while BRS is
	 * capturing the lbr_nr entries.
	 *
	 * By using retired taken branches, we limit the impact on the
	 * Y variable. We know it cannot be more than the depth of
	 * BRS.
	 */
	if (!amd_is_brs_event(event))
		return -EINVAL;

	/*
	 * The BRS implementation does not work with frequency mode
	 * reprogramming of the period.
	 */
	if (event->attr.freq)
		return -EINVAL;

	/*
	 * The kernel subtracts the BRS depth from the period, so the
	 * period must be larger than the depth.
	 */
	if (event->attr.sample_period <= x86_pmu.lbr_nr)
		return -EINVAL;

	/*
	 * Check if we can allow PERF_SAMPLE_BRANCH_STACK
	 */
	ret = amd_brs_setup_filter(event);

	/* only set in case of success */
	if (!ret)
		event->hw.flags |= PERF_X86_EVENT_AMD_BRS;

	return ret;
}
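
/*
 * For illustration only (not part of this file): a user-space
 * perf_event_attr that satisfies the checks above would use a raw config
 * matching AMD_FAM19H_BRS_EVENT (retired taken branches), a fixed
 * sample_period greater than 16 (the BRS depth), freq = 0, and
 * sample_type including PERF_SAMPLE_BRANCH_STACK with
 * branch_sample_type = PERF_SAMPLE_BRANCH_ANY plus optional
 * privilege-level bits.
 */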

/* tos = top of stack, i.e., last valid entry written */
static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg)
{
	/*
	 * msroff is the index of the next entry to write, so top-of-stack
	 * is one behind it. When BRS is full, msroff wraps back to 0.
	 */
	return (cfg->msroff ? cfg->msroff : x86_pmu.lbr_nr) - 1;
}
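
/*
 * Worked example with the 16-deep Fam19h buffer: msroff == 5 means entries
 * 0..4 were written since the last wrap and tos == 4; msroff == 0 with
 * valid branches recorded means the buffer saturated, so tos == 15.
 */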

/*
 * Make sure we start from a sane BRS offset, especially after kexec.
 */
void amd_brs_reset(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_BRS))
		return;

	/*
	 * Reset config
	 */
	set_debug_extn_cfg(0);

	/*
	 * Mark first entry as poisoned
	 */
	wrmsrl(brs_to(0), BRS_POISON);
}

int __init amd_brs_init(void)
{
	if (!amd_brs_detect())
		return -EOPNOTSUPP;

	pr_cont("%d-deep BRS, ", x86_pmu.lbr_nr);

	return 0;
}

void amd_brs_enable(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/* Activate only on first user */
	if (++cpuc->brs_active > 1)
		return;

	cfg.val    = 0; /* reset all fields */
	cfg.brsmen = 1; /* enable branch sampling */

	/* Set enable bit */
	set_debug_extn_cfg(cfg.val);
}
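
/*
 * With only brsmen set, cfg.val is 0x4 (bit 2); set_debug_extn_cfg() then
 * ORs in the mandatory 11b at bits 4:3, so the MSR ends up holding 0x1c.
 */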

void amd_brs_enable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users)
		amd_brs_enable();
}

void amd_brs_disable(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/* Check if active (could be disabled via x86_pmu_disable_all()) */
	if (!cpuc->brs_active)
		return;

	/* Only disable for last user */
	if (--cpuc->brs_active)
		return;

	/*
	 * Clear the brsmen bit but preserve the others as they contain
	 * useful state such as vb and msroff.
	 */
	cfg.val = get_debug_extn_cfg();

	/*
	 * When coming in from the interrupt with BRS full, the hardware has
	 * already stopped BRS, so there is no need to issue the wrmsr again.
	 */
	if (cfg.brsmen) {
		cfg.brsmen = 0;
		set_debug_extn_cfg(cfg.val);
	}
}

void amd_brs_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users)
		amd_brs_disable();
}

static bool amd_brs_match_plm(struct perf_event *event, u64 to)
{
	int type = event->attr.branch_sample_type;
	int plm_k = PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_HV;
	int plm_u = PERF_SAMPLE_BRANCH_USER;

	if (!(type & plm_k) && kernel_ip(to))
		return false;

	if (!(type & plm_u) && !kernel_ip(to))
		return false;

	return true;
}

/*
 * Caller must ensure amd_brs_inuse() is true before calling.
 */
void amd_brs_drain(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct perf_event *event = cpuc->events[0];
	struct perf_branch_entry *br = cpuc->lbr_entries;
	union amd_debug_extn_cfg cfg;
	u32 i, nr = 0, num, tos, start;
	u32 shift = 64 - boot_cpu_data.x86_virt_bits;

	/*
	 * The BRS event is forced on PMC0, so check whether there is an
	 * event there. It is possible to have lbr_users > 0 but the event
	 * not yet scheduled due to a long-latency PMU irq.
	 */
	if (!event)
		goto empty;

	cfg.val = get_debug_extn_cfg();

	/* Sanity check: msroff must be in [0, x86_pmu.lbr_nr) */
	if (WARN_ON_ONCE(cfg.msroff >= x86_pmu.lbr_nr))
		goto empty;

	/* No valid branch */
	if (cfg.vb == 0)
		goto empty;

	/*
	 * msroff points to the next entry to be written, so
	 * tos = most recent entry index = msroff - 1.
	 * The BRS register buffer saturates, so we know we have
	 * start <= tos and that we have to read from start to tos.
	 */
	start = 0;
	tos = amd_brs_get_tos(&cfg);

	num = tos - start + 1;

	/*
	 * BRS is only one pass (saturation) from MSROFF to depth-1.
	 * MSROFF wraps to zero when the buffer is full.
	 */
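	/*
	 * Worked example: with msroff == 5 and vb == 1, tos == 4 and
	 * num == 5, so the loop below reads entries 4, 3, 2, 1, 0, i.e.
	 * from the most recent branch backwards.
	 */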
	for (i = 0; i < num; i++) {
		u32 brs_idx = tos - i;
		u64 from, to;

		rdmsrl(brs_to(brs_idx), to);

		/* Entry does not belong to us (as marked by kernel) */
		if (to == BRS_POISON)
			break;

		/*
		 * Sign-extend SAMP_BR_TO to 64 bits, bits 61-63 are reserved.
		 * Necessary to generate proper virtual addresses suitable for
		 * symbolization.
		 */
		to = (u64)(((s64)to << shift) >> shift);
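		/*
		 * For example, with 48-bit virtual addresses shift is 16, so
		 * a raw value of 0x0000ffff81234567 sign-extends from bit 47
		 * to the canonical address 0xffffffff81234567.
		 */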

		if (!amd_brs_match_plm(event, to))
			continue;

		rdmsrl(brs_from(brs_idx), from);

		perf_clear_branch_entry_bitfields(br+nr);

		br[nr].from = from;
		br[nr].to   = to;

		nr++;
	}
empty:
	/* Record number of sampled branches */
	cpuc->lbr_stack.nr = nr;
}

/*
 * Poison the most recent entry to prevent reuse by the next task;
 * required because BRS entries are not tagged by PID.
 */
static void amd_brs_poison_buffer(void)
{
	union amd_debug_extn_cfg cfg;
	unsigned int idx;

	/* Get current state */
	cfg.val = get_debug_extn_cfg();

	/* idx is most recently written entry */
	idx = amd_brs_get_tos(&cfg);

	/* Poison target of entry */
	wrmsrl(brs_to(idx), BRS_POISON);
}

/*
 * On context switch in, we need to make sure no samples from a previous user
 * are left in the BRS.
 *
 * On ctxswin, sched_in = true, called after the PMU has started.
 * On ctxswout, sched_in = false, called before the PMU is stopped.
 */
void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/* no active users */
	if (!cpuc->lbr_users)
		return;

	/*
	 * On context switch in, we need to ensure we do not use entries
	 * from the previous BRS user on that CPU, so we poison the buffer:
	 * this is faster than resetting all entries.
	 */
	if (sched_in)
		amd_brs_poison_buffer();
}

/*
 * Called from ACPI processor_idle.c or acpi_pad.c with interrupts disabled.
 */
void noinstr perf_amd_brs_lopwr_cb(bool lopwr_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/*
	 * On mwait in, we may end up in a non-C0 state. We must disable
	 * branch sampling to avoid holding the NMI for too long. We disable
	 * it in hardware but keep the state in cpuc, so we can re-enable it
	 * later.
	 *
	 * The hardware will deliver the NMI if needed once brsmen is cleared.
	 */
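	/*
	 * lopwr_in == true means we are entering the low-power state and
	 * brsmen gets cleared; lopwr_in == false means we are resuming and
	 * brsmen is set again.
	 */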
	if (cpuc->brs_active) {
		cfg.val = get_debug_extn_cfg();
		cfg.brsmen = !lopwr_in;
		set_debug_extn_cfg(cfg.val);
	}
}

DEFINE_STATIC_CALL_NULL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
EXPORT_STATIC_CALL_TRAMP_GPL(perf_lopwr_cb);

void __init amd_brs_lopwr_init(void)
{
	static_call_update(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
}