xref: /openbmc/linux/arch/x86/events/amd/brs.c (revision 22b6e7f3)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Implement support for AMD Fam19h Branch Sampling feature
4  * Based on specifications published in AMD PPR Fam19 Model 01
5  *
6  * Copyright 2021 Google LLC
7  * Contributed by Stephane Eranian <eranian@google.com>
8  */
9 #include <linux/kernel.h>
10 #include <linux/jump_label.h>
11 #include <asm/msr.h>
12 #include <asm/cpufeature.h>
13 
14 #include "../perf_event.h"
15 
/*
 * Sentinel stored in a branch-target register to mark the limit of
 * valid entries: all bits set except bit 0.
 */
#define BRS_POISON	(~1ULL)
17 
18 /* Debug Extension Configuration register layout */
19 union amd_debug_extn_cfg {
20 	__u64 val;
21 	struct {
22 		__u64	rsvd0:2,  /* reserved */
23 			brsmen:1, /* branch sample enable */
24 			rsvd4_3:2,/* reserved - must be 0x3 */
25 			vb:1,     /* valid branches recorded */
26 			rsvd2:10, /* reserved */
27 			msroff:4, /* index of next entry to write */
28 			rsvd3:4,  /* reserved */
29 			pmc:3,    /* #PMC holding the sampling event */
30 			rsvd4:37; /* reserved */
31 	};
32 };
33 
34 static inline unsigned int brs_from(int idx)
35 {
36 	return MSR_AMD_SAMP_BR_FROM + 2 * idx;
37 }
38 
39 static inline unsigned int brs_to(int idx)
40 {
41 	return MSR_AMD_SAMP_BR_FROM + 2 * idx + 1;
42 }
43 
44 static __always_inline void set_debug_extn_cfg(u64 val)
45 {
46 	/* bits[4:3] must always be set to 11b */
47 	__wrmsr(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3, val >> 32);
48 }
49 
50 static __always_inline u64 get_debug_extn_cfg(void)
51 {
52 	return __rdmsr(MSR_AMD_DBG_EXTN_CFG);
53 }
54 
55 static bool __init amd_brs_detect(void)
56 {
57 	if (!cpu_feature_enabled(X86_FEATURE_BRS))
58 		return false;
59 
60 	switch (boot_cpu_data.x86) {
61 	case 0x19: /* AMD Fam19h (Zen3) */
62 		x86_pmu.lbr_nr = 16;
63 
64 		/* No hardware filtering supported */
65 		x86_pmu.lbr_sel_map = NULL;
66 		x86_pmu.lbr_sel_mask = 0;
67 		break;
68 	default:
69 		return false;
70 	}
71 
72 	return true;
73 }
74 
75 /*
76  * Current BRS implementation does not support branch type or privilege level
77  * filtering. Therefore, this function simply enforces these limitations. No need for
78  * a br_sel_map. Software filtering is not supported because it would not correlate well
79  * with a sampling period.
80  */
81 static int amd_brs_setup_filter(struct perf_event *event)
82 {
83 	u64 type = event->attr.branch_sample_type;
84 
85 	/* No BRS support */
86 	if (!x86_pmu.lbr_nr)
87 		return -EOPNOTSUPP;
88 
89 	/* Can only capture all branches, i.e., no filtering */
90 	if ((type & ~PERF_SAMPLE_BRANCH_PLM_ALL) != PERF_SAMPLE_BRANCH_ANY)
91 		return -EINVAL;
92 
93 	return 0;
94 }
95 
96 static inline int amd_is_brs_event(struct perf_event *e)
97 {
98 	return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
99 }
100 
101 int amd_brs_hw_config(struct perf_event *event)
102 {
103 	int ret = 0;
104 
105 	/*
106 	 * Due to interrupt holding, BRS is not recommended in
107 	 * counting mode.
108 	 */
109 	if (!is_sampling_event(event))
110 		return -EINVAL;
111 
112 	/*
113 	 * Due to the way BRS operates by holding the interrupt until
114 	 * lbr_nr entries have been captured, it does not make sense
115 	 * to allow sampling on BRS with an event that does not match
116 	 * what BRS is capturing, i.e., retired taken branches.
117 	 * Otherwise the correlation with the event's period is even
118 	 * more loose:
119 	 *
120 	 * With retired taken branch:
121 	 *   Effective P = P + 16 + X
122 	 * With any other event:
123 	 *   Effective P = P + Y + X
124 	 *
125 	 * Where X is the number of taken branches due to interrupt
126 	 * skid. Skid is large.
127 	 *
128 	 * Where Y is the occurences of the event while BRS is
129 	 * capturing the lbr_nr entries.
130 	 *
131 	 * By using retired taken branches, we limit the impact on the
132 	 * Y variable. We know it cannot be more than the depth of
133 	 * BRS.
134 	 */
135 	if (!amd_is_brs_event(event))
136 		return -EINVAL;
137 
138 	/*
139 	 * BRS implementation does not work with frequency mode
140 	 * reprogramming of the period.
141 	 */
142 	if (event->attr.freq)
143 		return -EINVAL;
144 	/*
145 	 * The kernel subtracts BRS depth from period, so it must
146 	 * be big enough.
147 	 */
148 	if (event->attr.sample_period <= x86_pmu.lbr_nr)
149 		return -EINVAL;
150 
151 	/*
152 	 * Check if we can allow PERF_SAMPLE_BRANCH_STACK
153 	 */
154 	ret = amd_brs_setup_filter(event);
155 
156 	/* only set in case of success */
157 	if (!ret)
158 		event->hw.flags |= PERF_X86_EVENT_AMD_BRS;
159 
160 	return ret;
161 }
162 
163 /* tos = top of stack, i.e., last valid entry written */
164 static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg)
165 {
166 	/*
167 	 * msroff: index of next entry to write so top-of-stack is one off
168 	 * if BRS is full then msroff is set back to 0.
169 	 */
170 	return (cfg->msroff ? cfg->msroff : x86_pmu.lbr_nr) - 1;
171 }
172 
173 /*
174  * make sure we have a sane BRS offset to begin with
175  * especially with kexec
176  */
177 void amd_brs_reset(void)
178 {
179 	if (!cpu_feature_enabled(X86_FEATURE_BRS))
180 		return;
181 
182 	/*
183 	 * Reset config
184 	 */
185 	set_debug_extn_cfg(0);
186 
187 	/*
188 	 * Mark first entry as poisoned
189 	 */
190 	wrmsrl(brs_to(0), BRS_POISON);
191 }
192 
193 int __init amd_brs_init(void)
194 {
195 	if (!amd_brs_detect())
196 		return -EOPNOTSUPP;
197 
198 	pr_cont("%d-deep BRS, ", x86_pmu.lbr_nr);
199 
200 	return 0;
201 }
202 
203 void amd_brs_enable(void)
204 {
205 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
206 	union amd_debug_extn_cfg cfg;
207 
208 	/* Activate only on first user */
209 	if (++cpuc->brs_active > 1)
210 		return;
211 
212 	cfg.val    = 0; /* reset all fields */
213 	cfg.brsmen = 1; /* enable branch sampling */
214 
215 	/* Set enable bit */
216 	set_debug_extn_cfg(cfg.val);
217 }
218 
219 void amd_brs_enable_all(void)
220 {
221 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
222 	if (cpuc->lbr_users)
223 		amd_brs_enable();
224 }
225 
226 void amd_brs_disable(void)
227 {
228 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
229 	union amd_debug_extn_cfg cfg;
230 
231 	/* Check if active (could be disabled via x86_pmu_disable_all()) */
232 	if (!cpuc->brs_active)
233 		return;
234 
235 	/* Only disable for last user */
236 	if (--cpuc->brs_active)
237 		return;
238 
239 	/*
240 	 * Clear the brsmen bit but preserve the others as they contain
241 	 * useful state such as vb and msroff
242 	 */
243 	cfg.val = get_debug_extn_cfg();
244 
245 	/*
246 	 * When coming in on interrupt and BRS is full, then hw will have
247 	 * already stopped BRS, no need to issue wrmsr again
248 	 */
249 	if (cfg.brsmen) {
250 		cfg.brsmen = 0;
251 		set_debug_extn_cfg(cfg.val);
252 	}
253 }
254 
255 void amd_brs_disable_all(void)
256 {
257 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
258 	if (cpuc->lbr_users)
259 		amd_brs_disable();
260 }
261 
262 static bool amd_brs_match_plm(struct perf_event *event, u64 to)
263 {
264 	int type = event->attr.branch_sample_type;
265 	int plm_k = PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_HV;
266 	int plm_u = PERF_SAMPLE_BRANCH_USER;
267 
268 	if (!(type & plm_k) && kernel_ip(to))
269 		return 0;
270 
271 	if (!(type & plm_u) && !kernel_ip(to))
272 		return 0;
273 
274 	return 1;
275 }
276 
277 /*
278  * Caller must ensure amd_brs_inuse() is true before calling
279  * return:
280  */
281 void amd_brs_drain(void)
282 {
283 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
284 	struct perf_event *event = cpuc->events[0];
285 	struct perf_branch_entry *br = cpuc->lbr_entries;
286 	union amd_debug_extn_cfg cfg;
287 	u32 i, nr = 0, num, tos, start;
288 	u32 shift = 64 - boot_cpu_data.x86_virt_bits;
289 
290 	/*
291 	 * BRS event forced on PMC0,
292 	 * so check if there is an event.
293 	 * It is possible to have lbr_users > 0 but the event
294 	 * not yet scheduled due to long latency PMU irq
295 	 */
296 	if (!event)
297 		goto empty;
298 
299 	cfg.val = get_debug_extn_cfg();
300 
301 	/* Sanity check [0-x86_pmu.lbr_nr] */
302 	if (WARN_ON_ONCE(cfg.msroff >= x86_pmu.lbr_nr))
303 		goto empty;
304 
305 	/* No valid branch */
306 	if (cfg.vb == 0)
307 		goto empty;
308 
309 	/*
310 	 * msr.off points to next entry to be written
311 	 * tos = most recent entry index = msr.off - 1
312 	 * BRS register buffer saturates, so we know we have
313 	 * start < tos and that we have to read from start to tos
314 	 */
315 	start = 0;
316 	tos = amd_brs_get_tos(&cfg);
317 
318 	num = tos - start + 1;
319 
320 	/*
321 	 * BRS is only one pass (saturation) from MSROFF to depth-1
322 	 * MSROFF wraps to zero when buffer is full
323 	 */
324 	for (i = 0; i < num; i++) {
325 		u32 brs_idx = tos - i;
326 		u64 from, to;
327 
328 		rdmsrl(brs_to(brs_idx), to);
329 
330 		/* Entry does not belong to us (as marked by kernel) */
331 		if (to == BRS_POISON)
332 			break;
333 
334 		/*
335 		 * Sign-extend SAMP_BR_TO to 64 bits, bits 61-63 are reserved.
336 		 * Necessary to generate proper virtual addresses suitable for
337 		 * symbolization
338 		 */
339 		to = (u64)(((s64)to << shift) >> shift);
340 
341 		if (!amd_brs_match_plm(event, to))
342 			continue;
343 
344 		rdmsrl(brs_from(brs_idx), from);
345 
346 		perf_clear_branch_entry_bitfields(br+nr);
347 
348 		br[nr].from = from;
349 		br[nr].to   = to;
350 
351 		nr++;
352 	}
353 empty:
354 	/* Record number of sampled branches */
355 	cpuc->lbr_stack.nr = nr;
356 }
357 
358 /*
359  * Poison most recent entry to prevent reuse by next task
360  * required because BRS entry are not tagged by PID
361  */
362 static void amd_brs_poison_buffer(void)
363 {
364 	union amd_debug_extn_cfg cfg;
365 	unsigned int idx;
366 
367 	/* Get current state */
368 	cfg.val = get_debug_extn_cfg();
369 
370 	/* idx is most recently written entry */
371 	idx = amd_brs_get_tos(&cfg);
372 
373 	/* Poison target of entry */
374 	wrmsrl(brs_to(idx), BRS_POISON);
375 }
376 
377 /*
378  * On context switch in, we need to make sure no samples from previous user
379  * are left in the BRS.
380  *
381  * On ctxswin, sched_in = true, called after the PMU has started
382  * On ctxswout, sched_in = false, called before the PMU is stopped
383  */
384 void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
385 {
386 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
387 
388 	/* no active users */
389 	if (!cpuc->lbr_users)
390 		return;
391 
392 	/*
393 	 * On context switch in, we need to ensure we do not use entries
394 	 * from previous BRS user on that CPU, so we poison the buffer as
395 	 * a faster way compared to resetting all entries.
396 	 */
397 	if (sched_in)
398 		amd_brs_poison_buffer();
399 }
400 
401 /*
402  * called from ACPI processor_idle.c or acpi_pad.c
403  * with interrupts disabled
404  */
405 void noinstr perf_amd_brs_lopwr_cb(bool lopwr_in)
406 {
407 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
408 	union amd_debug_extn_cfg cfg;
409 
410 	/*
411 	 * on mwait in, we may end up in non C0 state.
412 	 * we must disable branch sampling to avoid holding the NMI
413 	 * for too long. We disable it in hardware but we
414 	 * keep the state in cpuc, so we can re-enable.
415 	 *
416 	 * The hardware will deliver the NMI if needed when brsmen cleared
417 	 */
418 	if (cpuc->brs_active) {
419 		cfg.val = get_debug_extn_cfg();
420 		cfg.brsmen = !lopwr_in;
421 		set_debug_extn_cfg(cfg.val);
422 	}
423 }
424 
/*
 * perf_lopwr_cb static call: declared here with the signature of
 * perf_amd_brs_lopwr_cb() and exported (GPL, trampoline only). Its target
 * is installed by amd_brs_lopwr_init().
 */
DEFINE_STATIC_CALL_NULL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
EXPORT_STATIC_CALL_TRAMP_GPL(perf_lopwr_cb);

/* Point the perf_lopwr_cb static call at perf_amd_brs_lopwr_cb(). */
void __init amd_brs_lopwr_init(void)
{
	static_call_update(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
}
432