1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/perf_event.h>
3 #include <asm/perf_event.h>
4
5 #include "../perf_event.h"
6
/* LBR Branch Select valid bits (bit positions 0 through 8 below) */
#define LBR_SELECT_MASK		0x1ff

/*
 * LBR Branch Select filter bit positions. When a bit is set, branches
 * of the corresponding type are not recorded.
 */
#define LBR_SELECT_KERNEL		0	/* Branches ending in CPL = 0 */
#define LBR_SELECT_USER			1	/* Branches ending in CPL > 0 */
#define LBR_SELECT_JCC			2	/* Conditional branches */
#define LBR_SELECT_CALL_NEAR_REL	3	/* Near relative calls */
#define LBR_SELECT_CALL_NEAR_IND	4	/* Near indirect calls */
#define LBR_SELECT_RET_NEAR		5	/* Near returns */
#define LBR_SELECT_JMP_NEAR_IND		6	/* Near indirect jumps (excl. calls and returns) */
#define LBR_SELECT_JMP_NEAR_REL		7	/* Near relative jumps (excl. calls) */
#define LBR_SELECT_FAR_BRANCH		8	/* Far branches */

/* Bit masks corresponding to the bit positions above */
#define LBR_KERNEL	BIT(LBR_SELECT_KERNEL)
#define LBR_USER	BIT(LBR_SELECT_USER)
#define LBR_JCC		BIT(LBR_SELECT_JCC)
#define LBR_REL_CALL	BIT(LBR_SELECT_CALL_NEAR_REL)
#define LBR_IND_CALL	BIT(LBR_SELECT_CALL_NEAR_IND)
#define LBR_RETURN	BIT(LBR_SELECT_RET_NEAR)
#define LBR_REL_JMP	BIT(LBR_SELECT_JMP_NEAR_REL)
#define LBR_IND_JMP	BIT(LBR_SELECT_JMP_NEAR_IND)
#define LBR_FAR		BIT(LBR_SELECT_FAR_BRANCH)
#define LBR_NOT_SUPP	-1	/* unsupported filter */
#define LBR_IGNORE	0	/* sample type that contributes no select bits */

/* Every branch-type bit; the privilege bits (kernel/user) are not included */
#define LBR_ANY		\
	(LBR_JCC | LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN |	\
	 LBR_REL_JMP | LBR_IND_JMP | LBR_FAR)
39
/*
 * Raw image of one LBR record as read from its from/to MSR pair. Each
 * register value can be accessed whole through .full or field by field
 * through .split.
 */
struct branch_entry {
	union {
		struct {
			u64	ip:58;		/* branch source address */
			u64	ip_sign_ext:5;	/* presumably sign-extension bits of ip; not read in this file */
			u64	mispredict:1;	/* copied into perf_branch_entry.mispred */
		} split;
		u64		full;		/* whole from-register value */
	} from;

	union {
		struct {
			u64	ip:58;		/* branch target address */
			u64	ip_sign_ext:3;	/* presumably sign-extension bits of ip; not read in this file */
			u64	reserved:1;	/* when set, the record is erroneous (see Erratum 1452) */
			u64	spec:1;		/* together with valid, encodes the speculation status */
			u64	valid:1;	/* valid = 0 and spec = 0 means no branch was recorded */
		} split;
		u64		full;		/* whole to-register value */
	} to;
};
61
amd_pmu_lbr_set_from(unsigned int idx,u64 val)62 static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val)
63 {
64 wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
65 }
66
amd_pmu_lbr_set_to(unsigned int idx,u64 val)67 static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val)
68 {
69 wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
70 }
71
amd_pmu_lbr_get_from(unsigned int idx)72 static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx)
73 {
74 u64 val;
75
76 rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
77
78 return val;
79 }
80
amd_pmu_lbr_get_to(unsigned int idx)81 static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx)
82 {
83 u64 val;
84
85 rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
86
87 return val;
88 }
89
sign_ext_branch_ip(u64 ip)90 static __always_inline u64 sign_ext_branch_ip(u64 ip)
91 {
92 u32 shift = 64 - boot_cpu_data.x86_virt_bits;
93
94 return (u64)(((s64)ip << shift) >> shift);
95 }
96
amd_pmu_lbr_filter(void)97 static void amd_pmu_lbr_filter(void)
98 {
99 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
100 int br_sel = cpuc->br_sel, offset, type, i, j;
101 bool compress = false;
102 bool fused_only = false;
103 u64 from, to;
104
105 /* If sampling all branches, there is nothing to filter */
106 if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
107 ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
108 fused_only = true;
109
110 for (i = 0; i < cpuc->lbr_stack.nr; i++) {
111 from = cpuc->lbr_entries[i].from;
112 to = cpuc->lbr_entries[i].to;
113 type = branch_type_fused(from, to, 0, &offset);
114
115 /*
116 * Adjust the branch from address in case of instruction
117 * fusion where it points to an instruction preceding the
118 * actual branch
119 */
120 if (offset) {
121 cpuc->lbr_entries[i].from += offset;
122 if (fused_only)
123 continue;
124 }
125
126 /* If type does not correspond, then discard */
127 if (type == X86_BR_NONE || (br_sel & type) != type) {
128 cpuc->lbr_entries[i].from = 0; /* mark invalid */
129 compress = true;
130 }
131
132 if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
133 cpuc->lbr_entries[i].type = common_branch_type(type);
134 }
135
136 if (!compress)
137 return;
138
139 /* Remove all invalid entries */
140 for (i = 0; i < cpuc->lbr_stack.nr; ) {
141 if (!cpuc->lbr_entries[i].from) {
142 j = i;
143 while (++j < cpuc->lbr_stack.nr)
144 cpuc->lbr_entries[j - 1] = cpuc->lbr_entries[j];
145 cpuc->lbr_stack.nr--;
146 if (!cpuc->lbr_entries[i].from)
147 continue;
148 }
149 i++;
150 }
151 }
152
153 static const int lbr_spec_map[PERF_BR_SPEC_MAX] = {
154 PERF_BR_SPEC_NA,
155 PERF_BR_SPEC_WRONG_PATH,
156 PERF_BR_NON_SPEC_CORRECT_PATH,
157 PERF_BR_SPEC_CORRECT_PATH,
158 };
159
amd_pmu_lbr_read(void)160 void amd_pmu_lbr_read(void)
161 {
162 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
163 struct perf_branch_entry *br = cpuc->lbr_entries;
164 struct branch_entry entry;
165 int out = 0, idx, i;
166
167 if (!cpuc->lbr_users)
168 return;
169
170 for (i = 0; i < x86_pmu.lbr_nr; i++) {
171 entry.from.full = amd_pmu_lbr_get_from(i);
172 entry.to.full = amd_pmu_lbr_get_to(i);
173
174 /*
175 * Check if a branch has been logged; if valid = 0, spec = 0
176 * then no branch was recorded; if reserved = 1 then an
177 * erroneous branch was recorded (see Erratum 1452)
178 */
179 if ((!entry.to.split.valid && !entry.to.split.spec) ||
180 entry.to.split.reserved)
181 continue;
182
183 perf_clear_branch_entry_bitfields(br + out);
184
185 br[out].from = sign_ext_branch_ip(entry.from.split.ip);
186 br[out].to = sign_ext_branch_ip(entry.to.split.ip);
187 br[out].mispred = entry.from.split.mispredict;
188 br[out].predicted = !br[out].mispred;
189
190 /*
191 * Set branch speculation information using the status of
192 * the valid and spec bits.
193 *
194 * When valid = 0, spec = 0, no branch was recorded and the
195 * entry is discarded as seen above.
196 *
197 * When valid = 0, spec = 1, the recorded branch was
198 * speculative but took the wrong path.
199 *
200 * When valid = 1, spec = 0, the recorded branch was
201 * non-speculative but took the correct path.
202 *
203 * When valid = 1, spec = 1, the recorded branch was
204 * speculative and took the correct path
205 */
206 idx = (entry.to.split.valid << 1) | entry.to.split.spec;
207 br[out].spec = lbr_spec_map[idx];
208 out++;
209 }
210
211 cpuc->lbr_stack.nr = out;
212
213 /*
214 * Internal register renaming always ensures that LBR From[0] and
215 * LBR To[0] always represent the TOS
216 */
217 cpuc->lbr_stack.hw_idx = 0;
218
219 /* Perform further software filtering */
220 amd_pmu_lbr_filter();
221 }
222
223 static const int lbr_select_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
224 [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER,
225 [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL,
226 [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGNORE,
227
228 [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY,
229 [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL | LBR_FAR,
230 [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR,
231 [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL,
232 [PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT] = LBR_NOT_SUPP,
233 [PERF_SAMPLE_BRANCH_IN_TX_SHIFT] = LBR_NOT_SUPP,
234 [PERF_SAMPLE_BRANCH_NO_TX_SHIFT] = LBR_NOT_SUPP,
235 [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC,
236
237 [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP,
238 [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
239 [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL,
240
241 [PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT] = LBR_NOT_SUPP,
242 [PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT] = LBR_NOT_SUPP,
243 };
244
amd_pmu_lbr_setup_filter(struct perf_event * event)245 static int amd_pmu_lbr_setup_filter(struct perf_event *event)
246 {
247 struct hw_perf_event_extra *reg = &event->hw.branch_reg;
248 u64 br_type = event->attr.branch_sample_type;
249 u64 mask = 0, v;
250 int i;
251
252 /* No LBR support */
253 if (!x86_pmu.lbr_nr)
254 return -EOPNOTSUPP;
255
256 if (br_type & PERF_SAMPLE_BRANCH_USER)
257 mask |= X86_BR_USER;
258
259 if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
260 mask |= X86_BR_KERNEL;
261
262 /* Ignore BRANCH_HV here */
263
264 if (br_type & PERF_SAMPLE_BRANCH_ANY)
265 mask |= X86_BR_ANY;
266
267 if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
268 mask |= X86_BR_ANY_CALL;
269
270 if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
271 mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
272
273 if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
274 mask |= X86_BR_IND_CALL;
275
276 if (br_type & PERF_SAMPLE_BRANCH_COND)
277 mask |= X86_BR_JCC;
278
279 if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
280 mask |= X86_BR_IND_JMP;
281
282 if (br_type & PERF_SAMPLE_BRANCH_CALL)
283 mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
284
285 if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
286 mask |= X86_BR_TYPE_SAVE;
287
288 reg->reg = mask;
289 mask = 0;
290
291 for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
292 if (!(br_type & BIT_ULL(i)))
293 continue;
294
295 v = lbr_select_map[i];
296 if (v == LBR_NOT_SUPP)
297 return -EOPNOTSUPP;
298
299 if (v != LBR_IGNORE)
300 mask |= v;
301 }
302
303 /* Filter bits operate in suppress mode */
304 reg->config = mask ^ LBR_SELECT_MASK;
305
306 return 0;
307 }
308
amd_pmu_lbr_hw_config(struct perf_event * event)309 int amd_pmu_lbr_hw_config(struct perf_event *event)
310 {
311 int ret = 0;
312
313 /* LBR is not recommended in counting mode */
314 if (!is_sampling_event(event))
315 return -EINVAL;
316
317 ret = amd_pmu_lbr_setup_filter(event);
318 if (!ret)
319 event->attach_state |= PERF_ATTACH_SCHED_CB;
320
321 return ret;
322 }
323
amd_pmu_lbr_reset(void)324 void amd_pmu_lbr_reset(void)
325 {
326 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
327 int i;
328
329 if (!x86_pmu.lbr_nr)
330 return;
331
332 /* Reset all branch records individually */
333 for (i = 0; i < x86_pmu.lbr_nr; i++) {
334 amd_pmu_lbr_set_from(i, 0);
335 amd_pmu_lbr_set_to(i, 0);
336 }
337
338 cpuc->last_task_ctx = NULL;
339 cpuc->last_log_id = 0;
340 wrmsrl(MSR_AMD64_LBR_SELECT, 0);
341 }
342
amd_pmu_lbr_add(struct perf_event * event)343 void amd_pmu_lbr_add(struct perf_event *event)
344 {
345 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
346 struct hw_perf_event_extra *reg = &event->hw.branch_reg;
347
348 if (!x86_pmu.lbr_nr)
349 return;
350
351 if (has_branch_stack(event)) {
352 cpuc->lbr_select = 1;
353 cpuc->lbr_sel->config = reg->config;
354 cpuc->br_sel = reg->reg;
355 }
356
357 perf_sched_cb_inc(event->pmu);
358
359 if (!cpuc->lbr_users++ && !event->total_time_running)
360 amd_pmu_lbr_reset();
361 }
362
amd_pmu_lbr_del(struct perf_event * event)363 void amd_pmu_lbr_del(struct perf_event *event)
364 {
365 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
366
367 if (!x86_pmu.lbr_nr)
368 return;
369
370 if (has_branch_stack(event))
371 cpuc->lbr_select = 0;
372
373 cpuc->lbr_users--;
374 WARN_ON_ONCE(cpuc->lbr_users < 0);
375 perf_sched_cb_dec(event->pmu);
376 }
377
/*
 * Context switch callback: wipe the LBR records when a task is scheduled
 * in while LBR is in use on this CPU. @pmu_ctx is not used.
 */
void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!cpuc->lbr_users)
		return;

	if (!sched_in)
		return;

	/*
	 * LBR entries carry no address space identifier, so records logged
	 * before a context switch cannot be resolved once the address space
	 * has changed. Start the incoming task with an empty stack.
	 */
	amd_pmu_lbr_reset();
}
390
amd_pmu_lbr_enable_all(void)391 void amd_pmu_lbr_enable_all(void)
392 {
393 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
394 u64 lbr_select, dbg_ctl, dbg_extn_cfg;
395
396 if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
397 return;
398
399 /* Set hardware branch filter */
400 if (cpuc->lbr_select) {
401 lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK;
402 wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
403 }
404
405 if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
406 rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
407 wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
408 }
409
410 rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
411 wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
412 }
413
amd_pmu_lbr_disable_all(void)414 void amd_pmu_lbr_disable_all(void)
415 {
416 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
417 u64 dbg_ctl, dbg_extn_cfg;
418
419 if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
420 return;
421
422 rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
423 wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
424
425 if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
426 rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
427 wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
428 }
429 }
430
amd_pmu_lbr_init(void)431 __init int amd_pmu_lbr_init(void)
432 {
433 union cpuid_0x80000022_ebx ebx;
434
435 if (x86_pmu.version < 2 || !boot_cpu_has(X86_FEATURE_AMD_LBR_V2))
436 return -EOPNOTSUPP;
437
438 /* Set number of entries */
439 ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
440 x86_pmu.lbr_nr = ebx.split.lbr_v2_stack_sz;
441
442 pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
443
444 return 0;
445 }
446