1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/perf_event.h> 3 #include <linux/types.h> 4 5 #include <asm/perf_event.h> 6 #include <asm/msr.h> 7 8 #include "../perf_event.h" 9 10 /* 11 * Intel LBR_SELECT bits 12 * Intel Vol3a, April 2011, Section 16.7 Table 16-10 13 * 14 * Hardware branch filter (not available on all CPUs) 15 */ 16 #define LBR_KERNEL_BIT 0 /* do not capture at ring0 */ 17 #define LBR_USER_BIT 1 /* do not capture at ring > 0 */ 18 #define LBR_JCC_BIT 2 /* do not capture conditional branches */ 19 #define LBR_REL_CALL_BIT 3 /* do not capture relative calls */ 20 #define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */ 21 #define LBR_RETURN_BIT 5 /* do not capture near returns */ 22 #define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */ 23 #define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */ 24 #define LBR_FAR_BIT 8 /* do not capture far branches */ 25 #define LBR_CALL_STACK_BIT 9 /* enable call stack */ 26 27 /* 28 * Following bit only exists in Linux; we mask it out before writing it to 29 * the actual MSR. But it helps the constraint perf code to understand 30 * that this is a separate configuration. 31 */ 32 #define LBR_NO_INFO_BIT 63 /* don't read LBR_INFO. */ 33 34 #define LBR_KERNEL (1 << LBR_KERNEL_BIT) 35 #define LBR_USER (1 << LBR_USER_BIT) 36 #define LBR_JCC (1 << LBR_JCC_BIT) 37 #define LBR_REL_CALL (1 << LBR_REL_CALL_BIT) 38 #define LBR_IND_CALL (1 << LBR_IND_CALL_BIT) 39 #define LBR_RETURN (1 << LBR_RETURN_BIT) 40 #define LBR_REL_JMP (1 << LBR_REL_JMP_BIT) 41 #define LBR_IND_JMP (1 << LBR_IND_JMP_BIT) 42 #define LBR_FAR (1 << LBR_FAR_BIT) 43 #define LBR_CALL_STACK (1 << LBR_CALL_STACK_BIT) 44 #define LBR_NO_INFO (1ULL << LBR_NO_INFO_BIT) 45 46 #define LBR_PLM (LBR_KERNEL | LBR_USER) 47 48 #define LBR_SEL_MASK 0x3ff /* valid bits in LBR_SELECT */ 49 #define LBR_NOT_SUPP -1 /* LBR filter not supported */ 50 #define LBR_IGN 0 /* ignored */ 51 52 #define LBR_ANY \ 53 (LBR_JCC |\ 54 LBR_REL_CALL |\ 55 LBR_IND_CALL |\ 56 LBR_RETURN |\ 57 LBR_REL_JMP |\ 58 LBR_IND_JMP |\ 59 LBR_FAR) 60 61 #define LBR_FROM_FLAG_MISPRED BIT_ULL(63) 62 #define LBR_FROM_FLAG_IN_TX BIT_ULL(62) 63 #define LBR_FROM_FLAG_ABORT BIT_ULL(61) 64 65 #define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59)) 66 67 /* 68 * Intel LBR_CTL bits 69 * 70 * Hardware branch filter for Arch LBR 71 */ 72 #define ARCH_LBR_KERNEL_BIT 1 /* capture at ring0 */ 73 #define ARCH_LBR_USER_BIT 2 /* capture at ring > 0 */ 74 #define ARCH_LBR_CALL_STACK_BIT 3 /* enable call stack */ 75 #define ARCH_LBR_JCC_BIT 16 /* capture conditional branches */ 76 #define ARCH_LBR_REL_JMP_BIT 17 /* capture relative jumps */ 77 #define ARCH_LBR_IND_JMP_BIT 18 /* capture indirect jumps */ 78 #define ARCH_LBR_REL_CALL_BIT 19 /* capture relative calls */ 79 #define ARCH_LBR_IND_CALL_BIT 20 /* capture indirect calls */ 80 #define ARCH_LBR_RETURN_BIT 21 /* capture near returns */ 81 #define ARCH_LBR_OTHER_BRANCH_BIT 22 /* capture other branches */ 82 83 #define ARCH_LBR_KERNEL (1ULL << ARCH_LBR_KERNEL_BIT) 84 #define ARCH_LBR_USER (1ULL << ARCH_LBR_USER_BIT) 85 #define ARCH_LBR_CALL_STACK (1ULL << ARCH_LBR_CALL_STACK_BIT) 86 #define ARCH_LBR_JCC (1ULL << ARCH_LBR_JCC_BIT) 87 #define ARCH_LBR_REL_JMP (1ULL << ARCH_LBR_REL_JMP_BIT) 88 #define ARCH_LBR_IND_JMP (1ULL << ARCH_LBR_IND_JMP_BIT) 89 #define ARCH_LBR_REL_CALL (1ULL << ARCH_LBR_REL_CALL_BIT) 90 #define ARCH_LBR_IND_CALL (1ULL << ARCH_LBR_IND_CALL_BIT) 91 #define ARCH_LBR_RETURN (1ULL << ARCH_LBR_RETURN_BIT) 92 #define ARCH_LBR_OTHER_BRANCH (1ULL << ARCH_LBR_OTHER_BRANCH_BIT) 93 94 #define ARCH_LBR_ANY \ 95 (ARCH_LBR_JCC |\ 96 ARCH_LBR_REL_JMP |\ 97 ARCH_LBR_IND_JMP |\ 98 ARCH_LBR_REL_CALL |\ 99 ARCH_LBR_IND_CALL |\ 100 ARCH_LBR_RETURN |\ 101 ARCH_LBR_OTHER_BRANCH) 102 103 #define ARCH_LBR_CTL_MASK 0x7f000e 104 105 static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc); 106 107 static __always_inline bool is_lbr_call_stack_bit_set(u64 config) 108 { 109 if (static_cpu_has(X86_FEATURE_ARCH_LBR)) 110 return !!(config & ARCH_LBR_CALL_STACK); 111 112 return !!(config & LBR_CALL_STACK); 113 } 114 115 /* 116 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI 117 * otherwise it becomes near impossible to get a reliable stack. 118 */ 119 120 static void __intel_pmu_lbr_enable(bool pmi) 121 { 122 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 123 u64 debugctl, lbr_select = 0, orig_debugctl; 124 125 /* 126 * No need to unfreeze manually, as v4 can do that as part 127 * of the GLOBAL_STATUS ack. 128 */ 129 if (pmi && x86_pmu.version >= 4) 130 return; 131 132 /* 133 * No need to reprogram LBR_SELECT in a PMI, as it 134 * did not change. 135 */ 136 if (cpuc->lbr_sel) 137 lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask; 138 if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && !pmi && cpuc->lbr_sel) 139 wrmsrl(MSR_LBR_SELECT, lbr_select); 140 141 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); 142 orig_debugctl = debugctl; 143 144 if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) 145 debugctl |= DEBUGCTLMSR_LBR; 146 /* 147 * LBR callstack does not work well with FREEZE_LBRS_ON_PMI. 148 * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions 149 * may cause superfluous increase/decrease of LBR_TOS. 150 */ 151 if (is_lbr_call_stack_bit_set(lbr_select)) 152 debugctl &= ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; 153 else 154 debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; 155 156 if (orig_debugctl != debugctl) 157 wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); 158 159 if (static_cpu_has(X86_FEATURE_ARCH_LBR)) 160 wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN); 161 } 162 163 void intel_pmu_lbr_reset_32(void) 164 { 165 int i; 166 167 for (i = 0; i < x86_pmu.lbr_nr; i++) 168 wrmsrl(x86_pmu.lbr_from + i, 0); 169 } 170 171 void intel_pmu_lbr_reset_64(void) 172 { 173 int i; 174 175 for (i = 0; i < x86_pmu.lbr_nr; i++) { 176 wrmsrl(x86_pmu.lbr_from + i, 0); 177 wrmsrl(x86_pmu.lbr_to + i, 0); 178 if (x86_pmu.lbr_has_info) 179 wrmsrl(x86_pmu.lbr_info + i, 0); 180 } 181 } 182 183 static void intel_pmu_arch_lbr_reset(void) 184 { 185 /* Write to ARCH_LBR_DEPTH MSR, all LBR entries are reset to 0 */ 186 wrmsrl(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr); 187 } 188 189 void intel_pmu_lbr_reset(void) 190 { 191 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 192 193 if (!x86_pmu.lbr_nr) 194 return; 195 196 x86_pmu.lbr_reset(); 197 198 cpuc->last_task_ctx = NULL; 199 cpuc->last_log_id = 0; 200 if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && cpuc->lbr_select) 201 wrmsrl(MSR_LBR_SELECT, 0); 202 } 203 204 /* 205 * TOS = most recently recorded branch 206 */ 207 static inline u64 intel_pmu_lbr_tos(void) 208 { 209 u64 tos; 210 211 rdmsrl(x86_pmu.lbr_tos, tos); 212 return tos; 213 } 214 215 enum { 216 LBR_NONE, 217 LBR_VALID, 218 }; 219 220 /* 221 * For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x 222 * are the TSX flags when TSX is supported, but when TSX is not supported 223 * they have no consistent behavior: 224 * 225 * - For wrmsr(), bits 61:62 are considered part of the sign extension. 226 * - For HW updates (branch captures) bits 61:62 are always OFF and are not 227 * part of the sign extension. 228 * 229 * Therefore, if: 230 * 231 * 1) LBR format LBR_FORMAT_EIP_FLAGS2 232 * 2) CPU has no TSX support enabled 233 * 234 * ... then any value passed to wrmsr() must be sign extended to 63 bits and any 235 * value from rdmsr() must be converted to have a 61 bits sign extension, 236 * ignoring the TSX flags. 237 */ 238 static inline bool lbr_from_signext_quirk_needed(void) 239 { 240 bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) || 241 boot_cpu_has(X86_FEATURE_RTM); 242 243 return !tsx_support; 244 } 245 246 static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key); 247 248 /* If quirk is enabled, ensure sign extension is 63 bits: */ 249 inline u64 lbr_from_signext_quirk_wr(u64 val) 250 { 251 if (static_branch_unlikely(&lbr_from_quirk_key)) { 252 /* 253 * Sign extend into bits 61:62 while preserving bit 63. 254 * 255 * Quirk is enabled when TSX is disabled. Therefore TSX bits 256 * in val are always OFF and must be changed to be sign 257 * extension bits. Since bits 59:60 are guaranteed to be 258 * part of the sign extension bits, we can just copy them 259 * to 61:62. 260 */ 261 val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2; 262 } 263 return val; 264 } 265 266 /* 267 * If quirk is needed, ensure sign extension is 61 bits: 268 */ 269 static u64 lbr_from_signext_quirk_rd(u64 val) 270 { 271 if (static_branch_unlikely(&lbr_from_quirk_key)) { 272 /* 273 * Quirk is on when TSX is not enabled. Therefore TSX 274 * flags must be read as OFF. 275 */ 276 val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT); 277 } 278 return val; 279 } 280 281 static __always_inline void wrlbr_from(unsigned int idx, u64 val) 282 { 283 val = lbr_from_signext_quirk_wr(val); 284 wrmsrl(x86_pmu.lbr_from + idx, val); 285 } 286 287 static __always_inline void wrlbr_to(unsigned int idx, u64 val) 288 { 289 wrmsrl(x86_pmu.lbr_to + idx, val); 290 } 291 292 static __always_inline void wrlbr_info(unsigned int idx, u64 val) 293 { 294 wrmsrl(x86_pmu.lbr_info + idx, val); 295 } 296 297 static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr) 298 { 299 u64 val; 300 301 if (lbr) 302 return lbr->from; 303 304 rdmsrl(x86_pmu.lbr_from + idx, val); 305 306 return lbr_from_signext_quirk_rd(val); 307 } 308 309 static __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr) 310 { 311 u64 val; 312 313 if (lbr) 314 return lbr->to; 315 316 rdmsrl(x86_pmu.lbr_to + idx, val); 317 318 return val; 319 } 320 321 static __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr) 322 { 323 u64 val; 324 325 if (lbr) 326 return lbr->info; 327 328 rdmsrl(x86_pmu.lbr_info + idx, val); 329 330 return val; 331 } 332 333 static inline void 334 wrlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info) 335 { 336 wrlbr_from(idx, lbr->from); 337 wrlbr_to(idx, lbr->to); 338 if (need_info) 339 wrlbr_info(idx, lbr->info); 340 } 341 342 static inline bool 343 rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info) 344 { 345 u64 from = rdlbr_from(idx, NULL); 346 347 /* Don't read invalid entry */ 348 if (!from) 349 return false; 350 351 lbr->from = from; 352 lbr->to = rdlbr_to(idx, NULL); 353 if (need_info) 354 lbr->info = rdlbr_info(idx, NULL); 355 356 return true; 357 } 358 359 void intel_pmu_lbr_restore(void *ctx) 360 { 361 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 362 struct x86_perf_task_context *task_ctx = ctx; 363 bool need_info = x86_pmu.lbr_has_info; 364 u64 tos = task_ctx->tos; 365 unsigned lbr_idx, mask; 366 int i; 367 368 mask = x86_pmu.lbr_nr - 1; 369 for (i = 0; i < task_ctx->valid_lbrs; i++) { 370 lbr_idx = (tos - i) & mask; 371 wrlbr_all(&task_ctx->lbr[i], lbr_idx, need_info); 372 } 373 374 for (; i < x86_pmu.lbr_nr; i++) { 375 lbr_idx = (tos - i) & mask; 376 wrlbr_from(lbr_idx, 0); 377 wrlbr_to(lbr_idx, 0); 378 if (need_info) 379 wrlbr_info(lbr_idx, 0); 380 } 381 382 wrmsrl(x86_pmu.lbr_tos, tos); 383 384 if (cpuc->lbr_select) 385 wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel); 386 } 387 388 static void intel_pmu_arch_lbr_restore(void *ctx) 389 { 390 struct x86_perf_task_context_arch_lbr *task_ctx = ctx; 391 struct lbr_entry *entries = task_ctx->entries; 392 int i; 393 394 /* Fast reset the LBRs before restore if the call stack is not full. */ 395 if (!entries[x86_pmu.lbr_nr - 1].from) 396 intel_pmu_arch_lbr_reset(); 397 398 for (i = 0; i < x86_pmu.lbr_nr; i++) { 399 if (!entries[i].from) 400 break; 401 wrlbr_all(&entries[i], i, true); 402 } 403 } 404 405 /* 406 * Restore the Architecture LBR state from the xsave area in the perf 407 * context data for the task via the XRSTORS instruction. 408 */ 409 static void intel_pmu_arch_lbr_xrstors(void *ctx) 410 { 411 struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx; 412 413 xrstors(&task_ctx->xsave, XFEATURE_MASK_LBR); 414 } 415 416 static __always_inline bool lbr_is_reset_in_cstate(void *ctx) 417 { 418 if (static_cpu_has(X86_FEATURE_ARCH_LBR)) 419 return x86_pmu.lbr_deep_c_reset && !rdlbr_from(0, NULL); 420 421 return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL); 422 } 423 424 static void __intel_pmu_lbr_restore(void *ctx) 425 { 426 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 427 428 if (task_context_opt(ctx)->lbr_callstack_users == 0 || 429 task_context_opt(ctx)->lbr_stack_state == LBR_NONE) { 430 intel_pmu_lbr_reset(); 431 return; 432 } 433 434 /* 435 * Does not restore the LBR registers, if 436 * - No one else touched them, and 437 * - Was not cleared in Cstate 438 */ 439 if ((ctx == cpuc->last_task_ctx) && 440 (task_context_opt(ctx)->log_id == cpuc->last_log_id) && 441 !lbr_is_reset_in_cstate(ctx)) { 442 task_context_opt(ctx)->lbr_stack_state = LBR_NONE; 443 return; 444 } 445 446 x86_pmu.lbr_restore(ctx); 447 448 task_context_opt(ctx)->lbr_stack_state = LBR_NONE; 449 } 450 451 void intel_pmu_lbr_save(void *ctx) 452 { 453 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 454 struct x86_perf_task_context *task_ctx = ctx; 455 bool need_info = x86_pmu.lbr_has_info; 456 unsigned lbr_idx, mask; 457 u64 tos; 458 int i; 459 460 mask = x86_pmu.lbr_nr - 1; 461 tos = intel_pmu_lbr_tos(); 462 for (i = 0; i < x86_pmu.lbr_nr; i++) { 463 lbr_idx = (tos - i) & mask; 464 if (!rdlbr_all(&task_ctx->lbr[i], lbr_idx, need_info)) 465 break; 466 } 467 task_ctx->valid_lbrs = i; 468 task_ctx->tos = tos; 469 470 if (cpuc->lbr_select) 471 rdmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel); 472 } 473 474 static void intel_pmu_arch_lbr_save(void *ctx) 475 { 476 struct x86_perf_task_context_arch_lbr *task_ctx = ctx; 477 struct lbr_entry *entries = task_ctx->entries; 478 int i; 479 480 for (i = 0; i < x86_pmu.lbr_nr; i++) { 481 if (!rdlbr_all(&entries[i], i, true)) 482 break; 483 } 484 485 /* LBR call stack is not full. Reset is required in restore. */ 486 if (i < x86_pmu.lbr_nr) 487 entries[x86_pmu.lbr_nr - 1].from = 0; 488 } 489 490 /* 491 * Save the Architecture LBR state to the xsave area in the perf 492 * context data for the task via the XSAVES instruction. 493 */ 494 static void intel_pmu_arch_lbr_xsaves(void *ctx) 495 { 496 struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx; 497 498 xsaves(&task_ctx->xsave, XFEATURE_MASK_LBR); 499 } 500 501 static void __intel_pmu_lbr_save(void *ctx) 502 { 503 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 504 505 if (task_context_opt(ctx)->lbr_callstack_users == 0) { 506 task_context_opt(ctx)->lbr_stack_state = LBR_NONE; 507 return; 508 } 509 510 x86_pmu.lbr_save(ctx); 511 512 task_context_opt(ctx)->lbr_stack_state = LBR_VALID; 513 514 cpuc->last_task_ctx = ctx; 515 cpuc->last_log_id = ++task_context_opt(ctx)->log_id; 516 } 517 518 void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev, 519 struct perf_event_context *next) 520 { 521 void *prev_ctx_data, *next_ctx_data; 522 523 swap(prev->task_ctx_data, next->task_ctx_data); 524 525 /* 526 * Architecture specific synchronization makes sense in 527 * case both prev->task_ctx_data and next->task_ctx_data 528 * pointers are allocated. 529 */ 530 531 prev_ctx_data = next->task_ctx_data; 532 next_ctx_data = prev->task_ctx_data; 533 534 if (!prev_ctx_data || !next_ctx_data) 535 return; 536 537 swap(task_context_opt(prev_ctx_data)->lbr_callstack_users, 538 task_context_opt(next_ctx_data)->lbr_callstack_users); 539 } 540 541 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) 542 { 543 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 544 void *task_ctx; 545 546 if (!cpuc->lbr_users) 547 return; 548 549 /* 550 * If LBR callstack feature is enabled and the stack was saved when 551 * the task was scheduled out, restore the stack. Otherwise flush 552 * the LBR stack. 553 */ 554 task_ctx = ctx ? ctx->task_ctx_data : NULL; 555 if (task_ctx) { 556 if (sched_in) 557 __intel_pmu_lbr_restore(task_ctx); 558 else 559 __intel_pmu_lbr_save(task_ctx); 560 return; 561 } 562 563 /* 564 * Since a context switch can flip the address space and LBR entries 565 * are not tagged with an identifier, we need to wipe the LBR, even for 566 * per-cpu events. You simply cannot resolve the branches from the old 567 * address space. 568 */ 569 if (sched_in) 570 intel_pmu_lbr_reset(); 571 } 572 573 static inline bool branch_user_callstack(unsigned br_sel) 574 { 575 return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK); 576 } 577 578 void intel_pmu_lbr_add(struct perf_event *event) 579 { 580 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 581 582 if (!x86_pmu.lbr_nr) 583 return; 584 585 if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT) 586 cpuc->lbr_select = 1; 587 588 cpuc->br_sel = event->hw.branch_reg.reg; 589 590 if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) 591 task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users++; 592 593 /* 594 * Request pmu::sched_task() callback, which will fire inside the 595 * regular perf event scheduling, so that call will: 596 * 597 * - restore or wipe; when LBR-callstack, 598 * - wipe; otherwise, 599 * 600 * when this is from __perf_event_task_sched_in(). 601 * 602 * However, if this is from perf_install_in_context(), no such callback 603 * will follow and we'll need to reset the LBR here if this is the 604 * first LBR event. 605 * 606 * The problem is, we cannot tell these cases apart... but we can 607 * exclude the biggest chunk of cases by looking at 608 * event->total_time_running. An event that has accrued runtime cannot 609 * be 'new'. Conversely, a new event can get installed through the 610 * context switch path for the first time. 611 */ 612 if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0) 613 cpuc->lbr_pebs_users++; 614 perf_sched_cb_inc(event->ctx->pmu); 615 if (!cpuc->lbr_users++ && !event->total_time_running) 616 intel_pmu_lbr_reset(); 617 } 618 619 void release_lbr_buffers(void) 620 { 621 struct kmem_cache *kmem_cache; 622 struct cpu_hw_events *cpuc; 623 int cpu; 624 625 if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) 626 return; 627 628 for_each_possible_cpu(cpu) { 629 cpuc = per_cpu_ptr(&cpu_hw_events, cpu); 630 kmem_cache = x86_get_pmu(cpu)->task_ctx_cache; 631 if (kmem_cache && cpuc->lbr_xsave) { 632 kmem_cache_free(kmem_cache, cpuc->lbr_xsave); 633 cpuc->lbr_xsave = NULL; 634 } 635 } 636 } 637 638 void reserve_lbr_buffers(void) 639 { 640 struct kmem_cache *kmem_cache; 641 struct cpu_hw_events *cpuc; 642 int cpu; 643 644 if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) 645 return; 646 647 for_each_possible_cpu(cpu) { 648 cpuc = per_cpu_ptr(&cpu_hw_events, cpu); 649 kmem_cache = x86_get_pmu(cpu)->task_ctx_cache; 650 if (!kmem_cache || cpuc->lbr_xsave) 651 continue; 652 653 cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, 654 GFP_KERNEL | __GFP_ZERO, 655 cpu_to_node(cpu)); 656 } 657 } 658 659 void intel_pmu_lbr_del(struct perf_event *event) 660 { 661 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 662 663 if (!x86_pmu.lbr_nr) 664 return; 665 666 if (branch_user_callstack(cpuc->br_sel) && 667 event->ctx->task_ctx_data) 668 task_context_opt(event->ctx->task_ctx_data)->lbr_callstack_users--; 669 670 if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT) 671 cpuc->lbr_select = 0; 672 673 if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0) 674 cpuc->lbr_pebs_users--; 675 cpuc->lbr_users--; 676 WARN_ON_ONCE(cpuc->lbr_users < 0); 677 WARN_ON_ONCE(cpuc->lbr_pebs_users < 0); 678 perf_sched_cb_dec(event->ctx->pmu); 679 } 680 681 static inline bool vlbr_exclude_host(void) 682 { 683 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 684 685 return test_bit(INTEL_PMC_IDX_FIXED_VLBR, 686 (unsigned long *)&cpuc->intel_ctrl_guest_mask); 687 } 688 689 void intel_pmu_lbr_enable_all(bool pmi) 690 { 691 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 692 693 if (cpuc->lbr_users && !vlbr_exclude_host()) 694 __intel_pmu_lbr_enable(pmi); 695 } 696 697 void intel_pmu_lbr_disable_all(void) 698 { 699 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 700 701 if (cpuc->lbr_users && !vlbr_exclude_host()) { 702 if (static_cpu_has(X86_FEATURE_ARCH_LBR)) 703 return __intel_pmu_arch_lbr_disable(); 704 705 __intel_pmu_lbr_disable(); 706 } 707 } 708 709 void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) 710 { 711 unsigned long mask = x86_pmu.lbr_nr - 1; 712 struct perf_branch_entry *br = cpuc->lbr_entries; 713 u64 tos = intel_pmu_lbr_tos(); 714 int i; 715 716 for (i = 0; i < x86_pmu.lbr_nr; i++) { 717 unsigned long lbr_idx = (tos - i) & mask; 718 union { 719 struct { 720 u32 from; 721 u32 to; 722 }; 723 u64 lbr; 724 } msr_lastbranch; 725 726 rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); 727 728 perf_clear_branch_entry_bitfields(br); 729 730 br->from = msr_lastbranch.from; 731 br->to = msr_lastbranch.to; 732 br++; 733 } 734 cpuc->lbr_stack.nr = i; 735 cpuc->lbr_stack.hw_idx = tos; 736 } 737 738 /* 739 * Due to lack of segmentation in Linux the effective address (offset) 740 * is the same as the linear address, allowing us to merge the LIP and EIP 741 * LBR formats. 742 */ 743 void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) 744 { 745 bool need_info = false, call_stack = false; 746 unsigned long mask = x86_pmu.lbr_nr - 1; 747 struct perf_branch_entry *br = cpuc->lbr_entries; 748 u64 tos = intel_pmu_lbr_tos(); 749 int i; 750 int out = 0; 751 int num = x86_pmu.lbr_nr; 752 753 if (cpuc->lbr_sel) { 754 need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO); 755 if (cpuc->lbr_sel->config & LBR_CALL_STACK) 756 call_stack = true; 757 } 758 759 for (i = 0; i < num; i++) { 760 unsigned long lbr_idx = (tos - i) & mask; 761 u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0; 762 u16 cycles = 0; 763 764 from = rdlbr_from(lbr_idx, NULL); 765 to = rdlbr_to(lbr_idx, NULL); 766 767 /* 768 * Read LBR call stack entries 769 * until invalid entry (0s) is detected. 770 */ 771 if (call_stack && !from) 772 break; 773 774 if (x86_pmu.lbr_has_info) { 775 if (need_info) { 776 u64 info; 777 778 info = rdlbr_info(lbr_idx, NULL); 779 mis = !!(info & LBR_INFO_MISPRED); 780 pred = !mis; 781 cycles = (info & LBR_INFO_CYCLES); 782 if (x86_pmu.lbr_has_tsx) { 783 in_tx = !!(info & LBR_INFO_IN_TX); 784 abort = !!(info & LBR_INFO_ABORT); 785 } 786 } 787 } else { 788 int skip = 0; 789 790 if (x86_pmu.lbr_from_flags) { 791 mis = !!(from & LBR_FROM_FLAG_MISPRED); 792 pred = !mis; 793 skip = 1; 794 } 795 if (x86_pmu.lbr_has_tsx) { 796 in_tx = !!(from & LBR_FROM_FLAG_IN_TX); 797 abort = !!(from & LBR_FROM_FLAG_ABORT); 798 skip = 3; 799 } 800 from = (u64)((((s64)from) << skip) >> skip); 801 802 if (x86_pmu.lbr_to_cycles) { 803 cycles = ((to >> 48) & LBR_INFO_CYCLES); 804 to = (u64)((((s64)to) << 16) >> 16); 805 } 806 } 807 808 /* 809 * Some CPUs report duplicated abort records, 810 * with the second entry not having an abort bit set. 811 * Skip them here. This loop runs backwards, 812 * so we need to undo the previous record. 813 * If the abort just happened outside the window 814 * the extra entry cannot be removed. 815 */ 816 if (abort && x86_pmu.lbr_double_abort && out > 0) 817 out--; 818 819 perf_clear_branch_entry_bitfields(br+out); 820 br[out].from = from; 821 br[out].to = to; 822 br[out].mispred = mis; 823 br[out].predicted = pred; 824 br[out].in_tx = in_tx; 825 br[out].abort = abort; 826 br[out].cycles = cycles; 827 out++; 828 } 829 cpuc->lbr_stack.nr = out; 830 cpuc->lbr_stack.hw_idx = tos; 831 } 832 833 static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred); 834 static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles); 835 static DEFINE_STATIC_KEY_FALSE(x86_lbr_type); 836 837 static __always_inline int get_lbr_br_type(u64 info) 838 { 839 int type = 0; 840 841 if (static_branch_likely(&x86_lbr_type)) 842 type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET; 843 844 return type; 845 } 846 847 static __always_inline bool get_lbr_mispred(u64 info) 848 { 849 bool mispred = 0; 850 851 if (static_branch_likely(&x86_lbr_mispred)) 852 mispred = !!(info & LBR_INFO_MISPRED); 853 854 return mispred; 855 } 856 857 static __always_inline u16 get_lbr_cycles(u64 info) 858 { 859 u16 cycles = info & LBR_INFO_CYCLES; 860 861 if (static_cpu_has(X86_FEATURE_ARCH_LBR) && 862 (!static_branch_likely(&x86_lbr_cycles) || 863 !(info & LBR_INFO_CYC_CNT_VALID))) 864 cycles = 0; 865 866 return cycles; 867 } 868 869 static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc, 870 struct lbr_entry *entries) 871 { 872 struct perf_branch_entry *e; 873 struct lbr_entry *lbr; 874 u64 from, to, info; 875 int i; 876 877 for (i = 0; i < x86_pmu.lbr_nr; i++) { 878 lbr = entries ? &entries[i] : NULL; 879 e = &cpuc->lbr_entries[i]; 880 881 from = rdlbr_from(i, lbr); 882 /* 883 * Read LBR entries until invalid entry (0s) is detected. 884 */ 885 if (!from) 886 break; 887 888 to = rdlbr_to(i, lbr); 889 info = rdlbr_info(i, lbr); 890 891 perf_clear_branch_entry_bitfields(e); 892 893 e->from = from; 894 e->to = to; 895 e->mispred = get_lbr_mispred(info); 896 e->predicted = !e->mispred; 897 e->in_tx = !!(info & LBR_INFO_IN_TX); 898 e->abort = !!(info & LBR_INFO_ABORT); 899 e->cycles = get_lbr_cycles(info); 900 e->type = get_lbr_br_type(info); 901 } 902 903 cpuc->lbr_stack.nr = i; 904 } 905 906 static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc) 907 { 908 intel_pmu_store_lbr(cpuc, NULL); 909 } 910 911 static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc) 912 { 913 struct x86_perf_task_context_arch_lbr_xsave *xsave = cpuc->lbr_xsave; 914 915 if (!xsave) { 916 intel_pmu_store_lbr(cpuc, NULL); 917 return; 918 } 919 xsaves(&xsave->xsave, XFEATURE_MASK_LBR); 920 921 intel_pmu_store_lbr(cpuc, xsave->lbr.entries); 922 } 923 924 void intel_pmu_lbr_read(void) 925 { 926 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 927 928 /* 929 * Don't read when all LBRs users are using adaptive PEBS. 930 * 931 * This could be smarter and actually check the event, 932 * but this simple approach seems to work for now. 933 */ 934 if (!cpuc->lbr_users || vlbr_exclude_host() || 935 cpuc->lbr_users == cpuc->lbr_pebs_users) 936 return; 937 938 x86_pmu.lbr_read(cpuc); 939 940 intel_pmu_lbr_filter(cpuc); 941 } 942 943 /* 944 * SW filter is used: 945 * - in case there is no HW filter 946 * - in case the HW filter has errata or limitations 947 */ 948 static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event) 949 { 950 u64 br_type = event->attr.branch_sample_type; 951 int mask = 0; 952 953 if (br_type & PERF_SAMPLE_BRANCH_USER) 954 mask |= X86_BR_USER; 955 956 if (br_type & PERF_SAMPLE_BRANCH_KERNEL) 957 mask |= X86_BR_KERNEL; 958 959 /* we ignore BRANCH_HV here */ 960 961 if (br_type & PERF_SAMPLE_BRANCH_ANY) 962 mask |= X86_BR_ANY; 963 964 if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL) 965 mask |= X86_BR_ANY_CALL; 966 967 if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN) 968 mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET; 969 970 if (br_type & PERF_SAMPLE_BRANCH_IND_CALL) 971 mask |= X86_BR_IND_CALL; 972 973 if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX) 974 mask |= X86_BR_ABORT; 975 976 if (br_type & PERF_SAMPLE_BRANCH_IN_TX) 977 mask |= X86_BR_IN_TX; 978 979 if (br_type & PERF_SAMPLE_BRANCH_NO_TX) 980 mask |= X86_BR_NO_TX; 981 982 if (br_type & PERF_SAMPLE_BRANCH_COND) 983 mask |= X86_BR_JCC; 984 985 if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) { 986 if (!x86_pmu_has_lbr_callstack()) 987 return -EOPNOTSUPP; 988 if (mask & ~(X86_BR_USER | X86_BR_KERNEL)) 989 return -EINVAL; 990 mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET | 991 X86_BR_CALL_STACK; 992 } 993 994 if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP) 995 mask |= X86_BR_IND_JMP; 996 997 if (br_type & PERF_SAMPLE_BRANCH_CALL) 998 mask |= X86_BR_CALL | X86_BR_ZERO_CALL; 999 1000 if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE) 1001 mask |= X86_BR_TYPE_SAVE; 1002 1003 /* 1004 * stash actual user request into reg, it may 1005 * be used by fixup code for some CPU 1006 */ 1007 event->hw.branch_reg.reg = mask; 1008 return 0; 1009 } 1010 1011 /* 1012 * setup the HW LBR filter 1013 * Used only when available, may not be enough to disambiguate 1014 * all branches, may need the help of the SW filter 1015 */ 1016 static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) 1017 { 1018 struct hw_perf_event_extra *reg; 1019 u64 br_type = event->attr.branch_sample_type; 1020 u64 mask = 0, v; 1021 int i; 1022 1023 for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) { 1024 if (!(br_type & (1ULL << i))) 1025 continue; 1026 1027 v = x86_pmu.lbr_sel_map[i]; 1028 if (v == LBR_NOT_SUPP) 1029 return -EOPNOTSUPP; 1030 1031 if (v != LBR_IGN) 1032 mask |= v; 1033 } 1034 1035 reg = &event->hw.branch_reg; 1036 reg->idx = EXTRA_REG_LBR; 1037 1038 if (static_cpu_has(X86_FEATURE_ARCH_LBR)) { 1039 reg->config = mask; 1040 1041 /* 1042 * The Arch LBR HW can retrieve the common branch types 1043 * from the LBR_INFO. It doesn't require the high overhead 1044 * SW disassemble. 1045 * Enable the branch type by default for the Arch LBR. 1046 */ 1047 reg->reg |= X86_BR_TYPE_SAVE; 1048 return 0; 1049 } 1050 1051 /* 1052 * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate 1053 * in suppress mode. So LBR_SELECT should be set to 1054 * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK) 1055 * But the 10th bit LBR_CALL_STACK does not operate 1056 * in suppress mode. 1057 */ 1058 reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK); 1059 1060 if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) && 1061 (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) && 1062 x86_pmu.lbr_has_info) 1063 reg->config |= LBR_NO_INFO; 1064 1065 return 0; 1066 } 1067 1068 int intel_pmu_setup_lbr_filter(struct perf_event *event) 1069 { 1070 int ret = 0; 1071 1072 /* 1073 * no LBR on this PMU 1074 */ 1075 if (!x86_pmu.lbr_nr) 1076 return -EOPNOTSUPP; 1077 1078 /* 1079 * setup SW LBR filter 1080 */ 1081 ret = intel_pmu_setup_sw_lbr_filter(event); 1082 if (ret) 1083 return ret; 1084 1085 /* 1086 * setup HW LBR filter, if any 1087 */ 1088 if (x86_pmu.lbr_sel_map) 1089 ret = intel_pmu_setup_hw_lbr_filter(event); 1090 1091 return ret; 1092 } 1093 1094 enum { 1095 ARCH_LBR_BR_TYPE_JCC = 0, 1096 ARCH_LBR_BR_TYPE_NEAR_IND_JMP = 1, 1097 ARCH_LBR_BR_TYPE_NEAR_REL_JMP = 2, 1098 ARCH_LBR_BR_TYPE_NEAR_IND_CALL = 3, 1099 ARCH_LBR_BR_TYPE_NEAR_REL_CALL = 4, 1100 ARCH_LBR_BR_TYPE_NEAR_RET = 5, 1101 ARCH_LBR_BR_TYPE_KNOWN_MAX = ARCH_LBR_BR_TYPE_NEAR_RET, 1102 1103 ARCH_LBR_BR_TYPE_MAP_MAX = 16, 1104 }; 1105 1106 static const int arch_lbr_br_type_map[ARCH_LBR_BR_TYPE_MAP_MAX] = { 1107 [ARCH_LBR_BR_TYPE_JCC] = X86_BR_JCC, 1108 [ARCH_LBR_BR_TYPE_NEAR_IND_JMP] = X86_BR_IND_JMP, 1109 [ARCH_LBR_BR_TYPE_NEAR_REL_JMP] = X86_BR_JMP, 1110 [ARCH_LBR_BR_TYPE_NEAR_IND_CALL] = X86_BR_IND_CALL, 1111 [ARCH_LBR_BR_TYPE_NEAR_REL_CALL] = X86_BR_CALL, 1112 [ARCH_LBR_BR_TYPE_NEAR_RET] = X86_BR_RET, 1113 }; 1114 1115 /* 1116 * implement actual branch filter based on user demand. 1117 * Hardware may not exactly satisfy that request, thus 1118 * we need to inspect opcodes. Mismatched branches are 1119 * discarded. Therefore, the number of branches returned 1120 * in PERF_SAMPLE_BRANCH_STACK sample may vary. 1121 */ 1122 static void 1123 intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) 1124 { 1125 u64 from, to; 1126 int br_sel = cpuc->br_sel; 1127 int i, j, type, to_plm; 1128 bool compress = false; 1129 1130 /* if sampling all branches, then nothing to filter */ 1131 if (((br_sel & X86_BR_ALL) == X86_BR_ALL) && 1132 ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE)) 1133 return; 1134 1135 for (i = 0; i < cpuc->lbr_stack.nr; i++) { 1136 1137 from = cpuc->lbr_entries[i].from; 1138 to = cpuc->lbr_entries[i].to; 1139 type = cpuc->lbr_entries[i].type; 1140 1141 /* 1142 * Parse the branch type recorded in LBR_x_INFO MSR. 1143 * Doesn't support OTHER_BRANCH decoding for now. 1144 * OTHER_BRANCH branch type still rely on software decoding. 1145 */ 1146 if (static_cpu_has(X86_FEATURE_ARCH_LBR) && 1147 type <= ARCH_LBR_BR_TYPE_KNOWN_MAX) { 1148 to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER; 1149 type = arch_lbr_br_type_map[type] | to_plm; 1150 } else 1151 type = branch_type(from, to, cpuc->lbr_entries[i].abort); 1152 if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) { 1153 if (cpuc->lbr_entries[i].in_tx) 1154 type |= X86_BR_IN_TX; 1155 else 1156 type |= X86_BR_NO_TX; 1157 } 1158 1159 /* if type does not correspond, then discard */ 1160 if (type == X86_BR_NONE || (br_sel & type) != type) { 1161 cpuc->lbr_entries[i].from = 0; 1162 compress = true; 1163 } 1164 1165 if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE) 1166 cpuc->lbr_entries[i].type = common_branch_type(type); 1167 } 1168 1169 if (!compress) 1170 return; 1171 1172 /* remove all entries with from=0 */ 1173 for (i = 0; i < cpuc->lbr_stack.nr; ) { 1174 if (!cpuc->lbr_entries[i].from) { 1175 j = i; 1176 while (++j < cpuc->lbr_stack.nr) 1177 cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j]; 1178 cpuc->lbr_stack.nr--; 1179 if (!cpuc->lbr_entries[i].from) 1180 continue; 1181 } 1182 i++; 1183 } 1184 } 1185 1186 void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr) 1187 { 1188 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 1189 1190 /* Cannot get TOS for large PEBS and Arch LBR */ 1191 if (static_cpu_has(X86_FEATURE_ARCH_LBR) || 1192 (cpuc->n_pebs == cpuc->n_large_pebs)) 1193 cpuc->lbr_stack.hw_idx = -1ULL; 1194 else 1195 cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos(); 1196 1197 intel_pmu_store_lbr(cpuc, lbr); 1198 intel_pmu_lbr_filter(cpuc); 1199 } 1200 1201 /* 1202 * Map interface branch filters onto LBR filters 1203 */ 1204 static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { 1205 [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, 1206 [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, 1207 [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, 1208 [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, 1209 [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_REL_JMP 1210 | LBR_IND_JMP | LBR_FAR, 1211 /* 1212 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches 1213 */ 1214 [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = 1215 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR, 1216 /* 1217 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL 1218 */ 1219 [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP, 1220 [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, 1221 [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP, 1222 }; 1223 1224 static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { 1225 [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, 1226 [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, 1227 [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, 1228 [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, 1229 [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR, 1230 [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL 1231 | LBR_FAR, 1232 [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL, 1233 [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, 1234 [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP, 1235 [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL, 1236 }; 1237 1238 static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { 1239 [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, 1240 [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, 1241 [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, 1242 [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, 1243 [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR, 1244 [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL 1245 | LBR_FAR, 1246 [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL, 1247 [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, 1248 [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_REL_CALL | LBR_IND_CALL 1249 | LBR_RETURN | LBR_CALL_STACK, 1250 [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP, 1251 [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL, 1252 }; 1253 1254 static int arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { 1255 [PERF_SAMPLE_BRANCH_ANY_SHIFT] = ARCH_LBR_ANY, 1256 [PERF_SAMPLE_BRANCH_USER_SHIFT] = ARCH_LBR_USER, 1257 [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = ARCH_LBR_KERNEL, 1258 [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, 1259 [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = ARCH_LBR_RETURN | 1260 ARCH_LBR_OTHER_BRANCH, 1261 [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = ARCH_LBR_REL_CALL | 1262 ARCH_LBR_IND_CALL | 1263 ARCH_LBR_OTHER_BRANCH, 1264 [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = ARCH_LBR_IND_CALL, 1265 [PERF_SAMPLE_BRANCH_COND_SHIFT] = ARCH_LBR_JCC, 1266 [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = ARCH_LBR_REL_CALL | 1267 ARCH_LBR_IND_CALL | 1268 ARCH_LBR_RETURN | 1269 ARCH_LBR_CALL_STACK, 1270 [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = ARCH_LBR_IND_JMP, 1271 [PERF_SAMPLE_BRANCH_CALL_SHIFT] = ARCH_LBR_REL_CALL, 1272 }; 1273 1274 /* core */ 1275 void __init intel_pmu_lbr_init_core(void) 1276 { 1277 x86_pmu.lbr_nr = 4; 1278 x86_pmu.lbr_tos = MSR_LBR_TOS; 1279 x86_pmu.lbr_from = MSR_LBR_CORE_FROM; 1280 x86_pmu.lbr_to = MSR_LBR_CORE_TO; 1281 1282 /* 1283 * SW branch filter usage: 1284 * - compensate for lack of HW filter 1285 */ 1286 } 1287 1288 /* nehalem/westmere */ 1289 void __init intel_pmu_lbr_init_nhm(void) 1290 { 1291 x86_pmu.lbr_nr = 16; 1292 x86_pmu.lbr_tos = MSR_LBR_TOS; 1293 x86_pmu.lbr_from = MSR_LBR_NHM_FROM; 1294 x86_pmu.lbr_to = MSR_LBR_NHM_TO; 1295 1296 x86_pmu.lbr_sel_mask = LBR_SEL_MASK; 1297 x86_pmu.lbr_sel_map = nhm_lbr_sel_map; 1298 1299 /* 1300 * SW branch filter usage: 1301 * - workaround LBR_SEL errata (see above) 1302 * - support syscall, sysret capture. 1303 * That requires LBR_FAR but that means far 1304 * jmp need to be filtered out 1305 */ 1306 } 1307 1308 /* sandy bridge */ 1309 void __init intel_pmu_lbr_init_snb(void) 1310 { 1311 x86_pmu.lbr_nr = 16; 1312 x86_pmu.lbr_tos = MSR_LBR_TOS; 1313 x86_pmu.lbr_from = MSR_LBR_NHM_FROM; 1314 x86_pmu.lbr_to = MSR_LBR_NHM_TO; 1315 1316 x86_pmu.lbr_sel_mask = LBR_SEL_MASK; 1317 x86_pmu.lbr_sel_map = snb_lbr_sel_map; 1318 1319 /* 1320 * SW branch filter usage: 1321 * - support syscall, sysret capture. 1322 * That requires LBR_FAR but that means far 1323 * jmp need to be filtered out 1324 */ 1325 } 1326 1327 static inline struct kmem_cache * 1328 create_lbr_kmem_cache(size_t size, size_t align) 1329 { 1330 return kmem_cache_create("x86_lbr", size, align, 0, NULL); 1331 } 1332 1333 /* haswell */ 1334 void intel_pmu_lbr_init_hsw(void) 1335 { 1336 size_t size = sizeof(struct x86_perf_task_context); 1337 1338 x86_pmu.lbr_nr = 16; 1339 x86_pmu.lbr_tos = MSR_LBR_TOS; 1340 x86_pmu.lbr_from = MSR_LBR_NHM_FROM; 1341 x86_pmu.lbr_to = MSR_LBR_NHM_TO; 1342 1343 x86_pmu.lbr_sel_mask = LBR_SEL_MASK; 1344 x86_pmu.lbr_sel_map = hsw_lbr_sel_map; 1345 1346 x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0); 1347 } 1348 1349 /* skylake */ 1350 __init void intel_pmu_lbr_init_skl(void) 1351 { 1352 size_t size = sizeof(struct x86_perf_task_context); 1353 1354 x86_pmu.lbr_nr = 32; 1355 x86_pmu.lbr_tos = MSR_LBR_TOS; 1356 x86_pmu.lbr_from = MSR_LBR_NHM_FROM; 1357 x86_pmu.lbr_to = MSR_LBR_NHM_TO; 1358 x86_pmu.lbr_info = MSR_LBR_INFO_0; 1359 1360 x86_pmu.lbr_sel_mask = LBR_SEL_MASK; 1361 x86_pmu.lbr_sel_map = hsw_lbr_sel_map; 1362 1363 x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0); 1364 1365 /* 1366 * SW branch filter usage: 1367 * - support syscall, sysret capture. 1368 * That requires LBR_FAR but that means far 1369 * jmp need to be filtered out 1370 */ 1371 } 1372 1373 /* atom */ 1374 void __init intel_pmu_lbr_init_atom(void) 1375 { 1376 /* 1377 * only models starting at stepping 10 seems 1378 * to have an operational LBR which can freeze 1379 * on PMU interrupt 1380 */ 1381 if (boot_cpu_data.x86_model == 28 1382 && boot_cpu_data.x86_stepping < 10) { 1383 pr_cont("LBR disabled due to erratum"); 1384 return; 1385 } 1386 1387 x86_pmu.lbr_nr = 8; 1388 x86_pmu.lbr_tos = MSR_LBR_TOS; 1389 x86_pmu.lbr_from = MSR_LBR_CORE_FROM; 1390 x86_pmu.lbr_to = MSR_LBR_CORE_TO; 1391 1392 /* 1393 * SW branch filter usage: 1394 * - compensate for lack of HW filter 1395 */ 1396 } 1397 1398 /* slm */ 1399 void __init intel_pmu_lbr_init_slm(void) 1400 { 1401 x86_pmu.lbr_nr = 8; 1402 x86_pmu.lbr_tos = MSR_LBR_TOS; 1403 x86_pmu.lbr_from = MSR_LBR_CORE_FROM; 1404 x86_pmu.lbr_to = MSR_LBR_CORE_TO; 1405 1406 x86_pmu.lbr_sel_mask = LBR_SEL_MASK; 1407 x86_pmu.lbr_sel_map = nhm_lbr_sel_map; 1408 1409 /* 1410 * SW branch filter usage: 1411 * - compensate for lack of HW filter 1412 */ 1413 pr_cont("8-deep LBR, "); 1414 } 1415 1416 /* Knights Landing */ 1417 void intel_pmu_lbr_init_knl(void) 1418 { 1419 x86_pmu.lbr_nr = 8; 1420 x86_pmu.lbr_tos = MSR_LBR_TOS; 1421 x86_pmu.lbr_from = MSR_LBR_NHM_FROM; 1422 x86_pmu.lbr_to = MSR_LBR_NHM_TO; 1423 1424 x86_pmu.lbr_sel_mask = LBR_SEL_MASK; 1425 x86_pmu.lbr_sel_map = snb_lbr_sel_map; 1426 1427 /* Knights Landing does have MISPREDICT bit */ 1428 if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP) 1429 x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS; 1430 } 1431 1432 void intel_pmu_lbr_init(void) 1433 { 1434 switch (x86_pmu.intel_cap.lbr_format) { 1435 case LBR_FORMAT_EIP_FLAGS2: 1436 x86_pmu.lbr_has_tsx = 1; 1437 x86_pmu.lbr_from_flags = 1; 1438 if (lbr_from_signext_quirk_needed()) 1439 static_branch_enable(&lbr_from_quirk_key); 1440 break; 1441 1442 case LBR_FORMAT_EIP_FLAGS: 1443 x86_pmu.lbr_from_flags = 1; 1444 break; 1445 1446 case LBR_FORMAT_INFO: 1447 x86_pmu.lbr_has_tsx = 1; 1448 fallthrough; 1449 case LBR_FORMAT_INFO2: 1450 x86_pmu.lbr_has_info = 1; 1451 break; 1452 1453 case LBR_FORMAT_TIME: 1454 x86_pmu.lbr_from_flags = 1; 1455 x86_pmu.lbr_to_cycles = 1; 1456 break; 1457 } 1458 1459 if (x86_pmu.lbr_has_info) { 1460 /* 1461 * Only used in combination with baseline pebs. 1462 */ 1463 static_branch_enable(&x86_lbr_mispred); 1464 static_branch_enable(&x86_lbr_cycles); 1465 } 1466 } 1467 1468 /* 1469 * LBR state size is variable based on the max number of registers. 1470 * This calculates the expected state size, which should match 1471 * what the hardware enumerates for the size of XFEATURE_LBR. 1472 */ 1473 static inline unsigned int get_lbr_state_size(void) 1474 { 1475 return sizeof(struct arch_lbr_state) + 1476 x86_pmu.lbr_nr * sizeof(struct lbr_entry); 1477 } 1478 1479 static bool is_arch_lbr_xsave_available(void) 1480 { 1481 if (!boot_cpu_has(X86_FEATURE_XSAVES)) 1482 return false; 1483 1484 /* 1485 * Check the LBR state with the corresponding software structure. 1486 * Disable LBR XSAVES support if the size doesn't match. 1487 */ 1488 if (xfeature_size(XFEATURE_LBR) == 0) 1489 return false; 1490 1491 if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size())) 1492 return false; 1493 1494 return true; 1495 } 1496 1497 void __init intel_pmu_arch_lbr_init(void) 1498 { 1499 struct pmu *pmu = x86_get_pmu(smp_processor_id()); 1500 union cpuid28_eax eax; 1501 union cpuid28_ebx ebx; 1502 union cpuid28_ecx ecx; 1503 unsigned int unused_edx; 1504 bool arch_lbr_xsave; 1505 size_t size; 1506 u64 lbr_nr; 1507 1508 /* Arch LBR Capabilities */ 1509 cpuid(28, &eax.full, &ebx.full, &ecx.full, &unused_edx); 1510 1511 lbr_nr = fls(eax.split.lbr_depth_mask) * 8; 1512 if (!lbr_nr) 1513 goto clear_arch_lbr; 1514 1515 /* Apply the max depth of Arch LBR */ 1516 if (wrmsrl_safe(MSR_ARCH_LBR_DEPTH, lbr_nr)) 1517 goto clear_arch_lbr; 1518 1519 x86_pmu.lbr_depth_mask = eax.split.lbr_depth_mask; 1520 x86_pmu.lbr_deep_c_reset = eax.split.lbr_deep_c_reset; 1521 x86_pmu.lbr_lip = eax.split.lbr_lip; 1522 x86_pmu.lbr_cpl = ebx.split.lbr_cpl; 1523 x86_pmu.lbr_filter = ebx.split.lbr_filter; 1524 x86_pmu.lbr_call_stack = ebx.split.lbr_call_stack; 1525 x86_pmu.lbr_mispred = ecx.split.lbr_mispred; 1526 x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr; 1527 x86_pmu.lbr_br_type = ecx.split.lbr_br_type; 1528 x86_pmu.lbr_nr = lbr_nr; 1529 1530 if (x86_pmu.lbr_mispred) 1531 static_branch_enable(&x86_lbr_mispred); 1532 if (x86_pmu.lbr_timed_lbr) 1533 static_branch_enable(&x86_lbr_cycles); 1534 if (x86_pmu.lbr_br_type) 1535 static_branch_enable(&x86_lbr_type); 1536 1537 arch_lbr_xsave = is_arch_lbr_xsave_available(); 1538 if (arch_lbr_xsave) { 1539 size = sizeof(struct x86_perf_task_context_arch_lbr_xsave) + 1540 get_lbr_state_size(); 1541 pmu->task_ctx_cache = create_lbr_kmem_cache(size, 1542 XSAVE_ALIGNMENT); 1543 } 1544 1545 if (!pmu->task_ctx_cache) { 1546 arch_lbr_xsave = false; 1547 1548 size = sizeof(struct x86_perf_task_context_arch_lbr) + 1549 lbr_nr * sizeof(struct lbr_entry); 1550 pmu->task_ctx_cache = create_lbr_kmem_cache(size, 0); 1551 } 1552 1553 x86_pmu.lbr_from = MSR_ARCH_LBR_FROM_0; 1554 x86_pmu.lbr_to = MSR_ARCH_LBR_TO_0; 1555 x86_pmu.lbr_info = MSR_ARCH_LBR_INFO_0; 1556 1557 /* LBR callstack requires both CPL and Branch Filtering support */ 1558 if (!x86_pmu.lbr_cpl || 1559 !x86_pmu.lbr_filter || 1560 !x86_pmu.lbr_call_stack) 1561 arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_NOT_SUPP; 1562 1563 if (!x86_pmu.lbr_cpl) { 1564 arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_NOT_SUPP; 1565 arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_NOT_SUPP; 1566 } else if (!x86_pmu.lbr_filter) { 1567 arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_NOT_SUPP; 1568 arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_NOT_SUPP; 1569 arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_NOT_SUPP; 1570 arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_NOT_SUPP; 1571 arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_NOT_SUPP; 1572 arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_NOT_SUPP; 1573 arch_lbr_ctl_map[PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_NOT_SUPP; 1574 } 1575 1576 x86_pmu.lbr_ctl_mask = ARCH_LBR_CTL_MASK; 1577 x86_pmu.lbr_ctl_map = arch_lbr_ctl_map; 1578 1579 if (!x86_pmu.lbr_cpl && !x86_pmu.lbr_filter) 1580 x86_pmu.lbr_ctl_map = NULL; 1581 1582 x86_pmu.lbr_reset = intel_pmu_arch_lbr_reset; 1583 if (arch_lbr_xsave) { 1584 x86_pmu.lbr_save = intel_pmu_arch_lbr_xsaves; 1585 x86_pmu.lbr_restore = intel_pmu_arch_lbr_xrstors; 1586 x86_pmu.lbr_read = intel_pmu_arch_lbr_read_xsave; 1587 pr_cont("XSAVE "); 1588 } else { 1589 x86_pmu.lbr_save = intel_pmu_arch_lbr_save; 1590 x86_pmu.lbr_restore = intel_pmu_arch_lbr_restore; 1591 x86_pmu.lbr_read = intel_pmu_arch_lbr_read; 1592 } 1593 1594 pr_cont("Architectural LBR, "); 1595 1596 return; 1597 1598 clear_arch_lbr: 1599 setup_clear_cpu_cap(X86_FEATURE_ARCH_LBR); 1600 } 1601 1602 /** 1603 * x86_perf_get_lbr - get the LBR records information 1604 * 1605 * @lbr: the caller's memory to store the LBR records information 1606 * 1607 * Returns: 0 indicates the LBR info has been successfully obtained 1608 */ 1609 int x86_perf_get_lbr(struct x86_pmu_lbr *lbr) 1610 { 1611 int lbr_fmt = x86_pmu.intel_cap.lbr_format; 1612 1613 lbr->nr = x86_pmu.lbr_nr; 1614 lbr->from = x86_pmu.lbr_from; 1615 lbr->to = x86_pmu.lbr_to; 1616 lbr->info = (lbr_fmt == LBR_FORMAT_INFO) ? x86_pmu.lbr_info : 0; 1617 1618 return 0; 1619 } 1620 EXPORT_SYMBOL_GPL(x86_perf_get_lbr); 1621 1622 struct event_constraint vlbr_constraint = 1623 __EVENT_CONSTRAINT(INTEL_FIXED_VLBR_EVENT, (1ULL << INTEL_PMC_IDX_FIXED_VLBR), 1624 FIXED_EVENT_FLAGS, 1, 0, PERF_X86_EVENT_LBR_SELECT); 1625