/*
 * Performance events - AMD IBS
 *
 * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
 *
 * For licencing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/pci.h>
#include <linux/ptrace.h>
#include <linux/syscore_ops.h>
#include <linux/sched/clock.h>

#include <asm/apic.h>

#include "../perf_event.h"

static u32 ibs_caps;

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)

#include <linux/kprobes.h>
#include <linux/hardirq.h>

#include <asm/nmi.h>

#define IBS_FETCH_CONFIG_MASK	(IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK	IBS_OP_MAX_CNT


/*
 * IBS states:
 *
 * ENABLED; tracks the pmu::add(), pmu::del() state, when set the counter is
 * taken and any further add()s must fail.
 *
 * STARTED/STOPPING/STOPPED; deal with pmu::start(), pmu::stop() state but are
 * complicated by the fact that the IBS hardware can send late NMIs (i.e. after
 * we've cleared the EN bit).
 *
 * In order to consume these late NMIs we have the STOPPED state, any NMI that
 * happens after we've cleared the EN state will clear this bit and report the
 * NMI handled (this is fundamentally racy in the face of multiple NMI sources,
 * someone else can consume our BIT and our NMI will go unhandled).
 *
 * And since we cannot set/clear this separate bit together with the EN bit,
 * there are races; if we cleared STARTED early, an NMI could land in
 * between clearing STARTED and clearing the EN bit (in fact multiple NMIs
 * could happen if the period is small enough), and consume our STOPPED bit
 * and trigger streams of unhandled NMIs.
 *
 * If, however, we clear STARTED late, an NMI can hit between clearing the
 * EN bit and clearing STARTED, still see STARTED set and process the event.
 * If this event will have the VALID bit clear, we bail properly, but this
 * is not a given. With VALID set we can end up calling pmu::stop() again
 * (the throttle logic) and trigger the WARNs in there.
 *
 * So what we do is set STOPPING before clearing EN to avoid the pmu::stop()
 * nesting, and clear STARTED late, so that we have a well defined state over
 * the clearing of the EN bit.
 *
 * XXX: we could probably be using !atomic bitops for all this.
 */
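
/*
 * Concretely, perf_ibs_stop() below does: set STOPPING, set STOPPED, clear
 * the EN bit, then clear STARTED.  perf_ibs_handle_irq() treats an NMI that
 * finds STARTED clear (or a sample without the VALID bit) as a late one and
 * consumes the STOPPED bit to report the NMI handled.
 */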

enum ibs_states {
	IBS_ENABLED	= 0,
	IBS_STARTED	= 1,
	IBS_STOPPING	= 2,
	IBS_STOPPED	= 3,

	IBS_MAX_STATES,
};

struct cpu_perf_ibs {
	struct perf_event	*event;
	unsigned long		state[BITS_TO_LONGS(IBS_MAX_STATES)];
};

struct perf_ibs {
	struct pmu			pmu;
	unsigned int			msr;
	u64				config_mask;
	u64				cnt_mask;
	u64				enable_mask;
	u64				valid_mask;
	u64				max_period;
	unsigned long			offset_mask[1];
	int				offset_max;
	unsigned int			fetch_count_reset_broken : 1;
	unsigned int			fetch_ignore_if_zero_rip : 1;
	struct cpu_perf_ibs __percpu	*pcpu;

	struct attribute		**format_attrs;
	struct attribute_group		format_group;
	const struct attribute_group	*attr_groups[2];

	u64				(*get_count)(u64 config);
};

struct perf_ibs_data {
	u32		size;
	union {
		u32	data[0];	/* data buffer starts here */
		u32	caps;
	};
	u64		regs[MSR_AMD64_IBS_REG_COUNT_MAX];
};

static int
perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
{
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int overflow = 0;

	/*
	 * If we are way outside a reasonable range then just skip forward:
	 */
	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	if (unlikely(left < (s64)min)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}

	/*
	 * If the hw period that triggers the sw overflow is too short
	 * we might hit the irq handler. This biases the results.
	 * Thus we shorten the next-to-last period and set the last
	 * period to the max period.
	 */
	if (left > max) {
		left -= max;
		if (left > max)
			left = max;
		else if (left < min)
			left = min;
	}

	*hw_period = (u64)left;

	return overflow;
}

static int
perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
{
	struct hw_perf_event *hwc = &event->hw;
	int shift = 64 - width;
	u64 prev_raw_count;
	u64 delta;

	/*
	 * Careful: an NMI might modify the previous event value.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic event atomically:
	 */
	prev_raw_count = local64_read(&hwc->prev_count);
	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			    new_raw_count) != prev_raw_count)
		return 0;

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (event-)time and add that to the generic event.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count.
	 */
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return 1;
}
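
/*
 * Worked example for perf_event_try_update() (illustrative, see
 * perf_ibs_event_update()): IBS always passes width == 64, so shift is 0 and
 * delta is simply new_raw_count - prev_raw_count.  For a hypothetical 27-bit
 * wide counter the shift would be 37, and a wrap from prev = 0x7ffffff to
 * new = 0x10 yields delta = ((0x10 << 37) - (0x7ffffff << 37)) >> 37 = 0x11,
 * i.e. the shifts discard the bits above the physical counter width so the
 * wrapped delta comes out right.
 */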

static struct perf_ibs perf_ibs_fetch;
static struct perf_ibs perf_ibs_op;

static struct perf_ibs *get_ibs_pmu(int type)
{
	if (perf_ibs_fetch.pmu.type == type)
		return &perf_ibs_fetch;
	if (perf_ibs_op.pmu.type == type)
		return &perf_ibs_op;
	return NULL;
}

/*
 * Use IBS for precise event sampling:
 *
 *	perf record -a -e cpu-cycles:p ...	# use ibs op counting cycle count
 *	perf record -a -e r076:p ...		# same as -e cpu-cycles:p
 *	perf record -a -e r0C1:p ...		# use ibs op counting micro-ops
 *
 * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
 * MSRC001_1033) is used to select either cycle or micro-ops counting
 * mode.
 *
 * The rip of IBS samples has skid 0. Thus, IBS supports precise
 * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
 * rip is invalid when IBS was not able to record the rip correctly.
 * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
 */
static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
{
	switch (event->attr.precise_ip) {
	case 0:
		return -ENOENT;
	case 1:
	case 2:
		break;
	default:
		return -EOPNOTSUPP;
	}

	switch (event->attr.type) {
	case PERF_TYPE_HARDWARE:
		switch (event->attr.config) {
		case PERF_COUNT_HW_CPU_CYCLES:
			*config = 0;
			return 0;
		}
		break;
	case PERF_TYPE_RAW:
		switch (event->attr.config) {
		case 0x0076:
			*config = 0;
			return 0;
		case 0x00C1:
			*config = IBS_OP_CNT_CTL;
			return 0;
		}
		break;
	default:
		return -ENOENT;
	}

	return -EOPNOTSUPP;
}

static int perf_ibs_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs;
	u64 max_cnt, config;
	int ret;

	perf_ibs = get_ibs_pmu(event->attr.type);
	if (perf_ibs) {
		config = event->attr.config;
	} else {
		perf_ibs = &perf_ibs_op;
		ret = perf_ibs_precise_event(event, &config);
		if (ret)
			return ret;
	}

	if (event->pmu != &perf_ibs->pmu)
		return -ENOENT;

	if (config & ~perf_ibs->config_mask)
		return -EINVAL;

	if (hwc->sample_period) {
		if (config & perf_ibs->cnt_mask)
			/* raw max_cnt may not be set */
			return -EINVAL;
		if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
			/*
			 * The lower 4 bits cannot be set in the ibs max
			 * cnt, but allow it in case we adjust the sample
			 * period to set a frequency.
			 */
			return -EINVAL;
		hwc->sample_period &= ~0x0FULL;
		if (!hwc->sample_period)
			hwc->sample_period = 0x10;
	} else {
		max_cnt = config & perf_ibs->cnt_mask;
		config &= ~perf_ibs->cnt_mask;
		event->attr.sample_period = max_cnt << 4;
		hwc->sample_period = event->attr.sample_period;
	}

	if (!hwc->sample_period)
		return -EINVAL;

	/*
	 * If we modify hwc->sample_period, we also need to update
	 * hwc->last_period and hwc->period_left.
	 */
	hwc->last_period = hwc->sample_period;
	local64_set(&hwc->period_left, hwc->sample_period);

	hwc->config_base = perf_ibs->msr;
	hwc->config = config;

	return 0;
}
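
/*
 * Worked example for the period handling above (illustrative numbers): a
 * requested sample_period of 1000003 has its lower 4 bits cleared and becomes
 * 1000000, since the hardware MaxCnt field counts in units of 16; the value
 * programmed into the control MSR is period >> 4 (see perf_ibs_start()).
 * Conversely, a raw config carrying MaxCnt = 0x1000 and no sample_period
 * maps to a sample_period of 0x1000 << 4 = 0x10000.
 */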

static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
			       struct hw_perf_event *hwc, u64 *period)
{
	int overflow;

	/* ignore lower 4 bits in min count: */
	overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
	local64_set(&hwc->prev_count, 0);

	return overflow;
}

static u64 get_ibs_fetch_count(u64 config)
{
	return (config & IBS_FETCH_CNT) >> 12;
}

static u64 get_ibs_op_count(u64 config)
{
	u64 count = 0;

	/*
	 * If the internal 27-bit counter rolled over, the count is MaxCnt
	 * and the lower 7 bits of CurCnt are randomized.
	 * Otherwise CurCnt has the full 27-bit current counter value.
	 */
	if (config & IBS_OP_VAL) {
		count = (config & IBS_OP_MAX_CNT) << 4;
		if (ibs_caps & IBS_CAPS_OPCNTEXT)
			count += config & IBS_OP_MAX_CNT_EXT_MASK;
	} else if (ibs_caps & IBS_CAPS_RDWROPCNT) {
		count = (config & IBS_OP_CUR_CNT) >> 32;
	}

	return count;
}

static void
perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
		      u64 *config)
{
	u64 count = perf_ibs->get_count(*config);

	/*
	 * Set width to 64 since we do not overflow on max width but
	 * instead on max count. In perf_ibs_set_period() we clear
	 * prev count manually on overflow.
	 */
	while (!perf_event_try_update(event, count, 64)) {
		rdmsrl(event->hw.config_base, *config);
		count = perf_ibs->get_count(*config);
	}
}
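
/*
 * Note on the loop above: perf_event_try_update() returns 0 when its cmpxchg
 * of hwc->prev_count loses a race (e.g. against the NMI handler), in which
 * case the control MSR is re-read and the update is retried with the fresh
 * count.
 */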

static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
					 struct hw_perf_event *hwc, u64 config)
{
	u64 tmp = hwc->config | config;

	/* Some chips need a 0->1 transition of the enable bit to reset the fetch count, see perf_event_ibs_init(). */
	if (perf_ibs->fetch_count_reset_broken)
		wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask);

	wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask);
}

/*
 * Erratum #420 Instruction-Based Sampling Engine May Generate
 * Interrupt that Cannot Be Cleared:
 *
 * Must clear counter mask first, then clear the enable bit. See
 * Revision Guide for AMD Family 10h Processors, Publication #41322.
 */
static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
					  struct hw_perf_event *hwc, u64 config)
{
	config &= ~perf_ibs->cnt_mask;
	if (boot_cpu_data.x86 == 0x10)
		wrmsrl(hwc->config_base, config);
	config &= ~perf_ibs->enable_mask;
	wrmsrl(hwc->config_base, config);
}

/*
 * We cannot restore the ibs pmu state, so we always need to update
 * the event while stopping it and then reset the state when starting
 * again. Thus, we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags
 * in perf_ibs_start()/perf_ibs_stop() and instead always do the update.
 */
static void perf_ibs_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	u64 period, config = 0;

	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
		return;

	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
	hwc->state = 0;

	perf_ibs_set_period(perf_ibs, hwc, &period);
	if (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_OPCNTEXT)) {
		config |= period & IBS_OP_MAX_CNT_EXT_MASK;
		period &= ~IBS_OP_MAX_CNT_EXT_MASK;
	}
	config |= period >> 4;

	/*
	 * Set STARTED before enabling the hardware, such that a subsequent NMI
	 * must observe it.
	 */
	set_bit(IBS_STARTED, pcpu->state);
	clear_bit(IBS_STOPPING, pcpu->state);
	perf_ibs_enable_event(perf_ibs, hwc, config);

	perf_event_update_userpage(event);
}

static void perf_ibs_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	u64 config;
	int stopping;

	if (test_and_set_bit(IBS_STOPPING, pcpu->state))
		return;

	stopping = test_bit(IBS_STARTED, pcpu->state);

	if (!stopping && (hwc->state & PERF_HES_UPTODATE))
		return;

	rdmsrl(hwc->config_base, config);

	if (stopping) {
		/*
		 * Set STOPPED before disabling the hardware, such that it
		 * must be visible to NMIs the moment we clear the EN bit,
		 * at which point we can generate a !VALID sample which
		 * we need to consume.
		 */
		set_bit(IBS_STOPPED, pcpu->state);
		perf_ibs_disable_event(perf_ibs, hwc, config);
		/*
		 * Clear STARTED after disabling the hardware; if it were
		 * cleared before, an NMI that hits after the clear but
		 * before the EN bit is cleared might think it a spurious
		 * NMI and not handle it.
		 *
		 * Clearing it after, however, creates the problem of the NMI
		 * handler seeing STARTED but not having a valid sample.
		 */
		clear_bit(IBS_STARTED, pcpu->state);
		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;
	}

	if (hwc->state & PERF_HES_UPTODATE)
		return;

	/*
	 * Clear valid bit to not count rollovers on update, rollovers
	 * are only updated in the irq handler.
	 */
	config &= ~perf_ibs->valid_mask;

	perf_ibs_event_update(perf_ibs, event, &config);
	hwc->state |= PERF_HES_UPTODATE;
}

static int perf_ibs_add(struct perf_event *event, int flags)
{
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

	if (test_and_set_bit(IBS_ENABLED, pcpu->state))
		return -ENOSPC;

	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	pcpu->event = event;

	if (flags & PERF_EF_START)
		perf_ibs_start(event, PERF_EF_RELOAD);

	return 0;
}

static void perf_ibs_del(struct perf_event *event, int flags)
{
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

	if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
		return;

	perf_ibs_stop(event, PERF_EF_UPDATE);

	pcpu->event = NULL;

	perf_event_update_userpage(event);
}

static void perf_ibs_read(struct perf_event *event) { }
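
/*
 * The format attributes below are exported via sysfs
 * (/sys/bus/event_source/devices/ibs_fetch/format/ and .../ibs_op/format/),
 * so the bits can be set symbolically by tools that parse those files, e.g.:
 *
 *	perf record -a -e ibs_op/cnt_ctl=1/ ...		# count micro-ops instead of cycles
 *	perf record -a -e ibs_fetch/rand_en=1/ ...	# enable IbsRandEn
 *
 * cnt_ctl is only exported when the CPU advertises IBS_CAPS_OPCNT, see
 * perf_event_ibs_init().
 */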

PMU_FORMAT_ATTR(rand_en,	"config:57");
PMU_FORMAT_ATTR(cnt_ctl,	"config:19");

static struct attribute *ibs_fetch_format_attrs[] = {
	&format_attr_rand_en.attr,
	NULL,
};

static struct attribute *ibs_op_format_attrs[] = {
	NULL,	/* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
	NULL,
};

static struct perf_ibs perf_ibs_fetch = {
	.pmu = {
		.task_ctx_nr	= perf_invalid_context,

		.event_init	= perf_ibs_init,
		.add		= perf_ibs_add,
		.del		= perf_ibs_del,
		.start		= perf_ibs_start,
		.stop		= perf_ibs_stop,
		.read		= perf_ibs_read,
		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
	},
	.msr			= MSR_AMD64_IBSFETCHCTL,
	.config_mask		= IBS_FETCH_CONFIG_MASK,
	.cnt_mask		= IBS_FETCH_MAX_CNT,
	.enable_mask		= IBS_FETCH_ENABLE,
	.valid_mask		= IBS_FETCH_VAL,
	.max_period		= IBS_FETCH_MAX_CNT << 4,
	.offset_mask		= { MSR_AMD64_IBSFETCH_REG_MASK },
	.offset_max		= MSR_AMD64_IBSFETCH_REG_COUNT,
	.format_attrs		= ibs_fetch_format_attrs,

	.get_count		= get_ibs_fetch_count,
};

static struct perf_ibs perf_ibs_op = {
	.pmu = {
		.task_ctx_nr	= perf_invalid_context,

		.event_init	= perf_ibs_init,
		.add		= perf_ibs_add,
		.del		= perf_ibs_del,
		.start		= perf_ibs_start,
		.stop		= perf_ibs_stop,
		.read		= perf_ibs_read,
		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
	},
	.msr			= MSR_AMD64_IBSOPCTL,
	.config_mask		= IBS_OP_CONFIG_MASK,
	.cnt_mask		= IBS_OP_MAX_CNT | IBS_OP_CUR_CNT |
				  IBS_OP_CUR_CNT_RAND,
	.enable_mask		= IBS_OP_ENABLE,
	.valid_mask		= IBS_OP_VAL,
	.max_period		= IBS_OP_MAX_CNT << 4,
	.offset_mask		= { MSR_AMD64_IBSOP_REG_MASK },
	.offset_max		= MSR_AMD64_IBSOP_REG_COUNT,
	.format_attrs		= ibs_op_format_attrs,

	.get_count		= get_ibs_op_count,
};
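
/*
 * Layout of the PERF_SAMPLE_RAW payload assembled in perf_ibs_handle_irq()
 * below: a u32 holding ibs_caps, immediately followed by the u64 IBS register
 * values that were read, starting with the control MSR; IbsBrTarget,
 * IbsOpData4 and IbsExtdCtl are appended at the end when the corresponding
 * capability bits are set.  ibs_data.caps and ibs_data.data alias the start
 * of this buffer, see struct perf_ibs_data.
 */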

static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
{
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	struct perf_event *event = pcpu->event;
	struct hw_perf_event *hwc;
	struct perf_sample_data data;
	struct perf_raw_record raw;
	struct pt_regs regs;
	struct perf_ibs_data ibs_data;
	int offset, size, check_rip, offset_max, throttle = 0;
	unsigned int msr;
	u64 *buf, *config, period, new_config = 0;

	if (!test_bit(IBS_STARTED, pcpu->state)) {
fail:
		/*
		 * Catch spurious interrupts after stopping IBS: After
		 * disabling IBS there could be still incoming NMIs
		 * with samples that even have the valid bit cleared.
		 * Mark all these NMIs as handled.
		 */
		if (test_and_clear_bit(IBS_STOPPED, pcpu->state))
			return 1;

		return 0;
	}

	if (WARN_ON_ONCE(!event))
		goto fail;

	hwc = &event->hw;
	msr = hwc->config_base;
	buf = ibs_data.regs;
	rdmsrl(msr, *buf);
	if (!(*buf++ & perf_ibs->valid_mask))
		goto fail;

	config = &ibs_data.regs[0];
	perf_ibs_event_update(perf_ibs, event, config);
	perf_sample_data_init(&data, 0, hwc->last_period);
	if (!perf_ibs_set_period(perf_ibs, hwc, &period))
		goto out;	/* no sw counter overflow */

	ibs_data.caps = ibs_caps;
	size = 1;
	offset = 1;
	check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
	if (event->attr.sample_type & PERF_SAMPLE_RAW)
		offset_max = perf_ibs->offset_max;
	else if (check_rip)
		offset_max = 3;
	else
		offset_max = 1;
	do {
		rdmsrl(msr + offset, *buf++);
		size++;
		offset = find_next_bit(perf_ibs->offset_mask,
				       perf_ibs->offset_max,
				       offset + 1);
	} while (offset < offset_max);
	/*
	 * Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately
	 * depending on their availability.
	 * Can't add to offset_max as they are staggered.
	 */
	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
		if (perf_ibs == &perf_ibs_op) {
			if (ibs_caps & IBS_CAPS_BRNTRGT) {
				rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
				size++;
			}
			if (ibs_caps & IBS_CAPS_OPDATA4) {
				rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
				size++;
			}
		}
		if (perf_ibs == &perf_ibs_fetch && (ibs_caps & IBS_CAPS_FETCHCTLEXTD)) {
			rdmsrl(MSR_AMD64_ICIBSEXTDCTL, *buf++);
			size++;
		}
	}
	ibs_data.size = sizeof(u64) * size;

	regs = *iregs;
	if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
		regs.flags &= ~PERF_EFLAGS_EXACT;
	} else {
		/* Workaround for erratum #1197 */
		if (perf_ibs->fetch_ignore_if_zero_rip && !(ibs_data.regs[1]))
			goto out;

		set_linear_ip(&regs, ibs_data.regs[1]);
		regs.flags |= PERF_EFLAGS_EXACT;
	}

	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
		raw = (struct perf_raw_record){
			.frag = {
				.size = sizeof(u32) + ibs_data.size,
				.data = ibs_data.data,
			},
		};
		data.raw = &raw;
	}

	throttle = perf_event_overflow(event, &data, &regs);
out:
	if (throttle) {
		perf_ibs_stop(event, 0);
	} else {
		if (perf_ibs == &perf_ibs_op) {
			if (ibs_caps & IBS_CAPS_OPCNTEXT) {
				new_config = period & IBS_OP_MAX_CNT_EXT_MASK;
				period &= ~IBS_OP_MAX_CNT_EXT_MASK;
			}
			if ((ibs_caps & IBS_CAPS_RDWROPCNT) && (*config & IBS_OP_CNT_CTL))
				new_config |= *config & IBS_OP_CUR_CNT_RAND;
		}
		new_config |= period >> 4;

		perf_ibs_enable_event(perf_ibs, hwc, new_config);
	}

	perf_event_update_userpage(event);

	return 1;
}

static int
perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
	u64 stamp = sched_clock();
	int handled = 0;

	handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
	handled += perf_ibs_handle_irq(&perf_ibs_op, regs);

	if (handled)
		inc_irq_stat(apic_perf_irqs);

	perf_sample_event_took(sched_clock() - stamp);

	return handled;
}
NOKPROBE_SYMBOL(perf_ibs_nmi_handler);

static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
{
	struct cpu_perf_ibs __percpu *pcpu;
	int ret;

	pcpu = alloc_percpu(struct cpu_perf_ibs);
	if (!pcpu)
		return -ENOMEM;

	perf_ibs->pcpu = pcpu;

	/* register attributes */
	if (perf_ibs->format_attrs[0]) {
		memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
		perf_ibs->format_group.name	= "format";
		perf_ibs->format_group.attrs	= perf_ibs->format_attrs;

		memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
		perf_ibs->attr_groups[0]	= &perf_ibs->format_group;
		perf_ibs->pmu.attr_groups	= perf_ibs->attr_groups;
	}

	ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
	if (ret) {
		perf_ibs->pcpu = NULL;
		free_percpu(pcpu);
	}

	return ret;
}
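
/*
 * Usage sketch (an illustration only, not part of this driver): the PMUs
 * registered above show up as dynamic perf event sources, so a userspace
 * tool would read the type number from sysfs and pass it to
 * perf_event_open(), roughly:
 *
 *	attr.type          = <value of /sys/bus/event_source/devices/ibs_op/type>;
 *	attr.config        = 0;			// or set cnt_ctl via format bit 19
 *	attr.sample_period = 0x10000;		// multiple of 16, see perf_ibs_init()
 *	attr.sample_type  |= PERF_SAMPLE_RAW;	// to get the raw IBS register dump
 */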

static __init void perf_event_ibs_init(void)
{
	struct attribute **attr = ibs_op_format_attrs;

	/*
	 * Some chips fail to reset the fetch count when it is written; instead
	 * they need a 0-1 transition of IbsFetchEn.
	 */
	if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
		perf_ibs_fetch.fetch_count_reset_broken = 1;

	if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10)
		perf_ibs_fetch.fetch_ignore_if_zero_rip = 1;

	perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");

	if (ibs_caps & IBS_CAPS_OPCNT) {
		perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
		*attr++ = &format_attr_cnt_ctl.attr;
	}

	if (ibs_caps & IBS_CAPS_OPCNTEXT) {
		perf_ibs_op.max_period	|= IBS_OP_MAX_CNT_EXT_MASK;
		perf_ibs_op.config_mask	|= IBS_OP_MAX_CNT_EXT_MASK;
		perf_ibs_op.cnt_mask	|= IBS_OP_MAX_CNT_EXT_MASK;
	}

	perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");

	register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
	pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
}

#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */

static __init void perf_event_ibs_init(void) { }

#endif

/* IBS - apic initialization, for perf and oprofile */

static __init u32 __get_ibs_caps(void)
{
	u32 caps;
	unsigned int max_level;

	if (!boot_cpu_has(X86_FEATURE_IBS))
		return 0;

	/* check IBS cpuid feature flags */
	max_level = cpuid_eax(0x80000000);
	if (max_level < IBS_CPUID_FEATURES)
		return IBS_CAPS_DEFAULT;

	caps = cpuid_eax(IBS_CPUID_FEATURES);
	if (!(caps & IBS_CAPS_AVAIL))
		/* cpuid flags not valid */
		return IBS_CAPS_DEFAULT;

	return caps;
}

u32 get_ibs_caps(void)
{
	return ibs_caps;
}

EXPORT_SYMBOL(get_ibs_caps);

static inline int get_eilvt(int offset)
{
	return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
}

static inline int put_eilvt(int offset)
{
	return !setup_APIC_eilvt(offset, 0, 0, 1);
}

/*
 * Check and reserve APIC extended interrupt LVT offset for IBS if available.
 */
static inline int ibs_eilvt_valid(void)
{
	int offset;
	u64 val;
	int valid = 0;

	preempt_disable();

	rdmsrl(MSR_AMD64_IBSCTL, val);
	offset = val & IBSCTL_LVT_OFFSET_MASK;

	if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
		pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
		goto out;
	}

	if (!get_eilvt(offset)) {
		pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
		goto out;
	}

	valid = 1;
out:
	preempt_enable();

	return valid;
}

static int setup_ibs_ctl(int ibs_eilvt_off)
{
	struct pci_dev *cpu_cfg;
	int nodes;
	u32 value = 0;

	nodes = 0;
	cpu_cfg = NULL;
	do {
		cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
					 PCI_DEVICE_ID_AMD_10H_NB_MISC,
					 cpu_cfg);
		if (!cpu_cfg)
			break;
		++nodes;
		pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
				       | IBSCTL_LVT_OFFSET_VALID);
		pci_read_config_dword(cpu_cfg, IBSCTL, &value);
		if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
			pci_dev_put(cpu_cfg);
			pr_debug("Failed to setup IBS LVT offset, IBSCTL = 0x%08x\n",
				 value);
			return -EINVAL;
		}
	} while (1);

	if (!nodes) {
		pr_debug("No CPU node configured for IBS\n");
		return -ENODEV;
	}

	return 0;
}

/*
 * This runs only on the current cpu. We try to find an LVT offset and
 * setup the local APIC. For this we must disable preemption. On
 * success we initialize all nodes with this offset. This then updates
 * the offset in the per-node IBS_CTL msr. The per-core APIC setup of
 * the IBS interrupt vector is handled by x86_pmu_amd_ibs_starting_cpu(),
 * which uses the new offset.
 */
static void force_ibs_eilvt_setup(void)
{
	int offset;
	int ret;

	preempt_disable();
	/* find the next free available EILVT entry, skip offset 0 */
	for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
		if (get_eilvt(offset))
			break;
	}
	preempt_enable();

	if (offset == APIC_EILVT_NR_MAX) {
		pr_debug("No EILVT entry available\n");
		return;
	}

	ret = setup_ibs_ctl(offset);
	if (ret)
		goto out;

	if (!ibs_eilvt_valid())
		goto out;

	pr_info("LVT offset %d assigned\n", offset);

	return;
out:
	preempt_disable();
	put_eilvt(offset);
	preempt_enable();
	return;
}

static void ibs_eilvt_setup(void)
{
	/*
	 * Force LVT offset assignment for family 10h: The offsets are
	 * not assigned by the BIOS for this family, so the OS is
	 * responsible for doing it. If the OS assignment fails, fall
	 * back to the BIOS settings and try to use those.
	 */
	if (boot_cpu_data.x86 == 0x10)
		force_ibs_eilvt_setup();
}

static inline int get_ibs_lvt_offset(void)
{
	u64 val;

	rdmsrl(MSR_AMD64_IBSCTL, val);
	if (!(val & IBSCTL_LVT_OFFSET_VALID))
		return -EINVAL;

	return val & IBSCTL_LVT_OFFSET_MASK;
}

static void setup_APIC_ibs(void)
{
	int offset;

	offset = get_ibs_lvt_offset();
	if (offset < 0)
		goto failed;

	if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
		return;
failed:
	pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
		smp_processor_id());
}

static void clear_APIC_ibs(void)
{
	int offset;

	offset = get_ibs_lvt_offset();
	if (offset >= 0)
		setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
}

static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu)
{
	setup_APIC_ibs();
	return 0;
}

#ifdef CONFIG_PM

static int perf_ibs_suspend(void)
{
	clear_APIC_ibs();
	return 0;
}

static void perf_ibs_resume(void)
{
	ibs_eilvt_setup();
	setup_APIC_ibs();
}

static struct syscore_ops perf_ibs_syscore_ops = {
	.resume		= perf_ibs_resume,
	.suspend	= perf_ibs_suspend,
};

static void perf_ibs_pm_init(void)
{
	register_syscore_ops(&perf_ibs_syscore_ops);
}

#else

static inline void perf_ibs_pm_init(void) { }

#endif

static int x86_pmu_amd_ibs_dying_cpu(unsigned int cpu)
{
	clear_APIC_ibs();
	return 0;
}

static __init int amd_ibs_init(void)
{
	u32 caps;

	caps = __get_ibs_caps();
	if (!caps)
		return -ENODEV;	/* ibs not supported by the cpu */

	ibs_eilvt_setup();

	if (!ibs_eilvt_valid())
		return -EINVAL;

	perf_ibs_pm_init();

	ibs_caps = caps;
	/* make ibs_caps visible to other cpus: */
	smp_mb();
	/*
	 * x86_pmu_amd_ibs_starting_cpu will be called from core on
	 * all online cpus.
	 */
	cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
			  "perf/x86/amd/ibs:starting",
			  x86_pmu_amd_ibs_starting_cpu,
			  x86_pmu_amd_ibs_dying_cpu);

	perf_event_ibs_init();

	return 0;
}

/* Since we need the pci subsystem to init ibs we can't do this earlier: */
device_initcall(amd_ibs_init);