1 // SPDX-License-Identifier: GPL-2.0-only 2 #include <linux/perf_event.h> 3 #include <linux/export.h> 4 #include <linux/types.h> 5 #include <linux/init.h> 6 #include <linux/slab.h> 7 #include <linux/delay.h> 8 #include <linux/jiffies.h> 9 #include <asm/apicdef.h> 10 #include <asm/nmi.h> 11 12 #include "../perf_event.h" 13 14 static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp); 15 static unsigned long perf_nmi_window; 16 17 static __initconst const u64 amd_hw_cache_event_ids 18 [PERF_COUNT_HW_CACHE_MAX] 19 [PERF_COUNT_HW_CACHE_OP_MAX] 20 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 21 { 22 [ C(L1D) ] = { 23 [ C(OP_READ) ] = { 24 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ 25 [ C(RESULT_MISS) ] = 0x0141, /* Data Cache Misses */ 26 }, 27 [ C(OP_WRITE) ] = { 28 [ C(RESULT_ACCESS) ] = 0, 29 [ C(RESULT_MISS) ] = 0, 30 }, 31 [ C(OP_PREFETCH) ] = { 32 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ 33 [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ 34 }, 35 }, 36 [ C(L1I ) ] = { 37 [ C(OP_READ) ] = { 38 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ 39 [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ 40 }, 41 [ C(OP_WRITE) ] = { 42 [ C(RESULT_ACCESS) ] = -1, 43 [ C(RESULT_MISS) ] = -1, 44 }, 45 [ C(OP_PREFETCH) ] = { 46 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ 47 [ C(RESULT_MISS) ] = 0, 48 }, 49 }, 50 [ C(LL ) ] = { 51 [ C(OP_READ) ] = { 52 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ 53 [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ 54 }, 55 [ C(OP_WRITE) ] = { 56 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ 57 [ C(RESULT_MISS) ] = 0, 58 }, 59 [ C(OP_PREFETCH) ] = { 60 [ C(RESULT_ACCESS) ] = 0, 61 [ C(RESULT_MISS) ] = 0, 62 }, 63 }, 64 [ C(DTLB) ] = { 65 [ C(OP_READ) ] = { 66 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ 67 [ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */ 68 }, 69 [ C(OP_WRITE) ] = { 70 [ C(RESULT_ACCESS) ] = 0, 71 [ C(RESULT_MISS) ] = 0, 72 }, 73 [ C(OP_PREFETCH) ] = { 74 [ C(RESULT_ACCESS) ] = 0, 75 [ C(RESULT_MISS) ] = 0, 76 }, 77 }, 78 [ C(ITLB) ] = { 79 [ C(OP_READ) ] = { 80 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ 81 [ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */ 82 }, 83 [ C(OP_WRITE) ] = { 84 [ C(RESULT_ACCESS) ] = -1, 85 [ C(RESULT_MISS) ] = -1, 86 }, 87 [ C(OP_PREFETCH) ] = { 88 [ C(RESULT_ACCESS) ] = -1, 89 [ C(RESULT_MISS) ] = -1, 90 }, 91 }, 92 [ C(BPU ) ] = { 93 [ C(OP_READ) ] = { 94 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ 95 [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ 96 }, 97 [ C(OP_WRITE) ] = { 98 [ C(RESULT_ACCESS) ] = -1, 99 [ C(RESULT_MISS) ] = -1, 100 }, 101 [ C(OP_PREFETCH) ] = { 102 [ C(RESULT_ACCESS) ] = -1, 103 [ C(RESULT_MISS) ] = -1, 104 }, 105 }, 106 [ C(NODE) ] = { 107 [ C(OP_READ) ] = { 108 [ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */ 109 [ C(RESULT_MISS) ] = 0x98e9, /* CPU Request to Memory, r */ 110 }, 111 [ C(OP_WRITE) ] = { 112 [ C(RESULT_ACCESS) ] = -1, 113 [ C(RESULT_MISS) ] = -1, 114 }, 115 [ C(OP_PREFETCH) ] = { 116 [ C(RESULT_ACCESS) ] = -1, 117 [ C(RESULT_MISS) ] = -1, 118 }, 119 }, 120 }; 121 122 static __initconst const u64 amd_hw_cache_event_ids_f17h 123 [PERF_COUNT_HW_CACHE_MAX] 124 [PERF_COUNT_HW_CACHE_OP_MAX] 125 [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 126 [C(L1D)] = { 127 [C(OP_READ)] = { 128 [C(RESULT_ACCESS)] = 0x0040, /* Data Cache Accesses */ 129 [C(RESULT_MISS)] = 0xc860, /* L2$ access from DC Miss */ 130 }, 131 [C(OP_WRITE)] = { 132 [C(RESULT_ACCESS)] = 0, 133 [C(RESULT_MISS)] = 0, 134 }, 135 [C(OP_PREFETCH)] = { 136 [C(RESULT_ACCESS)] = 0xff5a, /* h/w prefetch DC Fills */ 137 [C(RESULT_MISS)] = 0, 138 }, 139 }, 140 [C(L1I)] = { 141 [C(OP_READ)] = { 142 [C(RESULT_ACCESS)] = 0x0080, /* Instruction cache fetches */ 143 [C(RESULT_MISS)] = 0x0081, /* Instruction cache misses */ 144 }, 145 [C(OP_WRITE)] = { 146 [C(RESULT_ACCESS)] = -1, 147 [C(RESULT_MISS)] = -1, 148 }, 149 [C(OP_PREFETCH)] = { 150 [C(RESULT_ACCESS)] = 0, 151 [C(RESULT_MISS)] = 0, 152 }, 153 }, 154 [C(LL)] = { 155 [C(OP_READ)] = { 156 [C(RESULT_ACCESS)] = 0, 157 [C(RESULT_MISS)] = 0, 158 }, 159 [C(OP_WRITE)] = { 160 [C(RESULT_ACCESS)] = 0, 161 [C(RESULT_MISS)] = 0, 162 }, 163 [C(OP_PREFETCH)] = { 164 [C(RESULT_ACCESS)] = 0, 165 [C(RESULT_MISS)] = 0, 166 }, 167 }, 168 [C(DTLB)] = { 169 [C(OP_READ)] = { 170 [C(RESULT_ACCESS)] = 0xff45, /* All L2 DTLB accesses */ 171 [C(RESULT_MISS)] = 0xf045, /* L2 DTLB misses (PT walks) */ 172 }, 173 [C(OP_WRITE)] = { 174 [C(RESULT_ACCESS)] = 0, 175 [C(RESULT_MISS)] = 0, 176 }, 177 [C(OP_PREFETCH)] = { 178 [C(RESULT_ACCESS)] = 0, 179 [C(RESULT_MISS)] = 0, 180 }, 181 }, 182 [C(ITLB)] = { 183 [C(OP_READ)] = { 184 [C(RESULT_ACCESS)] = 0x0084, /* L1 ITLB misses, L2 ITLB hits */ 185 [C(RESULT_MISS)] = 0xff85, /* L1 ITLB misses, L2 misses */ 186 }, 187 [C(OP_WRITE)] = { 188 [C(RESULT_ACCESS)] = -1, 189 [C(RESULT_MISS)] = -1, 190 }, 191 [C(OP_PREFETCH)] = { 192 [C(RESULT_ACCESS)] = -1, 193 [C(RESULT_MISS)] = -1, 194 }, 195 }, 196 [C(BPU)] = { 197 [C(OP_READ)] = { 198 [C(RESULT_ACCESS)] = 0x00c2, /* Retired Branch Instr. */ 199 [C(RESULT_MISS)] = 0x00c3, /* Retired Mispredicted BI */ 200 }, 201 [C(OP_WRITE)] = { 202 [C(RESULT_ACCESS)] = -1, 203 [C(RESULT_MISS)] = -1, 204 }, 205 [C(OP_PREFETCH)] = { 206 [C(RESULT_ACCESS)] = -1, 207 [C(RESULT_MISS)] = -1, 208 }, 209 }, 210 [C(NODE)] = { 211 [C(OP_READ)] = { 212 [C(RESULT_ACCESS)] = 0, 213 [C(RESULT_MISS)] = 0, 214 }, 215 [C(OP_WRITE)] = { 216 [C(RESULT_ACCESS)] = -1, 217 [C(RESULT_MISS)] = -1, 218 }, 219 [C(OP_PREFETCH)] = { 220 [C(RESULT_ACCESS)] = -1, 221 [C(RESULT_MISS)] = -1, 222 }, 223 }, 224 }; 225 226 /* 227 * AMD Performance Monitor K7 and later, up to and including Family 16h: 228 */ 229 static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = 230 { 231 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, 232 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 233 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d, 234 [PERF_COUNT_HW_CACHE_MISSES] = 0x077e, 235 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, 236 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, 237 [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ 238 [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */ 239 }; 240 241 /* 242 * AMD Performance Monitor Family 17h and later: 243 */ 244 static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = 245 { 246 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, 247 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 248 [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, 249 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, 250 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, 251 [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287, 252 [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187, 253 }; 254 255 static u64 amd_pmu_event_map(int hw_event) 256 { 257 if (boot_cpu_data.x86 >= 0x17) 258 return amd_f17h_perfmon_event_map[hw_event]; 259 260 return amd_perfmon_event_map[hw_event]; 261 } 262 263 /* 264 * Previously calculated offsets 265 */ 266 static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly; 267 static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly; 268 269 /* 270 * Legacy CPUs: 271 * 4 counters starting at 0xc0010000 each offset by 1 272 * 273 * CPUs with core performance counter extensions: 274 * 6 counters starting at 0xc0010200 each offset by 2 275 */ 276 static inline int amd_pmu_addr_offset(int index, bool eventsel) 277 { 278 int offset; 279 280 if (!index) 281 return index; 282 283 if (eventsel) 284 offset = event_offsets[index]; 285 else 286 offset = count_offsets[index]; 287 288 if (offset) 289 return offset; 290 291 if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) 292 offset = index; 293 else 294 offset = index << 1; 295 296 if (eventsel) 297 event_offsets[index] = offset; 298 else 299 count_offsets[index] = offset; 300 301 return offset; 302 } 303 304 static int amd_core_hw_config(struct perf_event *event) 305 { 306 if (event->attr.exclude_host && event->attr.exclude_guest) 307 /* 308 * When HO == GO == 1 the hardware treats that as GO == HO == 0 309 * and will count in both modes. We don't want to count in that 310 * case so we emulate no-counting by setting US = OS = 0. 311 */ 312 event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | 313 ARCH_PERFMON_EVENTSEL_OS); 314 else if (event->attr.exclude_host) 315 event->hw.config |= AMD64_EVENTSEL_GUESTONLY; 316 else if (event->attr.exclude_guest) 317 event->hw.config |= AMD64_EVENTSEL_HOSTONLY; 318 319 return 0; 320 } 321 322 /* 323 * AMD64 events are detected based on their event codes. 324 */ 325 static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc) 326 { 327 return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff); 328 } 329 330 static inline int amd_is_nb_event(struct hw_perf_event *hwc) 331 { 332 return (hwc->config & 0xe0) == 0xe0; 333 } 334 335 static inline int amd_has_nb(struct cpu_hw_events *cpuc) 336 { 337 struct amd_nb *nb = cpuc->amd_nb; 338 339 return nb && nb->nb_id != -1; 340 } 341 342 static int amd_pmu_hw_config(struct perf_event *event) 343 { 344 int ret; 345 346 /* pass precise event sampling to ibs: */ 347 if (event->attr.precise_ip && get_ibs_caps()) 348 return -ENOENT; 349 350 if (has_branch_stack(event)) 351 return -EOPNOTSUPP; 352 353 ret = x86_pmu_hw_config(event); 354 if (ret) 355 return ret; 356 357 if (event->attr.type == PERF_TYPE_RAW) 358 event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; 359 360 return amd_core_hw_config(event); 361 } 362 363 static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc, 364 struct perf_event *event) 365 { 366 struct amd_nb *nb = cpuc->amd_nb; 367 int i; 368 369 /* 370 * need to scan whole list because event may not have 371 * been assigned during scheduling 372 * 373 * no race condition possible because event can only 374 * be removed on one CPU at a time AND PMU is disabled 375 * when we come here 376 */ 377 for (i = 0; i < x86_pmu.num_counters; i++) { 378 if (cmpxchg(nb->owners + i, event, NULL) == event) 379 break; 380 } 381 } 382 383 /* 384 * AMD64 NorthBridge events need special treatment because 385 * counter access needs to be synchronized across all cores 386 * of a package. Refer to BKDG section 3.12 387 * 388 * NB events are events measuring L3 cache, Hypertransport 389 * traffic. They are identified by an event code >= 0xe00. 390 * They measure events on the NorthBride which is shared 391 * by all cores on a package. NB events are counted on a 392 * shared set of counters. When a NB event is programmed 393 * in a counter, the data actually comes from a shared 394 * counter. Thus, access to those counters needs to be 395 * synchronized. 396 * 397 * We implement the synchronization such that no two cores 398 * can be measuring NB events using the same counters. Thus, 399 * we maintain a per-NB allocation table. The available slot 400 * is propagated using the event_constraint structure. 401 * 402 * We provide only one choice for each NB event based on 403 * the fact that only NB events have restrictions. Consequently, 404 * if a counter is available, there is a guarantee the NB event 405 * will be assigned to it. If no slot is available, an empty 406 * constraint is returned and scheduling will eventually fail 407 * for this event. 408 * 409 * Note that all cores attached the same NB compete for the same 410 * counters to host NB events, this is why we use atomic ops. Some 411 * multi-chip CPUs may have more than one NB. 412 * 413 * Given that resources are allocated (cmpxchg), they must be 414 * eventually freed for others to use. This is accomplished by 415 * calling __amd_put_nb_event_constraints() 416 * 417 * Non NB events are not impacted by this restriction. 418 */ 419 static struct event_constraint * 420 __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, 421 struct event_constraint *c) 422 { 423 struct hw_perf_event *hwc = &event->hw; 424 struct amd_nb *nb = cpuc->amd_nb; 425 struct perf_event *old; 426 int idx, new = -1; 427 428 if (!c) 429 c = &unconstrained; 430 431 if (cpuc->is_fake) 432 return c; 433 434 /* 435 * detect if already present, if so reuse 436 * 437 * cannot merge with actual allocation 438 * because of possible holes 439 * 440 * event can already be present yet not assigned (in hwc->idx) 441 * because of successive calls to x86_schedule_events() from 442 * hw_perf_group_sched_in() without hw_perf_enable() 443 */ 444 for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) { 445 if (new == -1 || hwc->idx == idx) 446 /* assign free slot, prefer hwc->idx */ 447 old = cmpxchg(nb->owners + idx, NULL, event); 448 else if (nb->owners[idx] == event) 449 /* event already present */ 450 old = event; 451 else 452 continue; 453 454 if (old && old != event) 455 continue; 456 457 /* reassign to this slot */ 458 if (new != -1) 459 cmpxchg(nb->owners + new, event, NULL); 460 new = idx; 461 462 /* already present, reuse */ 463 if (old == event) 464 break; 465 } 466 467 if (new == -1) 468 return &emptyconstraint; 469 470 return &nb->event_constraints[new]; 471 } 472 473 static struct amd_nb *amd_alloc_nb(int cpu) 474 { 475 struct amd_nb *nb; 476 int i; 477 478 nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu)); 479 if (!nb) 480 return NULL; 481 482 nb->nb_id = -1; 483 484 /* 485 * initialize all possible NB constraints 486 */ 487 for (i = 0; i < x86_pmu.num_counters; i++) { 488 __set_bit(i, nb->event_constraints[i].idxmsk); 489 nb->event_constraints[i].weight = 1; 490 } 491 return nb; 492 } 493 494 static int amd_pmu_cpu_prepare(int cpu) 495 { 496 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 497 498 WARN_ON_ONCE(cpuc->amd_nb); 499 500 if (!x86_pmu.amd_nb_constraints) 501 return 0; 502 503 cpuc->amd_nb = amd_alloc_nb(cpu); 504 if (!cpuc->amd_nb) 505 return -ENOMEM; 506 507 return 0; 508 } 509 510 static void amd_pmu_cpu_starting(int cpu) 511 { 512 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 513 void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED]; 514 struct amd_nb *nb; 515 int i, nb_id; 516 517 cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; 518 519 if (!x86_pmu.amd_nb_constraints) 520 return; 521 522 nb_id = amd_get_nb_id(cpu); 523 WARN_ON_ONCE(nb_id == BAD_APICID); 524 525 for_each_online_cpu(i) { 526 nb = per_cpu(cpu_hw_events, i).amd_nb; 527 if (WARN_ON_ONCE(!nb)) 528 continue; 529 530 if (nb->nb_id == nb_id) { 531 *onln = cpuc->amd_nb; 532 cpuc->amd_nb = nb; 533 break; 534 } 535 } 536 537 cpuc->amd_nb->nb_id = nb_id; 538 cpuc->amd_nb->refcnt++; 539 } 540 541 static void amd_pmu_cpu_dead(int cpu) 542 { 543 struct cpu_hw_events *cpuhw; 544 545 if (!x86_pmu.amd_nb_constraints) 546 return; 547 548 cpuhw = &per_cpu(cpu_hw_events, cpu); 549 550 if (cpuhw->amd_nb) { 551 struct amd_nb *nb = cpuhw->amd_nb; 552 553 if (nb->nb_id == -1 || --nb->refcnt == 0) 554 kfree(nb); 555 556 cpuhw->amd_nb = NULL; 557 } 558 } 559 560 /* 561 * When a PMC counter overflows, an NMI is used to process the event and 562 * reset the counter. NMI latency can result in the counter being updated 563 * before the NMI can run, which can result in what appear to be spurious 564 * NMIs. This function is intended to wait for the NMI to run and reset 565 * the counter to avoid possible unhandled NMI messages. 566 */ 567 #define OVERFLOW_WAIT_COUNT 50 568 569 static void amd_pmu_wait_on_overflow(int idx) 570 { 571 unsigned int i; 572 u64 counter; 573 574 /* 575 * Wait for the counter to be reset if it has overflowed. This loop 576 * should exit very, very quickly, but just in case, don't wait 577 * forever... 578 */ 579 for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) { 580 rdmsrl(x86_pmu_event_addr(idx), counter); 581 if (counter & (1ULL << (x86_pmu.cntval_bits - 1))) 582 break; 583 584 /* Might be in IRQ context, so can't sleep */ 585 udelay(1); 586 } 587 } 588 589 static void amd_pmu_disable_all(void) 590 { 591 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 592 int idx; 593 594 x86_pmu_disable_all(); 595 596 /* 597 * This shouldn't be called from NMI context, but add a safeguard here 598 * to return, since if we're in NMI context we can't wait for an NMI 599 * to reset an overflowed counter value. 600 */ 601 if (in_nmi()) 602 return; 603 604 /* 605 * Check each counter for overflow and wait for it to be reset by the 606 * NMI if it has overflowed. This relies on the fact that all active 607 * counters are always enabled when this function is caled and 608 * ARCH_PERFMON_EVENTSEL_INT is always set. 609 */ 610 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 611 if (!test_bit(idx, cpuc->active_mask)) 612 continue; 613 614 amd_pmu_wait_on_overflow(idx); 615 } 616 } 617 618 static void amd_pmu_disable_event(struct perf_event *event) 619 { 620 x86_pmu_disable_event(event); 621 622 /* 623 * This can be called from NMI context (via x86_pmu_stop). The counter 624 * may have overflowed, but either way, we'll never see it get reset 625 * by the NMI if we're already in the NMI. And the NMI latency support 626 * below will take care of any pending NMI that might have been 627 * generated by the overflow. 628 */ 629 if (in_nmi()) 630 return; 631 632 amd_pmu_wait_on_overflow(event->hw.idx); 633 } 634 635 /* 636 * Because of NMI latency, if multiple PMC counters are active or other sources 637 * of NMIs are received, the perf NMI handler can handle one or more overflowed 638 * PMC counters outside of the NMI associated with the PMC overflow. If the NMI 639 * doesn't arrive at the LAPIC in time to become a pending NMI, then the kernel 640 * back-to-back NMI support won't be active. This PMC handler needs to take into 641 * account that this can occur, otherwise this could result in unknown NMI 642 * messages being issued. Examples of this is PMC overflow while in the NMI 643 * handler when multiple PMCs are active or PMC overflow while handling some 644 * other source of an NMI. 645 * 646 * Attempt to mitigate this by creating an NMI window in which un-handled NMIs 647 * received during this window will be claimed. This prevents extending the 648 * window past when it is possible that latent NMIs should be received. The 649 * per-CPU perf_nmi_tstamp will be set to the window end time whenever perf has 650 * handled a counter. When an un-handled NMI is received, it will be claimed 651 * only if arriving within that window. 652 */ 653 static int amd_pmu_handle_irq(struct pt_regs *regs) 654 { 655 int handled; 656 657 /* Process any counter overflows */ 658 handled = x86_pmu_handle_irq(regs); 659 660 /* 661 * If a counter was handled, record a timestamp such that un-handled 662 * NMIs will be claimed if arriving within that window. 663 */ 664 if (handled) { 665 this_cpu_write(perf_nmi_tstamp, jiffies + perf_nmi_window); 666 667 return handled; 668 } 669 670 if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp))) 671 return NMI_DONE; 672 673 return NMI_HANDLED; 674 } 675 676 static struct event_constraint * 677 amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx, 678 struct perf_event *event) 679 { 680 /* 681 * if not NB event or no NB, then no constraints 682 */ 683 if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))) 684 return &unconstrained; 685 686 return __amd_get_nb_event_constraints(cpuc, event, NULL); 687 } 688 689 static void amd_put_event_constraints(struct cpu_hw_events *cpuc, 690 struct perf_event *event) 691 { 692 if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)) 693 __amd_put_nb_event_constraints(cpuc, event); 694 } 695 696 PMU_FORMAT_ATTR(event, "config:0-7,32-35"); 697 PMU_FORMAT_ATTR(umask, "config:8-15" ); 698 PMU_FORMAT_ATTR(edge, "config:18" ); 699 PMU_FORMAT_ATTR(inv, "config:23" ); 700 PMU_FORMAT_ATTR(cmask, "config:24-31" ); 701 702 static struct attribute *amd_format_attr[] = { 703 &format_attr_event.attr, 704 &format_attr_umask.attr, 705 &format_attr_edge.attr, 706 &format_attr_inv.attr, 707 &format_attr_cmask.attr, 708 NULL, 709 }; 710 711 /* AMD Family 15h */ 712 713 #define AMD_EVENT_TYPE_MASK 0x000000F0ULL 714 715 #define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL 716 #define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL 717 #define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL 718 #define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL 719 #define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL 720 #define AMD_EVENT_EX_LS 0x000000C0ULL 721 #define AMD_EVENT_DE 0x000000D0ULL 722 #define AMD_EVENT_NB 0x000000E0ULL ... 0x000000F0ULL 723 724 /* 725 * AMD family 15h event code/PMC mappings: 726 * 727 * type = event_code & 0x0F0: 728 * 729 * 0x000 FP PERF_CTL[5:3] 730 * 0x010 FP PERF_CTL[5:3] 731 * 0x020 LS PERF_CTL[5:0] 732 * 0x030 LS PERF_CTL[5:0] 733 * 0x040 DC PERF_CTL[5:0] 734 * 0x050 DC PERF_CTL[5:0] 735 * 0x060 CU PERF_CTL[2:0] 736 * 0x070 CU PERF_CTL[2:0] 737 * 0x080 IC/DE PERF_CTL[2:0] 738 * 0x090 IC/DE PERF_CTL[2:0] 739 * 0x0A0 --- 740 * 0x0B0 --- 741 * 0x0C0 EX/LS PERF_CTL[5:0] 742 * 0x0D0 DE PERF_CTL[2:0] 743 * 0x0E0 NB NB_PERF_CTL[3:0] 744 * 0x0F0 NB NB_PERF_CTL[3:0] 745 * 746 * Exceptions: 747 * 748 * 0x000 FP PERF_CTL[3], PERF_CTL[5:3] (*) 749 * 0x003 FP PERF_CTL[3] 750 * 0x004 FP PERF_CTL[3], PERF_CTL[5:3] (*) 751 * 0x00B FP PERF_CTL[3] 752 * 0x00D FP PERF_CTL[3] 753 * 0x023 DE PERF_CTL[2:0] 754 * 0x02D LS PERF_CTL[3] 755 * 0x02E LS PERF_CTL[3,0] 756 * 0x031 LS PERF_CTL[2:0] (**) 757 * 0x043 CU PERF_CTL[2:0] 758 * 0x045 CU PERF_CTL[2:0] 759 * 0x046 CU PERF_CTL[2:0] 760 * 0x054 CU PERF_CTL[2:0] 761 * 0x055 CU PERF_CTL[2:0] 762 * 0x08F IC PERF_CTL[0] 763 * 0x187 DE PERF_CTL[0] 764 * 0x188 DE PERF_CTL[0] 765 * 0x0DB EX PERF_CTL[5:0] 766 * 0x0DC LS PERF_CTL[5:0] 767 * 0x0DD LS PERF_CTL[5:0] 768 * 0x0DE LS PERF_CTL[5:0] 769 * 0x0DF LS PERF_CTL[5:0] 770 * 0x1C0 EX PERF_CTL[5:3] 771 * 0x1D6 EX PERF_CTL[5:0] 772 * 0x1D8 EX PERF_CTL[5:0] 773 * 774 * (*) depending on the umask all FPU counters may be used 775 * (**) only one unitmask enabled at a time 776 */ 777 778 static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); 779 static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); 780 static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); 781 static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); 782 static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); 783 static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); 784 785 static struct event_constraint * 786 amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx, 787 struct perf_event *event) 788 { 789 struct hw_perf_event *hwc = &event->hw; 790 unsigned int event_code = amd_get_event_code(hwc); 791 792 switch (event_code & AMD_EVENT_TYPE_MASK) { 793 case AMD_EVENT_FP: 794 switch (event_code) { 795 case 0x000: 796 if (!(hwc->config & 0x0000F000ULL)) 797 break; 798 if (!(hwc->config & 0x00000F00ULL)) 799 break; 800 return &amd_f15_PMC3; 801 case 0x004: 802 if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1) 803 break; 804 return &amd_f15_PMC3; 805 case 0x003: 806 case 0x00B: 807 case 0x00D: 808 return &amd_f15_PMC3; 809 } 810 return &amd_f15_PMC53; 811 case AMD_EVENT_LS: 812 case AMD_EVENT_DC: 813 case AMD_EVENT_EX_LS: 814 switch (event_code) { 815 case 0x023: 816 case 0x043: 817 case 0x045: 818 case 0x046: 819 case 0x054: 820 case 0x055: 821 return &amd_f15_PMC20; 822 case 0x02D: 823 return &amd_f15_PMC3; 824 case 0x02E: 825 return &amd_f15_PMC30; 826 case 0x031: 827 if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1) 828 return &amd_f15_PMC20; 829 return &emptyconstraint; 830 case 0x1C0: 831 return &amd_f15_PMC53; 832 default: 833 return &amd_f15_PMC50; 834 } 835 case AMD_EVENT_CU: 836 case AMD_EVENT_IC_DE: 837 case AMD_EVENT_DE: 838 switch (event_code) { 839 case 0x08F: 840 case 0x187: 841 case 0x188: 842 return &amd_f15_PMC0; 843 case 0x0DB ... 0x0DF: 844 case 0x1D6: 845 case 0x1D8: 846 return &amd_f15_PMC50; 847 default: 848 return &amd_f15_PMC20; 849 } 850 case AMD_EVENT_NB: 851 /* moved to uncore.c */ 852 return &emptyconstraint; 853 default: 854 return &emptyconstraint; 855 } 856 } 857 858 static ssize_t amd_event_sysfs_show(char *page, u64 config) 859 { 860 u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) | 861 (config & AMD64_EVENTSEL_EVENT) >> 24; 862 863 return x86_event_sysfs_show(page, config, event); 864 } 865 866 static __initconst const struct x86_pmu amd_pmu = { 867 .name = "AMD", 868 .handle_irq = amd_pmu_handle_irq, 869 .disable_all = amd_pmu_disable_all, 870 .enable_all = x86_pmu_enable_all, 871 .enable = x86_pmu_enable_event, 872 .disable = amd_pmu_disable_event, 873 .hw_config = amd_pmu_hw_config, 874 .schedule_events = x86_schedule_events, 875 .eventsel = MSR_K7_EVNTSEL0, 876 .perfctr = MSR_K7_PERFCTR0, 877 .addr_offset = amd_pmu_addr_offset, 878 .event_map = amd_pmu_event_map, 879 .max_events = ARRAY_SIZE(amd_perfmon_event_map), 880 .num_counters = AMD64_NUM_COUNTERS, 881 .cntval_bits = 48, 882 .cntval_mask = (1ULL << 48) - 1, 883 .apic = 1, 884 /* use highest bit to detect overflow */ 885 .max_period = (1ULL << 47) - 1, 886 .get_event_constraints = amd_get_event_constraints, 887 .put_event_constraints = amd_put_event_constraints, 888 889 .format_attrs = amd_format_attr, 890 .events_sysfs_show = amd_event_sysfs_show, 891 892 .cpu_prepare = amd_pmu_cpu_prepare, 893 .cpu_starting = amd_pmu_cpu_starting, 894 .cpu_dead = amd_pmu_cpu_dead, 895 896 .amd_nb_constraints = 1, 897 }; 898 899 static int __init amd_core_pmu_init(void) 900 { 901 if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) 902 return 0; 903 904 /* Avoid calulating the value each time in the NMI handler */ 905 perf_nmi_window = msecs_to_jiffies(100); 906 907 switch (boot_cpu_data.x86) { 908 case 0x15: 909 pr_cont("Fam15h "); 910 x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; 911 break; 912 case 0x17: 913 pr_cont("Fam17h "); 914 /* 915 * In family 17h, there are no event constraints in the PMC hardware. 916 * We fallback to using default amd_get_event_constraints. 917 */ 918 break; 919 case 0x18: 920 pr_cont("Fam18h "); 921 /* Using default amd_get_event_constraints. */ 922 break; 923 default: 924 pr_err("core perfctr but no constraints; unknown hardware!\n"); 925 return -ENODEV; 926 } 927 928 /* 929 * If core performance counter extensions exists, we must use 930 * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also 931 * amd_pmu_addr_offset(). 932 */ 933 x86_pmu.eventsel = MSR_F15H_PERF_CTL; 934 x86_pmu.perfctr = MSR_F15H_PERF_CTR; 935 x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE; 936 /* 937 * AMD Core perfctr has separate MSRs for the NB events, see 938 * the amd/uncore.c driver. 939 */ 940 x86_pmu.amd_nb_constraints = 0; 941 942 pr_cont("core perfctr, "); 943 return 0; 944 } 945 946 __init int amd_pmu_init(void) 947 { 948 int ret; 949 950 /* Performance-monitoring supported from K7 and later: */ 951 if (boot_cpu_data.x86 < 6) 952 return -ENODEV; 953 954 x86_pmu = amd_pmu; 955 956 ret = amd_core_pmu_init(); 957 if (ret) 958 return ret; 959 960 if (num_possible_cpus() == 1) { 961 /* 962 * No point in allocating data structures to serialize 963 * against other CPUs, when there is only the one CPU. 964 */ 965 x86_pmu.amd_nb_constraints = 0; 966 } 967 968 if (boot_cpu_data.x86 >= 0x17) 969 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids_f17h, sizeof(hw_cache_event_ids)); 970 else 971 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 972 973 return 0; 974 } 975 976 void amd_pmu_enable_virt(void) 977 { 978 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 979 980 cpuc->perf_ctr_virt_mask = 0; 981 982 /* Reload all events */ 983 amd_pmu_disable_all(); 984 x86_pmu_enable_all(0); 985 } 986 EXPORT_SYMBOL_GPL(amd_pmu_enable_virt); 987 988 void amd_pmu_disable_virt(void) 989 { 990 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 991 992 /* 993 * We only mask out the Host-only bit so that host-only counting works 994 * when SVM is disabled. If someone sets up a guest-only counter when 995 * SVM is disabled the Guest-only bits still gets set and the counter 996 * will not count anything. 997 */ 998 cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; 999 1000 /* Reload all events */ 1001 amd_pmu_disable_all(); 1002 x86_pmu_enable_all(0); 1003 } 1004 EXPORT_SYMBOL_GPL(amd_pmu_disable_virt); 1005