#include "uncore.h"

static struct intel_uncore_type *empty_uncore[] = { NULL, };
struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
struct intel_uncore_type **uncore_pci_uncores = empty_uncore;

static bool pcidrv_registered;
struct pci_driver *uncore_pci_driver;
/* pci bus to socket mapping */
DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
struct pci_extra_dev *uncore_extra_pci_dev;
static int max_packages;

/* mask of cpus that collect uncore events */
static cpumask_t uncore_cpu_mask;

/* constraint for the fixed counter */
static struct event_constraint uncore_constraint_fixed =
	EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
struct event_constraint uncore_constraint_empty =
	EVENT_CONSTRAINT(0, 0, 0);

static int uncore_pcibus_to_physid(struct pci_bus *bus)
{
	struct pci2phy_map *map;
	int phys_id = -1;

	raw_spin_lock(&pci2phy_map_lock);
	list_for_each_entry(map, &pci2phy_map_head, list) {
		if (map->segment == pci_domain_nr(bus)) {
			phys_id = map->pbus_to_physid[bus->number];
			break;
		}
	}
	raw_spin_unlock(&pci2phy_map_lock);

	return phys_id;
}

static void uncore_free_pcibus_map(void)
{
	struct pci2phy_map *map, *tmp;

	list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
		list_del(&map->list);
		kfree(map);
	}
}

struct pci2phy_map *__find_pci2phy_map(int segment)
{
	struct pci2phy_map *map, *alloc = NULL;
	int i;

	lockdep_assert_held(&pci2phy_map_lock);

lookup:
	list_for_each_entry(map, &pci2phy_map_head, list) {
		if (map->segment == segment)
			goto end;
	}

	if (!alloc) {
		raw_spin_unlock(&pci2phy_map_lock);
		alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
		raw_spin_lock(&pci2phy_map_lock);

		if (!alloc)
			return NULL;

		goto lookup;
	}

	map = alloc;
	alloc = NULL;
	map->segment = segment;
	for (i = 0; i < 256; i++)
		map->pbus_to_physid[i] = -1;
	list_add_tail(&map->list, &pci2phy_map_head);

end:
	kfree(alloc);
	return map;
}

ssize_t uncore_event_show(struct kobject *kobj,
			  struct kobj_attribute *attr, char *buf)
{
	struct uncore_event_desc *event =
		container_of(attr, struct uncore_event_desc, attr);
	return sprintf(buf, "%s", event->config);
}

struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
	return pmu->boxes[topology_logical_package_id(cpu)];
}

u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
{
	u64 count;

	rdmsrl(event->hw.event_base, count);

	return count;
}

/*
 * generic get constraint function for shared match/mask registers.
 */
struct event_constraint *
uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_extra_reg *er;
	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
	struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
	unsigned long flags;
	bool ok = false;

	/*
	 * reg->alloc can be set due to existing state, so for fake box we
	 * need to ignore this, otherwise we might fail to allocate proper
	 * fake state for this extra reg constraint.
	 */
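	/*
	 * Try to claim the shared register: it may be reused only while it
	 * is unreferenced or already programmed with the same config1/config2
	 * values. Returning NULL lets the generic constraint lookup proceed;
	 * the empty constraint is returned only on a conflict, which makes
	 * scheduling of the event fail.
	 */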
	if (reg1->idx == EXTRA_REG_NONE ||
	    (!uncore_box_is_fake(box) && reg1->alloc))
		return NULL;

	er = &box->shared_regs[reg1->idx];
	raw_spin_lock_irqsave(&er->lock, flags);
	if (!atomic_read(&er->ref) ||
	    (er->config1 == reg1->config && er->config2 == reg2->config)) {
		atomic_inc(&er->ref);
		er->config1 = reg1->config;
		er->config2 = reg2->config;
		ok = true;
	}
	raw_spin_unlock_irqrestore(&er->lock, flags);

	if (ok) {
		if (!uncore_box_is_fake(box))
			reg1->alloc = 1;
		return NULL;
	}

	return &uncore_constraint_empty;
}

void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_extra_reg *er;
	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;

	/*
	 * Only put the constraint if the extra reg was actually allocated.
	 * This also takes care of events which do not use an extra shared
	 * reg.
	 *
	 * Also, if this is a fake box we shouldn't touch any event state
	 * (reg->alloc) and we don't care about leaving inconsistent box
	 * state either since it will be thrown out.
	 */
	if (uncore_box_is_fake(box) || !reg1->alloc)
		return;

	er = &box->shared_regs[reg1->idx];
	atomic_dec(&er->ref);
	reg1->alloc = 0;
}

u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
{
	struct intel_uncore_extra_reg *er;
	unsigned long flags;
	u64 config;

	er = &box->shared_regs[idx];

	raw_spin_lock_irqsave(&er->lock, flags);
	config = er->config;
	raw_spin_unlock_irqrestore(&er->lock, flags);

	return config;
}

static void uncore_assign_hw_event(struct intel_uncore_box *box,
				   struct perf_event *event, int idx)
{
	struct hw_perf_event *hwc = &event->hw;

	hwc->idx = idx;
	hwc->last_tag = ++box->tags[idx];

	if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
		hwc->event_base = uncore_fixed_ctr(box);
		hwc->config_base = uncore_fixed_ctl(box);
		return;
	}

	hwc->config_base = uncore_event_ctl(box, hwc->idx);
	hwc->event_base = uncore_perf_ctr(box, hwc->idx);
}

void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
{
	u64 prev_count, new_count, delta;
	int shift;

	if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
		shift = 64 - uncore_fixed_ctr_bits(box);
	else
		shift = 64 - uncore_perf_ctr_bits(box);

	/* the hrtimer might modify the previous event value */
again:
	prev_count = local64_read(&event->hw.prev_count);
	new_count = uncore_read_counter(box, event);
	if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
		goto again;

	delta = (new_count << shift) - (prev_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
}

/*
 * The overflow interrupt is unavailable for SandyBridge-EP, is broken
 * for SandyBridge. So we use hrtimer to periodically poll the counter
 * to avoid overflow.
 */
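/*
 * A rough sketch of the wrap-around arithmetic the polling interval has to
 * beat, assuming a hypothetical 48-bit counter incrementing at 4 GHz:
 *
 *	2^48 events / 4e9 events per second ~= 70000 seconds until wrap
 *
 * so UNCORE_PMU_HRTIMER_INTERVAL only needs to be comfortably shorter than
 * the wrap time of the narrowest, fastest-incrementing counter in use.
 */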
static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
{
	struct intel_uncore_box *box;
	struct perf_event *event;
	unsigned long flags;
	int bit;

	box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
	if (!box->n_active || box->cpu != smp_processor_id())
		return HRTIMER_NORESTART;
	/*
	 * disable local interrupts to prevent uncore_pmu_event_start/stop
	 * from interrupting the update process
	 */
	local_irq_save(flags);

	/*
	 * handle boxes with an active event list as opposed to active
	 * counters
	 */
	list_for_each_entry(event, &box->active_list, active_entry) {
		uncore_perf_event_update(box, event);
	}

	for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
		uncore_perf_event_update(box, box->events[bit]);

	local_irq_restore(flags);

	hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
	return HRTIMER_RESTART;
}

void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
		      HRTIMER_MODE_REL_PINNED);
}

void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_cancel(&box->hrtimer);
}

static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
{
	hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	box->hrtimer.function = uncore_pmu_hrtimer;
}

static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
						 int node)
{
	int i, size, numshared = type->num_shared_regs;
	struct intel_uncore_box *box;

	size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);

	box = kzalloc_node(size, GFP_KERNEL, node);
	if (!box)
		return NULL;

	for (i = 0; i < numshared; i++)
		raw_spin_lock_init(&box->shared_regs[i].lock);

	uncore_pmu_init_hrtimer(box);
	box->cpu = -1;
	box->pci_phys_id = -1;
	box->pkgid = -1;

	/* set default hrtimer timeout */
	box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;

	INIT_LIST_HEAD(&box->active_list);

	return box;
}

/*
 * Using uncore_pmu_event_init pmu event_init callback
 * as a detection point for uncore events.
 */
static int uncore_pmu_event_init(struct perf_event *event);

static bool is_uncore_event(struct perf_event *event)
{
	return event->pmu->event_init == uncore_pmu_event_init;
}

static int
uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
		      bool dogrp)
{
	struct perf_event *event;
	int n, max_count;

	max_count = box->pmu->type->num_counters;
	if (box->pmu->type->fixed_ctl)
		max_count++;

	if (box->n_events >= max_count)
		return -EINVAL;

	n = box->n_events;

	if (is_uncore_event(leader)) {
		box->event_list[n] = leader;
		n++;
	}

	if (!dogrp)
		return n;

	list_for_each_entry(event, &leader->sibling_list, group_entry) {
		if (!is_uncore_event(event) ||
		    event->state <= PERF_EVENT_STATE_OFF)
			continue;

		if (n >= max_count)
			return -EINVAL;

		box->event_list[n] = event;
		n++;
	}
	return n;
}

static struct event_constraint *
uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
	struct intel_uncore_type *type = box->pmu->type;
	struct event_constraint *c;

	if (type->ops->get_constraint) {
		c = type->ops->get_constraint(box, event);
		if (c)
			return c;
	}

	if (event->attr.config == UNCORE_FIXED_EVENT)
		return &uncore_constraint_fixed;

	if (type->constraints) {
		for_each_event_constraint(c, type->constraints) {
			if ((event->hw.config & c->cmask) == c->code)
				return c;
		}
	}

	return &type->unconstrainted;
}

static void uncore_put_event_constraint(struct intel_uncore_box *box,
					struct perf_event *event)
{
	if (box->pmu->type->ops->put_constraint)
		box->pmu->type->ops->put_constraint(box, event);
}

static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
{
	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
	struct event_constraint *c;
	int i, wmin, wmax, ret = 0;
	struct hw_perf_event *hwc;

	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);

	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
		c = uncore_get_event_constraint(box, box->event_list[i]);
		box->event_constraint[i] = c;
		wmin = min(wmin, c->weight);
		wmax = max(wmax, c->weight);
	}

	/* fastpath, try to reuse previous register */
	for (i = 0; i < n; i++) {
		hwc = &box->event_list[i]->hw;
		c = box->event_constraint[i];

		/* never assigned */
		if (hwc->idx == -1)
			break;

		/* constraint still honored */
		if (!test_bit(hwc->idx, c->idxmsk))
			break;

		/* not already used */
		if (test_bit(hwc->idx, used_mask))
			break;

		__set_bit(hwc->idx, used_mask);
		if (assign)
			assign[i] = hwc->idx;
	}
	/* slow path */
	if (i != n)
		ret = perf_assign_events(box->event_constraint, n,
					 wmin, wmax, n, assign);

	if (!assign || ret) {
		for (i = 0; i < n; i++)
			uncore_put_event_constraint(box, box->event_list[i]);
	}
	return ret ? -EINVAL : 0;
}

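/*
 * pmu::start callback: snapshot the current hardware count as the new
 * baseline, program/enable the event in its assigned counter and, when the
 * first event on this box becomes active, enable the box and kick the
 * polling hrtimer.
 */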
static void uncore_pmu_event_start(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	int idx = event->hw.idx;

	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
		return;

	event->hw.state = 0;
	box->events[idx] = event;
	box->n_active++;
	__set_bit(idx, box->active_mask);

	local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
	uncore_enable_event(box, event);

	if (box->n_active == 1) {
		uncore_enable_box(box);
		uncore_pmu_start_hrtimer(box);
	}
}

static void uncore_pmu_event_stop(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	struct hw_perf_event *hwc = &event->hw;

	if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
		uncore_disable_event(box, event);
		box->n_active--;
		box->events[hwc->idx] = NULL;
		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;

		if (box->n_active == 0) {
			uncore_disable_box(box);
			uncore_pmu_cancel_hrtimer(box);
		}
	}

	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		/*
		 * Drain the remaining delta count out of an event
		 * that we are disabling:
		 */
		uncore_perf_event_update(box, event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

static int uncore_pmu_event_add(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	struct hw_perf_event *hwc = &event->hw;
	int assign[UNCORE_PMC_IDX_MAX];
	int i, n, ret;

	if (!box)
		return -ENODEV;

	ret = n = uncore_collect_events(box, event, false);
	if (ret < 0)
		return ret;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (!(flags & PERF_EF_START))
		hwc->state |= PERF_HES_ARCH;

	ret = uncore_assign_events(box, assign, n);
	if (ret)
		return ret;

	/* save events moving to new counters */
	for (i = 0; i < box->n_events; i++) {
		event = box->event_list[i];
		hwc = &event->hw;

		if (hwc->idx == assign[i] &&
		    hwc->last_tag == box->tags[assign[i]])
			continue;
		/*
		 * Ensure we don't accidentally enable a stopped
		 * counter simply because we rescheduled.
		 */
		if (hwc->state & PERF_HES_STOPPED)
			hwc->state |= PERF_HES_ARCH;

		uncore_pmu_event_stop(event, PERF_EF_UPDATE);
	}

	/* reprogram moved events into new counters */
	for (i = 0; i < n; i++) {
		event = box->event_list[i];
		hwc = &event->hw;

		if (hwc->idx != assign[i] ||
		    hwc->last_tag != box->tags[assign[i]])
			uncore_assign_hw_event(box, event, assign[i]);
		else if (i < box->n_events)
			continue;

		if (hwc->state & PERF_HES_ARCH)
			continue;

		uncore_pmu_event_start(event, 0);
	}
	box->n_events = n;

	return 0;
}

static void uncore_pmu_event_del(struct perf_event *event, int flags)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	int i;

	uncore_pmu_event_stop(event, PERF_EF_UPDATE);

	for (i = 0; i < box->n_events; i++) {
		if (event == box->event_list[i]) {
			uncore_put_event_constraint(box, event);

			for (++i; i < box->n_events; i++)
				box->event_list[i - 1] = box->event_list[i];

			--box->n_events;
			break;
		}
	}

	event->hw.idx = -1;
	event->hw.last_tag = ~0ULL;
}

void uncore_pmu_event_read(struct perf_event *event)
{
	struct intel_uncore_box *box = uncore_event_to_box(event);
	uncore_perf_event_update(box, event);
}

/*
 * validation ensures the group can be loaded onto the
 * PMU if it was the only group available.
 */
static int uncore_validate_group(struct intel_uncore_pmu *pmu,
				 struct perf_event *event)
{
	struct perf_event *leader = event->group_leader;
	struct intel_uncore_box *fake_box;
	int ret = -EINVAL, n;

	fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
	if (!fake_box)
		return -ENOMEM;

	fake_box->pmu = pmu;
	/*
	 * The event is not yet connected with its siblings, therefore
	 * we must first collect the existing siblings and then add the
	 * new event before we can simulate the scheduling.
	 */
	n = uncore_collect_events(fake_box, leader, true);
	if (n < 0)
		goto out;

	fake_box->n_events = n;
	n = uncore_collect_events(fake_box, event, false);
	if (n < 0)
		goto out;

	fake_box->n_events = n;

	ret = uncore_assign_events(fake_box, NULL, n);
out:
	kfree(fake_box);
	return ret;
}

static int uncore_pmu_event_init(struct perf_event *event)
{
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	struct hw_perf_event *hwc = &event->hw;
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	pmu = uncore_event_to_pmu(event);
	/* no device found for this pmu */
	if (pmu->func_id < 0)
		return -ENOENT;

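	/*
	 * A hypothetical example of how an event typically reaches this
	 * callback from userspace (the PMU instance name depends on the
	 * platform):
	 *
	 *	perf stat -e uncore_imc_0/event=0x01/ -a sleep 1
	 *
	 * event->attr.config carries the raw event encoding and is masked
	 * with pmu->type->event_mask further below.
	 */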
	/*
	 * The uncore PMU measures at all privilege levels all the time,
	 * so it doesn't make sense to specify any exclude bits.
	 */
	if (event->attr.exclude_user || event->attr.exclude_kernel ||
	    event->attr.exclude_hv || event->attr.exclude_idle)
		return -EINVAL;

	/* Sampling not supported yet */
	if (hwc->sample_period)
		return -EINVAL;

	/*
	 * Place all uncore events for a particular physical package
	 * onto a single cpu
	 */
	if (event->cpu < 0)
		return -EINVAL;
	box = uncore_pmu_to_box(pmu, event->cpu);
	if (!box || box->cpu < 0)
		return -EINVAL;
	event->cpu = box->cpu;
	event->pmu_private = box;

	event->hw.idx = -1;
	event->hw.last_tag = ~0ULL;
	event->hw.extra_reg.idx = EXTRA_REG_NONE;
	event->hw.branch_reg.idx = EXTRA_REG_NONE;

	if (event->attr.config == UNCORE_FIXED_EVENT) {
		/* no fixed counter */
		if (!pmu->type->fixed_ctl)
			return -EINVAL;
		/*
		 * if there is only one fixed counter, only the first pmu
		 * can access the fixed counter
		 */
		if (pmu->type->single_fixed && pmu->pmu_idx > 0)
			return -EINVAL;

		/* fixed counters have event field hardcoded to zero */
		hwc->config = 0ULL;
	} else {
		hwc->config = event->attr.config & pmu->type->event_mask;
		if (pmu->type->ops->hw_config) {
			ret = pmu->type->ops->hw_config(box, event);
			if (ret)
				return ret;
		}
	}

	if (event->group_leader != event)
		ret = uncore_validate_group(pmu, event);
	else
		ret = 0;

	return ret;
}

static ssize_t uncore_get_attr_cpumask(struct device *dev,
				       struct device_attribute *attr, char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
}

static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);

static struct attribute *uncore_pmu_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group uncore_pmu_attr_group = {
	.attrs = uncore_pmu_attrs,
};

static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
{
	int ret;

	if (!pmu->type->pmu) {
		pmu->pmu = (struct pmu) {
			.attr_groups	= pmu->type->attr_groups,
			.task_ctx_nr	= perf_invalid_context,
			.event_init	= uncore_pmu_event_init,
			.add		= uncore_pmu_event_add,
			.del		= uncore_pmu_event_del,
			.start		= uncore_pmu_event_start,
			.stop		= uncore_pmu_event_stop,
			.read		= uncore_pmu_event_read,
		};
	} else {
		pmu->pmu = *pmu->type->pmu;
		pmu->pmu.attr_groups = pmu->type->attr_groups;
	}

	if (pmu->type->num_boxes == 1) {
		if (strlen(pmu->type->name) > 0)
			sprintf(pmu->name, "uncore_%s", pmu->type->name);
		else
			sprintf(pmu->name, "uncore");
	} else {
		sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
			pmu->pmu_idx);
	}

	ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
	if (!ret)
		pmu->registered = true;
	return ret;
}

static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
{
	if (!pmu->registered)
		return;
	perf_pmu_unregister(&pmu->pmu);
	pmu->registered = false;
}

static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
{
	struct intel_uncore_pmu *pmu = type->pmus;
	struct intel_uncore_box *box;
	int i, pkg;

	if (pmu) {
		pkg = topology_physical_package_id(cpu);
		for (i = 0; i < type->num_boxes; i++, pmu++) {
			box = pmu->boxes[pkg];
			if (box)
				uncore_box_exit(box);
		}
	}
}

static void __init uncore_exit_boxes(void *dummy)
{
	struct intel_uncore_type **types;

	for (types = uncore_msr_uncores; *types; types++)
		__uncore_exit_boxes(*types, smp_processor_id());
}

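/* Free every per-package box of a pmu together with the boxes pointer array. */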
static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
{
	int pkg;

	for (pkg = 0; pkg < max_packages; pkg++)
		kfree(pmu->boxes[pkg]);
	kfree(pmu->boxes);
}

static void __init uncore_type_exit(struct intel_uncore_type *type)
{
	struct intel_uncore_pmu *pmu = type->pmus;
	int i;

	if (pmu) {
		for (i = 0; i < type->num_boxes; i++, pmu++) {
			uncore_pmu_unregister(pmu);
			uncore_free_boxes(pmu);
		}
		kfree(type->pmus);
		type->pmus = NULL;
	}
	kfree(type->events_group);
	type->events_group = NULL;
}

static void __init uncore_types_exit(struct intel_uncore_type **types)
{
	for (; *types; types++)
		uncore_type_exit(*types);
}

static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
{
	struct intel_uncore_pmu *pmus;
	struct attribute_group *attr_group;
	struct attribute **attrs;
	size_t size;
	int i, j;

	pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
	if (!pmus)
		return -ENOMEM;

	size = max_packages * sizeof(struct intel_uncore_box *);

	for (i = 0; i < type->num_boxes; i++) {
		pmus[i].func_id = setid ? i : -1;
		pmus[i].pmu_idx = i;
		pmus[i].type = type;
		pmus[i].boxes = kzalloc(size, GFP_KERNEL);
		if (!pmus[i].boxes)
			return -ENOMEM;
	}

	type->pmus = pmus;
	type->unconstrainted = (struct event_constraint)
		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
				   0, type->num_counters, 0, 0);

	if (type->event_descs) {
		for (i = 0; type->event_descs[i].attr.attr.name; i++);

		attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
				     sizeof(*attr_group), GFP_KERNEL);
		if (!attr_group)
			return -ENOMEM;

		attrs = (struct attribute **)(attr_group + 1);
		attr_group->name = "events";
		attr_group->attrs = attrs;

		for (j = 0; j < i; j++)
			attrs[j] = &type->event_descs[j].attr.attr;

		type->events_group = attr_group;
	}

	type->pmu_group = &uncore_pmu_attr_group;
	return 0;
}

static int __init
uncore_types_init(struct intel_uncore_type **types, bool setid)
{
	int ret;

	for (; *types; types++) {
		ret = uncore_type_init(*types, setid);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * add a pci uncore device
 */
static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct intel_uncore_type *type;
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	int phys_id, pkg, ret;

	phys_id = uncore_pcibus_to_physid(pdev->bus);
	if (phys_id < 0)
		return -ENODEV;

	pkg = topology_phys_to_logical_pkg(phys_id);
	if (WARN_ON_ONCE(pkg < 0))
		return -EINVAL;

	if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
		int idx = UNCORE_PCI_DEV_IDX(id->driver_data);

		uncore_extra_pci_dev[pkg].dev[idx] = pdev;
		pci_set_drvdata(pdev, NULL);
		return 0;
	}

	type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
	/*
	 * For performance monitoring units with multiple boxes,
	 * each box has a different function id.
	 */
	pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
	/*
	 * Knights Landing uses a common PCI device ID for multiple instances
	 * of an uncore PMU device type.
	 * There is only one entry per device type in the knl_uncore_pci_ids
	 * table in spite of the multiple devices present for some device
	 * types. Hence the PCI device idx would be 0 for all devices.
	 * So increment the pmu pointer to point to an unused array element.
	 */
	if (boot_cpu_data.x86_model == 87) {
		while (pmu->func_id >= 0)
			pmu++;
	}

	if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
		return -EINVAL;

	box = uncore_alloc_box(type, NUMA_NO_NODE);
	if (!box)
		return -ENOMEM;

	if (pmu->func_id < 0)
		pmu->func_id = pdev->devfn;
	else
		WARN_ON_ONCE(pmu->func_id != pdev->devfn);

	atomic_inc(&box->refcnt);
	box->pci_phys_id = phys_id;
	box->pkgid = pkg;
	box->pci_dev = pdev;
	box->pmu = pmu;
	uncore_box_init(box);
	pci_set_drvdata(pdev, box);

	pmu->boxes[pkg] = box;
	if (atomic_inc_return(&pmu->activeboxes) > 1)
		return 0;

	/* First active box registers the pmu */
	ret = uncore_pmu_register(pmu);
	if (ret) {
		pci_set_drvdata(pdev, NULL);
		pmu->boxes[pkg] = NULL;
		uncore_box_exit(box);
		kfree(box);
	}
	return ret;
}

static void uncore_pci_remove(struct pci_dev *pdev)
{
	struct intel_uncore_box *box = pci_get_drvdata(pdev);
	struct intel_uncore_pmu *pmu;
	int i, phys_id, pkg;

	phys_id = uncore_pcibus_to_physid(pdev->bus);
	pkg = topology_phys_to_logical_pkg(phys_id);

	box = pci_get_drvdata(pdev);
	if (!box) {
		for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
			if (uncore_extra_pci_dev[pkg].dev[i] == pdev) {
				uncore_extra_pci_dev[pkg].dev[i] = NULL;
				break;
			}
		}
		WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
		return;
	}

	pmu = box->pmu;
	if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
		return;

	pci_set_drvdata(pdev, NULL);
	pmu->boxes[pkg] = NULL;
	if (atomic_dec_return(&pmu->activeboxes) == 0)
		uncore_pmu_unregister(pmu);
	uncore_box_exit(box);
	kfree(box);
}

static int __init uncore_pci_init(void)
{
	size_t size;
	int ret;

	switch (boot_cpu_data.x86_model) {
	case 45: /* Sandy Bridge-EP */
		ret = snbep_uncore_pci_init();
		break;
	case 62: /* Ivy Bridge-EP */
		ret = ivbep_uncore_pci_init();
		break;
	case 63: /* Haswell-EP */
		ret = hswep_uncore_pci_init();
		break;
	case 79: /* BDX-EP */
	case 86: /* BDX-DE */
		ret = bdx_uncore_pci_init();
		break;
	case 42: /* Sandy Bridge */
		ret = snb_uncore_pci_init();
		break;
	case 58: /* Ivy Bridge */
		ret = ivb_uncore_pci_init();
		break;
	case 60: /* Haswell */
	case 69: /* Haswell Celeron */
		ret = hsw_uncore_pci_init();
		break;
	case 61: /* Broadwell */
		ret = bdw_uncore_pci_init();
		break;
	case 87: /* Knights Landing */
		ret = knl_uncore_pci_init();
		break;
	case 94: /* SkyLake */
		ret = skl_uncore_pci_init();
		break;
	default:
		return -ENODEV;
	}

	if (ret)
		return ret;

	size = max_packages * sizeof(struct pci_extra_dev);
	uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
	if (!uncore_extra_pci_dev) {
		ret = -ENOMEM;
		goto err;
	}

	ret = uncore_types_init(uncore_pci_uncores, false);
	if (ret)
		goto errtype;

	uncore_pci_driver->probe = uncore_pci_probe;
	uncore_pci_driver->remove = uncore_pci_remove;

	ret = pci_register_driver(uncore_pci_driver);
	if (ret)
		goto errtype;

	pcidrv_registered = true;
	return 0;

errtype:
	uncore_types_exit(uncore_pci_uncores);
	kfree(uncore_extra_pci_dev);
	uncore_extra_pci_dev = NULL;
	uncore_free_pcibus_map();
err:
	uncore_pci_uncores = empty_uncore;
	return ret;
}

static void __init uncore_pci_exit(void)
{
	if (pcidrv_registered) {
		pcidrv_registered = false;
		pci_unregister_driver(uncore_pci_driver);
		uncore_types_exit(uncore_pci_uncores);
		kfree(uncore_extra_pci_dev);
		uncore_free_pcibus_map();
	}
}

static void uncore_cpu_dying(int cpu)
{
	struct intel_uncore_type *type, **types = uncore_msr_uncores;
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	int i, pkg;

	pkg = topology_logical_package_id(cpu);
	for (; *types; types++) {
		type = *types;
		pmu = type->pmus;
		for (i = 0; i < type->num_boxes; i++, pmu++) {
			box = pmu->boxes[pkg];
			if (box && atomic_dec_return(&box->refcnt) == 0)
				uncore_box_exit(box);
		}
	}
}

static void uncore_cpu_starting(int cpu, bool init)
{
	struct intel_uncore_type *type, **types = uncore_msr_uncores;
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	int i, pkg, ncpus = 1;

	if (init) {
		/*
		 * On init we get the number of online cpus in the package
		 * and set refcount for all of them.
		 */
		ncpus = cpumask_weight(topology_core_cpumask(cpu));
	}

	pkg = topology_logical_package_id(cpu);
	for (; *types; types++) {
		type = *types;
		pmu = type->pmus;
		for (i = 0; i < type->num_boxes; i++, pmu++) {
			box = pmu->boxes[pkg];
			if (!box)
				continue;
			/* The first cpu on a package activates the box */
			if (atomic_add_return(ncpus, &box->refcnt) == ncpus)
				uncore_box_init(box);
		}
	}
}

static int uncore_cpu_prepare(int cpu)
{
	struct intel_uncore_type *type, **types = uncore_msr_uncores;
	struct intel_uncore_pmu *pmu;
	struct intel_uncore_box *box;
	int i, pkg;

	pkg = topology_logical_package_id(cpu);
	for (; *types; types++) {
		type = *types;
		pmu = type->pmus;
		for (i = 0; i < type->num_boxes; i++, pmu++) {
			if (pmu->boxes[pkg])
				continue;
			/* First cpu of a package allocates the box */
			box = uncore_alloc_box(type, cpu_to_node(cpu));
			if (!box)
				return -ENOMEM;
			box->pmu = pmu;
			box->pkgid = pkg;
			pmu->boxes[pkg] = box;
		}
	}
	return 0;
}

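/*
 * Move all boxes of @type from @old_cpu to @new_cpu. A negative cpu denotes
 * the missing side: -1 as @old_cpu when the first cpu of a package comes
 * online, -1 as @new_cpu when the last one goes offline. Active perf
 * contexts are migrated along with the box.
 */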
static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
				   int new_cpu)
{
	struct intel_uncore_pmu *pmu = type->pmus;
	struct intel_uncore_box *box;
	int i, pkg;

	pkg = topology_logical_package_id(old_cpu < 0 ? new_cpu : old_cpu);
	for (i = 0; i < type->num_boxes; i++, pmu++) {
		box = pmu->boxes[pkg];
		if (!box)
			continue;

		if (old_cpu < 0) {
			WARN_ON_ONCE(box->cpu != -1);
			box->cpu = new_cpu;
			continue;
		}

		WARN_ON_ONCE(box->cpu != old_cpu);
		box->cpu = -1;
		if (new_cpu < 0)
			continue;

		uncore_pmu_cancel_hrtimer(box);
		perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
		box->cpu = new_cpu;
	}
}

static void uncore_change_context(struct intel_uncore_type **uncores,
				  int old_cpu, int new_cpu)
{
	for (; *uncores; uncores++)
		uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
}

static void uncore_event_exit_cpu(int cpu)
{
	int target;

	/* Check if exiting cpu is used for collecting uncore events */
	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
		return;

	/* Find a new cpu to collect uncore events */
	target = cpumask_any_but(topology_core_cpumask(cpu), cpu);

	/* Migrate uncore events to the new target */
	if (target < nr_cpu_ids)
		cpumask_set_cpu(target, &uncore_cpu_mask);
	else
		target = -1;

	uncore_change_context(uncore_msr_uncores, cpu, target);
	uncore_change_context(uncore_pci_uncores, cpu, target);
}

static void uncore_event_init_cpu(int cpu)
{
	int target;

	/*
	 * Check if there is an online cpu in the package
	 * which collects uncore events already.
	 */
	target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
	if (target < nr_cpu_ids)
		return;

	cpumask_set_cpu(cpu, &uncore_cpu_mask);

	uncore_change_context(uncore_msr_uncores, -1, cpu);
	uncore_change_context(uncore_pci_uncores, -1, cpu);
}

static int uncore_cpu_notifier(struct notifier_block *self,
			       unsigned long action, void *hcpu)
{
	unsigned int cpu = (long)hcpu;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_UP_PREPARE:
		return notifier_from_errno(uncore_cpu_prepare(cpu));

	case CPU_STARTING:
		uncore_cpu_starting(cpu, false);
	case CPU_DOWN_FAILED:
		uncore_event_init_cpu(cpu);
		break;

	case CPU_UP_CANCELED:
	case CPU_DYING:
		uncore_cpu_dying(cpu);
		break;

	case CPU_DOWN_PREPARE:
		uncore_event_exit_cpu(cpu);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block uncore_cpu_nb = {
	.notifier_call	= uncore_cpu_notifier,
	/*
	 * to migrate uncore events, our notifier should be executed
	 * before perf core's notifier.
	 */
	.priority	= CPU_PRI_PERF + 1,
};

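/*
 * MSR based uncore PMUs are all registered up front at init time; the PCI
 * path instead registers a PMU when its first box is probed.
 */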
static int __init type_pmu_register(struct intel_uncore_type *type)
{
	int i, ret;

	for (i = 0; i < type->num_boxes; i++) {
		ret = uncore_pmu_register(&type->pmus[i]);
		if (ret)
			return ret;
	}
	return 0;
}

static int __init uncore_msr_pmus_register(void)
{
	struct intel_uncore_type **types = uncore_msr_uncores;
	int ret;

	for (; *types; types++) {
		ret = type_pmu_register(*types);
		if (ret)
			return ret;
	}
	return 0;
}

static int __init uncore_cpu_init(void)
{
	int ret;

	switch (boot_cpu_data.x86_model) {
	case 26: /* Nehalem */
	case 30:
	case 37: /* Westmere */
	case 44:
		nhm_uncore_cpu_init();
		break;
	case 42: /* Sandy Bridge */
	case 58: /* Ivy Bridge */
	case 60: /* Haswell */
	case 69: /* Haswell */
	case 70: /* Haswell */
	case 61: /* Broadwell */
	case 71: /* Broadwell */
		snb_uncore_cpu_init();
		break;
	case 45: /* Sandy Bridge-EP */
		snbep_uncore_cpu_init();
		break;
	case 46: /* Nehalem-EX */
	case 47: /* Westmere-EX aka. Xeon E7 */
		nhmex_uncore_cpu_init();
		break;
	case 62: /* Ivy Bridge-EP */
		ivbep_uncore_cpu_init();
		break;
	case 63: /* Haswell-EP */
		hswep_uncore_cpu_init();
		break;
	case 79: /* BDX-EP */
	case 86: /* BDX-DE */
		bdx_uncore_cpu_init();
		break;
	case 87: /* Knights Landing */
		knl_uncore_cpu_init();
		break;
	default:
		return -ENODEV;
	}

	ret = uncore_types_init(uncore_msr_uncores, true);
	if (ret)
		goto err;

	ret = uncore_msr_pmus_register();
	if (ret)
		goto err;
	return 0;
err:
	uncore_types_exit(uncore_msr_uncores);
	uncore_msr_uncores = empty_uncore;
	return ret;
}

static void __init uncore_cpu_setup(void *dummy)
{
	uncore_cpu_starting(smp_processor_id(), true);
}

/* Lazy to avoid allocation of a few bytes for the normal case */
static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC);

static int __init uncore_cpumask_init(bool msr)
{
	unsigned int cpu;

	for_each_online_cpu(cpu) {
		unsigned int pkg = topology_logical_package_id(cpu);
		int ret;

		if (test_and_set_bit(pkg, packages))
			continue;
		/*
		 * The first online cpu of each package allocates and takes
		 * the refcounts for all other online cpus in that package.
		 * If msrs are not enabled no allocation is required.
		 */
		if (msr) {
			ret = uncore_cpu_prepare(cpu);
			if (ret)
				return ret;
		}
		uncore_event_init_cpu(cpu);
		smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1);
	}
	__register_cpu_notifier(&uncore_cpu_nb);
	return 0;
}

static int __init intel_uncore_init(void)
{
	int pret, cret, ret;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return -ENODEV;

	if (cpu_has_hypervisor)
		return -ENODEV;

	max_packages = topology_max_packages();

	pret = uncore_pci_init();
	cret = uncore_cpu_init();

	if (cret && pret)
		return -ENODEV;

	cpu_notifier_register_begin();
	ret = uncore_cpumask_init(!cret);
	if (ret)
		goto err;
	cpu_notifier_register_done();
	return 0;

err:
	/* Undo box->init_box() */
	on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1);
	uncore_types_exit(uncore_msr_uncores);
	uncore_pci_exit();
	cpu_notifier_register_done();
	return ret;
}
device_initcall(intel_uncore_init);