xref: /openbmc/linux/arch/x86/events/intel/uncore.c (revision 700364da)
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/module.h>
3 
4 #include <asm/cpu_device_id.h>
5 #include <asm/intel-family.h>
6 #include "uncore.h"
7 
8 static struct intel_uncore_type *empty_uncore[] = { NULL, };
9 struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
10 struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
11 struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;
12 
13 static bool pcidrv_registered;
14 struct pci_driver *uncore_pci_driver;
16 /* The PCI sub driver for devices which the uncore driver doesn't own. */
16 struct pci_driver *uncore_pci_sub_driver;
17 /* pci bus to socket mapping */
18 DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
19 struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
20 struct pci_extra_dev *uncore_extra_pci_dev;
21 int __uncore_max_dies;
22 
23 /* mask of cpus that collect uncore events */
24 static cpumask_t uncore_cpu_mask;
25 
26 /* constraint for the fixed counter */
27 static struct event_constraint uncore_constraint_fixed =
28 	EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
29 struct event_constraint uncore_constraint_empty =
30 	EVENT_CONSTRAINT(0, 0, 0);
31 
32 MODULE_LICENSE("GPL");
33 
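/*
 * Translate a PCI bus (segment + bus number) to the physical package id
 * recorded in the pci2phy map; returns -1 for unknown buses.
 */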
34 int uncore_pcibus_to_physid(struct pci_bus *bus)
35 {
36 	struct pci2phy_map *map;
37 	int phys_id = -1;
38 
39 	raw_spin_lock(&pci2phy_map_lock);
40 	list_for_each_entry(map, &pci2phy_map_head, list) {
41 		if (map->segment == pci_domain_nr(bus)) {
42 			phys_id = map->pbus_to_physid[bus->number];
43 			break;
44 		}
45 	}
46 	raw_spin_unlock(&pci2phy_map_lock);
47 
48 	return phys_id;
49 }
50 
51 static void uncore_free_pcibus_map(void)
52 {
53 	struct pci2phy_map *map, *tmp;
54 
55 	list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
56 		list_del(&map->list);
57 		kfree(map);
58 	}
59 }
60 
61 struct pci2phy_map *__find_pci2phy_map(int segment)
62 {
63 	struct pci2phy_map *map, *alloc = NULL;
64 	int i;
65 
66 	lockdep_assert_held(&pci2phy_map_lock);
67 
68 lookup:
69 	list_for_each_entry(map, &pci2phy_map_head, list) {
70 		if (map->segment == segment)
71 			goto end;
72 	}
73 
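	/*
	 * No entry for this segment yet.  Drop the lock so the allocation
	 * can sleep (GFP_KERNEL), re-take it and redo the lookup in case
	 * another caller installed a map for this segment meanwhile; a
	 * then-unneeded allocation is freed at 'end'.
	 */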
74 	if (!alloc) {
75 		raw_spin_unlock(&pci2phy_map_lock);
76 		alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
77 		raw_spin_lock(&pci2phy_map_lock);
78 
79 		if (!alloc)
80 			return NULL;
81 
82 		goto lookup;
83 	}
84 
85 	map = alloc;
86 	alloc = NULL;
87 	map->segment = segment;
88 	for (i = 0; i < 256; i++)
89 		map->pbus_to_physid[i] = -1;
90 	list_add_tail(&map->list, &pci2phy_map_head);
91 
92 end:
93 	kfree(alloc);
94 	return map;
95 }
96 
97 ssize_t uncore_event_show(struct kobject *kobj,
98 			  struct kobj_attribute *attr, char *buf)
99 {
100 	struct uncore_event_desc *event =
101 		container_of(attr, struct uncore_event_desc, attr);
102 	return sprintf(buf, "%s", event->config);
103 }
104 
105 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
106 {
107 	unsigned int dieid = topology_logical_die_id(cpu);
108 
109 	/*
110 	 * The unsigned check also catches the '-1' return value for
111 	 * non-existent mappings in the topology map.
112 	 */
113 	return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL;
114 }
115 
116 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
117 {
118 	u64 count;
119 
120 	rdmsrl(event->hw.event_base, count);
121 
122 	return count;
123 }
124 
125 void uncore_mmio_exit_box(struct intel_uncore_box *box)
126 {
127 	if (box->io_addr)
128 		iounmap(box->io_addr);
129 }
130 
131 u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
132 			     struct perf_event *event)
133 {
134 	if (!box->io_addr)
135 		return 0;
136 
137 	if (!uncore_mmio_is_valid_offset(box, event->hw.event_base))
138 		return 0;
139 
140 	return readq(box->io_addr + event->hw.event_base);
141 }
142 
143 /*
144  * generic get constraint function for shared match/mask registers.
145  */
146 struct event_constraint *
147 uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
148 {
149 	struct intel_uncore_extra_reg *er;
150 	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
151 	struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
152 	unsigned long flags;
153 	bool ok = false;
154 
155 	/*
156 	 * reg->alloc can be set due to existing state, so for a fake box we
157 	 * need to ignore it, otherwise we might fail to allocate proper
158 	 * fake state for this extra reg constraint.
159 	 */
160 	if (reg1->idx == EXTRA_REG_NONE ||
161 	    (!uncore_box_is_fake(box) && reg1->alloc))
162 		return NULL;
163 
164 	er = &box->shared_regs[reg1->idx];
165 	raw_spin_lock_irqsave(&er->lock, flags);
166 	if (!atomic_read(&er->ref) ||
167 	    (er->config1 == reg1->config && er->config2 == reg2->config)) {
168 		atomic_inc(&er->ref);
169 		er->config1 = reg1->config;
170 		er->config2 = reg2->config;
171 		ok = true;
172 	}
173 	raw_spin_unlock_irqrestore(&er->lock, flags);
174 
175 	if (ok) {
176 		if (!uncore_box_is_fake(box))
177 			reg1->alloc = 1;
178 		return NULL;
179 	}
180 
181 	return &uncore_constraint_empty;
182 }
183 
184 void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
185 {
186 	struct intel_uncore_extra_reg *er;
187 	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
188 
189 	/*
190 	 * Only put constraint if extra reg was actually allocated. Also
191 	 * takes care of events which do not use an extra shared reg.
192 	 *
193 	 * Also, if this is a fake box we shouldn't touch any event state
194 	 * (reg->alloc) and we don't care about leaving inconsistent box
195 	 * state either since it will be thrown out.
196 	 */
197 	if (uncore_box_is_fake(box) || !reg1->alloc)
198 		return;
199 
200 	er = &box->shared_regs[reg1->idx];
201 	atomic_dec(&er->ref);
202 	reg1->alloc = 0;
203 }
204 
205 u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
206 {
207 	struct intel_uncore_extra_reg *er;
208 	unsigned long flags;
209 	u64 config;
210 
211 	er = &box->shared_regs[idx];
212 
213 	raw_spin_lock_irqsave(&er->lock, flags);
214 	config = er->config;
215 	raw_spin_unlock_irqrestore(&er->lock, flags);
216 
217 	return config;
218 }
219 
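/*
 * Bind an event to the counter at index 'idx' by recording its control
 * and counter register addresses; the fixed index uses the dedicated
 * fixed control/counter registers.
 */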
220 static void uncore_assign_hw_event(struct intel_uncore_box *box,
221 				   struct perf_event *event, int idx)
222 {
223 	struct hw_perf_event *hwc = &event->hw;
224 
225 	hwc->idx = idx;
226 	hwc->last_tag = ++box->tags[idx];
227 
228 	if (uncore_pmc_fixed(hwc->idx)) {
229 		hwc->event_base = uncore_fixed_ctr(box);
230 		hwc->config_base = uncore_fixed_ctl(box);
231 		return;
232 	}
233 
234 	hwc->config_base = uncore_event_ctl(box, hwc->idx);
235 	hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
236 }
237 
238 void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
239 {
240 	u64 prev_count, new_count, delta;
241 	int shift;
242 
243 	if (uncore_pmc_freerunning(event->hw.idx))
244 		shift = 64 - uncore_freerunning_bits(box, event);
245 	else if (uncore_pmc_fixed(event->hw.idx))
246 		shift = 64 - uncore_fixed_ctr_bits(box);
247 	else
248 		shift = 64 - uncore_perf_ctr_bits(box);
249 
250 	/* the hrtimer might modify the previous event value */
251 again:
252 	prev_count = local64_read(&event->hw.prev_count);
253 	new_count = uncore_read_counter(box, event);
254 	if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
255 		goto again;
256 
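	/*
	 * The counter may be narrower than 64 bit.  Shifting both values
	 * up so the counter's MSB lands in bit 63 lets the subtraction
	 * wrap correctly across a counter overflow; shifting the result
	 * back down restores the scale.
	 */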
257 	delta = (new_count << shift) - (prev_count << shift);
258 	delta >>= shift;
259 
260 	local64_add(delta, &event->count);
261 }
262 
263 /*
264  * The overflow interrupt is unavailable for SandyBridge-EP and broken
265  * for SandyBridge, so we use an hrtimer to poll the counters
266  * periodically and avoid losing counts to overflow.
267  */
268 static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
269 {
270 	struct intel_uncore_box *box;
271 	struct perf_event *event;
272 	unsigned long flags;
273 	int bit;
274 
275 	box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
276 	if (!box->n_active || box->cpu != smp_processor_id())
277 		return HRTIMER_NORESTART;
278 	/*
279 	 * disable local interrupts to prevent uncore_pmu_event_start/stop
280 	 * from interrupting the update process
281 	 */
282 	local_irq_save(flags);
283 
284 	/*
285 	 * handle boxes with an active event list as opposed to active
286 	 * counters
287 	 */
288 	list_for_each_entry(event, &box->active_list, active_entry) {
289 		uncore_perf_event_update(box, event);
290 	}
291 
292 	for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
293 		uncore_perf_event_update(box, box->events[bit]);
294 
295 	local_irq_restore(flags);
296 
297 	hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
298 	return HRTIMER_RESTART;
299 }
300 
301 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
302 {
303 	hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
304 		      HRTIMER_MODE_REL_PINNED);
305 }
306 
307 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
308 {
309 	hrtimer_cancel(&box->hrtimer);
310 }
311 
312 static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
313 {
314 	hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
315 	box->hrtimer.function = uncore_pmu_hrtimer;
316 }
317 
318 static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
319 						 int node)
320 {
321 	int i, size, numshared = type->num_shared_regs;
322 	struct intel_uncore_box *box;
323 
324 	size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
325 
326 	box = kzalloc_node(size, GFP_KERNEL, node);
327 	if (!box)
328 		return NULL;
329 
330 	for (i = 0; i < numshared; i++)
331 		raw_spin_lock_init(&box->shared_regs[i].lock);
332 
333 	uncore_pmu_init_hrtimer(box);
334 	box->cpu = -1;
335 	box->pci_phys_id = -1;
336 	box->dieid = -1;
337 
338 	/* set default hrtimer timeout */
339 	box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
340 
341 	INIT_LIST_HEAD(&box->active_list);
342 
343 	return box;
344 }
345 
346 /*
347  * Use the uncore_pmu_event_init() pmu event_init callback
348  * as a detection point for uncore events.
349  */
350 static int uncore_pmu_event_init(struct perf_event *event);
351 
352 static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
353 {
354 	return &box->pmu->pmu == event->pmu;
355 }
356 
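/*
 * Collect the leader (if it belongs to this box's PMU) and, when dogrp is
 * set, its sibling events into box->event_list starting at box->n_events.
 * Returns the new number of collected events or -EINVAL if the box would
 * exceed its counter capacity.
 */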
357 static int
358 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
359 		      bool dogrp)
360 {
361 	struct perf_event *event;
362 	int n, max_count;
363 
364 	max_count = box->pmu->type->num_counters;
365 	if (box->pmu->type->fixed_ctl)
366 		max_count++;
367 
368 	if (box->n_events >= max_count)
369 		return -EINVAL;
370 
371 	n = box->n_events;
372 
373 	if (is_box_event(box, leader)) {
374 		box->event_list[n] = leader;
375 		n++;
376 	}
377 
378 	if (!dogrp)
379 		return n;
380 
381 	for_each_sibling_event(event, leader) {
382 		if (!is_box_event(box, event) ||
383 		    event->state <= PERF_EVENT_STATE_OFF)
384 			continue;
385 
386 		if (n >= max_count)
387 			return -EINVAL;
388 
389 		box->event_list[n] = event;
390 		n++;
391 	}
392 	return n;
393 }
394 
395 static struct event_constraint *
396 uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
397 {
398 	struct intel_uncore_type *type = box->pmu->type;
399 	struct event_constraint *c;
400 
401 	if (type->ops->get_constraint) {
402 		c = type->ops->get_constraint(box, event);
403 		if (c)
404 			return c;
405 	}
406 
407 	if (event->attr.config == UNCORE_FIXED_EVENT)
408 		return &uncore_constraint_fixed;
409 
410 	if (type->constraints) {
411 		for_each_event_constraint(c, type->constraints) {
412 			if ((event->hw.config & c->cmask) == c->code)
413 				return c;
414 		}
415 	}
416 
417 	return &type->unconstrainted;
418 }
419 
420 static void uncore_put_event_constraint(struct intel_uncore_box *box,
421 					struct perf_event *event)
422 {
423 	if (box->pmu->type->ops->put_constraint)
424 		box->pmu->type->ops->put_constraint(box, event);
425 }
426 
427 static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
428 {
429 	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
430 	struct event_constraint *c;
431 	int i, wmin, wmax, ret = 0;
432 	struct hw_perf_event *hwc;
433 
434 	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
435 
436 	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
437 		c = uncore_get_event_constraint(box, box->event_list[i]);
438 		box->event_constraint[i] = c;
439 		wmin = min(wmin, c->weight);
440 		wmax = max(wmax, c->weight);
441 	}
442 
443 	/* fastpath, try to reuse previous register */
444 	for (i = 0; i < n; i++) {
445 		hwc = &box->event_list[i]->hw;
446 		c = box->event_constraint[i];
447 
448 		/* never assigned */
449 		if (hwc->idx == -1)
450 			break;
451 
452 		/* constraint still honored */
453 		if (!test_bit(hwc->idx, c->idxmsk))
454 			break;
455 
456 		/* not already used */
457 		if (test_bit(hwc->idx, used_mask))
458 			break;
459 
460 		__set_bit(hwc->idx, used_mask);
461 		if (assign)
462 			assign[i] = hwc->idx;
463 	}
464 	/* slow path */
465 	if (i != n)
466 		ret = perf_assign_events(box->event_constraint, n,
467 					 wmin, wmax, n, assign);
468 
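	/*
	 * On a dry run (assign == NULL, used by group validation) or on
	 * failure, release any shared-register state taken by
	 * uncore_get_event_constraint() above.
	 */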
469 	if (!assign || ret) {
470 		for (i = 0; i < n; i++)
471 			uncore_put_event_constraint(box, box->event_list[i]);
472 	}
473 	return ret ? -EINVAL : 0;
474 }
475 
476 void uncore_pmu_event_start(struct perf_event *event, int flags)
477 {
478 	struct intel_uncore_box *box = uncore_event_to_box(event);
479 	int idx = event->hw.idx;
480 
481 	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
482 		return;
483 
484 	/*
485 	 * A free running counter is read-only and always active.
486 	 * Use the current counter value as the start point.
487 	 * There is no overflow interrupt for free running counters, so
488 	 * the hrtimer is used to poll them periodically to avoid overflow.
489 	 */
490 	if (uncore_pmc_freerunning(event->hw.idx)) {
491 		list_add_tail(&event->active_entry, &box->active_list);
492 		local64_set(&event->hw.prev_count,
493 			    uncore_read_counter(box, event));
494 		if (box->n_active++ == 0)
495 			uncore_pmu_start_hrtimer(box);
496 		return;
497 	}
498 
499 	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
500 		return;
501 
502 	event->hw.state = 0;
503 	box->events[idx] = event;
504 	box->n_active++;
505 	__set_bit(idx, box->active_mask);
506 
507 	local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
508 	uncore_enable_event(box, event);
509 
510 	if (box->n_active == 1)
511 		uncore_pmu_start_hrtimer(box);
512 }
513 
514 void uncore_pmu_event_stop(struct perf_event *event, int flags)
515 {
516 	struct intel_uncore_box *box = uncore_event_to_box(event);
517 	struct hw_perf_event *hwc = &event->hw;
518 
519 	/* Cannot disable a free running counter, which is read-only */
520 	if (uncore_pmc_freerunning(hwc->idx)) {
521 		list_del(&event->active_entry);
522 		if (--box->n_active == 0)
523 			uncore_pmu_cancel_hrtimer(box);
524 		uncore_perf_event_update(box, event);
525 		return;
526 	}
527 
528 	if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
529 		uncore_disable_event(box, event);
530 		box->n_active--;
531 		box->events[hwc->idx] = NULL;
532 		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
533 		hwc->state |= PERF_HES_STOPPED;
534 
535 		if (box->n_active == 0)
536 			uncore_pmu_cancel_hrtimer(box);
537 	}
538 
539 	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
540 		/*
541 		 * Drain the remaining delta count out of an event
542 		 * that we are disabling:
543 		 */
544 		uncore_perf_event_update(box, event);
545 		hwc->state |= PERF_HES_UPTODATE;
546 	}
547 }
548 
549 int uncore_pmu_event_add(struct perf_event *event, int flags)
550 {
551 	struct intel_uncore_box *box = uncore_event_to_box(event);
552 	struct hw_perf_event *hwc = &event->hw;
553 	int assign[UNCORE_PMC_IDX_MAX];
554 	int i, n, ret;
555 
556 	if (!box)
557 		return -ENODEV;
558 
559 	/*
560 	 * The free running counter is assigned in event_init().
561 	 * The free running counter event and the free running counter
562 	 * are 1:1 mapped, so it doesn't need to be tracked in event_list.
563 	 */
564 	if (uncore_pmc_freerunning(hwc->idx)) {
565 		if (flags & PERF_EF_START)
566 			uncore_pmu_event_start(event, 0);
567 		return 0;
568 	}
569 
570 	ret = n = uncore_collect_events(box, event, false);
571 	if (ret < 0)
572 		return ret;
573 
574 	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
575 	if (!(flags & PERF_EF_START))
576 		hwc->state |= PERF_HES_ARCH;
577 
578 	ret = uncore_assign_events(box, assign, n);
579 	if (ret)
580 		return ret;
581 
582 	/* save events moving to new counters */
583 	for (i = 0; i < box->n_events; i++) {
584 		event = box->event_list[i];
585 		hwc = &event->hw;
586 
587 		if (hwc->idx == assign[i] &&
588 			hwc->last_tag == box->tags[assign[i]])
589 			continue;
590 		/*
591 		 * Ensure we don't accidentally enable a stopped
592 		 * counter simply because we rescheduled.
593 		 */
594 		if (hwc->state & PERF_HES_STOPPED)
595 			hwc->state |= PERF_HES_ARCH;
596 
597 		uncore_pmu_event_stop(event, PERF_EF_UPDATE);
598 	}
599 
600 	/* reprogram moved events into new counters */
601 	for (i = 0; i < n; i++) {
602 		event = box->event_list[i];
603 		hwc = &event->hw;
604 
605 		if (hwc->idx != assign[i] ||
606 			hwc->last_tag != box->tags[assign[i]])
607 			uncore_assign_hw_event(box, event, assign[i]);
608 		else if (i < box->n_events)
609 			continue;
610 
611 		if (hwc->state & PERF_HES_ARCH)
612 			continue;
613 
614 		uncore_pmu_event_start(event, 0);
615 	}
616 	box->n_events = n;
617 
618 	return 0;
619 }
620 
621 void uncore_pmu_event_del(struct perf_event *event, int flags)
622 {
623 	struct intel_uncore_box *box = uncore_event_to_box(event);
624 	int i;
625 
626 	uncore_pmu_event_stop(event, PERF_EF_UPDATE);
627 
628 	/*
629 	 * The event for a free running counter is not tracked in event_list.
630 	 * Since the event and the free running counter are 1:1 mapped,
631 	 * there is no need to force event->hw.idx = -1 to reassign the counter.
632 	 */
633 	if (uncore_pmc_freerunning(event->hw.idx))
634 		return;
635 
636 	for (i = 0; i < box->n_events; i++) {
637 		if (event == box->event_list[i]) {
638 			uncore_put_event_constraint(box, event);
639 
640 			for (++i; i < box->n_events; i++)
641 				box->event_list[i - 1] = box->event_list[i];
642 
643 			--box->n_events;
644 			break;
645 		}
646 	}
647 
648 	event->hw.idx = -1;
649 	event->hw.last_tag = ~0ULL;
650 }
651 
652 void uncore_pmu_event_read(struct perf_event *event)
653 {
654 	struct intel_uncore_box *box = uncore_event_to_box(event);
655 	uncore_perf_event_update(box, event);
656 }
657 
658 /*
659  * Validation ensures the group could be loaded onto the
660  * PMU if it were the only group available.
661  */
662 static int uncore_validate_group(struct intel_uncore_pmu *pmu,
663 				struct perf_event *event)
664 {
665 	struct perf_event *leader = event->group_leader;
666 	struct intel_uncore_box *fake_box;
667 	int ret = -EINVAL, n;
668 
669 	/* The free running counter is always active. */
670 	if (uncore_pmc_freerunning(event->hw.idx))
671 		return 0;
672 
673 	fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
674 	if (!fake_box)
675 		return -ENOMEM;
676 
677 	fake_box->pmu = pmu;
678 	/*
679 	 * the event is not yet connected with its
680 	 * The event is not yet connected with its siblings, therefore
681 	 * we must first collect the existing siblings and then add the
682 	 * new event before we can simulate the scheduling.
683 	 */
684 	n = uncore_collect_events(fake_box, leader, true);
685 	if (n < 0)
686 		goto out;
687 
688 	fake_box->n_events = n;
689 	n = uncore_collect_events(fake_box, event, false);
690 	if (n < 0)
691 		goto out;
692 
693 	fake_box->n_events = n;
694 
695 	ret = uncore_assign_events(fake_box, NULL, n);
696 out:
697 	kfree(fake_box);
698 	return ret;
699 }
700 
701 static int uncore_pmu_event_init(struct perf_event *event)
702 {
703 	struct intel_uncore_pmu *pmu;
704 	struct intel_uncore_box *box;
705 	struct hw_perf_event *hwc = &event->hw;
706 	int ret;
707 
708 	if (event->attr.type != event->pmu->type)
709 		return -ENOENT;
710 
711 	pmu = uncore_event_to_pmu(event);
712 	/* no device found for this pmu */
713 	if (pmu->func_id < 0)
714 		return -ENOENT;
715 
716 	/* Sampling not supported yet */
717 	if (hwc->sample_period)
718 		return -EINVAL;
719 
720 	/*
721 	 * Place all uncore events for a particular physical package
722 	 * onto a single cpu
723 	 */
724 	if (event->cpu < 0)
725 		return -EINVAL;
726 	box = uncore_pmu_to_box(pmu, event->cpu);
727 	if (!box || box->cpu < 0)
728 		return -EINVAL;
729 	event->cpu = box->cpu;
730 	event->pmu_private = box;
731 
732 	event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
733 
734 	event->hw.idx = -1;
735 	event->hw.last_tag = ~0ULL;
736 	event->hw.extra_reg.idx = EXTRA_REG_NONE;
737 	event->hw.branch_reg.idx = EXTRA_REG_NONE;
738 
739 	if (event->attr.config == UNCORE_FIXED_EVENT) {
740 		/* no fixed counter */
741 		if (!pmu->type->fixed_ctl)
742 			return -EINVAL;
743 		/*
744 		 * if there is only one fixed counter, only the first pmu
745 		 * can access the fixed counter
746 		 */
747 		if (pmu->type->single_fixed && pmu->pmu_idx > 0)
748 			return -EINVAL;
749 
750 		/* fixed counters have event field hardcoded to zero */
751 		hwc->config = 0ULL;
752 	} else if (is_freerunning_event(event)) {
753 		hwc->config = event->attr.config;
754 		if (!check_valid_freerunning_event(box, event))
755 			return -EINVAL;
756 		event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
757 		/*
758 		 * The free running counter event and free running counter
759 		 * are always 1:1 mapped.
760 		 * The free running counter is always active.
761 		 * Assign the free running counter here.
762 		 */
763 		event->hw.event_base = uncore_freerunning_counter(box, event);
764 	} else {
765 		hwc->config = event->attr.config &
766 			      (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
767 		if (pmu->type->ops->hw_config) {
768 			ret = pmu->type->ops->hw_config(box, event);
769 			if (ret)
770 				return ret;
771 		}
772 	}
773 
774 	if (event->group_leader != event)
775 		ret = uncore_validate_group(pmu, event);
776 	else
777 		ret = 0;
778 
779 	return ret;
780 }
781 
782 static void uncore_pmu_enable(struct pmu *pmu)
783 {
784 	struct intel_uncore_pmu *uncore_pmu;
785 	struct intel_uncore_box *box;
786 
787 	uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
788 	if (!uncore_pmu)
789 		return;
790 
791 	box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
792 	if (!box)
793 		return;
794 
795 	if (uncore_pmu->type->ops->enable_box)
796 		uncore_pmu->type->ops->enable_box(box);
797 }
798 
799 static void uncore_pmu_disable(struct pmu *pmu)
800 {
801 	struct intel_uncore_pmu *uncore_pmu;
802 	struct intel_uncore_box *box;
803 
804 	uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
805 	if (!uncore_pmu)
806 		return;
807 
808 	box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
809 	if (!box)
810 		return;
811 
812 	if (uncore_pmu->type->ops->disable_box)
813 		uncore_pmu->type->ops->disable_box(box);
814 }
815 
816 static ssize_t uncore_get_attr_cpumask(struct device *dev,
817 				struct device_attribute *attr, char *buf)
818 {
819 	return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
820 }
821 
822 static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
823 
824 static struct attribute *uncore_pmu_attrs[] = {
825 	&dev_attr_cpumask.attr,
826 	NULL,
827 };
828 
829 static const struct attribute_group uncore_pmu_attr_group = {
830 	.attrs = uncore_pmu_attrs,
831 };
832 
833 static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
834 {
835 	int ret;
836 
837 	if (!pmu->type->pmu) {
838 		pmu->pmu = (struct pmu) {
839 			.attr_groups	= pmu->type->attr_groups,
840 			.task_ctx_nr	= perf_invalid_context,
841 			.pmu_enable	= uncore_pmu_enable,
842 			.pmu_disable	= uncore_pmu_disable,
843 			.event_init	= uncore_pmu_event_init,
844 			.add		= uncore_pmu_event_add,
845 			.del		= uncore_pmu_event_del,
846 			.start		= uncore_pmu_event_start,
847 			.stop		= uncore_pmu_event_stop,
848 			.read		= uncore_pmu_event_read,
849 			.module		= THIS_MODULE,
850 			.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
851 			.attr_update	= pmu->type->attr_update,
852 		};
853 	} else {
854 		pmu->pmu = *pmu->type->pmu;
855 		pmu->pmu.attr_groups = pmu->type->attr_groups;
856 		pmu->pmu.attr_update = pmu->type->attr_update;
857 	}
858 
859 	if (pmu->type->num_boxes == 1) {
860 		if (strlen(pmu->type->name) > 0)
861 			sprintf(pmu->name, "uncore_%s", pmu->type->name);
862 		else
863 			sprintf(pmu->name, "uncore");
864 	} else {
865 		sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
866 			pmu->pmu_idx);
867 	}
868 
869 	ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
870 	if (!ret)
871 		pmu->registered = true;
872 	return ret;
873 }
874 
875 static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
876 {
877 	if (!pmu->registered)
878 		return;
879 	perf_pmu_unregister(&pmu->pmu);
880 	pmu->registered = false;
881 }
882 
883 static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
884 {
885 	int die;
886 
887 	for (die = 0; die < uncore_max_dies(); die++)
888 		kfree(pmu->boxes[die]);
889 	kfree(pmu->boxes);
890 }
891 
892 static void uncore_type_exit(struct intel_uncore_type *type)
893 {
894 	struct intel_uncore_pmu *pmu = type->pmus;
895 	int i;
896 
897 	if (type->cleanup_mapping)
898 		type->cleanup_mapping(type);
899 
900 	if (pmu) {
901 		for (i = 0; i < type->num_boxes; i++, pmu++) {
902 			uncore_pmu_unregister(pmu);
903 			uncore_free_boxes(pmu);
904 		}
905 		kfree(type->pmus);
906 		type->pmus = NULL;
907 	}
908 	kfree(type->events_group);
909 	type->events_group = NULL;
910 }
911 
912 static void uncore_types_exit(struct intel_uncore_type **types)
913 {
914 	for (; *types; types++)
915 		uncore_type_exit(*types);
916 }
917 
918 static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
919 {
920 	struct intel_uncore_pmu *pmus;
921 	size_t size;
922 	int i, j;
923 
924 	pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL);
925 	if (!pmus)
926 		return -ENOMEM;
927 
928 	size = uncore_max_dies() * sizeof(struct intel_uncore_box *);
929 
930 	for (i = 0; i < type->num_boxes; i++) {
931 		pmus[i].func_id	= setid ? i : -1;
932 		pmus[i].pmu_idx	= i;
933 		pmus[i].type	= type;
934 		pmus[i].boxes	= kzalloc(size, GFP_KERNEL);
935 		if (!pmus[i].boxes)
936 			goto err;
937 	}
938 
939 	type->pmus = pmus;
940 	type->unconstrainted = (struct event_constraint)
941 		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
942 				0, type->num_counters, 0, 0);
943 
944 	if (type->event_descs) {
945 		struct {
946 			struct attribute_group group;
947 			struct attribute *attrs[];
948 		} *attr_group;
949 		for (i = 0; type->event_descs[i].attr.attr.name; i++);
950 
951 		attr_group = kzalloc(struct_size(attr_group, attrs, i + 1),
952 								GFP_KERNEL);
953 		if (!attr_group)
954 			goto err;
955 
956 		attr_group->group.name = "events";
957 		attr_group->group.attrs = attr_group->attrs;
958 
959 		for (j = 0; j < i; j++)
960 			attr_group->attrs[j] = &type->event_descs[j].attr.attr;
961 
962 		type->events_group = &attr_group->group;
963 	}
964 
965 	type->pmu_group = &uncore_pmu_attr_group;
966 
967 	if (type->set_mapping)
968 		type->set_mapping(type);
969 
970 	return 0;
971 
972 err:
973 	for (i = 0; i < type->num_boxes; i++)
974 		kfree(pmus[i].boxes);
975 	kfree(pmus);
976 
977 	return -ENOMEM;
978 }
979 
980 static int __init
981 uncore_types_init(struct intel_uncore_type **types, bool setid)
982 {
983 	int ret;
984 
985 	for (; *types; types++) {
986 		ret = uncore_type_init(*types, setid);
987 		if (ret)
988 			return ret;
989 	}
990 	return 0;
991 }
992 
993 /*
994  * Get the die information of a PCI device.
995  * @pdev: The PCI device.
996  * @phys_id: The physical socket id which the device maps to.
997  * @die: The die id which the device maps to.
998  */
999 static int uncore_pci_get_dev_die_info(struct pci_dev *pdev,
1000 				       int *phys_id, int *die)
1001 {
1002 	*phys_id = uncore_pcibus_to_physid(pdev->bus);
1003 	if (*phys_id < 0)
1004 		return -ENODEV;
1005 
1006 	*die = (topology_max_die_per_package() > 1) ? *phys_id :
1007 				topology_phys_to_logical_pkg(*phys_id);
1008 	if (*die < 0)
1009 		return -EINVAL;
1010 
1011 	return 0;
1012 }
1013 
1014 /*
1015  * Find the PMU of a PCI device.
1016  * @pdev: The PCI device.
1017  * @ids: The ID table of the available PCI devices with a PMU.
1018  */
1019 static struct intel_uncore_pmu *
1020 uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids)
1021 {
1022 	struct intel_uncore_pmu *pmu = NULL;
1023 	struct intel_uncore_type *type;
1024 	kernel_ulong_t data;
1025 	unsigned int devfn;
1026 
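	/*
	 * driver_data packs the expected PCI dev/func together with the
	 * uncore type and PMU index; match on vendor/device first, then
	 * on devfn, before decoding the PMU.
	 */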
1027 	while (ids && ids->vendor) {
1028 		if ((ids->vendor == pdev->vendor) &&
1029 		    (ids->device == pdev->device)) {
1030 			data = ids->driver_data;
1031 			devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data),
1032 					  UNCORE_PCI_DEV_FUNC(data));
1033 			if (devfn == pdev->devfn) {
1034 				type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)];
1035 				pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)];
1036 				break;
1037 			}
1038 		}
1039 		ids++;
1040 	}
1041 	return pmu;
1042 }
1043 
1044 /*
1045  * Register the PMU for a PCI device
1046  * @pdev: The PCI device.
1047  * @type: The corresponding PMU type of the device.
1048  * @pmu: The corresponding PMU of the device.
1049  * @phys_id: The physical socket id which the device maps to.
1050  * @die: The die id which the device maps to.
1051  */
1052 static int uncore_pci_pmu_register(struct pci_dev *pdev,
1053 				   struct intel_uncore_type *type,
1054 				   struct intel_uncore_pmu *pmu,
1055 				   int phys_id, int die)
1056 {
1057 	struct intel_uncore_box *box;
1058 	int ret;
1059 
1060 	if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
1061 		return -EINVAL;
1062 
1063 	box = uncore_alloc_box(type, NUMA_NO_NODE);
1064 	if (!box)
1065 		return -ENOMEM;
1066 
1067 	if (pmu->func_id < 0)
1068 		pmu->func_id = pdev->devfn;
1069 	else
1070 		WARN_ON_ONCE(pmu->func_id != pdev->devfn);
1071 
1072 	atomic_inc(&box->refcnt);
1073 	box->pci_phys_id = phys_id;
1074 	box->dieid = die;
1075 	box->pci_dev = pdev;
1076 	box->pmu = pmu;
1077 	uncore_box_init(box);
1078 
1079 	pmu->boxes[die] = box;
1080 	if (atomic_inc_return(&pmu->activeboxes) > 1)
1081 		return 0;
1082 
1083 	/* First active box registers the pmu */
1084 	ret = uncore_pmu_register(pmu);
1085 	if (ret) {
1086 		pmu->boxes[die] = NULL;
1087 		uncore_box_exit(box);
1088 		kfree(box);
1089 	}
1090 	return ret;
1091 }
1092 
1093 /*
1094  * add a pci uncore device
1095  */
1096 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1097 {
1098 	struct intel_uncore_type *type;
1099 	struct intel_uncore_pmu *pmu = NULL;
1100 	int phys_id, die, ret;
1101 
1102 	ret = uncore_pci_get_dev_die_info(pdev, &phys_id, &die);
1103 	if (ret)
1104 		return ret;
1105 
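	/*
	 * Extra PCI devices do not get a PMU of their own; just stash the
	 * pdev per die so other uncore PMUs can reference it.
	 */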
1106 	if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
1107 		int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
1108 
1109 		uncore_extra_pci_dev[die].dev[idx] = pdev;
1110 		pci_set_drvdata(pdev, NULL);
1111 		return 0;
1112 	}
1113 
1114 	type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
1115 
1116 	/*
1117 	 * Some platforms, e.g. Knights Landing, use a common PCI device ID
1118 	 * for multiple instances of an uncore PMU device type. We must check
1119 	 * the PCI slot and function to determine the uncore box.
1120 	 */
1121 	if (id->driver_data & ~0xffff) {
1122 		struct pci_driver *pci_drv = pdev->driver;
1123 
1124 		pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table);
1125 		if (pmu == NULL)
1126 			return -ENODEV;
1127 	} else {
1128 		/*
1129 		 * For a performance monitoring unit with multiple boxes,
1130 		 * each box has a different function id.
1131 		 */
1132 		pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
1133 	}
1134 
1135 	ret = uncore_pci_pmu_register(pdev, type, pmu, phys_id, die);
1136 
1137 	pci_set_drvdata(pdev, pmu->boxes[die]);
1138 
1139 	return ret;
1140 }
1141 
1142 /*
1143  * Unregister the PMU of a PCI device
1144  * @pmu: The corresponding PMU is unregistered.
1145  * @phys_id: The physical socket id which the device maps to.
1146  * @die: The die id which the device maps to.
1147  */
1148 static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu,
1149 				      int phys_id, int die)
1150 {
1151 	struct intel_uncore_box *box = pmu->boxes[die];
1152 
1153 	if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
1154 		return;
1155 
1156 	pmu->boxes[die] = NULL;
1157 	if (atomic_dec_return(&pmu->activeboxes) == 0)
1158 		uncore_pmu_unregister(pmu);
1159 	uncore_box_exit(box);
1160 	kfree(box);
1161 }
1162 
1163 static void uncore_pci_remove(struct pci_dev *pdev)
1164 {
1165 	struct intel_uncore_box *box;
1166 	struct intel_uncore_pmu *pmu;
1167 	int i, phys_id, die;
1168 
1169 	if (uncore_pci_get_dev_die_info(pdev, &phys_id, &die))
1170 		return;
1171 
1172 	box = pci_get_drvdata(pdev);
1173 	if (!box) {
1174 		for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
1175 			if (uncore_extra_pci_dev[die].dev[i] == pdev) {
1176 				uncore_extra_pci_dev[die].dev[i] = NULL;
1177 				break;
1178 			}
1179 		}
1180 		WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
1181 		return;
1182 	}
1183 
1184 	pmu = box->pmu;
1185 
1186 	pci_set_drvdata(pdev, NULL);
1187 
1188 	uncore_pci_pmu_unregister(pmu, phys_id, die);
1189 }
1190 
1191 static int uncore_bus_notify(struct notifier_block *nb,
1192 			     unsigned long action, void *data)
1193 {
1194 	struct device *dev = data;
1195 	struct pci_dev *pdev = to_pci_dev(dev);
1196 	struct intel_uncore_pmu *pmu;
1197 	int phys_id, die;
1198 
1199 	/* Unregister the PMU when the device is going to be deleted. */
1200 	if (action != BUS_NOTIFY_DEL_DEVICE)
1201 		return NOTIFY_DONE;
1202 
1203 	pmu = uncore_pci_find_dev_pmu(pdev, uncore_pci_sub_driver->id_table);
1204 	if (!pmu)
1205 		return NOTIFY_DONE;
1206 
1207 	if (uncore_pci_get_dev_die_info(pdev, &phys_id, &die))
1208 		return NOTIFY_DONE;
1209 
1210 	uncore_pci_pmu_unregister(pmu, phys_id, die);
1211 
1212 	return NOTIFY_OK;
1213 }
1214 
1215 static struct notifier_block uncore_notifier = {
1216 	.notifier_call = uncore_bus_notify,
1217 };
1218 
1219 static void uncore_pci_sub_driver_init(void)
1220 {
1221 	const struct pci_device_id *ids = uncore_pci_sub_driver->id_table;
1222 	struct intel_uncore_type *type;
1223 	struct intel_uncore_pmu *pmu;
1224 	struct pci_dev *pci_sub_dev;
1225 	bool notify = false;
1226 	unsigned int devfn;
1227 	int phys_id, die;
1228 
1229 	while (ids && ids->vendor) {
1230 		pci_sub_dev = NULL;
1231 		type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)];
1232 		/*
1233 		 * Search for the available devices and register the
1234 		 * corresponding PMU.
1235 		 */
1236 		while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
1237 						     ids->device, pci_sub_dev))) {
1238 			devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
1239 					  UNCORE_PCI_DEV_FUNC(ids->driver_data));
1240 			if (devfn != pci_sub_dev->devfn)
1241 				continue;
1242 
1243 			pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
1244 			if (!pmu)
1245 				continue;
1246 
1247 			if (uncore_pci_get_dev_die_info(pci_sub_dev,
1248 							&phys_id, &die))
1249 				continue;
1250 
1251 			if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu,
1252 						     phys_id, die))
1253 				notify = true;
1254 		}
1255 		ids++;
1256 	}
1257 
1258 	if (notify && bus_register_notifier(&pci_bus_type, &uncore_notifier))
1259 		notify = false;
1260 
1261 	if (!notify)
1262 		uncore_pci_sub_driver = NULL;
1263 }
1264 
1265 static int __init uncore_pci_init(void)
1266 {
1267 	size_t size;
1268 	int ret;
1269 
1270 	size = uncore_max_dies() * sizeof(struct pci_extra_dev);
1271 	uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
1272 	if (!uncore_extra_pci_dev) {
1273 		ret = -ENOMEM;
1274 		goto err;
1275 	}
1276 
1277 	ret = uncore_types_init(uncore_pci_uncores, false);
1278 	if (ret)
1279 		goto errtype;
1280 
1281 	uncore_pci_driver->probe = uncore_pci_probe;
1282 	uncore_pci_driver->remove = uncore_pci_remove;
1283 
1284 	ret = pci_register_driver(uncore_pci_driver);
1285 	if (ret)
1286 		goto errtype;
1287 
1288 	if (uncore_pci_sub_driver)
1289 		uncore_pci_sub_driver_init();
1290 
1291 	pcidrv_registered = true;
1292 	return 0;
1293 
1294 errtype:
1295 	uncore_types_exit(uncore_pci_uncores);
1296 	kfree(uncore_extra_pci_dev);
1297 	uncore_extra_pci_dev = NULL;
1298 	uncore_free_pcibus_map();
1299 err:
1300 	uncore_pci_uncores = empty_uncore;
1301 	return ret;
1302 }
1303 
1304 static void uncore_pci_exit(void)
1305 {
1306 	if (pcidrv_registered) {
1307 		pcidrv_registered = false;
1308 		if (uncore_pci_sub_driver)
1309 			bus_unregister_notifier(&pci_bus_type, &uncore_notifier);
1310 		pci_unregister_driver(uncore_pci_driver);
1311 		uncore_types_exit(uncore_pci_uncores);
1312 		kfree(uncore_extra_pci_dev);
1313 		uncore_free_pcibus_map();
1314 	}
1315 }
1316 
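/*
 * Hand the boxes of one die over to a new event-collecting CPU.
 * old_cpu < 0 means the die gets its first collector; new_cpu < 0 means
 * the die loses its collector.  For a real migration the perf context is
 * moved from old_cpu to new_cpu as well.
 */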
1317 static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
1318 				   int new_cpu)
1319 {
1320 	struct intel_uncore_pmu *pmu = type->pmus;
1321 	struct intel_uncore_box *box;
1322 	int i, die;
1323 
1324 	die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu);
1325 	for (i = 0; i < type->num_boxes; i++, pmu++) {
1326 		box = pmu->boxes[die];
1327 		if (!box)
1328 			continue;
1329 
1330 		if (old_cpu < 0) {
1331 			WARN_ON_ONCE(box->cpu != -1);
1332 			box->cpu = new_cpu;
1333 			continue;
1334 		}
1335 
1336 		WARN_ON_ONCE(box->cpu != old_cpu);
1337 		box->cpu = -1;
1338 		if (new_cpu < 0)
1339 			continue;
1340 
1341 		uncore_pmu_cancel_hrtimer(box);
1342 		perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
1343 		box->cpu = new_cpu;
1344 	}
1345 }
1346 
1347 static void uncore_change_context(struct intel_uncore_type **uncores,
1348 				  int old_cpu, int new_cpu)
1349 {
1350 	for (; *uncores; uncores++)
1351 		uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
1352 }
1353 
1354 static void uncore_box_unref(struct intel_uncore_type **types, int id)
1355 {
1356 	struct intel_uncore_type *type;
1357 	struct intel_uncore_pmu *pmu;
1358 	struct intel_uncore_box *box;
1359 	int i;
1360 
1361 	for (; *types; types++) {
1362 		type = *types;
1363 		pmu = type->pmus;
1364 		for (i = 0; i < type->num_boxes; i++, pmu++) {
1365 			box = pmu->boxes[id];
1366 			if (box && atomic_dec_return(&box->refcnt) == 0)
1367 				uncore_box_exit(box);
1368 		}
1369 	}
1370 }
1371 
1372 static int uncore_event_cpu_offline(unsigned int cpu)
1373 {
1374 	int die, target;
1375 
1376 	/* Check if exiting cpu is used for collecting uncore events */
1377 	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
1378 		goto unref;
1379 	/* Find a new cpu to collect uncore events */
1380 	target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
1381 
1382 	/* Migrate uncore events to the new target */
1383 	if (target < nr_cpu_ids)
1384 		cpumask_set_cpu(target, &uncore_cpu_mask);
1385 	else
1386 		target = -1;
1387 
1388 	uncore_change_context(uncore_msr_uncores, cpu, target);
1389 	uncore_change_context(uncore_mmio_uncores, cpu, target);
1390 	uncore_change_context(uncore_pci_uncores, cpu, target);
1391 
1392 unref:
1393 	/* Clear the references */
1394 	die = topology_logical_die_id(cpu);
1395 	uncore_box_unref(uncore_msr_uncores, die);
1396 	uncore_box_unref(uncore_mmio_uncores, die);
1397 	return 0;
1398 }
1399 
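/*
 * Allocate the boxes a die is still missing in two phases: first try to
 * allocate all of them, then install them into their PMUs, so that on an
 * allocation failure nothing is left half-installed.
 */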
1400 static int allocate_boxes(struct intel_uncore_type **types,
1401 			 unsigned int die, unsigned int cpu)
1402 {
1403 	struct intel_uncore_box *box, *tmp;
1404 	struct intel_uncore_type *type;
1405 	struct intel_uncore_pmu *pmu;
1406 	LIST_HEAD(allocated);
1407 	int i;
1408 
1409 	/* Try to allocate all required boxes */
1410 	for (; *types; types++) {
1411 		type = *types;
1412 		pmu = type->pmus;
1413 		for (i = 0; i < type->num_boxes; i++, pmu++) {
1414 			if (pmu->boxes[die])
1415 				continue;
1416 			box = uncore_alloc_box(type, cpu_to_node(cpu));
1417 			if (!box)
1418 				goto cleanup;
1419 			box->pmu = pmu;
1420 			box->dieid = die;
1421 			list_add(&box->active_list, &allocated);
1422 		}
1423 	}
1424 	/* Install them in the pmus */
1425 	list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1426 		list_del_init(&box->active_list);
1427 		box->pmu->boxes[die] = box;
1428 	}
1429 	return 0;
1430 
1431 cleanup:
1432 	list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1433 		list_del_init(&box->active_list);
1434 		kfree(box);
1435 	}
1436 	return -ENOMEM;
1437 }
1438 
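/*
 * Take a reference on every box of the die; the first reference
 * (refcnt 0 -> 1) also initializes the box hardware.
 */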
1439 static int uncore_box_ref(struct intel_uncore_type **types,
1440 			  int id, unsigned int cpu)
1441 {
1442 	struct intel_uncore_type *type;
1443 	struct intel_uncore_pmu *pmu;
1444 	struct intel_uncore_box *box;
1445 	int i, ret;
1446 
1447 	ret = allocate_boxes(types, id, cpu);
1448 	if (ret)
1449 		return ret;
1450 
1451 	for (; *types; types++) {
1452 		type = *types;
1453 		pmu = type->pmus;
1454 		for (i = 0; i < type->num_boxes; i++, pmu++) {
1455 			box = pmu->boxes[id];
1456 			if (box && atomic_inc_return(&box->refcnt) == 1)
1457 				uncore_box_init(box);
1458 		}
1459 	}
1460 	return 0;
1461 }
1462 
1463 static int uncore_event_cpu_online(unsigned int cpu)
1464 {
1465 	int die, target, msr_ret, mmio_ret;
1466 
1467 	die = topology_logical_die_id(cpu);
1468 	msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
1469 	mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
1470 	if (msr_ret && mmio_ret)
1471 		return -ENOMEM;
1472 
1473 	/*
1474 	 * Check if there is an online cpu in the package
1475 	 * which collects uncore events already.
1476 	 */
1477 	target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
1478 	if (target < nr_cpu_ids)
1479 		return 0;
1480 
1481 	cpumask_set_cpu(cpu, &uncore_cpu_mask);
1482 
1483 	if (!msr_ret)
1484 		uncore_change_context(uncore_msr_uncores, -1, cpu);
1485 	if (!mmio_ret)
1486 		uncore_change_context(uncore_mmio_uncores, -1, cpu);
1487 	uncore_change_context(uncore_pci_uncores, -1, cpu);
1488 	return 0;
1489 }
1490 
1491 static int __init type_pmu_register(struct intel_uncore_type *type)
1492 {
1493 	int i, ret;
1494 
1495 	for (i = 0; i < type->num_boxes; i++) {
1496 		ret = uncore_pmu_register(&type->pmus[i]);
1497 		if (ret)
1498 			return ret;
1499 	}
1500 	return 0;
1501 }
1502 
1503 static int __init uncore_msr_pmus_register(void)
1504 {
1505 	struct intel_uncore_type **types = uncore_msr_uncores;
1506 	int ret;
1507 
1508 	for (; *types; types++) {
1509 		ret = type_pmu_register(*types);
1510 		if (ret)
1511 			return ret;
1512 	}
1513 	return 0;
1514 }
1515 
1516 static int __init uncore_cpu_init(void)
1517 {
1518 	int ret;
1519 
1520 	ret = uncore_types_init(uncore_msr_uncores, true);
1521 	if (ret)
1522 		goto err;
1523 
1524 	ret = uncore_msr_pmus_register();
1525 	if (ret)
1526 		goto err;
1527 	return 0;
1528 err:
1529 	uncore_types_exit(uncore_msr_uncores);
1530 	uncore_msr_uncores = empty_uncore;
1531 	return ret;
1532 }
1533 
1534 static int __init uncore_mmio_init(void)
1535 {
1536 	struct intel_uncore_type **types = uncore_mmio_uncores;
1537 	int ret;
1538 
1539 	ret = uncore_types_init(types, true);
1540 	if (ret)
1541 		goto err;
1542 
1543 	for (; *types; types++) {
1544 		ret = type_pmu_register(*types);
1545 		if (ret)
1546 			goto err;
1547 	}
1548 	return 0;
1549 err:
1550 	uncore_types_exit(uncore_mmio_uncores);
1551 	uncore_mmio_uncores = empty_uncore;
1552 	return ret;
1553 }
1554 
1555 struct intel_uncore_init_fun {
1556 	void	(*cpu_init)(void);
1557 	int	(*pci_init)(void);
1558 	void	(*mmio_init)(void);
1559 };
1560 
1561 static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
1562 	.cpu_init = nhm_uncore_cpu_init,
1563 };
1564 
1565 static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
1566 	.cpu_init = snb_uncore_cpu_init,
1567 	.pci_init = snb_uncore_pci_init,
1568 };
1569 
1570 static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
1571 	.cpu_init = snb_uncore_cpu_init,
1572 	.pci_init = ivb_uncore_pci_init,
1573 };
1574 
1575 static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
1576 	.cpu_init = snb_uncore_cpu_init,
1577 	.pci_init = hsw_uncore_pci_init,
1578 };
1579 
1580 static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
1581 	.cpu_init = snb_uncore_cpu_init,
1582 	.pci_init = bdw_uncore_pci_init,
1583 };
1584 
1585 static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
1586 	.cpu_init = snbep_uncore_cpu_init,
1587 	.pci_init = snbep_uncore_pci_init,
1588 };
1589 
1590 static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
1591 	.cpu_init = nhmex_uncore_cpu_init,
1592 };
1593 
1594 static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
1595 	.cpu_init = ivbep_uncore_cpu_init,
1596 	.pci_init = ivbep_uncore_pci_init,
1597 };
1598 
1599 static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
1600 	.cpu_init = hswep_uncore_cpu_init,
1601 	.pci_init = hswep_uncore_pci_init,
1602 };
1603 
1604 static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
1605 	.cpu_init = bdx_uncore_cpu_init,
1606 	.pci_init = bdx_uncore_pci_init,
1607 };
1608 
1609 static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
1610 	.cpu_init = knl_uncore_cpu_init,
1611 	.pci_init = knl_uncore_pci_init,
1612 };
1613 
1614 static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
1615 	.cpu_init = skl_uncore_cpu_init,
1616 	.pci_init = skl_uncore_pci_init,
1617 };
1618 
1619 static const struct intel_uncore_init_fun skx_uncore_init __initconst = {
1620 	.cpu_init = skx_uncore_cpu_init,
1621 	.pci_init = skx_uncore_pci_init,
1622 };
1623 
1624 static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
1625 	.cpu_init = icl_uncore_cpu_init,
1626 	.pci_init = skl_uncore_pci_init,
1627 };
1628 
1629 static const struct intel_uncore_init_fun tgl_uncore_init __initconst = {
1630 	.cpu_init = tgl_uncore_cpu_init,
1631 	.mmio_init = tgl_uncore_mmio_init,
1632 };
1633 
1634 static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
1635 	.cpu_init = tgl_uncore_cpu_init,
1636 	.mmio_init = tgl_l_uncore_mmio_init,
1637 };
1638 
1639 static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
1640 	.cpu_init = icx_uncore_cpu_init,
1641 	.pci_init = icx_uncore_pci_init,
1642 	.mmio_init = icx_uncore_mmio_init,
1643 };
1644 
1645 static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
1646 	.cpu_init = snr_uncore_cpu_init,
1647 	.pci_init = snr_uncore_pci_init,
1648 	.mmio_init = snr_uncore_mmio_init,
1649 };
1650 
1651 static const struct x86_cpu_id intel_uncore_match[] __initconst = {
1652 	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,		&nhm_uncore_init),
1653 	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		&nhm_uncore_init),
1654 	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,		&nhm_uncore_init),
1655 	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,		&nhm_uncore_init),
1656 	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,		&snb_uncore_init),
1657 	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,		&ivb_uncore_init),
1658 	X86_MATCH_INTEL_FAM6_MODEL(HASWELL,		&hsw_uncore_init),
1659 	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,		&hsw_uncore_init),
1660 	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,		&hsw_uncore_init),
1661 	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		&bdw_uncore_init),
1662 	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,		&bdw_uncore_init),
1663 	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,	&snbep_uncore_init),
1664 	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,		&nhmex_uncore_init),
1665 	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,		&nhmex_uncore_init),
1666 	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,		&ivbep_uncore_init),
1667 	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,		&hswep_uncore_init),
1668 	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,		&bdx_uncore_init),
1669 	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,		&bdx_uncore_init),
1670 	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&knl_uncore_init),
1671 	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&knl_uncore_init),
1672 	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,		&skl_uncore_init),
1673 	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,		&skl_uncore_init),
1674 	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&skx_uncore_init),
1675 	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,		&skl_uncore_init),
1676 	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		&skl_uncore_init),
1677 	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L,		&skl_uncore_init),
1678 	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE,		&skl_uncore_init),
1679 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,		&icl_uncore_init),
1680 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI,	&icl_uncore_init),
1681 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,		&icl_uncore_init),
1682 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		&icx_uncore_init),
1683 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&icx_uncore_init),
1684 	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,		&tgl_l_uncore_init),
1685 	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,		&tgl_uncore_init),
1686 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&snr_uncore_init),
1687 	{},
1688 };
1689 MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
1690 
1691 static int __init intel_uncore_init(void)
1692 {
1693 	const struct x86_cpu_id *id;
1694 	struct intel_uncore_init_fun *uncore_init;
1695 	int pret = 0, cret = 0, mret = 0, ret;
1696 
1697 	id = x86_match_cpu(intel_uncore_match);
1698 	if (!id)
1699 		return -ENODEV;
1700 
1701 	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
1702 		return -ENODEV;
1703 
1704 	__uncore_max_dies =
1705 		topology_max_packages() * topology_max_die_per_package();
1706 
1707 	uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
1708 	if (uncore_init->pci_init) {
1709 		pret = uncore_init->pci_init();
1710 		if (!pret)
1711 			pret = uncore_pci_init();
1712 	}
1713 
1714 	if (uncore_init->cpu_init) {
1715 		uncore_init->cpu_init();
1716 		cret = uncore_cpu_init();
1717 	}
1718 
1719 	if (uncore_init->mmio_init) {
1720 		uncore_init->mmio_init();
1721 		mret = uncore_mmio_init();
1722 	}
1723 
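	/* Bail out only if the MSR, PCI and MMIO setups all failed. */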
1724 	if (cret && pret && mret)
1725 		return -ENODEV;
1726 
1727 	/* Install hotplug callbacks to set up the targets for each package */
1728 	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
1729 				"perf/x86/intel/uncore:online",
1730 				uncore_event_cpu_online,
1731 				uncore_event_cpu_offline);
1732 	if (ret)
1733 		goto err;
1734 	return 0;
1735 
1736 err:
1737 	uncore_types_exit(uncore_msr_uncores);
1738 	uncore_types_exit(uncore_mmio_uncores);
1739 	uncore_pci_exit();
1740 	return ret;
1741 }
1742 module_init(intel_uncore_init);
1743 
1744 static void __exit intel_uncore_exit(void)
1745 {
1746 	cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
1747 	uncore_types_exit(uncore_msr_uncores);
1748 	uncore_types_exit(uncore_mmio_uncores);
1749 	uncore_pci_exit();
1750 }
1751 module_exit(intel_uncore_exit);
1752