xref: /openbmc/linux/arch/x86/events/amd/uncore.c (revision 791d3ef2e11100449837dc0b6fe884e60ca3a484)
1 /*
2  * Copyright (C) 2013 Advanced Micro Devices, Inc.
3  *
4  * Author: Jacob Shin <jacob.shin@amd.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10 
11 #include <linux/perf_event.h>
12 #include <linux/percpu.h>
13 #include <linux/types.h>
14 #include <linux/slab.h>
15 #include <linux/init.h>
16 #include <linux/cpu.h>
17 #include <linux/cpumask.h>
18 
19 #include <asm/cpufeature.h>
20 #include <asm/perf_event.h>
21 #include <asm/msr.h>
22 #include <asm/smp.h>
23 
/* Number of hardware counters offered by each uncore PMU flavour. */
#define NUM_COUNTERS_NB		4
#define NUM_COUNTERS_L2		4
#define NUM_COUNTERS_L3		6
/* Capacity of amd_uncore::events[]; has to cover the largest count above. */
#define MAX_COUNTERS		6

/* Added to a counter index to form hw_perf_event::event_base_rdpmc. */
#define RDPMC_BASE_NB		6
#define RDPMC_BASE_LLC		10

/* Shift applied in amd_uncore_read() when computing the count delta. */
#define COUNTER_SHIFT		16

#undef pr_fmt
#define pr_fmt(fmt)	"amd_uncore: " fmt

/* Counter counts actually in use; selected by amd_uncore_init(). */
static int num_counters_llc;
static int num_counters_nb;

/*
 * Structures queued by amd_uncore_find_online_sibling() once a sibling's
 * structure is adopted; they are freed later by uncore_clean_online().
 */
static HLIST_HEAD(uncore_unused_list);
41 
42 struct amd_uncore {
43 	int id;
44 	int refcnt;
45 	int cpu;
46 	int num_counters;
47 	int rdpmc_base;
48 	u32 msr_base;
49 	cpumask_t *active_mask;
50 	struct pmu *pmu;
51 	struct perf_event *events[MAX_COUNTERS];
52 	struct hlist_node node;
53 };
54 
55 static struct amd_uncore * __percpu *amd_uncore_nb;
56 static struct amd_uncore * __percpu *amd_uncore_llc;
57 
58 static struct pmu amd_nb_pmu;
59 static struct pmu amd_llc_pmu;
60 
61 static cpumask_t amd_nb_active_mask;
62 static cpumask_t amd_llc_active_mask;
63 
64 static bool is_nb_event(struct perf_event *event)
65 {
66 	return event->pmu->type == amd_nb_pmu.type;
67 }
68 
69 static bool is_llc_event(struct perf_event *event)
70 {
71 	return event->pmu->type == amd_llc_pmu.type;
72 }
73 
74 static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
75 {
76 	if (is_nb_event(event) && amd_uncore_nb)
77 		return *per_cpu_ptr(amd_uncore_nb, event->cpu);
78 	else if (is_llc_event(event) && amd_uncore_llc)
79 		return *per_cpu_ptr(amd_uncore_llc, event->cpu);
80 
81 	return NULL;
82 }
83 
84 static void amd_uncore_read(struct perf_event *event)
85 {
86 	struct hw_perf_event *hwc = &event->hw;
87 	u64 prev, new;
88 	s64 delta;
89 
90 	/*
91 	 * since we do not enable counter overflow interrupts,
92 	 * we do not have to worry about prev_count changing on us
93 	 */
94 
95 	prev = local64_read(&hwc->prev_count);
96 	rdpmcl(hwc->event_base_rdpmc, new);
97 	local64_set(&hwc->prev_count, new);
98 	delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
99 	delta >>= COUNTER_SHIFT;
100 	local64_add(delta, &event->count);
101 }
102 
103 static void amd_uncore_start(struct perf_event *event, int flags)
104 {
105 	struct hw_perf_event *hwc = &event->hw;
106 
107 	if (flags & PERF_EF_RELOAD)
108 		wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
109 
110 	hwc->state = 0;
111 	wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
112 	perf_event_update_userpage(event);
113 }
114 
115 static void amd_uncore_stop(struct perf_event *event, int flags)
116 {
117 	struct hw_perf_event *hwc = &event->hw;
118 
119 	wrmsrl(hwc->config_base, hwc->config);
120 	hwc->state |= PERF_HES_STOPPED;
121 
122 	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
123 		amd_uncore_read(event);
124 		hwc->state |= PERF_HES_UPTODATE;
125 	}
126 }
127 
128 static int amd_uncore_add(struct perf_event *event, int flags)
129 {
130 	int i;
131 	struct amd_uncore *uncore = event_to_amd_uncore(event);
132 	struct hw_perf_event *hwc = &event->hw;
133 
134 	/* are we already assigned? */
135 	if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
136 		goto out;
137 
138 	for (i = 0; i < uncore->num_counters; i++) {
139 		if (uncore->events[i] == event) {
140 			hwc->idx = i;
141 			goto out;
142 		}
143 	}
144 
145 	/* if not, take the first available counter */
146 	hwc->idx = -1;
147 	for (i = 0; i < uncore->num_counters; i++) {
148 		if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
149 			hwc->idx = i;
150 			break;
151 		}
152 	}
153 
154 out:
155 	if (hwc->idx == -1)
156 		return -EBUSY;
157 
158 	hwc->config_base = uncore->msr_base + (2 * hwc->idx);
159 	hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
160 	hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
161 	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
162 
163 	if (flags & PERF_EF_START)
164 		amd_uncore_start(event, PERF_EF_RELOAD);
165 
166 	return 0;
167 }
168 
169 static void amd_uncore_del(struct perf_event *event, int flags)
170 {
171 	int i;
172 	struct amd_uncore *uncore = event_to_amd_uncore(event);
173 	struct hw_perf_event *hwc = &event->hw;
174 
175 	amd_uncore_stop(event, PERF_EF_UPDATE);
176 
177 	for (i = 0; i < uncore->num_counters; i++) {
178 		if (cmpxchg(&uncore->events[i], event, NULL) == event)
179 			break;
180 	}
181 
182 	hwc->idx = -1;
183 }
184 
185 static int amd_uncore_event_init(struct perf_event *event)
186 {
187 	struct amd_uncore *uncore;
188 	struct hw_perf_event *hwc = &event->hw;
189 
190 	if (event->attr.type != event->pmu->type)
191 		return -ENOENT;
192 
193 	/*
194 	 * NB and Last level cache counters (MSRs) are shared across all cores
195 	 * that share the same NB / Last level cache. Interrupts can be directed
196 	 * to a single target core, however, event counts generated by processes
197 	 * running on other cores cannot be masked out. So we do not support
198 	 * sampling and per-thread events.
199 	 */
200 	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
201 		return -EINVAL;
202 
203 	/* NB and Last level cache counters do not have usr/os/guest/host bits */
204 	if (event->attr.exclude_user || event->attr.exclude_kernel ||
205 	    event->attr.exclude_host || event->attr.exclude_guest)
206 		return -EINVAL;
207 
208 	/* and we do not enable counter overflow interrupts */
209 	hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
210 	hwc->idx = -1;
211 
212 	if (event->cpu < 0)
213 		return -EINVAL;
214 
215 	uncore = event_to_amd_uncore(event);
216 	if (!uncore)
217 		return -ENODEV;
218 
219 	/*
220 	 * since request can come in to any of the shared cores, we will remap
221 	 * to a single common cpu.
222 	 */
223 	event->cpu = uncore->cpu;
224 
225 	return 0;
226 }
227 
228 static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
229 					    struct device_attribute *attr,
230 					    char *buf)
231 {
232 	cpumask_t *active_mask;
233 	struct pmu *pmu = dev_get_drvdata(dev);
234 
235 	if (pmu->type == amd_nb_pmu.type)
236 		active_mask = &amd_nb_active_mask;
237 	else if (pmu->type == amd_llc_pmu.type)
238 		active_mask = &amd_llc_active_mask;
239 	else
240 		return 0;
241 
242 	return cpumap_print_to_pagebuf(true, buf, active_mask);
243 }
244 static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);
245 
246 static struct attribute *amd_uncore_attrs[] = {
247 	&dev_attr_cpumask.attr,
248 	NULL,
249 };
250 
251 static struct attribute_group amd_uncore_attr_group = {
252 	.attrs = amd_uncore_attrs,
253 };
254 
/*
 * Variant of PMU_FORMAT_ATTR whose format_attr can be picked per family.
 *
 * Expands to a show function named <_dev>_show<_name> that prints _format
 * followed by a newline, plus a device_attribute named
 * format_attr_<_dev><_name> initialized with __ATTR_RO(_dev).
 * amd_uncore_init() later adjusts the per-family attributes.
 */
#define AMD_FORMAT_ATTR(_dev, _name, _format)				\
static ssize_t								\
_dev##_show##_name(struct device *dev,					\
		   struct device_attribute *attr,			\
		   char *page)						\
{									\
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
	return sprintf(page, _format "\n");				\
}									\
static struct device_attribute format_attr_##_dev##_name = __ATTR_RO(_dev);
269 
/*
 * Instantiated once per uncore counter type. Builds the "format" group
 * (the type's event attribute plus the common umask attribute) and the
 * NULL-terminated attribute group list that is assigned to the PMU.
 */
#define AMD_ATTRIBUTE(_name)						\
static struct attribute *amd_uncore_format_attr_##_name[] = {		\
	&format_attr_event_##_name.attr,				\
	&format_attr_umask.attr,					\
	NULL,								\
};									\
static struct attribute_group amd_uncore_format_group_##_name = {	\
	.name = "format",						\
	.attrs = amd_uncore_format_attr_##_name,			\
};									\
static const struct attribute_group *amd_uncore_attr_groups_##_name[] = { \
	&amd_uncore_attr_group,						\
	&amd_uncore_format_group_##_name,				\
	NULL,								\
};
286 
287 AMD_FORMAT_ATTR(event, , "config:0-7,32-35");
288 AMD_FORMAT_ATTR(umask, , "config:8-15");
289 AMD_FORMAT_ATTR(event, _df, "config:0-7,32-35,59-60");
290 AMD_FORMAT_ATTR(event, _l3, "config:0-7");
291 AMD_ATTRIBUTE(df);
292 AMD_ATTRIBUTE(l3);
293 
294 static struct pmu amd_nb_pmu = {
295 	.task_ctx_nr	= perf_invalid_context,
296 	.event_init	= amd_uncore_event_init,
297 	.add		= amd_uncore_add,
298 	.del		= amd_uncore_del,
299 	.start		= amd_uncore_start,
300 	.stop		= amd_uncore_stop,
301 	.read		= amd_uncore_read,
302 };
303 
304 static struct pmu amd_llc_pmu = {
305 	.task_ctx_nr	= perf_invalid_context,
306 	.event_init	= amd_uncore_event_init,
307 	.add		= amd_uncore_add,
308 	.del		= amd_uncore_del,
309 	.start		= amd_uncore_start,
310 	.stop		= amd_uncore_stop,
311 	.read		= amd_uncore_read,
312 };
313 
314 static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
315 {
316 	return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
317 			cpu_to_node(cpu));
318 }
319 
320 static int amd_uncore_cpu_up_prepare(unsigned int cpu)
321 {
322 	struct amd_uncore *uncore_nb = NULL, *uncore_llc;
323 
324 	if (amd_uncore_nb) {
325 		uncore_nb = amd_uncore_alloc(cpu);
326 		if (!uncore_nb)
327 			goto fail;
328 		uncore_nb->cpu = cpu;
329 		uncore_nb->num_counters = num_counters_nb;
330 		uncore_nb->rdpmc_base = RDPMC_BASE_NB;
331 		uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
332 		uncore_nb->active_mask = &amd_nb_active_mask;
333 		uncore_nb->pmu = &amd_nb_pmu;
334 		uncore_nb->id = -1;
335 		*per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
336 	}
337 
338 	if (amd_uncore_llc) {
339 		uncore_llc = amd_uncore_alloc(cpu);
340 		if (!uncore_llc)
341 			goto fail;
342 		uncore_llc->cpu = cpu;
343 		uncore_llc->num_counters = num_counters_llc;
344 		uncore_llc->rdpmc_base = RDPMC_BASE_LLC;
345 		uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL;
346 		uncore_llc->active_mask = &amd_llc_active_mask;
347 		uncore_llc->pmu = &amd_llc_pmu;
348 		uncore_llc->id = -1;
349 		*per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc;
350 	}
351 
352 	return 0;
353 
354 fail:
355 	if (amd_uncore_nb)
356 		*per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
357 	kfree(uncore_nb);
358 	return -ENOMEM;
359 }
360 
361 static struct amd_uncore *
362 amd_uncore_find_online_sibling(struct amd_uncore *this,
363 			       struct amd_uncore * __percpu *uncores)
364 {
365 	unsigned int cpu;
366 	struct amd_uncore *that;
367 
368 	for_each_online_cpu(cpu) {
369 		that = *per_cpu_ptr(uncores, cpu);
370 
371 		if (!that)
372 			continue;
373 
374 		if (this == that)
375 			continue;
376 
377 		if (this->id == that->id) {
378 			hlist_add_head(&this->node, &uncore_unused_list);
379 			this = that;
380 			break;
381 		}
382 	}
383 
384 	this->refcnt++;
385 	return this;
386 }
387 
388 static int amd_uncore_cpu_starting(unsigned int cpu)
389 {
390 	unsigned int eax, ebx, ecx, edx;
391 	struct amd_uncore *uncore;
392 
393 	if (amd_uncore_nb) {
394 		uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
395 		cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
396 		uncore->id = ecx & 0xff;
397 
398 		uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
399 		*per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
400 	}
401 
402 	if (amd_uncore_llc) {
403 		uncore = *per_cpu_ptr(amd_uncore_llc, cpu);
404 		uncore->id = per_cpu(cpu_llc_id, cpu);
405 
406 		uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc);
407 		*per_cpu_ptr(amd_uncore_llc, cpu) = uncore;
408 	}
409 
410 	return 0;
411 }
412 
413 static void uncore_clean_online(void)
414 {
415 	struct amd_uncore *uncore;
416 	struct hlist_node *n;
417 
418 	hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) {
419 		hlist_del(&uncore->node);
420 		kfree(uncore);
421 	}
422 }
423 
424 static void uncore_online(unsigned int cpu,
425 			  struct amd_uncore * __percpu *uncores)
426 {
427 	struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
428 
429 	uncore_clean_online();
430 
431 	if (cpu == uncore->cpu)
432 		cpumask_set_cpu(cpu, uncore->active_mask);
433 }
434 
435 static int amd_uncore_cpu_online(unsigned int cpu)
436 {
437 	if (amd_uncore_nb)
438 		uncore_online(cpu, amd_uncore_nb);
439 
440 	if (amd_uncore_llc)
441 		uncore_online(cpu, amd_uncore_llc);
442 
443 	return 0;
444 }
445 
446 static void uncore_down_prepare(unsigned int cpu,
447 				struct amd_uncore * __percpu *uncores)
448 {
449 	unsigned int i;
450 	struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);
451 
452 	if (this->cpu != cpu)
453 		return;
454 
455 	/* this cpu is going down, migrate to a shared sibling if possible */
456 	for_each_online_cpu(i) {
457 		struct amd_uncore *that = *per_cpu_ptr(uncores, i);
458 
459 		if (cpu == i)
460 			continue;
461 
462 		if (this == that) {
463 			perf_pmu_migrate_context(this->pmu, cpu, i);
464 			cpumask_clear_cpu(cpu, that->active_mask);
465 			cpumask_set_cpu(i, that->active_mask);
466 			that->cpu = i;
467 			break;
468 		}
469 	}
470 }
471 
472 static int amd_uncore_cpu_down_prepare(unsigned int cpu)
473 {
474 	if (amd_uncore_nb)
475 		uncore_down_prepare(cpu, amd_uncore_nb);
476 
477 	if (amd_uncore_llc)
478 		uncore_down_prepare(cpu, amd_uncore_llc);
479 
480 	return 0;
481 }
482 
483 static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
484 {
485 	struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
486 
487 	if (cpu == uncore->cpu)
488 		cpumask_clear_cpu(cpu, uncore->active_mask);
489 
490 	if (!--uncore->refcnt)
491 		kfree(uncore);
492 	*per_cpu_ptr(uncores, cpu) = NULL;
493 }
494 
495 static int amd_uncore_cpu_dead(unsigned int cpu)
496 {
497 	if (amd_uncore_nb)
498 		uncore_dead(cpu, amd_uncore_nb);
499 
500 	if (amd_uncore_llc)
501 		uncore_dead(cpu, amd_uncore_llc);
502 
503 	return 0;
504 }
505 
506 static int __init amd_uncore_init(void)
507 {
508 	int ret = -ENODEV;
509 
510 	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
511 		return -ENODEV;
512 
513 	if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
514 		return -ENODEV;
515 
516 	if (boot_cpu_data.x86 == 0x17) {
517 		/*
518 		 * For F17h, the Northbridge counters are repurposed as Data
519 		 * Fabric counters. Also, L3 counters are supported too. The PMUs
520 		 * are exported based on  family as either L2 or L3 and NB or DF.
521 		 */
522 		num_counters_nb		  = NUM_COUNTERS_NB;
523 		num_counters_llc	  = NUM_COUNTERS_L3;
524 		amd_nb_pmu.name		  = "amd_df";
525 		amd_llc_pmu.name	  = "amd_l3";
526 		format_attr_event_df.show = &event_show_df;
527 		format_attr_event_l3.show = &event_show_l3;
528 	} else {
529 		num_counters_nb		  = NUM_COUNTERS_NB;
530 		num_counters_llc	  = NUM_COUNTERS_L2;
531 		amd_nb_pmu.name		  = "amd_nb";
532 		amd_llc_pmu.name	  = "amd_l2";
533 		format_attr_event_df	  = format_attr_event;
534 		format_attr_event_l3	  = format_attr_event;
535 	}
536 
537 	amd_nb_pmu.attr_groups	= amd_uncore_attr_groups_df;
538 	amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3;
539 
540 	if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
541 		amd_uncore_nb = alloc_percpu(struct amd_uncore *);
542 		if (!amd_uncore_nb) {
543 			ret = -ENOMEM;
544 			goto fail_nb;
545 		}
546 		ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
547 		if (ret)
548 			goto fail_nb;
549 
550 		pr_info("AMD NB counters detected\n");
551 		ret = 0;
552 	}
553 
554 	if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
555 		amd_uncore_llc = alloc_percpu(struct amd_uncore *);
556 		if (!amd_uncore_llc) {
557 			ret = -ENOMEM;
558 			goto fail_llc;
559 		}
560 		ret = perf_pmu_register(&amd_llc_pmu, amd_llc_pmu.name, -1);
561 		if (ret)
562 			goto fail_llc;
563 
564 		pr_info("AMD LLC counters detected\n");
565 		ret = 0;
566 	}
567 
568 	/*
569 	 * Install callbacks. Core will call them for each online cpu.
570 	 */
571 	if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
572 			      "perf/x86/amd/uncore:prepare",
573 			      amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead))
574 		goto fail_llc;
575 
576 	if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
577 			      "perf/x86/amd/uncore:starting",
578 			      amd_uncore_cpu_starting, NULL))
579 		goto fail_prep;
580 	if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
581 			      "perf/x86/amd/uncore:online",
582 			      amd_uncore_cpu_online,
583 			      amd_uncore_cpu_down_prepare))
584 		goto fail_start;
585 	return 0;
586 
587 fail_start:
588 	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
589 fail_prep:
590 	cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
591 fail_llc:
592 	if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
593 		perf_pmu_unregister(&amd_nb_pmu);
594 	if (amd_uncore_llc)
595 		free_percpu(amd_uncore_llc);
596 fail_nb:
597 	if (amd_uncore_nb)
598 		free_percpu(amd_uncore_nb);
599 
600 	return ret;
601 }
602 device_initcall(amd_uncore_init);
603