xref: /openbmc/linux/arch/x86/events/amd/uncore.c (revision bc1daef6)
/*
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Jacob Shin <jacob.shin@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>

#include <asm/cpufeature.h>
#include <asm/perf_event.h>
#include <asm/msr.h>

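/*
 * Each NB exposes four counters and each L2/L3 complex exposes four or six;
 * MAX_COUNTERS bounds the per-uncore events[] array.  RDPMC_BASE_NB and
 * RDPMC_BASE_LLC are the RDPMC indices of the first NB and LLC counter, and
 * COUNTER_SHIFT (64 - 48) is used to sign-extend the 48-bit hardware
 * counters when computing deltas in amd_uncore_read().
 */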
#define NUM_COUNTERS_NB		4
#define NUM_COUNTERS_L2		4
#define NUM_COUNTERS_L3		6
#define MAX_COUNTERS		6

#define RDPMC_BASE_NB		6
#define RDPMC_BASE_LLC		10

#define COUNTER_SHIFT		16

static int num_counters_llc;
static int num_counters_nb;

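/* uncores that lost the sibling race in amd_uncore_cpu_starting(); freed by uncore_clean_online() */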
static HLIST_HEAD(uncore_unused_list);

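/*
 * One amd_uncore instance is shared by every CPU attached to the same
 * northbridge or last level cache.  @cpu is the CPU that currently owns the
 * perf context and appears in @active_mask, @refcnt counts the CPUs sharing
 * the instance, and @events[] tracks which counter slot each active event
 * occupies.
 */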
struct amd_uncore {
	int id;
	int refcnt;
	int cpu;
	int num_counters;
	int rdpmc_base;
	u32 msr_base;
	cpumask_t *active_mask;
	struct pmu *pmu;
	struct perf_event *events[MAX_COUNTERS];
	struct hlist_node node;
};

static struct amd_uncore * __percpu *amd_uncore_nb;
static struct amd_uncore * __percpu *amd_uncore_llc;

static struct pmu amd_nb_pmu;
static struct pmu amd_llc_pmu;

static cpumask_t amd_nb_active_mask;
static cpumask_t amd_llc_active_mask;

static bool is_nb_event(struct perf_event *event)
{
	return event->pmu->type == amd_nb_pmu.type;
}

static bool is_llc_event(struct perf_event *event)
{
	return event->pmu->type == amd_llc_pmu.type;
}

static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
{
	if (is_nb_event(event) && amd_uncore_nb)
		return *per_cpu_ptr(amd_uncore_nb, event->cpu);
	else if (is_llc_event(event) && amd_uncore_llc)
		return *per_cpu_ptr(amd_uncore_llc, event->cpu);

	return NULL;
}

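/*
 * Read the current counter value via RDPMC and accumulate the difference
 * from the previous read.  The counters are 48 bits wide, so both values
 * are shifted up by COUNTER_SHIFT before subtracting and the result is
 * shifted back down, which discards the unused upper bits.
 */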
static void amd_uncore_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new;
	s64 delta;

	/*
	 * since we do not enable counter overflow interrupts,
	 * we do not have to worry about prev_count changing on us
	 */

	prev = local64_read(&hwc->prev_count);
	rdpmcl(hwc->event_base_rdpmc, new);
	local64_set(&hwc->prev_count, new);
	delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
	delta >>= COUNTER_SHIFT;
	local64_add(delta, &event->count);
}

static void amd_uncore_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (flags & PERF_EF_RELOAD)
		wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));

	hwc->state = 0;
	wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
	perf_event_update_userpage(event);
}

static void amd_uncore_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	wrmsrl(hwc->config_base, hwc->config);
	hwc->state |= PERF_HES_STOPPED;

	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		amd_uncore_read(event);
		hwc->state |= PERF_HES_UPTODATE;
	}
}

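/*
 * Claim a free counter slot with cmpxchg() (the slots are shared by all
 * CPUs attached to this uncore) and derive the PERF_CTL/PERF_CTR MSR
 * addresses and RDPMC index for that slot.
 */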
static int amd_uncore_add(struct perf_event *event, int flags)
{
	int i;
	struct amd_uncore *uncore = event_to_amd_uncore(event);
	struct hw_perf_event *hwc = &event->hw;

	/* are we already assigned? */
	if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
		goto out;

	for (i = 0; i < uncore->num_counters; i++) {
		if (uncore->events[i] == event) {
			hwc->idx = i;
			goto out;
		}
	}

	/* if not, take the first available counter */
	hwc->idx = -1;
	for (i = 0; i < uncore->num_counters; i++) {
		if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
			hwc->idx = i;
			break;
		}
	}

out:
	if (hwc->idx == -1)
		return -EBUSY;

	hwc->config_base = uncore->msr_base + (2 * hwc->idx);
	hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
	hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (flags & PERF_EF_START)
		amd_uncore_start(event, PERF_EF_RELOAD);

	return 0;
}

static void amd_uncore_del(struct perf_event *event, int flags)
{
	int i;
	struct amd_uncore *uncore = event_to_amd_uncore(event);
	struct hw_perf_event *hwc = &event->hw;

	amd_uncore_stop(event, PERF_EF_UPDATE);

	for (i = 0; i < uncore->num_counters; i++) {
		if (cmpxchg(&uncore->events[i], event, NULL) == event)
			break;
	}

	hwc->idx = -1;
}

static int amd_uncore_event_init(struct perf_event *event)
{
	struct amd_uncore *uncore;
	struct hw_perf_event *hwc = &event->hw;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/*
	 * NB and Last level cache counters (MSRs) are shared across all cores
	 * that share the same NB / Last level cache. Interrupts can be directed
	 * to a single target core, however, event counts generated by processes
	 * running on other cores cannot be masked out. So we do not support
	 * sampling and per-thread events.
	 */
	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
		return -EINVAL;

	/* NB and Last level cache counters do not have usr/os/guest/host bits */
	if (event->attr.exclude_user || event->attr.exclude_kernel ||
	    event->attr.exclude_host || event->attr.exclude_guest)
		return -EINVAL;

	/* and we do not enable counter overflow interrupts */
	hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
	hwc->idx = -1;

	if (event->cpu < 0)
		return -EINVAL;

	uncore = event_to_amd_uncore(event);
	if (!uncore)
		return -ENODEV;

	/*
	 * since requests can come in on any of the shared cores, we will remap
	 * to a single common cpu.
	 */
	event->cpu = uncore->cpu;

	return 0;
}

static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
					    struct device_attribute *attr,
					    char *buf)
{
	cpumask_t *active_mask;
	struct pmu *pmu = dev_get_drvdata(dev);

	if (pmu->type == amd_nb_pmu.type)
		active_mask = &amd_nb_active_mask;
	else if (pmu->type == amd_llc_pmu.type)
		active_mask = &amd_llc_active_mask;
	else
		return 0;

	return cpumap_print_to_pagebuf(true, buf, active_mask);
}
static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);

static struct attribute *amd_uncore_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group amd_uncore_attr_group = {
	.attrs = amd_uncore_attrs,
};

PMU_FORMAT_ATTR(event, "config:0-7,32-35");
PMU_FORMAT_ATTR(umask, "config:8-15");

static struct attribute *amd_uncore_format_attr[] = {
	&format_attr_event.attr,
	&format_attr_umask.attr,
	NULL,
};

static struct attribute_group amd_uncore_format_group = {
	.name = "format",
	.attrs = amd_uncore_format_attr,
};

static const struct attribute_group *amd_uncore_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_format_group,
	NULL,
};

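/*
 * Both uncore PMUs are counting-only: task_ctx_nr is perf_invalid_context,
 * so only system-wide (per-cpu) events are accepted, and no overflow
 * interrupts are used.
 */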
static struct pmu amd_nb_pmu = {
	.task_ctx_nr	= perf_invalid_context,
	.attr_groups	= amd_uncore_attr_groups,
	.name		= "amd_nb",
	.event_init	= amd_uncore_event_init,
	.add		= amd_uncore_add,
	.del		= amd_uncore_del,
	.start		= amd_uncore_start,
	.stop		= amd_uncore_stop,
	.read		= amd_uncore_read,
};

static struct pmu amd_llc_pmu = {
	.task_ctx_nr	= perf_invalid_context,
	.attr_groups	= amd_uncore_attr_groups,
	.name		= "amd_l2",
	.event_init	= amd_uncore_event_init,
	.add		= amd_uncore_add,
	.del		= amd_uncore_del,
	.start		= amd_uncore_start,
	.stop		= amd_uncore_stop,
	.read		= amd_uncore_read,
};

static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
{
	return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
			cpu_to_node(cpu));
}

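/*
 * CPU hotplug "prepare" callback: allocate this CPU's amd_uncore structures.
 * They start out with id == -1 and may later be replaced by an already
 * online sibling's structure in amd_uncore_cpu_starting().
 */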
static int amd_uncore_cpu_up_prepare(unsigned int cpu)
{
	struct amd_uncore *uncore_nb = NULL, *uncore_llc;

	if (amd_uncore_nb) {
		uncore_nb = amd_uncore_alloc(cpu);
		if (!uncore_nb)
			goto fail;
		uncore_nb->cpu = cpu;
		uncore_nb->num_counters = num_counters_nb;
		uncore_nb->rdpmc_base = RDPMC_BASE_NB;
		uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
		uncore_nb->active_mask = &amd_nb_active_mask;
		uncore_nb->pmu = &amd_nb_pmu;
		uncore_nb->id = -1;
		*per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
	}

	if (amd_uncore_llc) {
		uncore_llc = amd_uncore_alloc(cpu);
		if (!uncore_llc)
			goto fail;
		uncore_llc->cpu = cpu;
		uncore_llc->num_counters = num_counters_llc;
		uncore_llc->rdpmc_base = RDPMC_BASE_LLC;
		uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL;
		uncore_llc->active_mask = &amd_llc_active_mask;
		uncore_llc->pmu = &amd_llc_pmu;
		uncore_llc->id = -1;
		*per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc;
	}

	return 0;

fail:
	if (amd_uncore_nb)
		*per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
	kfree(uncore_nb);
	return -ENOMEM;
}

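/*
 * If another online CPU already carries an uncore with the same id, park
 * this CPU's private copy on uncore_unused_list and share the sibling's
 * structure instead; either way the returned uncore's refcnt is bumped.
 */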
static struct amd_uncore *
amd_uncore_find_online_sibling(struct amd_uncore *this,
			       struct amd_uncore * __percpu *uncores)
{
	unsigned int cpu;
	struct amd_uncore *that;

	for_each_online_cpu(cpu) {
		that = *per_cpu_ptr(uncores, cpu);

		if (!that)
			continue;

		if (this == that)
			continue;

		if (this->id == that->id) {
			hlist_add_head(&this->node, &uncore_unused_list);
			this = that;
			break;
		}
	}

	this->refcnt++;
	return this;
}

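/*
 * CPU hotplug "starting" callback: determine which NB and LLC this CPU
 * belongs to.  The NB id is taken from ECX[7:0] of CPUID leaf 0x8000001e;
 * the LLC id is the first APIC id of the group of cores sharing the cache,
 * derived from the sharing count reported by CPUID leaf 0x8000001d, index 2.
 */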
static int amd_uncore_cpu_starting(unsigned int cpu)
{
	unsigned int eax, ebx, ecx, edx;
	struct amd_uncore *uncore;

	if (amd_uncore_nb) {
		uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
		cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
		uncore->id = ecx & 0xff;

		uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
		*per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
	}

	if (amd_uncore_llc) {
		unsigned int apicid = cpu_data(cpu).apicid;
		unsigned int nshared;

		uncore = *per_cpu_ptr(amd_uncore_llc, cpu);
		cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
		nshared = ((eax >> 14) & 0xfff) + 1;
		uncore->id = apicid - (apicid % nshared);

		uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc);
		*per_cpu_ptr(amd_uncore_llc, cpu) = uncore;
	}

	return 0;
}

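/* free the uncores parked on uncore_unused_list by amd_uncore_cpu_starting() */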
static void uncore_clean_online(void)
{
	struct amd_uncore *uncore;
	struct hlist_node *n;

	hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) {
		hlist_del(&uncore->node);
		kfree(uncore);
	}
}

static void uncore_online(unsigned int cpu,
			  struct amd_uncore * __percpu *uncores)
{
	struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);

	uncore_clean_online();

	if (cpu == uncore->cpu)
		cpumask_set_cpu(cpu, uncore->active_mask);
}

static int amd_uncore_cpu_online(unsigned int cpu)
{
	if (amd_uncore_nb)
		uncore_online(cpu, amd_uncore_nb);

	if (amd_uncore_llc)
		uncore_online(cpu, amd_uncore_llc);

	return 0;
}

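/*
 * If the CPU going offline owns a shared uncore, hand the perf context and
 * active_mask ownership over to another online CPU attached to the same
 * uncore, if one exists.
 */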
static void uncore_down_prepare(unsigned int cpu,
				struct amd_uncore * __percpu *uncores)
{
	unsigned int i;
	struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);

	if (this->cpu != cpu)
		return;

	/* this cpu is going down, migrate to a shared sibling if possible */
	for_each_online_cpu(i) {
		struct amd_uncore *that = *per_cpu_ptr(uncores, i);

		if (cpu == i)
			continue;

		if (this == that) {
			perf_pmu_migrate_context(this->pmu, cpu, i);
			cpumask_clear_cpu(cpu, that->active_mask);
			cpumask_set_cpu(i, that->active_mask);
			that->cpu = i;
			break;
		}
	}
}

static int amd_uncore_cpu_down_prepare(unsigned int cpu)
{
	if (amd_uncore_nb)
		uncore_down_prepare(cpu, amd_uncore_nb);

	if (amd_uncore_llc)
		uncore_down_prepare(cpu, amd_uncore_llc);

	return 0;
}

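/* drop this CPU's reference and free the shared structure with the last user */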
static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
{
	struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);

	if (cpu == uncore->cpu)
		cpumask_clear_cpu(cpu, uncore->active_mask);

	if (!--uncore->refcnt)
		kfree(uncore);
	*per_cpu_ptr(uncores, cpu) = NULL;
}

static int amd_uncore_cpu_dead(unsigned int cpu)
{
	if (amd_uncore_nb)
		uncore_dead(cpu, amd_uncore_nb);

	if (amd_uncore_llc)
		uncore_dead(cpu, amd_uncore_llc);

	return 0;
}

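/*
 * Boot-time setup: choose the counter counts by CPU family, register the
 * "amd_nb" and "amd_l2" PMUs when the PERFCTR_NB / PERFCTR_L2 features are
 * present, and install the hotplug callbacks that manage the shared
 * per-node and per-cache state.
 */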
static int __init amd_uncore_init(void)
{
	int ret = -ENODEV;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
		goto fail_nodev;

	switch (boot_cpu_data.x86) {
		case 23:
			/* Family 17h: */
			num_counters_nb = NUM_COUNTERS_NB;
			num_counters_llc = NUM_COUNTERS_L3;
			break;
		case 22:
			/* Family 16h - may change: */
			num_counters_nb = NUM_COUNTERS_NB;
			num_counters_llc = NUM_COUNTERS_L2;
			break;
		default:
			/*
			 * All prior families have the same number of
			 * NorthBridge and Last Level Cache counters
			 */
			num_counters_nb = NUM_COUNTERS_NB;
			num_counters_llc = NUM_COUNTERS_L2;
			break;
	}

	if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
		goto fail_nodev;

	if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
		amd_uncore_nb = alloc_percpu(struct amd_uncore *);
		if (!amd_uncore_nb) {
			ret = -ENOMEM;
			goto fail_nb;
		}
		ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
		if (ret)
			goto fail_nb;

		pr_info("perf: AMD NB counters detected\n");
		ret = 0;
	}

	if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) {
		amd_uncore_llc = alloc_percpu(struct amd_uncore *);
		if (!amd_uncore_llc) {
			ret = -ENOMEM;
			goto fail_llc;
		}
		ret = perf_pmu_register(&amd_llc_pmu, amd_llc_pmu.name, -1);
		if (ret)
			goto fail_llc;

		pr_info("perf: AMD LLC counters detected\n");
		ret = 0;
	}

	/*
	 * Install callbacks. Core will call them for each online cpu.
	 */
	if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
			      "perf/x86/amd/uncore:prepare",
			      amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead))
		goto fail_llc;

	if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
			      "perf/x86/amd/uncore:starting",
			      amd_uncore_cpu_starting, NULL))
		goto fail_prep;
	if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
			      "perf/x86/amd/uncore:online",
			      amd_uncore_cpu_online,
			      amd_uncore_cpu_down_prepare))
		goto fail_start;
	return 0;

fail_start:
	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
fail_prep:
	cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
fail_llc:
	if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
		perf_pmu_unregister(&amd_nb_pmu);
	if (amd_uncore_llc)
		free_percpu(amd_uncore_llc);
fail_nb:
	if (amd_uncore_nb)
		free_percpu(amd_uncore_nb);

fail_nodev:
	return ret;
}
device_initcall(amd_uncore_init);