xref: /openbmc/linux/arch/x86/events/zhaoxin/core.c (revision 519a8a6c)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Zhaoxin PMU; like Intel Architectural PerfMon-v2
4  */
5 
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7 
8 #include <linux/stddef.h>
9 #include <linux/types.h>
10 #include <linux/init.h>
11 #include <linux/slab.h>
12 #include <linux/export.h>
13 #include <linux/nmi.h>
14 
15 #include <asm/cpufeature.h>
16 #include <asm/hardirq.h>
17 #include <asm/apic.h>
18 
19 #include "../perf_event.h"
20 
/*
 * Zhaoxin PerfMon, used on zxc and later.
 *
 * Maps generic PERF_COUNT_HW_* ids to raw event codes. Ids not listed here
 * start out as 0. The table is writable because zhaoxin_pmu_init() zeroes or
 * fills in entries according to the detected CPU family/model, and
 * zhaoxin_arch_events_quirk() zeroes entries flagged in x86_pmu.events_mask.
 */
static u64 zx_pmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = {

	[PERF_COUNT_HW_CPU_CYCLES]        = 0x0082,
	[PERF_COUNT_HW_INSTRUCTIONS]      = 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES]  = 0x0515,
	[PERF_COUNT_HW_CACHE_MISSES]      = 0x051a,
	[PERF_COUNT_HW_BUS_CYCLES]        = 0x0083,
};
32 
/*
 * Event constraints installed by zhaoxin_pmu_init() for the family 6 (ZXC)
 * parts. zhaoxin_pmu_init() additionally ORs the general-purpose counter
 * bits into each entry's index mask, which is why the table is not const.
 *
 * NOTE(review): FIXED_EVENT_CONSTRAINT() is defined outside this file; its
 * arguments look like (event code, fixed counter index) -- confirm.
 */
static struct event_constraint zxc_event_constraints[] __read_mostly = {

	FIXED_EVENT_CONSTRAINT(0x0082, 1), /* unhalted core clock cycles */
	EVENT_CONSTRAINT_END
};
38 
/*
 * Event constraints installed by zhaoxin_pmu_init() for the family 7 parts
 * (both model 0x1b "ZXD" and model 0x3b "ZXE" use this table).
 * zhaoxin_pmu_init() additionally ORs the general-purpose counter bits into
 * each entry's index mask, which is why the table is not const.
 *
 * NOTE(review): FIXED_EVENT_CONSTRAINT() is defined outside this file; its
 * arguments look like (event code, fixed counter index) -- confirm.
 */
static struct event_constraint zxd_event_constraints[] __read_mostly = {

	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* retired instructions */
	FIXED_EVENT_CONSTRAINT(0x0082, 1), /* unhalted core clock cycles */
	FIXED_EVENT_CONSTRAINT(0x0083, 2), /* unhalted bus clock cycles */
	EVENT_CONSTRAINT_END
};
46 
47 static __initconst const u64 zxd_hw_cache_event_ids
48 				[PERF_COUNT_HW_CACHE_MAX]
49 				[PERF_COUNT_HW_CACHE_OP_MAX]
50 				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
51 [C(L1D)] = {
52 	[C(OP_READ)] = {
53 		[C(RESULT_ACCESS)] = 0x0042,
54 		[C(RESULT_MISS)] = 0x0538,
55 	},
56 	[C(OP_WRITE)] = {
57 		[C(RESULT_ACCESS)] = 0x0043,
58 		[C(RESULT_MISS)] = 0x0562,
59 	},
60 	[C(OP_PREFETCH)] = {
61 		[C(RESULT_ACCESS)] = -1,
62 		[C(RESULT_MISS)] = -1,
63 	},
64 },
65 [C(L1I)] = {
66 	[C(OP_READ)] = {
67 		[C(RESULT_ACCESS)] = 0x0300,
68 		[C(RESULT_MISS)] = 0x0301,
69 	},
70 	[C(OP_WRITE)] = {
71 		[C(RESULT_ACCESS)] = -1,
72 		[C(RESULT_MISS)] = -1,
73 	},
74 	[C(OP_PREFETCH)] = {
75 		[C(RESULT_ACCESS)] = 0x030a,
76 		[C(RESULT_MISS)] = 0x030b,
77 	},
78 },
79 [C(LL)] = {
80 	[C(OP_READ)] = {
81 		[C(RESULT_ACCESS)] = -1,
82 		[C(RESULT_MISS)] = -1,
83 	},
84 	[C(OP_WRITE)] = {
85 		[C(RESULT_ACCESS)] = -1,
86 		[C(RESULT_MISS)] = -1,
87 	},
88 	[C(OP_PREFETCH)] = {
89 		[C(RESULT_ACCESS)] = -1,
90 		[C(RESULT_MISS)] = -1,
91 	},
92 },
93 [C(DTLB)] = {
94 	[C(OP_READ)] = {
95 		[C(RESULT_ACCESS)] = 0x0042,
96 		[C(RESULT_MISS)] = 0x052c,
97 	},
98 	[C(OP_WRITE)] = {
99 		[C(RESULT_ACCESS)] = 0x0043,
100 		[C(RESULT_MISS)] = 0x0530,
101 	},
102 	[C(OP_PREFETCH)] = {
103 		[C(RESULT_ACCESS)] = 0x0564,
104 		[C(RESULT_MISS)] = 0x0565,
105 	},
106 },
107 [C(ITLB)] = {
108 	[C(OP_READ)] = {
109 		[C(RESULT_ACCESS)] = 0x00c0,
110 		[C(RESULT_MISS)] = 0x0534,
111 	},
112 	[C(OP_WRITE)] = {
113 		[C(RESULT_ACCESS)] = -1,
114 		[C(RESULT_MISS)] = -1,
115 	},
116 	[C(OP_PREFETCH)] = {
117 		[C(RESULT_ACCESS)] = -1,
118 		[C(RESULT_MISS)] = -1,
119 	},
120 },
121 [C(BPU)] = {
122 	[C(OP_READ)] = {
123 		[C(RESULT_ACCESS)] = 0x0700,
124 		[C(RESULT_MISS)] = 0x0709,
125 	},
126 	[C(OP_WRITE)] = {
127 		[C(RESULT_ACCESS)] = -1,
128 		[C(RESULT_MISS)] = -1,
129 	},
130 	[C(OP_PREFETCH)] = {
131 		[C(RESULT_ACCESS)] = -1,
132 		[C(RESULT_MISS)] = -1,
133 	},
134 },
135 [C(NODE)] = {
136 	[C(OP_READ)] = {
137 		[C(RESULT_ACCESS)] = -1,
138 		[C(RESULT_MISS)] = -1,
139 	},
140 	[C(OP_WRITE)] = {
141 		[C(RESULT_ACCESS)] = -1,
142 		[C(RESULT_MISS)] = -1,
143 	},
144 	[C(OP_PREFETCH)] = {
145 		[C(RESULT_ACCESS)] = -1,
146 		[C(RESULT_MISS)] = -1,
147 	},
148 },
149 };
150 
151 static __initconst const u64 zxe_hw_cache_event_ids
152 				[PERF_COUNT_HW_CACHE_MAX]
153 				[PERF_COUNT_HW_CACHE_OP_MAX]
154 				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
155 [C(L1D)] = {
156 	[C(OP_READ)] = {
157 		[C(RESULT_ACCESS)] = 0x0568,
158 		[C(RESULT_MISS)] = 0x054b,
159 	},
160 	[C(OP_WRITE)] = {
161 		[C(RESULT_ACCESS)] = 0x0669,
162 		[C(RESULT_MISS)] = 0x0562,
163 	},
164 	[C(OP_PREFETCH)] = {
165 		[C(RESULT_ACCESS)] = -1,
166 		[C(RESULT_MISS)] = -1,
167 	},
168 },
169 [C(L1I)] = {
170 	[C(OP_READ)] = {
171 		[C(RESULT_ACCESS)] = 0x0300,
172 		[C(RESULT_MISS)] = 0x0301,
173 	},
174 	[C(OP_WRITE)] = {
175 		[C(RESULT_ACCESS)] = -1,
176 		[C(RESULT_MISS)] = -1,
177 	},
178 	[C(OP_PREFETCH)] = {
179 		[C(RESULT_ACCESS)] = 0x030a,
180 		[C(RESULT_MISS)] = 0x030b,
181 	},
182 },
183 [C(LL)] = {
184 	[C(OP_READ)] = {
185 		[C(RESULT_ACCESS)] = 0x0,
186 		[C(RESULT_MISS)] = 0x0,
187 	},
188 	[C(OP_WRITE)] = {
189 		[C(RESULT_ACCESS)] = 0x0,
190 		[C(RESULT_MISS)] = 0x0,
191 	},
192 	[C(OP_PREFETCH)] = {
193 		[C(RESULT_ACCESS)] = 0x0,
194 		[C(RESULT_MISS)] = 0x0,
195 	},
196 },
197 [C(DTLB)] = {
198 	[C(OP_READ)] = {
199 		[C(RESULT_ACCESS)] = 0x0568,
200 		[C(RESULT_MISS)] = 0x052c,
201 	},
202 	[C(OP_WRITE)] = {
203 		[C(RESULT_ACCESS)] = 0x0669,
204 		[C(RESULT_MISS)] = 0x0530,
205 	},
206 	[C(OP_PREFETCH)] = {
207 		[C(RESULT_ACCESS)] = 0x0564,
208 		[C(RESULT_MISS)] = 0x0565,
209 	},
210 },
211 [C(ITLB)] = {
212 	[C(OP_READ)] = {
213 		[C(RESULT_ACCESS)] = 0x00c0,
214 		[C(RESULT_MISS)] = 0x0534,
215 	},
216 	[C(OP_WRITE)] = {
217 		[C(RESULT_ACCESS)] = -1,
218 		[C(RESULT_MISS)] = -1,
219 	},
220 	[C(OP_PREFETCH)] = {
221 		[C(RESULT_ACCESS)] = -1,
222 		[C(RESULT_MISS)] = -1,
223 	},
224 },
225 [C(BPU)] = {
226 	[C(OP_READ)] = {
227 		[C(RESULT_ACCESS)] = 0x0028,
228 		[C(RESULT_MISS)] = 0x0029,
229 	},
230 	[C(OP_WRITE)] = {
231 		[C(RESULT_ACCESS)] = -1,
232 		[C(RESULT_MISS)] = -1,
233 	},
234 	[C(OP_PREFETCH)] = {
235 		[C(RESULT_ACCESS)] = -1,
236 		[C(RESULT_MISS)] = -1,
237 	},
238 },
239 [C(NODE)] = {
240 	[C(OP_READ)] = {
241 		[C(RESULT_ACCESS)] = -1,
242 		[C(RESULT_MISS)] = -1,
243 	},
244 	[C(OP_WRITE)] = {
245 		[C(RESULT_ACCESS)] = -1,
246 		[C(RESULT_MISS)] = -1,
247 	},
248 	[C(OP_PREFETCH)] = {
249 		[C(RESULT_ACCESS)] = -1,
250 		[C(RESULT_MISS)] = -1,
251 	},
252 },
253 };
254 
/* Stop all counters at once by writing 0 to the global control MSR. */
static void zhaoxin_pmu_disable_all(void)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}
259 
/*
 * Write x86_pmu.intel_ctrl (the counter enable mask computed in
 * zhaoxin_pmu_init()) to the global control MSR.
 *
 * @added: not used by this implementation.
 */
static void zhaoxin_pmu_enable_all(int added)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
}
264 
265 static inline u64 zhaoxin_pmu_get_status(void)
266 {
267 	u64 status;
268 
269 	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
270 
271 	return status;
272 }
273 
/*
 * Acknowledge status bits by writing @ack to the global overflow control
 * MSR. The interrupt handler passes the value it just read from the global
 * status MSR.
 */
static inline void zhaoxin_pmu_ack_status(u64 ack)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}
278 
279 static inline void zxc_pmu_ack_status(u64 ack)
280 {
281 	/*
282 	 * ZXC needs global control enabled in order to clear status bits.
283 	 */
284 	zhaoxin_pmu_enable_all(0);
285 	zhaoxin_pmu_ack_status(ack);
286 	zhaoxin_pmu_disable_all();
287 }
288 
289 static void zhaoxin_pmu_disable_fixed(struct hw_perf_event *hwc)
290 {
291 	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
292 	u64 ctrl_val, mask;
293 
294 	mask = 0xfULL << (idx * 4);
295 
296 	rdmsrl(hwc->config_base, ctrl_val);
297 	ctrl_val &= ~mask;
298 	wrmsrl(hwc->config_base, ctrl_val);
299 }
300 
301 static void zhaoxin_pmu_disable_event(struct perf_event *event)
302 {
303 	struct hw_perf_event *hwc = &event->hw;
304 
305 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
306 		zhaoxin_pmu_disable_fixed(hwc);
307 		return;
308 	}
309 
310 	x86_pmu_disable_event(event);
311 }
312 
313 static void zhaoxin_pmu_enable_fixed(struct hw_perf_event *hwc)
314 {
315 	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
316 	u64 ctrl_val, bits, mask;
317 
318 	/*
319 	 * Enable IRQ generation (0x8),
320 	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
321 	 * if requested:
322 	 */
323 	bits = 0x8ULL;
324 	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
325 		bits |= 0x2;
326 	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
327 		bits |= 0x1;
328 
329 	bits <<= (idx * 4);
330 	mask = 0xfULL << (idx * 4);
331 
332 	rdmsrl(hwc->config_base, ctrl_val);
333 	ctrl_val &= ~mask;
334 	ctrl_val |= bits;
335 	wrmsrl(hwc->config_base, ctrl_val);
336 }
337 
338 static void zhaoxin_pmu_enable_event(struct perf_event *event)
339 {
340 	struct hw_perf_event *hwc = &event->hw;
341 
342 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
343 		zhaoxin_pmu_enable_fixed(hwc);
344 		return;
345 	}
346 
347 	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
348 }
349 
350 /*
351  * This handler is triggered by the local APIC, so the APIC IRQ handling
352  * rules apply:
353  */
354 static int zhaoxin_pmu_handle_irq(struct pt_regs *regs)
355 {
356 	struct perf_sample_data data;
357 	struct cpu_hw_events *cpuc;
358 	int handled = 0;
359 	u64 status;
360 	int bit;
361 
362 	cpuc = this_cpu_ptr(&cpu_hw_events);
363 	apic_write(APIC_LVTPC, APIC_DM_NMI);
364 	zhaoxin_pmu_disable_all();
365 	status = zhaoxin_pmu_get_status();
366 	if (!status)
367 		goto done;
368 
369 again:
370 	if (x86_pmu.enabled_ack)
371 		zxc_pmu_ack_status(status);
372 	else
373 		zhaoxin_pmu_ack_status(status);
374 
375 	inc_irq_stat(apic_perf_irqs);
376 
377 	/*
378 	 * CondChgd bit 63 doesn't mean any overflow status. Ignore
379 	 * and clear the bit.
380 	 */
381 	if (__test_and_clear_bit(63, (unsigned long *)&status)) {
382 		if (!status)
383 			goto done;
384 	}
385 
386 	for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
387 		struct perf_event *event = cpuc->events[bit];
388 
389 		handled++;
390 
391 		if (!test_bit(bit, cpuc->active_mask))
392 			continue;
393 
394 		x86_perf_event_update(event);
395 		perf_sample_data_init(&data, 0, event->hw.last_period);
396 
397 		if (!x86_perf_event_set_period(event))
398 			continue;
399 
400 		if (perf_event_overflow(event, &data, regs))
401 			x86_pmu_stop(event, 0);
402 	}
403 
404 	/*
405 	 * Repeat if there is more work to be done:
406 	 */
407 	status = zhaoxin_pmu_get_status();
408 	if (status)
409 		goto again;
410 
411 done:
412 	zhaoxin_pmu_enable_all(0);
413 	return handled;
414 }
415 
/*
 * Return the raw event code stored in zx_pmon_event_map for the generic
 * hardware event id @hw_event.
 *
 * No range check is done here; @hw_event is used directly as the array
 * index, so the caller must keep it below ARRAY_SIZE(zx_pmon_event_map).
 */
static u64 zhaoxin_pmu_event_map(int hw_event)
{
	return zx_pmon_event_map[hw_event];
}
420 
421 static struct event_constraint *
422 zhaoxin_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
423 			struct perf_event *event)
424 {
425 	struct event_constraint *c;
426 
427 	if (x86_pmu.event_constraints) {
428 		for_each_event_constraint(c, x86_pmu.event_constraints) {
429 			if ((event->hw.config & c->cmask) == c->code)
430 				return c;
431 		}
432 	}
433 
434 	return &unconstrained;
435 }
436 
/*
 * Format attributes describing which bits of the event config each named
 * field occupies; the second argument is the bit range string.
 */
PMU_FORMAT_ATTR(event,	"config:0-7");
PMU_FORMAT_ATTR(umask,	"config:8-15");
PMU_FORMAT_ATTR(edge,	"config:18");
PMU_FORMAT_ATTR(inv,	"config:23");
PMU_FORMAT_ATTR(cmask,	"config:24-31");

/* NULL-terminated list of the above, hooked up via zhaoxin_pmu.format_attrs. */
static struct attribute *zx_arch_formats_attr[] = {
	&format_attr_event.attr,
	&format_attr_umask.attr,
	&format_attr_edge.attr,
	&format_attr_inv.attr,
	&format_attr_cmask.attr,
	NULL,
};
451 
452 static ssize_t zhaoxin_event_sysfs_show(char *page, u64 config)
453 {
454 	u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);
455 
456 	return x86_event_sysfs_show(page, config, event);
457 }
458 
/*
 * Template PMU description. zhaoxin_pmu_init() copies it into the global
 * x86_pmu and then fills in the CPUID-derived fields (version, counter
 * counts and widths, event mask) and the per-model overrides.
 */
static const struct x86_pmu zhaoxin_pmu __initconst = {
	.name			= "zhaoxin",
	.handle_irq		= zhaoxin_pmu_handle_irq,
	.disable_all		= zhaoxin_pmu_disable_all,
	.enable_all		= zhaoxin_pmu_enable_all,
	.enable			= zhaoxin_pmu_enable_event,
	.disable		= zhaoxin_pmu_disable_event,
	.hw_config		= x86_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
	.event_map		= zhaoxin_pmu_event_map,
	.max_events		= ARRAY_SIZE(zx_pmon_event_map),
	.apic			= 1,
	/*
	 * For zxd/zxe, read/write operation for PMCx MSR is 48 bits.
	 * (zhaoxin_pmu_init() replaces this value on ZXC with
	 * cntval_mask >> 1.)
	 */
	.max_period		= (1ULL << 47) - 1,
	.get_event_constraints	= zhaoxin_get_event_constraints,

	.format_attrs		= zx_arch_formats_attr,
	.events_sysfs_show	= zhaoxin_event_sysfs_show,
};
482 
/*
 * Generic event id and human-readable name for each bit position tested by
 * zhaoxin_arch_events_quirk() in x86_pmu.events_mask: entry N corresponds
 * to bit N. The names point at string literals, hence const char *.
 */
static const struct { int id; const char *name; } zx_arch_events_map[] __initconst = {
	{ PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
	{ PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
	{ PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
	{ PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
	{ PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
	{ PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
	{ PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
};
492 
493 static __init void zhaoxin_arch_events_quirk(void)
494 {
495 	int bit;
496 
497 	/* disable event that reported as not presend by cpuid */
498 	for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(zx_arch_events_map)) {
499 		zx_pmon_event_map[zx_arch_events_map[bit].id] = 0;
500 		pr_warn("CPUID marked event: \'%s\' unavailable\n",
501 			zx_arch_events_map[bit].name);
502 	}
503 }
504 
505 __init int zhaoxin_pmu_init(void)
506 {
507 	union cpuid10_edx edx;
508 	union cpuid10_eax eax;
509 	union cpuid10_ebx ebx;
510 	struct event_constraint *c;
511 	unsigned int unused;
512 	int version;
513 
514 	pr_info("Welcome to zhaoxin pmu!\n");
515 
516 	/*
517 	 * Check whether the Architectural PerfMon supports
518 	 * hw_event or not.
519 	 */
520 	cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
521 
522 	if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT - 1)
523 		return -ENODEV;
524 
525 	version = eax.split.version_id;
526 	if (version != 2)
527 		return -ENODEV;
528 
529 	x86_pmu = zhaoxin_pmu;
530 	pr_info("Version check pass!\n");
531 
532 	x86_pmu.version			= version;
533 	x86_pmu.num_counters		= eax.split.num_counters;
534 	x86_pmu.cntval_bits		= eax.split.bit_width;
535 	x86_pmu.cntval_mask		= (1ULL << eax.split.bit_width) - 1;
536 	x86_pmu.events_maskl		= ebx.full;
537 	x86_pmu.events_mask_len		= eax.split.mask_length;
538 
539 	x86_pmu.num_counters_fixed = edx.split.num_counters_fixed;
540 	x86_add_quirk(zhaoxin_arch_events_quirk);
541 
542 	switch (boot_cpu_data.x86) {
543 	case 0x06:
544 		if (boot_cpu_data.x86_model == 0x0f || boot_cpu_data.x86_model == 0x19) {
545 
546 			x86_pmu.max_period = x86_pmu.cntval_mask >> 1;
547 
548 			/* Clearing status works only if the global control is enable on zxc. */
549 			x86_pmu.enabled_ack = 1;
550 
551 			x86_pmu.event_constraints = zxc_event_constraints;
552 			zx_pmon_event_map[PERF_COUNT_HW_INSTRUCTIONS] = 0;
553 			zx_pmon_event_map[PERF_COUNT_HW_CACHE_REFERENCES] = 0;
554 			zx_pmon_event_map[PERF_COUNT_HW_CACHE_MISSES] = 0;
555 			zx_pmon_event_map[PERF_COUNT_HW_BUS_CYCLES] = 0;
556 
557 			pr_cont("ZXC events, ");
558 			break;
559 		}
560 		return -ENODEV;
561 
562 	case 0x07:
563 		zx_pmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
564 			X86_CONFIG(.event = 0x01, .umask = 0x01, .inv = 0x01, .cmask = 0x01);
565 
566 		zx_pmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
567 			X86_CONFIG(.event = 0x0f, .umask = 0x04, .inv = 0, .cmask = 0);
568 
569 		switch (boot_cpu_data.x86_model) {
570 		case 0x1b:
571 			memcpy(hw_cache_event_ids, zxd_hw_cache_event_ids,
572 			       sizeof(hw_cache_event_ids));
573 
574 			x86_pmu.event_constraints = zxd_event_constraints;
575 
576 			zx_pmon_event_map[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0700;
577 			zx_pmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x0709;
578 
579 			pr_cont("ZXD events, ");
580 			break;
581 		case 0x3b:
582 			memcpy(hw_cache_event_ids, zxe_hw_cache_event_ids,
583 			       sizeof(hw_cache_event_ids));
584 
585 			x86_pmu.event_constraints = zxd_event_constraints;
586 
587 			zx_pmon_event_map[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0028;
588 			zx_pmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x0029;
589 
590 			pr_cont("ZXE events, ");
591 			break;
592 		default:
593 			return -ENODEV;
594 		}
595 		break;
596 
597 	default:
598 		return -ENODEV;
599 	}
600 
601 	x86_pmu.intel_ctrl = (1 << (x86_pmu.num_counters)) - 1;
602 	x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
603 
604 	if (x86_pmu.event_constraints) {
605 		for_each_event_constraint(c, x86_pmu.event_constraints) {
606 			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
607 			c->weight += x86_pmu.num_counters;
608 		}
609 	}
610 
611 	return 0;
612 }
613 
614