xref: /openbmc/linux/arch/x86/events/intel/cstate.c (revision b8d312aa)
/*
 * Support cstate residency counters
 *
 * Copyright (C) 2015, Intel Corp.
 * Author: Kan Liang (kan.liang@intel.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 */

/*
 * This file exports cstate-related free-running (read-only) counters
 * for perf. These counters may be used simultaneously by other tools,
 * such as turbostat. However, it still makes sense to implement them
 * in perf, because they can then be collected together with other
 * events, and tools can use them without special MSR access code
 * (see the usage example below this comment).
 *
 * The events only support system-wide counting. There is no sampling
 * support because it is not supported by the hardware.
 *
 * According to the counters' scope and category, two PMUs are
 * registered with the perf_event core subsystem.
 *  - 'cstate_core': The counter is available for each physical core.
 *    The counters include CORE_C*_RESIDENCY.
 *  - 'cstate_pkg': The counter is available for each physical package.
 *    The counters include PKG_C*_RESIDENCY.
 *
 * All of these counters are specified in the Intel® 64 and IA-32
 * Architectures Software Developer's Manual Vol3b.
 *
 * Model specific counters:
 *	MSR_CORE_C1_RES: CORE C1 Residency Counter
 *			 perf code: 0x00
 *			 Available model: SLM,AMT,GLM,CNL
 *			 Scope: Core (each processor core has a MSR)
 *	MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
 *			       perf code: 0x01
 *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM,
 *						CNL
 *			       Scope: Core
 *	MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
 *			       perf code: 0x02
 *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
 *						SKL,KNL,GLM,CNL
 *			       Scope: Core
 *	MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
 *			       perf code: 0x03
 *			       Available model: SNB,IVB,HSW,BDW,SKL,CNL
 *			       Scope: Core
 *	MSR_PKG_C2_RESIDENCY:  Package C2 Residency Counter.
 *			       perf code: 0x00
 *			       Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL
 *			       Scope: Package (physical package)
 *	MSR_PKG_C3_RESIDENCY:  Package C3 Residency Counter.
 *			       perf code: 0x01
 *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
 *						GLM,CNL
 *			       Scope: Package (physical package)
 *	MSR_PKG_C6_RESIDENCY:  Package C6 Residency Counter.
 *			       perf code: 0x02
 *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
 *						SKL,KNL,GLM,CNL
 *			       Scope: Package (physical package)
 *	MSR_PKG_C7_RESIDENCY:  Package C7 Residency Counter.
 *			       perf code: 0x03
 *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL
 *			       Scope: Package (physical package)
 *	MSR_PKG_C8_RESIDENCY:  Package C8 Residency Counter.
 *			       perf code: 0x04
 *			       Available model: HSW ULT,KBL,CNL
 *			       Scope: Package (physical package)
 *	MSR_PKG_C9_RESIDENCY:  Package C9 Residency Counter.
 *			       perf code: 0x05
 *			       Available model: HSW ULT,KBL,CNL
 *			       Scope: Package (physical package)
 *	MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
 *			       perf code: 0x06
 *			       Available model: HSW ULT,KBL,GLM,CNL
 *			       Scope: Package (physical package)
 *
 */
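
/*
 * Usage sketch (not part of the driver): on a supported model the
 * counters show up as free-running events under
 * /sys/bus/event_source/devices/cstate_core and .../cstate_pkg (the
 * package PMU is registered as "cstate_die" on multi-die packages).
 * The exact event list depends on the CPU model; for example, on a
 * part that exposes core C6 and package C2 residency:
 *
 *	perf stat -e cstate_core/c6-residency/ -a sleep 1
 *	perf stat -e cstate_pkg/c2-residency/ -a sleep 1
 */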

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <linux/nospec.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include "../perf_event.h"
#include "../probe.h"

MODULE_LICENSE("GPL");

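/*
 * Generates the sysfs "format" attribute show function for a PMU; both
 * PMUs expose a single "event" format that covers config:0-63 (see the
 * DEFINE_CSTATE_FORMAT_ATTR() users further down).
 */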
#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)		\
static ssize_t __cstate_##_var##_show(struct kobject *kobj,	\
				struct kobj_attribute *attr,	\
				char *page)			\
{								\
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);		\
	return sprintf(page, _format "\n");			\
}								\
static struct kobj_attribute format_attr_##_var =		\
	__ATTR(_name, 0444, __cstate_##_var##_show, NULL)

static ssize_t cstate_get_attr_cpumask(struct device *dev,
				       struct device_attribute *attr,
				       char *buf);

/* Model -> events mapping */
struct cstate_model {
	unsigned long		core_events;
	unsigned long		pkg_events;
	unsigned long		quirks;
};

/* Quirk flags */
#define SLM_PKG_C6_USE_C7_MSR	(1UL << 0)
#define KNL_CORE_C6_MSR		(1UL << 1)

struct perf_cstate_msr {
	u64	msr;
	struct	perf_pmu_events_attr *attr;
};


/* cstate_core PMU */
static struct pmu cstate_core_pmu;
static bool has_cstate_core;

enum perf_cstate_core_events {
	PERF_CSTATE_CORE_C1_RES = 0,
	PERF_CSTATE_CORE_C3_RES,
	PERF_CSTATE_CORE_C6_RES,
	PERF_CSTATE_CORE_C7_RES,

	PERF_CSTATE_CORE_EVENT_MAX,
};

PMU_EVENT_ATTR_STRING(c1-residency, attr_cstate_core_c1, "event=0x00");
PMU_EVENT_ATTR_STRING(c3-residency, attr_cstate_core_c3, "event=0x01");
PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_core_c6, "event=0x02");
PMU_EVENT_ATTR_STRING(c7-residency, attr_cstate_core_c7, "event=0x03");

static unsigned long core_msr_mask;

PMU_EVENT_GROUP(events, cstate_core_c1);
PMU_EVENT_GROUP(events, cstate_core_c3);
PMU_EVENT_GROUP(events, cstate_core_c6);
PMU_EVENT_GROUP(events, cstate_core_c7);

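/*
 * Availability callback for perf_msr_probe(): @data points at the
 * model's core_events/pkg_events bitmap, so only events listed for the
 * detected CPU model are considered for probing.
 */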
static bool test_msr(int idx, void *data)
{
	return test_bit(idx, (unsigned long *) data);
}

static struct perf_msr core_msr[] = {
	[PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,		&group_cstate_core_c1,	test_msr },
	[PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY,	&group_cstate_core_c3,	test_msr },
	[PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY,	&group_cstate_core_c6,	test_msr },
	[PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY,	&group_cstate_core_c7,	test_msr },
};

static struct attribute *attrs_empty[] = {
	NULL,
};

/*
 * There are no default events, but we need to create the "events"
 * group (with empty attrs) before updating it with the detected
 * events.
 */
static struct attribute_group core_events_attr_group = {
	.name = "events",
	.attrs = attrs_empty,
};

DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63");
static struct attribute *core_format_attrs[] = {
	&format_attr_core_event.attr,
	NULL,
};

static struct attribute_group core_format_attr_group = {
	.name = "format",
	.attrs = core_format_attrs,
};

static cpumask_t cstate_core_cpu_mask;
static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL);

static struct attribute *cstate_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group cpumask_attr_group = {
	.attrs = cstate_cpumask_attrs,
};

static const struct attribute_group *core_attr_groups[] = {
	&core_events_attr_group,
	&core_format_attr_group,
	&cpumask_attr_group,
	NULL,
};

/* cstate_pkg PMU */
static struct pmu cstate_pkg_pmu;
static bool has_cstate_pkg;

enum perf_cstate_pkg_events {
	PERF_CSTATE_PKG_C2_RES = 0,
	PERF_CSTATE_PKG_C3_RES,
	PERF_CSTATE_PKG_C6_RES,
	PERF_CSTATE_PKG_C7_RES,
	PERF_CSTATE_PKG_C8_RES,
	PERF_CSTATE_PKG_C9_RES,
	PERF_CSTATE_PKG_C10_RES,

	PERF_CSTATE_PKG_EVENT_MAX,
};

PMU_EVENT_ATTR_STRING(c2-residency,  attr_cstate_pkg_c2,  "event=0x00");
PMU_EVENT_ATTR_STRING(c3-residency,  attr_cstate_pkg_c3,  "event=0x01");
PMU_EVENT_ATTR_STRING(c6-residency,  attr_cstate_pkg_c6,  "event=0x02");
PMU_EVENT_ATTR_STRING(c7-residency,  attr_cstate_pkg_c7,  "event=0x03");
PMU_EVENT_ATTR_STRING(c8-residency,  attr_cstate_pkg_c8,  "event=0x04");
PMU_EVENT_ATTR_STRING(c9-residency,  attr_cstate_pkg_c9,  "event=0x05");
PMU_EVENT_ATTR_STRING(c10-residency, attr_cstate_pkg_c10, "event=0x06");

static unsigned long pkg_msr_mask;

PMU_EVENT_GROUP(events, cstate_pkg_c2);
PMU_EVENT_GROUP(events, cstate_pkg_c3);
PMU_EVENT_GROUP(events, cstate_pkg_c6);
PMU_EVENT_GROUP(events, cstate_pkg_c7);
PMU_EVENT_GROUP(events, cstate_pkg_c8);
PMU_EVENT_GROUP(events, cstate_pkg_c9);
PMU_EVENT_GROUP(events, cstate_pkg_c10);

static struct perf_msr pkg_msr[] = {
	[PERF_CSTATE_PKG_C2_RES]  = { MSR_PKG_C2_RESIDENCY,	&group_cstate_pkg_c2,	test_msr },
	[PERF_CSTATE_PKG_C3_RES]  = { MSR_PKG_C3_RESIDENCY,	&group_cstate_pkg_c3,	test_msr },
	[PERF_CSTATE_PKG_C6_RES]  = { MSR_PKG_C6_RESIDENCY,	&group_cstate_pkg_c6,	test_msr },
	[PERF_CSTATE_PKG_C7_RES]  = { MSR_PKG_C7_RESIDENCY,	&group_cstate_pkg_c7,	test_msr },
	[PERF_CSTATE_PKG_C8_RES]  = { MSR_PKG_C8_RESIDENCY,	&group_cstate_pkg_c8,	test_msr },
	[PERF_CSTATE_PKG_C9_RES]  = { MSR_PKG_C9_RESIDENCY,	&group_cstate_pkg_c9,	test_msr },
	[PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,	&group_cstate_pkg_c10,	test_msr },
};

static struct attribute_group pkg_events_attr_group = {
	.name = "events",
	.attrs = attrs_empty,
};

DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63");
static struct attribute *pkg_format_attrs[] = {
	&format_attr_pkg_event.attr,
	NULL,
};
static struct attribute_group pkg_format_attr_group = {
	.name = "format",
	.attrs = pkg_format_attrs,
};

static cpumask_t cstate_pkg_cpu_mask;

static const struct attribute_group *pkg_attr_groups[] = {
	&pkg_events_attr_group,
	&pkg_format_attr_group,
	&cpumask_attr_group,
	NULL,
};

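/*
 * Show the designated reader CPUs of the given PMU via the "cpumask"
 * sysfs attribute.
 */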
static ssize_t cstate_get_attr_cpumask(struct device *dev,
				       struct device_attribute *attr,
				       char *buf)
{
	struct pmu *pmu = dev_get_drvdata(dev);

	if (pmu == &cstate_core_pmu)
		return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
	else if (pmu == &cstate_pkg_pmu)
		return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
	else
		return 0;
}

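/*
 * Validate the requested event, map its config to the backing MSR and
 * reroute the event to the designated reader CPU of the target
 * core/package.
 */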
static int cstate_pmu_event_init(struct perf_event *event)
{
	u64 cfg = event->attr.config;
	int cpu;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (event->cpu < 0)
		return -EINVAL;

	if (event->pmu == &cstate_core_pmu) {
		if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
			return -EINVAL;
		cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_CORE_EVENT_MAX);
		if (!(core_msr_mask & (1 << cfg)))
			return -EINVAL;
		event->hw.event_base = core_msr[cfg].msr;
		cpu = cpumask_any_and(&cstate_core_cpu_mask,
				      topology_sibling_cpumask(event->cpu));
	} else if (event->pmu == &cstate_pkg_pmu) {
		if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
			return -EINVAL;
		cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX);
		if (!(pkg_msr_mask & (1 << cfg)))
			return -EINVAL;
		event->hw.event_base = pkg_msr[cfg].msr;
		cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
				      topology_die_cpumask(event->cpu));
	} else {
		return -ENOENT;
	}

	if (cpu >= nr_cpu_ids)
		return -ENODEV;

	event->cpu = cpu;
	event->hw.config = cfg;
	event->hw.idx = -1;
	return 0;
}

static inline u64 cstate_pmu_read_counter(struct perf_event *event)
{
	u64 val;

	rdmsrl(event->hw.event_base, val);
	return val;
}

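/*
 * The residency MSRs are free running; accumulate the delta since the
 * previous read. The cmpxchg loop retries if prev_count was updated
 * concurrently.
 */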
static void cstate_pmu_event_update(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev_raw_count, new_raw_count;

again:
	prev_raw_count = local64_read(&hwc->prev_count);
	new_raw_count = cstate_pmu_read_counter(event);

	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			    new_raw_count) != prev_raw_count)
		goto again;

	local64_add(new_raw_count - prev_raw_count, &event->count);
}

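/*
 * The counters cannot be started or stopped: "start" only snapshots
 * the current MSR value, while "stop" and "del" fold the final delta
 * into the event count.
 */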
static void cstate_pmu_event_start(struct perf_event *event, int mode)
{
	local64_set(&event->hw.prev_count, cstate_pmu_read_counter(event));
}

static void cstate_pmu_event_stop(struct perf_event *event, int mode)
{
	cstate_pmu_event_update(event);
}

static void cstate_pmu_event_del(struct perf_event *event, int mode)
{
	cstate_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int cstate_pmu_event_add(struct perf_event *event, int mode)
{
	if (mode & PERF_EF_START)
		cstate_pmu_event_start(event, mode);

	return 0;
}

/*
 * Check if the exiting CPU is the designated reader. If so, migrate
 * the events to a valid target when one is available.
 */
static int cstate_cpu_exit(unsigned int cpu)
{
	unsigned int target;

	if (has_cstate_core &&
	    cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) {

		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &cstate_core_cpu_mask);
			perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
		}
	}

	if (has_cstate_pkg &&
	    cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {

		target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
			perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
		}
	}
	return 0;
}

static int cstate_cpu_init(unsigned int cpu)
{
	unsigned int target;

	/*
	 * If this is the first online thread of that core, set it in
	 * the core cpu mask as the designated reader.
	 */
	target = cpumask_any_and(&cstate_core_cpu_mask,
				 topology_sibling_cpumask(cpu));

	if (has_cstate_core && target >= nr_cpu_ids)
		cpumask_set_cpu(cpu, &cstate_core_cpu_mask);

	/*
	 * If this is the first online thread of that package, set it
	 * in the package cpu mask as the designated reader.
	 */
	target = cpumask_any_and(&cstate_pkg_cpu_mask,
				 topology_die_cpumask(cpu));
	if (has_cstate_pkg && target >= nr_cpu_ids)
		cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);

	return 0;
}

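/*
 * Optional per-event attribute groups, wired up via attr_update.
 * perf_msr_probe() leaves the groups whose MSR is not available on the
 * current model hidden, so only the supported events show up in sysfs.
 */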
const struct attribute_group *core_attr_update[] = {
	&group_cstate_core_c1,
	&group_cstate_core_c3,
	&group_cstate_core_c6,
	&group_cstate_core_c7,
	NULL,
};

const struct attribute_group *pkg_attr_update[] = {
	&group_cstate_pkg_c2,
	&group_cstate_pkg_c3,
	&group_cstate_pkg_c6,
	&group_cstate_pkg_c7,
	&group_cstate_pkg_c8,
	&group_cstate_pkg_c9,
	&group_cstate_pkg_c10,
	NULL,
};

static struct pmu cstate_core_pmu = {
	.attr_groups	= core_attr_groups,
	.attr_update	= core_attr_update,
	.name		= "cstate_core",
	.task_ctx_nr	= perf_invalid_context,
	.event_init	= cstate_pmu_event_init,
	.add		= cstate_pmu_event_add,
	.del		= cstate_pmu_event_del,
	.start		= cstate_pmu_event_start,
	.stop		= cstate_pmu_event_stop,
	.read		= cstate_pmu_event_update,
	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
	.module		= THIS_MODULE,
};

static struct pmu cstate_pkg_pmu = {
	.attr_groups	= pkg_attr_groups,
	.attr_update	= pkg_attr_update,
	.name		= "cstate_pkg",
	.task_ctx_nr	= perf_invalid_context,
	.event_init	= cstate_pmu_event_init,
	.add		= cstate_pmu_event_add,
	.del		= cstate_pmu_event_del,
	.start		= cstate_pmu_event_start,
	.stop		= cstate_pmu_event_stop,
	.read		= cstate_pmu_event_update,
	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
	.module		= THIS_MODULE,
};

static const struct cstate_model nhm_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C3_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES) |
				  BIT(PERF_CSTATE_PKG_C7_RES),
};

static const struct cstate_model snb_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C3_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES) |
				  BIT(PERF_CSTATE_CORE_C7_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
				  BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES) |
				  BIT(PERF_CSTATE_PKG_C7_RES),
};

static const struct cstate_model hswult_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C3_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES) |
				  BIT(PERF_CSTATE_CORE_C7_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
				  BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES) |
				  BIT(PERF_CSTATE_PKG_C7_RES) |
				  BIT(PERF_CSTATE_PKG_C8_RES) |
				  BIT(PERF_CSTATE_PKG_C9_RES) |
				  BIT(PERF_CSTATE_PKG_C10_RES),
};

static const struct cstate_model cnl_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
				  BIT(PERF_CSTATE_CORE_C3_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES) |
				  BIT(PERF_CSTATE_CORE_C7_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
				  BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES) |
				  BIT(PERF_CSTATE_PKG_C7_RES) |
				  BIT(PERF_CSTATE_PKG_C8_RES) |
				  BIT(PERF_CSTATE_PKG_C9_RES) |
				  BIT(PERF_CSTATE_PKG_C10_RES),
};

static const struct cstate_model slm_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C6_RES),
	.quirks			= SLM_PKG_C6_USE_C7_MSR,
};


static const struct cstate_model knl_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C6_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
				  BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES),
	.quirks			= KNL_CORE_C6_MSR,
};


static const struct cstate_model glm_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
				  BIT(PERF_CSTATE_CORE_C3_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
				  BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES) |
				  BIT(PERF_CSTATE_PKG_C10_RES),
};


#define X86_CSTATES_MODEL(model, states)				\
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }

static const struct x86_cpu_id intel_cstates_match[] __initconst = {
	X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM,    nhm_cstates),
	X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates),
	X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates),

	X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE,    nhm_cstates),
	X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates),
	X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates),

	X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE,   snb_cstates),
	X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates),

	X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE,   snb_cstates),
	X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates),

	X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_CORE, snb_cstates),
	X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X,	   snb_cstates),
	X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_GT3E, snb_cstates),

	X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates),

	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT,   slm_cstates),
	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT_X, slm_cstates),
	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT,      slm_cstates),

	X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE,   snb_cstates),
	X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_XEON_D, snb_cstates),
	X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_GT3E,   snb_cstates),
	X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X,      snb_cstates),

	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE,  snb_cstates),
	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X,       snb_cstates),

	X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE,  hswult_cstates),
	X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, hswult_cstates),

	X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_MOBILE, cnl_cstates),

	X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates),
	X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),

	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT,   glm_cstates),
	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_X, glm_cstates),

	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),

	X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_MOBILE,  snb_cstates),
	X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_DESKTOP, snb_cstates),
	{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);

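/*
 * Apply the model quirks, probe which residency MSRs are actually
 * readable and record the resulting event masks for both PMUs.
 */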
static int __init cstate_probe(const struct cstate_model *cm)
{
	/* SLM has different MSR for PKG C6 */
	if (cm->quirks & SLM_PKG_C6_USE_C7_MSR)
		pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;

	/* KNL has different MSR for CORE C6 */
	if (cm->quirks & KNL_CORE_C6_MSR)
		pkg_msr[PERF_CSTATE_CORE_C6_RES].msr = MSR_KNL_CORE_C6_RESIDENCY;


	core_msr_mask = perf_msr_probe(core_msr, PERF_CSTATE_CORE_EVENT_MAX,
				       true, (void *) &cm->core_events);

	pkg_msr_mask = perf_msr_probe(pkg_msr, PERF_CSTATE_PKG_EVENT_MAX,
				      true, (void *) &cm->pkg_events);

	has_cstate_core = !!core_msr_mask;
	has_cstate_pkg  = !!pkg_msr_mask;

	return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
}

static inline void cstate_cleanup(void)
{
	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE);
	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING);

	if (has_cstate_core)
		perf_pmu_unregister(&cstate_core_pmu);

	if (has_cstate_pkg)
		perf_pmu_unregister(&cstate_pkg_pmu);
}

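/*
 * Register the CPU hotplug callbacks and the detected PMUs. The
 * package PMU is registered under the name "cstate_die" when a package
 * contains more than one die, since the designated reader is then
 * tracked per die (topology_die_cpumask()) rather than per package.
 */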
static int __init cstate_init(void)
{
	int err;

	cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING,
			  "perf/x86/cstate:starting", cstate_cpu_init, NULL);
	cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE,
			  "perf/x86/cstate:online", NULL, cstate_cpu_exit);

	if (has_cstate_core) {
		err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
		if (err) {
			has_cstate_core = false;
			pr_info("Failed to register cstate core pmu\n");
			cstate_cleanup();
			return err;
		}
	}

	if (has_cstate_pkg) {
		if (topology_max_die_per_package() > 1) {
			err = perf_pmu_register(&cstate_pkg_pmu,
						"cstate_die", -1);
		} else {
			err = perf_pmu_register(&cstate_pkg_pmu,
						cstate_pkg_pmu.name, -1);
		}
		if (err) {
			has_cstate_pkg = false;
			pr_info("Failed to register cstate pkg pmu\n");
			cstate_cleanup();
			return err;
		}
	}
	return 0;
}

static int __init cstate_pmu_init(void)
{
	const struct x86_cpu_id *id;
	int err;

	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return -ENODEV;

	id = x86_match_cpu(intel_cstates_match);
	if (!id)
		return -ENODEV;

	err = cstate_probe((const struct cstate_model *) id->driver_data);
	if (err)
		return err;

	return cstate_init();
}
module_init(cstate_pmu_init);

static void __exit cstate_pmu_exit(void)
{
	cstate_cleanup();
}
module_exit(cstate_pmu_exit);