1 /*
2  * turbostat -- show CPU frequency and C-state residency
3  * on modern Intel turbo-capable processors.
4  *
5  * Copyright (c) 2013 Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21 
22 #define _GNU_SOURCE
23 #include MSRHEADER
24 #include INTEL_FAMILY_HEADER
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <err.h>
28 #include <unistd.h>
29 #include <sys/types.h>
30 #include <sys/wait.h>
31 #include <sys/stat.h>
32 #include <sys/resource.h>
33 #include <fcntl.h>
34 #include <signal.h>
35 #include <sys/time.h>
36 #include <stdlib.h>
37 #include <getopt.h>
38 #include <dirent.h>
39 #include <string.h>
40 #include <ctype.h>
41 #include <sched.h>
42 #include <time.h>
43 #include <cpuid.h>
44 #include <linux/capability.h>
45 #include <errno.h>
46 
47 char *proc_stat = "/proc/stat";
48 FILE *outf;
49 int *fd_percpu;
50 struct timespec interval_ts = {5, 0};
51 unsigned int debug;
52 unsigned int rapl_joules;
53 unsigned int summary_only;
54 unsigned int dump_only;
55 unsigned int do_nhm_cstates;
56 unsigned int do_snb_cstates;
57 unsigned int do_knl_cstates;
58 unsigned int do_pc2;
59 unsigned int do_pc3;
60 unsigned int do_pc6;
61 unsigned int do_pc7;
62 unsigned int do_c8_c9_c10;
63 unsigned int do_skl_residency;
64 unsigned int do_slm_cstates;
65 unsigned int use_c1_residency_msr;
66 unsigned int has_aperf;
67 unsigned int has_epb;
68 unsigned int do_irtl_snb;
69 unsigned int do_irtl_hsw;
70 unsigned int units = 1000000;	/* MHz etc */
71 unsigned int genuine_intel;
72 unsigned int has_invariant_tsc;
73 unsigned int do_nhm_platform_info;
74 unsigned int extra_msr_offset32;
75 unsigned int extra_msr_offset64;
76 unsigned int extra_delta_offset32;
77 unsigned int extra_delta_offset64;
78 unsigned int aperf_mperf_multiplier = 1;
79 int do_irq = 1;
80 int do_smi;
81 double bclk;
82 double base_hz;
83 unsigned int has_base_hz;
84 double tsc_tweak = 1.0;
85 unsigned int show_pkg;
86 unsigned int show_core;
87 unsigned int show_cpu;
88 unsigned int show_pkg_only;
89 unsigned int show_core_only;
90 char *output_buffer, *outp;
91 unsigned int do_rapl;
92 unsigned int do_dts;
93 unsigned int do_ptm;
94 unsigned int do_gfx_rc6_ms;
95 unsigned long long  gfx_cur_rc6_ms;
96 unsigned int do_gfx_mhz;
97 unsigned int gfx_cur_mhz;
98 unsigned int tcc_activation_temp;
99 unsigned int tcc_activation_temp_override;
100 double rapl_power_units, rapl_time_units;
101 double rapl_dram_energy_units, rapl_energy_units;
102 double rapl_joule_counter_range;
103 unsigned int do_core_perf_limit_reasons;
104 unsigned int do_gfx_perf_limit_reasons;
105 unsigned int do_ring_perf_limit_reasons;
106 unsigned int crystal_hz;
107 unsigned long long tsc_hz;
108 int base_cpu;
109 double discover_bclk(unsigned int family, unsigned int model);
110 unsigned int has_hwp;	/* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
111 			/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
112 unsigned int has_hwp_notify;		/* IA32_HWP_INTERRUPT */
113 unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
114 unsigned int has_hwp_epp;		/* IA32_HWP_REQUEST[bits 31:24] */
115 unsigned int has_hwp_pkg;		/* IA32_HWP_REQUEST_PKG */
116 
117 #define RAPL_PKG		(1 << 0)
118 					/* 0x610 MSR_PKG_POWER_LIMIT */
119 					/* 0x611 MSR_PKG_ENERGY_STATUS */
120 #define RAPL_PKG_PERF_STATUS	(1 << 1)
121 					/* 0x613 MSR_PKG_PERF_STATUS */
122 #define RAPL_PKG_POWER_INFO	(1 << 2)
123 					/* 0x614 MSR_PKG_POWER_INFO */
124 
125 #define RAPL_DRAM		(1 << 3)
126 					/* 0x618 MSR_DRAM_POWER_LIMIT */
127 					/* 0x619 MSR_DRAM_ENERGY_STATUS */
128 #define RAPL_DRAM_PERF_STATUS	(1 << 4)
129 					/* 0x61b MSR_DRAM_PERF_STATUS */
130 #define RAPL_DRAM_POWER_INFO	(1 << 5)
131 					/* 0x61c MSR_DRAM_POWER_INFO */
132 
133 #define RAPL_CORES_POWER_LIMIT	(1 << 6)
134 					/* 0x638 MSR_PP0_POWER_LIMIT */
135 #define RAPL_CORE_POLICY	(1 << 7)
136 					/* 0x63a MSR_PP0_POLICY */
137 
138 #define RAPL_GFX		(1 << 8)
139 					/* 0x640 MSR_PP1_POWER_LIMIT */
140 					/* 0x641 MSR_PP1_ENERGY_STATUS */
141 					/* 0x642 MSR_PP1_POLICY */
142 
143 #define RAPL_CORES_ENERGY_STATUS	(1 << 9)
144 					/* 0x639 MSR_PP0_ENERGY_STATUS */
145 #define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
146 #define	TJMAX_DEFAULT	100
147 
148 #define MAX(a, b) ((a) > (b) ? (a) : (b))
149 
150 int backwards_count;
151 char *progname;
152 
153 cpu_set_t *cpu_present_set, *cpu_affinity_set;
154 size_t cpu_present_setsize, cpu_affinity_setsize;
155 
156 struct thread_data {
157 	unsigned long long tsc;
158 	unsigned long long aperf;
159 	unsigned long long mperf;
160 	unsigned long long c1;
161 	unsigned long long extra_msr64;
162 	unsigned long long extra_delta64;
163 	unsigned long long extra_msr32;
164 	unsigned long long extra_delta32;
165 	unsigned int irq_count;
166 	unsigned int smi_count;
167 	unsigned int cpu_id;
168 	unsigned int flags;
169 #define CPU_IS_FIRST_THREAD_IN_CORE	0x2
170 #define CPU_IS_FIRST_CORE_IN_PACKAGE	0x4
171 } *thread_even, *thread_odd;
172 
173 struct core_data {
174 	unsigned long long c3;
175 	unsigned long long c6;
176 	unsigned long long c7;
177 	unsigned int core_temp_c;
178 	unsigned int core_id;
179 } *core_even, *core_odd;
180 
181 struct pkg_data {
182 	unsigned long long pc2;
183 	unsigned long long pc3;
184 	unsigned long long pc6;
185 	unsigned long long pc7;
186 	unsigned long long pc8;
187 	unsigned long long pc9;
188 	unsigned long long pc10;
189 	unsigned long long pkg_wtd_core_c0;
190 	unsigned long long pkg_any_core_c0;
191 	unsigned long long pkg_any_gfxe_c0;
192 	unsigned long long pkg_both_core_gfxe_c0;
193 	long long gfx_rc6_ms;
194 	unsigned int gfx_mhz;
195 	unsigned int package_id;
196 	unsigned int energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
197 	unsigned int energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
198 	unsigned int energy_cores;	/* MSR_PP0_ENERGY_STATUS */
199 	unsigned int energy_gfx;	/* MSR_PP1_ENERGY_STATUS */
200 	unsigned int rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */
201 	unsigned int rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */
202 	unsigned int pkg_temp_c;
203 
204 } *package_even, *package_odd;
205 
206 #define ODD_COUNTERS thread_odd, core_odd, package_odd
207 #define EVEN_COUNTERS thread_even, core_even, package_even
208 
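/*
 * Counter storage is a flat array in topology order: package-major,
 * then core, then thread.  For example, with 2 packages x 4 cores x
 * 2 threads, (pkg 1, core 2, thread 1) sits at index
 * 1*4*2 + 2*2 + 1 = 13.
 */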
209 #define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \
210 	(thread_base + (pkg_no) * topo.num_cores_per_pkg * \
211 		topo.num_threads_per_core + \
212 		(core_no) * topo.num_threads_per_core + (thread_no))
213 #define GET_CORE(core_base, core_no, pkg_no) \
214 	(core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no))
215 #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
216 
217 struct system_summary {
218 	struct thread_data threads;
219 	struct core_data cores;
220 	struct pkg_data packages;
221 } sum, average;
222 
223 
224 struct topo_params {
225 	int num_packages;
226 	int num_cpus;
227 	int num_cores;
228 	int max_cpu_num;
229 	int num_cores_per_pkg;
230 	int num_threads_per_core;
231 } topo;
232 
233 struct timeval tv_even, tv_odd, tv_delta;
234 
235 int *irq_column_2_cpu;	/* /proc/interrupts column numbers */
236 int *irqs_per_cpu;		/* indexed by cpu_num */
237 
238 void setup_all_buffers(void);
239 
240 int cpu_is_not_present(int cpu)
241 {
242 	return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
243 }
244 /*
245  * run func(thread, core, package) in topology order
246  * skip non-present cpus
247  */
248 
249 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
250 	struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
251 {
252 	int retval, pkg_no, core_no, thread_no;
253 
254 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
255 		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
256 			for (thread_no = 0; thread_no <
257 				topo.num_threads_per_core; ++thread_no) {
258 				struct thread_data *t;
259 				struct core_data *c;
260 				struct pkg_data *p;
261 
262 				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
263 
264 				if (cpu_is_not_present(t->cpu_id))
265 					continue;
266 
267 				c = GET_CORE(core_base, core_no, pkg_no);
268 				p = GET_PKG(pkg_base, pkg_no);
269 
270 				retval = func(t, c, p);
271 				if (retval)
272 					return retval;
273 			}
274 		}
275 	}
276 	return 0;
277 }
278 
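/*
 * Bind the calling thread to the given CPU, so that subsequent
 * rdtsc() and per-CPU MSR device reads execute on that CPU.
 */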
279 int cpu_migrate(int cpu)
280 {
281 	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
282 	CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
283 	if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
284 		return -1;
285 	else
286 		return 0;
287 }
288 int get_msr_fd(int cpu)
289 {
290 	char pathname[32];
291 	int fd;
292 
293 	fd = fd_percpu[cpu];
294 
295 	if (fd)
296 		return fd;
297 
298 	sprintf(pathname, "/dev/cpu/%d/msr", cpu);
299 	fd = open(pathname, O_RDONLY);
300 	if (fd < 0)
301 		err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
302 
303 	fd_percpu[cpu] = fd;
304 
305 	return fd;
306 }
307 
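/*
 * Read the 64-bit MSR at 'offset' on 'cpu' via the msr driver.
 * A short read exits through err(); otherwise 0 is returned.
 */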
308 int get_msr(int cpu, off_t offset, unsigned long long *msr)
309 {
310 	ssize_t retval;
311 
312 	retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
313 
314 	if (retval != sizeof *msr)
315 		err(-1, "msr %d offset 0x%llx read failed", cpu, (unsigned long long)offset);
316 
317 	return 0;
318 }
319 
320 /*
321  * Example Format w/ field column widths:
322  *
323  *  Package    Core     CPU Avg_MHz   Busy% Bzy_MHz TSC_MHz     IRQ     SMI  CPU%c1  CPU%c3  CPU%c6  CPU%c7 CoreTmp  PkgTmp  GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
324  * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678
325  */
326 
327 void print_header(void)
328 {
329 	if (show_pkg)
330 		outp += sprintf(outp, "\tPackage");
331 	if (show_core)
332 		outp += sprintf(outp, "\tCore");
333 	if (show_cpu)
334 		outp += sprintf(outp, "\tCPU");
335 	if (has_aperf)
336 		outp += sprintf(outp, "\tAvg_MHz");
337 	if (has_aperf)
338 		outp += sprintf(outp, "\tBusy%%");
339 	if (has_aperf)
340 		outp += sprintf(outp, "\tBzy_MHz");
341 	outp += sprintf(outp, "\tTSC_MHz");
342 
343 	if (extra_delta_offset32)
344 		outp += sprintf(outp, "\tcount 0x%03X", extra_delta_offset32);
345 	if (extra_delta_offset64)
346 		outp += sprintf(outp, "\tCOUNT 0x%03X", extra_delta_offset64);
347 	if (extra_msr_offset32)
348 		outp += sprintf(outp, "\tMSR 0x%03X", extra_msr_offset32);
349 	if (extra_msr_offset64)
350 		outp += sprintf(outp, "\tMSR 0x%03X", extra_msr_offset64);
351 
352 	if (!debug)
353 		goto done;
354 
355 	if (do_irq)
356 		outp += sprintf(outp, "\tIRQ");
357 	if (do_smi)
358 		outp += sprintf(outp, "\tSMI");
359 
360 	if (do_nhm_cstates)
361 		outp += sprintf(outp, "\tCPU%%c1");
362 	if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
363 		outp += sprintf(outp, "\tCPU%%c3");
364 	if (do_nhm_cstates)
365 		outp += sprintf(outp, "\tCPU%%c6");
366 	if (do_snb_cstates)
367 		outp += sprintf(outp, "\tCPU%%c7");
368 
369 	if (do_dts)
370 		outp += sprintf(outp, "\tCoreTmp");
371 	if (do_ptm)
372 		outp += sprintf(outp, "\tPkgTmp");
373 
374 	if (do_gfx_rc6_ms)
375 		outp += sprintf(outp, "\tGFX%%rc6");
376 
377 	if (do_gfx_mhz)
378 		outp += sprintf(outp, "\tGFXMHz");
379 
380 	if (do_skl_residency) {
381 		outp += sprintf(outp, "\tTotl%%C0");
382 		outp += sprintf(outp, "\tAny%%C0");
383 		outp += sprintf(outp, "\tGFX%%C0");
384 		outp += sprintf(outp, "\tCPUGFX%%");
385 	}
386 
387 	if (do_pc2)
388 		outp += sprintf(outp, "\tPkg%%pc2");
389 	if (do_pc3)
390 		outp += sprintf(outp, "\tPkg%%pc3");
391 	if (do_pc6)
392 		outp += sprintf(outp, "\tPkg%%pc6");
393 	if (do_pc7)
394 		outp += sprintf(outp, "\tPkg%%pc7");
395 	if (do_c8_c9_c10) {
396 		outp += sprintf(outp, "\tPkg%%pc8");
397 		outp += sprintf(outp, "\tPkg%%pc9");
398 		outp += sprintf(outp, "\tPk%%pc10");
399 	}
400 
401 	if (do_rapl && !rapl_joules) {
402 		if (do_rapl & RAPL_PKG)
403 			outp += sprintf(outp, "\tPkgWatt");
404 		if (do_rapl & RAPL_CORES_ENERGY_STATUS)
405 			outp += sprintf(outp, "\tCorWatt");
406 		if (do_rapl & RAPL_GFX)
407 			outp += sprintf(outp, "\tGFXWatt");
408 		if (do_rapl & RAPL_DRAM)
409 			outp += sprintf(outp, "\tRAMWatt");
410 		if (do_rapl & RAPL_PKG_PERF_STATUS)
411 			outp += sprintf(outp, "\tPKG_%%");
412 		if (do_rapl & RAPL_DRAM_PERF_STATUS)
413 			outp += sprintf(outp, "\tRAM_%%");
414 	} else if (do_rapl && rapl_joules) {
415 		if (do_rapl & RAPL_PKG)
416 			outp += sprintf(outp, "\tPkg_J");
417 		if (do_rapl & RAPL_CORES_ENERGY_STATUS)
418 			outp += sprintf(outp, "\tCor_J");
419 		if (do_rapl & RAPL_GFX)
420 			outp += sprintf(outp, "\tGFX_J");
421 		if (do_rapl & RAPL_DRAM)
422 			outp += sprintf(outp, "\tRAM_J");
423 		if (do_rapl & RAPL_PKG_PERF_STATUS)
424 			outp += sprintf(outp, "\tPKG_%%");
425 		if (do_rapl & RAPL_DRAM_PERF_STATUS)
426 			outp += sprintf(outp, "\tRAM_%%");
427 	}
428 done:
429 	outp += sprintf(outp, "\n");
430 }
431 
432 int dump_counters(struct thread_data *t, struct core_data *c,
433 	struct pkg_data *p)
434 {
435 	outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
436 
437 	if (t) {
438 		outp += sprintf(outp, "CPU: %d flags 0x%x\n",
439 			t->cpu_id, t->flags);
440 		outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
441 		outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
442 		outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
443 		outp += sprintf(outp, "c1: %016llX\n", t->c1);
444 		outp += sprintf(outp, "msr0x%x: %08llX\n",
445 			extra_delta_offset32, t->extra_delta32);
446 		outp += sprintf(outp, "msr0x%x: %016llX\n",
447 			extra_delta_offset64, t->extra_delta64);
448 		outp += sprintf(outp, "msr0x%x: %08llX\n",
449 			extra_msr_offset32, t->extra_msr32);
450 		outp += sprintf(outp, "msr0x%x: %016llX\n",
451 			extra_msr_offset64, t->extra_msr64);
452 		if (do_irq)
453 			outp += sprintf(outp, "IRQ: %08X\n", t->irq_count);
454 		if (do_smi)
455 			outp += sprintf(outp, "SMI: %08X\n", t->smi_count);
456 	}
457 
458 	if (c) {
459 		outp += sprintf(outp, "core: %d\n", c->core_id);
460 		outp += sprintf(outp, "c3: %016llX\n", c->c3);
461 		outp += sprintf(outp, "c6: %016llX\n", c->c6);
462 		outp += sprintf(outp, "c7: %016llX\n", c->c7);
463 		outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
464 	}
465 
466 	if (p) {
467 		outp += sprintf(outp, "package: %d\n", p->package_id);
468 
469 		outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
470 		outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
471 		outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
472 		outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
473 
474 		outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
475 		if (do_pc3)
476 			outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
477 		if (do_pc6)
478 			outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
479 		if (do_pc7)
480 			outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
481 		outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
482 		outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
483 		outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
484 		outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
485 		outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
486 		outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
487 		outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
488 		outp += sprintf(outp, "Throttle PKG: %0X\n",
489 			p->rapl_pkg_perf_status);
490 		outp += sprintf(outp, "Throttle RAM: %0X\n",
491 			p->rapl_dram_perf_status);
492 		outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
493 	}
494 
495 	outp += sprintf(outp, "\n");
496 
497 	return 0;
498 }
499 
500 /*
501  * column formatting convention & formats
502  */
503 int format_counters(struct thread_data *t, struct core_data *c,
504 	struct pkg_data *p)
505 {
506 	double interval_float;
507 	char *fmt8;
508 
509 	 /* if showing only 1st thread in core and this isn't one, bail out */
510 	if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
511 		return 0;
512 
513 	 /* if showing only 1st thread in pkg and this isn't one, bail out */
514 	if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
515 		return 0;
516 
517 	interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
518 
519 	/* topo columns, print blanks on 1st (average) line */
520 	if (t == &average.threads) {
521 		if (show_pkg)
522 			outp += sprintf(outp, "\t-");
523 		if (show_core)
524 			outp += sprintf(outp, "\t-");
525 		if (show_cpu)
526 			outp += sprintf(outp, "\t-");
527 	} else {
528 		if (show_pkg) {
529 			if (p)
530 				outp += sprintf(outp, "\t%d", p->package_id);
531 			else
532 				outp += sprintf(outp, "\t-");
533 		}
534 		if (show_core) {
535 			if (c)
536 				outp += sprintf(outp, "\t%d", c->core_id);
537 			else
538 				outp += sprintf(outp, "\t-");
539 		}
540 		if (show_cpu)
541 			outp += sprintf(outp, "\t%d", t->cpu_id);
542 	}
543 
544 	/* Avg_MHz */
545 	if (has_aperf)
546 		outp += sprintf(outp, "\t%.0f",
547 			1.0 / units * t->aperf / interval_float);
548 
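	/*
	 * Busy% below is the un-halted fraction of the interval:
	 * delta-MPERF / delta-TSC (MPERF ticks at the TSC reference
	 * rate, but only in C0), divided by tsc_tweak on parts where
	 * the two reference clocks differ slightly.
	 */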
549 	/* Busy% */
550 	if (has_aperf)
551 		outp += sprintf(outp, "\t%.2f", 100.0 * t->mperf/t->tsc/tsc_tweak);
552 
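	/*
	 * Bzy_MHz below is the average frequency while un-halted:
	 * APERF/MPERF scaled either by the known base frequency, or by
	 * delta-TSC / interval when base_hz is not known.
	 */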
553 	/* Bzy_MHz */
554 	if (has_aperf) {
555 		if (has_base_hz)
556 			outp += sprintf(outp, "\t%.0f", base_hz / units * t->aperf / t->mperf);
557 		else
558 			outp += sprintf(outp, "\t%.0f",
559 				1.0 * t->tsc / units * t->aperf / t->mperf / interval_float);
560 	}
561 
562 	/* TSC_MHz */
563 	outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc/units/interval_float);
564 
565 	/* delta */
566 	if (extra_delta_offset32)
567 		outp += sprintf(outp, "\t%11llu", t->extra_delta32);
568 
569 	/* DELTA */
570 	if (extra_delta_offset64)
571 		outp += sprintf(outp, "\t%11llu", t->extra_delta64);
572 	/* msr */
573 	if (extra_msr_offset32)
574 		outp += sprintf(outp, "\t0x%08llx", t->extra_msr32);
575 
576 	/* MSR */
577 	if (extra_msr_offset64)
578 		outp += sprintf(outp, "\t0x%016llx", t->extra_msr64);
579 
580 	if (!debug)
581 		goto done;
582 
583 	/* IRQ */
584 	if (do_irq)
585 		outp += sprintf(outp, "\t%d", t->irq_count);
586 
587 	/* SMI */
588 	if (do_smi)
589 		outp += sprintf(outp, "\t%d", t->smi_count);
590 
591 	if (do_nhm_cstates)
592 		outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/t->tsc);
593 
594 	/* print per-core data only for 1st thread in core */
595 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
596 		goto done;
597 
598 	if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
599 		outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/t->tsc);
600 	if (do_nhm_cstates)
601 		outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/t->tsc);
602 	if (do_snb_cstates)
603 		outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/t->tsc);
604 
605 	if (do_dts)
606 		outp += sprintf(outp, "\t%d", c->core_temp_c);
607 
608 	/* print per-package data only for 1st core in package */
609 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
610 		goto done;
611 
612 	/* PkgTmp */
613 	if (do_ptm)
614 		outp += sprintf(outp, "\t%d", p->pkg_temp_c);
615 
616 	/* GFXrc6 */
617 	if (do_gfx_rc6_ms) {
618 		if (p->gfx_rc6_ms == -1) {	/* detect GFX counter reset */
619 			outp += sprintf(outp, "\t**.**");
620 		} else {
621 			outp += sprintf(outp, "\t%.2f",
622 				p->gfx_rc6_ms / 10.0 / interval_float);
623 		}
624 	}
625 
626 	/* GFXMHz */
627 	if (do_gfx_mhz)
628 		outp += sprintf(outp, "\t%d", p->gfx_mhz);
629 
630 	/* Totl%C0, Any%C0, GFX%C0, CPUGFX% */
631 	if (do_skl_residency) {
632 		outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc);
633 		outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_core_c0/t->tsc);
634 		outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc);
635 		outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc);
636 	}
637 
638 	if (do_pc2)
639 		outp += sprintf(outp, "\t%.2f", 100.0 * p->pc2/t->tsc);
640 	if (do_pc3)
641 		outp += sprintf(outp, "\t%.2f", 100.0 * p->pc3/t->tsc);
642 	if (do_pc6)
643 		outp += sprintf(outp, "\t%.2f", 100.0 * p->pc6/t->tsc);
644 	if (do_pc7)
645 		outp += sprintf(outp, "\t%.2f", 100.0 * p->pc7/t->tsc);
646 	if (do_c8_c9_c10) {
647 		outp += sprintf(outp, "\t%.2f", 100.0 * p->pc8/t->tsc);
648 		outp += sprintf(outp, "\t%.2f", 100.0 * p->pc9/t->tsc);
649 		outp += sprintf(outp, "\t%.2f", 100.0 * p->pc10/t->tsc);
650 	}
651 
652 	/*
653 	 * If measurement interval exceeds minimum RAPL Joule Counter range,
654 	 * indicate that results are suspect by printing "**" in fraction place.
655 	 */
656 	if (interval_float < rapl_joule_counter_range)
657 		fmt8 = "\t%.2f";
658 	else
659 		fmt8 = "%6.0f**";
660 
661 	if (do_rapl && !rapl_joules) {
662 		if (do_rapl & RAPL_PKG)
663 			outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float);
664 		if (do_rapl & RAPL_CORES_ENERGY_STATUS)
665 			outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float);
666 		if (do_rapl & RAPL_GFX)
667 			outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float);
668 		if (do_rapl & RAPL_DRAM)
669 			outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float);
670 		if (do_rapl & RAPL_PKG_PERF_STATUS)
671 			outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
672 		if (do_rapl & RAPL_DRAM_PERF_STATUS)
673 			outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
674 	} else if (do_rapl && rapl_joules) {
675 		if (do_rapl & RAPL_PKG)
676 			outp += sprintf(outp, fmt8,
677 					p->energy_pkg * rapl_energy_units);
678 		if (do_rapl & RAPL_CORES)
679 			outp += sprintf(outp, fmt8,
680 					p->energy_cores * rapl_energy_units);
681 		if (do_rapl & RAPL_GFX)
682 			outp += sprintf(outp, fmt8,
683 					p->energy_gfx * rapl_energy_units);
684 		if (do_rapl & RAPL_DRAM)
685 			outp += sprintf(outp, fmt8,
686 					p->energy_dram * rapl_dram_energy_units);
687 		if (do_rapl & RAPL_PKG_PERF_STATUS)
688 			outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
689 		if (do_rapl & RAPL_DRAM_PERF_STATUS)
690 			outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
691 	}
692 done:
693 	outp += sprintf(outp, "\n");
694 
695 	return 0;
696 }
697 
698 void flush_output_stdout(void)
699 {
700 	FILE *filep;
701 
702 	if (outf == stderr)
703 		filep = stdout;
704 	else
705 		filep = outf;
706 
707 	fputs(output_buffer, filep);
708 	fflush(filep);
709 
710 	outp = output_buffer;
711 }
712 void flush_output_stderr(void)
713 {
714 	fputs(output_buffer, outf);
715 	fflush(outf);
716 	outp = output_buffer;
717 }
718 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
719 {
720 	static int printed;
721 
722 	if (!printed || !summary_only)
723 		print_header();
724 
725 	if (topo.num_cpus > 1)
726 		format_counters(&average.threads, &average.cores,
727 			&average.packages);
728 
729 	printed = 1;
730 
731 	if (summary_only)
732 		return;
733 
734 	for_all_cpus(format_counters, t, c, p);
735 }
736 
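/*
 * old = new - old for a 32-bit counter that may have wrapped once
 * (e.g. the RAPL energy status MSRs).  For example, new = 0x10 and
 * old = 0xFFFFFFF0 yields 0x100000000 + 0x10 - 0xFFFFFFF0 = 0x20.
 * The result is stored back into 'old'.
 */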
737 #define DELTA_WRAP32(new, old)			\
738 	if (new > old) {			\
739 		old = new - old;		\
740 	} else {				\
741 		old = 0x100000000 + new - old;	\
742 	}
743 
744 int
745 delta_package(struct pkg_data *new, struct pkg_data *old)
746 {
747 
748 	if (do_skl_residency) {
749 		old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
750 		old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
751 		old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
752 		old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
753 	}
754 	old->pc2 = new->pc2 - old->pc2;
755 	if (do_pc3)
756 		old->pc3 = new->pc3 - old->pc3;
757 	if (do_pc6)
758 		old->pc6 = new->pc6 - old->pc6;
759 	if (do_pc7)
760 		old->pc7 = new->pc7 - old->pc7;
761 	old->pc8 = new->pc8 - old->pc8;
762 	old->pc9 = new->pc9 - old->pc9;
763 	old->pc10 = new->pc10 - old->pc10;
764 	old->pkg_temp_c = new->pkg_temp_c;
765 
766 	/* flag an error when rc6 counter resets/wraps */
767 	if (old->gfx_rc6_ms >  new->gfx_rc6_ms)
768 		old->gfx_rc6_ms = -1;
769 	else
770 		old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
771 
772 	old->gfx_mhz = new->gfx_mhz;
773 
774 	DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
775 	DELTA_WRAP32(new->energy_cores, old->energy_cores);
776 	DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
777 	DELTA_WRAP32(new->energy_dram, old->energy_dram);
778 	DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
779 	DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
780 
781 	return 0;
782 }
783 
784 void
785 delta_core(struct core_data *new, struct core_data *old)
786 {
787 	old->c3 = new->c3 - old->c3;
788 	old->c6 = new->c6 - old->c6;
789 	old->c7 = new->c7 - old->c7;
790 	old->core_temp_c = new->core_temp_c;
791 }
792 
793 /*
794  * old = new - old
795  */
796 int
797 delta_thread(struct thread_data *new, struct thread_data *old,
798 	struct core_data *core_delta)
799 {
800 	old->tsc = new->tsc - old->tsc;
801 
802 	/* check for TSC < 1 Mcycles over interval */
803 	if (old->tsc < (1000 * 1000))
804 		errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
805 		     "You can disable all c-states by booting with \"idle=poll\"\n"
806 		     "or just the deep ones with \"processor.max_cstate=1\"");
807 
808 	old->c1 = new->c1 - old->c1;
809 
810 	if (has_aperf) {
811 		if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
812 			old->aperf = new->aperf - old->aperf;
813 			old->mperf = new->mperf - old->mperf;
814 		} else {
815 			return -1;
816 		}
817 	}
818 
819 
820 	if (use_c1_residency_msr) {
821 		/*
822 		 * Some models have a dedicated C1 residency MSR,
823 		 * which should be more accurate than the derivation below.
824 		 */
825 	} else {
826 		/*
827 		 * As counter collection is not atomic,
828 		 * it is possible for mperf's non-halted cycles + idle states
829 		 * to exceed TSC's all cycles: show c1 = 0% in that case.
830 		 */
831 		if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
832 			old->c1 = 0;
833 		else {
834 			/* normal case, derive c1 */
835 			old->c1 = old->tsc - old->mperf - core_delta->c3
836 				- core_delta->c6 - core_delta->c7;
837 		}
838 	}
839 
840 	if (old->mperf == 0) {
841 		if (debug > 1)
842 			fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
843 		old->mperf = 1;	/* divide by 0 protection */
844 	}
845 
846 	old->extra_delta32 = new->extra_delta32 - old->extra_delta32;
847 	old->extra_delta32 &= 0xFFFFFFFF;
848 
849 	old->extra_delta64 = new->extra_delta64 - old->extra_delta64;
850 
851 	/*
852 	 * Extra MSR is just a snapshot, simply copy latest w/o subtracting
853 	 */
854 	old->extra_msr32 = new->extra_msr32;
855 	old->extra_msr64 = new->extra_msr64;
856 
857 	if (do_irq)
858 		old->irq_count = new->irq_count - old->irq_count;
859 
860 	if (do_smi)
861 		old->smi_count = new->smi_count - old->smi_count;
862 
863 	return 0;
864 }
865 
866 int delta_cpu(struct thread_data *t, struct core_data *c,
867 	struct pkg_data *p, struct thread_data *t2,
868 	struct core_data *c2, struct pkg_data *p2)
869 {
870 	int retval = 0;
871 
872 	/* calculate core delta only for 1st thread in core */
873 	if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
874 		delta_core(c, c2);
875 
876 	/* always calculate thread delta */
877 	retval = delta_thread(t, t2, c2);	/* c2 is core delta */
878 	if (retval)
879 		return retval;
880 
881 	/* calculate package delta only for 1st core in package */
882 	if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
883 		retval = delta_package(p, p2);
884 
885 	return retval;
886 }
887 
888 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
889 {
890 	t->tsc = 0;
891 	t->aperf = 0;
892 	t->mperf = 0;
893 	t->c1 = 0;
894 
895 	t->extra_delta32 = 0;
896 	t->extra_delta64 = 0;
897 
898 	t->irq_count = 0;
899 	t->smi_count = 0;
900 
901 	/* tells format_counters to dump all fields from this set */
902 	t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
903 
904 	c->c3 = 0;
905 	c->c6 = 0;
906 	c->c7 = 0;
907 	c->core_temp_c = 0;
908 
909 	p->pkg_wtd_core_c0 = 0;
910 	p->pkg_any_core_c0 = 0;
911 	p->pkg_any_gfxe_c0 = 0;
912 	p->pkg_both_core_gfxe_c0 = 0;
913 
914 	p->pc2 = 0;
915 	if (do_pc3)
916 		p->pc3 = 0;
917 	if (do_pc6)
918 		p->pc6 = 0;
919 	if (do_pc7)
920 		p->pc7 = 0;
921 	p->pc8 = 0;
922 	p->pc9 = 0;
923 	p->pc10 = 0;
924 
925 	p->energy_pkg = 0;
926 	p->energy_dram = 0;
927 	p->energy_cores = 0;
928 	p->energy_gfx = 0;
929 	p->rapl_pkg_perf_status = 0;
930 	p->rapl_dram_perf_status = 0;
931 	p->pkg_temp_c = 0;
932 
933 	p->gfx_rc6_ms = 0;
934 	p->gfx_mhz = 0;
935 }
936 int sum_counters(struct thread_data *t, struct core_data *c,
937 	struct pkg_data *p)
938 {
939 	average.threads.tsc += t->tsc;
940 	average.threads.aperf += t->aperf;
941 	average.threads.mperf += t->mperf;
942 	average.threads.c1 += t->c1;
943 
944 	average.threads.extra_delta32 += t->extra_delta32;
945 	average.threads.extra_delta64 += t->extra_delta64;
946 
947 	average.threads.irq_count += t->irq_count;
948 	average.threads.smi_count += t->smi_count;
949 
950 	/* sum per-core values only for 1st thread in core */
951 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
952 		return 0;
953 
954 	average.cores.c3 += c->c3;
955 	average.cores.c6 += c->c6;
956 	average.cores.c7 += c->c7;
957 
958 	average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
959 
960 	/* sum per-pkg values only for 1st core in pkg */
961 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
962 		return 0;
963 
964 	if (do_skl_residency) {
965 		average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
966 		average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
967 		average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
968 		average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
969 	}
970 
971 	average.packages.pc2 += p->pc2;
972 	if (do_pc3)
973 		average.packages.pc3 += p->pc3;
974 	if (do_pc6)
975 		average.packages.pc6 += p->pc6;
976 	if (do_pc7)
977 		average.packages.pc7 += p->pc7;
978 	average.packages.pc8 += p->pc8;
979 	average.packages.pc9 += p->pc9;
980 	average.packages.pc10 += p->pc10;
981 
982 	average.packages.energy_pkg += p->energy_pkg;
983 	average.packages.energy_dram += p->energy_dram;
984 	average.packages.energy_cores += p->energy_cores;
985 	average.packages.energy_gfx += p->energy_gfx;
986 
987 	average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
988 	average.packages.gfx_mhz = p->gfx_mhz;
989 
990 	average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
991 
992 	average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
993 	average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
994 	return 0;
995 }
996 /*
997  * sum the counters for all cpus in the system
998  * and compute the average
999  */
1000 void compute_average(struct thread_data *t, struct core_data *c,
1001 	struct pkg_data *p)
1002 {
1003 	clear_counters(&average.threads, &average.cores, &average.packages);
1004 
1005 	for_all_cpus(sum_counters, t, c, p);
1006 
1007 	average.threads.tsc /= topo.num_cpus;
1008 	average.threads.aperf /= topo.num_cpus;
1009 	average.threads.mperf /= topo.num_cpus;
1010 	average.threads.c1 /= topo.num_cpus;
1011 
1012 	average.threads.extra_delta32 /= topo.num_cpus;
1013 	average.threads.extra_delta32 &= 0xFFFFFFFF;
1014 
1015 	average.threads.extra_delta64 /= topo.num_cpus;
1016 
1017 	average.cores.c3 /= topo.num_cores;
1018 	average.cores.c6 /= topo.num_cores;
1019 	average.cores.c7 /= topo.num_cores;
1020 
1021 	if (do_skl_residency) {
1022 		average.packages.pkg_wtd_core_c0 /= topo.num_packages;
1023 		average.packages.pkg_any_core_c0 /= topo.num_packages;
1024 		average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
1025 		average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
1026 	}
1027 
1028 	average.packages.pc2 /= topo.num_packages;
1029 	if (do_pc3)
1030 		average.packages.pc3 /= topo.num_packages;
1031 	if (do_pc6)
1032 		average.packages.pc6 /= topo.num_packages;
1033 	if (do_pc7)
1034 		average.packages.pc7 /= topo.num_packages;
1035 
1036 	average.packages.pc8 /= topo.num_packages;
1037 	average.packages.pc9 /= topo.num_packages;
1038 	average.packages.pc10 /= topo.num_packages;
1039 }
1040 
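/*
 * Read the 64-bit time-stamp counter of the CPU we are currently
 * running on (RDTSC returns the low half in EAX, the high in EDX).
 */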
1041 static unsigned long long rdtsc(void)
1042 {
1043 	unsigned int low, high;
1044 
1045 	asm volatile("rdtsc" : "=a" (low), "=d" (high));
1046 
1047 	return low | ((unsigned long long)high) << 32;
1048 }
1049 
1050 /*
1051  * get_counters(...)
1052  * migrate to cpu
1053  * acquire and record local counters for that cpu
1054  */
1055 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1056 {
1057 	int cpu = t->cpu_id;
1058 	unsigned long long msr;
1059 	int aperf_mperf_retry_count = 0;
1060 
1061 	if (cpu_migrate(cpu)) {
1062 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
1063 		return -1;
1064 	}
1065 
1066 retry:
1067 	t->tsc = rdtsc();	/* we are running on local CPU of interest */
1068 
1069 	if (has_aperf) {
1070 		unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
1071 
1072 		/*
1073 		 * The TSC, APERF and MPERF must be read together for
1074 		 * APERF/MPERF and MPERF/TSC to give accurate results.
1075 		 *
1076 		 * Unfortunately, APERF and MPERF are read via
1077 		 * separate system calls, so delays may occur
1078 		 * between them.  If the time to read them
1079 		 * varies by a large amount, we re-read them.
1080 		 */
1081 
1082 		/*
1083 		 * This initial dummy APERF read has been seen to
1084 		 * reduce jitter in the subsequent reads.
1085 		 */
1086 
1087 		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1088 			return -3;
1089 
1090 		t->tsc = rdtsc();	/* re-read close to APERF */
1091 
1092 		tsc_before = t->tsc;
1093 
1094 		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1095 			return -3;
1096 
1097 		tsc_between = rdtsc();
1098 
1099 		if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
1100 			return -4;
1101 
1102 		tsc_after = rdtsc();
1103 
1104 		aperf_time = tsc_between - tsc_before;
1105 		mperf_time = tsc_after - tsc_between;
1106 
1107 		/*
1108 		 * If the system call latencies to read APERF and MPERF
1109 		 * differ by more than 2x, try again.
1110 		 */
1111 		if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
1112 			aperf_mperf_retry_count++;
1113 			if (aperf_mperf_retry_count < 5)
1114 				goto retry;
1115 			else
1116 				warnx("cpu%d jitter %lld %lld",
1117 					cpu, aperf_time, mperf_time);
1118 		}
1119 		aperf_mperf_retry_count = 0;
1120 
1121 		t->aperf = t->aperf * aperf_mperf_multiplier;
1122 		t->mperf = t->mperf * aperf_mperf_multiplier;
1123 	}
1124 
1125 	if (do_irq)
1126 		t->irq_count = irqs_per_cpu[cpu];
1127 	if (do_smi) {
1128 		if (get_msr(cpu, MSR_SMI_COUNT, &msr))
1129 			return -5;
1130 		t->smi_count = msr & 0xFFFFFFFF;
1131 	}
1132 	if (extra_delta_offset32) {
1133 		if (get_msr(cpu, extra_delta_offset32, &msr))
1134 			return -5;
1135 		t->extra_delta32 = msr & 0xFFFFFFFF;
1136 	}
1137 
1138 	if (extra_delta_offset64)
1139 		if (get_msr(cpu, extra_delta_offset64, &t->extra_delta64))
1140 			return -5;
1141 
1142 	if (extra_msr_offset32) {
1143 		if (get_msr(cpu, extra_msr_offset32, &msr))
1144 			return -5;
1145 		t->extra_msr32 = msr & 0xFFFFFFFF;
1146 	}
1147 
1148 	if (extra_msr_offset64)
1149 		if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64))
1150 			return -5;
1151 
1152 	if (use_c1_residency_msr) {
1153 		if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
1154 			return -6;
1155 	}
1156 
1157 	/* collect core counters only for 1st thread in core */
1158 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1159 		return 0;
1160 
1161 	if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) {
1162 		if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
1163 			return -6;
1164 	}
1165 
1166 	if (do_nhm_cstates && !do_knl_cstates) {
1167 		if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
1168 			return -7;
1169 	} else if (do_knl_cstates) {
1170 		if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
1171 			return -7;
1172 	}
1173 
1174 	if (do_snb_cstates)
1175 		if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
1176 			return -8;
1177 
1178 	if (do_dts) {
1179 		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
1180 			return -9;
1181 		c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1182 	}
1183 
1184 
1185 	/* collect package counters only for 1st core in package */
1186 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1187 		return 0;
1188 
1189 	if (do_skl_residency) {
1190 		if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
1191 			return -10;
1192 		if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
1193 			return -11;
1194 		if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
1195 			return -12;
1196 		if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
1197 			return -13;
1198 	}
1199 	if (do_pc3)
1200 		if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
1201 			return -9;
1202 	if (do_pc6)
1203 		if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
1204 			return -10;
1205 	if (do_pc2)
1206 		if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
1207 			return -11;
1208 	if (do_pc7)
1209 		if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
1210 			return -12;
1211 	if (do_c8_c9_c10) {
1212 		if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
1213 			return -13;
1214 		if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
1215 			return -13;
1216 		if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
1217 			return -13;
1218 	}
1219 	if (do_rapl & RAPL_PKG) {
1220 		if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
1221 			return -13;
1222 		p->energy_pkg = msr & 0xFFFFFFFF;
1223 	}
1224 	if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
1225 		if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
1226 			return -14;
1227 		p->energy_cores = msr & 0xFFFFFFFF;
1228 	}
1229 	if (do_rapl & RAPL_DRAM) {
1230 		if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
1231 			return -15;
1232 		p->energy_dram = msr & 0xFFFFFFFF;
1233 	}
1234 	if (do_rapl & RAPL_GFX) {
1235 		if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
1236 			return -16;
1237 		p->energy_gfx = msr & 0xFFFFFFFF;
1238 	}
1239 	if (do_rapl & RAPL_PKG_PERF_STATUS) {
1240 		if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
1241 			return -16;
1242 		p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
1243 	}
1244 	if (do_rapl & RAPL_DRAM_PERF_STATUS) {
1245 		if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
1246 			return -16;
1247 		p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
1248 	}
1249 	if (do_ptm) {
1250 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
1251 			return -17;
1252 		p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1253 	}
1254 
1255 	if (do_gfx_rc6_ms)
1256 		p->gfx_rc6_ms = gfx_cur_rc6_ms;
1257 
1258 	if (do_gfx_mhz)
1259 		p->gfx_mhz = gfx_cur_mhz;
1260 
1261 	return 0;
1262 }
1263 
1264 /*
1265  * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
1266  * If you change the values, note they are used both in comparisons
1267  * (>= PCL__7) and to index pkg_cstate_limit_strings[].
1268  */
1269 
1270 #define PCLUKN 0 /* Unknown */
1271 #define PCLRSV 1 /* Reserved */
1272 #define PCL__0 2 /* PC0 */
1273 #define PCL__1 3 /* PC1 */
1274 #define PCL__2 4 /* PC2 */
1275 #define PCL__3 5 /* PC3 */
1276 #define PCL__4 6 /* PC4 */
1277 #define PCL__6 7 /* PC6 */
1278 #define PCL_6N 8 /* PC6 No Retention */
1279 #define PCL_6R 9 /* PC6 Retention */
1280 #define PCL__7 10 /* PC7 */
1281 #define PCL_7S 11 /* PC7 Shrink */
1282 #define PCL__8 12 /* PC8 */
1283 #define PCL__9 13 /* PC9 */
1284 #define PCLUNL 14 /* Unlimited */
1285 
1286 int pkg_cstate_limit = PCLUKN;
1287 char *pkg_cstate_limit_strings[] = { "unknown", "reserved", "pc0", "pc1", "pc2",
1288 	"pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "unlimited"};
1289 
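/*
 * Per-family decode tables for the 4-bit package C-state limit field
 * of MSR_PKG_CST_CONFIG_CONTROL: each entry maps a raw field value
 * (0..15) to one of the PCL_* indexes above.
 */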
1290 int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1291 int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1292 int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1293 int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1294 int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1295 int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1296 int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1297 
1298 
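/*
 * On parts (e.g. Skylake) where the MPERF reference clock and the
 * measured TSC frequency differ slightly, Busy% is divided by
 * tsc_tweak = base_hz / tsc_hz so that mperf/tsc is compared
 * against a common reference.
 */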
1299 static void
1300 calculate_tsc_tweak()
1301 {
1302 	tsc_tweak = base_hz / tsc_hz;
1303 }
1304 
1305 static void
1306 dump_nhm_platform_info(void)
1307 {
1308 	unsigned long long msr;
1309 	unsigned int ratio;
1310 
1311 	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
1312 
1313 	fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
1314 
1315 	ratio = (msr >> 40) & 0xFF;
1316 	fprintf(outf, "%d * %.0f = %.0f MHz max efficiency frequency\n",
1317 		ratio, bclk, ratio * bclk);
1318 
1319 	ratio = (msr >> 8) & 0xFF;
1320 	fprintf(outf, "%d * %.0f = %.0f MHz base frequency\n",
1321 		ratio, bclk, ratio * bclk);
1322 
1323 	get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
1324 	fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
1325 		base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
1326 
1327 	return;
1328 }
1329 
1330 static void
1331 dump_hsw_turbo_ratio_limits(void)
1332 {
1333 	unsigned long long msr;
1334 	unsigned int ratio;
1335 
1336 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
1337 
1338 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
1339 
1340 	ratio = (msr >> 8) & 0xFF;
1341 	if (ratio)
1342 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 18 active cores\n",
1343 			ratio, bclk, ratio * bclk);
1344 
1345 	ratio = (msr >> 0) & 0xFF;
1346 	if (ratio)
1347 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 17 active cores\n",
1348 			ratio, bclk, ratio * bclk);
1349 	return;
1350 }
1351 
1352 static void
1353 dump_ivt_turbo_ratio_limits(void)
1354 {
1355 	unsigned long long msr;
1356 	unsigned int ratio;
1357 
1358 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
1359 
1360 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
1361 
1362 	ratio = (msr >> 56) & 0xFF;
1363 	if (ratio)
1364 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 16 active cores\n",
1365 			ratio, bclk, ratio * bclk);
1366 
1367 	ratio = (msr >> 48) & 0xFF;
1368 	if (ratio)
1369 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 15 active cores\n",
1370 			ratio, bclk, ratio * bclk);
1371 
1372 	ratio = (msr >> 40) & 0xFF;
1373 	if (ratio)
1374 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 14 active cores\n",
1375 			ratio, bclk, ratio * bclk);
1376 
1377 	ratio = (msr >> 32) & 0xFF;
1378 	if (ratio)
1379 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 13 active cores\n",
1380 			ratio, bclk, ratio * bclk);
1381 
1382 	ratio = (msr >> 24) & 0xFF;
1383 	if (ratio)
1384 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 12 active cores\n",
1385 			ratio, bclk, ratio * bclk);
1386 
1387 	ratio = (msr >> 16) & 0xFF;
1388 	if (ratio)
1389 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 11 active cores\n",
1390 			ratio, bclk, ratio * bclk);
1391 
1392 	ratio = (msr >> 8) & 0xFF;
1393 	if (ratio)
1394 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 10 active cores\n",
1395 			ratio, bclk, ratio * bclk);
1396 
1397 	ratio = (msr >> 0) & 0xFF;
1398 	if (ratio)
1399 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
1400 			ratio, bclk, ratio * bclk);
1401 	return;
1402 }
1403 
1404 static void
1405 dump_nhm_turbo_ratio_limits(void)
1406 {
1407 	unsigned long long msr;
1408 	unsigned int ratio;
1409 
1410 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
1411 
1412 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
1413 
1414 	ratio = (msr >> 56) & 0xFF;
1415 	if (ratio)
1416 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 8 active cores\n",
1417 			ratio, bclk, ratio * bclk);
1418 
1419 	ratio = (msr >> 48) & 0xFF;
1420 	if (ratio)
1421 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 7 active cores\n",
1422 			ratio, bclk, ratio * bclk);
1423 
1424 	ratio = (msr >> 40) & 0xFF;
1425 	if (ratio)
1426 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 6 active cores\n",
1427 			ratio, bclk, ratio * bclk);
1428 
1429 	ratio = (msr >> 32) & 0xFF;
1430 	if (ratio)
1431 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 5 active cores\n",
1432 			ratio, bclk, ratio * bclk);
1433 
1434 	ratio = (msr >> 24) & 0xFF;
1435 	if (ratio)
1436 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
1437 			ratio, bclk, ratio * bclk);
1438 
1439 	ratio = (msr >> 16) & 0xFF;
1440 	if (ratio)
1441 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
1442 			ratio, bclk, ratio * bclk);
1443 
1444 	ratio = (msr >> 8) & 0xFF;
1445 	if (ratio)
1446 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
1447 			ratio, bclk, ratio * bclk);
1448 
1449 	ratio = (msr >> 0) & 0xFF;
1450 	if (ratio)
1451 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
1452 			ratio, bclk, ratio * bclk);
1453 	return;
1454 }
1455 
1456 static void
1457 dump_knl_turbo_ratio_limits(void)
1458 {
1459 	const unsigned int buckets_no = 7;
1460 
1461 	unsigned long long msr;
1462 	int delta_cores, delta_ratio;
1463 	int i, b_nr;
1464 	unsigned int cores[buckets_no];
1465 	unsigned int ratio[buckets_no];
1466 
1467 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
1468 
1469 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
1470 		base_cpu, msr);
1471 
1472 	/**
1473 	 * Turbo encoding in KNL is as follows:
1474 	 * [0] -- Reserved
1475 	 * [7:1] -- Base value of number of active cores of bucket 1.
1476 	 * [15:8] -- Base value of freq ratio of bucket 1.
1477 	 * [20:16] -- +ve delta of number of active cores of bucket 2.
1478 	 * i.e. active cores of bucket 2 =
1479 	 * active cores of bucket 1 + delta
1480 	 * [23:21] -- Negative delta of freq ratio of bucket 2.
1481 	 * i.e. freq ratio of bucket 2 =
1482 	 * freq ratio of bucket 1 - delta
1483 	 * [28:24]-- +ve delta of number of active cores of bucket 3.
1484 	 * [31:29]-- -ve delta of freq ratio of bucket 3.
1485 	 * [36:32]-- +ve delta of number of active cores of bucket 4.
1486 	 * [39:37]-- -ve delta of freq ratio of bucket 4.
1487 	 * [44:40]-- +ve delta of number of active cores of bucket 5.
1488 	 * [47:45]-- -ve delta of freq ratio of bucket 5.
1489 	 * [52:48]-- +ve delta of number of active cores of bucket 6.
1490 	 * [55:53]-- -ve delta of freq ratio of bucket 6.
1491 	 * [60:56]-- +ve delta of number of active cores of bucket 7.
1492 	 * [63:61]-- -ve delta of freq ratio of bucket 7.
1493 	 */
1494 
1495 	b_nr = 0;
1496 	cores[b_nr] = (msr & 0xFF) >> 1;
1497 	ratio[b_nr] = (msr >> 8) & 0xFF;
1498 
1499 	for (i = 16; i < 64; i += 8) {
1500 		delta_cores = (msr >> i) & 0x1F;
1501 		delta_ratio = (msr >> (i + 5)) & 0x7;
1502 
1503 		cores[b_nr + 1] = cores[b_nr] + delta_cores;
1504 		ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
1505 		b_nr++;
1506 	}
1507 
1508 	for (i = buckets_no - 1; i >= 0; i--)
1509 		if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
1510 			fprintf(outf,
1511 				"%d * %.0f = %.0f MHz max turbo %d active cores\n",
1512 				ratio[i], bclk, ratio[i] * bclk, cores[i]);
1513 }
1514 
1515 static void
1516 dump_nhm_cst_cfg(void)
1517 {
1518 	unsigned long long msr;
1519 
1520 	get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
1521 
1522 #define SNB_C1_AUTO_UNDEMOTE              (1UL << 27)
1523 #define SNB_C3_AUTO_UNDEMOTE              (1UL << 28)
1524 
1525 	fprintf(outf, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr);
1526 
1527 	fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
1528 		(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
1529 		(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
1530 		(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
1531 		(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
1532 		(msr & (1 << 15)) ? "" : "UN",
1533 		(unsigned int)msr & 0xF,
1534 		pkg_cstate_limit_strings[pkg_cstate_limit]);
1535 	return;
1536 }
1537 
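/*
 * Decode the configurable-TDP MSRs: the nominal base ratio, the two
 * alternate TDP levels, which level is selected (and whether it is
 * locked), and the turbo activation ratio.
 */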
1538 static void
1539 dump_config_tdp(void)
1540 {
1541 	unsigned long long msr;
1542 
1543 	get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
1544 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
1545 	fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
1546 
1547 	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
1548 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
1549 	if (msr) {
1550 		fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
1551 		fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
1552 		fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
1553 		fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
1554 	}
1555 	fprintf(outf, ")\n");
1556 
1557 	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
1558 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
1559 	if (msr) {
1560 		fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
1561 		fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
1562 		fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
1563 		fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
1564 	}
1565 	fprintf(outf, ")\n");
1566 
1567 	get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
1568 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
1569 	if ((msr) & 0x3)
1570 		fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
1571 	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
1572 	fprintf(outf, ")\n");
1573 
1574 	get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
1575 	fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
1576 	fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
1577 	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
1578 	fprintf(outf, ")\n");
1579 }
1580 
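/*
 * The package C-state Interrupt Response Time Limit MSRs encode a
 * 10-bit time value plus a unit field; irtl_time_units[] gives the
 * multiplier, in nanoseconds, for each unit encoding
 * (1 ns, 32 ns, 1024 ns, ...).
 */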
1581 unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
1582 
1583 void print_irtl(void)
1584 {
1585 	unsigned long long msr;
1586 
1587 	get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
1588 	fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
1589 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
1590 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
1591 
1592 	get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
1593 	fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
1594 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
1595 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
1596 
1597 	get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
1598 	fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
1599 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
1600 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
1601 
1602 	if (!do_irtl_hsw)
1603 		return;
1604 
1605 	get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
1606 	fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
1607 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
1608 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
1609 
1610 	get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
1611 	fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
1612 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
1613 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
1614 
1615 	get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
1616 	fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
1617 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
1618 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
1619 
1620 }
1621 void free_fd_percpu(void)
1622 {
1623 	int i;
1624 
1625 	for (i = 0; i < topo.max_cpu_num + 1; ++i) {
1626 		if (fd_percpu[i] != 0)
1627 			close(fd_percpu[i]);
1628 	}
1629 
1630 	free(fd_percpu);
1631 }
1632 
1633 void free_all_buffers(void)
1634 {
1635 	CPU_FREE(cpu_present_set);
1636 	cpu_present_set = NULL;
1637 	cpu_present_setsize = 0;
1638 
1639 	CPU_FREE(cpu_affinity_set);
1640 	cpu_affinity_set = NULL;
1641 	cpu_affinity_setsize = 0;
1642 
1643 	free(thread_even);
1644 	free(core_even);
1645 	free(package_even);
1646 
1647 	thread_even = NULL;
1648 	core_even = NULL;
1649 	package_even = NULL;
1650 
1651 	free(thread_odd);
1652 	free(core_odd);
1653 	free(package_odd);
1654 
1655 	thread_odd = NULL;
1656 	core_odd = NULL;
1657 	package_odd = NULL;
1658 
1659 	free(output_buffer);
1660 	output_buffer = NULL;
1661 	outp = NULL;
1662 
1663 	free_fd_percpu();
1664 
1665 	free(irq_column_2_cpu);
1666 	free(irqs_per_cpu);
1667 }
1668 
1669 /*
1670  * Open a file, and exit on failure
1671  */
1672 FILE *fopen_or_die(const char *path, const char *mode)
1673 {
1674 	FILE *filep = fopen(path, mode);
1675 	if (!filep)
1676 		err(1, "%s: open failed", path);
1677 	return filep;
1678 }
1679 
1680 /*
1681  * Parse a file containing a single int.
1682  */
1683 int parse_int_file(const char *fmt, ...)
1684 {
1685 	va_list args;
1686 	char path[PATH_MAX];
1687 	FILE *filep;
1688 	int value;
1689 
1690 	va_start(args, fmt);
1691 	vsnprintf(path, sizeof(path), fmt, args);
1692 	va_end(args);
1693 	filep = fopen_or_die(path, "r");
1694 	if (fscanf(filep, "%d", &value) != 1)
1695 		err(1, "%s: failed to parse number from file", path);
1696 	fclose(filep);
1697 	return value;
1698 }
1699 
1700 /*
1701  * get_cpu_position_in_core(cpu)
1702  * return the position of the CPU among its HT siblings in the core
1703  * return -1 if the CPU is not found in the list
1704  */
1705 int get_cpu_position_in_core(int cpu)
1706 {
1707 	char path[64];
1708 	FILE *filep;
1709 	int this_cpu;
1710 	char character;
1711 	int i;
1712 
1713 	sprintf(path,
1714 		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
1715 		cpu);
1716 	filep = fopen(path, "r");
1717 	if (filep == NULL) {
1718 		perror(path);
1719 		exit(1);
1720 	}
1721 
1722 	for (i = 0; i < topo.num_threads_per_core; i++) {
1723 		fscanf(filep, "%d", &this_cpu);
1724 		if (this_cpu == cpu) {
1725 			fclose(filep);
1726 			return i;
1727 		}
1728 
1729 		/* Account for no separator after last thread */
1730 		if (i != (topo.num_threads_per_core - 1))
1731 			fscanf(filep, "%c", &character);
1732 	}
1733 
1734 	fclose(filep);
1735 	return -1;
1736 }
1737 
1738 /*
1739  * cpu_is_first_core_in_package(cpu)
1740  * return 1 if given CPU is 1st core in package
1741  */
1742 int cpu_is_first_core_in_package(int cpu)
1743 {
1744 	return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);
1745 }
1746 
1747 int get_physical_package_id(int cpu)
1748 {
1749 	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
1750 }
1751 
1752 int get_core_id(int cpu)
1753 {
1754 	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
1755 }
1756 
1757 int get_num_ht_siblings(int cpu)
1758 {
1759 	char path[80];
1760 	FILE *filep;
1761 	int sib1;
1762 	int matches = 0;
1763 	char character;
1764 	char str[100];
1765 	char *ch;
1766 
1767 	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
1768 	filep = fopen_or_die(path, "r");
1769 
1770 	/*
1771 	 * file format:
1772 	 * A ',' separated or '-' separated set of numbers
1773 	 * (eg 1-2 or 1,3,4,5)
1774 	 */
	if (fscanf(filep, "%d%c\n", &sib1, &character) == 2 &&
	    (character == ',' || character == '-')) {
		fseek(filep, 0, SEEK_SET);
		fgets(str, 100, filep);
		ch = strchr(str, character);
		while (ch != NULL) {
			matches++;
			ch = strchr(ch+1, character);
		}
	}
1783 
1784 	fclose(filep);
1785 	return matches+1;
1786 }
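
/*
 * Example: a thread_siblings_list of "0,4" contains one ',' separator,
 * so get_num_ht_siblings() reports 2; a list with a single entry and no
 * separator reports 1.  A '-' range such as "0-3" is counted by its
 * separators, not expanded, so it also reports 2.
 */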
1787 
1788 /*
1789  * run func(thread, core, package) in topology order
1790  * skip non-present cpus
1791  */
1792 
1793 int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
1794 	struct pkg_data *, struct thread_data *, struct core_data *,
1795 	struct pkg_data *), struct thread_data *thread_base,
1796 	struct core_data *core_base, struct pkg_data *pkg_base,
1797 	struct thread_data *thread_base2, struct core_data *core_base2,
1798 	struct pkg_data *pkg_base2)
1799 {
1800 	int retval, pkg_no, core_no, thread_no;
1801 
1802 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
1803 		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
1804 			for (thread_no = 0; thread_no <
1805 				topo.num_threads_per_core; ++thread_no) {
1806 				struct thread_data *t, *t2;
1807 				struct core_data *c, *c2;
1808 				struct pkg_data *p, *p2;
1809 
1810 				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
1811 
1812 				if (cpu_is_not_present(t->cpu_id))
1813 					continue;
1814 
1815 				t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no);
1816 
1817 				c = GET_CORE(core_base, core_no, pkg_no);
1818 				c2 = GET_CORE(core_base2, core_no, pkg_no);
1819 
1820 				p = GET_PKG(pkg_base, pkg_no);
1821 				p2 = GET_PKG(pkg_base2, pkg_no);
1822 
1823 				retval = func(t, c, p, t2, c2, p2);
1824 				if (retval)
1825 					return retval;
1826 			}
1827 		}
1828 	}
1829 	return 0;
1830 }
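
/*
 * Example call (see turbostat_loop() below):
 *
 *	for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
 *
 * hands delta_cpu() the new (odd) and old (even) counter structures for
 * the same thread/core/package position, in topology order.
 */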
1831 
1832 /*
1833  * run func(cpu) on every cpu in /proc/stat
 * return 0, or the first non-zero value returned by func()
1835  */
1836 int for_all_proc_cpus(int (func)(int))
1837 {
1838 	FILE *fp;
1839 	int cpu_num;
1840 	int retval;
1841 
1842 	fp = fopen_or_die(proc_stat, "r");
1843 
1844 	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
1845 	if (retval != 0)
1846 		err(1, "%s: failed to parse format", proc_stat);
1847 
1848 	while (1) {
1849 		retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
1850 		if (retval != 1)
1851 			break;
1852 
1853 		retval = func(cpu_num);
1854 		if (retval) {
1855 			fclose(fp);
1856 			return(retval);
1857 		}
1858 	}
1859 	fclose(fp);
1860 	return 0;
1861 }
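
/*
 * Example /proc/stat layout the scanner above expects (values illustrative):
 *
 *	cpu  2255 34 2290 22625563 6290 127 456 0 0 0
 *	cpu0 1132 34 1441 11311718 3675 127 438 0 0 0
 *	cpu1 1123 0 849 11313845 2615 0 18 0 0 0
 *
 * The summary "cpu " line is consumed first, then func() is invoked once
 * per "cpuN" line with N as its argument.
 */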
1862 
1863 void re_initialize(void)
1864 {
1865 	free_all_buffers();
1866 	setup_all_buffers();
	fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
1868 }
1869 
1870 
1871 /*
1872  * count_cpus()
1873  * remember the last one seen, it will be the max
1874  */
1875 int count_cpus(int cpu)
1876 {
1877 	if (topo.max_cpu_num < cpu)
1878 		topo.max_cpu_num = cpu;
1879 
1880 	topo.num_cpus += 1;
1881 	return 0;
1882 }
1883 int mark_cpu_present(int cpu)
1884 {
1885 	CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
1886 	return 0;
1887 }
1888 
1889 /*
1890  * snapshot_proc_interrupts()
1891  *
1892  * read and record summary of /proc/interrupts
1893  *
1894  * return 1 if config change requires a restart, else return 0
1895  */
1896 int snapshot_proc_interrupts(void)
1897 {
1898 	static FILE *fp;
1899 	int column, retval;
1900 
1901 	if (fp == NULL)
1902 		fp = fopen_or_die("/proc/interrupts", "r");
1903 	else
1904 		rewind(fp);
1905 
1906 	/* read 1st line of /proc/interrupts to get cpu* name for each column */
1907 	for (column = 0; column < topo.num_cpus; ++column) {
1908 		int cpu_number;
1909 
1910 		retval = fscanf(fp, " CPU%d", &cpu_number);
1911 		if (retval != 1)
1912 			break;
1913 
1914 		if (cpu_number > topo.max_cpu_num) {
1915 			warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
1916 			return 1;
1917 		}
1918 
1919 		irq_column_2_cpu[column] = cpu_number;
1920 		irqs_per_cpu[cpu_number] = 0;
1921 	}
1922 
1923 	/* read /proc/interrupt count lines and sum up irqs per cpu */
1924 	while (1) {
1925 		int column;
1926 		char buf[64];
1927 
		retval = fscanf(fp, " %63s:", buf);	/* flush irq# "N:" */
1929 		if (retval != 1)
1930 			break;
1931 
1932 		/* read the count per cpu */
1933 		for (column = 0; column < topo.num_cpus; ++column) {
1934 
1935 			int cpu_number, irq_count;
1936 
1937 			retval = fscanf(fp, " %d", &irq_count);
1938 			if (retval != 1)
1939 				break;
1940 
1941 			cpu_number = irq_column_2_cpu[column];
1942 			irqs_per_cpu[cpu_number] += irq_count;
1943 
1944 		}
1945 
1946 		while (getc(fp) != '\n')
1947 			;	/* flush interrupt description */
1948 
1949 	}
1950 	return 0;
1951 }
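
/*
 * Example /proc/interrupts fragment (values illustrative):
 *
 *	           CPU0       CPU1
 *	  0:         36          0   IO-APIC   2-edge      timer
 *	  1:          9          0   IO-APIC   1-edge      i8042
 *
 * The header row maps column 0 to cpu0 and column 1 to cpu1 via
 * irq_column_2_cpu[]; each following row adds its per-column counts
 * into irqs_per_cpu[], and the trailing description is discarded.
 */
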
1952 /*
1953  * snapshot_gfx_rc6_ms()
1954  *
1955  * record snapshot of
1956  * /sys/class/drm/card0/power/rc6_residency_ms
1957  *
1958  * return 1 if config change requires a restart, else return 0
1959  */
1960 int snapshot_gfx_rc6_ms(void)
1961 {
1962 	FILE *fp;
1963 	int retval;
1964 
1965 	fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
1966 
	retval = fscanf(fp, "%llu", &gfx_cur_rc6_ms);
1968 	if (retval != 1)
1969 		err(1, "GFX rc6");
1970 
1971 	fclose(fp);
1972 
1973 	return 0;
1974 }
1975 /*
1976  * snapshot_gfx_mhz()
1977  *
1978  * record snapshot of
1979  * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
1980  *
1981  * return 1 if config change requires a restart, else return 0
1982  */
1983 int snapshot_gfx_mhz(void)
1984 {
1985 	static FILE *fp;
1986 	int retval;
1987 
1988 	if (fp == NULL)
1989 		fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
1990 	else
1991 		rewind(fp);
1992 
	retval = fscanf(fp, "%u", &gfx_cur_mhz);
1994 	if (retval != 1)
1995 		err(1, "GFX MHz");
1996 
1997 	return 0;
1998 }
1999 
2000 /*
2001  * snapshot /proc and /sys files
2002  *
2003  * return 1 if configuration restart needed, else return 0
2004  */
2005 int snapshot_proc_sysfs_files(void)
2006 {
2007 	if (snapshot_proc_interrupts())
2008 		return 1;
2009 
2010 	if (do_gfx_rc6_ms)
2011 		snapshot_gfx_rc6_ms();
2012 
2013 	if (do_gfx_mhz)
2014 		snapshot_gfx_mhz();
2015 
2016 	return 0;
2017 }
2018 
2019 void turbostat_loop()
2020 {
2021 	int retval;
2022 	int restarted = 0;
2023 
2024 restart:
2025 	restarted++;
2026 
2027 	snapshot_proc_sysfs_files();
2028 	retval = for_all_cpus(get_counters, EVEN_COUNTERS);
2029 	if (retval < -1) {
2030 		exit(retval);
2031 	} else if (retval == -1) {
2032 		if (restarted > 1) {
2033 			exit(retval);
2034 		}
2035 		re_initialize();
2036 		goto restart;
2037 	}
2038 	restarted = 0;
2039 	gettimeofday(&tv_even, (struct timezone *)NULL);
2040 
2041 	while (1) {
2042 		if (for_all_proc_cpus(cpu_is_not_present)) {
2043 			re_initialize();
2044 			goto restart;
2045 		}
2046 		nanosleep(&interval_ts, NULL);
2047 		if (snapshot_proc_sysfs_files())
2048 			goto restart;
2049 		retval = for_all_cpus(get_counters, ODD_COUNTERS);
2050 		if (retval < -1) {
2051 			exit(retval);
2052 		} else if (retval == -1) {
2053 			re_initialize();
2054 			goto restart;
2055 		}
2056 		gettimeofday(&tv_odd, (struct timezone *)NULL);
2057 		timersub(&tv_odd, &tv_even, &tv_delta);
2058 		if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
2059 			re_initialize();
2060 			goto restart;
2061 		}
2062 		compute_average(EVEN_COUNTERS);
2063 		format_all_counters(EVEN_COUNTERS);
2064 		flush_output_stdout();
2065 		nanosleep(&interval_ts, NULL);
2066 		if (snapshot_proc_sysfs_files())
2067 			goto restart;
2068 		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
2069 		if (retval < -1) {
2070 			exit(retval);
2071 		} else if (retval == -1) {
2072 			re_initialize();
2073 			goto restart;
2074 		}
2075 		gettimeofday(&tv_even, (struct timezone *)NULL);
2076 		timersub(&tv_even, &tv_odd, &tv_delta);
2077 		if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
2078 			re_initialize();
2079 			goto restart;
2080 		}
2081 		compute_average(ODD_COUNTERS);
2082 		format_all_counters(ODD_COUNTERS);
2083 		flush_output_stdout();
2084 	}
2085 }
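
/*
 * Sketch of the even/odd double buffering in turbostat_loop():
 *
 *	EVEN snapshot ... sleep interval ... ODD snapshot
 *		delta = ODD - EVEN  -> compute_average, format, flush
 *	... sleep interval ... EVEN snapshot
 *		delta = EVEN - ODD  -> compute_average, format, flush
 *
 * Only two full sets of counters are kept; each interval is reported
 * from the difference of the two most recent snapshots.
 */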
2086 
2087 void check_dev_msr()
2088 {
2089 	struct stat sb;
2090 	char pathname[32];
2091 
2092 	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
2093 	if (stat(pathname, &sb))
		if (system("/sbin/modprobe msr > /dev/null 2>&1"))
			err(-5, "no %s, try \"# modprobe msr\"", pathname);
2096 }
2097 
2098 void check_permissions()
2099 {
2100 	struct __user_cap_header_struct cap_header_data;
2101 	cap_user_header_t cap_header = &cap_header_data;
2102 	struct __user_cap_data_struct cap_data_data;
2103 	cap_user_data_t cap_data = &cap_data_data;
2104 	extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
2105 	int do_exit = 0;
2106 	char pathname[32];
2107 
2108 	/* check for CAP_SYS_RAWIO */
2109 	cap_header->pid = getpid();
2110 	cap_header->version = _LINUX_CAPABILITY_VERSION;
2111 	if (capget(cap_header, cap_data) < 0)
2112 		err(-6, "capget(2) failed");
2113 
2114 	if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
2115 		do_exit++;
		warnx("capability CAP_SYS_RAWIO not present,"
			" try \"# setcap cap_sys_rawio=ep %s\"", progname);
2118 	}
2119 
2120 	/* test file permissions */
2121 	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
2122 	if (euidaccess(pathname, R_OK)) {
2123 		do_exit++;
		warn("%s open failed, try chown or chmod +r /dev/cpu/*/msr", pathname);
2125 	}
2126 
	/* if all else fails, tell them to be root */
2128 	if (do_exit)
2129 		if (getuid() != 0)
2130 			warnx("... or simply run as root");
2131 
2132 	if (do_exit)
2133 		exit(-6);
2134 }
2135 
2136 /*
2137  * NHM adds support for additional MSRs:
2138  *
2139  * MSR_SMI_COUNT                   0x00000034
2140  *
2141  * MSR_PLATFORM_INFO               0x000000ce
2142  * MSR_NHM_SNB_PKG_CST_CFG_CTL     0x000000e2
2143  *
2144  * MSR_PKG_C3_RESIDENCY            0x000003f8
2145  * MSR_PKG_C6_RESIDENCY            0x000003f9
2146  * MSR_CORE_C3_RESIDENCY           0x000003fc
2147  * MSR_CORE_C6_RESIDENCY           0x000003fd
2148  *
2149  * Side effect:
2150  * sets global pkg_cstate_limit to decode MSR_NHM_SNB_PKG_CST_CFG_CTL
2151  */
2152 int probe_nhm_msrs(unsigned int family, unsigned int model)
2153 {
2154 	unsigned long long msr;
2155 	unsigned int base_ratio;
2156 	int *pkg_cstate_limits;
2157 
2158 	if (!genuine_intel)
2159 		return 0;
2160 
2161 	if (family != 6)
2162 		return 0;
2163 
2164 	bclk = discover_bclk(family, model);
2165 
2166 	switch (model) {
2167 	case INTEL_FAM6_NEHALEM_EP:	/* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
2168 	case INTEL_FAM6_NEHALEM:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
2169 	case 0x1F:	/* Core i7 and i5 Processor - Nehalem */
2170 	case INTEL_FAM6_WESTMERE:	/* Westmere Client - Clarkdale, Arrandale */
2171 	case INTEL_FAM6_WESTMERE_EP:	/* Westmere EP - Gulftown */
2172 	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
2173 	case INTEL_FAM6_WESTMERE_EX:	/* Westmere-EX Xeon - Eagleton */
2174 		pkg_cstate_limits = nhm_pkg_cstate_limits;
2175 		break;
2176 	case INTEL_FAM6_SANDYBRIDGE:	/* SNB */
2177 	case INTEL_FAM6_SANDYBRIDGE_X:	/* SNB Xeon */
2178 	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
2179 	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
2180 		pkg_cstate_limits = snb_pkg_cstate_limits;
2181 		break;
2182 	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
2183 	case INTEL_FAM6_HASWELL_X:	/* HSX */
2184 	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
2185 	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
2186 	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
2187 	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
2188 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
2189 	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
2190 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
2191 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
2192 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
2193 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
2194 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
2195 		pkg_cstate_limits = hsw_pkg_cstate_limits;
2196 		break;
2197 	case INTEL_FAM6_ATOM_SILVERMONT1:	/* BYT */
2198 	case INTEL_FAM6_ATOM_SILVERMONT2:	/* AVN */
2199 		pkg_cstate_limits = slv_pkg_cstate_limits;
2200 		break;
2201 	case INTEL_FAM6_ATOM_AIRMONT:	/* AMT */
2202 		pkg_cstate_limits = amt_pkg_cstate_limits;
2203 		break;
2204 	case INTEL_FAM6_XEON_PHI_KNL:	/* PHI */
2205 	case INTEL_FAM6_XEON_PHI_KNM:
2206 		pkg_cstate_limits = phi_pkg_cstate_limits;
2207 		break;
2208 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
2209 	case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
2210 		pkg_cstate_limits = bxt_pkg_cstate_limits;
2211 		break;
2212 	default:
2213 		return 0;
2214 	}
2215 	get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
2216 	pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
2217 
2218 	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
2219 	base_ratio = (msr >> 8) & 0xFF;
2220 
2221 	base_hz = base_ratio * bclk * 1000000;
2222 	has_base_hz = 1;
2223 	return 1;
2224 }
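
/*
 * Example: with bclk = 100 MHz and a hypothetical base ratio of
 * 0x24 = 36 in MSR_PLATFORM_INFO[15:8], probe_nhm_msrs() above sets
 * base_hz = 36 * 100 * 1000000 = 3.6 GHz.
 */
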
2225 int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model)
2226 {
2227 	switch (model) {
2228 	/* Nehalem compatible, but do not include turbo-ratio limit support */
2229 	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
2230 	case INTEL_FAM6_WESTMERE_EX:	/* Westmere-EX Xeon - Eagleton */
2231 	case INTEL_FAM6_XEON_PHI_KNL:	/* PHI - Knights Landing (different MSR definition) */
2232 	case INTEL_FAM6_XEON_PHI_KNM:
2233 		return 0;
2234 	default:
2235 		return 1;
2236 	}
2237 }
2238 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
2239 {
2240 	if (!genuine_intel)
2241 		return 0;
2242 
2243 	if (family != 6)
2244 		return 0;
2245 
2246 	switch (model) {
2247 	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
2248 	case INTEL_FAM6_HASWELL_X:	/* HSW Xeon */
2249 		return 1;
2250 	default:
2251 		return 0;
2252 	}
2253 }
2254 int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
2255 {
2256 	if (!genuine_intel)
2257 		return 0;
2258 
2259 	if (family != 6)
2260 		return 0;
2261 
2262 	switch (model) {
2263 	case INTEL_FAM6_HASWELL_X:	/* HSW Xeon */
2264 		return 1;
2265 	default:
2266 		return 0;
2267 	}
2268 }
2269 
2270 int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
2271 {
2272 	if (!genuine_intel)
2273 		return 0;
2274 
2275 	if (family != 6)
2276 		return 0;
2277 
2278 	switch (model) {
2279 	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
2280 	case INTEL_FAM6_XEON_PHI_KNM:
2281 		return 1;
2282 	default:
2283 		return 0;
2284 	}
2285 }
2286 int has_config_tdp(unsigned int family, unsigned int model)
2287 {
2288 	if (!genuine_intel)
2289 		return 0;
2290 
2291 	if (family != 6)
2292 		return 0;
2293 
2294 	switch (model) {
2295 	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
2296 	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
2297 	case INTEL_FAM6_HASWELL_X:	/* HSX */
2298 	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
2299 	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
2300 	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
2301 	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
2302 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
2303 	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
2304 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
2305 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
2306 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
2307 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
2308 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
2309 
2310 	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
2311 	case INTEL_FAM6_XEON_PHI_KNM:
2312 		return 1;
2313 	default:
2314 		return 0;
2315 	}
2316 }
2317 
2318 static void
2319 dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
2320 {
2321 	if (!do_nhm_platform_info)
2322 		return;
2323 
2324 	dump_nhm_platform_info();
2325 
2326 	if (has_hsw_turbo_ratio_limit(family, model))
2327 		dump_hsw_turbo_ratio_limits();
2328 
2329 	if (has_ivt_turbo_ratio_limit(family, model))
2330 		dump_ivt_turbo_ratio_limits();
2331 
2332 	if (has_nhm_turbo_ratio_limit(family, model))
2333 		dump_nhm_turbo_ratio_limits();
2334 
2335 	if (has_knl_turbo_ratio_limit(family, model))
2336 		dump_knl_turbo_ratio_limits();
2337 
2338 	if (has_config_tdp(family, model))
2339 		dump_config_tdp();
2340 
2341 	dump_nhm_cst_cfg();
2342 }
2343 
2344 
2345 /*
2346  * print_epb()
2347  * Decode the ENERGY_PERF_BIAS MSR
2348  */
2349 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2350 {
2351 	unsigned long long msr;
2352 	char *epb_string;
2353 	int cpu;
2354 
2355 	if (!has_epb)
2356 		return 0;
2357 
2358 	cpu = t->cpu_id;
2359 
2360 	/* EPB is per-package */
2361 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2362 		return 0;
2363 
2364 	if (cpu_migrate(cpu)) {
2365 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2366 		return -1;
2367 	}
2368 
2369 	if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
2370 		return 0;
2371 
2372 	switch (msr & 0xF) {
2373 	case ENERGY_PERF_BIAS_PERFORMANCE:
2374 		epb_string = "performance";
2375 		break;
2376 	case ENERGY_PERF_BIAS_NORMAL:
2377 		epb_string = "balanced";
2378 		break;
2379 	case ENERGY_PERF_BIAS_POWERSAVE:
2380 		epb_string = "powersave";
2381 		break;
2382 	default:
2383 		epb_string = "custom";
2384 		break;
2385 	}
2386 	fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
2387 
2388 	return 0;
2389 }
2390 /*
2391  * print_hwp()
2392  * Decode the MSR_HWP_CAPABILITIES
2393  */
2394 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2395 {
2396 	unsigned long long msr;
2397 	int cpu;
2398 
2399 	if (!has_hwp)
2400 		return 0;
2401 
2402 	cpu = t->cpu_id;
2403 
2404 	/* MSR_HWP_CAPABILITIES is per-package */
2405 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2406 		return 0;
2407 
2408 	if (cpu_migrate(cpu)) {
2409 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2410 		return -1;
2411 	}
2412 
2413 	if (get_msr(cpu, MSR_PM_ENABLE, &msr))
2414 		return 0;
2415 
2416 	fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
2417 		cpu, msr, (msr & (1 << 0)) ? "" : "No-");
2418 
	/* MSR_PM_ENABLE[0] == 1 if HWP is enabled and HWP MSRs are visible */
2420 	if ((msr & (1 << 0)) == 0)
2421 		return 0;
2422 
2423 	if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
2424 		return 0;
2425 
2426 	fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
2427 			"(high 0x%x guar 0x%x eff 0x%x low 0x%x)\n",
2428 			cpu, msr,
2429 			(unsigned int)HWP_HIGHEST_PERF(msr),
2430 			(unsigned int)HWP_GUARANTEED_PERF(msr),
2431 			(unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
2432 			(unsigned int)HWP_LOWEST_PERF(msr));
2433 
2434 	if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
2435 		return 0;
2436 
2437 	fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
2438 			"(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x pkg 0x%x)\n",
2439 			cpu, msr,
2440 			(unsigned int)(((msr) >> 0) & 0xff),
2441 			(unsigned int)(((msr) >> 8) & 0xff),
2442 			(unsigned int)(((msr) >> 16) & 0xff),
2443 			(unsigned int)(((msr) >> 24) & 0xff),
			(unsigned int)(((msr) >> 32) & 0x3ff),
2445 			(unsigned int)(((msr) >> 42) & 0x1));
2446 
2447 	if (has_hwp_pkg) {
2448 		if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
2449 			return 0;
2450 
2451 		fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
2452 			"(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x)\n",
2453 			cpu, msr,
2454 			(unsigned int)(((msr) >> 0) & 0xff),
2455 			(unsigned int)(((msr) >> 8) & 0xff),
2456 			(unsigned int)(((msr) >> 16) & 0xff),
2457 			(unsigned int)(((msr) >> 24) & 0xff),
			(unsigned int)(((msr) >> 32) & 0x3ff));
2459 	}
2460 	if (has_hwp_notify) {
2461 		if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
2462 			return 0;
2463 
2464 		fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
2465 			"(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
2466 			cpu, msr,
2467 			((msr) & 0x1) ? "EN" : "Dis",
2468 			((msr) & 0x2) ? "EN" : "Dis");
2469 	}
2470 	if (get_msr(cpu, MSR_HWP_STATUS, &msr))
2471 		return 0;
2472 
2473 	fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
2474 			"(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
2475 			cpu, msr,
2476 			((msr) & 0x1) ? "" : "No-",
2477 			((msr) & 0x2) ? "" : "No-");
2478 
2479 	return 0;
2480 }
2481 
2482 /*
2483  * print_perf_limit()
2484  */
2485 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2486 {
2487 	unsigned long long msr;
2488 	int cpu;
2489 
2490 	cpu = t->cpu_id;
2491 
2492 	/* per-package */
2493 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2494 		return 0;
2495 
2496 	if (cpu_migrate(cpu)) {
2497 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2498 		return -1;
2499 	}
2500 
2501 	if (do_core_perf_limit_reasons) {
2502 		get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
2503 		fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
2504 		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
2505 			(msr & 1 << 15) ? "bit15, " : "",
2506 			(msr & 1 << 14) ? "bit14, " : "",
2507 			(msr & 1 << 13) ? "Transitions, " : "",
2508 			(msr & 1 << 12) ? "MultiCoreTurbo, " : "",
2509 			(msr & 1 << 11) ? "PkgPwrL2, " : "",
2510 			(msr & 1 << 10) ? "PkgPwrL1, " : "",
2511 			(msr & 1 << 9) ? "CorePwr, " : "",
2512 			(msr & 1 << 8) ? "Amps, " : "",
2513 			(msr & 1 << 6) ? "VR-Therm, " : "",
2514 			(msr & 1 << 5) ? "Auto-HWP, " : "",
2515 			(msr & 1 << 4) ? "Graphics, " : "",
2516 			(msr & 1 << 2) ? "bit2, " : "",
2517 			(msr & 1 << 1) ? "ThermStatus, " : "",
2518 			(msr & 1 << 0) ? "PROCHOT, " : "");
2519 		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
2520 			(msr & 1 << 31) ? "bit31, " : "",
2521 			(msr & 1 << 30) ? "bit30, " : "",
2522 			(msr & 1 << 29) ? "Transitions, " : "",
2523 			(msr & 1 << 28) ? "MultiCoreTurbo, " : "",
2524 			(msr & 1 << 27) ? "PkgPwrL2, " : "",
2525 			(msr & 1 << 26) ? "PkgPwrL1, " : "",
2526 			(msr & 1 << 25) ? "CorePwr, " : "",
2527 			(msr & 1 << 24) ? "Amps, " : "",
2528 			(msr & 1 << 22) ? "VR-Therm, " : "",
2529 			(msr & 1 << 21) ? "Auto-HWP, " : "",
2530 			(msr & 1 << 20) ? "Graphics, " : "",
2531 			(msr & 1 << 18) ? "bit18, " : "",
2532 			(msr & 1 << 17) ? "ThermStatus, " : "",
2533 			(msr & 1 << 16) ? "PROCHOT, " : "");
2534 
2535 	}
2536 	if (do_gfx_perf_limit_reasons) {
2537 		get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
2538 		fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
2539 		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
2540 			(msr & 1 << 0) ? "PROCHOT, " : "",
2541 			(msr & 1 << 1) ? "ThermStatus, " : "",
2542 			(msr & 1 << 4) ? "Graphics, " : "",
2543 			(msr & 1 << 6) ? "VR-Therm, " : "",
2544 			(msr & 1 << 8) ? "Amps, " : "",
2545 			(msr & 1 << 9) ? "GFXPwr, " : "",
2546 			(msr & 1 << 10) ? "PkgPwrL1, " : "",
2547 			(msr & 1 << 11) ? "PkgPwrL2, " : "");
2548 		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
2549 			(msr & 1 << 16) ? "PROCHOT, " : "",
2550 			(msr & 1 << 17) ? "ThermStatus, " : "",
2551 			(msr & 1 << 20) ? "Graphics, " : "",
2552 			(msr & 1 << 22) ? "VR-Therm, " : "",
2553 			(msr & 1 << 24) ? "Amps, " : "",
2554 			(msr & 1 << 25) ? "GFXPwr, " : "",
2555 			(msr & 1 << 26) ? "PkgPwrL1, " : "",
2556 			(msr & 1 << 27) ? "PkgPwrL2, " : "");
2557 	}
2558 	if (do_ring_perf_limit_reasons) {
2559 		get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
2560 		fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
2561 		fprintf(outf, " (Active: %s%s%s%s%s%s)",
2562 			(msr & 1 << 0) ? "PROCHOT, " : "",
2563 			(msr & 1 << 1) ? "ThermStatus, " : "",
2564 			(msr & 1 << 6) ? "VR-Therm, " : "",
2565 			(msr & 1 << 8) ? "Amps, " : "",
2566 			(msr & 1 << 10) ? "PkgPwrL1, " : "",
2567 			(msr & 1 << 11) ? "PkgPwrL2, " : "");
2568 		fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
2569 			(msr & 1 << 16) ? "PROCHOT, " : "",
2570 			(msr & 1 << 17) ? "ThermStatus, " : "",
2571 			(msr & 1 << 22) ? "VR-Therm, " : "",
2572 			(msr & 1 << 24) ? "Amps, " : "",
2573 			(msr & 1 << 26) ? "PkgPwrL1, " : "",
2574 			(msr & 1 << 27) ? "PkgPwrL2, " : "");
2575 	}
2576 	return 0;
2577 }
2578 
2579 #define	RAPL_POWER_GRANULARITY	0x7FFF	/* 15 bit power granularity */
2580 #define	RAPL_TIME_GRANULARITY	0x3F /* 6 bit time granularity */
2581 
2582 double get_tdp(unsigned int model)
2583 {
2584 	unsigned long long msr;
2585 
2586 	if (do_rapl & RAPL_PKG_POWER_INFO)
2587 		if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
2588 			return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
2589 
2590 	switch (model) {
2591 	case INTEL_FAM6_ATOM_SILVERMONT1:
2592 	case INTEL_FAM6_ATOM_SILVERMONT2:
2593 		return 30.0;
2594 	default:
2595 		return 135.0;
2596 	}
2597 }
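
/*
 * Example (illustrative values): if MSR_PKG_POWER_INFO[14:0] were 0x118
 * (280) and rapl_power_units were 1/8 W, get_tdp() would report
 * 280 * 0.125 = 35 Watts.
 */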
2598 
2599 /*
2600  * rapl_dram_energy_units_probe()
2601  * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
2602  */
2603 static double
2604 rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
2605 {
2606 	/* only called for genuine_intel, family 6 */
2607 
2608 	switch (model) {
2609 	case INTEL_FAM6_HASWELL_X:	/* HSX */
2610 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
2611 	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
2612 	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
2613 	case INTEL_FAM6_XEON_PHI_KNM:
		return 15.3 / 1000000;	/* fixed 15.3 uJ per DRAM energy LSB */
2615 	default:
2616 		return (rapl_energy_units);
2617 	}
2618 }
2619 
2620 
2621 /*
2622  * rapl_probe()
2623  *
2624  * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
2625  */
2626 void rapl_probe(unsigned int family, unsigned int model)
2627 {
2628 	unsigned long long msr;
2629 	unsigned int time_unit;
2630 	double tdp;
2631 
2632 	if (!genuine_intel)
2633 		return;
2634 
2635 	if (family != 6)
2636 		return;
2637 
2638 	switch (model) {
2639 	case INTEL_FAM6_SANDYBRIDGE:
2640 	case INTEL_FAM6_IVYBRIDGE:
2641 	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
2642 	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
2643 	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
2644 	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
2645 	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
2646 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
2647 		break;
2648 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
2649 		do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
2650 		break;
2651 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
2652 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
2653 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
2654 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
2655 		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
2656 		break;
2657 	case INTEL_FAM6_HASWELL_X:	/* HSX */
2658 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
2659 	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
2660 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
2661 	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
2662 	case INTEL_FAM6_XEON_PHI_KNM:
2663 		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
2664 		break;
2665 	case INTEL_FAM6_SANDYBRIDGE_X:
2666 	case INTEL_FAM6_IVYBRIDGE_X:
2667 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
2668 		break;
2669 	case INTEL_FAM6_ATOM_SILVERMONT1:	/* BYT */
2670 	case INTEL_FAM6_ATOM_SILVERMONT2:	/* AVN */
2671 		do_rapl = RAPL_PKG | RAPL_CORES;
2672 		break;
2673 	case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
2674 		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
2675 		break;
2676 	default:
2677 		return;
2678 	}
2679 
2680 	/* units on package 0, verify later other packages match */
2681 	if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
2682 		return;
2683 
2684 	rapl_power_units = 1.0 / (1 << (msr & 0xF));
2685 	if (model == INTEL_FAM6_ATOM_SILVERMONT1)
2686 		rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
2687 	else
2688 		rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
2689 
2690 	rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
2691 
2692 	time_unit = msr >> 16 & 0xF;
2693 	if (time_unit == 0)
2694 		time_unit = 0xA;
2695 
2696 	rapl_time_units = 1.0 / (1 << (time_unit));
2697 
2698 	tdp = get_tdp(model);
2699 
2700 	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
2701 	if (debug)
2702 		fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
2703 
2704 	return;
2705 }
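
/*
 * Worked example of the unit decoding above, for an illustrative
 * MSR_RAPL_POWER_UNIT value of 0x000a0e03:
 *
 *	power units:  bits  3:0 =  3 -> 1/2^3  = 0.125 W
 *	energy units: bits 12:8 = 14 -> 1/2^14 = ~61 uJ
 *	time units:  bits 19:16 = 10 -> 1/2^10 = ~977 us
 *
 * With the default 135 W TDP, the 32-bit energy counter wraps after
 * about 0xFFFFFFFF / 16384 / 135 = ~1940 seconds, which is what
 * rapl_joule_counter_range reports.
 */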
2706 
2707 void perf_limit_reasons_probe(unsigned int family, unsigned int model)
2708 {
2709 	if (!genuine_intel)
2710 		return;
2711 
2712 	if (family != 6)
2713 		return;
2714 
2715 	switch (model) {
2716 	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
2717 	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
2718 	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
		do_gfx_perf_limit_reasons = 1;
		/* fall through: HSW client also has core and ring limit reasons */
2720 	case INTEL_FAM6_HASWELL_X:	/* HSX */
2721 		do_core_perf_limit_reasons = 1;
2722 		do_ring_perf_limit_reasons = 1;
2723 	default:
2724 		return;
2725 	}
2726 }
2727 
2728 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2729 {
2730 	unsigned long long msr;
	unsigned int dts;
#ifdef THERM_DEBUG
	unsigned int dts2;
#endif
2732 	int cpu;
2733 
2734 	if (!(do_dts || do_ptm))
2735 		return 0;
2736 
2737 	cpu = t->cpu_id;
2738 
2739 	/* DTS is per-core, no need to print for each thread */
2740 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
2741 		return 0;
2742 
2743 	if (cpu_migrate(cpu)) {
2744 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2745 		return -1;
2746 	}
2747 
2748 	if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
2749 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
2750 			return 0;
2751 
2752 		dts = (msr >> 16) & 0x7F;
2753 		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
2754 			cpu, msr, tcc_activation_temp - dts);
2755 
2756 #ifdef	THERM_DEBUG
2757 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
2758 			return 0;
2759 
2760 		dts = (msr >> 16) & 0x7F;
2761 		dts2 = (msr >> 8) & 0x7F;
2762 		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
2763 			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
2764 #endif
2765 	}
2766 
2767 
2768 	if (do_dts) {
2769 		unsigned int resolution;
2770 
2771 		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
2772 			return 0;
2773 
2774 		dts = (msr >> 16) & 0x7F;
2775 		resolution = (msr >> 27) & 0xF;
2776 		fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
2777 			cpu, msr, tcc_activation_temp - dts, resolution);
2778 
2779 #ifdef THERM_DEBUG
2780 		if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
2781 			return 0;
2782 
2783 		dts = (msr >> 16) & 0x7F;
2784 		dts2 = (msr >> 8) & 0x7F;
2785 		fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
2786 			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
2787 #endif
2788 	}
2789 
2790 	return 0;
2791 }
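
/*
 * Example (illustrative values): with tcc_activation_temp = 100 C and a
 * DTS readout of 38 in THERM_STATUS[22:16], print_thermal() above
 * reports 100 - 38 = 62 C.
 */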
2792 
2793 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
2794 {
2795 	fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
2796 		cpu, label,
2797 		((msr >> 15) & 1) ? "EN" : "DIS",
2798 		((msr >> 0) & 0x7FFF) * rapl_power_units,
2799 		(1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
2800 		(((msr >> 16) & 1) ? "EN" : "DIS"));
2801 
2802 	return;
2803 }
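
/*
 * The time window printed by print_power_limit_msr() decodes as
 *
 *	(1 + Z/4) * 2^Y * rapl_time_units
 *
 * with Y in bits 21:17 and Z in bits 23:22 of the limit MSR.
 * Example (illustrative values): Y = 10, Z = 2 and 1/1024 s time units
 * give 1.5 * 1024 / 1024 = 1.5 seconds.
 */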
2804 
2805 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2806 {
2807 	unsigned long long msr;
2808 	int cpu;
2809 
2810 	if (!do_rapl)
2811 		return 0;
2812 
2813 	/* RAPL counters are per package, so print only for 1st thread/package */
2814 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2815 		return 0;
2816 
2817 	cpu = t->cpu_id;
2818 	if (cpu_migrate(cpu)) {
2819 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2820 		return -1;
2821 	}
2822 
2823 	if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
2824 		return -1;
2825 
2826 	if (debug) {
2827 		fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
2828 			"(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
2829 			rapl_power_units, rapl_energy_units, rapl_time_units);
2830 	}
2831 	if (do_rapl & RAPL_PKG_POWER_INFO) {
2832 
2833 		if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
			return -5;
2835 
2836 
2837 		fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
2838 			cpu, msr,
2839 			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2840 			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2841 			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2842 			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
2843 
2844 	}
2845 	if (do_rapl & RAPL_PKG) {
2846 
2847 		if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
2848 			return -9;
2849 
2850 		fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
2851 			cpu, msr, (msr >> 63) & 1 ? "": "UN");
2852 
2853 		print_power_limit_msr(cpu, msr, "PKG Limit #1");
2854 		fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
2855 			cpu,
2856 			((msr >> 47) & 1) ? "EN" : "DIS",
2857 			((msr >> 32) & 0x7FFF) * rapl_power_units,
2858 			(1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
2859 			((msr >> 48) & 1) ? "EN" : "DIS");
2860 	}
2861 
2862 	if (do_rapl & RAPL_DRAM_POWER_INFO) {
2863 		if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
			return -6;
2865 
		fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
2867 			cpu, msr,
2868 			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2869 			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2870 			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2871 			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
2872 	}
2873 	if (do_rapl & RAPL_DRAM) {
2874 		if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
2875 			return -9;
2876 		fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
2877 				cpu, msr, (msr >> 31) & 1 ? "": "UN");
2878 
2879 		print_power_limit_msr(cpu, msr, "DRAM Limit");
2880 	}
2881 	if (do_rapl & RAPL_CORE_POLICY) {
2882 		if (debug) {
2883 			if (get_msr(cpu, MSR_PP0_POLICY, &msr))
2884 				return -7;
2885 
2886 			fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
2887 		}
2888 	}
2889 	if (do_rapl & RAPL_CORES_POWER_LIMIT) {
2890 		if (debug) {
2891 			if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
2892 				return -9;
2893 			fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
2894 					cpu, msr, (msr >> 31) & 1 ? "": "UN");
2895 			print_power_limit_msr(cpu, msr, "Cores Limit");
2896 		}
2897 	}
2898 	if (do_rapl & RAPL_GFX) {
2899 		if (debug) {
2900 			if (get_msr(cpu, MSR_PP1_POLICY, &msr))
2901 				return -8;
2902 
2903 			fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
2904 
2905 			if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
2906 				return -9;
2907 			fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
2908 					cpu, msr, (msr >> 31) & 1 ? "": "UN");
2909 			print_power_limit_msr(cpu, msr, "GFX Limit");
2910 		}
2911 	}
2912 	return 0;
2913 }
2914 
2915 /*
2916  * SNB adds support for additional MSRs:
2917  *
2918  * MSR_PKG_C7_RESIDENCY            0x000003fa
2919  * MSR_CORE_C7_RESIDENCY           0x000003fe
2920  * MSR_PKG_C2_RESIDENCY            0x0000060d
2921  */
2922 
2923 int has_snb_msrs(unsigned int family, unsigned int model)
2924 {
2925 	if (!genuine_intel)
2926 		return 0;
2927 
2928 	switch (model) {
2929 	case INTEL_FAM6_SANDYBRIDGE:
2930 	case INTEL_FAM6_SANDYBRIDGE_X:
2931 	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
2932 	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
2933 	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
	case INTEL_FAM6_HASWELL_X:	/* HSX */
2935 	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
2936 	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
2937 	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
2938 	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
2939 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
2940 	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
2941 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
2942 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
2943 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
2944 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
2945 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
2946 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
2947 	case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
2948 		return 1;
2949 	}
2950 	return 0;
2951 }
2952 
2953 /*
2954  * HSW adds support for additional MSRs:
2955  *
2956  * MSR_PKG_C8_RESIDENCY		0x00000630
2957  * MSR_PKG_C9_RESIDENCY		0x00000631
2958  * MSR_PKG_C10_RESIDENCY	0x00000632
2959  *
2960  * MSR_PKGC8_IRTL		0x00000633
2961  * MSR_PKGC9_IRTL		0x00000634
2962  * MSR_PKGC10_IRTL		0x00000635
2963  *
2964  */
2965 int has_hsw_msrs(unsigned int family, unsigned int model)
2966 {
2967 	if (!genuine_intel)
2968 		return 0;
2969 
2970 	switch (model) {
2971 	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
2972 	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
2973 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
2974 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
2975 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
2976 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
2977 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
2978 		return 1;
2979 	}
2980 	return 0;
2981 }
2982 
2983 /*
2984  * SKL adds support for additional MSRS:
2985  *
2986  * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
2987  * MSR_PKG_ANY_CORE_C0_RES         0x00000659
2988  * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
2989  * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
2990  */
2991 int has_skl_msrs(unsigned int family, unsigned int model)
2992 {
2993 	if (!genuine_intel)
2994 		return 0;
2995 
2996 	switch (model) {
2997 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
2998 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
2999 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
3000 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
3001 		return 1;
3002 	}
3003 	return 0;
3004 }
3005 
3006 
3007 
3008 int is_slm(unsigned int family, unsigned int model)
3009 {
3010 	if (!genuine_intel)
3011 		return 0;
3012 	switch (model) {
3013 	case INTEL_FAM6_ATOM_SILVERMONT1:	/* BYT */
3014 	case INTEL_FAM6_ATOM_SILVERMONT2:	/* AVN */
3015 		return 1;
3016 	}
3017 	return 0;
3018 }
3019 
3020 int is_knl(unsigned int family, unsigned int model)
3021 {
3022 	if (!genuine_intel)
3023 		return 0;
3024 	switch (model) {
3025 	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
3026 	case INTEL_FAM6_XEON_PHI_KNM:
3027 		return 1;
3028 	}
3029 	return 0;
3030 }
3031 
3032 unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
3033 {
3034 	if (is_knl(family, model))
3035 		return 1024;
3036 	return 1;
3037 }
3038 
3039 #define SLM_BCLK_FREQS 5
3040 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
3041 
3042 double slm_bclk(void)
3043 {
3044 	unsigned long long msr = 3;
3045 	unsigned int i;
3046 	double freq;
3047 
3048 	if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
3049 		fprintf(outf, "SLM BCLK: unknown\n");
3050 
3051 	i = msr & 0xf;
3052 	if (i >= SLM_BCLK_FREQS) {
3053 		fprintf(outf, "SLM BCLK[%d] invalid\n", i);
3054 		i = 3;
3055 	}
3056 	freq = slm_freq_table[i];
3057 
	fprintf(outf, "SLM BCLK: %.1f MHz\n", freq);
3059 
3060 	return freq;
3061 }
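
/*
 * Example: MSR_FSB_FREQ[3:0] = 1 selects slm_freq_table[1], so
 * slm_bclk() reports 100.0 MHz on such a part.
 */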
3062 
3063 double discover_bclk(unsigned int family, unsigned int model)
3064 {
3065 	if (has_snb_msrs(family, model) || is_knl(family, model))
3066 		return 100.00;
3067 	else if (is_slm(family, model))
3068 		return slm_bclk();
3069 	else
3070 		return 133.33;
3071 }
3072 
3073 /*
3074  * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
3075  * the Thermal Control Circuit (TCC) activates.
3076  * This is usually equal to tjMax.
3077  *
3078  * Older processors do not have this MSR, so there we guess,
3079  * but also allow cmdline over-ride with -T.
3080  *
3081  * Several MSR temperature values are in units of degrees-C
3082  * below this value, including the Digital Thermal Sensor (DTS),
3083  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
3084  */
3085 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3086 {
3087 	unsigned long long msr;
3088 	unsigned int target_c_local;
3089 	int cpu;
3090 
3091 	/* tcc_activation_temp is used only for dts or ptm */
3092 	if (!(do_dts || do_ptm))
3093 		return 0;
3094 
3095 	/* this is a per-package concept */
3096 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3097 		return 0;
3098 
3099 	cpu = t->cpu_id;
3100 	if (cpu_migrate(cpu)) {
3101 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3102 		return -1;
3103 	}
3104 
3105 	if (tcc_activation_temp_override != 0) {
3106 		tcc_activation_temp = tcc_activation_temp_override;
3107 		fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
3108 			cpu, tcc_activation_temp);
3109 		return 0;
3110 	}
3111 
3112 	/* Temperature Target MSR is Nehalem and newer only */
3113 	if (!do_nhm_platform_info)
3114 		goto guess;
3115 
3116 	if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
3117 		goto guess;
3118 
3119 	target_c_local = (msr >> 16) & 0xFF;
3120 
3121 	if (debug)
3122 		fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
3123 			cpu, msr, target_c_local);
3124 
3125 	if (!target_c_local)
3126 		goto guess;
3127 
3128 	tcc_activation_temp = target_c_local;
3129 
3130 	return 0;
3131 
3132 guess:
3133 	tcc_activation_temp = TJMAX_DEFAULT;
3134 	fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
3135 		cpu, tcc_activation_temp);
3136 
3137 	return 0;
3138 }
3139 
3140 void decode_feature_control_msr(void)
3141 {
3142 	unsigned long long msr;
3143 
3144 	if (!get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr))
3145 		fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
3146 			base_cpu, msr,
3147 			msr & FEATURE_CONTROL_LOCKED ? "" : "UN-",
3148 			msr & (1 << 18) ? "SGX" : "");
3149 }
3150 
3151 void decode_misc_enable_msr(void)
3152 {
3153 	unsigned long long msr;
3154 
3155 	if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
3156 		fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n",
3157 			base_cpu, msr,
3158 			msr & (1 << 3) ? "TCC" : "",
3159 			msr & (1 << 16) ? "EIST" : "",
3160 			msr & (1 << 18) ? "MONITOR" : "");
3161 }
3162 
3163 /*
3164  * Decode MSR_MISC_PWR_MGMT
3165  *
3166  * Decode the bits according to the Nehalem documentation
3167  * bit[0] seems to continue to have same meaning going forward
3168  * bit[1] less so...
3169  */
3170 void decode_misc_pwr_mgmt_msr(void)
3171 {
3172 	unsigned long long msr;
3173 
3174 	if (!do_nhm_platform_info)
3175 		return;
3176 
3177 	if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
3178 		fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
3179 			base_cpu, msr,
3180 			msr & (1 << 0) ? "DIS" : "EN",
3181 			msr & (1 << 1) ? "EN" : "DIS",
3182 			msr & (1 << 8) ? "EN" : "DIS");
3183 }
3184 
3185 void process_cpuid()
3186 {
3187 	unsigned int eax, ebx, ecx, edx, max_level, max_extended_level;
3188 	unsigned int fms, family, model, stepping;
3189 
3190 	eax = ebx = ecx = edx = 0;
3191 
3192 	__cpuid(0, max_level, ebx, ecx, edx);
3193 
3194 	if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
3195 		genuine_intel = 1;
3196 
3197 	if (debug)
3198 		fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
3199 			(char *)&ebx, (char *)&edx, (char *)&ecx);
3200 
3201 	__cpuid(1, fms, ebx, ecx, edx);
3202 	family = (fms >> 8) & 0xf;
3203 	model = (fms >> 4) & 0xf;
3204 	stepping = fms & 0xf;
3205 	if (family == 6 || family == 0xf)
3206 		model += ((fms >> 16) & 0xf) << 4;
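
	/*
	 * Example: fms = 0x000306c3 decodes as family 0x6, model 0x3c
	 * (extended model 0x3 merged in above) and stepping 0x3.
	 */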
3207 
3208 	if (debug) {
3209 		fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
3210 			max_level, family, model, stepping, family, model, stepping);
3211 		fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n",
3212 			ecx & (1 << 0) ? "SSE3" : "-",
3213 			ecx & (1 << 3) ? "MONITOR" : "-",
3214 			ecx & (1 << 6) ? "SMX" : "-",
3215 			ecx & (1 << 7) ? "EIST" : "-",
3216 			ecx & (1 << 8) ? "TM2" : "-",
3217 			edx & (1 << 4) ? "TSC" : "-",
3218 			edx & (1 << 5) ? "MSR" : "-",
3219 			edx & (1 << 22) ? "ACPI-TM" : "-",
3220 			edx & (1 << 29) ? "TM" : "-");
3221 	}
3222 
3223 	if (!(edx & (1 << 5)))
3224 		errx(1, "CPUID: no MSR");
3225 
3226 	/*
3227 	 * check max extended function levels of CPUID.
3228 	 * This is needed to check for invariant TSC.
3229 	 * This check is valid for both Intel and AMD.
3230 	 */
3231 	ebx = ecx = edx = 0;
3232 	__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
3233 
3234 	if (max_extended_level >= 0x80000007) {
3235 
3236 		/*
3237 		 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
3238 		 * this check is valid for both Intel and AMD
3239 		 */
3240 		__cpuid(0x80000007, eax, ebx, ecx, edx);
3241 		has_invariant_tsc = edx & (1 << 8);
3242 	}
3243 
3244 	/*
3245 	 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
3246 	 * this check is valid for both Intel and AMD
3247 	 */
3248 
3249 	__cpuid(0x6, eax, ebx, ecx, edx);
3250 	has_aperf = ecx & (1 << 0);
3251 	do_dts = eax & (1 << 0);
3252 	do_ptm = eax & (1 << 6);
3253 	has_hwp = eax & (1 << 7);
3254 	has_hwp_notify = eax & (1 << 8);
3255 	has_hwp_activity_window = eax & (1 << 9);
3256 	has_hwp_epp = eax & (1 << 10);
3257 	has_hwp_pkg = eax & (1 << 11);
3258 	has_epb = ecx & (1 << 3);
3259 
3260 	if (debug)
3261 		fprintf(outf, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, "
3262 			"%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
3263 			has_aperf ? "" : "No-",
3264 			do_dts ? "" : "No-",
3265 			do_ptm ? "" : "No-",
3266 			has_hwp ? "" : "No-",
3267 			has_hwp_notify ? "" : "No-",
3268 			has_hwp_activity_window ? "" : "No-",
3269 			has_hwp_epp ? "" : "No-",
3270 			has_hwp_pkg ? "" : "No-",
3271 			has_epb ? "" : "No-");
3272 
3273 	if (debug)
3274 		decode_misc_enable_msr();
3275 
3276 	if (max_level >= 0x7 && debug) {
3277 		int has_sgx;
3278 
3279 		ecx = 0;
3280 
3281 		__cpuid_count(0x7, 0, eax, ebx, ecx, edx);
3282 
3283 		has_sgx = ebx & (1 << 2);
3284 		fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
3285 
3286 		if (has_sgx)
3287 			decode_feature_control_msr();
3288 	}
3289 
3290 	if (max_level >= 0x15) {
3291 		unsigned int eax_crystal;
3292 		unsigned int ebx_tsc;
3293 
3294 		/*
3295 		 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
3296 		 */
3297 		eax_crystal = ebx_tsc = crystal_hz = edx = 0;
3298 		__cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
3299 
3300 		if (ebx_tsc != 0) {
3301 
			if (debug)
3303 				fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
3304 					eax_crystal, ebx_tsc, crystal_hz);
3305 
3306 			if (crystal_hz == 0)
				switch (model) {
3308 				case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
3309 				case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
3310 				case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
3311 				case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
3312 					crystal_hz = 24000000;	/* 24.0 MHz */
3313 					break;
3314 				case INTEL_FAM6_SKYLAKE_X:	/* SKX */
3315 					crystal_hz = 25000000;	/* 25.0 MHz */
3316 					break;
3317 				case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
3318 				case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
3319 					crystal_hz = 19200000;	/* 19.2 MHz */
3320 					break;
3321 				default:
3322 					crystal_hz = 0;
				}
3324 
3325 			if (crystal_hz) {
3326 				tsc_hz =  (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
3327 				if (debug)
3328 					fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
3329 						tsc_hz / 1000000, crystal_hz, ebx_tsc,  eax_crystal);
3330 			}
3331 		}
3332 	}
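
	/*
	 * Example for the CPUID.15H path above: a 24 MHz crystal with
	 * ebx_tsc/eax_crystal = 216/2 gives
	 * tsc_hz = 24000000 * 216 / 2 = 2592 MHz.
	 */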
3333 	if (max_level >= 0x16) {
3334 		unsigned int base_mhz, max_mhz, bus_mhz, edx;
3335 
3336 		/*
3337 		 * CPUID 16H Base MHz, Max MHz, Bus MHz
3338 		 */
3339 		base_mhz = max_mhz = bus_mhz = edx = 0;
3340 
3341 		__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
3342 		if (debug)
3343 			fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
3344 				base_mhz, max_mhz, bus_mhz);
3345 	}
3346 
3347 	if (has_aperf)
3348 		aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
3349 
3350 	do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model);
3351 	do_snb_cstates = has_snb_msrs(family, model);
3352 	do_irtl_snb = has_snb_msrs(family, model);
3353 	do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2);
3354 	do_pc3 = (pkg_cstate_limit >= PCL__3);
3355 	do_pc6 = (pkg_cstate_limit >= PCL__6);
3356 	do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7);
3357 	do_c8_c9_c10 = has_hsw_msrs(family, model);
3358 	do_irtl_hsw = has_hsw_msrs(family, model);
3359 	do_skl_residency = has_skl_msrs(family, model);
3360 	do_slm_cstates = is_slm(family, model);
3361 	do_knl_cstates  = is_knl(family, model);
3362 
3363 	if (debug)
3364 		decode_misc_pwr_mgmt_msr();
3365 
3366 	rapl_probe(family, model);
3367 	perf_limit_reasons_probe(family, model);
3368 
3369 	if (debug)
3370 		dump_cstate_pstate_config_info(family, model);
3371 
3372 	if (has_skl_msrs(family, model))
3373 		calculate_tsc_tweak();
3374 
3375 	do_gfx_rc6_ms = !access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK);
3376 
3377 	do_gfx_mhz = !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK);
3378 
3379 	return;
3380 }
3381 
3382 void help()
3383 {
3384 	fprintf(outf,
	"Usage: turbostat [OPTIONS] [(--interval seconds) | COMMAND ...]\n"
3386 	"\n"
3387 	"Turbostat forks the specified COMMAND and prints statistics\n"
3388 	"when COMMAND completes.\n"
	"If no COMMAND is specified, turbostat wakes every 5 seconds\n"
3390 	"to print statistics, until interrupted.\n"
3391 	"--debug	run in \"debug\" mode\n"
3392 	"--interval sec	Override default 5-second measurement interval\n"
3393 	"--help		print this help message\n"
3394 	"--counter msr	print 32-bit counter at address \"msr\"\n"
3395 	"--Counter msr	print 64-bit Counter at address \"msr\"\n"
3396 	"--out file	create or truncate \"file\" for all output\n"
3397 	"--msr msr	print 32-bit value at address \"msr\"\n"
3398 	"--MSR msr	print 64-bit Value at address \"msr\"\n"
3399 	"--version	print version information\n"
3400 	"\n"
3401 	"For more help, run \"man turbostat\"\n");
3402 }
3403 
3404 
3405 /*
3406  * in /dev/cpu/ return success for names that are numbers
3407  * ie. filter out ".", "..", "microcode".
3408  */
3409 int dir_filter(const struct dirent *dirp)
3410 {
3411 	if (isdigit(dirp->d_name[0]))
3412 		return 1;
3413 	else
3414 		return 0;
3415 }
3416 
3417 int open_dev_cpu_msr(int dummy1)
3418 {
3419 	return 0;
3420 }
3421 
3422 void topology_probe()
3423 {
3424 	int i;
3425 	int max_core_id = 0;
3426 	int max_package_id = 0;
3427 	int max_siblings = 0;
3428 	struct cpu_topology {
3429 		int core_id;
3430 		int physical_package_id;
3431 	} *cpus;
3432 
3433 	/* Initialize num_cpus, max_cpu_num */
3434 	topo.num_cpus = 0;
3435 	topo.max_cpu_num = 0;
3436 	for_all_proc_cpus(count_cpus);
3437 	if (!summary_only && topo.num_cpus > 1)
3438 		show_cpu = 1;
3439 
3440 	if (debug > 1)
3441 		fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
3442 
3443 	cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
3444 	if (cpus == NULL)
3445 		err(1, "calloc cpus");
3446 
3447 	/*
3448 	 * Allocate and initialize cpu_present_set
3449 	 */
3450 	cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
3451 	if (cpu_present_set == NULL)
3452 		err(3, "CPU_ALLOC");
3453 	cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
3454 	CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
3455 	for_all_proc_cpus(mark_cpu_present);
3456 
3457 	/*
3458 	 * Allocate and initialize cpu_affinity_set
3459 	 */
3460 	cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
3461 	if (cpu_affinity_set == NULL)
3462 		err(3, "CPU_ALLOC");
3463 	cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
3464 	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
3465 
3466 
3467 	/*
3468 	 * For online cpus
3469 	 * find max_core_id, max_package_id
3470 	 */
3471 	for (i = 0; i <= topo.max_cpu_num; ++i) {
3472 		int siblings;
3473 
3474 		if (cpu_is_not_present(i)) {
3475 			if (debug > 1)
3476 				fprintf(outf, "cpu%d NOT PRESENT\n", i);
3477 			continue;
3478 		}
3479 		cpus[i].core_id = get_core_id(i);
3480 		if (cpus[i].core_id > max_core_id)
3481 			max_core_id = cpus[i].core_id;
3482 
3483 		cpus[i].physical_package_id = get_physical_package_id(i);
3484 		if (cpus[i].physical_package_id > max_package_id)
3485 			max_package_id = cpus[i].physical_package_id;
3486 
3487 		siblings = get_num_ht_siblings(i);
3488 		if (siblings > max_siblings)
3489 			max_siblings = siblings;
3490 		if (debug > 1)
3491 			fprintf(outf, "cpu %d pkg %d core %d\n",
3492 				i, cpus[i].physical_package_id, cpus[i].core_id);
3493 	}
3494 	topo.num_cores_per_pkg = max_core_id + 1;
3495 	if (debug > 1)
3496 		fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
3497 			max_core_id, topo.num_cores_per_pkg);
3498 	if (debug && !summary_only && topo.num_cores_per_pkg > 1)
3499 		show_core = 1;
3500 
3501 	topo.num_packages = max_package_id + 1;
3502 	if (debug > 1)
3503 		fprintf(outf, "max_package_id %d, sizing for %d packages\n",
3504 			max_package_id, topo.num_packages);
3505 	if (debug && !summary_only && topo.num_packages > 1)
3506 		show_pkg = 1;
3507 
3508 	topo.num_threads_per_core = max_siblings;
3509 	if (debug > 1)
3510 		fprintf(outf, "max_siblings %d\n", max_siblings);
3511 
3512 	free(cpus);
3513 }
3514 
3515 void
3516 allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
3517 {
3518 	int i;
3519 
3520 	*t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg *
3521 		topo.num_packages, sizeof(struct thread_data));
3522 	if (*t == NULL)
3523 		goto error;
3524 
3525 	for (i = 0; i < topo.num_threads_per_core *
3526 		topo.num_cores_per_pkg * topo.num_packages; i++)
3527 		(*t)[i].cpu_id = -1;
3528 
3529 	*c = calloc(topo.num_cores_per_pkg * topo.num_packages,
3530 		sizeof(struct core_data));
3531 	if (*c == NULL)
3532 		goto error;
3533 
3534 	for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++)
3535 		(*c)[i].core_id = -1;
3536 
3537 	*p = calloc(topo.num_packages, sizeof(struct pkg_data));
3538 	if (*p == NULL)
3539 		goto error;
3540 
3541 	for (i = 0; i < topo.num_packages; i++)
3542 		(*p)[i].package_id = i;
3543 
3544 	return;
3545 error:
3546 	err(1, "calloc counters");
3547 }
3548 /*
3549  * init_counter()
3550  *
3551  * set cpu_id, core_num, pkg_num
3552  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
3553  *
3554  * increment topo.num_cores when 1st core in pkg seen
3555  */
3556 void init_counter(struct thread_data *thread_base, struct core_data *core_base,
3557 	struct pkg_data *pkg_base, int thread_num, int core_num,
3558 	int pkg_num, int cpu_id)
3559 {
3560 	struct thread_data *t;
3561 	struct core_data *c;
3562 	struct pkg_data *p;
3563 
3564 	t = GET_THREAD(thread_base, thread_num, core_num, pkg_num);
3565 	c = GET_CORE(core_base, core_num, pkg_num);
3566 	p = GET_PKG(pkg_base, pkg_num);
3567 
3568 	t->cpu_id = cpu_id;
3569 	if (thread_num == 0) {
3570 		t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
3571 		if (cpu_is_first_core_in_package(cpu_id))
3572 			t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
3573 	}
3574 
3575 	c->core_id = core_num;
3576 	p->package_id = pkg_num;
3577 }
3578 
3579 
3580 int initialize_counters(int cpu_id)
3581 {
3582 	int my_thread_id, my_core_id, my_package_id;
3583 
3584 	my_package_id = get_physical_package_id(cpu_id);
3585 	my_core_id = get_core_id(cpu_id);
3586 	my_thread_id = get_cpu_position_in_core(cpu_id);
3587 	if (!my_thread_id)
3588 		topo.num_cores++;
3589 
3590 	init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
3591 	init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
3592 	return 0;
3593 }
3594 
3595 void allocate_output_buffer()
3596 {
3597 	output_buffer = calloc(1, (1 + topo.num_cpus) * 1024);
3598 	outp = output_buffer;
3599 	if (outp == NULL)
3600 		err(-1, "calloc output buffer");
3601 }
3602 void allocate_fd_percpu(void)
3603 {
3604 	fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
3605 	if (fd_percpu == NULL)
3606 		err(-1, "calloc fd_percpu");
3607 }
3608 void allocate_irq_buffers(void)
3609 {
3610 	irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
3611 	if (irq_column_2_cpu == NULL)
3612 		err(-1, "calloc %d", topo.num_cpus);
3613 
3614 	irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
3615 	if (irqs_per_cpu == NULL)
3616 		err(-1, "calloc %d", topo.max_cpu_num + 1);
3617 }
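
/*
 * setup_all_buffers()
 *
 * probe the topology, allocate everything that is sized from it,
 * then bind each online cpu to its slot in the counter sets
 */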
3618 void setup_all_buffers(void)
3619 {
3620 	topology_probe();
3621 	allocate_irq_buffers();
3622 	allocate_fd_percpu();
3623 	allocate_counters(&thread_even, &core_even, &package_even);
3624 	allocate_counters(&thread_odd, &core_odd, &package_odd);
3625 	allocate_output_buffer();
3626 	for_all_proc_cpus(initialize_counters);
3627 }
3628 
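/*
 * set_base_cpu()
 *
 * remember which cpu turbostat itself is running on;
 * MSRs that need to be read only once are read on this cpu
 */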
3629 void set_base_cpu(void)
3630 {
3631 	base_cpu = sched_getcpu();
3632 	if (base_cpu < 0)
3633 		err(-ENODEV, "No valid cpus found");
3634 
3635 	if (debug > 1)
3636 		fprintf(outf, "base_cpu = %d\n", base_cpu);
3637 }
3638 
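/*
 * turbostat_init()
 *
 * one-time setup: size and allocate the buffers, verify MSR access and
 * permissions, identify the processor, and (under --debug) dump the
 * static per-cpu configuration: HWP, EPB, perf-limit-reasons, RAPL,
 * thermal and IRTL settings
 */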
void turbostat_init(void)
3640 {
3641 	setup_all_buffers();
3642 	set_base_cpu();
3643 	check_dev_msr();
3644 	check_permissions();
3645 	process_cpuid();
3646 
3648 	if (debug)
3649 		for_all_cpus(print_hwp, ODD_COUNTERS);
3650 
3651 	if (debug)
3652 		for_all_cpus(print_epb, ODD_COUNTERS);
3653 
3654 	if (debug)
3655 		for_all_cpus(print_perf_limit, ODD_COUNTERS);
3656 
3657 	if (debug)
3658 		for_all_cpus(print_rapl, ODD_COUNTERS);
3659 
3660 	for_all_cpus(set_temperature_target, ODD_COUNTERS);
3661 
3662 	if (debug)
3663 		for_all_cpus(print_thermal, ODD_COUNTERS);
3664 
3665 	if (debug && do_irtl_snb)
3666 		print_irtl();
3667 }
3668 
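/*
 * fork_it()
 *
 * snapshot the counters, fork/exec the given command, snapshot again
 * when it exits, and print one measurement interval covering the
 * command's entire run time; returns the status collected by waitpid()
 */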
3669 int fork_it(char **argv)
3670 {
3671 	pid_t child_pid;
3672 	int status;
3673 
3674 	status = for_all_cpus(get_counters, EVEN_COUNTERS);
3675 	if (status)
3676 		exit(status);
3677 	/* clear affinity side-effect of get_counters() */
3678 	sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
3679 	gettimeofday(&tv_even, (struct timezone *)NULL);
3680 
3681 	child_pid = fork();
	if (!child_pid) {
		/* child */
		execvp(argv[0], argv);
		/* execvp() returns only on failure */
		err(errno, "exec %s", argv[0]);
3685 	} else {
3686 
3687 		/* parent */
3688 		if (child_pid == -1)
3689 			err(1, "fork");
3690 
3691 		signal(SIGINT, SIG_IGN);
3692 		signal(SIGQUIT, SIG_IGN);
3693 		if (waitpid(child_pid, &status, 0) == -1)
3694 			err(status, "waitpid");
3695 	}
	/*
	 * n.b. this second for_all_cpus() pass does not check for errors:
	 * the forked command has already run, so the measurement
	 * cannot simply be re-started on failure
	 */
3700 	for_all_cpus(get_counters, ODD_COUNTERS);
3701 	gettimeofday(&tv_odd, (struct timezone *)NULL);
3702 	timersub(&tv_odd, &tv_even, &tv_delta);
3703 	if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
3704 		fprintf(outf, "%s: Counter reset detected\n", progname);
3705 	else {
3706 		compute_average(EVEN_COUNTERS);
3707 		format_all_counters(EVEN_COUNTERS);
3708 	}
3709 
3710 	fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
3711 
3712 	flush_output_stderr();
3713 
3714 	return status;
3715 }
3716 
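/*
 * get_and_dump_counters()
 *
 * --Dump mode: take a single snapshot of the raw counters,
 * print them unprocessed, and flush to stdout
 */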
3717 int get_and_dump_counters(void)
3718 {
3719 	int status;
3720 
3721 	status = for_all_cpus(get_counters, ODD_COUNTERS);
3722 	if (status)
3723 		return status;
3724 
3725 	status = for_all_cpus(dump_counters, ODD_COUNTERS);
3726 	if (status)
3727 		return status;
3728 
3729 	flush_output_stdout();
3730 
3731 	return status;
3732 }
3733 
void print_version(void)
{
3735 	fprintf(outf, "turbostat version 4.14 22 Apr 2016"
3736 		" - Len Brown <lenb@kernel.org>\n");
3737 }
3738 
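/*
 * cmdline()
 *
 * parse options; getopt_long_only() also accepts single-dash long
 * options (e.g. "-interval 5"), and the leading "+" in the option
 * string stops parsing at the first non-option, so a forked command's
 * own arguments are passed through untouched
 */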
3739 void cmdline(int argc, char **argv)
3740 {
3741 	int opt;
3742 	int option_index = 0;
3743 	static struct option long_options[] = {
3744 		{"Counter",	required_argument,	0, 'C'},
3745 		{"counter",	required_argument,	0, 'c'},
3746 		{"Dump",	no_argument,		0, 'D'},
3747 		{"debug",	no_argument,		0, 'd'},
3748 		{"interval",	required_argument,	0, 'i'},
3749 		{"help",	no_argument,		0, 'h'},
3750 		{"Joules",	no_argument,		0, 'J'},
3751 		{"MSR",		required_argument,	0, 'M'},
3752 		{"msr",		required_argument,	0, 'm'},
3753 		{"out",		required_argument,	0, 'o'},
		{"Package",	no_argument,		0, 'P'},
3755 		{"processor",	no_argument,		0, 'p'},
3756 		{"Summary",	no_argument,		0, 'S'},
3757 		{"TCC",		required_argument,	0, 'T'},
		{"version",	no_argument,		0, 'v'},
3759 		{0,		0,			0,  0 }
3760 	};
3761 
3762 	progname = argv[0];
3763 
3764 	while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v",
3765 				long_options, &option_index)) != -1) {
3766 		switch (opt) {
3767 		case 'C':
3768 			sscanf(optarg, "%x", &extra_delta_offset64);
3769 			break;
3770 		case 'c':
3771 			sscanf(optarg, "%x", &extra_delta_offset32);
3772 			break;
3773 		case 'D':
3774 			dump_only++;
3775 			break;
3776 		case 'd':
3777 			debug++;
3778 			break;
3779 		case 'h':
3780 		default:
3781 			help();
3782 			exit(1);
3783 		case 'i':
3784 			{
3785 				double interval = strtod(optarg, NULL);
3786 
3787 				if (interval < 0.001) {
3788 					fprintf(outf, "interval %f seconds is too small\n",
3789 						interval);
3790 					exit(2);
3791 				}
3792 
3793 				interval_ts.tv_sec = interval;
3794 				interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
3795 			}
3796 			break;
3797 		case 'J':
3798 			rapl_joules++;
3799 			break;
3800 		case 'M':
3801 			sscanf(optarg, "%x", &extra_msr_offset64);
3802 			break;
3803 		case 'm':
3804 			sscanf(optarg, "%x", &extra_msr_offset32);
3805 			break;
3806 		case 'o':
3807 			outf = fopen_or_die(optarg, "w");
3808 			break;
3809 		case 'P':
3810 			show_pkg_only++;
3811 			break;
3812 		case 'p':
3813 			show_core_only++;
3814 			break;
3815 		case 'S':
3816 			summary_only++;
3817 			break;
3818 		case 'T':
3819 			tcc_activation_temp_override = atoi(optarg);
3820 			break;
3821 		case 'v':
3822 			print_version();
3823 			exit(0);
3824 			break;
3825 		}
3826 	}
3827 }
3828 
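/*
 * main()
 *
 * three modes of operation:
 *   --Dump:           print the raw counters once and exit
 *   trailing command: measure while the command runs (fork_it)
 *   otherwise:        print once per interval, forever (turbostat_loop)
 */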
3829 int main(int argc, char **argv)
3830 {
3831 	outf = stderr;
3832 
3833 	cmdline(argc, argv);
3834 
3835 	if (debug)
3836 		print_version();
3837 
3838 	turbostat_init();
3839 
3840 	/* dump counters and exit */
3841 	if (dump_only)
3842 		return get_and_dump_counters();
3843 
3844 	/*
3845 	 * if any params left, it must be a command to fork
3846 	 */
3847 	if (argc - optind)
3848 		return fork_it(argv + optind);
3849 	else
3850 		turbostat_loop();
3851 
3852 	return 0;
3853 }
3854