1 /*
2  * turbostat -- show CPU frequency and C-state residency
3  * on modern Intel turbo-capable processors.
4  *
5  * Copyright (c) 2013 Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21 
22 #define _GNU_SOURCE
23 #include MSRHEADER
24 #include <stdarg.h>
25 #include <stdio.h>
26 #include <err.h>
27 #include <unistd.h>
28 #include <sys/types.h>
29 #include <sys/wait.h>
30 #include <sys/stat.h>
31 #include <sys/resource.h>
32 #include <fcntl.h>
33 #include <signal.h>
34 #include <sys/time.h>
35 #include <stdlib.h>
36 #include <getopt.h>
37 #include <dirent.h>
38 #include <string.h>
39 #include <ctype.h>
40 #include <sched.h>
41 #include <time.h>
42 #include <cpuid.h>
43 #include <linux/capability.h>
44 #include <errno.h>
45 
/*
 * Program-wide state.
 *
 * The do_*, has_*, show_* and skip_* variables are tested as conditions by
 * the header, formatting and delta routines in this file to decide which
 * columns and counters are handled.  Where they get set is outside this
 * section.
 */
char *proc_stat = "/proc/stat";
FILE *outf;	/* stream for diagnostics; flush_output_stderr() writes the buffer here */
int *fd_percpu;	/* cached /dev/cpu/N/msr descriptors indexed by cpu, 0 = not opened yet */
struct timespec interval_ts = {5, 0};
unsigned int debug;	/* non-zero: print the columns that follow the extra-MSR columns */
unsigned int rapl_joules;	/* non-zero: RAPL columns are printed as Joules rather than Watts */
unsigned int summary_only;	/* non-zero: format_all_counters() prints only the summary row */
unsigned int dump_only;
unsigned int skip_c0;	/* set by delta_thread() when APERF/MPERF did not advance */
unsigned int skip_c1;	/* ditto; the affected columns are printed as "********" */
unsigned int do_nhm_cstates;
unsigned int do_snb_cstates;
unsigned int do_knl_cstates;
unsigned int do_pc2;
unsigned int do_pc3;
unsigned int do_pc6;
unsigned int do_pc7;
unsigned int do_c8_c9_c10;
unsigned int do_skl_residency;
unsigned int do_slm_cstates;
unsigned int use_c1_residency_msr;
unsigned int has_aperf;
unsigned int has_epb;
unsigned int units = 1000000;	/* MHz etc */
unsigned int genuine_intel;
unsigned int has_invariant_tsc;
unsigned int do_nhm_platform_info;
unsigned int extra_msr_offset32;
unsigned int extra_msr_offset64;
unsigned int extra_delta_offset32;
unsigned int extra_delta_offset64;
unsigned int aperf_mperf_multiplier = 1;
int do_irq = 1;
int do_smi;
double bclk;
double base_hz;
unsigned int has_base_hz;
double tsc_tweak = 1.0;
unsigned int show_pkg;
unsigned int show_core;
unsigned int show_cpu;
unsigned int show_pkg_only;
unsigned int show_core_only;
/* output_buffer collects formatted text; outp is the current append position in it */
char *output_buffer, *outp;
unsigned int do_rapl;	/* bitmask of the RAPL_* flags */
unsigned int do_dts;
unsigned int do_ptm;
unsigned int do_gfx_rc6_ms;
unsigned long long  gfx_cur_rc6_ms;
unsigned int do_gfx_mhz;
unsigned int gfx_cur_mhz;
unsigned int tcc_activation_temp;
unsigned int tcc_activation_temp_override;
double rapl_power_units, rapl_time_units;
double rapl_dram_energy_units, rapl_energy_units;
/* format_counters() marks RAPL values with "**" once the interval in seconds reaches this */
double rapl_joule_counter_range;
unsigned int do_core_perf_limit_reasons;
unsigned int do_gfx_perf_limit_reasons;
unsigned int do_ring_perf_limit_reasons;
unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;
double discover_bclk(unsigned int family, unsigned int model);
unsigned int has_hwp;	/* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
			/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
unsigned int has_hwp_notify;		/* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp;		/* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg;		/* IA32_HWP_REQUEST_PKG */
115 
/*
 * Feature bits that are tested against do_rapl.
 * The comment under each bit names the MSR(s) associated with it.
 */
#define RAPL_PKG		(1 << 0)
					/* 0x610 MSR_PKG_POWER_LIMIT */
					/* 0x611 MSR_PKG_ENERGY_STATUS */
#define RAPL_PKG_PERF_STATUS	(1 << 1)
					/* 0x613 MSR_PKG_PERF_STATUS */
#define RAPL_PKG_POWER_INFO	(1 << 2)
					/* 0x614 MSR_PKG_POWER_INFO */

#define RAPL_DRAM		(1 << 3)
					/* 0x618 MSR_DRAM_POWER_LIMIT */
					/* 0x619 MSR_DRAM_ENERGY_STATUS */
#define RAPL_DRAM_PERF_STATUS	(1 << 4)
					/* 0x61b MSR_DRAM_PERF_STATUS */
#define RAPL_DRAM_POWER_INFO	(1 << 5)
					/* 0x61c MSR_DRAM_POWER_INFO */

#define RAPL_CORES		(1 << 6)
					/* 0x638 MSR_PP0_POWER_LIMIT */
					/* 0x639 MSR_PP0_ENERGY_STATUS */
#define RAPL_CORE_POLICY	(1 << 7)
					/* 0x63a MSR_PP0_POLICY */

#define RAPL_GFX		(1 << 8)
					/* 0x640 MSR_PP1_POWER_LIMIT */
					/* 0x641 MSR_PP1_ENERGY_STATUS */
					/* 0x642 MSR_PP1_POLICY */
#define	TJMAX_DEFAULT	100

/* NOTE: evaluates each argument more than once; pass side-effect-free expressions */
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* set to 1 by delta_thread() after the "went backwards" warning has been printed once */
int aperf_mperf_unstable;
int backwards_count;
char *progname;

/*
 * cpu_present_set is queried by cpu_is_not_present();
 * cpu_affinity_set is the scratch set that cpu_migrate() fills in.
 */
cpu_set_t *cpu_present_set, *cpu_affinity_set;
size_t cpu_present_setsize, cpu_affinity_setsize;
152 
/*
 * Counters kept per logical CPU.
 * delta_thread() replaces the older of two samples with the difference
 * between them; the extra_msr fields are copied rather than subtracted.
 */
struct thread_data {
	unsigned long long tsc;
	unsigned long long aperf;
	unsigned long long mperf;
	unsigned long long c1;
	unsigned long long extra_msr64;
	unsigned long long extra_delta64;
	unsigned long long extra_msr32;
	unsigned long long extra_delta32;
	unsigned int irq_count;
	unsigned int smi_count;
	unsigned int cpu_id;
	unsigned int flags;	/* combination of the CPU_IS_FIRST_* bits below */
#define CPU_IS_FIRST_THREAD_IN_CORE	0x2
#define CPU_IS_FIRST_CORE_IN_PACKAGE	0x4
} *thread_even, *thread_odd;
169 
/*
 * Counters kept per core.
 * delta_core() subtracts c3/c6/c7 between samples and copies core_temp_c.
 */
struct core_data {
	unsigned long long c3;
	unsigned long long c6;
	unsigned long long c7;
	unsigned int core_temp_c;
	unsigned int core_id;
} *core_even, *core_odd;
177 
/*
 * Counters kept per package.
 * delta_package() subtracts the 64-bit counters, applies DELTA_WRAP32 to
 * the 32-bit energy/perf-status fields, and copies pkg_temp_c and gfx_mhz.
 */
struct pkg_data {
	unsigned long long pc2;
	unsigned long long pc3;
	unsigned long long pc6;
	unsigned long long pc7;
	unsigned long long pc8;
	unsigned long long pc9;
	unsigned long long pc10;
	unsigned long long pkg_wtd_core_c0;
	unsigned long long pkg_any_core_c0;
	unsigned long long pkg_any_gfxe_c0;
	unsigned long long pkg_both_core_gfxe_c0;
	unsigned long long gfx_rc6_ms;
	unsigned int gfx_mhz;
	unsigned int package_id;
	unsigned int energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
	unsigned int energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
	unsigned int energy_cores;	/* MSR_PP0_ENERGY_STATUS */
	unsigned int energy_gfx;	/* MSR_PP1_ENERGY_STATUS */
	unsigned int rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */
	unsigned int rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */
	unsigned int pkg_temp_c;

} *package_even, *package_odd;
202 
/* Argument triples naming one complete set of counter buffers. */
#define ODD_COUNTERS thread_odd, core_odd, package_odd
#define EVEN_COUNTERS thread_even, core_even, package_even

/*
 * Locate an element in the flat counter arrays.
 * Elements are ordered by package, then by core within the package, then by
 * thread within the core, using the per-package / per-core counts in topo.
 * Every macro argument is parenthesized so that arbitrary expressions
 * (e.g. "a + b" or "x ? y : z") can be passed safely.
 */
#define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \
	((thread_base) + (pkg_no) * topo.num_cores_per_pkg * \
		topo.num_threads_per_core + \
		(core_no) * topo.num_threads_per_core + (thread_no))
#define GET_CORE(core_base, core_no, pkg_no) \
	((core_base) + (pkg_no) * topo.num_cores_per_pkg + (core_no))
#define GET_PKG(pkg_base, pkg_no) ((pkg_base) + (pkg_no))
213 
/*
 * One thread/core/package record used for system-wide totals.
 * compute_average() fills "average"; format_counters() recognizes
 * &average.threads as the summary row.
 */
struct system_summary {
	struct thread_data threads;
	struct core_data cores;
	struct pkg_data packages;
} sum, average;
219 
220 
/*
 * System topology counts.
 * num_packages, num_cores_per_pkg and num_threads_per_core bound the loops
 * in for_all_cpus() and size the GET_THREAD/GET_CORE index arithmetic;
 * num_cpus, num_cores and num_packages are the divisors in compute_average().
 */
struct topo_params {
	int num_packages;
	int num_cpus;
	int num_cores;
	int max_cpu_num;
	int num_cores_per_pkg;
	int num_threads_per_core;
} topo;
229 
/* format_counters() derives the interval length in seconds from tv_delta */
struct timeval tv_even, tv_odd, tv_delta;

int *irq_column_2_cpu;	/* /proc/interrupts column numbers */
int *irqs_per_cpu;		/* indexed by cpu_num */

void setup_all_buffers(void);
236 
237 int cpu_is_not_present(int cpu)
238 {
239 	return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
240 }
241 /*
242  * run func(thread, core, package) in topology order
243  * skip non-present cpus
244  */
245 
246 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
247 	struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
248 {
249 	int retval, pkg_no, core_no, thread_no;
250 
251 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
252 		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
253 			for (thread_no = 0; thread_no <
254 				topo.num_threads_per_core; ++thread_no) {
255 				struct thread_data *t;
256 				struct core_data *c;
257 				struct pkg_data *p;
258 
259 				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
260 
261 				if (cpu_is_not_present(t->cpu_id))
262 					continue;
263 
264 				c = GET_CORE(core_base, core_no, pkg_no);
265 				p = GET_PKG(pkg_base, pkg_no);
266 
267 				retval = func(t, c, p);
268 				if (retval)
269 					return retval;
270 			}
271 		}
272 	}
273 	return 0;
274 }
275 
276 int cpu_migrate(int cpu)
277 {
278 	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
279 	CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
280 	if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
281 		return -1;
282 	else
283 		return 0;
284 }
285 int get_msr_fd(int cpu)
286 {
287 	char pathname[32];
288 	int fd;
289 
290 	fd = fd_percpu[cpu];
291 
292 	if (fd)
293 		return fd;
294 
295 	sprintf(pathname, "/dev/cpu/%d/msr", cpu);
296 	fd = open(pathname, O_RDONLY);
297 	if (fd < 0)
298 		err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
299 
300 	fd_percpu[cpu] = fd;
301 
302 	return fd;
303 }
304 
/*
 * Read the 8-byte MSR at the given offset on the given cpu into *msr.
 * Returns 0; a short or failed read terminates the program through err().
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
	int fd = get_msr_fd(cpu);
	ssize_t nread = pread(fd, msr, sizeof(*msr), offset);

	if (nread != sizeof *msr)
		err(-1, "msr %d offset 0x%llx read failed", cpu, (unsigned long long)offset);

	return 0;
}
316 
317 /*
318  * Example Format w/ field column widths:
319  *
320  *  Package    Core     CPU Avg_MHz Bzy_MHz TSC_MHz     IRQ   SMI   Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp  PkgTmp  GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
321  * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678
322  */
323 
324 void print_header(void)
325 {
326 	if (show_pkg)
327 		outp += sprintf(outp, " Package");
328 	if (show_core)
329 		outp += sprintf(outp, "    Core");
330 	if (show_cpu)
331 		outp += sprintf(outp, "     CPU");
332 	if (has_aperf)
333 		outp += sprintf(outp, " Avg_MHz");
334 	if (has_aperf)
335 		outp += sprintf(outp, "   Busy%%");
336 	if (has_aperf)
337 		outp += sprintf(outp, " Bzy_MHz");
338 	outp += sprintf(outp, " TSC_MHz");
339 
340 	if (extra_delta_offset32)
341 		outp += sprintf(outp, "  count 0x%03X", extra_delta_offset32);
342 	if (extra_delta_offset64)
343 		outp += sprintf(outp, "  COUNT 0x%03X", extra_delta_offset64);
344 	if (extra_msr_offset32)
345 		outp += sprintf(outp, "   MSR 0x%03X", extra_msr_offset32);
346 	if (extra_msr_offset64)
347 		outp += sprintf(outp, "           MSR 0x%03X", extra_msr_offset64);
348 
349 	if (!debug)
350 		goto done;
351 
352 	if (do_irq)
353 		outp += sprintf(outp, "     IRQ");
354 	if (do_smi)
355 		outp += sprintf(outp, "     SMI");
356 
357 	if (do_nhm_cstates)
358 		outp += sprintf(outp, "  CPU%%c1");
359 	if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
360 		outp += sprintf(outp, "  CPU%%c3");
361 	if (do_nhm_cstates)
362 		outp += sprintf(outp, "  CPU%%c6");
363 	if (do_snb_cstates)
364 		outp += sprintf(outp, "  CPU%%c7");
365 
366 	if (do_dts)
367 		outp += sprintf(outp, " CoreTmp");
368 	if (do_ptm)
369 		outp += sprintf(outp, "  PkgTmp");
370 
371 	if (do_gfx_rc6_ms)
372 		outp += sprintf(outp, " GFX%%rc6");
373 
374 	if (do_gfx_mhz)
375 		outp += sprintf(outp, "  GFXMHz");
376 
377 	if (do_skl_residency) {
378 		outp += sprintf(outp, " Totl%%C0");
379 		outp += sprintf(outp, "  Any%%C0");
380 		outp += sprintf(outp, "  GFX%%C0");
381 		outp += sprintf(outp, " CPUGFX%%");
382 	}
383 
384 	if (do_pc2)
385 		outp += sprintf(outp, " Pkg%%pc2");
386 	if (do_pc3)
387 		outp += sprintf(outp, " Pkg%%pc3");
388 	if (do_pc6)
389 		outp += sprintf(outp, " Pkg%%pc6");
390 	if (do_pc7)
391 		outp += sprintf(outp, " Pkg%%pc7");
392 	if (do_c8_c9_c10) {
393 		outp += sprintf(outp, " Pkg%%pc8");
394 		outp += sprintf(outp, " Pkg%%pc9");
395 		outp += sprintf(outp, " Pk%%pc10");
396 	}
397 
398 	if (do_rapl && !rapl_joules) {
399 		if (do_rapl & RAPL_PKG)
400 			outp += sprintf(outp, " PkgWatt");
401 		if (do_rapl & RAPL_CORES)
402 			outp += sprintf(outp, " CorWatt");
403 		if (do_rapl & RAPL_GFX)
404 			outp += sprintf(outp, " GFXWatt");
405 		if (do_rapl & RAPL_DRAM)
406 			outp += sprintf(outp, " RAMWatt");
407 		if (do_rapl & RAPL_PKG_PERF_STATUS)
408 			outp += sprintf(outp, "   PKG_%%");
409 		if (do_rapl & RAPL_DRAM_PERF_STATUS)
410 			outp += sprintf(outp, "   RAM_%%");
411 	} else if (do_rapl && rapl_joules) {
412 		if (do_rapl & RAPL_PKG)
413 			outp += sprintf(outp, "   Pkg_J");
414 		if (do_rapl & RAPL_CORES)
415 			outp += sprintf(outp, "   Cor_J");
416 		if (do_rapl & RAPL_GFX)
417 			outp += sprintf(outp, "   GFX_J");
418 		if (do_rapl & RAPL_DRAM)
419 			outp += sprintf(outp, "   RAM_J");
420 		if (do_rapl & RAPL_PKG_PERF_STATUS)
421 			outp += sprintf(outp, "   PKG_%%");
422 		if (do_rapl & RAPL_DRAM_PERF_STATUS)
423 			outp += sprintf(outp, "   RAM_%%");
424 		outp += sprintf(outp, "   time");
425 
426 	}
427     done:
428 	outp += sprintf(outp, "\n");
429 }
430 
431 int dump_counters(struct thread_data *t, struct core_data *c,
432 	struct pkg_data *p)
433 {
434 	outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
435 
436 	if (t) {
437 		outp += sprintf(outp, "CPU: %d flags 0x%x\n",
438 			t->cpu_id, t->flags);
439 		outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
440 		outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
441 		outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
442 		outp += sprintf(outp, "c1: %016llX\n", t->c1);
443 		outp += sprintf(outp, "msr0x%x: %08llX\n",
444 			extra_delta_offset32, t->extra_delta32);
445 		outp += sprintf(outp, "msr0x%x: %016llX\n",
446 			extra_delta_offset64, t->extra_delta64);
447 		outp += sprintf(outp, "msr0x%x: %08llX\n",
448 			extra_msr_offset32, t->extra_msr32);
449 		outp += sprintf(outp, "msr0x%x: %016llX\n",
450 			extra_msr_offset64, t->extra_msr64);
451 		if (do_irq)
452 			outp += sprintf(outp, "IRQ: %08X\n", t->irq_count);
453 		if (do_smi)
454 			outp += sprintf(outp, "SMI: %08X\n", t->smi_count);
455 	}
456 
457 	if (c) {
458 		outp += sprintf(outp, "core: %d\n", c->core_id);
459 		outp += sprintf(outp, "c3: %016llX\n", c->c3);
460 		outp += sprintf(outp, "c6: %016llX\n", c->c6);
461 		outp += sprintf(outp, "c7: %016llX\n", c->c7);
462 		outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
463 	}
464 
465 	if (p) {
466 		outp += sprintf(outp, "package: %d\n", p->package_id);
467 
468 		outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
469 		outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
470 		outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
471 		outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
472 
473 		outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
474 		if (do_pc3)
475 			outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
476 		if (do_pc6)
477 			outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
478 		if (do_pc7)
479 			outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
480 		outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
481 		outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
482 		outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
483 		outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
484 		outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
485 		outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
486 		outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
487 		outp += sprintf(outp, "Throttle PKG: %0X\n",
488 			p->rapl_pkg_perf_status);
489 		outp += sprintf(outp, "Throttle RAM: %0X\n",
490 			p->rapl_dram_perf_status);
491 		outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
492 	}
493 
494 	outp += sprintf(outp, "\n");
495 
496 	return 0;
497 }
498 
499 /*
500  * column formatting convention & formats
501  */
502 int format_counters(struct thread_data *t, struct core_data *c,
503 	struct pkg_data *p)
504 {
505 	double interval_float;
506 	char *fmt8;
507 
508 	 /* if showing only 1st thread in core and this isn't one, bail out */
509 	if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
510 		return 0;
511 
512 	 /* if showing only 1st thread in pkg and this isn't one, bail out */
513 	if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
514 		return 0;
515 
516 	interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
517 
518 	/* topo columns, print blanks on 1st (average) line */
519 	if (t == &average.threads) {
520 		if (show_pkg)
521 			outp += sprintf(outp, "       -");
522 		if (show_core)
523 			outp += sprintf(outp, "       -");
524 		if (show_cpu)
525 			outp += sprintf(outp, "       -");
526 	} else {
527 		if (show_pkg) {
528 			if (p)
529 				outp += sprintf(outp, "%8d", p->package_id);
530 			else
531 				outp += sprintf(outp, "       -");
532 		}
533 		if (show_core) {
534 			if (c)
535 				outp += sprintf(outp, "%8d", c->core_id);
536 			else
537 				outp += sprintf(outp, "       -");
538 		}
539 		if (show_cpu)
540 			outp += sprintf(outp, "%8d", t->cpu_id);
541 	}
542 
543 	/* Avg_MHz */
544 	if (has_aperf)
545 		outp += sprintf(outp, "%8.0f",
546 			1.0 / units * t->aperf / interval_float);
547 
548 	/* Busy% */
549 	if (has_aperf) {
550 		if (!skip_c0)
551 			outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak);
552 		else
553 			outp += sprintf(outp, "********");
554 	}
555 
556 	/* Bzy_MHz */
557 	if (has_aperf) {
558 		if (has_base_hz)
559 			outp += sprintf(outp, "%8.0f", base_hz / units * t->aperf / t->mperf);
560 		else
561 			outp += sprintf(outp, "%8.0f",
562 				1.0 * t->tsc / units * t->aperf / t->mperf / interval_float);
563 	}
564 
565 	/* TSC_MHz */
566 	outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float);
567 
568 	/* delta */
569 	if (extra_delta_offset32)
570 		outp += sprintf(outp, "  %11llu", t->extra_delta32);
571 
572 	/* DELTA */
573 	if (extra_delta_offset64)
574 		outp += sprintf(outp, "  %11llu", t->extra_delta64);
575 	/* msr */
576 	if (extra_msr_offset32)
577 		outp += sprintf(outp, "  0x%08llx", t->extra_msr32);
578 
579 	/* MSR */
580 	if (extra_msr_offset64)
581 		outp += sprintf(outp, "  0x%016llx", t->extra_msr64);
582 
583 	if (!debug)
584 		goto done;
585 
586 	/* IRQ */
587 	if (do_irq)
588 		outp += sprintf(outp, "%8d", t->irq_count);
589 
590 	/* SMI */
591 	if (do_smi)
592 		outp += sprintf(outp, "%8d", t->smi_count);
593 
594 	if (do_nhm_cstates) {
595 		if (!skip_c1)
596 			outp += sprintf(outp, "%8.2f", 100.0 * t->c1/t->tsc);
597 		else
598 			outp += sprintf(outp, "********");
599 	}
600 
601 	/* print per-core data only for 1st thread in core */
602 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
603 		goto done;
604 
605 	if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
606 		outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc);
607 	if (do_nhm_cstates)
608 		outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc);
609 	if (do_snb_cstates)
610 		outp += sprintf(outp, "%8.2f", 100.0 * c->c7/t->tsc);
611 
612 	if (do_dts)
613 		outp += sprintf(outp, "%8d", c->core_temp_c);
614 
615 	/* print per-package data only for 1st core in package */
616 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
617 		goto done;
618 
619 	/* PkgTmp */
620 	if (do_ptm)
621 		outp += sprintf(outp, "%8d", p->pkg_temp_c);
622 
623 	/* GFXrc6 */
624 	if (do_gfx_rc6_ms)
625 		outp += sprintf(outp, "%8.2f", 100.0 * p->gfx_rc6_ms / 1000.0 / interval_float);
626 
627 	/* GFXMHz */
628 	if (do_gfx_mhz)
629 		outp += sprintf(outp, "%8d", p->gfx_mhz);
630 
631 	/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
632 	if (do_skl_residency) {
633 		outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc);
634 		outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_core_c0/t->tsc);
635 		outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc);
636 		outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc);
637 	}
638 
639 	if (do_pc2)
640 		outp += sprintf(outp, "%8.2f", 100.0 * p->pc2/t->tsc);
641 	if (do_pc3)
642 		outp += sprintf(outp, "%8.2f", 100.0 * p->pc3/t->tsc);
643 	if (do_pc6)
644 		outp += sprintf(outp, "%8.2f", 100.0 * p->pc6/t->tsc);
645 	if (do_pc7)
646 		outp += sprintf(outp, "%8.2f", 100.0 * p->pc7/t->tsc);
647 	if (do_c8_c9_c10) {
648 		outp += sprintf(outp, "%8.2f", 100.0 * p->pc8/t->tsc);
649 		outp += sprintf(outp, "%8.2f", 100.0 * p->pc9/t->tsc);
650 		outp += sprintf(outp, "%8.2f", 100.0 * p->pc10/t->tsc);
651 	}
652 
653 	/*
654  	 * If measurement interval exceeds minimum RAPL Joule Counter range,
655  	 * indicate that results are suspect by printing "**" in fraction place.
656  	 */
657 	if (interval_float < rapl_joule_counter_range)
658 		fmt8 = "%8.2f";
659 	else
660 		fmt8 = " %6.0f**";
661 
662 	if (do_rapl && !rapl_joules) {
663 		if (do_rapl & RAPL_PKG)
664 			outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float);
665 		if (do_rapl & RAPL_CORES)
666 			outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float);
667 		if (do_rapl & RAPL_GFX)
668 			outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float);
669 		if (do_rapl & RAPL_DRAM)
670 			outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float);
671 		if (do_rapl & RAPL_PKG_PERF_STATUS)
672 			outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
673 		if (do_rapl & RAPL_DRAM_PERF_STATUS)
674 			outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
675 	} else if (do_rapl && rapl_joules) {
676 		if (do_rapl & RAPL_PKG)
677 			outp += sprintf(outp, fmt8,
678 					p->energy_pkg * rapl_energy_units);
679 		if (do_rapl & RAPL_CORES)
680 			outp += sprintf(outp, fmt8,
681 					p->energy_cores * rapl_energy_units);
682 		if (do_rapl & RAPL_GFX)
683 			outp += sprintf(outp, fmt8,
684 					p->energy_gfx * rapl_energy_units);
685 		if (do_rapl & RAPL_DRAM)
686 			outp += sprintf(outp, fmt8,
687 					p->energy_dram * rapl_dram_energy_units);
688 		if (do_rapl & RAPL_PKG_PERF_STATUS)
689 			outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
690 		if (do_rapl & RAPL_DRAM_PERF_STATUS)
691 			outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
692 
693 		outp += sprintf(outp, fmt8, interval_float);
694 	}
695 done:
696 	outp += sprintf(outp, "\n");
697 
698 	return 0;
699 }
700 
701 void flush_output_stdout(void)
702 {
703 	FILE *filep;
704 
705 	if (outf == stderr)
706 		filep = stdout;
707 	else
708 		filep = outf;
709 
710 	fputs(output_buffer, filep);
711 	fflush(filep);
712 
713 	outp = output_buffer;
714 }
715 void flush_output_stderr(void)
716 {
717 	fputs(output_buffer, outf);
718 	fflush(outf);
719 	outp = output_buffer;
720 }
721 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
722 {
723 	static int printed;
724 
725 	if (!printed || !summary_only)
726 		print_header();
727 
728 	if (topo.num_cpus > 1)
729 		format_counters(&average.threads, &average.cores,
730 			&average.packages);
731 
732 	printed = 1;
733 
734 	if (summary_only)
735 		return;
736 
737 	for_all_cpus(format_counters, t, c, p);
738 }
739 
/*
 * Replace old with the distance from old to new for a 32-bit counter that
 * may have wrapped around once: old = new - old, plus 2^32 when new did not
 * advance past old.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as one
 * statement (safe in an un-braced if/else), and the arguments are
 * parenthesized.  Both arguments are evaluated more than once, and old must
 * be an lvalue.
 */
#define DELTA_WRAP32(new, old)					\
	do {							\
		if ((new) > (old)) {				\
			(old) = (new) - (old);			\
		} else {					\
			(old) = 0x100000000ULL + (new) - (old);	\
		}						\
	} while (0)
746 
747 void
748 delta_package(struct pkg_data *new, struct pkg_data *old)
749 {
750 
751 	if (do_skl_residency) {
752 		old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
753 		old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
754 		old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
755 		old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
756 	}
757 	old->pc2 = new->pc2 - old->pc2;
758 	if (do_pc3)
759 		old->pc3 = new->pc3 - old->pc3;
760 	if (do_pc6)
761 		old->pc6 = new->pc6 - old->pc6;
762 	if (do_pc7)
763 		old->pc7 = new->pc7 - old->pc7;
764 	old->pc8 = new->pc8 - old->pc8;
765 	old->pc9 = new->pc9 - old->pc9;
766 	old->pc10 = new->pc10 - old->pc10;
767 	old->pkg_temp_c = new->pkg_temp_c;
768 
769 	old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
770 	old->gfx_mhz = new->gfx_mhz;
771 
772 	DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
773 	DELTA_WRAP32(new->energy_cores, old->energy_cores);
774 	DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
775 	DELTA_WRAP32(new->energy_dram, old->energy_dram);
776 	DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
777 	DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
778 }
779 
780 void
781 delta_core(struct core_data *new, struct core_data *old)
782 {
783 	old->c3 = new->c3 - old->c3;
784 	old->c6 = new->c6 - old->c6;
785 	old->c7 = new->c7 - old->c7;
786 	old->core_temp_c = new->core_temp_c;
787 }
788 
789 /*
790  * old = new - old
791  */
792 void
793 delta_thread(struct thread_data *new, struct thread_data *old,
794 	struct core_data *core_delta)
795 {
796 	old->tsc = new->tsc - old->tsc;
797 
798 	/* check for TSC < 1 Mcycles over interval */
799 	if (old->tsc < (1000 * 1000))
800 		errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
801 		     "You can disable all c-states by booting with \"idle=poll\"\n"
802 		     "or just the deep ones with \"processor.max_cstate=1\"");
803 
804 	old->c1 = new->c1 - old->c1;
805 
806 	if (has_aperf) {
807 		if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
808 			old->aperf = new->aperf - old->aperf;
809 			old->mperf = new->mperf - old->mperf;
810 		} else {
811 
812 			if (!aperf_mperf_unstable) {
813 				fprintf(outf, "%s: APERF or MPERF went backwards *\n", progname);
814 				fprintf(outf, "* Frequency results do not cover entire interval *\n");
815 				fprintf(outf, "* fix this by running Linux-2.6.30 or later *\n");
816 
817 				aperf_mperf_unstable = 1;
818 			}
819 			/*
820 			 * mperf delta is likely a huge "positive" number
821 			 * can not use it for calculating c0 time
822 			 */
823 			skip_c0 = 1;
824 			skip_c1 = 1;
825 		}
826 	}
827 
828 
829 	if (use_c1_residency_msr) {
830 		/*
831 		 * Some models have a dedicated C1 residency MSR,
832 		 * which should be more accurate than the derivation below.
833 		 */
834 	} else {
835 		/*
836 		 * As counter collection is not atomic,
837 		 * it is possible for mperf's non-halted cycles + idle states
838 		 * to exceed TSC's all cycles: show c1 = 0% in that case.
839 		 */
840 		if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
841 			old->c1 = 0;
842 		else {
843 			/* normal case, derive c1 */
844 			old->c1 = old->tsc - old->mperf - core_delta->c3
845 				- core_delta->c6 - core_delta->c7;
846 		}
847 	}
848 
849 	if (old->mperf == 0) {
850 		if (debug > 1)
851 			fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
852 		old->mperf = 1;	/* divide by 0 protection */
853 	}
854 
855 	old->extra_delta32 = new->extra_delta32 - old->extra_delta32;
856 	old->extra_delta32 &= 0xFFFFFFFF;
857 
858 	old->extra_delta64 = new->extra_delta64 - old->extra_delta64;
859 
860 	/*
861 	 * Extra MSR is just a snapshot, simply copy latest w/o subtracting
862 	 */
863 	old->extra_msr32 = new->extra_msr32;
864 	old->extra_msr64 = new->extra_msr64;
865 
866 	if (do_irq)
867 		old->irq_count = new->irq_count - old->irq_count;
868 
869 	if (do_smi)
870 		old->smi_count = new->smi_count - old->smi_count;
871 }
872 
873 int delta_cpu(struct thread_data *t, struct core_data *c,
874 	struct pkg_data *p, struct thread_data *t2,
875 	struct core_data *c2, struct pkg_data *p2)
876 {
877 	/* calculate core delta only for 1st thread in core */
878 	if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
879 		delta_core(c, c2);
880 
881 	/* always calculate thread delta */
882 	delta_thread(t, t2, c2);	/* c2 is core delta */
883 
884 	/* calculate package delta only for 1st core in package */
885 	if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
886 		delta_package(p, p2);
887 
888 	return 0;
889 }
890 
891 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
892 {
893 	t->tsc = 0;
894 	t->aperf = 0;
895 	t->mperf = 0;
896 	t->c1 = 0;
897 
898 	t->extra_delta32 = 0;
899 	t->extra_delta64 = 0;
900 
901 	t->irq_count = 0;
902 	t->smi_count = 0;
903 
904 	/* tells format_counters to dump all fields from this set */
905 	t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
906 
907 	c->c3 = 0;
908 	c->c6 = 0;
909 	c->c7 = 0;
910 	c->core_temp_c = 0;
911 
912 	p->pkg_wtd_core_c0 = 0;
913 	p->pkg_any_core_c0 = 0;
914 	p->pkg_any_gfxe_c0 = 0;
915 	p->pkg_both_core_gfxe_c0 = 0;
916 
917 	p->pc2 = 0;
918 	if (do_pc3)
919 		p->pc3 = 0;
920 	if (do_pc6)
921 		p->pc6 = 0;
922 	if (do_pc7)
923 		p->pc7 = 0;
924 	p->pc8 = 0;
925 	p->pc9 = 0;
926 	p->pc10 = 0;
927 
928 	p->energy_pkg = 0;
929 	p->energy_dram = 0;
930 	p->energy_cores = 0;
931 	p->energy_gfx = 0;
932 	p->rapl_pkg_perf_status = 0;
933 	p->rapl_dram_perf_status = 0;
934 	p->pkg_temp_c = 0;
935 
936 	p->gfx_rc6_ms = 0;
937 	p->gfx_mhz = 0;
938 }
939 int sum_counters(struct thread_data *t, struct core_data *c,
940 	struct pkg_data *p)
941 {
942 	average.threads.tsc += t->tsc;
943 	average.threads.aperf += t->aperf;
944 	average.threads.mperf += t->mperf;
945 	average.threads.c1 += t->c1;
946 
947 	average.threads.extra_delta32 += t->extra_delta32;
948 	average.threads.extra_delta64 += t->extra_delta64;
949 
950 	average.threads.irq_count += t->irq_count;
951 	average.threads.smi_count += t->smi_count;
952 
953 	/* sum per-core values only for 1st thread in core */
954 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
955 		return 0;
956 
957 	average.cores.c3 += c->c3;
958 	average.cores.c6 += c->c6;
959 	average.cores.c7 += c->c7;
960 
961 	average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
962 
963 	/* sum per-pkg values only for 1st core in pkg */
964 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
965 		return 0;
966 
967 	if (do_skl_residency) {
968 		average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
969 		average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
970 		average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
971 		average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
972 	}
973 
974 	average.packages.pc2 += p->pc2;
975 	if (do_pc3)
976 		average.packages.pc3 += p->pc3;
977 	if (do_pc6)
978 		average.packages.pc6 += p->pc6;
979 	if (do_pc7)
980 		average.packages.pc7 += p->pc7;
981 	average.packages.pc8 += p->pc8;
982 	average.packages.pc9 += p->pc9;
983 	average.packages.pc10 += p->pc10;
984 
985 	average.packages.energy_pkg += p->energy_pkg;
986 	average.packages.energy_dram += p->energy_dram;
987 	average.packages.energy_cores += p->energy_cores;
988 	average.packages.energy_gfx += p->energy_gfx;
989 
990 	average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
991 	average.packages.gfx_mhz = p->gfx_mhz;
992 
993 	average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
994 
995 	average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
996 	average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
997 	return 0;
998 }
999 /*
1000  * sum the counters for all cpus in the system
1001  * compute the weighted average
1002  */
1003 void compute_average(struct thread_data *t, struct core_data *c,
1004 	struct pkg_data *p)
1005 {
1006 	clear_counters(&average.threads, &average.cores, &average.packages);
1007 
1008 	for_all_cpus(sum_counters, t, c, p);
1009 
1010 	average.threads.tsc /= topo.num_cpus;
1011 	average.threads.aperf /= topo.num_cpus;
1012 	average.threads.mperf /= topo.num_cpus;
1013 	average.threads.c1 /= topo.num_cpus;
1014 
1015 	average.threads.extra_delta32 /= topo.num_cpus;
1016 	average.threads.extra_delta32 &= 0xFFFFFFFF;
1017 
1018 	average.threads.extra_delta64 /= topo.num_cpus;
1019 
1020 	average.cores.c3 /= topo.num_cores;
1021 	average.cores.c6 /= topo.num_cores;
1022 	average.cores.c7 /= topo.num_cores;
1023 
1024 	if (do_skl_residency) {
1025 		average.packages.pkg_wtd_core_c0 /= topo.num_packages;
1026 		average.packages.pkg_any_core_c0 /= topo.num_packages;
1027 		average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
1028 		average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
1029 	}
1030 
1031 	average.packages.pc2 /= topo.num_packages;
1032 	if (do_pc3)
1033 		average.packages.pc3 /= topo.num_packages;
1034 	if (do_pc6)
1035 		average.packages.pc6 /= topo.num_packages;
1036 	if (do_pc7)
1037 		average.packages.pc7 /= topo.num_packages;
1038 
1039 	average.packages.pc8 /= topo.num_packages;
1040 	average.packages.pc9 /= topo.num_packages;
1041 	average.packages.pc10 /= topo.num_packages;
1042 }
1043 
1044 static unsigned long long rdtsc(void)
1045 {
1046 	unsigned int low, high;
1047 
1048 	asm volatile("rdtsc" : "=a" (low), "=d" (high));
1049 
1050 	return low | ((unsigned long long)high) << 32;
1051 }
1052 
1053 /*
1054  * get_counters(...)
1055  * migrate to cpu
1056  * acquire and record local counters for that cpu
1057  */
1058 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1059 {
1060 	int cpu = t->cpu_id;
1061 	unsigned long long msr;
1062 	int aperf_mperf_retry_count = 0;
1063 
1064 	if (cpu_migrate(cpu)) {
1065 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
1066 		return -1;
1067 	}
1068 
1069 retry:
1070 	t->tsc = rdtsc();	/* we are running on local CPU of interest */
1071 
1072 	if (has_aperf) {
1073 		unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
1074 
1075 		/*
1076 		 * The TSC, APERF and MPERF must be read together for
1077 		 * APERF/MPERF and MPERF/TSC to give accurate results.
1078 		 *
1079 		 * Unfortunately, APERF and MPERF are read by
1080 		 * individual system call, so delays may occur
1081 		 * between them.  If the time to read them
1082 		 * varies by a large amount, we re-read them.
1083 		 */
1084 
1085 		/*
1086 		 * This initial dummy APERF read has been seen to
1087 		 * reduce jitter in the subsequent reads.
1088 		 */
1089 
1090 		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1091 			return -3;
1092 
1093 		t->tsc = rdtsc();	/* re-read close to APERF */
1094 
1095 		tsc_before = t->tsc;
1096 
1097 		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1098 			return -3;
1099 
1100 		tsc_between = rdtsc();
1101 
1102 		if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
1103 			return -4;
1104 
1105 		tsc_after = rdtsc();
1106 
1107 		aperf_time = tsc_between - tsc_before;
1108 		mperf_time = tsc_after - tsc_between;
1109 
1110 		/*
1111 		 * If the system call latency to read APERF and MPERF
1112 		 * differ by more than 2x, then try again.
1113 		 */
1114 		if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
1115 			aperf_mperf_retry_count++;
1116 			if (aperf_mperf_retry_count < 5)
1117 				goto retry;
1118 			else
1119 				warnx("cpu%d jitter %lld %lld",
1120 					cpu, aperf_time, mperf_time);
1121 		}
1122 		aperf_mperf_retry_count = 0;
1123 
1124 		t->aperf = t->aperf * aperf_mperf_multiplier;
1125 		t->mperf = t->mperf * aperf_mperf_multiplier;
1126 	}
1127 
1128 	if (do_irq)
1129 		t->irq_count = irqs_per_cpu[cpu];
1130 	if (do_smi) {
1131 		if (get_msr(cpu, MSR_SMI_COUNT, &msr))
1132 			return -5;
1133 		t->smi_count = msr & 0xFFFFFFFF;
1134 	}
1135 	if (extra_delta_offset32) {
1136 		if (get_msr(cpu, extra_delta_offset32, &msr))
1137 			return -5;
1138 		t->extra_delta32 = msr & 0xFFFFFFFF;
1139 	}
1140 
1141 	if (extra_delta_offset64)
1142 		if (get_msr(cpu, extra_delta_offset64, &t->extra_delta64))
1143 			return -5;
1144 
1145 	if (extra_msr_offset32) {
1146 		if (get_msr(cpu, extra_msr_offset32, &msr))
1147 			return -5;
1148 		t->extra_msr32 = msr & 0xFFFFFFFF;
1149 	}
1150 
1151 	if (extra_msr_offset64)
1152 		if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64))
1153 			return -5;
1154 
1155 	if (use_c1_residency_msr) {
1156 		if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
1157 			return -6;
1158 	}
1159 
1160 	/* collect core counters only for 1st thread in core */
1161 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1162 		return 0;
1163 
1164 	if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) {
1165 		if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
1166 			return -6;
1167 	}
1168 
1169 	if (do_nhm_cstates && !do_knl_cstates) {
1170 		if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
1171 			return -7;
1172 	} else if (do_knl_cstates) {
1173 		if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
1174 			return -7;
1175 	}
1176 
1177 	if (do_snb_cstates)
1178 		if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
1179 			return -8;
1180 
1181 	if (do_dts) {
1182 		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
1183 			return -9;
1184 		c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1185 	}
1186 
1187 
1188 	/* collect package counters only for 1st core in package */
1189 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1190 		return 0;
1191 
1192 	if (do_skl_residency) {
1193 		if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
1194 			return -10;
1195 		if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
1196 			return -11;
1197 		if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
1198 			return -12;
1199 		if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
1200 			return -13;
1201 	}
1202 	if (do_pc3)
1203 		if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
1204 			return -9;
1205 	if (do_pc6)
1206 		if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
1207 			return -10;
1208 	if (do_pc2)
1209 		if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
1210 			return -11;
1211 	if (do_pc7)
1212 		if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
1213 			return -12;
1214 	if (do_c8_c9_c10) {
1215 		if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
1216 			return -13;
1217 		if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
1218 			return -13;
1219 		if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
1220 			return -13;
1221 	}
1222 	if (do_rapl & RAPL_PKG) {
1223 		if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
1224 			return -13;
1225 		p->energy_pkg = msr & 0xFFFFFFFF;
1226 	}
1227 	if (do_rapl & RAPL_CORES) {
1228 		if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
1229 			return -14;
1230 		p->energy_cores = msr & 0xFFFFFFFF;
1231 	}
1232 	if (do_rapl & RAPL_DRAM) {
1233 		if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
1234 			return -15;
1235 		p->energy_dram = msr & 0xFFFFFFFF;
1236 	}
1237 	if (do_rapl & RAPL_GFX) {
1238 		if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
1239 			return -16;
1240 		p->energy_gfx = msr & 0xFFFFFFFF;
1241 	}
1242 	if (do_rapl & RAPL_PKG_PERF_STATUS) {
1243 		if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
1244 			return -16;
1245 		p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
1246 	}
1247 	if (do_rapl & RAPL_DRAM_PERF_STATUS) {
1248 		if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
1249 			return -16;
1250 		p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
1251 	}
1252 	if (do_ptm) {
1253 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
1254 			return -17;
1255 		p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1256 	}
1257 
1258 	if (do_gfx_rc6_ms)
1259 		p->gfx_rc6_ms = gfx_cur_rc6_ms;
1260 
1261 	if (do_gfx_mhz)
1262 		p->gfx_mhz = gfx_cur_mhz;
1263 
1264 	return 0;
1265 }
1266 
1267 /*
1268  * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
1269  * If you change the values, note they are used both in comparisons
1270  * (>= PCL__7) and to index pkg_cstate_limit_strings[].
1271  */
1272 
#define PCLUKN 0 /* Unknown */
#define PCLRSV 1 /* Reserved */
#define PCL__0 2 /* PC0 */
#define PCL__1 3 /* PC1 */
#define PCL__2 4 /* PC2 */
#define PCL__3 5 /* PC3 */
#define PCL__4 6 /* PC4 */
#define PCL__6 7 /* PC6 */
#define PCL_6N 8 /* PC6 No Retention */
#define PCL_6R 9 /* PC6 Retention */
#define PCL__7 10 /* PC7 */
#define PCL_7S 11 /* PC7 Shrink */
#define PCL__8 12 /* PC8 */
#define PCL__9 13 /* PC9 */
#define PCLUNL 14 /* Unlimited */

int pkg_cstate_limit = PCLUKN;

/*
 * Printable name of each PCL* value, indexed by that value.
 * Designated initializers keep every string tied to its index, so that
 * PCLUKN prints as "unknown" and PCLRSV as "reserved".
 */
char *pkg_cstate_limit_strings[] = {
	[PCLUKN] = "unknown",
	[PCLRSV] = "reserved",
	[PCL__0] = "pc0",
	[PCL__1] = "pc1",
	[PCL__2] = "pc2",
	[PCL__3] = "pc3",
	[PCL__4] = "pc4",
	[PCL__6] = "pc6",
	[PCL_6N] = "pc6n",
	[PCL_6R] = "pc6r",
	[PCL__7] = "pc7",
	[PCL_7S] = "pc7s",
	[PCL__8] = "pc8",
	[PCL__9] = "pc9",
	[PCLUNL] = "unlimited",
};

/*
 * Per-platform decode tables: each maps a 4-bit limit field value (0..15)
 * to one of the PCL* values above.
 */
int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1299 
1300 
1301 static void
1302 calculate_tsc_tweak()
1303 {
1304 	tsc_tweak = base_hz / tsc_hz;
1305 }
1306 
1307 static void
1308 dump_nhm_platform_info(void)
1309 {
1310 	unsigned long long msr;
1311 	unsigned int ratio;
1312 
1313 	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
1314 
1315 	fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
1316 
1317 	ratio = (msr >> 40) & 0xFF;
1318 	fprintf(outf, "%d * %.0f = %.0f MHz max efficiency frequency\n",
1319 		ratio, bclk, ratio * bclk);
1320 
1321 	ratio = (msr >> 8) & 0xFF;
1322 	fprintf(outf, "%d * %.0f = %.0f MHz base frequency\n",
1323 		ratio, bclk, ratio * bclk);
1324 
1325 	get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
1326 	fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
1327 		base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
1328 
1329 	return;
1330 }
1331 
1332 static void
1333 dump_hsw_turbo_ratio_limits(void)
1334 {
1335 	unsigned long long msr;
1336 	unsigned int ratio;
1337 
1338 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
1339 
1340 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
1341 
1342 	ratio = (msr >> 8) & 0xFF;
1343 	if (ratio)
1344 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 18 active cores\n",
1345 			ratio, bclk, ratio * bclk);
1346 
1347 	ratio = (msr >> 0) & 0xFF;
1348 	if (ratio)
1349 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 17 active cores\n",
1350 			ratio, bclk, ratio * bclk);
1351 	return;
1352 }
1353 
1354 static void
1355 dump_ivt_turbo_ratio_limits(void)
1356 {
1357 	unsigned long long msr;
1358 	unsigned int ratio;
1359 
1360 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
1361 
1362 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
1363 
1364 	ratio = (msr >> 56) & 0xFF;
1365 	if (ratio)
1366 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 16 active cores\n",
1367 			ratio, bclk, ratio * bclk);
1368 
1369 	ratio = (msr >> 48) & 0xFF;
1370 	if (ratio)
1371 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 15 active cores\n",
1372 			ratio, bclk, ratio * bclk);
1373 
1374 	ratio = (msr >> 40) & 0xFF;
1375 	if (ratio)
1376 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 14 active cores\n",
1377 			ratio, bclk, ratio * bclk);
1378 
1379 	ratio = (msr >> 32) & 0xFF;
1380 	if (ratio)
1381 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 13 active cores\n",
1382 			ratio, bclk, ratio * bclk);
1383 
1384 	ratio = (msr >> 24) & 0xFF;
1385 	if (ratio)
1386 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 12 active cores\n",
1387 			ratio, bclk, ratio * bclk);
1388 
1389 	ratio = (msr >> 16) & 0xFF;
1390 	if (ratio)
1391 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 11 active cores\n",
1392 			ratio, bclk, ratio * bclk);
1393 
1394 	ratio = (msr >> 8) & 0xFF;
1395 	if (ratio)
1396 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 10 active cores\n",
1397 			ratio, bclk, ratio * bclk);
1398 
1399 	ratio = (msr >> 0) & 0xFF;
1400 	if (ratio)
1401 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
1402 			ratio, bclk, ratio * bclk);
1403 	return;
1404 }
1405 
1406 static void
1407 dump_nhm_turbo_ratio_limits(void)
1408 {
1409 	unsigned long long msr;
1410 	unsigned int ratio;
1411 
1412 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
1413 
1414 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
1415 
1416 	ratio = (msr >> 56) & 0xFF;
1417 	if (ratio)
1418 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 8 active cores\n",
1419 			ratio, bclk, ratio * bclk);
1420 
1421 	ratio = (msr >> 48) & 0xFF;
1422 	if (ratio)
1423 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 7 active cores\n",
1424 			ratio, bclk, ratio * bclk);
1425 
1426 	ratio = (msr >> 40) & 0xFF;
1427 	if (ratio)
1428 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 6 active cores\n",
1429 			ratio, bclk, ratio * bclk);
1430 
1431 	ratio = (msr >> 32) & 0xFF;
1432 	if (ratio)
1433 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 5 active cores\n",
1434 			ratio, bclk, ratio * bclk);
1435 
1436 	ratio = (msr >> 24) & 0xFF;
1437 	if (ratio)
1438 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
1439 			ratio, bclk, ratio * bclk);
1440 
1441 	ratio = (msr >> 16) & 0xFF;
1442 	if (ratio)
1443 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
1444 			ratio, bclk, ratio * bclk);
1445 
1446 	ratio = (msr >> 8) & 0xFF;
1447 	if (ratio)
1448 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
1449 			ratio, bclk, ratio * bclk);
1450 
1451 	ratio = (msr >> 0) & 0xFF;
1452 	if (ratio)
1453 		fprintf(outf, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
1454 			ratio, bclk, ratio * bclk);
1455 	return;
1456 }
1457 
1458 static void
1459 dump_knl_turbo_ratio_limits(void)
1460 {
1461 	const unsigned int buckets_no = 7;
1462 
1463 	unsigned long long msr;
1464 	int delta_cores, delta_ratio;
1465 	int i, b_nr;
1466 	unsigned int cores[buckets_no];
1467 	unsigned int ratio[buckets_no];
1468 
1469 	get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
1470 
1471 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
1472 		base_cpu, msr);
1473 
1474 	/**
1475 	 * Turbo encoding in KNL is as follows:
1476 	 * [0] -- Reserved
1477 	 * [7:1] -- Base value of number of active cores of bucket 1.
1478 	 * [15:8] -- Base value of freq ratio of bucket 1.
1479 	 * [20:16] -- +ve delta of number of active cores of bucket 2.
1480 	 * i.e. active cores of bucket 2 =
1481 	 * active cores of bucket 1 + delta
1482 	 * [23:21] -- Negative delta of freq ratio of bucket 2.
1483 	 * i.e. freq ratio of bucket 2 =
1484 	 * freq ratio of bucket 1 - delta
1485 	 * [28:24]-- +ve delta of number of active cores of bucket 3.
1486 	 * [31:29]-- -ve delta of freq ratio of bucket 3.
1487 	 * [36:32]-- +ve delta of number of active cores of bucket 4.
1488 	 * [39:37]-- -ve delta of freq ratio of bucket 4.
1489 	 * [44:40]-- +ve delta of number of active cores of bucket 5.
1490 	 * [47:45]-- -ve delta of freq ratio of bucket 5.
1491 	 * [52:48]-- +ve delta of number of active cores of bucket 6.
1492 	 * [55:53]-- -ve delta of freq ratio of bucket 6.
1493 	 * [60:56]-- +ve delta of number of active cores of bucket 7.
1494 	 * [63:61]-- -ve delta of freq ratio of bucket 7.
1495 	 */
1496 
1497 	b_nr = 0;
1498 	cores[b_nr] = (msr & 0xFF) >> 1;
1499 	ratio[b_nr] = (msr >> 8) & 0xFF;
1500 
1501 	for (i = 16; i < 64; i += 8) {
1502 		delta_cores = (msr >> i) & 0x1F;
1503 		delta_ratio = (msr >> (i + 5)) & 0x7;
1504 
1505 		cores[b_nr + 1] = cores[b_nr] + delta_cores;
1506 		ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
1507 		b_nr++;
1508 	}
1509 
1510 	for (i = buckets_no - 1; i >= 0; i--)
1511 		if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
1512 			fprintf(outf,
1513 				"%d * %.0f = %.0f MHz max turbo %d active cores\n",
1514 				ratio[i], bclk, ratio[i] * bclk, cores[i]);
1515 }
1516 
1517 static void
1518 dump_nhm_cst_cfg(void)
1519 {
1520 	unsigned long long msr;
1521 
1522 	get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
1523 
1524 #define SNB_C1_AUTO_UNDEMOTE              (1UL << 27)
1525 #define SNB_C3_AUTO_UNDEMOTE              (1UL << 28)
1526 
1527 	fprintf(outf, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr);
1528 
1529 	fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
1530 		(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
1531 		(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
1532 		(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
1533 		(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
1534 		(msr & (1 << 15)) ? "" : "UN",
1535 		(unsigned int)msr & 0xF,
1536 		pkg_cstate_limit_strings[pkg_cstate_limit]);
1537 	return;
1538 }
1539 
1540 static void
1541 dump_config_tdp(void)
1542 {
1543 	unsigned long long msr;
1544 
1545 	get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
1546 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
1547 	fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
1548 
1549 	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
1550 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
1551 	if (msr) {
1552 		fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
1553 		fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
1554 		fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
1555 		fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
1556 	}
1557 	fprintf(outf, ")\n");
1558 
1559 	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
1560 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
1561 	if (msr) {
1562 		fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
1563 		fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
1564 		fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
1565 		fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
1566 	}
1567 	fprintf(outf, ")\n");
1568 
1569 	get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
1570 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
1571 	if ((msr) & 0x3)
1572 		fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
1573 	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
1574 	fprintf(outf, ")\n");
1575 
1576 	get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
1577 	fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
1578 	fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
1579 	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
1580 	fprintf(outf, ")\n");
1581 }
1582 void free_fd_percpu(void)
1583 {
1584 	int i;
1585 
1586 	for (i = 0; i < topo.max_cpu_num; ++i) {
1587 		if (fd_percpu[i] != 0)
1588 			close(fd_percpu[i]);
1589 	}
1590 
1591 	free(fd_percpu);
1592 }
1593 
1594 void free_all_buffers(void)
1595 {
1596 	CPU_FREE(cpu_present_set);
1597 	cpu_present_set = NULL;
1598 	cpu_present_setsize = 0;
1599 
1600 	CPU_FREE(cpu_affinity_set);
1601 	cpu_affinity_set = NULL;
1602 	cpu_affinity_setsize = 0;
1603 
1604 	free(thread_even);
1605 	free(core_even);
1606 	free(package_even);
1607 
1608 	thread_even = NULL;
1609 	core_even = NULL;
1610 	package_even = NULL;
1611 
1612 	free(thread_odd);
1613 	free(core_odd);
1614 	free(package_odd);
1615 
1616 	thread_odd = NULL;
1617 	core_odd = NULL;
1618 	package_odd = NULL;
1619 
1620 	free(output_buffer);
1621 	output_buffer = NULL;
1622 	outp = NULL;
1623 
1624 	free_fd_percpu();
1625 
1626 	free(irq_column_2_cpu);
1627 	free(irqs_per_cpu);
1628 }
1629 
1630 /*
1631  * Open a file, and exit on failure
1632  */
1633 FILE *fopen_or_die(const char *path, const char *mode)
1634 {
1635 	FILE *filep = fopen(path, mode);
1636 	if (!filep)
1637 		err(1, "%s: open failed", path);
1638 	return filep;
1639 }
1640 
1641 /*
1642  * Parse a file containing a single int.
1643  */
1644 int parse_int_file(const char *fmt, ...)
1645 {
1646 	va_list args;
1647 	char path[PATH_MAX];
1648 	FILE *filep;
1649 	int value;
1650 
1651 	va_start(args, fmt);
1652 	vsnprintf(path, sizeof(path), fmt, args);
1653 	va_end(args);
1654 	filep = fopen_or_die(path, "r");
1655 	if (fscanf(filep, "%d", &value) != 1)
1656 		err(1, "%s: failed to parse number from file", path);
1657 	fclose(filep);
1658 	return value;
1659 }
1660 
1661 /*
1662  * get_cpu_position_in_core(cpu)
1663  * return the position of the CPU among its HT siblings in the core
1664  * return -1 if the sibling is not in list
1665  */
1666 int get_cpu_position_in_core(int cpu)
1667 {
1668 	char path[64];
1669 	FILE *filep;
1670 	int this_cpu;
1671 	char character;
1672 	int i;
1673 
1674 	sprintf(path,
1675 		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
1676 		cpu);
1677 	filep = fopen(path, "r");
1678 	if (filep == NULL) {
1679 		perror(path);
1680 		exit(1);
1681 	}
1682 
1683 	for (i = 0; i < topo.num_threads_per_core; i++) {
1684 		fscanf(filep, "%d", &this_cpu);
1685 		if (this_cpu == cpu) {
1686 			fclose(filep);
1687 			return i;
1688 		}
1689 
1690 		/* Account for no separator after last thread*/
1691 		if (i != (topo.num_threads_per_core - 1))
1692 			fscanf(filep, "%c", &character);
1693 	}
1694 
1695 	fclose(filep);
1696 	return -1;
1697 }
1698 
1699 /*
1700  * cpu_is_first_core_in_package(cpu)
1701  * return 1 if given CPU is 1st core in package
1702  */
1703 int cpu_is_first_core_in_package(int cpu)
1704 {
1705 	return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);
1706 }
1707 
/*
 * get_physical_package_id(cpu)
 * return the integer stored in the CPU's topology/physical_package_id file
 */
int get_physical_package_id(int cpu)
{
	int package_id;

	package_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);

	return package_id;
}
1712 
/*
 * get_core_id(cpu)
 * return the integer stored in the CPU's topology/core_id file
 */
int get_core_id(int cpu)
{
	int core_id;

	core_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);

	return core_id;
}
1717 
/*
 * get_num_ht_siblings(cpu)
 * return the number of CPUs named in the CPU's thread_siblings_list
 *
 * file format:
 * A ',' separated list of numbers and/or '-' separated ranges
 * (eg 1-2 or 1,3,4,5)
 *
 * Every listed number counts as one sibling and a range "a-b" counts as
 * b - a + 1, so "0" yields 1, "0,4" yields 2 and "0-3" yields 4.
 * Counting separator characters instead would report 2 for the
 * single-entry list "0\n" and 2 for the range "0-3".
 *
 * Returns at least 1, also when the list is empty or cannot be parsed.
 * The program exits if the file cannot be opened.
 */
int get_num_ht_siblings(int cpu)
{
	char path[80];
	char str[100];
	FILE *filep;
	char *cursor;
	int count = 0;

	snprintf(path, sizeof(path),
		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
	filep = fopen_or_die(path, "r");

	/* an unreadable file is handled like an empty list */
	if (fgets(str, sizeof(str), filep) == NULL)
		str[0] = '\0';

	fclose(filep);

	cursor = str;
	while (*cursor != '\0') {
		char *end;
		long first, last;

		first = strtol(cursor, &end, 10);
		if (end == cursor)
			break;		/* no number here: stop parsing */
		cursor = end;
		last = first;

		if (*cursor == '-') {
			cursor++;
			last = strtol(cursor, &end, 10);
			if (end == cursor)
				break;	/* range without an upper bound */
			cursor = end;
		}

		if (last >= first)
			count += (int)(last - first + 1);

		/* anything but ',' (newline, end of string) ends the list */
		if (*cursor != ',')
			break;
		cursor++;
	}

	return (count > 0) ? count : 1;
}
1748 
1749 /*
1750  * run func(thread, core, package) in topology order
1751  * skip non-present cpus
1752  */
1753 
1754 int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
1755 	struct pkg_data *, struct thread_data *, struct core_data *,
1756 	struct pkg_data *), struct thread_data *thread_base,
1757 	struct core_data *core_base, struct pkg_data *pkg_base,
1758 	struct thread_data *thread_base2, struct core_data *core_base2,
1759 	struct pkg_data *pkg_base2)
1760 {
1761 	int retval, pkg_no, core_no, thread_no;
1762 
1763 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
1764 		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
1765 			for (thread_no = 0; thread_no <
1766 				topo.num_threads_per_core; ++thread_no) {
1767 				struct thread_data *t, *t2;
1768 				struct core_data *c, *c2;
1769 				struct pkg_data *p, *p2;
1770 
1771 				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
1772 
1773 				if (cpu_is_not_present(t->cpu_id))
1774 					continue;
1775 
1776 				t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no);
1777 
1778 				c = GET_CORE(core_base, core_no, pkg_no);
1779 				c2 = GET_CORE(core_base2, core_no, pkg_no);
1780 
1781 				p = GET_PKG(pkg_base, pkg_no);
1782 				p2 = GET_PKG(pkg_base2, pkg_no);
1783 
1784 				retval = func(t, c, p, t2, c2, p2);
1785 				if (retval)
1786 					return retval;
1787 			}
1788 		}
1789 	}
1790 	return 0;
1791 }
1792 
1793 /*
1794  * run func(cpu) on every cpu in /proc/stat
1795  * return max_cpu number
1796  */
1797 int for_all_proc_cpus(int (func)(int))
1798 {
1799 	FILE *fp;
1800 	int cpu_num;
1801 	int retval;
1802 
1803 	fp = fopen_or_die(proc_stat, "r");
1804 
1805 	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
1806 	if (retval != 0)
1807 		err(1, "%s: failed to parse format", proc_stat);
1808 
1809 	while (1) {
1810 		retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
1811 		if (retval != 1)
1812 			break;
1813 
1814 		retval = func(cpu_num);
1815 		if (retval) {
1816 			fclose(fp);
1817 			return(retval);
1818 		}
1819 	}
1820 	fclose(fp);
1821 	return 0;
1822 }
1823 
/*
 * re_initialize()
 * Release all buffers, set them up again, and report the CPU count found
 * in topo.num_cpus afterwards.  The message goes to stdout via printf().
 */
void re_initialize(void)
{
	free_all_buffers();
	setup_all_buffers();
	printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
}
1830 
1831 
1832 /*
1833  * count_cpus()
1834  * remember the last one seen, it will be the max
1835  */
1836 int count_cpus(int cpu)
1837 {
1838 	if (topo.max_cpu_num < cpu)
1839 		topo.max_cpu_num = cpu;
1840 
1841 	topo.num_cpus += 1;
1842 	return 0;
1843 }
/*
 * mark_cpu_present(cpu)
 * Set the bit for cpu in cpu_present_set.  Always returns 0.
 */
int mark_cpu_present(int cpu)
{
	CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
	return 0;
}
1849 
1850 /*
1851  * snapshot_proc_interrupts()
1852  *
1853  * read and record summary of /proc/interrupts
1854  *
1855  * return 1 if config change requires a restart, else return 0
1856  */
1857 int snapshot_proc_interrupts(void)
1858 {
1859 	static FILE *fp;
1860 	int column, retval;
1861 
1862 	if (fp == NULL)
1863 		fp = fopen_or_die("/proc/interrupts", "r");
1864 	else
1865 		rewind(fp);
1866 
1867 	/* read 1st line of /proc/interrupts to get cpu* name for each column */
1868 	for (column = 0; column < topo.num_cpus; ++column) {
1869 		int cpu_number;
1870 
1871 		retval = fscanf(fp, " CPU%d", &cpu_number);
1872 		if (retval != 1)
1873 			break;
1874 
1875 		if (cpu_number > topo.max_cpu_num) {
1876 			warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
1877 			return 1;
1878 		}
1879 
1880 		irq_column_2_cpu[column] = cpu_number;
1881 		irqs_per_cpu[cpu_number] = 0;
1882 	}
1883 
1884 	/* read /proc/interrupt count lines and sum up irqs per cpu */
1885 	while (1) {
1886 		int column;
1887 		char buf[64];
1888 
1889 		retval = fscanf(fp, " %s:", buf);	/* flush irq# "N:" */
1890 		if (retval != 1)
1891 			break;
1892 
1893 		/* read the count per cpu */
1894 		for (column = 0; column < topo.num_cpus; ++column) {
1895 
1896 			int cpu_number, irq_count;
1897 
1898 			retval = fscanf(fp, " %d", &irq_count);
1899 			if (retval != 1)
1900 				break;
1901 
1902 			cpu_number = irq_column_2_cpu[column];
1903 			irqs_per_cpu[cpu_number] += irq_count;
1904 
1905 		}
1906 
1907 		while (getc(fp) != '\n')
1908 			;	/* flush interrupt description */
1909 
1910 	}
1911 	return 0;
1912 }
1913 /*
1914  * snapshot_gfx_rc6_ms()
1915  *
1916  * record snapshot of
1917  * /sys/class/drm/card0/power/rc6_residency_ms
1918  *
1919  * return 1 if config change requires a restart, else return 0
1920  */
1921 int snapshot_gfx_rc6_ms(void)
1922 {
1923 	FILE *fp;
1924 	int retval;
1925 
1926 	fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
1927 
1928 	retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
1929 	if (retval != 1)
1930 		err(1, "GFX rc6");
1931 
1932 	fclose(fp);
1933 
1934 	return 0;
1935 }
1936 /*
1937  * snapshot_gfx_mhz()
1938  *
1939  * record snapshot of
1940  * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
1941  *
1942  * return 1 if config change requires a restart, else return 0
1943  */
1944 int snapshot_gfx_mhz(void)
1945 {
1946 	static FILE *fp;
1947 	int retval;
1948 
1949 	if (fp == NULL)
1950 		fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
1951 	else
1952 		rewind(fp);
1953 
1954 	retval = fscanf(fp, "%d", &gfx_cur_mhz);
1955 	if (retval != 1)
1956 		err(1, "GFX MHz");
1957 
1958 	return 0;
1959 }
1960 
1961 /*
1962  * snapshot /proc and /sys files
1963  *
1964  * return 1 if configuration restart needed, else return 0
1965  */
1966 int snapshot_proc_sysfs_files(void)
1967 {
1968 	if (snapshot_proc_interrupts())
1969 		return 1;
1970 
1971 	if (do_gfx_rc6_ms)
1972 		snapshot_gfx_rc6_ms();
1973 
1974 	if (do_gfx_mhz)
1975 		snapshot_gfx_mhz();
1976 
1977 	return 0;
1978 }
1979 
/*
 * turbostat_loop()
 * Endless sampling loop.  Counters are collected alternately into the
 * EVEN and ODD sets, one sample per interval_ts; after each sample the
 * difference to the previous one is computed, averaged, formatted and
 * flushed.
 *
 * A return value of -1 from get_counters() leads to re_initialize() and a
 * restart of the whole sequence; a value below -1 ends the program.
 */
void turbostat_loop()
{
	int retval;
	int restarted = 0;

restart:
	restarted++;

	/* take the initial (EVEN) sample */
	snapshot_proc_sysfs_files();
	retval = for_all_cpus(get_counters, EVEN_COUNTERS);
	if (retval < -1) {
		exit(retval);
	} else if (retval == -1) {
		/* give up if the previous pass through "restart" failed the same way */
		if (restarted > 1) {
			exit(retval);
		}
		re_initialize();
		goto restart;
	}
	restarted = 0;
	gettimeofday(&tv_even, (struct timezone *)NULL);

	while (1) {
		/* a CPU listed in /proc/stat but not known as present: start over */
		if (for_all_proc_cpus(cpu_is_not_present)) {
			re_initialize();
			goto restart;
		}
		/* first half: sample into ODD, report ODD minus EVEN */
		nanosleep(&interval_ts, NULL);
		if (snapshot_proc_sysfs_files())
			goto restart;
		retval = for_all_cpus(get_counters, ODD_COUNTERS);
		if (retval < -1) {
			exit(retval);
		} else if (retval == -1) {
			re_initialize();
			goto restart;
		}
		gettimeofday(&tv_odd, (struct timezone *)NULL);
		timersub(&tv_odd, &tv_even, &tv_delta);
		for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
		/* the EVEN set is the one averaged and printed after delta_cpu() */
		compute_average(EVEN_COUNTERS);
		format_all_counters(EVEN_COUNTERS);
		flush_output_stdout();
		/* second half: sample into EVEN, report EVEN minus ODD */
		nanosleep(&interval_ts, NULL);
		if (snapshot_proc_sysfs_files())
			goto restart;
		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
		if (retval < -1) {
			exit(retval);
		} else if (retval == -1) {
			re_initialize();
			goto restart;
		}
		gettimeofday(&tv_even, (struct timezone *)NULL);
		timersub(&tv_even, &tv_odd, &tv_delta);
		for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS);
		/* the ODD set is the one averaged and printed after delta_cpu() */
		compute_average(ODD_COUNTERS);
		format_all_counters(ODD_COUNTERS);
		flush_output_stdout();
	}
}
2041 
2042 void check_dev_msr()
2043 {
2044 	struct stat sb;
2045 	char pathname[32];
2046 
2047 	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
2048 	if (stat(pathname, &sb))
2049  		if (system("/sbin/modprobe msr > /dev/null 2>&1"))
2050 			err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
2051 }
2052 
2053 void check_permissions()
2054 {
2055 	struct __user_cap_header_struct cap_header_data;
2056 	cap_user_header_t cap_header = &cap_header_data;
2057 	struct __user_cap_data_struct cap_data_data;
2058 	cap_user_data_t cap_data = &cap_data_data;
2059 	extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
2060 	int do_exit = 0;
2061 	char pathname[32];
2062 
2063 	/* check for CAP_SYS_RAWIO */
2064 	cap_header->pid = getpid();
2065 	cap_header->version = _LINUX_CAPABILITY_VERSION;
2066 	if (capget(cap_header, cap_data) < 0)
2067 		err(-6, "capget(2) failed");
2068 
2069 	if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
2070 		do_exit++;
2071 		warnx("capget(CAP_SYS_RAWIO) failed,"
2072 			" try \"# setcap cap_sys_rawio=ep %s\"", progname);
2073 	}
2074 
2075 	/* test file permissions */
2076 	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
2077 	if (euidaccess(pathname, R_OK)) {
2078 		do_exit++;
2079 		warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
2080 	}
2081 
2082 	/* if all else fails, thell them to be root */
2083 	if (do_exit)
2084 		if (getuid() != 0)
2085 			warnx("... or simply run as root");
2086 
2087 	if (do_exit)
2088 		exit(-6);
2089 }
2090 
2091 /*
2092  * NHM adds support for additional MSRs:
2093  *
2094  * MSR_SMI_COUNT                   0x00000034
2095  *
2096  * MSR_PLATFORM_INFO               0x000000ce
2097  * MSR_NHM_SNB_PKG_CST_CFG_CTL     0x000000e2
2098  *
2099  * MSR_PKG_C3_RESIDENCY            0x000003f8
2100  * MSR_PKG_C6_RESIDENCY            0x000003f9
2101  * MSR_CORE_C3_RESIDENCY           0x000003fc
2102  * MSR_CORE_C6_RESIDENCY           0x000003fd
2103  *
2104  * Side effect:
2105  * sets global pkg_cstate_limit to decode MSR_NHM_SNB_PKG_CST_CFG_CTL
2106  */
2107 int probe_nhm_msrs(unsigned int family, unsigned int model)
2108 {
2109 	unsigned long long msr;
2110 	unsigned int base_ratio;
2111 	int *pkg_cstate_limits;
2112 
2113 	if (!genuine_intel)
2114 		return 0;
2115 
2116 	if (family != 6)
2117 		return 0;
2118 
2119 	bclk = discover_bclk(family, model);
2120 
2121 	switch (model) {
2122 	case 0x1A:	/* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
2123 	case 0x1E:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
2124 	case 0x1F:	/* Core i7 and i5 Processor - Nehalem */
2125 	case 0x25:	/* Westmere Client - Clarkdale, Arrandale */
2126 	case 0x2C:	/* Westmere EP - Gulftown */
2127 	case 0x2E:	/* Nehalem-EX Xeon - Beckton */
2128 	case 0x2F:	/* Westmere-EX Xeon - Eagleton */
2129 		pkg_cstate_limits = nhm_pkg_cstate_limits;
2130 		break;
2131 	case 0x2A:	/* SNB */
2132 	case 0x2D:	/* SNB Xeon */
2133 	case 0x3A:	/* IVB */
2134 	case 0x3E:	/* IVB Xeon */
2135 		pkg_cstate_limits = snb_pkg_cstate_limits;
2136 		break;
2137 	case 0x3C:	/* HSW */
2138 	case 0x3F:	/* HSX */
2139 	case 0x45:	/* HSW */
2140 	case 0x46:	/* HSW */
2141 	case 0x3D:	/* BDW */
2142 	case 0x47:	/* BDW */
2143 	case 0x4F:	/* BDX */
2144 	case 0x56:	/* BDX-DE */
2145 	case 0x4E:	/* SKL */
2146 	case 0x5E:	/* SKL */
2147 		pkg_cstate_limits = hsw_pkg_cstate_limits;
2148 		break;
2149 	case 0x37:	/* BYT */
2150 	case 0x4D:	/* AVN */
2151 		pkg_cstate_limits = slv_pkg_cstate_limits;
2152 		break;
2153 	case 0x4C:	/* AMT */
2154 		pkg_cstate_limits = amt_pkg_cstate_limits;
2155 		break;
2156 	case 0x57:	/* PHI */
2157 		pkg_cstate_limits = phi_pkg_cstate_limits;
2158 		break;
2159 	default:
2160 		return 0;
2161 	}
2162 	get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
2163 	pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
2164 
2165 	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
2166 	base_ratio = (msr >> 8) & 0xFF;
2167 
2168 	base_hz = base_ratio * bclk * 1000000;
2169 	has_base_hz = 1;
2170 	return 1;
2171 }
/*
 * has_nhm_turbo_ratio_limit()
 * Returns 0 for the models that are Nehalem compatible but do not
 * provide the Nehalem-style turbo-ratio limit, 1 for every other model.
 * The family argument is not consulted.
 */
int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	if (model == 0x2E)	/* Nehalem-EX Xeon - Beckton */
		return 0;
	if (model == 0x2F)	/* Westmere-EX Xeon - Eagleton */
		return 0;
	if (model == 0x57)	/* PHI - Knights Landing (different MSR definition) */
		return 0;

	return 1;
}
2184 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
2185 {
2186 	if (!genuine_intel)
2187 		return 0;
2188 
2189 	if (family != 6)
2190 		return 0;
2191 
2192 	switch (model) {
2193 	case 0x3E:	/* IVB Xeon */
2194 	case 0x3F:	/* HSW Xeon */
2195 		return 1;
2196 	default:
2197 		return 0;
2198 	}
2199 }
2200 int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
2201 {
2202 	if (!genuine_intel)
2203 		return 0;
2204 
2205 	if (family != 6)
2206 		return 0;
2207 
2208 	switch (model) {
2209 	case 0x3F:	/* HSW Xeon */
2210 		return 1;
2211 	default:
2212 		return 0;
2213 	}
2214 }
2215 
2216 int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
2217 {
2218 	if (!genuine_intel)
2219 		return 0;
2220 
2221 	if (family != 6)
2222 		return 0;
2223 
2224 	switch (model) {
2225 	case 0x57:	/* Knights Landing */
2226 		return 1;
2227 	default:
2228 		return 0;
2229 	}
2230 }
2231 int has_config_tdp(unsigned int family, unsigned int model)
2232 {
2233 	if (!genuine_intel)
2234 		return 0;
2235 
2236 	if (family != 6)
2237 		return 0;
2238 
2239 	switch (model) {
2240 	case 0x3A:	/* IVB */
2241 	case 0x3C:	/* HSW */
2242 	case 0x3F:	/* HSX */
2243 	case 0x45:	/* HSW */
2244 	case 0x46:	/* HSW */
2245 	case 0x3D:	/* BDW */
2246 	case 0x47:	/* BDW */
2247 	case 0x4F:	/* BDX */
2248 	case 0x56:	/* BDX-DE */
2249 	case 0x4E:	/* SKL */
2250 	case 0x5E:	/* SKL */
2251 
2252 	case 0x57:	/* Knights Landing */
2253 		return 1;
2254 	default:
2255 		return 0;
2256 	}
2257 }
2258 
2259 static void
2260 dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
2261 {
2262 	if (!do_nhm_platform_info)
2263 		return;
2264 
2265 	dump_nhm_platform_info();
2266 
2267 	if (has_hsw_turbo_ratio_limit(family, model))
2268 		dump_hsw_turbo_ratio_limits();
2269 
2270 	if (has_ivt_turbo_ratio_limit(family, model))
2271 		dump_ivt_turbo_ratio_limits();
2272 
2273 	if (has_nhm_turbo_ratio_limit(family, model))
2274 		dump_nhm_turbo_ratio_limits();
2275 
2276 	if (has_knl_turbo_ratio_limit(family, model))
2277 		dump_knl_turbo_ratio_limits();
2278 
2279 	if (has_config_tdp(family, model))
2280 		dump_config_tdp();
2281 
2282 	dump_nhm_cst_cfg();
2283 }
2284 
2285 
2286 /*
2287  * print_epb()
2288  * Decode the ENERGY_PERF_BIAS MSR
2289  */
2290 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2291 {
2292 	unsigned long long msr;
2293 	char *epb_string;
2294 	int cpu;
2295 
2296 	if (!has_epb)
2297 		return 0;
2298 
2299 	cpu = t->cpu_id;
2300 
2301 	/* EPB is per-package */
2302 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2303 		return 0;
2304 
2305 	if (cpu_migrate(cpu)) {
2306 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2307 		return -1;
2308 	}
2309 
2310 	if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
2311 		return 0;
2312 
2313 	switch (msr & 0xF) {
2314 	case ENERGY_PERF_BIAS_PERFORMANCE:
2315 		epb_string = "performance";
2316 		break;
2317 	case ENERGY_PERF_BIAS_NORMAL:
2318 		epb_string = "balanced";
2319 		break;
2320 	case ENERGY_PERF_BIAS_POWERSAVE:
2321 		epb_string = "powersave";
2322 		break;
2323 	default:
2324 		epb_string = "custom";
2325 		break;
2326 	}
2327 	fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
2328 
2329 	return 0;
2330 }
2331 /*
2332  * print_hwp()
2333  * Decode the MSR_HWP_CAPABILITIES
2334  */
2335 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2336 {
2337 	unsigned long long msr;
2338 	int cpu;
2339 
2340 	if (!has_hwp)
2341 		return 0;
2342 
2343 	cpu = t->cpu_id;
2344 
2345 	/* MSR_HWP_CAPABILITIES is per-package */
2346 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2347 		return 0;
2348 
2349 	if (cpu_migrate(cpu)) {
2350 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2351 		return -1;
2352 	}
2353 
2354 	if (get_msr(cpu, MSR_PM_ENABLE, &msr))
2355 		return 0;
2356 
2357 	fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
2358 		cpu, msr, (msr & (1 << 0)) ? "" : "No-");
2359 
2360 	/* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
2361 	if ((msr & (1 << 0)) == 0)
2362 		return 0;
2363 
2364 	if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
2365 		return 0;
2366 
2367 	fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
2368 			"(high 0x%x guar 0x%x eff 0x%x low 0x%x)\n",
2369 			cpu, msr,
2370 			(unsigned int)HWP_HIGHEST_PERF(msr),
2371 			(unsigned int)HWP_GUARANTEED_PERF(msr),
2372 			(unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
2373 			(unsigned int)HWP_LOWEST_PERF(msr));
2374 
2375 	if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
2376 		return 0;
2377 
2378 	fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
2379 			"(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x pkg 0x%x)\n",
2380 			cpu, msr,
2381 			(unsigned int)(((msr) >> 0) & 0xff),
2382 			(unsigned int)(((msr) >> 8) & 0xff),
2383 			(unsigned int)(((msr) >> 16) & 0xff),
2384 			(unsigned int)(((msr) >> 24) & 0xff),
2385 			(unsigned int)(((msr) >> 32) & 0xff3),
2386 			(unsigned int)(((msr) >> 42) & 0x1));
2387 
2388 	if (has_hwp_pkg) {
2389 		if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
2390 			return 0;
2391 
2392 		fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
2393 			"(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x)\n",
2394 			cpu, msr,
2395 			(unsigned int)(((msr) >> 0) & 0xff),
2396 			(unsigned int)(((msr) >> 8) & 0xff),
2397 			(unsigned int)(((msr) >> 16) & 0xff),
2398 			(unsigned int)(((msr) >> 24) & 0xff),
2399 			(unsigned int)(((msr) >> 32) & 0xff3));
2400 	}
2401 	if (has_hwp_notify) {
2402 		if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
2403 			return 0;
2404 
2405 		fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
2406 			"(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
2407 			cpu, msr,
2408 			((msr) & 0x1) ? "EN" : "Dis",
2409 			((msr) & 0x2) ? "EN" : "Dis");
2410 	}
2411 	if (get_msr(cpu, MSR_HWP_STATUS, &msr))
2412 		return 0;
2413 
2414 	fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
2415 			"(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
2416 			cpu, msr,
2417 			((msr) & 0x1) ? "" : "No-",
2418 			((msr) & 0x2) ? "" : "No-");
2419 
2420 	return 0;
2421 }
2422 
2423 /*
2424  * print_perf_limit()
2425  */
2426 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2427 {
2428 	unsigned long long msr;
2429 	int cpu;
2430 
2431 	cpu = t->cpu_id;
2432 
2433 	/* per-package */
2434 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2435 		return 0;
2436 
2437 	if (cpu_migrate(cpu)) {
2438 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2439 		return -1;
2440 	}
2441 
2442 	if (do_core_perf_limit_reasons) {
2443 		get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
2444 		fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
2445 		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
2446 			(msr & 1 << 15) ? "bit15, " : "",
2447 			(msr & 1 << 14) ? "bit14, " : "",
2448 			(msr & 1 << 13) ? "Transitions, " : "",
2449 			(msr & 1 << 12) ? "MultiCoreTurbo, " : "",
2450 			(msr & 1 << 11) ? "PkgPwrL2, " : "",
2451 			(msr & 1 << 10) ? "PkgPwrL1, " : "",
2452 			(msr & 1 << 9) ? "CorePwr, " : "",
2453 			(msr & 1 << 8) ? "Amps, " : "",
2454 			(msr & 1 << 6) ? "VR-Therm, " : "",
2455 			(msr & 1 << 5) ? "Auto-HWP, " : "",
2456 			(msr & 1 << 4) ? "Graphics, " : "",
2457 			(msr & 1 << 2) ? "bit2, " : "",
2458 			(msr & 1 << 1) ? "ThermStatus, " : "",
2459 			(msr & 1 << 0) ? "PROCHOT, " : "");
2460 		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
2461 			(msr & 1 << 31) ? "bit31, " : "",
2462 			(msr & 1 << 30) ? "bit30, " : "",
2463 			(msr & 1 << 29) ? "Transitions, " : "",
2464 			(msr & 1 << 28) ? "MultiCoreTurbo, " : "",
2465 			(msr & 1 << 27) ? "PkgPwrL2, " : "",
2466 			(msr & 1 << 26) ? "PkgPwrL1, " : "",
2467 			(msr & 1 << 25) ? "CorePwr, " : "",
2468 			(msr & 1 << 24) ? "Amps, " : "",
2469 			(msr & 1 << 22) ? "VR-Therm, " : "",
2470 			(msr & 1 << 21) ? "Auto-HWP, " : "",
2471 			(msr & 1 << 20) ? "Graphics, " : "",
2472 			(msr & 1 << 18) ? "bit18, " : "",
2473 			(msr & 1 << 17) ? "ThermStatus, " : "",
2474 			(msr & 1 << 16) ? "PROCHOT, " : "");
2475 
2476 	}
2477 	if (do_gfx_perf_limit_reasons) {
2478 		get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
2479 		fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
2480 		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
2481 			(msr & 1 << 0) ? "PROCHOT, " : "",
2482 			(msr & 1 << 1) ? "ThermStatus, " : "",
2483 			(msr & 1 << 4) ? "Graphics, " : "",
2484 			(msr & 1 << 6) ? "VR-Therm, " : "",
2485 			(msr & 1 << 8) ? "Amps, " : "",
2486 			(msr & 1 << 9) ? "GFXPwr, " : "",
2487 			(msr & 1 << 10) ? "PkgPwrL1, " : "",
2488 			(msr & 1 << 11) ? "PkgPwrL2, " : "");
2489 		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
2490 			(msr & 1 << 16) ? "PROCHOT, " : "",
2491 			(msr & 1 << 17) ? "ThermStatus, " : "",
2492 			(msr & 1 << 20) ? "Graphics, " : "",
2493 			(msr & 1 << 22) ? "VR-Therm, " : "",
2494 			(msr & 1 << 24) ? "Amps, " : "",
2495 			(msr & 1 << 25) ? "GFXPwr, " : "",
2496 			(msr & 1 << 26) ? "PkgPwrL1, " : "",
2497 			(msr & 1 << 27) ? "PkgPwrL2, " : "");
2498 	}
2499 	if (do_ring_perf_limit_reasons) {
2500 		get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
2501 		fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
2502 		fprintf(outf, " (Active: %s%s%s%s%s%s)",
2503 			(msr & 1 << 0) ? "PROCHOT, " : "",
2504 			(msr & 1 << 1) ? "ThermStatus, " : "",
2505 			(msr & 1 << 6) ? "VR-Therm, " : "",
2506 			(msr & 1 << 8) ? "Amps, " : "",
2507 			(msr & 1 << 10) ? "PkgPwrL1, " : "",
2508 			(msr & 1 << 11) ? "PkgPwrL2, " : "");
2509 		fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
2510 			(msr & 1 << 16) ? "PROCHOT, " : "",
2511 			(msr & 1 << 17) ? "ThermStatus, " : "",
2512 			(msr & 1 << 22) ? "VR-Therm, " : "",
2513 			(msr & 1 << 24) ? "Amps, " : "",
2514 			(msr & 1 << 26) ? "PkgPwrL1, " : "",
2515 			(msr & 1 << 27) ? "PkgPwrL2, " : "");
2516 	}
2517 	return 0;
2518 }
2519 
2520 #define	RAPL_POWER_GRANULARITY	0x7FFF	/* 15 bit power granularity */
2521 #define	RAPL_TIME_GRANULARITY	0x3F /* 6 bit time granularity */
2522 
2523 double get_tdp(unsigned int model)
2524 {
2525 	unsigned long long msr;
2526 
2527 	if (do_rapl & RAPL_PKG_POWER_INFO)
2528 		if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
2529 			return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
2530 
2531 	switch (model) {
2532 	case 0x37:
2533 	case 0x4D:
2534 		return 30.0;
2535 	default:
2536 		return 135.0;
2537 	}
2538 }
2539 
2540 /*
2541  * rapl_dram_energy_units_probe()
2542  * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
2543  */
2544 static double
2545 rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
2546 {
2547 	/* only called for genuine_intel, family 6 */
2548 
2549 	switch (model) {
2550 	case 0x3F:	/* HSX */
2551 	case 0x4F:	/* BDX */
2552 	case 0x56:	/* BDX-DE */
2553 	case 0x57:	/* KNL */
2554 		return (rapl_dram_energy_units = 15.3 / 1000000);
2555 	default:
2556 		return (rapl_energy_units);
2557 	}
2558 }
2559 
2560 
2561 /*
2562  * rapl_probe()
2563  *
2564  * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
2565  */
2566 void rapl_probe(unsigned int family, unsigned int model)
2567 {
2568 	unsigned long long msr;
2569 	unsigned int time_unit;
2570 	double tdp;
2571 
2572 	if (!genuine_intel)
2573 		return;
2574 
2575 	if (family != 6)
2576 		return;
2577 
2578 	switch (model) {
2579 	case 0x2A:
2580 	case 0x3A:
2581 	case 0x3C:	/* HSW */
2582 	case 0x45:	/* HSW */
2583 	case 0x46:	/* HSW */
2584 	case 0x3D:	/* BDW */
2585 	case 0x47:	/* BDW */
2586 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
2587 		break;
2588 	case 0x4E:	/* SKL */
2589 	case 0x5E:	/* SKL */
2590 		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
2591 		break;
2592 	case 0x3F:	/* HSX */
2593 	case 0x4F:	/* BDX */
2594 	case 0x56:	/* BDX-DE */
2595 	case 0x57:	/* KNL */
2596 		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
2597 		break;
2598 	case 0x2D:
2599 	case 0x3E:
2600 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
2601 		break;
2602 	case 0x37:	/* BYT */
2603 	case 0x4D:	/* AVN */
2604 		do_rapl = RAPL_PKG | RAPL_CORES ;
2605 		break;
2606 	default:
2607 		return;
2608 	}
2609 
2610 	/* units on package 0, verify later other packages match */
2611 	if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
2612 		return;
2613 
2614 	rapl_power_units = 1.0 / (1 << (msr & 0xF));
2615 	if (model == 0x37)
2616 		rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
2617 	else
2618 		rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
2619 
2620 	rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
2621 
2622 	time_unit = msr >> 16 & 0xF;
2623 	if (time_unit == 0)
2624 		time_unit = 0xA;
2625 
2626 	rapl_time_units = 1.0 / (1 << (time_unit));
2627 
2628 	tdp = get_tdp(model);
2629 
2630 	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
2631 	if (debug)
2632 		fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
2633 
2634 	return;
2635 }
2636 
2637 void perf_limit_reasons_probe(unsigned int family, unsigned int model)
2638 {
2639 	if (!genuine_intel)
2640 		return;
2641 
2642 	if (family != 6)
2643 		return;
2644 
2645 	switch (model) {
2646 	case 0x3C:	/* HSW */
2647 	case 0x45:	/* HSW */
2648 	case 0x46:	/* HSW */
2649 		do_gfx_perf_limit_reasons = 1;
2650 	case 0x3F:	/* HSX */
2651 		do_core_perf_limit_reasons = 1;
2652 		do_ring_perf_limit_reasons = 1;
2653 	default:
2654 		return;
2655 	}
2656 }
2657 
2658 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2659 {
2660 	unsigned long long msr;
2661 	unsigned int dts;
2662 	int cpu;
2663 
2664 	if (!(do_dts || do_ptm))
2665 		return 0;
2666 
2667 	cpu = t->cpu_id;
2668 
2669 	/* DTS is per-core, no need to print for each thread */
2670 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
2671 		return 0;
2672 
2673 	if (cpu_migrate(cpu)) {
2674 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2675 		return -1;
2676 	}
2677 
2678 	if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
2679 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
2680 			return 0;
2681 
2682 		dts = (msr >> 16) & 0x7F;
2683 		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
2684 			cpu, msr, tcc_activation_temp - dts);
2685 
2686 #ifdef	THERM_DEBUG
2687 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
2688 			return 0;
2689 
2690 		dts = (msr >> 16) & 0x7F;
2691 		dts2 = (msr >> 8) & 0x7F;
2692 		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
2693 			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
2694 #endif
2695 	}
2696 
2697 
2698 	if (do_dts) {
2699 		unsigned int resolution;
2700 
2701 		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
2702 			return 0;
2703 
2704 		dts = (msr >> 16) & 0x7F;
2705 		resolution = (msr >> 27) & 0xF;
2706 		fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
2707 			cpu, msr, tcc_activation_temp - dts, resolution);
2708 
2709 #ifdef THERM_DEBUG
2710 		if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
2711 			return 0;
2712 
2713 		dts = (msr >> 16) & 0x7F;
2714 		dts2 = (msr >> 8) & 0x7F;
2715 		fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
2716 			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
2717 #endif
2718 	}
2719 
2720 	return 0;
2721 }
2722 
2723 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
2724 {
2725 	fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
2726 		cpu, label,
2727 		((msr >> 15) & 1) ? "EN" : "DIS",
2728 		((msr >> 0) & 0x7FFF) * rapl_power_units,
2729 		(1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
2730 		(((msr >> 16) & 1) ? "EN" : "DIS"));
2731 
2732 	return;
2733 }
2734 
2735 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2736 {
2737 	unsigned long long msr;
2738 	int cpu;
2739 
2740 	if (!do_rapl)
2741 		return 0;
2742 
2743 	/* RAPL counters are per package, so print only for 1st thread/package */
2744 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2745 		return 0;
2746 
2747 	cpu = t->cpu_id;
2748 	if (cpu_migrate(cpu)) {
2749 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2750 		return -1;
2751 	}
2752 
2753 	if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
2754 		return -1;
2755 
2756 	if (debug) {
2757 		fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
2758 			"(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
2759 			rapl_power_units, rapl_energy_units, rapl_time_units);
2760 	}
2761 	if (do_rapl & RAPL_PKG_POWER_INFO) {
2762 
2763 		if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
2764                 	return -5;
2765 
2766 
2767 		fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
2768 			cpu, msr,
2769 			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2770 			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2771 			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2772 			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
2773 
2774 	}
2775 	if (do_rapl & RAPL_PKG) {
2776 
2777 		if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
2778 			return -9;
2779 
2780 		fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
2781 			cpu, msr, (msr >> 63) & 1 ? "": "UN");
2782 
2783 		print_power_limit_msr(cpu, msr, "PKG Limit #1");
2784 		fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
2785 			cpu,
2786 			((msr >> 47) & 1) ? "EN" : "DIS",
2787 			((msr >> 32) & 0x7FFF) * rapl_power_units,
2788 			(1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
2789 			((msr >> 48) & 1) ? "EN" : "DIS");
2790 	}
2791 
2792 	if (do_rapl & RAPL_DRAM_POWER_INFO) {
2793 		if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
2794                 	return -6;
2795 
2796 		fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
2797 			cpu, msr,
2798 			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2799 			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2800 			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2801 			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
2802 	}
2803 	if (do_rapl & RAPL_DRAM) {
2804 		if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
2805 			return -9;
2806 		fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
2807 				cpu, msr, (msr >> 31) & 1 ? "": "UN");
2808 
2809 		print_power_limit_msr(cpu, msr, "DRAM Limit");
2810 	}
2811 	if (do_rapl & RAPL_CORE_POLICY) {
2812 		if (debug) {
2813 			if (get_msr(cpu, MSR_PP0_POLICY, &msr))
2814 				return -7;
2815 
2816 			fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
2817 		}
2818 	}
2819 	if (do_rapl & RAPL_CORES) {
2820 		if (debug) {
2821 
2822 			if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
2823 				return -9;
2824 			fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
2825 					cpu, msr, (msr >> 31) & 1 ? "": "UN");
2826 			print_power_limit_msr(cpu, msr, "Cores Limit");
2827 		}
2828 	}
2829 	if (do_rapl & RAPL_GFX) {
2830 		if (debug) {
2831 			if (get_msr(cpu, MSR_PP1_POLICY, &msr))
2832 				return -8;
2833 
2834 			fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
2835 
2836 			if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
2837 				return -9;
2838 			fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
2839 					cpu, msr, (msr >> 31) & 1 ? "": "UN");
2840 			print_power_limit_msr(cpu, msr, "GFX Limit");
2841 		}
2842 	}
2843 	return 0;
2844 }
2845 
2846 /*
2847  * SNB adds support for additional MSRs:
2848  *
2849  * MSR_PKG_C7_RESIDENCY            0x000003fa
2850  * MSR_CORE_C7_RESIDENCY           0x000003fe
2851  * MSR_PKG_C2_RESIDENCY            0x0000060d
2852  */
2853 
2854 int has_snb_msrs(unsigned int family, unsigned int model)
2855 {
2856 	if (!genuine_intel)
2857 		return 0;
2858 
2859 	switch (model) {
2860 	case 0x2A:
2861 	case 0x2D:
2862 	case 0x3A:	/* IVB */
2863 	case 0x3E:	/* IVB Xeon */
2864 	case 0x3C:	/* HSW */
2865 	case 0x3F:	/* HSW */
2866 	case 0x45:	/* HSW */
2867 	case 0x46:	/* HSW */
2868 	case 0x3D:	/* BDW */
2869 	case 0x47:	/* BDW */
2870 	case 0x4F:	/* BDX */
2871 	case 0x56:	/* BDX-DE */
2872 	case 0x4E:	/* SKL */
2873 	case 0x5E:	/* SKL */
2874 		return 1;
2875 	}
2876 	return 0;
2877 }
2878 
2879 /*
2880  * HSW adds support for additional MSRs:
2881  *
2882  * MSR_PKG_C8_RESIDENCY            0x00000630
2883  * MSR_PKG_C9_RESIDENCY            0x00000631
2884  * MSR_PKG_C10_RESIDENCY           0x00000632
2885  */
2886 int has_hsw_msrs(unsigned int family, unsigned int model)
2887 {
2888 	if (!genuine_intel)
2889 		return 0;
2890 
2891 	switch (model) {
2892 	case 0x45:	/* HSW */
2893 	case 0x3D:	/* BDW */
2894 	case 0x4E:	/* SKL */
2895 	case 0x5E:	/* SKL */
2896 		return 1;
2897 	}
2898 	return 0;
2899 }
2900 
2901 /*
2902  * SKL adds support for additional MSRS:
2903  *
2904  * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
2905  * MSR_PKG_ANY_CORE_C0_RES         0x00000659
2906  * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
2907  * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
2908  */
2909 int has_skl_msrs(unsigned int family, unsigned int model)
2910 {
2911 	if (!genuine_intel)
2912 		return 0;
2913 
2914 	switch (model) {
2915 	case 0x4E:	/* SKL */
2916 	case 0x5E:	/* SKL */
2917 		return 1;
2918 	}
2919 	return 0;
2920 }
2921 
2922 
2923 
2924 int is_slm(unsigned int family, unsigned int model)
2925 {
2926 	if (!genuine_intel)
2927 		return 0;
2928 	switch (model) {
2929 	case 0x37:	/* BYT */
2930 	case 0x4D:	/* AVN */
2931 		return 1;
2932 	}
2933 	return 0;
2934 }
2935 
2936 int is_knl(unsigned int family, unsigned int model)
2937 {
2938 	if (!genuine_intel)
2939 		return 0;
2940 	switch (model) {
2941 	case 0x57:	/* KNL */
2942 		return 1;
2943 	}
2944 	return 0;
2945 }
2946 
/*
 * get_aperf_mperf_multiplier()
 * Returns 1024 when is_knl() identifies the CPU as KNL, and 1 otherwise.
 */
unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
{
	return is_knl(family, model) ? 1024 : 1;
}
2953 
2954 #define SLM_BCLK_FREQS 5
2955 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
2956 
2957 double slm_bclk(void)
2958 {
2959 	unsigned long long msr = 3;
2960 	unsigned int i;
2961 	double freq;
2962 
2963 	if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
2964 		fprintf(outf, "SLM BCLK: unknown\n");
2965 
2966 	i = msr & 0xf;
2967 	if (i >= SLM_BCLK_FREQS) {
2968 		fprintf(outf, "SLM BCLK[%d] invalid\n", i);
2969 		msr = 3;
2970 	}
2971 	freq = slm_freq_table[i];
2972 
2973 	fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
2974 
2975 	return freq;
2976 }
2977 
/*
 * discover_bclk()
 * Return the bus clock in MHz for this family/model:
 *  100.00 when has_snb_msrs() or is_knl() matches,
 *  the value reported by slm_bclk() when is_slm() matches,
 *  133.33 otherwise.
 */
double discover_bclk(unsigned int family, unsigned int model)
{
	if (has_snb_msrs(family, model) || is_knl(family, model))
		return 100.00;

	if (is_slm(family, model))
		return slm_bclk();

	return 133.33;
}
2987 
2988 /*
2989  * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
2990  * the Thermal Control Circuit (TCC) activates.
2991  * This is usually equal to tjMax.
2992  *
2993  * Older processors do not have this MSR, so there we guess,
2994  * but also allow cmdline over-ride with -T.
2995  *
2996  * Several MSR temperature values are in units of degrees-C
2997  * below this value, including the Digital Thermal Sensor (DTS),
2998  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
2999  */
3000 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3001 {
3002 	unsigned long long msr;
3003 	unsigned int target_c_local;
3004 	int cpu;
3005 
3006 	/* tcc_activation_temp is used only for dts or ptm */
3007 	if (!(do_dts || do_ptm))
3008 		return 0;
3009 
3010 	/* this is a per-package concept */
3011 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3012 		return 0;
3013 
3014 	cpu = t->cpu_id;
3015 	if (cpu_migrate(cpu)) {
3016 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3017 		return -1;
3018 	}
3019 
3020 	if (tcc_activation_temp_override != 0) {
3021 		tcc_activation_temp = tcc_activation_temp_override;
3022 		fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
3023 			cpu, tcc_activation_temp);
3024 		return 0;
3025 	}
3026 
3027 	/* Temperature Target MSR is Nehalem and newer only */
3028 	if (!do_nhm_platform_info)
3029 		goto guess;
3030 
3031 	if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
3032 		goto guess;
3033 
3034 	target_c_local = (msr >> 16) & 0xFF;
3035 
3036 	if (debug)
3037 		fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
3038 			cpu, msr, target_c_local);
3039 
3040 	if (!target_c_local)
3041 		goto guess;
3042 
3043 	tcc_activation_temp = target_c_local;
3044 
3045 	return 0;
3046 
3047 guess:
3048 	tcc_activation_temp = TJMAX_DEFAULT;
3049 	fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
3050 		cpu, tcc_activation_temp);
3051 
3052 	return 0;
3053 }
3054 
3055 void decode_feature_control_msr(void)
3056 {
3057 	unsigned long long msr;
3058 
3059 	if (!get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr))
3060 		fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
3061 			base_cpu, msr,
3062 			msr & FEATURE_CONTROL_LOCKED ? "" : "UN-",
3063 			msr & (1 << 18) ? "SGX" : "");
3064 }
3065 
3066 void decode_misc_enable_msr(void)
3067 {
3068 	unsigned long long msr;
3069 
3070 	if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
3071 		fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n",
3072 			base_cpu, msr,
3073 			msr & (1 << 3) ? "TCC" : "",
3074 			msr & (1 << 16) ? "EIST" : "",
3075 			msr & (1 << 18) ? "MONITOR" : "");
3076 }
3077 
3078 /*
3079  * Decode MSR_MISC_PWR_MGMT
3080  *
3081  * Decode the bits according to the Nehalem documentation
3082  * bit[0] seems to continue to have same meaning going forward
3083  * bit[1] less so...
3084  */
3085 void decode_misc_pwr_mgmt_msr(void)
3086 {
3087 	unsigned long long msr;
3088 
3089 	if (!do_nhm_platform_info)
3090 		return;
3091 
3092 	if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
3093 		fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB)\n",
3094 			base_cpu, msr,
3095 			msr & (1 << 0) ? "DIS" : "EN",
3096 			msr & (1 << 1) ? "EN" : "DIS");
3097 }
3098 
3099 void process_cpuid()
3100 {
3101 	unsigned int eax, ebx, ecx, edx, max_level, max_extended_level;
3102 	unsigned int fms, family, model, stepping;
3103 
3104 	eax = ebx = ecx = edx = 0;
3105 
3106 	__cpuid(0, max_level, ebx, ecx, edx);
3107 
3108 	if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
3109 		genuine_intel = 1;
3110 
3111 	if (debug)
3112 		fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
3113 			(char *)&ebx, (char *)&edx, (char *)&ecx);
3114 
3115 	__cpuid(1, fms, ebx, ecx, edx);
3116 	family = (fms >> 8) & 0xf;
3117 	model = (fms >> 4) & 0xf;
3118 	stepping = fms & 0xf;
3119 	if (family == 6 || family == 0xf)
3120 		model += ((fms >> 16) & 0xf) << 4;
3121 
3122 	if (debug) {
3123 		fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
3124 			max_level, family, model, stepping, family, model, stepping);
3125 		fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n",
3126 			ecx & (1 << 0) ? "SSE3" : "-",
3127 			ecx & (1 << 3) ? "MONITOR" : "-",
3128 			ecx & (1 << 6) ? "SMX" : "-",
3129 			ecx & (1 << 7) ? "EIST" : "-",
3130 			ecx & (1 << 8) ? "TM2" : "-",
3131 			edx & (1 << 4) ? "TSC" : "-",
3132 			edx & (1 << 5) ? "MSR" : "-",
3133 			edx & (1 << 22) ? "ACPI-TM" : "-",
3134 			edx & (1 << 29) ? "TM" : "-");
3135 	}
3136 
3137 	if (!(edx & (1 << 5)))
3138 		errx(1, "CPUID: no MSR");
3139 
3140 	/*
3141 	 * check max extended function levels of CPUID.
3142 	 * This is needed to check for invariant TSC.
3143 	 * This check is valid for both Intel and AMD.
3144 	 */
3145 	ebx = ecx = edx = 0;
3146 	__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
3147 
3148 	if (max_extended_level >= 0x80000007) {
3149 
3150 		/*
3151 		 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
3152 		 * this check is valid for both Intel and AMD
3153 		 */
3154 		__cpuid(0x80000007, eax, ebx, ecx, edx);
3155 		has_invariant_tsc = edx & (1 << 8);
3156 	}
3157 
3158 	/*
3159 	 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
3160 	 * this check is valid for both Intel and AMD
3161 	 */
3162 
3163 	__cpuid(0x6, eax, ebx, ecx, edx);
3164 	has_aperf = ecx & (1 << 0);
3165 	do_dts = eax & (1 << 0);
3166 	do_ptm = eax & (1 << 6);
3167 	has_hwp = eax & (1 << 7);
3168 	has_hwp_notify = eax & (1 << 8);
3169 	has_hwp_activity_window = eax & (1 << 9);
3170 	has_hwp_epp = eax & (1 << 10);
3171 	has_hwp_pkg = eax & (1 << 11);
3172 	has_epb = ecx & (1 << 3);
3173 
3174 	if (debug)
3175 		fprintf(outf, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, "
3176 			"%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
3177 			has_aperf ? "" : "No-",
3178 			do_dts ? "" : "No-",
3179 			do_ptm ? "" : "No-",
3180 			has_hwp ? "" : "No-",
3181 			has_hwp_notify ? "" : "No-",
3182 			has_hwp_activity_window ? "" : "No-",
3183 			has_hwp_epp ? "" : "No-",
3184 			has_hwp_pkg ? "" : "No-",
3185 			has_epb ? "" : "No-");
3186 
3187 	if (debug)
3188 		decode_misc_enable_msr();
3189 
3190 	if (max_level >= 0x7) {
3191 		int has_sgx;
3192 
3193 		ecx = 0;
3194 
3195 		__cpuid_count(0x7, 0, eax, ebx, ecx, edx);
3196 
3197 		has_sgx = ebx & (1 << 2);
3198 		fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
3199 
3200 		if (has_sgx)
3201 			decode_feature_control_msr();
3202 	}
3203 
3204 	if (max_level >= 0x15) {
3205 		unsigned int eax_crystal;
3206 		unsigned int ebx_tsc;
3207 
3208 		/*
3209 		 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
3210 		 */
3211 		eax_crystal = ebx_tsc = crystal_hz = edx = 0;
3212 		__cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
3213 
3214 		if (ebx_tsc != 0) {
3215 
3216 			if (debug && (ebx != 0))
3217 				fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
3218 					eax_crystal, ebx_tsc, crystal_hz);
3219 
3220 			if (crystal_hz == 0)
3221 				switch(model) {
3222 				case 0x4E:	/* SKL */
3223 				case 0x5E:	/* SKL */
3224 					crystal_hz = 24000000;	/* 24 MHz */
3225 					break;
3226 				default:
3227 					crystal_hz = 0;
3228 			}
3229 
3230 			if (crystal_hz) {
3231 				tsc_hz =  (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
3232 				if (debug)
3233 					fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
3234 						tsc_hz / 1000000, crystal_hz, ebx_tsc,  eax_crystal);
3235 			}
3236 		}
3237 	}
3238 	if (max_level >= 0x16) {
3239 		unsigned int base_mhz, max_mhz, bus_mhz, edx;
3240 
3241 		/*
3242 		 * CPUID 16H Base MHz, Max MHz, Bus MHz
3243 		 */
3244 		base_mhz = max_mhz = bus_mhz = edx = 0;
3245 
3246 		__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
3247 		if (debug)
3248 			fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
3249 				base_mhz, max_mhz, bus_mhz);
3250 	}
3251 
3252 	if (has_aperf)
3253 		aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
3254 
3255 	do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model);
3256 	do_snb_cstates = has_snb_msrs(family, model);
3257 	do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2);
3258 	do_pc3 = (pkg_cstate_limit >= PCL__3);
3259 	do_pc6 = (pkg_cstate_limit >= PCL__6);
3260 	do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7);
3261 	do_c8_c9_c10 = has_hsw_msrs(family, model);
3262 	do_skl_residency = has_skl_msrs(family, model);
3263 	do_slm_cstates = is_slm(family, model);
3264 	do_knl_cstates  = is_knl(family, model);
3265 
3266 	if (debug)
3267 		decode_misc_pwr_mgmt_msr();
3268 
3269 	rapl_probe(family, model);
3270 	perf_limit_reasons_probe(family, model);
3271 
3272 	if (debug)
3273 		dump_cstate_pstate_config_info(family, model);
3274 
3275 	if (has_skl_msrs(family, model))
3276 		calculate_tsc_tweak();
3277 
3278 	do_gfx_rc6_ms = !access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK);
3279 
3280 	do_gfx_mhz = !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK);
3281 
3282 	return;
3283 }
3284 
3285 void help()
3286 {
3287 	fprintf(outf,
3288 	"Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
3289 	"\n"
3290 	"Turbostat forks the specified COMMAND and prints statistics\n"
3291 	"when COMMAND completes.\n"
3292 	"If no COMMAND is specified, turbostat wakes every 5-seconds\n"
3293 	"to print statistics, until interrupted.\n"
3294 	"--debug	run in \"debug\" mode\n"
3295 	"--interval sec	Override default 5-second measurement interval\n"
3296 	"--help		print this help message\n"
3297 	"--counter msr	print 32-bit counter at address \"msr\"\n"
3298 	"--Counter msr	print 64-bit Counter at address \"msr\"\n"
3299 	"--out file	create or truncate \"file\" for all output\n"
3300 	"--msr msr	print 32-bit value at address \"msr\"\n"
3301 	"--MSR msr	print 64-bit Value at address \"msr\"\n"
3302 	"--version	print version information\n"
3303 	"\n"
3304 	"For more help, run \"man turbostat\"\n");
3305 }
3306 
3307 
/*
 * dir_filter()
 *
 * Directory-entry filter for /dev/cpu/: accept names that start
 * with a decimal digit, ie. filter out ".", "..", "microcode".
 *
 * dirp: directory entry to examine
 *
 * Returns 1 to accept the entry, 0 to reject it.
 */
int dir_filter(const struct dirent *dirp)
{
	/*
	 * isdigit() is only defined for EOF and unsigned char values,
	 * so a plain char must be converted before the call.
	 */
	return isdigit((unsigned char)dirp->d_name[0]) ? 1 : 0;
}
3319 
/*
 * open_dev_cpu_msr()
 *
 * Stub: the argument is ignored and 0 is always returned.
 */
int open_dev_cpu_msr(int dummy1)
{
	(void)dummy1;	/* intentionally unused */

	return 0;
}
3324 
3325 void topology_probe()
3326 {
3327 	int i;
3328 	int max_core_id = 0;
3329 	int max_package_id = 0;
3330 	int max_siblings = 0;
3331 	struct cpu_topology {
3332 		int core_id;
3333 		int physical_package_id;
3334 	} *cpus;
3335 
3336 	/* Initialize num_cpus, max_cpu_num */
3337 	topo.num_cpus = 0;
3338 	topo.max_cpu_num = 0;
3339 	for_all_proc_cpus(count_cpus);
3340 	if (!summary_only && topo.num_cpus > 1)
3341 		show_cpu = 1;
3342 
3343 	if (debug > 1)
3344 		fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
3345 
3346 	cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
3347 	if (cpus == NULL)
3348 		err(1, "calloc cpus");
3349 
3350 	/*
3351 	 * Allocate and initialize cpu_present_set
3352 	 */
3353 	cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
3354 	if (cpu_present_set == NULL)
3355 		err(3, "CPU_ALLOC");
3356 	cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
3357 	CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
3358 	for_all_proc_cpus(mark_cpu_present);
3359 
3360 	/*
3361 	 * Allocate and initialize cpu_affinity_set
3362 	 */
3363 	cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
3364 	if (cpu_affinity_set == NULL)
3365 		err(3, "CPU_ALLOC");
3366 	cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
3367 	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
3368 
3369 
3370 	/*
3371 	 * For online cpus
3372 	 * find max_core_id, max_package_id
3373 	 */
3374 	for (i = 0; i <= topo.max_cpu_num; ++i) {
3375 		int siblings;
3376 
3377 		if (cpu_is_not_present(i)) {
3378 			if (debug > 1)
3379 				fprintf(outf, "cpu%d NOT PRESENT\n", i);
3380 			continue;
3381 		}
3382 		cpus[i].core_id = get_core_id(i);
3383 		if (cpus[i].core_id > max_core_id)
3384 			max_core_id = cpus[i].core_id;
3385 
3386 		cpus[i].physical_package_id = get_physical_package_id(i);
3387 		if (cpus[i].physical_package_id > max_package_id)
3388 			max_package_id = cpus[i].physical_package_id;
3389 
3390 		siblings = get_num_ht_siblings(i);
3391 		if (siblings > max_siblings)
3392 			max_siblings = siblings;
3393 		if (debug > 1)
3394 			fprintf(outf, "cpu %d pkg %d core %d\n",
3395 				i, cpus[i].physical_package_id, cpus[i].core_id);
3396 	}
3397 	topo.num_cores_per_pkg = max_core_id + 1;
3398 	if (debug > 1)
3399 		fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
3400 			max_core_id, topo.num_cores_per_pkg);
3401 	if (debug && !summary_only && topo.num_cores_per_pkg > 1)
3402 		show_core = 1;
3403 
3404 	topo.num_packages = max_package_id + 1;
3405 	if (debug > 1)
3406 		fprintf(outf, "max_package_id %d, sizing for %d packages\n",
3407 			max_package_id, topo.num_packages);
3408 	if (debug && !summary_only && topo.num_packages > 1)
3409 		show_pkg = 1;
3410 
3411 	topo.num_threads_per_core = max_siblings;
3412 	if (debug > 1)
3413 		fprintf(outf, "max_siblings %d\n", max_siblings);
3414 
3415 	free(cpus);
3416 }
3417 
3418 void
3419 allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
3420 {
3421 	int i;
3422 
3423 	*t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg *
3424 		topo.num_packages, sizeof(struct thread_data));
3425 	if (*t == NULL)
3426 		goto error;
3427 
3428 	for (i = 0; i < topo.num_threads_per_core *
3429 		topo.num_cores_per_pkg * topo.num_packages; i++)
3430 		(*t)[i].cpu_id = -1;
3431 
3432 	*c = calloc(topo.num_cores_per_pkg * topo.num_packages,
3433 		sizeof(struct core_data));
3434 	if (*c == NULL)
3435 		goto error;
3436 
3437 	for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++)
3438 		(*c)[i].core_id = -1;
3439 
3440 	*p = calloc(topo.num_packages, sizeof(struct pkg_data));
3441 	if (*p == NULL)
3442 		goto error;
3443 
3444 	for (i = 0; i < topo.num_packages; i++)
3445 		(*p)[i].package_id = i;
3446 
3447 	return;
3448 error:
3449 	err(1, "calloc counters");
3450 }
3451 /*
3452  * init_counter()
3453  *
3454  * set cpu_id, core_num, pkg_num
3455  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
3456  *
3457  * increment topo.num_cores when 1st core in pkg seen
3458  */
3459 void init_counter(struct thread_data *thread_base, struct core_data *core_base,
3460 	struct pkg_data *pkg_base, int thread_num, int core_num,
3461 	int pkg_num, int cpu_id)
3462 {
3463 	struct thread_data *t;
3464 	struct core_data *c;
3465 	struct pkg_data *p;
3466 
3467 	t = GET_THREAD(thread_base, thread_num, core_num, pkg_num);
3468 	c = GET_CORE(core_base, core_num, pkg_num);
3469 	p = GET_PKG(pkg_base, pkg_num);
3470 
3471 	t->cpu_id = cpu_id;
3472 	if (thread_num == 0) {
3473 		t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
3474 		if (cpu_is_first_core_in_package(cpu_id))
3475 			t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
3476 	}
3477 
3478 	c->core_id = core_num;
3479 	p->package_id = pkg_num;
3480 }
3481 
3482 
3483 int initialize_counters(int cpu_id)
3484 {
3485 	int my_thread_id, my_core_id, my_package_id;
3486 
3487 	my_package_id = get_physical_package_id(cpu_id);
3488 	my_core_id = get_core_id(cpu_id);
3489 	my_thread_id = get_cpu_position_in_core(cpu_id);
3490 	if (!my_thread_id)
3491 		topo.num_cores++;
3492 
3493 	init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
3494 	init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
3495 	return 0;
3496 }
3497 
3498 void allocate_output_buffer()
3499 {
3500 	output_buffer = calloc(1, (1 + topo.num_cpus) * 1024);
3501 	outp = output_buffer;
3502 	if (outp == NULL)
3503 		err(-1, "calloc output buffer");
3504 }
3505 void allocate_fd_percpu(void)
3506 {
3507 	fd_percpu = calloc(topo.max_cpu_num, sizeof(int));
3508 	if (fd_percpu == NULL)
3509 		err(-1, "calloc fd_percpu");
3510 }
3511 void allocate_irq_buffers(void)
3512 {
3513 	irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
3514 	if (irq_column_2_cpu == NULL)
3515 		err(-1, "calloc %d", topo.num_cpus);
3516 
3517 	irqs_per_cpu = calloc(topo.max_cpu_num, sizeof(int));
3518 	if (irqs_per_cpu == NULL)
3519 		err(-1, "calloc %d", topo.max_cpu_num);
3520 }
/*
 * setup_all_buffers()
 *
 * Probe the topology, then allocate every buffer that is sized from it
 * and initialize the per-cpu counters.
 */
void setup_all_buffers(void)
{
	/* must run first: it fills in the topo fields used for sizing below */
	topology_probe();
	allocate_irq_buffers();
	allocate_fd_percpu();
	/* two counter sets: one "even" and one "odd" */
	allocate_counters(&thread_even, &core_even, &package_even);
	allocate_counters(&thread_odd, &core_odd, &package_odd);
	allocate_output_buffer();
	/* initialize_counters() writes into both counter sets allocated above */
	for_all_proc_cpus(initialize_counters);
}
3531 
3532 void set_base_cpu(void)
3533 {
3534 	base_cpu = sched_getcpu();
3535 	if (base_cpu < 0)
3536 		err(-ENODEV, "No valid cpus found");
3537 
3538 	if (debug > 1)
3539 		fprintf(outf, "base_cpu = %d\n", base_cpu);
3540 }
3541 
3542 void turbostat_init()
3543 {
3544 	setup_all_buffers();
3545 	set_base_cpu();
3546 	check_dev_msr();
3547 	check_permissions();
3548 	process_cpuid();
3549 
3550 
3551 	if (debug)
3552 		for_all_cpus(print_hwp, ODD_COUNTERS);
3553 
3554 	if (debug)
3555 		for_all_cpus(print_epb, ODD_COUNTERS);
3556 
3557 	if (debug)
3558 		for_all_cpus(print_perf_limit, ODD_COUNTERS);
3559 
3560 	if (debug)
3561 		for_all_cpus(print_rapl, ODD_COUNTERS);
3562 
3563 	for_all_cpus(set_temperature_target, ODD_COUNTERS);
3564 
3565 	if (debug)
3566 		for_all_cpus(print_thermal, ODD_COUNTERS);
3567 }
3568 
3569 int fork_it(char **argv)
3570 {
3571 	pid_t child_pid;
3572 	int status;
3573 
3574 	status = for_all_cpus(get_counters, EVEN_COUNTERS);
3575 	if (status)
3576 		exit(status);
3577 	/* clear affinity side-effect of get_counters() */
3578 	sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
3579 	gettimeofday(&tv_even, (struct timezone *)NULL);
3580 
3581 	child_pid = fork();
3582 	if (!child_pid) {
3583 		/* child */
3584 		execvp(argv[0], argv);
3585 	} else {
3586 
3587 		/* parent */
3588 		if (child_pid == -1)
3589 			err(1, "fork");
3590 
3591 		signal(SIGINT, SIG_IGN);
3592 		signal(SIGQUIT, SIG_IGN);
3593 		if (waitpid(child_pid, &status, 0) == -1)
3594 			err(status, "waitpid");
3595 	}
3596 	/*
3597 	 * n.b. fork_it() does not check for errors from for_all_cpus()
3598 	 * because re-starting is problematic when forking
3599 	 */
3600 	for_all_cpus(get_counters, ODD_COUNTERS);
3601 	gettimeofday(&tv_odd, (struct timezone *)NULL);
3602 	timersub(&tv_odd, &tv_even, &tv_delta);
3603 	for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
3604 	compute_average(EVEN_COUNTERS);
3605 	format_all_counters(EVEN_COUNTERS);
3606 
3607 	fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
3608 
3609 	flush_output_stderr();
3610 
3611 	return status;
3612 }
3613 
3614 int get_and_dump_counters(void)
3615 {
3616 	int status;
3617 
3618 	status = for_all_cpus(get_counters, ODD_COUNTERS);
3619 	if (status)
3620 		return status;
3621 
3622 	status = for_all_cpus(dump_counters, ODD_COUNTERS);
3623 	if (status)
3624 		return status;
3625 
3626 	flush_output_stdout();
3627 
3628 	return status;
3629 }
3630 
3631 void print_version() {
3632 	fprintf(outf, "turbostat version 4.11 27 Feb 2016"
3633 		" - Len Brown <lenb@kernel.org>\n");
3634 }
3635 
3636 void cmdline(int argc, char **argv)
3637 {
3638 	int opt;
3639 	int option_index = 0;
3640 	static struct option long_options[] = {
3641 		{"Counter",	required_argument,	0, 'C'},
3642 		{"counter",	required_argument,	0, 'c'},
3643 		{"Dump",	no_argument,		0, 'D'},
3644 		{"debug",	no_argument,		0, 'd'},
3645 		{"interval",	required_argument,	0, 'i'},
3646 		{"help",	no_argument,		0, 'h'},
3647 		{"Joules",	no_argument,		0, 'J'},
3648 		{"MSR",		required_argument,	0, 'M'},
3649 		{"msr",		required_argument,	0, 'm'},
3650 		{"out",		required_argument,	0, 'o'},
3651 		{"Package",	no_argument,		0, 'p'},
3652 		{"processor",	no_argument,		0, 'p'},
3653 		{"Summary",	no_argument,		0, 'S'},
3654 		{"TCC",		required_argument,	0, 'T'},
3655 		{"version",	no_argument,		0, 'v' },
3656 		{0,		0,			0,  0 }
3657 	};
3658 
3659 	progname = argv[0];
3660 
3661 	while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v",
3662 				long_options, &option_index)) != -1) {
3663 		switch (opt) {
3664 		case 'C':
3665 			sscanf(optarg, "%x", &extra_delta_offset64);
3666 			break;
3667 		case 'c':
3668 			sscanf(optarg, "%x", &extra_delta_offset32);
3669 			break;
3670 		case 'D':
3671 			dump_only++;
3672 			break;
3673 		case 'd':
3674 			debug++;
3675 			break;
3676 		case 'h':
3677 		default:
3678 			help();
3679 			exit(1);
3680 		case 'i':
3681 			{
3682 				double interval = strtod(optarg, NULL);
3683 
3684 				if (interval < 0.001) {
3685 					fprintf(outf, "interval %f seconds is too small\n",
3686 						interval);
3687 					exit(2);
3688 				}
3689 
3690 				interval_ts.tv_sec = interval;
3691 				interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
3692 			}
3693 			break;
3694 		case 'J':
3695 			rapl_joules++;
3696 			break;
3697 		case 'M':
3698 			sscanf(optarg, "%x", &extra_msr_offset64);
3699 			break;
3700 		case 'm':
3701 			sscanf(optarg, "%x", &extra_msr_offset32);
3702 			break;
3703 		case 'o':
3704 			outf = fopen_or_die(optarg, "w");
3705 			break;
3706 		case 'P':
3707 			show_pkg_only++;
3708 			break;
3709 		case 'p':
3710 			show_core_only++;
3711 			break;
3712 		case 'S':
3713 			summary_only++;
3714 			break;
3715 		case 'T':
3716 			tcc_activation_temp_override = atoi(optarg);
3717 			break;
3718 		case 'v':
3719 			print_version();
3720 			exit(0);
3721 			break;
3722 		}
3723 	}
3724 }
3725 
3726 int main(int argc, char **argv)
3727 {
3728 	outf = stderr;
3729 
3730 	cmdline(argc, argv);
3731 
3732 	if (debug)
3733 		print_version();
3734 
3735 	turbostat_init();
3736 
3737 	/* dump counters and exit */
3738 	if (dump_only)
3739 		return get_and_dump_counters();
3740 
3741 	/*
3742 	 * if any params left, it must be a command to fork
3743 	 */
3744 	if (argc - optind)
3745 		return fork_it(argv + optind);
3746 	else
3747 		turbostat_loop();
3748 
3749 	return 0;
3750 }
3751