xref: /openbmc/linux/tools/power/x86/turbostat/turbostat.c (revision 2e7c04aec86758e0adfcad4a24c86593b45807a3)
1 /*
2  * turbostat -- show CPU frequency and C-state residency
3  * on modern Intel turbo-capable processors.
4  *
5  * Copyright (c) 2013 Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21 
22 #define _GNU_SOURCE
23 #include MSRHEADER
24 #include INTEL_FAMILY_HEADER
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <err.h>
28 #include <unistd.h>
29 #include <sys/types.h>
30 #include <sys/wait.h>
31 #include <sys/stat.h>
32 #include <sys/select.h>
33 #include <sys/resource.h>
34 #include <fcntl.h>
35 #include <signal.h>
36 #include <sys/time.h>
37 #include <stdlib.h>
38 #include <getopt.h>
39 #include <dirent.h>
40 #include <string.h>
41 #include <ctype.h>
42 #include <sched.h>
43 #include <time.h>
44 #include <cpuid.h>
45 #include <linux/capability.h>
46 #include <errno.h>
47 
/*
 * File-scope configuration and probe state shared by the functions below.
 * Objects without an initializer have static storage and so start out zero.
 */
char *proc_stat = "/proc/stat";
FILE *outf;		/* stream used by help() for its output */
int *fd_percpu;		/* open msr file descriptors indexed by cpu, filled by get_msr_fd() */
struct timeval interval_tv = {5, 0};	/* 5 seconds, 0 microseconds */
struct timespec interval_ts = {5, 0};	/* 5 seconds, 0 nanoseconds */
struct timespec one_msec = {0, 1000000};	/* 0 seconds, 1000000 nanoseconds */
unsigned int num_iterations;
unsigned int debug;
unsigned int quiet;
unsigned int shown;
unsigned int sums_need_wide_columns;	/* selects the padded column formats in print_header() */
unsigned int rapl_joules;	/* non-zero: print_header() emits the *_J columns instead of *Watt */
unsigned int summary_only;
unsigned int list_header_only;
unsigned int dump_only;
unsigned int do_snb_cstates;
unsigned int do_knl_cstates;
unsigned int do_slm_cstates;
unsigned int do_cnl_cstates;
unsigned int use_c1_residency_msr;
unsigned int has_aperf;
unsigned int has_epb;
unsigned int do_irtl_snb;
unsigned int do_irtl_hsw;
unsigned int units = 1000000;	/* MHz etc */
unsigned int genuine_intel;
unsigned int has_invariant_tsc;
unsigned int do_nhm_platform_info;
unsigned int no_MSR_MISC_PWR_MGMT;
unsigned int aperf_mperf_multiplier = 1;
double bclk;
double base_hz;
unsigned int has_base_hz;
double tsc_tweak = 1.0;	/* multiplied into the thread TSC count by format_counters() */
unsigned int show_pkg_only;
unsigned int show_core_only;
/* outp is the write cursor advanced by every sprintf() in the print/dump routines */
char *output_buffer, *outp;
unsigned int do_rapl;
unsigned int do_dts;
unsigned int do_ptm;
unsigned long long  gfx_cur_rc6_ms;
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
unsigned int gfx_cur_mhz;
unsigned int tcc_activation_temp;
unsigned int tcc_activation_temp_override;
double rapl_power_units, rapl_time_units;
double rapl_dram_energy_units, rapl_energy_units;
double rapl_joule_counter_range;
unsigned int do_core_perf_limit_reasons;
unsigned int has_automatic_cstate_conversion;
unsigned int do_gfx_perf_limit_reasons;
unsigned int do_ring_perf_limit_reasons;
unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;
double discover_bclk(unsigned int family, unsigned int model);
unsigned int has_hwp;	/* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
			/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
unsigned int has_hwp_notify;		/* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp;		/* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg;		/* IA32_HWP_REQUEST_PKG */
unsigned int has_misc_feature_control;
unsigned int first_counter_read = 1;
113 
/*
 * RAPL feature bits.  The comment under each bit names the MSR(s) it stands for.
 * NOTE(review): presumably OR-ed together into do_rapl -- confirm against the probe code.
 */
#define RAPL_PKG		(1 << 0)
					/* 0x610 MSR_PKG_POWER_LIMIT */
					/* 0x611 MSR_PKG_ENERGY_STATUS */
#define RAPL_PKG_PERF_STATUS	(1 << 1)
					/* 0x613 MSR_PKG_PERF_STATUS */
#define RAPL_PKG_POWER_INFO	(1 << 2)
					/* 0x614 MSR_PKG_POWER_INFO */

#define RAPL_DRAM		(1 << 3)
					/* 0x618 MSR_DRAM_POWER_LIMIT */
					/* 0x619 MSR_DRAM_ENERGY_STATUS */
#define RAPL_DRAM_PERF_STATUS	(1 << 4)
					/* 0x61b MSR_DRAM_PERF_STATUS */
#define RAPL_DRAM_POWER_INFO	(1 << 5)
					/* 0x61c MSR_DRAM_POWER_INFO */

#define RAPL_CORES_POWER_LIMIT	(1 << 6)
					/* 0x638 MSR_PP0_POWER_LIMIT */
#define RAPL_CORE_POLICY	(1 << 7)
					/* 0x63a MSR_PP0_POLICY */

#define RAPL_GFX		(1 << 8)
					/* 0x640 MSR_PP1_POWER_LIMIT */
					/* 0x641 MSR_PP1_ENERGY_STATUS */
					/* 0x642 MSR_PP1_POLICY */

#define RAPL_CORES_ENERGY_STATUS	(1 << 9)
					/* 0x639 MSR_PP0_ENERGY_STATUS */
/* both PP0 bits: energy status and power limit */
#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
#define	TJMAX_DEFAULT	100

/* NOTE: each argument appears twice in the expansion; avoid arguments with side effects */
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/*
 * buffer size used by sscanf() for added column names
 * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
 */
#define	NAME_BYTES 20
/* size of the path[] member of struct msr_counter */
#define PATH_BYTES 128

int backwards_count;
char *progname;

#define CPU_SUBSET_MAXCPUS	1024	/* need to use before probe... */
/* dynamically sized cpu sets; each *_setsize / *_size below is the size passed to the CPU_*_S() macros */
cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
/* capacity of the counter[] arrays in the core/package and thread data structures */
#define MAX_ADDED_COUNTERS 8
#define MAX_ADDED_THREAD_COUNTERS 24
#define BITMASK_SIZE 32
163 
/*
 * Per-thread (logical cpu) counter snapshot.
 * Two arrays of these exist, thread_even and thread_odd.
 */
struct thread_data {
	struct timeval tv_begin;	/* format_counters() prints tv_end - tv_begin as the usec column */
	struct timeval tv_end;		/* also printed as Time_Of_Day_Seconds */
	unsigned long long tsc;
	unsigned long long aperf;
	unsigned long long mperf;
	unsigned long long c1;
	unsigned long long  irq_count;
	unsigned int smi_count;
	unsigned int cpu_id;
	unsigned int apic_id;
	unsigned int x2apic_id;
	unsigned int flags;		/* CPU_IS_FIRST_* bits below */
#define CPU_IS_FIRST_THREAD_IN_CORE	0x2
#define CPU_IS_FIRST_CORE_IN_PACKAGE	0x4
	/* added counters, in the same order as the sys.tp list */
	unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
} *thread_even, *thread_odd;
181 
/*
 * Per-core counter snapshot.
 * Two arrays of these exist, core_even and core_odd.
 */
struct core_data {
	unsigned long long c3;
	unsigned long long c6;
	unsigned long long c7;
	unsigned long long mc6_us;	/* duplicate as per-core for now, even though per module */
	unsigned int core_temp_c;	/* dumped as "DTS: %dC" by dump_counters() */
	unsigned int core_id;
	/* added counters, in the same order as the sys.cp list */
	unsigned long long counter[MAX_ADDED_COUNTERS];
} *core_even, *core_odd;
191 
/*
 * Per-package counter snapshot.
 * Two arrays of these exist, package_even and package_odd.
 */
struct pkg_data {
	unsigned long long pc2;
	unsigned long long pc3;
	unsigned long long pc6;
	unsigned long long pc7;
	unsigned long long pc8;
	unsigned long long pc9;
	unsigned long long pc10;
	unsigned long long cpu_lpi;
	unsigned long long sys_lpi;
	unsigned long long pkg_wtd_core_c0;		/* dumped as "Weighted cores" */
	unsigned long long pkg_any_core_c0;		/* dumped as "Any cores" */
	unsigned long long pkg_any_gfxe_c0;		/* dumped as "Any GFX" */
	unsigned long long pkg_both_core_gfxe_c0;	/* dumped as "CPU + GFX" */
	long long gfx_rc6_ms;
	unsigned int gfx_mhz;
	unsigned int package_id;
	unsigned int energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
	unsigned int energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
	unsigned int energy_cores;	/* MSR_PP0_ENERGY_STATUS */
	unsigned int energy_gfx;	/* MSR_PP1_ENERGY_STATUS */
	unsigned int rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */
	unsigned int rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */
	unsigned int pkg_temp_c;	/* dumped as "PTM: %dC" by dump_counters() */
	/* added counters, in the same order as the sys.pp list */
	unsigned long long counter[MAX_ADDED_COUNTERS];
} *package_even, *package_odd;
218 
/* argument triples (thread, core, package base pointers) for the two snapshot sets */
#define ODD_COUNTERS thread_odd, core_odd, package_odd
#define EVEN_COUNTERS thread_even, core_even, package_even

/*
 * Address of one thread entry in a flat array laid out as
 * [package][node][core][thread], using the dimensions stored in topo.
 */
#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)	      \
	((thread_base) +						      \
	 ((pkg_no) *							      \
	  topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
	 ((node_no) * topo.cores_per_node * topo.threads_per_core) +	      \
	 ((core_no) * topo.threads_per_core) +				      \
	 (thread_no))

/*
 * Address of one core entry in a flat array laid out as
 * [package][node][core], using the dimensions stored in topo.
 */
#define GET_CORE(core_base, core_no, node_no, pkg_no)			\
	((core_base) +							\
	 ((pkg_no) *  topo.nodes_per_pkg * topo.cores_per_node) +	\
	 ((node_no) * topo.cores_per_node) +				\
	 (core_no))
235 
236 
/*
 * Address of the entry for package pkg_no in a per-package array.
 * Both arguments are parenthesized so that expression arguments
 * (e.g. a conditional) expand correctly, as in GET_THREAD/GET_CORE.
 */
#define GET_PKG(pkg_base, pkg_no) ((pkg_base) + (pkg_no))
238 
/* classification of an added counter: what it is attached to, what it counts, how it is displayed */
enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
enum counter_type {COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC};
enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT};
242 
/*
 * Description of one counter column.
 * Used both for the built-in column table bic[] and for the
 * singly linked lists of added counters hanging off struct sys_counters.
 */
struct msr_counter {
	unsigned int msr_num;		/* printed as "msr0x%x" by dump_counters() */
	char name[NAME_BYTES];		/* column header text */
	char path[PATH_BYTES];		/* NOTE(review): presumably a sysfs path for non-MSR counters -- confirm */
	unsigned int width;		/* print_header() distinguishes 64 from other widths for FORMAT_RAW */
	enum counter_type type;
	enum counter_format format;
	struct msr_counter *next;	/* next counter in the list, NULL terminates */
	unsigned int flags;
#define	FLAGS_HIDE	(1 << 0)
#define	FLAGS_SHOW	(1 << 1)
	/* NOTE(review): SYSFS_PERCPU has the same value as FLAGS_SHOW -- confirm this overlap is intended */
#define	SYSFS_PERCPU	(1 << 1)
};
256 
/*
 * Added (non built-in) counters, one linked list per scope.
 * The print and dump routines walk tp, cp and pp via msr_counter.next.
 */
struct sys_counters {
	unsigned int added_thread_counters;
	unsigned int added_core_counters;
	unsigned int added_package_counters;
	struct msr_counter *tp;		/* thread-scope list */
	struct msr_counter *cp;		/* core-scope list */
	struct msr_counter *pp;		/* package-scope list */
} sys;
265 
/*
 * One thread/core/package record for the whole system.
 * format_counters() treats &average.threads as the summary row.
 */
struct system_summary {
	struct thread_data threads;
	struct core_data cores;
	struct pkg_data packages;
} average;
271 
/*
 * Topology identifiers for one cpu.
 * The cpus array is indexed by cpu id (see cpus[t->cpu_id] in format_counters()).
 */
struct cpu_topology {
	int physical_package_id;
	int logical_cpu_id;
	int physical_node_id;	/* printed in the Node column */
	int logical_node_id;	/* 0-based count within the package */
	int physical_core_id;
	int thread_id;
	cpu_set_t *put_ids; /* Processing Unit/Thread IDs */
} *cpus;
281 
/*
 * System-wide topology dimensions.
 * nodes_per_pkg, cores_per_node and threads_per_core are the strides
 * used by GET_THREAD()/GET_CORE(); num_packages bounds for_all_cpus().
 */
struct topo_params {
	int num_packages;
	int num_cpus;
	int num_cores;
	int max_cpu_num;
	int max_node_num;
	int nodes_per_pkg;
	int cores_per_node;
	int threads_per_core;
} topo;
292 
/* tv_delta is the measurement interval that format_counters() divides rates by */
struct timeval tv_even, tv_odd, tv_delta;

int *irq_column_2_cpu;	/* /proc/interrupts column numbers */
int *irqs_per_cpu;		/* indexed by cpu_num */

/* defined later in the file */
void setup_all_buffers(void);
299 
300 int cpu_is_not_present(int cpu)
301 {
302 	return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
303 }
304 /*
305  * run func(thread, core, package) in topology order
306  * skip non-present cpus
307  */
308 
309 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
310 	struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
311 {
312 	int retval, pkg_no, core_no, thread_no, node_no;
313 
314 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
315 		for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
316 			for (node_no = 0; node_no < topo.nodes_per_pkg;
317 			     node_no++) {
318 				for (thread_no = 0; thread_no <
319 					topo.threads_per_core; ++thread_no) {
320 					struct thread_data *t;
321 					struct core_data *c;
322 					struct pkg_data *p;
323 
324 					t = GET_THREAD(thread_base, thread_no,
325 						       core_no, node_no,
326 						       pkg_no);
327 
328 					if (cpu_is_not_present(t->cpu_id))
329 						continue;
330 
331 					c = GET_CORE(core_base, core_no,
332 						     node_no, pkg_no);
333 					p = GET_PKG(pkg_base, pkg_no);
334 
335 					retval = func(t, c, p);
336 					if (retval)
337 						return retval;
338 				}
339 			}
340 		}
341 	}
342 	return 0;
343 }
344 
345 int cpu_migrate(int cpu)
346 {
347 	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
348 	CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
349 	if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
350 		return -1;
351 	else
352 		return 0;
353 }
354 int get_msr_fd(int cpu)
355 {
356 	char pathname[32];
357 	int fd;
358 
359 	fd = fd_percpu[cpu];
360 
361 	if (fd)
362 		return fd;
363 
364 	sprintf(pathname, "/dev/cpu/%d/msr", cpu);
365 	fd = open(pathname, O_RDONLY);
366 	if (fd < 0)
367 		err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
368 
369 	fd_percpu[cpu] = fd;
370 
371 	return fd;
372 }
373 
/*
 * Read sizeof(*msr) bytes at the given offset of the cpu's msr file
 * into *msr.
 * Anything other than a full-size read is fatal: err() reports the cpu
 * and offset and exits.  Otherwise returns 0.
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
	int fd = get_msr_fd(cpu);
	ssize_t nread = pread(fd, msr, sizeof(*msr), offset);

	if (nread != sizeof(*msr))
		err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);

	return 0;
}
385 
/*
 * This list matches the column headers, except
 * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
 * 2. Core and CPU are moved to the end, we can't have strings that contain them
 *    matching on them for --show and --hide.
 *
 * The array index of an entry is its bit number: bic_lookup() returns
 * (1ULL << i) for bic[i], so the order here must stay in step with the
 * BIC_* bit definitions that follow.
 */
struct msr_counter bic[] = {
	{ 0x0, "usec" },
	{ 0x0, "Time_Of_Day_Seconds" },
	{ 0x0, "Package" },
	{ 0x0, "Node" },
	{ 0x0, "Avg_MHz" },
	{ 0x0, "Busy%" },
	{ 0x0, "Bzy_MHz" },
	{ 0x0, "TSC_MHz" },
	{ 0x0, "IRQ" },
	{ 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
	{ 0x0, "sysfs" },
	{ 0x0, "CPU%c1" },
	{ 0x0, "CPU%c3" },
	{ 0x0, "CPU%c6" },
	{ 0x0, "CPU%c7" },
	{ 0x0, "ThreadC" },
	{ 0x0, "CoreTmp" },
	{ 0x0, "CoreCnt" },
	{ 0x0, "PkgTmp" },
	{ 0x0, "GFX%rc6" },
	{ 0x0, "GFXMHz" },
	{ 0x0, "Pkg%pc2" },
	{ 0x0, "Pkg%pc3" },
	{ 0x0, "Pkg%pc6" },
	{ 0x0, "Pkg%pc7" },
	{ 0x0, "Pkg%pc8" },
	{ 0x0, "Pkg%pc9" },
	{ 0x0, "Pk%pc10" },
	{ 0x0, "CPU%LPI" },
	{ 0x0, "SYS%LPI" },
	{ 0x0, "PkgWatt" },
	{ 0x0, "CorWatt" },
	{ 0x0, "GFXWatt" },
	{ 0x0, "PkgCnt" },
	{ 0x0, "RAMWatt" },
	{ 0x0, "PKG_%" },
	{ 0x0, "RAM_%" },
	{ 0x0, "Pkg_J" },
	{ 0x0, "Cor_J" },
	{ 0x0, "GFX_J" },
	{ 0x0, "RAM_J" },
	{ 0x0, "Mod%c6" },
	{ 0x0, "Totl%C0" },
	{ 0x0, "Any%C0" },
	{ 0x0, "GFX%C0" },
	{ 0x0, "CPUGFX%" },
	{ 0x0, "Core" },
	{ 0x0, "CPU" },
	{ 0x0, "APIC" },
	{ 0x0, "X2APIC" },
};
444 
/* number of entries in bic[] */
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))

/*
 * One bit per built-in column.  Bit number N corresponds to bic[N],
 * so these definitions must stay in the same order as that table.
 */
#define	BIC_USEC	(1ULL << 0)
#define	BIC_TOD		(1ULL << 1)
#define	BIC_Package	(1ULL << 2)
#define	BIC_Node	(1ULL << 3)
#define	BIC_Avg_MHz	(1ULL << 4)
#define	BIC_Busy	(1ULL << 5)
#define	BIC_Bzy_MHz	(1ULL << 6)
#define	BIC_TSC_MHz	(1ULL << 7)
#define	BIC_IRQ		(1ULL << 8)
#define	BIC_SMI		(1ULL << 9)
#define	BIC_sysfs	(1ULL << 10)
#define	BIC_CPU_c1	(1ULL << 11)
#define	BIC_CPU_c3	(1ULL << 12)
#define	BIC_CPU_c6	(1ULL << 13)
#define	BIC_CPU_c7	(1ULL << 14)
#define	BIC_ThreadC	(1ULL << 15)
#define	BIC_CoreTmp	(1ULL << 16)
#define	BIC_CoreCnt	(1ULL << 17)
#define	BIC_PkgTmp	(1ULL << 18)
#define	BIC_GFX_rc6	(1ULL << 19)
#define	BIC_GFXMHz	(1ULL << 20)
#define	BIC_Pkgpc2	(1ULL << 21)
#define	BIC_Pkgpc3	(1ULL << 22)
#define	BIC_Pkgpc6	(1ULL << 23)
#define	BIC_Pkgpc7	(1ULL << 24)
#define	BIC_Pkgpc8	(1ULL << 25)
#define	BIC_Pkgpc9	(1ULL << 26)
#define	BIC_Pkgpc10	(1ULL << 27)
#define BIC_CPU_LPI	(1ULL << 28)
#define BIC_SYS_LPI	(1ULL << 29)
#define	BIC_PkgWatt	(1ULL << 30)
#define	BIC_CorWatt	(1ULL << 31)
#define	BIC_GFXWatt	(1ULL << 32)
#define	BIC_PkgCnt	(1ULL << 33)
#define	BIC_RAMWatt	(1ULL << 34)
#define	BIC_PKG__	(1ULL << 35)
#define	BIC_RAM__	(1ULL << 36)
#define	BIC_Pkg_J	(1ULL << 37)
#define	BIC_Cor_J	(1ULL << 38)
#define	BIC_GFX_J	(1ULL << 39)
#define	BIC_RAM_J	(1ULL << 40)
#define	BIC_Mod_c6	(1ULL << 41)
#define	BIC_Totl_c0	(1ULL << 42)
#define	BIC_Any_c0	(1ULL << 43)
#define	BIC_GFX_c0	(1ULL << 44)
#define	BIC_CPUGFX	(1ULL << 45)
#define	BIC_Core	(1ULL << 46)
#define	BIC_CPU		(1ULL << 47)
#define	BIC_APIC	(1ULL << 48)
#define	BIC_X2APIC	(1ULL << 49)

#define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)

/* bic_enabled: columns the user wants; bic_present: columns that are available */
unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;

/*
 * Helpers on the two masks.  A column is shown only when it is both
 * enabled and present.  Every argument is parenthesized so that a
 * combined mask such as (BIC_A | BIC_B) is handled as one operand.
 */
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & (COUNTER_NAME))
#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= (COUNTER_NAME))
#define BIC_PRESENT(COUNTER_BIT) (bic_present |= (COUNTER_BIT))
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~(COUNTER_BIT))
506 
507 
/*
 * Names given to bic_lookup() that matched no built-in column.
 * deferred_skip_index is the number of entries stored so far.
 */
#define MAX_DEFERRED 16
char *deferred_skip_names[MAX_DEFERRED];
int deferred_skip_index;

/*
 * HIDE_LIST - hide this list of counters, show the rest [default]
 * SHOW_LIST - show this list of counters, hide the rest
 */
enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;
517 
518 void help(void)
519 {
520 	fprintf(outf,
521 	"Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
522 	"\n"
523 	"Turbostat forks the specified COMMAND and prints statistics\n"
524 	"when COMMAND completes.\n"
525 	"If no COMMAND is specified, turbostat wakes every 5-seconds\n"
526 	"to print statistics, until interrupted.\n"
527 	"  -a, --add	add a counter\n"
528 	"		  eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
529 	"  -c, --cpu	cpu-set	limit output to summary plus cpu-set:\n"
530 	"		  {core | package | j,k,l..m,n-p }\n"
531 	"  -d, --debug	displays usec, Time_Of_Day_Seconds and more debugging\n"
532 	"  -D, --Dump	displays the raw counter values\n"
533 	"  -e, --enable	[all | column]\n"
534 	"		shows all or the specified disabled column\n"
535 	"  -H, --hide [column|column,column,...]\n"
536 	"		hide the specified column(s)\n"
537 	"  -i, --interval sec.subsec\n"
538 	"		Override default 5-second measurement interval\n"
539 	"  -J, --Joules	displays energy in Joules instead of Watts\n"
540 	"  -l, --list	list column headers only\n"
541 	"  -n, --num_iterations num\n"
542 	"		number of the measurement iterations\n"
543 	"  -o, --out file\n"
544 	"		create or truncate \"file\" for all output\n"
545 	"  -q, --quiet	skip decoding system configuration header\n"
546 	"  -s, --show [column|column,column,...]\n"
547 	"		show only the specified column(s)\n"
548 	"  -S, --Summary\n"
549 	"		limits output to 1-line system summary per interval\n"
550 	"  -T, --TCC temperature\n"
551 	"		sets the Thermal Control Circuit temperature in\n"
552 	"		  degrees Celsius\n"
553 	"  -h, --help	print this help message\n"
554 	"  -v, --version	print version information\n"
555 	"\n"
556 	"For more help, run \"man turbostat\"\n");
557 }
558 
559 /*
560  * bic_lookup
561  * for all the strings in comma separate name_list,
562  * set the approprate bit in return value.
563  */
564 unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
565 {
566 	int i;
567 	unsigned long long retval = 0;
568 
569 	while (name_list) {
570 		char *comma;
571 
572 		comma = strchr(name_list, ',');
573 
574 		if (comma)
575 			*comma = '\0';
576 
577 		if (!strcmp(name_list, "all"))
578 			return ~0;
579 
580 		for (i = 0; i < MAX_BIC; ++i) {
581 			if (!strcmp(name_list, bic[i].name)) {
582 				retval |= (1ULL << i);
583 				break;
584 			}
585 		}
586 		if (i == MAX_BIC) {
587 			if (mode == SHOW_LIST) {
588 				fprintf(stderr, "Invalid counter name: %s\n", name_list);
589 				exit(-1);
590 			}
591 			deferred_skip_names[deferred_skip_index++] = name_list;
592 			if (debug)
593 				fprintf(stderr, "deferred \"%s\"\n", name_list);
594 			if (deferred_skip_index >= MAX_DEFERRED) {
595 				fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
596 					MAX_DEFERRED, name_list);
597 				help();
598 				exit(1);
599 			}
600 		}
601 
602 		name_list = comma;
603 		if (name_list)
604 			name_list++;
605 
606 	}
607 	return retval;
608 }
609 
610 
611 void print_header(char *delim)
612 {
613 	struct msr_counter *mp;
614 	int printed = 0;
615 
616 	if (DO_BIC(BIC_USEC))
617 		outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
618 	if (DO_BIC(BIC_TOD))
619 		outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
620 	if (DO_BIC(BIC_Package))
621 		outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
622 	if (DO_BIC(BIC_Node))
623 		outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
624 	if (DO_BIC(BIC_Core))
625 		outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
626 	if (DO_BIC(BIC_CPU))
627 		outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
628 	if (DO_BIC(BIC_APIC))
629 		outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
630 	if (DO_BIC(BIC_X2APIC))
631 		outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
632 	if (DO_BIC(BIC_Avg_MHz))
633 		outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
634 	if (DO_BIC(BIC_Busy))
635 		outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
636 	if (DO_BIC(BIC_Bzy_MHz))
637 		outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
638 	if (DO_BIC(BIC_TSC_MHz))
639 		outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
640 
641 	if (DO_BIC(BIC_IRQ)) {
642 		if (sums_need_wide_columns)
643 			outp += sprintf(outp, "%s     IRQ", (printed++ ? delim : ""));
644 		else
645 			outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
646 	}
647 
648 	if (DO_BIC(BIC_SMI))
649 		outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
650 
651 	for (mp = sys.tp; mp; mp = mp->next) {
652 
653 		if (mp->format == FORMAT_RAW) {
654 			if (mp->width == 64)
655 				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
656 			else
657 				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
658 		} else {
659 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
660 				outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
661 			else
662 				outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
663 		}
664 	}
665 
666 	if (DO_BIC(BIC_CPU_c1))
667 		outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
668 	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
669 		outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
670 	if (DO_BIC(BIC_CPU_c6))
671 		outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
672 	if (DO_BIC(BIC_CPU_c7))
673 		outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
674 
675 	if (DO_BIC(BIC_Mod_c6))
676 		outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
677 
678 	if (DO_BIC(BIC_CoreTmp))
679 		outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
680 
681 	for (mp = sys.cp; mp; mp = mp->next) {
682 		if (mp->format == FORMAT_RAW) {
683 			if (mp->width == 64)
684 				outp += sprintf(outp, "%s%18.18s", delim, mp->name);
685 			else
686 				outp += sprintf(outp, "%s%10.10s", delim, mp->name);
687 		} else {
688 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
689 				outp += sprintf(outp, "%s%8s", delim, mp->name);
690 			else
691 				outp += sprintf(outp, "%s%s", delim, mp->name);
692 		}
693 	}
694 
695 	if (DO_BIC(BIC_PkgTmp))
696 		outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
697 
698 	if (DO_BIC(BIC_GFX_rc6))
699 		outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
700 
701 	if (DO_BIC(BIC_GFXMHz))
702 		outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
703 
704 	if (DO_BIC(BIC_Totl_c0))
705 		outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
706 	if (DO_BIC(BIC_Any_c0))
707 		outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
708 	if (DO_BIC(BIC_GFX_c0))
709 		outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
710 	if (DO_BIC(BIC_CPUGFX))
711 		outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
712 
713 	if (DO_BIC(BIC_Pkgpc2))
714 		outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
715 	if (DO_BIC(BIC_Pkgpc3))
716 		outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
717 	if (DO_BIC(BIC_Pkgpc6))
718 		outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
719 	if (DO_BIC(BIC_Pkgpc7))
720 		outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
721 	if (DO_BIC(BIC_Pkgpc8))
722 		outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
723 	if (DO_BIC(BIC_Pkgpc9))
724 		outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
725 	if (DO_BIC(BIC_Pkgpc10))
726 		outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
727 	if (DO_BIC(BIC_CPU_LPI))
728 		outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
729 	if (DO_BIC(BIC_SYS_LPI))
730 		outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
731 
732 	if (do_rapl && !rapl_joules) {
733 		if (DO_BIC(BIC_PkgWatt))
734 			outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
735 		if (DO_BIC(BIC_CorWatt))
736 			outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
737 		if (DO_BIC(BIC_GFXWatt))
738 			outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
739 		if (DO_BIC(BIC_RAMWatt))
740 			outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
741 		if (DO_BIC(BIC_PKG__))
742 			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
743 		if (DO_BIC(BIC_RAM__))
744 			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
745 	} else if (do_rapl && rapl_joules) {
746 		if (DO_BIC(BIC_Pkg_J))
747 			outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
748 		if (DO_BIC(BIC_Cor_J))
749 			outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
750 		if (DO_BIC(BIC_GFX_J))
751 			outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
752 		if (DO_BIC(BIC_RAM_J))
753 			outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
754 		if (DO_BIC(BIC_PKG__))
755 			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
756 		if (DO_BIC(BIC_RAM__))
757 			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
758 	}
759 	for (mp = sys.pp; mp; mp = mp->next) {
760 		if (mp->format == FORMAT_RAW) {
761 			if (mp->width == 64)
762 				outp += sprintf(outp, "%s%18.18s", delim, mp->name);
763 			else
764 				outp += sprintf(outp, "%s%10.10s", delim, mp->name);
765 		} else {
766 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
767 				outp += sprintf(outp, "%s%8s", delim, mp->name);
768 			else
769 				outp += sprintf(outp, "%s%s", delim, mp->name);
770 		}
771 	}
772 
773 	outp += sprintf(outp, "\n");
774 }
775 
776 int dump_counters(struct thread_data *t, struct core_data *c,
777 	struct pkg_data *p)
778 {
779 	int i;
780 	struct msr_counter *mp;
781 
782 	outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
783 
784 	if (t) {
785 		outp += sprintf(outp, "CPU: %d flags 0x%x\n",
786 			t->cpu_id, t->flags);
787 		outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
788 		outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
789 		outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
790 		outp += sprintf(outp, "c1: %016llX\n", t->c1);
791 
792 		if (DO_BIC(BIC_IRQ))
793 			outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
794 		if (DO_BIC(BIC_SMI))
795 			outp += sprintf(outp, "SMI: %d\n", t->smi_count);
796 
797 		for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
798 			outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n",
799 				i, mp->msr_num, t->counter[i]);
800 		}
801 	}
802 
803 	if (c) {
804 		outp += sprintf(outp, "core: %d\n", c->core_id);
805 		outp += sprintf(outp, "c3: %016llX\n", c->c3);
806 		outp += sprintf(outp, "c6: %016llX\n", c->c6);
807 		outp += sprintf(outp, "c7: %016llX\n", c->c7);
808 		outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
809 
810 		for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
811 			outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
812 				i, mp->msr_num, c->counter[i]);
813 		}
814 		outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
815 	}
816 
817 	if (p) {
818 		outp += sprintf(outp, "package: %d\n", p->package_id);
819 
820 		outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
821 		outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
822 		outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
823 		outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
824 
825 		outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
826 		if (DO_BIC(BIC_Pkgpc3))
827 			outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
828 		if (DO_BIC(BIC_Pkgpc6))
829 			outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
830 		if (DO_BIC(BIC_Pkgpc7))
831 			outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
832 		outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
833 		outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
834 		outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
835 		outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
836 		outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
837 		outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
838 		outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
839 		outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
840 		outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
841 		outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
842 		outp += sprintf(outp, "Throttle PKG: %0X\n",
843 			p->rapl_pkg_perf_status);
844 		outp += sprintf(outp, "Throttle RAM: %0X\n",
845 			p->rapl_dram_perf_status);
846 		outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
847 
848 		for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
849 			outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n",
850 				i, mp->msr_num, p->counter[i]);
851 		}
852 	}
853 
854 	outp += sprintf(outp, "\n");
855 
856 	return 0;
857 }
858 
859 /*
860  * column formatting convention & formats
861  */
862 int format_counters(struct thread_data *t, struct core_data *c,
863 	struct pkg_data *p)
864 {
865 	double interval_float, tsc;
866 	char *fmt8;
867 	int i;
868 	struct msr_counter *mp;
869 	char *delim = "\t";
870 	int printed = 0;
871 
872 	 /* if showing only 1st thread in core and this isn't one, bail out */
873 	if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
874 		return 0;
875 
876 	 /* if showing only 1st thread in pkg and this isn't one, bail out */
877 	if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
878 		return 0;
879 
880 	/*if not summary line and --cpu is used */
881 	if ((t != &average.threads) &&
882 		(cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
883 		return 0;
884 
885 	if (DO_BIC(BIC_USEC)) {
886 		/* on each row, print how many usec each timestamp took to gather */
887 		struct timeval tv;
888 
889 		timersub(&t->tv_end, &t->tv_begin, &tv);
890 		outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
891 	}
892 
893 	/* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
894 	if (DO_BIC(BIC_TOD))
895 		outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
896 
897 	interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
898 
899 	tsc = t->tsc * tsc_tweak;
900 
901 	/* topo columns, print blanks on 1st (average) line */
902 	if (t == &average.threads) {
903 		if (DO_BIC(BIC_Package))
904 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
905 		if (DO_BIC(BIC_Node))
906 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
907 		if (DO_BIC(BIC_Core))
908 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
909 		if (DO_BIC(BIC_CPU))
910 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
911 		if (DO_BIC(BIC_APIC))
912 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
913 		if (DO_BIC(BIC_X2APIC))
914 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
915 	} else {
916 		if (DO_BIC(BIC_Package)) {
917 			if (p)
918 				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
919 			else
920 				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
921 		}
922 		if (DO_BIC(BIC_Node)) {
923 			if (t)
924 				outp += sprintf(outp, "%s%d",
925 						(printed++ ? delim : ""),
926 					      cpus[t->cpu_id].physical_node_id);
927 			else
928 				outp += sprintf(outp, "%s-",
929 						(printed++ ? delim : ""));
930 		}
931 		if (DO_BIC(BIC_Core)) {
932 			if (c)
933 				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
934 			else
935 				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
936 		}
937 		if (DO_BIC(BIC_CPU))
938 			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
939 		if (DO_BIC(BIC_APIC))
940 			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
941 		if (DO_BIC(BIC_X2APIC))
942 			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
943 	}
944 
945 	if (DO_BIC(BIC_Avg_MHz))
946 		outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
947 			1.0 / units * t->aperf / interval_float);
948 
949 	if (DO_BIC(BIC_Busy))
950 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf/tsc);
951 
952 	if (DO_BIC(BIC_Bzy_MHz)) {
953 		if (has_base_hz)
954 			outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
955 		else
956 			outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
957 				tsc / units * t->aperf / t->mperf / interval_float);
958 	}
959 
960 	if (DO_BIC(BIC_TSC_MHz))
961 		outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float);
962 
963 	/* IRQ */
964 	if (DO_BIC(BIC_IRQ)) {
965 		if (sums_need_wide_columns)
966 			outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
967 		else
968 			outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
969 	}
970 
971 	/* SMI */
972 	if (DO_BIC(BIC_SMI))
973 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
974 
975 	/* Added counters */
976 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
977 		if (mp->format == FORMAT_RAW) {
978 			if (mp->width == 32)
979 				outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) t->counter[i]);
980 			else
981 				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
982 		} else if (mp->format == FORMAT_DELTA) {
983 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
984 				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
985 			else
986 				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
987 		} else if (mp->format == FORMAT_PERCENT) {
988 			if (mp->type == COUNTER_USEC)
989 				outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), t->counter[i]/interval_float/10000);
990 			else
991 				outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i]/tsc);
992 		}
993 	}
994 
995 	/* C1 */
996 	if (DO_BIC(BIC_CPU_c1))
997 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1/tsc);
998 
999 
1000 	/* print per-core data only for 1st thread in core */
1001 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1002 		goto done;
1003 
1004 	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
1005 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc);
1006 	if (DO_BIC(BIC_CPU_c6))
1007 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc);
1008 	if (DO_BIC(BIC_CPU_c7))
1009 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7/tsc);
1010 
1011 	/* Mod%c6 */
1012 	if (DO_BIC(BIC_Mod_c6))
1013 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
1014 
1015 	if (DO_BIC(BIC_CoreTmp))
1016 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
1017 
1018 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1019 		if (mp->format == FORMAT_RAW) {
1020 			if (mp->width == 32)
1021 				outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) c->counter[i]);
1022 			else
1023 				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
1024 		} else if (mp->format == FORMAT_DELTA) {
1025 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1026 				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
1027 			else
1028 				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
1029 		} else if (mp->format == FORMAT_PERCENT) {
1030 			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i]/tsc);
1031 		}
1032 	}
1033 
1034 	/* print per-package data only for 1st core in package */
1035 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1036 		goto done;
1037 
1038 	/* PkgTmp */
1039 	if (DO_BIC(BIC_PkgTmp))
1040 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
1041 
1042 	/* GFXrc6 */
1043 	if (DO_BIC(BIC_GFX_rc6)) {
1044 		if (p->gfx_rc6_ms == -1) {	/* detect GFX counter reset */
1045 			outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
1046 		} else {
1047 			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
1048 				p->gfx_rc6_ms / 10.0 / interval_float);
1049 		}
1050 	}
1051 
1052 	/* GFXMHz */
1053 	if (DO_BIC(BIC_GFXMHz))
1054 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
1055 
1056 	/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
1057 	if (DO_BIC(BIC_Totl_c0))
1058 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
1059 	if (DO_BIC(BIC_Any_c0))
1060 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc);
1061 	if (DO_BIC(BIC_GFX_c0))
1062 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc);
1063 	if (DO_BIC(BIC_CPUGFX))
1064 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc);
1065 
1066 	if (DO_BIC(BIC_Pkgpc2))
1067 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc);
1068 	if (DO_BIC(BIC_Pkgpc3))
1069 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3/tsc);
1070 	if (DO_BIC(BIC_Pkgpc6))
1071 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6/tsc);
1072 	if (DO_BIC(BIC_Pkgpc7))
1073 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7/tsc);
1074 	if (DO_BIC(BIC_Pkgpc8))
1075 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8/tsc);
1076 	if (DO_BIC(BIC_Pkgpc9))
1077 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9/tsc);
1078 	if (DO_BIC(BIC_Pkgpc10))
1079 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc);
1080 
1081 	if (DO_BIC(BIC_CPU_LPI))
1082 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
1083 	if (DO_BIC(BIC_SYS_LPI))
1084 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
1085 
1086 	/*
1087  	 * If measurement interval exceeds minimum RAPL Joule Counter range,
1088  	 * indicate that results are suspect by printing "**" in fraction place.
1089  	 */
1090 	if (interval_float < rapl_joule_counter_range)
1091 		fmt8 = "%s%.2f";
1092 	else
1093 		fmt8 = "%6.0f**";
1094 
1095 	if (DO_BIC(BIC_PkgWatt))
1096 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
1097 	if (DO_BIC(BIC_CorWatt))
1098 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
1099 	if (DO_BIC(BIC_GFXWatt))
1100 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
1101 	if (DO_BIC(BIC_RAMWatt))
1102 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float);
1103 	if (DO_BIC(BIC_Pkg_J))
1104 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
1105 	if (DO_BIC(BIC_Cor_J))
1106 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
1107 	if (DO_BIC(BIC_GFX_J))
1108 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
1109 	if (DO_BIC(BIC_RAM_J))
1110 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
1111 	if (DO_BIC(BIC_PKG__))
1112 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
1113 	if (DO_BIC(BIC_RAM__))
1114 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
1115 
1116 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1117 		if (mp->format == FORMAT_RAW) {
1118 			if (mp->width == 32)
1119 				outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) p->counter[i]);
1120 			else
1121 				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
1122 		} else if (mp->format == FORMAT_DELTA) {
1123 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1124 				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
1125 			else
1126 				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
1127 		} else if (mp->format == FORMAT_PERCENT) {
1128 			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i]/tsc);
1129 		}
1130 	}
1131 
1132 done:
1133 	if (*(outp - 1) != '\n')
1134 		outp += sprintf(outp, "\n");
1135 
1136 	return 0;
1137 }
1138 
1139 void flush_output_stdout(void)
1140 {
1141 	FILE *filep;
1142 
1143 	if (outf == stderr)
1144 		filep = stdout;
1145 	else
1146 		filep = outf;
1147 
1148 	fputs(output_buffer, filep);
1149 	fflush(filep);
1150 
1151 	outp = output_buffer;
1152 }
1153 void flush_output_stderr(void)
1154 {
1155 	fputs(output_buffer, outf);
1156 	fflush(outf);
1157 	outp = output_buffer;
1158 }
1159 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1160 {
1161 	static int printed;
1162 
1163 	if (!printed || !summary_only)
1164 		print_header("\t");
1165 
1166 	format_counters(&average.threads, &average.cores, &average.packages);
1167 
1168 	printed = 1;
1169 
1170 	if (summary_only)
1171 		return;
1172 
1173 	for_all_cpus(format_counters, t, c, p);
1174 }
1175 
/*
 * DELTA_WRAP32(new, old)
 *
 * Replace "old" with the distance from "old" to "new", treating both as
 * samples of a counter that wraps at 2^32:
 *
 *   new >= old:  old = new - old
 *   new <  old:  old = 2^32 + new - old   (the counter wrapped once)
 *
 * An unchanged counter (new == old) yields 0 rather than a full 2^32 wrap.
 *
 * "old" must be an lvalue.  Both arguments are evaluated more than once,
 * so they must be free of side effects.  The do/while(0) wrapper makes the
 * macro behave as a single statement, e.g. inside an unbraced if/else.
 */
#define DELTA_WRAP32(new, old)					\
	do {							\
		if ((new) >= (old))				\
			(old) = (new) - (old);			\
		else						\
			(old) = 0x100000000ULL + (new) - (old);	\
	} while (0)
1182 
1183 int
1184 delta_package(struct pkg_data *new, struct pkg_data *old)
1185 {
1186 	int i;
1187 	struct msr_counter *mp;
1188 
1189 
1190 	if (DO_BIC(BIC_Totl_c0))
1191 		old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
1192 	if (DO_BIC(BIC_Any_c0))
1193 		old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
1194 	if (DO_BIC(BIC_GFX_c0))
1195 		old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
1196 	if (DO_BIC(BIC_CPUGFX))
1197 		old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
1198 
1199 	old->pc2 = new->pc2 - old->pc2;
1200 	if (DO_BIC(BIC_Pkgpc3))
1201 		old->pc3 = new->pc3 - old->pc3;
1202 	if (DO_BIC(BIC_Pkgpc6))
1203 		old->pc6 = new->pc6 - old->pc6;
1204 	if (DO_BIC(BIC_Pkgpc7))
1205 		old->pc7 = new->pc7 - old->pc7;
1206 	old->pc8 = new->pc8 - old->pc8;
1207 	old->pc9 = new->pc9 - old->pc9;
1208 	old->pc10 = new->pc10 - old->pc10;
1209 	old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
1210 	old->sys_lpi = new->sys_lpi - old->sys_lpi;
1211 	old->pkg_temp_c = new->pkg_temp_c;
1212 
1213 	/* flag an error when rc6 counter resets/wraps */
1214 	if (old->gfx_rc6_ms >  new->gfx_rc6_ms)
1215 		old->gfx_rc6_ms = -1;
1216 	else
1217 		old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
1218 
1219 	old->gfx_mhz = new->gfx_mhz;
1220 
1221 	DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
1222 	DELTA_WRAP32(new->energy_cores, old->energy_cores);
1223 	DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
1224 	DELTA_WRAP32(new->energy_dram, old->energy_dram);
1225 	DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
1226 	DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
1227 
1228 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1229 		if (mp->format == FORMAT_RAW)
1230 			old->counter[i] = new->counter[i];
1231 		else
1232 			old->counter[i] = new->counter[i] - old->counter[i];
1233 	}
1234 
1235 	return 0;
1236 }
1237 
1238 void
1239 delta_core(struct core_data *new, struct core_data *old)
1240 {
1241 	int i;
1242 	struct msr_counter *mp;
1243 
1244 	old->c3 = new->c3 - old->c3;
1245 	old->c6 = new->c6 - old->c6;
1246 	old->c7 = new->c7 - old->c7;
1247 	old->core_temp_c = new->core_temp_c;
1248 	old->mc6_us = new->mc6_us - old->mc6_us;
1249 
1250 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1251 		if (mp->format == FORMAT_RAW)
1252 			old->counter[i] = new->counter[i];
1253 		else
1254 			old->counter[i] = new->counter[i] - old->counter[i];
1255 	}
1256 }
1257 
1258 /*
1259  * old = new - old
1260  */
1261 int
1262 delta_thread(struct thread_data *new, struct thread_data *old,
1263 	struct core_data *core_delta)
1264 {
1265 	int i;
1266 	struct msr_counter *mp;
1267 
1268 	/* we run cpuid just the 1st time, copy the results */
1269 	if (DO_BIC(BIC_APIC))
1270 		new->apic_id = old->apic_id;
1271 	if (DO_BIC(BIC_X2APIC))
1272 		new->x2apic_id = old->x2apic_id;
1273 
1274 	/*
1275 	 * the timestamps from start of measurement interval are in "old"
1276 	 * the timestamp from end of measurement interval are in "new"
1277 	 * over-write old w/ new so we can print end of interval values
1278 	 */
1279 
1280 	old->tv_begin = new->tv_begin;
1281 	old->tv_end = new->tv_end;
1282 
1283 	old->tsc = new->tsc - old->tsc;
1284 
1285 	/* check for TSC < 1 Mcycles over interval */
1286 	if (old->tsc < (1000 * 1000))
1287 		errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
1288 		     "You can disable all c-states by booting with \"idle=poll\"\n"
1289 		     "or just the deep ones with \"processor.max_cstate=1\"");
1290 
1291 	old->c1 = new->c1 - old->c1;
1292 
1293 	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1294 		if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
1295 			old->aperf = new->aperf - old->aperf;
1296 			old->mperf = new->mperf - old->mperf;
1297 		} else {
1298 			return -1;
1299 		}
1300 	}
1301 
1302 
1303 	if (use_c1_residency_msr) {
1304 		/*
1305 		 * Some models have a dedicated C1 residency MSR,
1306 		 * which should be more accurate than the derivation below.
1307 		 */
1308 	} else {
1309 		/*
1310 		 * As counter collection is not atomic,
1311 		 * it is possible for mperf's non-halted cycles + idle states
1312 		 * to exceed TSC's all cycles: show c1 = 0% in that case.
1313 		 */
1314 		if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
1315 			old->c1 = 0;
1316 		else {
1317 			/* normal case, derive c1 */
1318 			old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
1319 				- core_delta->c6 - core_delta->c7;
1320 		}
1321 	}
1322 
1323 	if (old->mperf == 0) {
1324 		if (debug > 1)
1325 			fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
1326 		old->mperf = 1;	/* divide by 0 protection */
1327 	}
1328 
1329 	if (DO_BIC(BIC_IRQ))
1330 		old->irq_count = new->irq_count - old->irq_count;
1331 
1332 	if (DO_BIC(BIC_SMI))
1333 		old->smi_count = new->smi_count - old->smi_count;
1334 
1335 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1336 		if (mp->format == FORMAT_RAW)
1337 			old->counter[i] = new->counter[i];
1338 		else
1339 			old->counter[i] = new->counter[i] - old->counter[i];
1340 	}
1341 	return 0;
1342 }
1343 
1344 int delta_cpu(struct thread_data *t, struct core_data *c,
1345 	struct pkg_data *p, struct thread_data *t2,
1346 	struct core_data *c2, struct pkg_data *p2)
1347 {
1348 	int retval = 0;
1349 
1350 	/* calculate core delta only for 1st thread in core */
1351 	if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
1352 		delta_core(c, c2);
1353 
1354 	/* always calculate thread delta */
1355 	retval = delta_thread(t, t2, c2);	/* c2 is core delta */
1356 	if (retval)
1357 		return retval;
1358 
1359 	/* calculate package delta only for 1st core in package */
1360 	if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
1361 		retval = delta_package(p, p2);
1362 
1363 	return retval;
1364 }
1365 
1366 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1367 {
1368 	int i;
1369 	struct msr_counter  *mp;
1370 
1371 	t->tv_begin.tv_sec = 0;
1372 	t->tv_begin.tv_usec = 0;
1373 	t->tv_end.tv_sec = 0;
1374 	t->tv_end.tv_usec = 0;
1375 
1376 	t->tsc = 0;
1377 	t->aperf = 0;
1378 	t->mperf = 0;
1379 	t->c1 = 0;
1380 
1381 	t->irq_count = 0;
1382 	t->smi_count = 0;
1383 
1384 	/* tells format_counters to dump all fields from this set */
1385 	t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
1386 
1387 	c->c3 = 0;
1388 	c->c6 = 0;
1389 	c->c7 = 0;
1390 	c->mc6_us = 0;
1391 	c->core_temp_c = 0;
1392 
1393 	p->pkg_wtd_core_c0 = 0;
1394 	p->pkg_any_core_c0 = 0;
1395 	p->pkg_any_gfxe_c0 = 0;
1396 	p->pkg_both_core_gfxe_c0 = 0;
1397 
1398 	p->pc2 = 0;
1399 	if (DO_BIC(BIC_Pkgpc3))
1400 		p->pc3 = 0;
1401 	if (DO_BIC(BIC_Pkgpc6))
1402 		p->pc6 = 0;
1403 	if (DO_BIC(BIC_Pkgpc7))
1404 		p->pc7 = 0;
1405 	p->pc8 = 0;
1406 	p->pc9 = 0;
1407 	p->pc10 = 0;
1408 	p->cpu_lpi = 0;
1409 	p->sys_lpi = 0;
1410 
1411 	p->energy_pkg = 0;
1412 	p->energy_dram = 0;
1413 	p->energy_cores = 0;
1414 	p->energy_gfx = 0;
1415 	p->rapl_pkg_perf_status = 0;
1416 	p->rapl_dram_perf_status = 0;
1417 	p->pkg_temp_c = 0;
1418 
1419 	p->gfx_rc6_ms = 0;
1420 	p->gfx_mhz = 0;
1421 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
1422 		t->counter[i] = 0;
1423 
1424 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
1425 		c->counter[i] = 0;
1426 
1427 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
1428 		p->counter[i] = 0;
1429 }
1430 int sum_counters(struct thread_data *t, struct core_data *c,
1431 	struct pkg_data *p)
1432 {
1433 	int i;
1434 	struct msr_counter *mp;
1435 
1436 	/* copy un-changing apic_id's */
1437 	if (DO_BIC(BIC_APIC))
1438 		average.threads.apic_id = t->apic_id;
1439 	if (DO_BIC(BIC_X2APIC))
1440 		average.threads.x2apic_id = t->x2apic_id;
1441 
1442 	/* remember first tv_begin */
1443 	if (average.threads.tv_begin.tv_sec == 0)
1444 		average.threads.tv_begin = t->tv_begin;
1445 
1446 	/* remember last tv_end */
1447 	average.threads.tv_end = t->tv_end;
1448 
1449 	average.threads.tsc += t->tsc;
1450 	average.threads.aperf += t->aperf;
1451 	average.threads.mperf += t->mperf;
1452 	average.threads.c1 += t->c1;
1453 
1454 	average.threads.irq_count += t->irq_count;
1455 	average.threads.smi_count += t->smi_count;
1456 
1457 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1458 		if (mp->format == FORMAT_RAW)
1459 			continue;
1460 		average.threads.counter[i] += t->counter[i];
1461 	}
1462 
1463 	/* sum per-core values only for 1st thread in core */
1464 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1465 		return 0;
1466 
1467 	average.cores.c3 += c->c3;
1468 	average.cores.c6 += c->c6;
1469 	average.cores.c7 += c->c7;
1470 	average.cores.mc6_us += c->mc6_us;
1471 
1472 	average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
1473 
1474 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1475 		if (mp->format == FORMAT_RAW)
1476 			continue;
1477 		average.cores.counter[i] += c->counter[i];
1478 	}
1479 
1480 	/* sum per-pkg values only for 1st core in pkg */
1481 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1482 		return 0;
1483 
1484 	if (DO_BIC(BIC_Totl_c0))
1485 		average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
1486 	if (DO_BIC(BIC_Any_c0))
1487 		average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
1488 	if (DO_BIC(BIC_GFX_c0))
1489 		average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
1490 	if (DO_BIC(BIC_CPUGFX))
1491 		average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
1492 
1493 	average.packages.pc2 += p->pc2;
1494 	if (DO_BIC(BIC_Pkgpc3))
1495 		average.packages.pc3 += p->pc3;
1496 	if (DO_BIC(BIC_Pkgpc6))
1497 		average.packages.pc6 += p->pc6;
1498 	if (DO_BIC(BIC_Pkgpc7))
1499 		average.packages.pc7 += p->pc7;
1500 	average.packages.pc8 += p->pc8;
1501 	average.packages.pc9 += p->pc9;
1502 	average.packages.pc10 += p->pc10;
1503 
1504 	average.packages.cpu_lpi = p->cpu_lpi;
1505 	average.packages.sys_lpi = p->sys_lpi;
1506 
1507 	average.packages.energy_pkg += p->energy_pkg;
1508 	average.packages.energy_dram += p->energy_dram;
1509 	average.packages.energy_cores += p->energy_cores;
1510 	average.packages.energy_gfx += p->energy_gfx;
1511 
1512 	average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
1513 	average.packages.gfx_mhz = p->gfx_mhz;
1514 
1515 	average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
1516 
1517 	average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
1518 	average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
1519 
1520 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1521 		if (mp->format == FORMAT_RAW)
1522 			continue;
1523 		average.packages.counter[i] += p->counter[i];
1524 	}
1525 	return 0;
1526 }
1527 /*
1528  * sum the counters for all cpus in the system
1529  * compute the weighted average
1530  */
1531 void compute_average(struct thread_data *t, struct core_data *c,
1532 	struct pkg_data *p)
1533 {
1534 	int i;
1535 	struct msr_counter *mp;
1536 
1537 	clear_counters(&average.threads, &average.cores, &average.packages);
1538 
1539 	for_all_cpus(sum_counters, t, c, p);
1540 
1541 	average.threads.tsc /= topo.num_cpus;
1542 	average.threads.aperf /= topo.num_cpus;
1543 	average.threads.mperf /= topo.num_cpus;
1544 	average.threads.c1 /= topo.num_cpus;
1545 
1546 	if (average.threads.irq_count > 9999999)
1547 		sums_need_wide_columns = 1;
1548 
1549 	average.cores.c3 /= topo.num_cores;
1550 	average.cores.c6 /= topo.num_cores;
1551 	average.cores.c7 /= topo.num_cores;
1552 	average.cores.mc6_us /= topo.num_cores;
1553 
1554 	if (DO_BIC(BIC_Totl_c0))
1555 		average.packages.pkg_wtd_core_c0 /= topo.num_packages;
1556 	if (DO_BIC(BIC_Any_c0))
1557 		average.packages.pkg_any_core_c0 /= topo.num_packages;
1558 	if (DO_BIC(BIC_GFX_c0))
1559 		average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
1560 	if (DO_BIC(BIC_CPUGFX))
1561 		average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
1562 
1563 	average.packages.pc2 /= topo.num_packages;
1564 	if (DO_BIC(BIC_Pkgpc3))
1565 		average.packages.pc3 /= topo.num_packages;
1566 	if (DO_BIC(BIC_Pkgpc6))
1567 		average.packages.pc6 /= topo.num_packages;
1568 	if (DO_BIC(BIC_Pkgpc7))
1569 		average.packages.pc7 /= topo.num_packages;
1570 
1571 	average.packages.pc8 /= topo.num_packages;
1572 	average.packages.pc9 /= topo.num_packages;
1573 	average.packages.pc10 /= topo.num_packages;
1574 
1575 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1576 		if (mp->format == FORMAT_RAW)
1577 			continue;
1578 		if (mp->type == COUNTER_ITEMS) {
1579 			if (average.threads.counter[i] > 9999999)
1580 				sums_need_wide_columns = 1;
1581 			continue;
1582 		}
1583 		average.threads.counter[i] /= topo.num_cpus;
1584 	}
1585 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1586 		if (mp->format == FORMAT_RAW)
1587 			continue;
1588 		if (mp->type == COUNTER_ITEMS) {
1589 			if (average.cores.counter[i] > 9999999)
1590 				sums_need_wide_columns = 1;
1591 		}
1592 		average.cores.counter[i] /= topo.num_cores;
1593 	}
1594 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1595 		if (mp->format == FORMAT_RAW)
1596 			continue;
1597 		if (mp->type == COUNTER_ITEMS) {
1598 			if (average.packages.counter[i] > 9999999)
1599 				sums_need_wide_columns = 1;
1600 		}
1601 		average.packages.counter[i] /= topo.num_packages;
1602 	}
1603 }
1604 
1605 static unsigned long long rdtsc(void)
1606 {
1607 	unsigned int low, high;
1608 
1609 	asm volatile("rdtsc" : "=a" (low), "=d" (high));
1610 
1611 	return low | ((unsigned long long)high) << 32;
1612 }
1613 
/*
 * fopen_or_die()
 *
 * fopen() wrapper that never returns NULL: when the file cannot be
 * opened, the program exits with status 1 through err(), which reports
 * the path together with the errno text.
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *stream;

	stream = fopen(path, mode);
	if (stream == NULL)
		err(1, "%s: open failed", path);

	return stream;
}
/*
 * snapshot_sysfs_counter()
 *
 * Read a single unsigned decimal number from the file at "path" and
 * return it.
 *
 * The program exits with status 1 if the file cannot be opened (see
 * fopen_or_die()) or if no number can be parsed from it.
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
	FILE *fp;
	int retval;
	unsigned long long counter;

	fp = fopen_or_die(path, "r");

	/*
	 * %llu is the conversion that matches an unsigned long long
	 * destination; with %lld the pointer type does not match and
	 * values above LLONG_MAX cannot be represented.
	 */
	retval = fscanf(fp, "%llu", &counter);
	if (retval != 1)
		err(1, "snapshot_sysfs_counter(%s)", path);

	fclose(fp);

	return counter;
}
1646 
1647 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
1648 {
1649 	if (mp->msr_num != 0) {
1650 		if (get_msr(cpu, mp->msr_num, counterp))
1651 			return -1;
1652 	} else {
1653 		char path[128 + PATH_BYTES];
1654 
1655 		if (mp->flags & SYSFS_PERCPU) {
1656 			sprintf(path, "/sys/devices/system/cpu/cpu%d/%s",
1657 				 cpu, mp->path);
1658 
1659 			*counterp = snapshot_sysfs_counter(path);
1660 		} else {
1661 			*counterp = snapshot_sysfs_counter(mp->path);
1662 		}
1663 	}
1664 
1665 	return 0;
1666 }
1667 
1668 void get_apic_id(struct thread_data *t)
1669 {
1670 	unsigned int eax, ebx, ecx, edx, max_level;
1671 
1672 	eax = ebx = ecx = edx = 0;
1673 
1674 	if (!genuine_intel)
1675 		return;
1676 
1677 	__cpuid(0, max_level, ebx, ecx, edx);
1678 
1679 	__cpuid(1, eax, ebx, ecx, edx);
1680 	t->apic_id = (ebx >> 24) & 0xf;
1681 
1682 	if (max_level < 0xb)
1683 		return;
1684 
1685 	if (!DO_BIC(BIC_X2APIC))
1686 		return;
1687 
1688 	ecx = 0;
1689 	__cpuid(0xb, eax, ebx, ecx, edx);
1690 	t->x2apic_id = edx;
1691 
1692 	if (debug && (t->apic_id != t->x2apic_id))
1693 		fprintf(outf, "cpu%d: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
1694 }
1695 
/*
 * get_counters(...)
 * migrate to cpu
 * acquire and record local counters for that cpu
 *
 * Thread-level values are stored in "t" for every CPU.  Core-level values
 * are stored in "c" only when t->flags has CPU_IS_FIRST_THREAD_IN_CORE,
 * and package-level values in "p" only when it also has
 * CPU_IS_FIRST_CORE_IN_PACKAGE.  t->tv_begin and t->tv_end bracket the
 * collection with gettimeofday() timestamps.
 *
 * Returns 0 on success.  Returns -1 when migrating to the CPU fails and
 * another negative value (-3 .. -17) when an MSR or added-counter read
 * fails; these codes are not unique per failure site (e.g. -10 and -13
 * are each used for several different reads).  On any failure return,
 * t->tv_end is not updated.
 */
int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int cpu = t->cpu_id;
	unsigned long long msr;
	int aperf_mperf_retry_count = 0;
	struct msr_counter *mp;
	int i;

	/* timestamp the start of collection for this CPU */
	gettimeofday(&t->tv_begin, (struct timezone *)NULL);

	if (cpu_migrate(cpu)) {
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	/* the APIC ids are only looked up while first_counter_read is set */
	if (first_counter_read)
		get_apic_id(t);
retry:
	t->tsc = rdtsc();	/* we are running on local CPU of interest */

	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
		unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;

		/*
		 * The TSC, APERF and MPERF must be read together for
		 * APERF/MPERF and MPERF/TSC to give accurate results.
		 *
		 * Unfortunately, APERF and MPERF are read by
		 * individual system call, so delays may occur
		 * between them.  If the time to read them
		 * varies by a large amount, we re-read them.
		 */

		/*
		 * This initial dummy APERF read has been seen to
		 * reduce jitter in the subsequent reads.
		 */

		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
			return -3;

		t->tsc = rdtsc();	/* re-read close to APERF */

		tsc_before = t->tsc;

		/* this APERF read is the one that is kept */
		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
			return -3;

		tsc_between = rdtsc();

		if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
			return -4;

		tsc_after = rdtsc();

		/* TSC cycles spent on the APERF read and on the MPERF read */
		aperf_time = tsc_between - tsc_before;
		mperf_time = tsc_after - tsc_between;

		/*
		 * If the system call latency to read APERF and MPERF
		 * differ by more than 2x, then try again.
		 *
		 * After the 5th mismatch in a row, a warning is printed and
		 * the values just read are used anyway.
		 */
		if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
			aperf_mperf_retry_count++;
			if (aperf_mperf_retry_count < 5)
				goto retry;
			else
				warnx("cpu%d jitter %lld %lld",
					cpu, aperf_time, mperf_time);
		}
		aperf_mperf_retry_count = 0;

		/* scale both counters by aperf_mperf_multiplier */
		t->aperf = t->aperf * aperf_mperf_multiplier;
		t->mperf = t->mperf * aperf_mperf_multiplier;
	}

	if (DO_BIC(BIC_IRQ))
		t->irq_count = irqs_per_cpu[cpu];
	if (DO_BIC(BIC_SMI)) {
		if (get_msr(cpu, MSR_SMI_COUNT, &msr))
			return -5;
		/* only the low 32 bits of the MSR are kept */
		t->smi_count = msr & 0xFFFFFFFF;
	}
	if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
		if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
			return -6;
	}

	/* added per-thread counters */
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (get_mp(cpu, mp, &t->counter[i]))
			return -10;
	}

	/* collect core counters only for 1st thread in core */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
		goto done;

	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) {
		if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
			return -6;
	}

	/*
	 * With do_knl_cstates set, the KNL-specific C6 MSR is read
	 * whether or not the CPU%c6 column is enabled.
	 */
	if (DO_BIC(BIC_CPU_c6) && !do_knl_cstates) {
		if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
			return -7;
	} else if (do_knl_cstates) {
		if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
			return -7;
	}

	if (DO_BIC(BIC_CPU_c7))
		if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
			return -8;

	/*
	 * NOTE(review): the MSR name ends in _MS while the field is named
	 * mc6_us -- confirm the units.
	 */
	if (DO_BIC(BIC_Mod_c6))
		if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
			return -8;

	if (DO_BIC(BIC_CoreTmp)) {
		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
			return -9;
		/* temperature = tcc_activation_temp minus MSR bits 22:16 */
		c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
	}

	/* added per-core counters */
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (get_mp(cpu, mp, &c->counter[i]))
			return -10;
	}

	/* collect package counters only for 1st core in package */
	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		goto done;

	if (DO_BIC(BIC_Totl_c0)) {
		if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
			return -10;
	}
	if (DO_BIC(BIC_Any_c0)) {
		if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
			return -11;
	}
	if (DO_BIC(BIC_GFX_c0)) {
		if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
			return -12;
	}
	if (DO_BIC(BIC_CPUGFX)) {
		if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
			return -13;
	}
	if (DO_BIC(BIC_Pkgpc3))
		if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
			return -9;
	if (DO_BIC(BIC_Pkgpc6)) {
		/* with do_slm_cstates set, a different MSR holds package C6 */
		if (do_slm_cstates) {
			if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
				return -10;
		} else {
			if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
				return -10;
		}
	}

	if (DO_BIC(BIC_Pkgpc2))
		if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
			return -11;
	if (DO_BIC(BIC_Pkgpc7))
		if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
			return -12;
	if (DO_BIC(BIC_Pkgpc8))
		if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
			return -13;
	if (DO_BIC(BIC_Pkgpc9))
		if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
			return -13;
	if (DO_BIC(BIC_Pkgpc10))
		if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
			return -13;

	/* these values are copied from globals, not read here */
	if (DO_BIC(BIC_CPU_LPI))
		p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
	if (DO_BIC(BIC_SYS_LPI))
		p->sys_lpi = cpuidle_cur_sys_lpi_us;

	/* RAPL: each enabled domain keeps the low 32 bits of its status MSR */
	if (do_rapl & RAPL_PKG) {
		if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
			return -13;
		p->energy_pkg = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
		if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
			return -14;
		p->energy_cores = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_DRAM) {
		if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
			return -15;
		p->energy_dram = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_GFX) {
		if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
			return -16;
		p->energy_gfx = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_PKG_PERF_STATUS) {
		if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
			return -16;
		p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_DRAM_PERF_STATUS) {
		if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
			return -16;
		p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
	}
	if (DO_BIC(BIC_PkgTmp)) {
		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
			return -17;
		/* temperature = tcc_activation_temp minus MSR bits 22:16 */
		p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
	}

	/* the graphics values are copied from globals, not read here */
	if (DO_BIC(BIC_GFX_rc6))
		p->gfx_rc6_ms = gfx_cur_rc6_ms;

	if (DO_BIC(BIC_GFXMHz))
		p->gfx_mhz = gfx_cur_mhz;

	/* added per-package counters */
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if (get_mp(cpu, mp, &p->counter[i]))
			return -10;
	}
done:
	/* timestamp the end of collection for this CPU */
	gettimeofday(&t->tv_end, (struct timezone *)NULL);

	return 0;
}
1935 
/*
 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
 * If you change the values, note they are used both in comparisons
 * (>= PCL__7) and to index pkg_cstate_limit_strings[], so the order of
 * the strings below must track the numeric values exactly.
 */

#define PCLUKN 0 /* Unknown */
#define PCLRSV 1 /* Reserved */
#define PCL__0 2 /* PC0 */
#define PCL__1 3 /* PC1 */
#define PCL__2 4 /* PC2 */
#define PCL__3 5 /* PC3 */
#define PCL__4 6 /* PC4 */
#define PCL__6 7 /* PC6 */
#define PCL_6N 8 /* PC6 No Retention */
#define PCL_6R 9 /* PC6 Retention */
#define PCL__7 10 /* PC7 */
#define PCL_7S 11 /* PC7 Shrink */
#define PCL__8 12 /* PC8 */
#define PCL__9 13 /* PC9 */
#define PCLUNL 14 /* Unlimited */

int pkg_cstate_limit = PCLUKN;

/*
 * Human-readable name for each PCL* value, indexed by that value.
 * Entry 0 is PCLUKN ("unknown") and entry 1 is PCLRSV ("reserved").
 */
char *pkg_cstate_limit_strings[] = { "unknown", "reserved", "pc0", "pc1", "pc2",
	"pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "unlimited"};

/* Per-platform decode tables: 16 entries each, every entry a PCL* value. */
int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};
int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1970 
1971 
1972 static void
1973 calculate_tsc_tweak()
1974 {
1975 	tsc_tweak = base_hz / tsc_hz;
1976 }
1977 
1978 static void
1979 dump_nhm_platform_info(void)
1980 {
1981 	unsigned long long msr;
1982 	unsigned int ratio;
1983 
1984 	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
1985 
1986 	fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
1987 
1988 	ratio = (msr >> 40) & 0xFF;
1989 	fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n",
1990 		ratio, bclk, ratio * bclk);
1991 
1992 	ratio = (msr >> 8) & 0xFF;
1993 	fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
1994 		ratio, bclk, ratio * bclk);
1995 
1996 	get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
1997 	fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
1998 		base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
1999 
2000 	return;
2001 }
2002 
2003 static void
2004 dump_hsw_turbo_ratio_limits(void)
2005 {
2006 	unsigned long long msr;
2007 	unsigned int ratio;
2008 
2009 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
2010 
2011 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
2012 
2013 	ratio = (msr >> 8) & 0xFF;
2014 	if (ratio)
2015 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n",
2016 			ratio, bclk, ratio * bclk);
2017 
2018 	ratio = (msr >> 0) & 0xFF;
2019 	if (ratio)
2020 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n",
2021 			ratio, bclk, ratio * bclk);
2022 	return;
2023 }
2024 
2025 static void
2026 dump_ivt_turbo_ratio_limits(void)
2027 {
2028 	unsigned long long msr;
2029 	unsigned int ratio;
2030 
2031 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
2032 
2033 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
2034 
2035 	ratio = (msr >> 56) & 0xFF;
2036 	if (ratio)
2037 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n",
2038 			ratio, bclk, ratio * bclk);
2039 
2040 	ratio = (msr >> 48) & 0xFF;
2041 	if (ratio)
2042 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n",
2043 			ratio, bclk, ratio * bclk);
2044 
2045 	ratio = (msr >> 40) & 0xFF;
2046 	if (ratio)
2047 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n",
2048 			ratio, bclk, ratio * bclk);
2049 
2050 	ratio = (msr >> 32) & 0xFF;
2051 	if (ratio)
2052 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n",
2053 			ratio, bclk, ratio * bclk);
2054 
2055 	ratio = (msr >> 24) & 0xFF;
2056 	if (ratio)
2057 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n",
2058 			ratio, bclk, ratio * bclk);
2059 
2060 	ratio = (msr >> 16) & 0xFF;
2061 	if (ratio)
2062 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n",
2063 			ratio, bclk, ratio * bclk);
2064 
2065 	ratio = (msr >> 8) & 0xFF;
2066 	if (ratio)
2067 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n",
2068 			ratio, bclk, ratio * bclk);
2069 
2070 	ratio = (msr >> 0) & 0xFF;
2071 	if (ratio)
2072 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n",
2073 			ratio, bclk, ratio * bclk);
2074 	return;
2075 }
2076 int has_turbo_ratio_group_limits(int family, int model)
2077 {
2078 
2079 	if (!genuine_intel)
2080 		return 0;
2081 
2082 	switch (model) {
2083 	case INTEL_FAM6_ATOM_GOLDMONT:
2084 	case INTEL_FAM6_SKYLAKE_X:
2085 	case INTEL_FAM6_ATOM_DENVERTON:
2086 		return 1;
2087 	}
2088 	return 0;
2089 }
2090 
2091 static void
2092 dump_turbo_ratio_limits(int family, int model)
2093 {
2094 	unsigned long long msr, core_counts;
2095 	unsigned int ratio, group_size;
2096 
2097 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2098 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
2099 
2100 	if (has_turbo_ratio_group_limits(family, model)) {
2101 		get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
2102 		fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
2103 	} else {
2104 		core_counts = 0x0807060504030201;
2105 	}
2106 
2107 	ratio = (msr >> 56) & 0xFF;
2108 	group_size = (core_counts >> 56) & 0xFF;
2109 	if (ratio)
2110 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2111 			ratio, bclk, ratio * bclk, group_size);
2112 
2113 	ratio = (msr >> 48) & 0xFF;
2114 	group_size = (core_counts >> 48) & 0xFF;
2115 	if (ratio)
2116 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2117 			ratio, bclk, ratio * bclk, group_size);
2118 
2119 	ratio = (msr >> 40) & 0xFF;
2120 	group_size = (core_counts >> 40) & 0xFF;
2121 	if (ratio)
2122 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2123 			ratio, bclk, ratio * bclk, group_size);
2124 
2125 	ratio = (msr >> 32) & 0xFF;
2126 	group_size = (core_counts >> 32) & 0xFF;
2127 	if (ratio)
2128 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2129 			ratio, bclk, ratio * bclk, group_size);
2130 
2131 	ratio = (msr >> 24) & 0xFF;
2132 	group_size = (core_counts >> 24) & 0xFF;
2133 	if (ratio)
2134 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2135 			ratio, bclk, ratio * bclk, group_size);
2136 
2137 	ratio = (msr >> 16) & 0xFF;
2138 	group_size = (core_counts >> 16) & 0xFF;
2139 	if (ratio)
2140 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2141 			ratio, bclk, ratio * bclk, group_size);
2142 
2143 	ratio = (msr >> 8) & 0xFF;
2144 	group_size = (core_counts >> 8) & 0xFF;
2145 	if (ratio)
2146 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2147 			ratio, bclk, ratio * bclk, group_size);
2148 
2149 	ratio = (msr >> 0) & 0xFF;
2150 	group_size = (core_counts >> 0) & 0xFF;
2151 	if (ratio)
2152 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2153 			ratio, bclk, ratio * bclk, group_size);
2154 	return;
2155 }
2156 
2157 static void
2158 dump_atom_turbo_ratio_limits(void)
2159 {
2160 	unsigned long long msr;
2161 	unsigned int ratio;
2162 
2163 	get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
2164 	fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2165 
2166 	ratio = (msr >> 0) & 0x3F;
2167 	if (ratio)
2168 		fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n",
2169 			ratio, bclk, ratio * bclk);
2170 
2171 	ratio = (msr >> 8) & 0x3F;
2172 	if (ratio)
2173 		fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n",
2174 			ratio, bclk, ratio * bclk);
2175 
2176 	ratio = (msr >> 16) & 0x3F;
2177 	if (ratio)
2178 		fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
2179 			ratio, bclk, ratio * bclk);
2180 
2181 	get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
2182 	fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2183 
2184 	ratio = (msr >> 24) & 0x3F;
2185 	if (ratio)
2186 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n",
2187 			ratio, bclk, ratio * bclk);
2188 
2189 	ratio = (msr >> 16) & 0x3F;
2190 	if (ratio)
2191 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n",
2192 			ratio, bclk, ratio * bclk);
2193 
2194 	ratio = (msr >> 8) & 0x3F;
2195 	if (ratio)
2196 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n",
2197 			ratio, bclk, ratio * bclk);
2198 
2199 	ratio = (msr >> 0) & 0x3F;
2200 	if (ratio)
2201 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n",
2202 			ratio, bclk, ratio * bclk);
2203 }
2204 
2205 static void
2206 dump_knl_turbo_ratio_limits(void)
2207 {
2208 	const unsigned int buckets_no = 7;
2209 
2210 	unsigned long long msr;
2211 	int delta_cores, delta_ratio;
2212 	int i, b_nr;
2213 	unsigned int cores[buckets_no];
2214 	unsigned int ratio[buckets_no];
2215 
2216 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2217 
2218 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
2219 		base_cpu, msr);
2220 
2221 	/**
2222 	 * Turbo encoding in KNL is as follows:
2223 	 * [0] -- Reserved
2224 	 * [7:1] -- Base value of number of active cores of bucket 1.
2225 	 * [15:8] -- Base value of freq ratio of bucket 1.
2226 	 * [20:16] -- +ve delta of number of active cores of bucket 2.
2227 	 * i.e. active cores of bucket 2 =
2228 	 * active cores of bucket 1 + delta
2229 	 * [23:21] -- Negative delta of freq ratio of bucket 2.
2230 	 * i.e. freq ratio of bucket 2 =
2231 	 * freq ratio of bucket 1 - delta
2232 	 * [28:24]-- +ve delta of number of active cores of bucket 3.
2233 	 * [31:29]-- -ve delta of freq ratio of bucket 3.
2234 	 * [36:32]-- +ve delta of number of active cores of bucket 4.
2235 	 * [39:37]-- -ve delta of freq ratio of bucket 4.
2236 	 * [44:40]-- +ve delta of number of active cores of bucket 5.
2237 	 * [47:45]-- -ve delta of freq ratio of bucket 5.
2238 	 * [52:48]-- +ve delta of number of active cores of bucket 6.
2239 	 * [55:53]-- -ve delta of freq ratio of bucket 6.
2240 	 * [60:56]-- +ve delta of number of active cores of bucket 7.
2241 	 * [63:61]-- -ve delta of freq ratio of bucket 7.
2242 	 */
2243 
2244 	b_nr = 0;
2245 	cores[b_nr] = (msr & 0xFF) >> 1;
2246 	ratio[b_nr] = (msr >> 8) & 0xFF;
2247 
2248 	for (i = 16; i < 64; i += 8) {
2249 		delta_cores = (msr >> i) & 0x1F;
2250 		delta_ratio = (msr >> (i + 5)) & 0x7;
2251 
2252 		cores[b_nr + 1] = cores[b_nr] + delta_cores;
2253 		ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
2254 		b_nr++;
2255 	}
2256 
2257 	for (i = buckets_no - 1; i >= 0; i--)
2258 		if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
2259 			fprintf(outf,
2260 				"%d * %.1f = %.1f MHz max turbo %d active cores\n",
2261 				ratio[i], bclk, ratio[i] * bclk, cores[i]);
2262 }
2263 
2264 static void
2265 dump_nhm_cst_cfg(void)
2266 {
2267 	unsigned long long msr;
2268 
2269 	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2270 
2271 	fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
2272 
2273 	fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
2274 		(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
2275 		(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
2276 		(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
2277 		(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
2278 		(msr & (1 << 15)) ? "" : "UN",
2279 		(unsigned int)msr & 0xF,
2280 		pkg_cstate_limit_strings[pkg_cstate_limit]);
2281 
2282 #define AUTOMATIC_CSTATE_CONVERSION		(1UL << 16)
2283 	if (has_automatic_cstate_conversion) {
2284 		fprintf(outf, ", automatic c-state conversion=%s",
2285 			(msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
2286 	}
2287 
2288 	fprintf(outf, ")\n");
2289 
2290 	return;
2291 }
2292 
2293 static void
2294 dump_config_tdp(void)
2295 {
2296 	unsigned long long msr;
2297 
2298 	get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
2299 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
2300 	fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
2301 
2302 	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
2303 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
2304 	if (msr) {
2305 		fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2306 		fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2307 		fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2308 		fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
2309 	}
2310 	fprintf(outf, ")\n");
2311 
2312 	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
2313 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
2314 	if (msr) {
2315 		fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2316 		fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2317 		fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2318 		fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
2319 	}
2320 	fprintf(outf, ")\n");
2321 
2322 	get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
2323 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
2324 	if ((msr) & 0x3)
2325 		fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
2326 	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2327 	fprintf(outf, ")\n");
2328 
2329 	get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
2330 	fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
2331 	fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
2332 	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2333 	fprintf(outf, ")\n");
2334 }
2335 
2336 unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
2337 
2338 void print_irtl(void)
2339 {
2340 	unsigned long long msr;
2341 
2342 	get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
2343 	fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
2344 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2345 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2346 
2347 	get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
2348 	fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
2349 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2350 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2351 
2352 	get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
2353 	fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
2354 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2355 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2356 
2357 	if (!do_irtl_hsw)
2358 		return;
2359 
2360 	get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
2361 	fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
2362 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2363 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2364 
2365 	get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
2366 	fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
2367 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2368 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2369 
2370 	get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
2371 	fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
2372 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2373 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2374 
2375 }
2376 void free_fd_percpu(void)
2377 {
2378 	int i;
2379 
2380 	for (i = 0; i < topo.max_cpu_num + 1; ++i) {
2381 		if (fd_percpu[i] != 0)
2382 			close(fd_percpu[i]);
2383 	}
2384 
2385 	free(fd_percpu);
2386 }
2387 
2388 void free_all_buffers(void)
2389 {
2390 	int i;
2391 
2392 	CPU_FREE(cpu_present_set);
2393 	cpu_present_set = NULL;
2394 	cpu_present_setsize = 0;
2395 
2396 	CPU_FREE(cpu_affinity_set);
2397 	cpu_affinity_set = NULL;
2398 	cpu_affinity_setsize = 0;
2399 
2400 	free(thread_even);
2401 	free(core_even);
2402 	free(package_even);
2403 
2404 	thread_even = NULL;
2405 	core_even = NULL;
2406 	package_even = NULL;
2407 
2408 	free(thread_odd);
2409 	free(core_odd);
2410 	free(package_odd);
2411 
2412 	thread_odd = NULL;
2413 	core_odd = NULL;
2414 	package_odd = NULL;
2415 
2416 	free(output_buffer);
2417 	output_buffer = NULL;
2418 	outp = NULL;
2419 
2420 	free_fd_percpu();
2421 
2422 	free(irq_column_2_cpu);
2423 	free(irqs_per_cpu);
2424 
2425 	for (i = 0; i <= topo.max_cpu_num; ++i) {
2426 		if (cpus[i].put_ids)
2427 			CPU_FREE(cpus[i].put_ids);
2428 	}
2429 	free(cpus);
2430 }
2431 
2432 
2433 /*
2434  * Parse a file containing a single int.
2435  */
2436 int parse_int_file(const char *fmt, ...)
2437 {
2438 	va_list args;
2439 	char path[PATH_MAX];
2440 	FILE *filep;
2441 	int value;
2442 
2443 	va_start(args, fmt);
2444 	vsnprintf(path, sizeof(path), fmt, args);
2445 	va_end(args);
2446 	filep = fopen_or_die(path, "r");
2447 	if (fscanf(filep, "%d", &value) != 1)
2448 		err(1, "%s: failed to parse number from file", path);
2449 	fclose(filep);
2450 	return value;
2451 }
2452 
/*
 * cpu_is_first_core_in_package(cpu)
 * Return 1 if cpu equals the first number listed in its
 * topology/core_siblings_list file, 0 otherwise.
 */
int cpu_is_first_core_in_package(int cpu)
{
	int first_sibling;

	first_sibling = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);

	return first_sibling == cpu;
}
2461 
/*
 * Return the integer stored in the given CPU's
 * topology/physical_package_id sysfs file.
 */
int get_physical_package_id(int cpu)
{
	int package_id;

	package_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);

	return package_id;
}
2466 
/*
 * Return the integer stored in the given CPU's topology/core_id
 * sysfs file.
 */
int get_core_id(int cpu)
{
	int core_id;

	core_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);

	return core_id;
}
2471 
2472 void set_node_data(void)
2473 {
2474 	int pkg, node, lnode, cpu, cpux;
2475 	int cpu_count;
2476 
2477 	/* initialize logical_node_id */
2478 	for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
2479 		cpus[cpu].logical_node_id = -1;
2480 
2481 	cpu_count = 0;
2482 	for (pkg = 0; pkg < topo.num_packages; pkg++) {
2483 		lnode = 0;
2484 		for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
2485 			if (cpus[cpu].physical_package_id != pkg)
2486 				continue;
2487 			/* find a cpu with an unset logical_node_id */
2488 			if (cpus[cpu].logical_node_id != -1)
2489 				continue;
2490 			cpus[cpu].logical_node_id = lnode;
2491 			node = cpus[cpu].physical_node_id;
2492 			cpu_count++;
2493 			/*
2494 			 * find all matching cpus on this pkg and set
2495 			 * the logical_node_id
2496 			 */
2497 			for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
2498 				if ((cpus[cpux].physical_package_id == pkg) &&
2499 				   (cpus[cpux].physical_node_id == node)) {
2500 					cpus[cpux].logical_node_id = lnode;
2501 					cpu_count++;
2502 				}
2503 			}
2504 			lnode++;
2505 			if (lnode > topo.nodes_per_pkg)
2506 				topo.nodes_per_pkg = lnode;
2507 		}
2508 		if (cpu_count >= topo.max_cpu_num)
2509 			break;
2510 	}
2511 }
2512 
2513 int get_physical_node_id(struct cpu_topology *thiscpu)
2514 {
2515 	char path[80];
2516 	FILE *filep;
2517 	int i;
2518 	int cpu = thiscpu->logical_cpu_id;
2519 
2520 	for (i = 0; i <= topo.max_cpu_num; i++) {
2521 		sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist",
2522 			cpu, i);
2523 		filep = fopen(path, "r");
2524 		if (!filep)
2525 			continue;
2526 		fclose(filep);
2527 		return i;
2528 	}
2529 	return -1;
2530 }
2531 
2532 int get_thread_siblings(struct cpu_topology *thiscpu)
2533 {
2534 	char path[80], character;
2535 	FILE *filep;
2536 	unsigned long map;
2537 	int so, shift, sib_core;
2538 	int cpu = thiscpu->logical_cpu_id;
2539 	int offset = topo.max_cpu_num + 1;
2540 	size_t size;
2541 	int thread_id = 0;
2542 
2543 	thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
2544 	if (thiscpu->thread_id < 0)
2545 		thiscpu->thread_id = thread_id++;
2546 	if (!thiscpu->put_ids)
2547 		return -1;
2548 
2549 	size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
2550 	CPU_ZERO_S(size, thiscpu->put_ids);
2551 
2552 	sprintf(path,
2553 		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
2554 	filep = fopen_or_die(path, "r");
2555 	do {
2556 		offset -= BITMASK_SIZE;
2557 		fscanf(filep, "%lx%c", &map, &character);
2558 		for (shift = 0; shift < BITMASK_SIZE; shift++) {
2559 			if ((map >> shift) & 0x1) {
2560 				so = shift + offset;
2561 				sib_core = get_core_id(so);
2562 				if (sib_core == thiscpu->physical_core_id) {
2563 					CPU_SET_S(so, size, thiscpu->put_ids);
2564 					if ((so != cpu) &&
2565 					    (cpus[so].thread_id < 0))
2566 						cpus[so].thread_id =
2567 								    thread_id++;
2568 				}
2569 			}
2570 		}
2571 	} while (!strncmp(&character, ",", 1));
2572 	fclose(filep);
2573 
2574 	return CPU_COUNT_S(size, thiscpu->put_ids);
2575 }
2576 
2577 /*
2578  * run func(thread, core, package) in topology order
2579  * skip non-present cpus
2580  */
2581 
2582 int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
2583 	struct pkg_data *, struct thread_data *, struct core_data *,
2584 	struct pkg_data *), struct thread_data *thread_base,
2585 	struct core_data *core_base, struct pkg_data *pkg_base,
2586 	struct thread_data *thread_base2, struct core_data *core_base2,
2587 	struct pkg_data *pkg_base2)
2588 {
2589 	int retval, pkg_no, node_no, core_no, thread_no;
2590 
2591 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
2592 		for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
2593 			for (core_no = 0; core_no < topo.cores_per_node;
2594 			     ++core_no) {
2595 				for (thread_no = 0; thread_no <
2596 					topo.threads_per_core; ++thread_no) {
2597 					struct thread_data *t, *t2;
2598 					struct core_data *c, *c2;
2599 					struct pkg_data *p, *p2;
2600 
2601 					t = GET_THREAD(thread_base, thread_no,
2602 						       core_no, node_no,
2603 						       pkg_no);
2604 
2605 					if (cpu_is_not_present(t->cpu_id))
2606 						continue;
2607 
2608 					t2 = GET_THREAD(thread_base2, thread_no,
2609 							core_no, node_no,
2610 							pkg_no);
2611 
2612 					c = GET_CORE(core_base, core_no,
2613 						     node_no, pkg_no);
2614 					c2 = GET_CORE(core_base2, core_no,
2615 						      node_no,
2616 						      pkg_no);
2617 
2618 					p = GET_PKG(pkg_base, pkg_no);
2619 					p2 = GET_PKG(pkg_base2, pkg_no);
2620 
2621 					retval = func(t, c, p, t2, c2, p2);
2622 					if (retval)
2623 						return retval;
2624 				}
2625 			}
2626 		}
2627 	}
2628 	return 0;
2629 }
2630 
2631 /*
2632  * run func(cpu) on every cpu in /proc/stat
2633  * return max_cpu number
2634  */
2635 int for_all_proc_cpus(int (func)(int))
2636 {
2637 	FILE *fp;
2638 	int cpu_num;
2639 	int retval;
2640 
2641 	fp = fopen_or_die(proc_stat, "r");
2642 
2643 	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
2644 	if (retval != 0)
2645 		err(1, "%s: failed to parse format", proc_stat);
2646 
2647 	while (1) {
2648 		retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
2649 		if (retval != 1)
2650 			break;
2651 
2652 		retval = func(cpu_num);
2653 		if (retval) {
2654 			fclose(fp);
2655 			return(retval);
2656 		}
2657 	}
2658 	fclose(fp);
2659 	return 0;
2660 }
2661 
/*
 * Discard and rebuild all buffers by calling free_all_buffers() followed
 * by setup_all_buffers(), then report the resulting topo.num_cpus on
 * stdout.
 */
void re_initialize(void)
{
	free_all_buffers();
	setup_all_buffers();
	printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
}
2668 
2669 void set_max_cpu_num(void)
2670 {
2671 	FILE *filep;
2672 	unsigned long dummy;
2673 
2674 	topo.max_cpu_num = 0;
2675 	filep = fopen_or_die(
2676 			"/sys/devices/system/cpu/cpu0/topology/thread_siblings",
2677 			"r");
2678 	while (fscanf(filep, "%lx,", &dummy) == 1)
2679 		topo.max_cpu_num += BITMASK_SIZE;
2680 	fclose(filep);
2681 	topo.max_cpu_num--; /* 0 based */
2682 }
2683 
/*
 * count_cpus()
 * Increment topo.num_cpus by one per call and return 0.
 * The cpu argument is not used; the int(int) signature matches the
 * callback type taken by for_all_proc_cpus().
 */
int count_cpus(int cpu)
{
	topo.num_cpus++;
	return 0;
}
/*
 * Set the bit for cpu in cpu_present_set (sized by cpu_present_setsize).
 * Always returns 0.
 */
int mark_cpu_present(int cpu)
{
	CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
	return 0;
}
2698 
/*
 * Mark cpus[cpu].thread_id as unassigned by setting it to -1.
 * Always returns 0.
 */
int init_thread_id(int cpu)
{
	cpus[cpu].thread_id = -1;
	return 0;
}
2704 
2705 /*
2706  * snapshot_proc_interrupts()
2707  *
2708  * read and record summary of /proc/interrupts
2709  *
2710  * return 1 if config change requires a restart, else return 0
2711  */
2712 int snapshot_proc_interrupts(void)
2713 {
2714 	static FILE *fp;
2715 	int column, retval;
2716 
2717 	if (fp == NULL)
2718 		fp = fopen_or_die("/proc/interrupts", "r");
2719 	else
2720 		rewind(fp);
2721 
2722 	/* read 1st line of /proc/interrupts to get cpu* name for each column */
2723 	for (column = 0; column < topo.num_cpus; ++column) {
2724 		int cpu_number;
2725 
2726 		retval = fscanf(fp, " CPU%d", &cpu_number);
2727 		if (retval != 1)
2728 			break;
2729 
2730 		if (cpu_number > topo.max_cpu_num) {
2731 			warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
2732 			return 1;
2733 		}
2734 
2735 		irq_column_2_cpu[column] = cpu_number;
2736 		irqs_per_cpu[cpu_number] = 0;
2737 	}
2738 
2739 	/* read /proc/interrupt count lines and sum up irqs per cpu */
2740 	while (1) {
2741 		int column;
2742 		char buf[64];
2743 
2744 		retval = fscanf(fp, " %s:", buf);	/* flush irq# "N:" */
2745 		if (retval != 1)
2746 			break;
2747 
2748 		/* read the count per cpu */
2749 		for (column = 0; column < topo.num_cpus; ++column) {
2750 
2751 			int cpu_number, irq_count;
2752 
2753 			retval = fscanf(fp, " %d", &irq_count);
2754 			if (retval != 1)
2755 				break;
2756 
2757 			cpu_number = irq_column_2_cpu[column];
2758 			irqs_per_cpu[cpu_number] += irq_count;
2759 
2760 		}
2761 
2762 		while (getc(fp) != '\n')
2763 			;	/* flush interrupt description */
2764 
2765 	}
2766 	return 0;
2767 }
2768 /*
2769  * snapshot_gfx_rc6_ms()
2770  *
2771  * record snapshot of
2772  * /sys/class/drm/card0/power/rc6_residency_ms
2773  *
2774  * return 1 if config change requires a restart, else return 0
2775  */
2776 int snapshot_gfx_rc6_ms(void)
2777 {
2778 	FILE *fp;
2779 	int retval;
2780 
2781 	fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
2782 
2783 	retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
2784 	if (retval != 1)
2785 		err(1, "GFX rc6");
2786 
2787 	fclose(fp);
2788 
2789 	return 0;
2790 }
2791 /*
2792  * snapshot_gfx_mhz()
2793  *
2794  * record snapshot of
2795  * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
2796  *
2797  * return 1 if config change requires a restart, else return 0
2798  */
2799 int snapshot_gfx_mhz(void)
2800 {
2801 	static FILE *fp;
2802 	int retval;
2803 
2804 	if (fp == NULL)
2805 		fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
2806 	else {
2807 		rewind(fp);
2808 		fflush(fp);
2809 	}
2810 
2811 	retval = fscanf(fp, "%d", &gfx_cur_mhz);
2812 	if (retval != 1)
2813 		err(1, "GFX MHz");
2814 
2815 	return 0;
2816 }
2817 
2818 /*
2819  * snapshot_cpu_lpi()
2820  *
2821  * record snapshot of
2822  * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
2823  *
2824  * return 1 if config change requires a restart, else return 0
2825  */
2826 int snapshot_cpu_lpi_us(void)
2827 {
2828 	FILE *fp;
2829 	int retval;
2830 
2831 	fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
2832 
2833 	retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
2834 	if (retval != 1)
2835 		err(1, "CPU LPI");
2836 
2837 	fclose(fp);
2838 
2839 	return 0;
2840 }
2841 /*
2842  * snapshot_sys_lpi()
2843  *
2844  * record snapshot of
2845  * /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us
2846  *
2847  * return 1 if config change requires a restart, else return 0
2848  */
2849 int snapshot_sys_lpi_us(void)
2850 {
2851 	FILE *fp;
2852 	int retval;
2853 
2854 	fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r");
2855 
2856 	retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
2857 	if (retval != 1)
2858 		err(1, "SYS LPI");
2859 
2860 	fclose(fp);
2861 
2862 	return 0;
2863 }
2864 /*
2865  * snapshot /proc and /sys files
2866  *
2867  * return 1 if configuration restart needed, else return 0
2868  */
2869 int snapshot_proc_sysfs_files(void)
2870 {
2871 	if (DO_BIC(BIC_IRQ))
2872 		if (snapshot_proc_interrupts())
2873 			return 1;
2874 
2875 	if (DO_BIC(BIC_GFX_rc6))
2876 		snapshot_gfx_rc6_ms();
2877 
2878 	if (DO_BIC(BIC_GFXMHz))
2879 		snapshot_gfx_mhz();
2880 
2881 	if (DO_BIC(BIC_CPU_LPI))
2882 		snapshot_cpu_lpi_us();
2883 
2884 	if (DO_BIC(BIC_SYS_LPI))
2885 		snapshot_sys_lpi_us();
2886 
2887 	return 0;
2888 }
2889 
2890 int exit_requested;
2891 
2892 static void signal_handler (int signal)
2893 {
2894 	switch (signal) {
2895 	case SIGINT:
2896 		exit_requested = 1;
2897 		if (debug)
2898 			fprintf(stderr, " SIGINT\n");
2899 		break;
2900 	case SIGUSR1:
2901 		if (debug > 1)
2902 			fprintf(stderr, "SIGUSR1\n");
2903 		break;
2904 	}
2905 	/* make sure this manually-invoked interval is at least 1ms long */
2906 	nanosleep(&one_msec, NULL);
2907 }
2908 
2909 void setup_signal_handler(void)
2910 {
2911 	struct sigaction sa;
2912 
2913 	memset(&sa, 0, sizeof(sa));
2914 
2915 	sa.sa_handler = &signal_handler;
2916 
2917 	if (sigaction(SIGINT, &sa, NULL) < 0)
2918 		err(1, "sigaction SIGINT");
2919 	if (sigaction(SIGUSR1, &sa, NULL) < 0)
2920 		err(1, "sigaction SIGUSR1");
2921 }
2922 
2923 void do_sleep(void)
2924 {
2925 	struct timeval select_timeout;
2926 	fd_set readfds;
2927 	int retval;
2928 
2929 	FD_ZERO(&readfds);
2930 	FD_SET(0, &readfds);
2931 
2932 	if (!isatty(fileno(stdin))) {
2933 		nanosleep(&interval_ts, NULL);
2934 		return;
2935 	}
2936 
2937 	select_timeout = interval_tv;
2938 	retval = select(1, &readfds, NULL, NULL, &select_timeout);
2939 
2940 	if (retval == 1) {
2941 		switch (getc(stdin)) {
2942 		case 'q':
2943 			exit_requested = 1;
2944 			break;
2945 		}
2946 		/* make sure this manually-invoked interval is at least 1ms long */
2947 		nanosleep(&one_msec, NULL);
2948 	}
2949 }
2950 
2951 
2952 void turbostat_loop()
2953 {
2954 	int retval;
2955 	int restarted = 0;
2956 	int done_iters = 0;
2957 
2958 	setup_signal_handler();
2959 
2960 restart:
2961 	restarted++;
2962 
2963 	snapshot_proc_sysfs_files();
2964 	retval = for_all_cpus(get_counters, EVEN_COUNTERS);
2965 	first_counter_read = 0;
2966 	if (retval < -1) {
2967 		exit(retval);
2968 	} else if (retval == -1) {
2969 		if (restarted > 1) {
2970 			exit(retval);
2971 		}
2972 		re_initialize();
2973 		goto restart;
2974 	}
2975 	restarted = 0;
2976 	done_iters = 0;
2977 	gettimeofday(&tv_even, (struct timezone *)NULL);
2978 
2979 	while (1) {
2980 		if (for_all_proc_cpus(cpu_is_not_present)) {
2981 			re_initialize();
2982 			goto restart;
2983 		}
2984 		do_sleep();
2985 		if (snapshot_proc_sysfs_files())
2986 			goto restart;
2987 		retval = for_all_cpus(get_counters, ODD_COUNTERS);
2988 		if (retval < -1) {
2989 			exit(retval);
2990 		} else if (retval == -1) {
2991 			re_initialize();
2992 			goto restart;
2993 		}
2994 		gettimeofday(&tv_odd, (struct timezone *)NULL);
2995 		timersub(&tv_odd, &tv_even, &tv_delta);
2996 		if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
2997 			re_initialize();
2998 			goto restart;
2999 		}
3000 		compute_average(EVEN_COUNTERS);
3001 		format_all_counters(EVEN_COUNTERS);
3002 		flush_output_stdout();
3003 		if (exit_requested)
3004 			break;
3005 		if (num_iterations && ++done_iters >= num_iterations)
3006 			break;
3007 		do_sleep();
3008 		if (snapshot_proc_sysfs_files())
3009 			goto restart;
3010 		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
3011 		if (retval < -1) {
3012 			exit(retval);
3013 		} else if (retval == -1) {
3014 			re_initialize();
3015 			goto restart;
3016 		}
3017 		gettimeofday(&tv_even, (struct timezone *)NULL);
3018 		timersub(&tv_even, &tv_odd, &tv_delta);
3019 		if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
3020 			re_initialize();
3021 			goto restart;
3022 		}
3023 		compute_average(ODD_COUNTERS);
3024 		format_all_counters(ODD_COUNTERS);
3025 		flush_output_stdout();
3026 		if (exit_requested)
3027 			break;
3028 		if (num_iterations && ++done_iters >= num_iterations)
3029 			break;
3030 	}
3031 }
3032 
3033 void check_dev_msr()
3034 {
3035 	struct stat sb;
3036 	char pathname[32];
3037 
3038 	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3039 	if (stat(pathname, &sb))
3040  		if (system("/sbin/modprobe msr > /dev/null 2>&1"))
3041 			err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
3042 }
3043 
3044 void check_permissions()
3045 {
3046 	struct __user_cap_header_struct cap_header_data;
3047 	cap_user_header_t cap_header = &cap_header_data;
3048 	struct __user_cap_data_struct cap_data_data;
3049 	cap_user_data_t cap_data = &cap_data_data;
3050 	extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
3051 	int do_exit = 0;
3052 	char pathname[32];
3053 
3054 	/* check for CAP_SYS_RAWIO */
3055 	cap_header->pid = getpid();
3056 	cap_header->version = _LINUX_CAPABILITY_VERSION;
3057 	if (capget(cap_header, cap_data) < 0)
3058 		err(-6, "capget(2) failed");
3059 
3060 	if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
3061 		do_exit++;
3062 		warnx("capget(CAP_SYS_RAWIO) failed,"
3063 			" try \"# setcap cap_sys_rawio=ep %s\"", progname);
3064 	}
3065 
3066 	/* test file permissions */
3067 	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3068 	if (euidaccess(pathname, R_OK)) {
3069 		do_exit++;
3070 		warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
3071 	}
3072 
3073 	/* if all else fails, thell them to be root */
3074 	if (do_exit)
3075 		if (getuid() != 0)
3076 			warnx("... or simply run as root");
3077 
3078 	if (do_exit)
3079 		exit(-6);
3080 }
3081 
3082 /*
3083  * NHM adds support for additional MSRs:
3084  *
3085  * MSR_SMI_COUNT                   0x00000034
3086  *
3087  * MSR_PLATFORM_INFO               0x000000ce
3088  * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
3089  *
3090  * MSR_MISC_PWR_MGMT               0x000001aa
3091  *
3092  * MSR_PKG_C3_RESIDENCY            0x000003f8
3093  * MSR_PKG_C6_RESIDENCY            0x000003f9
3094  * MSR_CORE_C3_RESIDENCY           0x000003fc
3095  * MSR_CORE_C6_RESIDENCY           0x000003fd
3096  *
3097  * Side effect:
3098  * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
3099  * sets has_misc_feature_control
3100  */
3101 int probe_nhm_msrs(unsigned int family, unsigned int model)
3102 {
3103 	unsigned long long msr;
3104 	unsigned int base_ratio;
3105 	int *pkg_cstate_limits;
3106 
3107 	if (!genuine_intel)
3108 		return 0;
3109 
3110 	if (family != 6)
3111 		return 0;
3112 
3113 	bclk = discover_bclk(family, model);
3114 
3115 	switch (model) {
3116 	case INTEL_FAM6_NEHALEM_EP:	/* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
3117 	case INTEL_FAM6_NEHALEM:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
3118 	case 0x1F:	/* Core i7 and i5 Processor - Nehalem */
3119 	case INTEL_FAM6_WESTMERE:	/* Westmere Client - Clarkdale, Arrandale */
3120 	case INTEL_FAM6_WESTMERE_EP:	/* Westmere EP - Gulftown */
3121 	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
3122 	case INTEL_FAM6_WESTMERE_EX:	/* Westmere-EX Xeon - Eagleton */
3123 		pkg_cstate_limits = nhm_pkg_cstate_limits;
3124 		break;
3125 	case INTEL_FAM6_SANDYBRIDGE:	/* SNB */
3126 	case INTEL_FAM6_SANDYBRIDGE_X:	/* SNB Xeon */
3127 	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
3128 	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
3129 		pkg_cstate_limits = snb_pkg_cstate_limits;
3130 		has_misc_feature_control = 1;
3131 		break;
3132 	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
3133 	case INTEL_FAM6_HASWELL_X:	/* HSX */
3134 	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
3135 	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
3136 	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
3137 	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
3138 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
3139 	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
3140 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
3141 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
3142 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
3143 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
3144 	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
3145 		pkg_cstate_limits = hsw_pkg_cstate_limits;
3146 		has_misc_feature_control = 1;
3147 		break;
3148 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
3149 		pkg_cstate_limits = skx_pkg_cstate_limits;
3150 		has_misc_feature_control = 1;
3151 		break;
3152 	case INTEL_FAM6_ATOM_SILVERMONT1:	/* BYT */
3153 		no_MSR_MISC_PWR_MGMT = 1;
3154 	case INTEL_FAM6_ATOM_SILVERMONT2:	/* AVN */
3155 		pkg_cstate_limits = slv_pkg_cstate_limits;
3156 		break;
3157 	case INTEL_FAM6_ATOM_AIRMONT:	/* AMT */
3158 		pkg_cstate_limits = amt_pkg_cstate_limits;
3159 		no_MSR_MISC_PWR_MGMT = 1;
3160 		break;
3161 	case INTEL_FAM6_XEON_PHI_KNL:	/* PHI */
3162 	case INTEL_FAM6_XEON_PHI_KNM:
3163 		pkg_cstate_limits = phi_pkg_cstate_limits;
3164 		break;
3165 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
3166 	case INTEL_FAM6_ATOM_GEMINI_LAKE:
3167 	case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
3168 		pkg_cstate_limits = bxt_pkg_cstate_limits;
3169 		break;
3170 	default:
3171 		return 0;
3172 	}
3173 	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
3174 	pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
3175 
3176 	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
3177 	base_ratio = (msr >> 8) & 0xFF;
3178 
3179 	base_hz = base_ratio * bclk * 1000000;
3180 	has_base_hz = 1;
3181 	return 1;
3182 }
3183 /*
3184  * SLV client has support for unique MSRs:
3185  *
3186  * MSR_CC6_DEMOTION_POLICY_CONFIG
3187  * MSR_MC6_DEMOTION_POLICY_CONFIG
3188  */
3189 
3190 int has_slv_msrs(unsigned int family, unsigned int model)
3191 {
3192 	if (!genuine_intel)
3193 		return 0;
3194 
3195 	switch (model) {
3196 	case INTEL_FAM6_ATOM_SILVERMONT1:
3197 	case INTEL_FAM6_ATOM_MERRIFIELD:
3198 	case INTEL_FAM6_ATOM_MOOREFIELD:
3199 		return 1;
3200 	}
3201 	return 0;
3202 }
3203 int is_dnv(unsigned int family, unsigned int model)
3204 {
3205 
3206 	if (!genuine_intel)
3207 		return 0;
3208 
3209 	switch (model) {
3210 	case INTEL_FAM6_ATOM_DENVERTON:
3211 		return 1;
3212 	}
3213 	return 0;
3214 }
3215 int is_bdx(unsigned int family, unsigned int model)
3216 {
3217 
3218 	if (!genuine_intel)
3219 		return 0;
3220 
3221 	switch (model) {
3222 	case INTEL_FAM6_BROADWELL_X:
3223 	case INTEL_FAM6_BROADWELL_XEON_D:
3224 		return 1;
3225 	}
3226 	return 0;
3227 }
3228 int is_skx(unsigned int family, unsigned int model)
3229 {
3230 
3231 	if (!genuine_intel)
3232 		return 0;
3233 
3234 	switch (model) {
3235 	case INTEL_FAM6_SKYLAKE_X:
3236 		return 1;
3237 	}
3238 	return 0;
3239 }
3240 
3241 int has_turbo_ratio_limit(unsigned int family, unsigned int model)
3242 {
3243 	if (has_slv_msrs(family, model))
3244 		return 0;
3245 
3246 	switch (model) {
3247 	/* Nehalem compatible, but do not include turbo-ratio limit support */
3248 	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
3249 	case INTEL_FAM6_WESTMERE_EX:	/* Westmere-EX Xeon - Eagleton */
3250 	case INTEL_FAM6_XEON_PHI_KNL:	/* PHI - Knights Landing (different MSR definition) */
3251 	case INTEL_FAM6_XEON_PHI_KNM:
3252 		return 0;
3253 	default:
3254 		return 1;
3255 	}
3256 }
/* Return 1 exactly when has_slv_msrs() accepts this part, else 0. */
int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	return has_slv_msrs(family, model) ? 1 : 0;
}
3264 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
3265 {
3266 	if (!genuine_intel)
3267 		return 0;
3268 
3269 	if (family != 6)
3270 		return 0;
3271 
3272 	switch (model) {
3273 	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
3274 	case INTEL_FAM6_HASWELL_X:	/* HSW Xeon */
3275 		return 1;
3276 	default:
3277 		return 0;
3278 	}
3279 }
3280 int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
3281 {
3282 	if (!genuine_intel)
3283 		return 0;
3284 
3285 	if (family != 6)
3286 		return 0;
3287 
3288 	switch (model) {
3289 	case INTEL_FAM6_HASWELL_X:	/* HSW Xeon */
3290 		return 1;
3291 	default:
3292 		return 0;
3293 	}
3294 }
3295 
3296 int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
3297 {
3298 	if (!genuine_intel)
3299 		return 0;
3300 
3301 	if (family != 6)
3302 		return 0;
3303 
3304 	switch (model) {
3305 	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
3306 	case INTEL_FAM6_XEON_PHI_KNM:
3307 		return 1;
3308 	default:
3309 		return 0;
3310 	}
3311 }
3312 int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
3313 {
3314 	if (!genuine_intel)
3315 		return 0;
3316 
3317 	if (family != 6)
3318 		return 0;
3319 
3320 	switch (model) {
3321 	case INTEL_FAM6_ATOM_GOLDMONT:
3322 	case INTEL_FAM6_SKYLAKE_X:
3323 		return 1;
3324 	default:
3325 		return 0;
3326 	}
3327 }
3328 int has_config_tdp(unsigned int family, unsigned int model)
3329 {
3330 	if (!genuine_intel)
3331 		return 0;
3332 
3333 	if (family != 6)
3334 		return 0;
3335 
3336 	switch (model) {
3337 	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
3338 	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
3339 	case INTEL_FAM6_HASWELL_X:	/* HSX */
3340 	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
3341 	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
3342 	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
3343 	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
3344 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
3345 	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
3346 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
3347 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
3348 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
3349 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
3350 	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
3351 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
3352 
3353 	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
3354 	case INTEL_FAM6_XEON_PHI_KNM:
3355 		return 1;
3356 	default:
3357 		return 0;
3358 	}
3359 }
3360 
3361 static void
3362 dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
3363 {
3364 	if (!do_nhm_platform_info)
3365 		return;
3366 
3367 	dump_nhm_platform_info();
3368 
3369 	if (has_hsw_turbo_ratio_limit(family, model))
3370 		dump_hsw_turbo_ratio_limits();
3371 
3372 	if (has_ivt_turbo_ratio_limit(family, model))
3373 		dump_ivt_turbo_ratio_limits();
3374 
3375 	if (has_turbo_ratio_limit(family, model))
3376 		dump_turbo_ratio_limits(family, model);
3377 
3378 	if (has_atom_turbo_ratio_limit(family, model))
3379 		dump_atom_turbo_ratio_limits();
3380 
3381 	if (has_knl_turbo_ratio_limit(family, model))
3382 		dump_knl_turbo_ratio_limits();
3383 
3384 	if (has_config_tdp(family, model))
3385 		dump_config_tdp();
3386 
3387 	dump_nhm_cst_cfg();
3388 }
3389 
3390 static void
3391 dump_sysfs_cstate_config(void)
3392 {
3393 	char path[64];
3394 	char name_buf[16];
3395 	char desc[64];
3396 	FILE *input;
3397 	int state;
3398 	char *sp;
3399 
3400 	if (!DO_BIC(BIC_sysfs))
3401 		return;
3402 
3403 	for (state = 0; state < 10; ++state) {
3404 
3405 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
3406 			base_cpu, state);
3407 		input = fopen(path, "r");
3408 		if (input == NULL)
3409 			continue;
3410 		fgets(name_buf, sizeof(name_buf), input);
3411 
3412 		 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
3413 		sp = strchr(name_buf, '-');
3414 		if (!sp)
3415 			sp = strchrnul(name_buf, '\n');
3416 		*sp = '\0';
3417 
3418 		fclose(input);
3419 
3420 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc",
3421 			base_cpu, state);
3422 		input = fopen(path, "r");
3423 		if (input == NULL)
3424 			continue;
3425 		fgets(desc, sizeof(desc), input);
3426 
3427 		fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
3428 		fclose(input);
3429 	}
3430 }
3431 static void
3432 dump_sysfs_pstate_config(void)
3433 {
3434 	char path[64];
3435 	char driver_buf[64];
3436 	char governor_buf[64];
3437 	FILE *input;
3438 	int turbo;
3439 
3440 	sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver",
3441 			base_cpu);
3442 	input = fopen(path, "r");
3443 	if (input == NULL) {
3444 		fprintf(stderr, "NSFOD %s\n", path);
3445 		return;
3446 	}
3447 	fgets(driver_buf, sizeof(driver_buf), input);
3448 	fclose(input);
3449 
3450 	sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor",
3451 			base_cpu);
3452 	input = fopen(path, "r");
3453 	if (input == NULL) {
3454 		fprintf(stderr, "NSFOD %s\n", path);
3455 		return;
3456 	}
3457 	fgets(governor_buf, sizeof(governor_buf), input);
3458 	fclose(input);
3459 
3460 	fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
3461 	fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);
3462 
3463 	sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
3464 	input = fopen(path, "r");
3465 	if (input != NULL) {
3466 		fscanf(input, "%d", &turbo);
3467 		fprintf(outf, "cpufreq boost: %d\n", turbo);
3468 		fclose(input);
3469 	}
3470 
3471 	sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
3472 	input = fopen(path, "r");
3473 	if (input != NULL) {
3474 		fscanf(input, "%d", &turbo);
3475 		fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
3476 		fclose(input);
3477 	}
3478 }
3479 
3480 
3481 /*
3482  * print_epb()
3483  * Decode the ENERGY_PERF_BIAS MSR
3484  */
3485 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3486 {
3487 	unsigned long long msr;
3488 	char *epb_string;
3489 	int cpu;
3490 
3491 	if (!has_epb)
3492 		return 0;
3493 
3494 	cpu = t->cpu_id;
3495 
3496 	/* EPB is per-package */
3497 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3498 		return 0;
3499 
3500 	if (cpu_migrate(cpu)) {
3501 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3502 		return -1;
3503 	}
3504 
3505 	if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
3506 		return 0;
3507 
3508 	switch (msr & 0xF) {
3509 	case ENERGY_PERF_BIAS_PERFORMANCE:
3510 		epb_string = "performance";
3511 		break;
3512 	case ENERGY_PERF_BIAS_NORMAL:
3513 		epb_string = "balanced";
3514 		break;
3515 	case ENERGY_PERF_BIAS_POWERSAVE:
3516 		epb_string = "powersave";
3517 		break;
3518 	default:
3519 		epb_string = "custom";
3520 		break;
3521 	}
3522 	fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
3523 
3524 	return 0;
3525 }
3526 /*
3527  * print_hwp()
3528  * Decode the MSR_HWP_CAPABILITIES
3529  */
3530 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3531 {
3532 	unsigned long long msr;
3533 	int cpu;
3534 
3535 	if (!has_hwp)
3536 		return 0;
3537 
3538 	cpu = t->cpu_id;
3539 
3540 	/* MSR_HWP_CAPABILITIES is per-package */
3541 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3542 		return 0;
3543 
3544 	if (cpu_migrate(cpu)) {
3545 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3546 		return -1;
3547 	}
3548 
3549 	if (get_msr(cpu, MSR_PM_ENABLE, &msr))
3550 		return 0;
3551 
3552 	fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
3553 		cpu, msr, (msr & (1 << 0)) ? "" : "No-");
3554 
3555 	/* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
3556 	if ((msr & (1 << 0)) == 0)
3557 		return 0;
3558 
3559 	if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
3560 		return 0;
3561 
3562 	fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
3563 			"(high %d guar %d eff %d low %d)\n",
3564 			cpu, msr,
3565 			(unsigned int)HWP_HIGHEST_PERF(msr),
3566 			(unsigned int)HWP_GUARANTEED_PERF(msr),
3567 			(unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
3568 			(unsigned int)HWP_LOWEST_PERF(msr));
3569 
3570 	if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
3571 		return 0;
3572 
3573 	fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
3574 			"(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
3575 			cpu, msr,
3576 			(unsigned int)(((msr) >> 0) & 0xff),
3577 			(unsigned int)(((msr) >> 8) & 0xff),
3578 			(unsigned int)(((msr) >> 16) & 0xff),
3579 			(unsigned int)(((msr) >> 24) & 0xff),
3580 			(unsigned int)(((msr) >> 32) & 0xff3),
3581 			(unsigned int)(((msr) >> 42) & 0x1));
3582 
3583 	if (has_hwp_pkg) {
3584 		if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
3585 			return 0;
3586 
3587 		fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
3588 			"(min %d max %d des %d epp 0x%x window 0x%x)\n",
3589 			cpu, msr,
3590 			(unsigned int)(((msr) >> 0) & 0xff),
3591 			(unsigned int)(((msr) >> 8) & 0xff),
3592 			(unsigned int)(((msr) >> 16) & 0xff),
3593 			(unsigned int)(((msr) >> 24) & 0xff),
3594 			(unsigned int)(((msr) >> 32) & 0xff3));
3595 	}
3596 	if (has_hwp_notify) {
3597 		if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
3598 			return 0;
3599 
3600 		fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
3601 			"(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
3602 			cpu, msr,
3603 			((msr) & 0x1) ? "EN" : "Dis",
3604 			((msr) & 0x2) ? "EN" : "Dis");
3605 	}
3606 	if (get_msr(cpu, MSR_HWP_STATUS, &msr))
3607 		return 0;
3608 
3609 	fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
3610 			"(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
3611 			cpu, msr,
3612 			((msr) & 0x1) ? "" : "No-",
3613 			((msr) & 0x2) ? "" : "No-");
3614 
3615 	return 0;
3616 }
3617 
3618 /*
3619  * print_perf_limit()
3620  */
3621 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3622 {
3623 	unsigned long long msr;
3624 	int cpu;
3625 
3626 	cpu = t->cpu_id;
3627 
3628 	/* per-package */
3629 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3630 		return 0;
3631 
3632 	if (cpu_migrate(cpu)) {
3633 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3634 		return -1;
3635 	}
3636 
3637 	if (do_core_perf_limit_reasons) {
3638 		get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
3639 		fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3640 		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
3641 			(msr & 1 << 15) ? "bit15, " : "",
3642 			(msr & 1 << 14) ? "bit14, " : "",
3643 			(msr & 1 << 13) ? "Transitions, " : "",
3644 			(msr & 1 << 12) ? "MultiCoreTurbo, " : "",
3645 			(msr & 1 << 11) ? "PkgPwrL2, " : "",
3646 			(msr & 1 << 10) ? "PkgPwrL1, " : "",
3647 			(msr & 1 << 9) ? "CorePwr, " : "",
3648 			(msr & 1 << 8) ? "Amps, " : "",
3649 			(msr & 1 << 6) ? "VR-Therm, " : "",
3650 			(msr & 1 << 5) ? "Auto-HWP, " : "",
3651 			(msr & 1 << 4) ? "Graphics, " : "",
3652 			(msr & 1 << 2) ? "bit2, " : "",
3653 			(msr & 1 << 1) ? "ThermStatus, " : "",
3654 			(msr & 1 << 0) ? "PROCHOT, " : "");
3655 		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
3656 			(msr & 1 << 31) ? "bit31, " : "",
3657 			(msr & 1 << 30) ? "bit30, " : "",
3658 			(msr & 1 << 29) ? "Transitions, " : "",
3659 			(msr & 1 << 28) ? "MultiCoreTurbo, " : "",
3660 			(msr & 1 << 27) ? "PkgPwrL2, " : "",
3661 			(msr & 1 << 26) ? "PkgPwrL1, " : "",
3662 			(msr & 1 << 25) ? "CorePwr, " : "",
3663 			(msr & 1 << 24) ? "Amps, " : "",
3664 			(msr & 1 << 22) ? "VR-Therm, " : "",
3665 			(msr & 1 << 21) ? "Auto-HWP, " : "",
3666 			(msr & 1 << 20) ? "Graphics, " : "",
3667 			(msr & 1 << 18) ? "bit18, " : "",
3668 			(msr & 1 << 17) ? "ThermStatus, " : "",
3669 			(msr & 1 << 16) ? "PROCHOT, " : "");
3670 
3671 	}
3672 	if (do_gfx_perf_limit_reasons) {
3673 		get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
3674 		fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3675 		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
3676 			(msr & 1 << 0) ? "PROCHOT, " : "",
3677 			(msr & 1 << 1) ? "ThermStatus, " : "",
3678 			(msr & 1 << 4) ? "Graphics, " : "",
3679 			(msr & 1 << 6) ? "VR-Therm, " : "",
3680 			(msr & 1 << 8) ? "Amps, " : "",
3681 			(msr & 1 << 9) ? "GFXPwr, " : "",
3682 			(msr & 1 << 10) ? "PkgPwrL1, " : "",
3683 			(msr & 1 << 11) ? "PkgPwrL2, " : "");
3684 		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
3685 			(msr & 1 << 16) ? "PROCHOT, " : "",
3686 			(msr & 1 << 17) ? "ThermStatus, " : "",
3687 			(msr & 1 << 20) ? "Graphics, " : "",
3688 			(msr & 1 << 22) ? "VR-Therm, " : "",
3689 			(msr & 1 << 24) ? "Amps, " : "",
3690 			(msr & 1 << 25) ? "GFXPwr, " : "",
3691 			(msr & 1 << 26) ? "PkgPwrL1, " : "",
3692 			(msr & 1 << 27) ? "PkgPwrL2, " : "");
3693 	}
3694 	if (do_ring_perf_limit_reasons) {
3695 		get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
3696 		fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3697 		fprintf(outf, " (Active: %s%s%s%s%s%s)",
3698 			(msr & 1 << 0) ? "PROCHOT, " : "",
3699 			(msr & 1 << 1) ? "ThermStatus, " : "",
3700 			(msr & 1 << 6) ? "VR-Therm, " : "",
3701 			(msr & 1 << 8) ? "Amps, " : "",
3702 			(msr & 1 << 10) ? "PkgPwrL1, " : "",
3703 			(msr & 1 << 11) ? "PkgPwrL2, " : "");
3704 		fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
3705 			(msr & 1 << 16) ? "PROCHOT, " : "",
3706 			(msr & 1 << 17) ? "ThermStatus, " : "",
3707 			(msr & 1 << 22) ? "VR-Therm, " : "",
3708 			(msr & 1 << 24) ? "Amps, " : "",
3709 			(msr & 1 << 26) ? "PkgPwrL1, " : "",
3710 			(msr & 1 << 27) ? "PkgPwrL2, " : "");
3711 	}
3712 	return 0;
3713 }
3714 
3715 #define	RAPL_POWER_GRANULARITY	0x7FFF	/* 15 bit power granularity */
3716 #define	RAPL_TIME_GRANULARITY	0x3F /* 6 bit time granularity */
3717 
3718 double get_tdp(unsigned int model)
3719 {
3720 	unsigned long long msr;
3721 
3722 	if (do_rapl & RAPL_PKG_POWER_INFO)
3723 		if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
3724 			return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
3725 
3726 	switch (model) {
3727 	case INTEL_FAM6_ATOM_SILVERMONT1:
3728 	case INTEL_FAM6_ATOM_SILVERMONT2:
3729 		return 30.0;
3730 	default:
3731 		return 135.0;
3732 	}
3733 }
3734 
3735 /*
3736  * rapl_dram_energy_units_probe()
3737  * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
3738  */
3739 static double
3740 rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
3741 {
3742 	/* only called for genuine_intel, family 6 */
3743 
3744 	switch (model) {
3745 	case INTEL_FAM6_HASWELL_X:	/* HSX */
3746 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
3747 	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
3748 	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
3749 	case INTEL_FAM6_XEON_PHI_KNM:
3750 		return (rapl_dram_energy_units = 15.3 / 1000000);
3751 	default:
3752 		return (rapl_energy_units);
3753 	}
3754 }
3755 
3756 
3757 /*
3758  * rapl_probe()
3759  *
3760  * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
3761  */
3762 void rapl_probe(unsigned int family, unsigned int model)
3763 {
3764 	unsigned long long msr;
3765 	unsigned int time_unit;
3766 	double tdp;
3767 
3768 	if (!genuine_intel)
3769 		return;
3770 
3771 	if (family != 6)
3772 		return;
3773 
3774 	switch (model) {
3775 	case INTEL_FAM6_SANDYBRIDGE:
3776 	case INTEL_FAM6_IVYBRIDGE:
3777 	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
3778 	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
3779 	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
3780 	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
3781 	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
3782 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
3783 		if (rapl_joules) {
3784 			BIC_PRESENT(BIC_Pkg_J);
3785 			BIC_PRESENT(BIC_Cor_J);
3786 			BIC_PRESENT(BIC_GFX_J);
3787 		} else {
3788 			BIC_PRESENT(BIC_PkgWatt);
3789 			BIC_PRESENT(BIC_CorWatt);
3790 			BIC_PRESENT(BIC_GFXWatt);
3791 		}
3792 		break;
3793 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
3794 	case INTEL_FAM6_ATOM_GEMINI_LAKE:
3795 		do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
3796 		if (rapl_joules)
3797 			BIC_PRESENT(BIC_Pkg_J);
3798 		else
3799 			BIC_PRESENT(BIC_PkgWatt);
3800 		break;
3801 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
3802 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
3803 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
3804 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
3805 	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
3806 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
3807 		BIC_PRESENT(BIC_PKG__);
3808 		BIC_PRESENT(BIC_RAM__);
3809 		if (rapl_joules) {
3810 			BIC_PRESENT(BIC_Pkg_J);
3811 			BIC_PRESENT(BIC_Cor_J);
3812 			BIC_PRESENT(BIC_RAM_J);
3813 			BIC_PRESENT(BIC_GFX_J);
3814 		} else {
3815 			BIC_PRESENT(BIC_PkgWatt);
3816 			BIC_PRESENT(BIC_CorWatt);
3817 			BIC_PRESENT(BIC_RAMWatt);
3818 			BIC_PRESENT(BIC_GFXWatt);
3819 		}
3820 		break;
3821 	case INTEL_FAM6_HASWELL_X:	/* HSX */
3822 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
3823 	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
3824 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
3825 	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
3826 	case INTEL_FAM6_XEON_PHI_KNM:
3827 		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
3828 		BIC_PRESENT(BIC_PKG__);
3829 		BIC_PRESENT(BIC_RAM__);
3830 		if (rapl_joules) {
3831 			BIC_PRESENT(BIC_Pkg_J);
3832 			BIC_PRESENT(BIC_RAM_J);
3833 		} else {
3834 			BIC_PRESENT(BIC_PkgWatt);
3835 			BIC_PRESENT(BIC_RAMWatt);
3836 		}
3837 		break;
3838 	case INTEL_FAM6_SANDYBRIDGE_X:
3839 	case INTEL_FAM6_IVYBRIDGE_X:
3840 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
3841 		BIC_PRESENT(BIC_PKG__);
3842 		BIC_PRESENT(BIC_RAM__);
3843 		if (rapl_joules) {
3844 			BIC_PRESENT(BIC_Pkg_J);
3845 			BIC_PRESENT(BIC_Cor_J);
3846 			BIC_PRESENT(BIC_RAM_J);
3847 		} else {
3848 			BIC_PRESENT(BIC_PkgWatt);
3849 			BIC_PRESENT(BIC_CorWatt);
3850 			BIC_PRESENT(BIC_RAMWatt);
3851 		}
3852 		break;
3853 	case INTEL_FAM6_ATOM_SILVERMONT1:	/* BYT */
3854 	case INTEL_FAM6_ATOM_SILVERMONT2:	/* AVN */
3855 		do_rapl = RAPL_PKG | RAPL_CORES;
3856 		if (rapl_joules) {
3857 			BIC_PRESENT(BIC_Pkg_J);
3858 			BIC_PRESENT(BIC_Cor_J);
3859 		} else {
3860 			BIC_PRESENT(BIC_PkgWatt);
3861 			BIC_PRESENT(BIC_CorWatt);
3862 		}
3863 		break;
3864 	case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
3865 		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
3866 		BIC_PRESENT(BIC_PKG__);
3867 		BIC_PRESENT(BIC_RAM__);
3868 		if (rapl_joules) {
3869 			BIC_PRESENT(BIC_Pkg_J);
3870 			BIC_PRESENT(BIC_Cor_J);
3871 			BIC_PRESENT(BIC_RAM_J);
3872 		} else {
3873 			BIC_PRESENT(BIC_PkgWatt);
3874 			BIC_PRESENT(BIC_CorWatt);
3875 			BIC_PRESENT(BIC_RAMWatt);
3876 		}
3877 		break;
3878 	default:
3879 		return;
3880 	}
3881 
3882 	/* units on package 0, verify later other packages match */
3883 	if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
3884 		return;
3885 
3886 	rapl_power_units = 1.0 / (1 << (msr & 0xF));
3887 	if (model == INTEL_FAM6_ATOM_SILVERMONT1)
3888 		rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
3889 	else
3890 		rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
3891 
3892 	rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
3893 
3894 	time_unit = msr >> 16 & 0xF;
3895 	if (time_unit == 0)
3896 		time_unit = 0xA;
3897 
3898 	rapl_time_units = 1.0 / (1 << (time_unit));
3899 
3900 	tdp = get_tdp(model);
3901 
3902 	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
3903 	if (!quiet)
3904 		fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
3905 
3906 	return;
3907 }
3908 
3909 void perf_limit_reasons_probe(unsigned int family, unsigned int model)
3910 {
3911 	if (!genuine_intel)
3912 		return;
3913 
3914 	if (family != 6)
3915 		return;
3916 
3917 	switch (model) {
3918 	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
3919 	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
3920 	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
3921 		do_gfx_perf_limit_reasons = 1;
3922 	case INTEL_FAM6_HASWELL_X:	/* HSX */
3923 		do_core_perf_limit_reasons = 1;
3924 		do_ring_perf_limit_reasons = 1;
3925 	default:
3926 		return;
3927 	}
3928 }
3929 
3930 void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
3931 {
3932 	if (is_skx(family, model) || is_bdx(family, model))
3933 		has_automatic_cstate_conversion = 1;
3934 }
3935 
3936 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3937 {
3938 	unsigned long long msr;
3939 	unsigned int dts, dts2;
3940 	int cpu;
3941 
3942 	if (!(do_dts || do_ptm))
3943 		return 0;
3944 
3945 	cpu = t->cpu_id;
3946 
3947 	/* DTS is per-core, no need to print for each thread */
3948 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
3949 		return 0;
3950 
3951 	if (cpu_migrate(cpu)) {
3952 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3953 		return -1;
3954 	}
3955 
3956 	if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
3957 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
3958 			return 0;
3959 
3960 		dts = (msr >> 16) & 0x7F;
3961 		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
3962 			cpu, msr, tcc_activation_temp - dts);
3963 
3964 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
3965 			return 0;
3966 
3967 		dts = (msr >> 16) & 0x7F;
3968 		dts2 = (msr >> 8) & 0x7F;
3969 		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
3970 			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
3971 	}
3972 
3973 
3974 	if (do_dts && debug) {
3975 		unsigned int resolution;
3976 
3977 		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
3978 			return 0;
3979 
3980 		dts = (msr >> 16) & 0x7F;
3981 		resolution = (msr >> 27) & 0xF;
3982 		fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
3983 			cpu, msr, tcc_activation_temp - dts, resolution);
3984 
3985 		if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
3986 			return 0;
3987 
3988 		dts = (msr >> 16) & 0x7F;
3989 		dts2 = (msr >> 8) & 0x7F;
3990 		fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
3991 			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
3992 	}
3993 
3994 	return 0;
3995 }
3996 
3997 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
3998 {
3999 	fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
4000 		cpu, label,
4001 		((msr >> 15) & 1) ? "EN" : "DIS",
4002 		((msr >> 0) & 0x7FFF) * rapl_power_units,
4003 		(1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
4004 		(((msr >> 16) & 1) ? "EN" : "DIS"));
4005 
4006 	return;
4007 }
4008 
4009 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4010 {
4011 	unsigned long long msr;
4012 	int cpu;
4013 
4014 	if (!do_rapl)
4015 		return 0;
4016 
4017 	/* RAPL counters are per package, so print only for 1st thread/package */
4018 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4019 		return 0;
4020 
4021 	cpu = t->cpu_id;
4022 	if (cpu_migrate(cpu)) {
4023 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4024 		return -1;
4025 	}
4026 
4027 	if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
4028 		return -1;
4029 
4030 	fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr,
4031 		rapl_power_units, rapl_energy_units, rapl_time_units);
4032 
4033 	if (do_rapl & RAPL_PKG_POWER_INFO) {
4034 
4035 		if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
4036                 	return -5;
4037 
4038 
4039 		fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4040 			cpu, msr,
4041 			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4042 			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4043 			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4044 			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4045 
4046 	}
4047 	if (do_rapl & RAPL_PKG) {
4048 
4049 		if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
4050 			return -9;
4051 
4052 		fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
4053 			cpu, msr, (msr >> 63) & 1 ? "" : "UN");
4054 
4055 		print_power_limit_msr(cpu, msr, "PKG Limit #1");
4056 		fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
4057 			cpu,
4058 			((msr >> 47) & 1) ? "EN" : "DIS",
4059 			((msr >> 32) & 0x7FFF) * rapl_power_units,
4060 			(1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
4061 			((msr >> 48) & 1) ? "EN" : "DIS");
4062 	}
4063 
4064 	if (do_rapl & RAPL_DRAM_POWER_INFO) {
4065 		if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
4066                 	return -6;
4067 
4068 		fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4069 			cpu, msr,
4070 			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4071 			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4072 			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4073 			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4074 	}
4075 	if (do_rapl & RAPL_DRAM) {
4076 		if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
4077 			return -9;
4078 		fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
4079 				cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4080 
4081 		print_power_limit_msr(cpu, msr, "DRAM Limit");
4082 	}
4083 	if (do_rapl & RAPL_CORE_POLICY) {
4084 		if (get_msr(cpu, MSR_PP0_POLICY, &msr))
4085 			return -7;
4086 
4087 		fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
4088 	}
4089 	if (do_rapl & RAPL_CORES_POWER_LIMIT) {
4090 		if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
4091 			return -9;
4092 		fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
4093 				cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4094 		print_power_limit_msr(cpu, msr, "Cores Limit");
4095 	}
4096 	if (do_rapl & RAPL_GFX) {
4097 		if (get_msr(cpu, MSR_PP1_POLICY, &msr))
4098 			return -8;
4099 
4100 		fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
4101 
4102 		if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
4103 			return -9;
4104 		fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
4105 				cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4106 		print_power_limit_msr(cpu, msr, "GFX Limit");
4107 	}
4108 	return 0;
4109 }
4110 
4111 /*
4112  * SNB adds support for additional MSRs:
4113  *
4114  * MSR_PKG_C7_RESIDENCY            0x000003fa
4115  * MSR_CORE_C7_RESIDENCY           0x000003fe
4116  * MSR_PKG_C2_RESIDENCY            0x0000060d
4117  */
4118 
4119 int has_snb_msrs(unsigned int family, unsigned int model)
4120 {
4121 	if (!genuine_intel)
4122 		return 0;
4123 
4124 	switch (model) {
4125 	case INTEL_FAM6_SANDYBRIDGE:
4126 	case INTEL_FAM6_SANDYBRIDGE_X:
4127 	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
4128 	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
4129 	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
4130 	case INTEL_FAM6_HASWELL_X:	/* HSW */
4131 	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
4132 	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
4133 	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
4134 	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
4135 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
4136 	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
4137 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
4138 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
4139 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
4140 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
4141 	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
4142 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
4143 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
4144 	case INTEL_FAM6_ATOM_GEMINI_LAKE:
4145 	case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
4146 		return 1;
4147 	}
4148 	return 0;
4149 }
4150 
4151 /*
4152  * HSW adds support for additional MSRs:
4153  *
4154  * MSR_PKG_C8_RESIDENCY		0x00000630
4155  * MSR_PKG_C9_RESIDENCY		0x00000631
4156  * MSR_PKG_C10_RESIDENCY	0x00000632
4157  *
4158  * MSR_PKGC8_IRTL		0x00000633
4159  * MSR_PKGC9_IRTL		0x00000634
4160  * MSR_PKGC10_IRTL		0x00000635
4161  *
4162  */
4163 int has_hsw_msrs(unsigned int family, unsigned int model)
4164 {
4165 	if (!genuine_intel)
4166 		return 0;
4167 
4168 	switch (model) {
4169 	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
4170 	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
4171 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
4172 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
4173 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
4174 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
4175 	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
4176 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
4177 	case INTEL_FAM6_ATOM_GEMINI_LAKE:
4178 		return 1;
4179 	}
4180 	return 0;
4181 }
4182 
4183 /*
4184  * SKL adds support for additional MSRS:
4185  *
4186  * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
4187  * MSR_PKG_ANY_CORE_C0_RES         0x00000659
4188  * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
4189  * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
4190  */
4191 int has_skl_msrs(unsigned int family, unsigned int model)
4192 {
4193 	if (!genuine_intel)
4194 		return 0;
4195 
4196 	switch (model) {
4197 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
4198 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
4199 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
4200 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
4201 	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
4202 		return 1;
4203 	}
4204 	return 0;
4205 }
4206 
4207 int is_slm(unsigned int family, unsigned int model)
4208 {
4209 	if (!genuine_intel)
4210 		return 0;
4211 	switch (model) {
4212 	case INTEL_FAM6_ATOM_SILVERMONT1:	/* BYT */
4213 	case INTEL_FAM6_ATOM_SILVERMONT2:	/* AVN */
4214 		return 1;
4215 	}
4216 	return 0;
4217 }
4218 
4219 int is_knl(unsigned int family, unsigned int model)
4220 {
4221 	if (!genuine_intel)
4222 		return 0;
4223 	switch (model) {
4224 	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
4225 	case INTEL_FAM6_XEON_PHI_KNM:
4226 		return 1;
4227 	}
4228 	return 0;
4229 }
4230 
4231 int is_cnl(unsigned int family, unsigned int model)
4232 {
4233 	if (!genuine_intel)
4234 		return 0;
4235 
4236 	switch (model) {
4237 	case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
4238 		return 1;
4239 	}
4240 
4241 	return 0;
4242 }
4243 
/*
 * Return the APERF/MPERF multiplier for this family/model:
 * 1024 when is_knl() matches, 1 for everything else.
 */
unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
{
	return is_knl(family, model) ? 1024 : 1;
}
4250 
4251 #define SLM_BCLK_FREQS 5
4252 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
4253 
4254 double slm_bclk(void)
4255 {
4256 	unsigned long long msr = 3;
4257 	unsigned int i;
4258 	double freq;
4259 
4260 	if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
4261 		fprintf(outf, "SLM BCLK: unknown\n");
4262 
4263 	i = msr & 0xf;
4264 	if (i >= SLM_BCLK_FREQS) {
4265 		fprintf(outf, "SLM BCLK[%d] invalid\n", i);
4266 		i = 3;
4267 	}
4268 	freq = slm_freq_table[i];
4269 
4270 	if (!quiet)
4271 		fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
4272 
4273 	return freq;
4274 }
4275 
/*
 * Return the bus clock in MHz for this family/model:
 * 100.00 when has_snb_msrs() or is_knl() matches, the value from
 * slm_bclk() when is_slm() matches, and 133.33 otherwise.
 */
double discover_bclk(unsigned int family, unsigned int model)
{
	if (has_snb_msrs(family, model) || is_knl(family, model))
		return 100.00;

	if (is_slm(family, model))
		return slm_bclk();

	return 133.33;
}
4285 
4286 /*
4287  * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
4288  * the Thermal Control Circuit (TCC) activates.
4289  * This is usually equal to tjMax.
4290  *
4291  * Older processors do not have this MSR, so there we guess,
4292  * but also allow cmdline over-ride with -T.
4293  *
4294  * Several MSR temperature values are in units of degrees-C
4295  * below this value, including the Digital Thermal Sensor (DTS),
4296  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
4297  */
4298 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4299 {
4300 	unsigned long long msr;
4301 	unsigned int target_c_local;
4302 	int cpu;
4303 
4304 	/* tcc_activation_temp is used only for dts or ptm */
4305 	if (!(do_dts || do_ptm))
4306 		return 0;
4307 
4308 	/* this is a per-package concept */
4309 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4310 		return 0;
4311 
4312 	cpu = t->cpu_id;
4313 	if (cpu_migrate(cpu)) {
4314 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4315 		return -1;
4316 	}
4317 
4318 	if (tcc_activation_temp_override != 0) {
4319 		tcc_activation_temp = tcc_activation_temp_override;
4320 		fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
4321 			cpu, tcc_activation_temp);
4322 		return 0;
4323 	}
4324 
4325 	/* Temperature Target MSR is Nehalem and newer only */
4326 	if (!do_nhm_platform_info)
4327 		goto guess;
4328 
4329 	if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
4330 		goto guess;
4331 
4332 	target_c_local = (msr >> 16) & 0xFF;
4333 
4334 	if (!quiet)
4335 		fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
4336 			cpu, msr, target_c_local);
4337 
4338 	if (!target_c_local)
4339 		goto guess;
4340 
4341 	tcc_activation_temp = target_c_local;
4342 
4343 	return 0;
4344 
4345 guess:
4346 	tcc_activation_temp = TJMAX_DEFAULT;
4347 	fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
4348 		cpu, tcc_activation_temp);
4349 
4350 	return 0;
4351 }
4352 
4353 void decode_feature_control_msr(void)
4354 {
4355 	unsigned long long msr;
4356 
4357 	if (!get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr))
4358 		fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
4359 			base_cpu, msr,
4360 			msr & FEATURE_CONTROL_LOCKED ? "" : "UN-",
4361 			msr & (1 << 18) ? "SGX" : "");
4362 }
4363 
4364 void decode_misc_enable_msr(void)
4365 {
4366 	unsigned long long msr;
4367 
4368 	if (!genuine_intel)
4369 		return;
4370 
4371 	if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
4372 		fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
4373 			base_cpu, msr,
4374 			msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
4375 			msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
4376 			msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
4377 			msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
4378 			msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
4379 }
4380 
4381 void decode_misc_feature_control(void)
4382 {
4383 	unsigned long long msr;
4384 
4385 	if (!has_misc_feature_control)
4386 		return;
4387 
4388 	if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
4389 		fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
4390 			base_cpu, msr,
4391 			msr & (0 << 0) ? "No-" : "",
4392 			msr & (1 << 0) ? "No-" : "",
4393 			msr & (2 << 0) ? "No-" : "",
4394 			msr & (3 << 0) ? "No-" : "");
4395 }
4396 /*
4397  * Decode MSR_MISC_PWR_MGMT
4398  *
4399  * Decode the bits according to the Nehalem documentation
4400  * bit[0] seems to continue to have same meaning going forward
4401  * bit[1] less so...
4402  */
4403 void decode_misc_pwr_mgmt_msr(void)
4404 {
4405 	unsigned long long msr;
4406 
4407 	if (!do_nhm_platform_info)
4408 		return;
4409 
4410 	if (no_MSR_MISC_PWR_MGMT)
4411 		return;
4412 
4413 	if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
4414 		fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
4415 			base_cpu, msr,
4416 			msr & (1 << 0) ? "DIS" : "EN",
4417 			msr & (1 << 1) ? "EN" : "DIS",
4418 			msr & (1 << 8) ? "EN" : "DIS");
4419 }
4420 /*
4421  * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
4422  *
4423  * This MSRs are present on Silvermont processors,
4424  * Intel Atom processor E3000 series (Baytrail), and friends.
4425  */
4426 void decode_c6_demotion_policy_msr(void)
4427 {
4428 	unsigned long long msr;
4429 
4430 	if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
4431 		fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
4432 			base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
4433 
4434 	if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
4435 		fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
4436 			base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
4437 }
4438 
4439 void process_cpuid()
4440 {
4441 	unsigned int eax, ebx, ecx, edx, max_level, max_extended_level;
4442 	unsigned int fms, family, model, stepping;
4443 	unsigned int has_turbo;
4444 
4445 	eax = ebx = ecx = edx = 0;
4446 
4447 	__cpuid(0, max_level, ebx, ecx, edx);
4448 
4449 	if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
4450 		genuine_intel = 1;
4451 
4452 	if (!quiet)
4453 		fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
4454 			(char *)&ebx, (char *)&edx, (char *)&ecx);
4455 
4456 	__cpuid(1, fms, ebx, ecx, edx);
4457 	family = (fms >> 8) & 0xf;
4458 	model = (fms >> 4) & 0xf;
4459 	stepping = fms & 0xf;
4460 	if (family == 0xf)
4461 		family += (fms >> 20) & 0xff;
4462 	if (family >= 6)
4463 		model += ((fms >> 16) & 0xf) << 4;
4464 
4465 	if (!quiet) {
4466 		fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
4467 			max_level, family, model, stepping, family, model, stepping);
4468 		fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
4469 			ecx & (1 << 0) ? "SSE3" : "-",
4470 			ecx & (1 << 3) ? "MONITOR" : "-",
4471 			ecx & (1 << 6) ? "SMX" : "-",
4472 			ecx & (1 << 7) ? "EIST" : "-",
4473 			ecx & (1 << 8) ? "TM2" : "-",
4474 			edx & (1 << 4) ? "TSC" : "-",
4475 			edx & (1 << 5) ? "MSR" : "-",
4476 			edx & (1 << 22) ? "ACPI-TM" : "-",
4477 			edx & (1 << 28) ? "HT" : "-",
4478 			edx & (1 << 29) ? "TM" : "-");
4479 	}
4480 
4481 	if (!(edx & (1 << 5)))
4482 		errx(1, "CPUID: no MSR");
4483 
4484 	/*
4485 	 * check max extended function levels of CPUID.
4486 	 * This is needed to check for invariant TSC.
4487 	 * This check is valid for both Intel and AMD.
4488 	 */
4489 	ebx = ecx = edx = 0;
4490 	__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
4491 
4492 	if (max_extended_level >= 0x80000007) {
4493 
4494 		/*
4495 		 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
4496 		 * this check is valid for both Intel and AMD
4497 		 */
4498 		__cpuid(0x80000007, eax, ebx, ecx, edx);
4499 		has_invariant_tsc = edx & (1 << 8);
4500 	}
4501 
4502 	/*
4503 	 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
4504 	 * this check is valid for both Intel and AMD
4505 	 */
4506 
4507 	__cpuid(0x6, eax, ebx, ecx, edx);
4508 	has_aperf = ecx & (1 << 0);
4509 	if (has_aperf) {
4510 		BIC_PRESENT(BIC_Avg_MHz);
4511 		BIC_PRESENT(BIC_Busy);
4512 		BIC_PRESENT(BIC_Bzy_MHz);
4513 	}
4514 	do_dts = eax & (1 << 0);
4515 	if (do_dts)
4516 		BIC_PRESENT(BIC_CoreTmp);
4517 	has_turbo = eax & (1 << 1);
4518 	do_ptm = eax & (1 << 6);
4519 	if (do_ptm)
4520 		BIC_PRESENT(BIC_PkgTmp);
4521 	has_hwp = eax & (1 << 7);
4522 	has_hwp_notify = eax & (1 << 8);
4523 	has_hwp_activity_window = eax & (1 << 9);
4524 	has_hwp_epp = eax & (1 << 10);
4525 	has_hwp_pkg = eax & (1 << 11);
4526 	has_epb = ecx & (1 << 3);
4527 
4528 	if (!quiet)
4529 		fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
4530 			"%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
4531 			has_aperf ? "" : "No-",
4532 			has_turbo ? "" : "No-",
4533 			do_dts ? "" : "No-",
4534 			do_ptm ? "" : "No-",
4535 			has_hwp ? "" : "No-",
4536 			has_hwp_notify ? "" : "No-",
4537 			has_hwp_activity_window ? "" : "No-",
4538 			has_hwp_epp ? "" : "No-",
4539 			has_hwp_pkg ? "" : "No-",
4540 			has_epb ? "" : "No-");
4541 
4542 	if (!quiet)
4543 		decode_misc_enable_msr();
4544 
4545 
4546 	if (max_level >= 0x7 && !quiet) {
4547 		int has_sgx;
4548 
4549 		ecx = 0;
4550 
4551 		__cpuid_count(0x7, 0, eax, ebx, ecx, edx);
4552 
4553 		has_sgx = ebx & (1 << 2);
4554 		fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
4555 
4556 		if (has_sgx)
4557 			decode_feature_control_msr();
4558 	}
4559 
4560 	if (max_level >= 0x15) {
4561 		unsigned int eax_crystal;
4562 		unsigned int ebx_tsc;
4563 
4564 		/*
4565 		 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
4566 		 */
4567 		eax_crystal = ebx_tsc = crystal_hz = edx = 0;
4568 		__cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
4569 
4570 		if (ebx_tsc != 0) {
4571 
4572 			if (!quiet && (ebx != 0))
4573 				fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
4574 					eax_crystal, ebx_tsc, crystal_hz);
4575 
4576 			if (crystal_hz == 0)
4577 				switch(model) {
4578 				case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
4579 				case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
4580 				case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
4581 				case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
4582 					crystal_hz = 24000000;	/* 24.0 MHz */
4583 					break;
4584 				case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
4585 					crystal_hz = 25000000;	/* 25.0 MHz */
4586 					break;
4587 				case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
4588 				case INTEL_FAM6_ATOM_GEMINI_LAKE:
4589 					crystal_hz = 19200000;	/* 19.2 MHz */
4590 					break;
4591 				default:
4592 					crystal_hz = 0;
4593 			}
4594 
4595 			if (crystal_hz) {
4596 				tsc_hz =  (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
4597 				if (!quiet)
4598 					fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
4599 						tsc_hz / 1000000, crystal_hz, ebx_tsc,  eax_crystal);
4600 			}
4601 		}
4602 	}
4603 	if (max_level >= 0x16) {
4604 		unsigned int base_mhz, max_mhz, bus_mhz, edx;
4605 
4606 		/*
4607 		 * CPUID 16H Base MHz, Max MHz, Bus MHz
4608 		 */
4609 		base_mhz = max_mhz = bus_mhz = edx = 0;
4610 
4611 		__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
4612 		if (!quiet)
4613 			fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
4614 				base_mhz, max_mhz, bus_mhz);
4615 	}
4616 
4617 	if (has_aperf)
4618 		aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
4619 
4620 	BIC_PRESENT(BIC_IRQ);
4621 	BIC_PRESENT(BIC_TSC_MHz);
4622 
4623 	if (probe_nhm_msrs(family, model)) {
4624 		do_nhm_platform_info = 1;
4625 		BIC_PRESENT(BIC_CPU_c1);
4626 		BIC_PRESENT(BIC_CPU_c3);
4627 		BIC_PRESENT(BIC_CPU_c6);
4628 		BIC_PRESENT(BIC_SMI);
4629 	}
4630 	do_snb_cstates = has_snb_msrs(family, model);
4631 
4632 	if (do_snb_cstates)
4633 		BIC_PRESENT(BIC_CPU_c7);
4634 
4635 	do_irtl_snb = has_snb_msrs(family, model);
4636 	if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
4637 		BIC_PRESENT(BIC_Pkgpc2);
4638 	if (pkg_cstate_limit >= PCL__3)
4639 		BIC_PRESENT(BIC_Pkgpc3);
4640 	if (pkg_cstate_limit >= PCL__6)
4641 		BIC_PRESENT(BIC_Pkgpc6);
4642 	if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
4643 		BIC_PRESENT(BIC_Pkgpc7);
4644 	if (has_slv_msrs(family, model)) {
4645 		BIC_NOT_PRESENT(BIC_Pkgpc2);
4646 		BIC_NOT_PRESENT(BIC_Pkgpc3);
4647 		BIC_PRESENT(BIC_Pkgpc6);
4648 		BIC_NOT_PRESENT(BIC_Pkgpc7);
4649 		BIC_PRESENT(BIC_Mod_c6);
4650 		use_c1_residency_msr = 1;
4651 	}
4652 	if (is_dnv(family, model)) {
4653 		BIC_PRESENT(BIC_CPU_c1);
4654 		BIC_NOT_PRESENT(BIC_CPU_c3);
4655 		BIC_NOT_PRESENT(BIC_Pkgpc3);
4656 		BIC_NOT_PRESENT(BIC_CPU_c7);
4657 		BIC_NOT_PRESENT(BIC_Pkgpc7);
4658 		use_c1_residency_msr = 1;
4659 	}
4660 	if (is_skx(family, model)) {
4661 		BIC_NOT_PRESENT(BIC_CPU_c3);
4662 		BIC_NOT_PRESENT(BIC_Pkgpc3);
4663 		BIC_NOT_PRESENT(BIC_CPU_c7);
4664 		BIC_NOT_PRESENT(BIC_Pkgpc7);
4665 	}
4666 	if (is_bdx(family, model)) {
4667 		BIC_NOT_PRESENT(BIC_CPU_c7);
4668 		BIC_NOT_PRESENT(BIC_Pkgpc7);
4669 	}
4670 	if (has_hsw_msrs(family, model)) {
4671 		BIC_PRESENT(BIC_Pkgpc8);
4672 		BIC_PRESENT(BIC_Pkgpc9);
4673 		BIC_PRESENT(BIC_Pkgpc10);
4674 	}
4675 	do_irtl_hsw = has_hsw_msrs(family, model);
4676 	if (has_skl_msrs(family, model)) {
4677 		BIC_PRESENT(BIC_Totl_c0);
4678 		BIC_PRESENT(BIC_Any_c0);
4679 		BIC_PRESENT(BIC_GFX_c0);
4680 		BIC_PRESENT(BIC_CPUGFX);
4681 	}
4682 	do_slm_cstates = is_slm(family, model);
4683 	do_knl_cstates  = is_knl(family, model);
4684 	do_cnl_cstates = is_cnl(family, model);
4685 
4686 	if (!quiet)
4687 		decode_misc_pwr_mgmt_msr();
4688 
4689 	if (!quiet && has_slv_msrs(family, model))
4690 		decode_c6_demotion_policy_msr();
4691 
4692 	rapl_probe(family, model);
4693 	perf_limit_reasons_probe(family, model);
4694 	automatic_cstate_conversion_probe(family, model);
4695 
4696 	if (!quiet)
4697 		dump_cstate_pstate_config_info(family, model);
4698 
4699 	if (!quiet)
4700 		dump_sysfs_cstate_config();
4701 	if (!quiet)
4702 		dump_sysfs_pstate_config();
4703 
4704 	if (has_skl_msrs(family, model))
4705 		calculate_tsc_tweak();
4706 
4707 	if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
4708 		BIC_PRESENT(BIC_GFX_rc6);
4709 
4710 	if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
4711 		BIC_PRESENT(BIC_GFXMHz);
4712 
4713 	if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
4714 		BIC_PRESENT(BIC_CPU_LPI);
4715 	else
4716 		BIC_NOT_PRESENT(BIC_CPU_LPI);
4717 
4718 	if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", R_OK))
4719 		BIC_PRESENT(BIC_SYS_LPI);
4720 	else
4721 		BIC_NOT_PRESENT(BIC_SYS_LPI);
4722 
4723 	if (!quiet)
4724 		decode_misc_feature_control();
4725 
4726 	return;
4727 }
4728 
/*
 * in /dev/cpu/ return success for names that are numbers
 * ie. filter out ".", "..", "microcode".
 *
 * Returns 1 when the first character of the entry name is a decimal
 * digit, 0 otherwise.
 */
int dir_filter(const struct dirent *dirp)
{
	/*
	 * <ctype.h> functions require an unsigned char value (or EOF);
	 * passing a negative plain char is undefined.
	 */
	if (isdigit((unsigned char)dirp->d_name[0]))
		return 1;
	else
		return 0;
}
4740 
/*
 * Placeholder: ignores its argument and always returns 0.
 */
int open_dev_cpu_msr(int dummy1)
{
	(void)dummy1;

	return 0;
}
4745 
4746 void topology_probe()
4747 {
4748 	int i;
4749 	int max_core_id = 0;
4750 	int max_package_id = 0;
4751 	int max_siblings = 0;
4752 
4753 	/* Initialize num_cpus, max_cpu_num */
4754 	set_max_cpu_num();
4755 	topo.num_cpus = 0;
4756 	for_all_proc_cpus(count_cpus);
4757 	if (!summary_only && topo.num_cpus > 1)
4758 		BIC_PRESENT(BIC_CPU);
4759 
4760 	if (debug > 1)
4761 		fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
4762 
4763 	cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
4764 	if (cpus == NULL)
4765 		err(1, "calloc cpus");
4766 
4767 	/*
4768 	 * Allocate and initialize cpu_present_set
4769 	 */
4770 	cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
4771 	if (cpu_present_set == NULL)
4772 		err(3, "CPU_ALLOC");
4773 	cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
4774 	CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
4775 	for_all_proc_cpus(mark_cpu_present);
4776 
4777 	/*
4778 	 * Validate that all cpus in cpu_subset are also in cpu_present_set
4779 	 */
4780 	for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
4781 		if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
4782 			if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
4783 				err(1, "cpu%d not present", i);
4784 	}
4785 
4786 	/*
4787 	 * Allocate and initialize cpu_affinity_set
4788 	 */
4789 	cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
4790 	if (cpu_affinity_set == NULL)
4791 		err(3, "CPU_ALLOC");
4792 	cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
4793 	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
4794 
4795 	for_all_proc_cpus(init_thread_id);
4796 
4797 	/*
4798 	 * For online cpus
4799 	 * find max_core_id, max_package_id
4800 	 */
4801 	for (i = 0; i <= topo.max_cpu_num; ++i) {
4802 		int siblings;
4803 
4804 		if (cpu_is_not_present(i)) {
4805 			if (debug > 1)
4806 				fprintf(outf, "cpu%d NOT PRESENT\n", i);
4807 			continue;
4808 		}
4809 
4810 		cpus[i].logical_cpu_id = i;
4811 
4812 		/* get package information */
4813 		cpus[i].physical_package_id = get_physical_package_id(i);
4814 		if (cpus[i].physical_package_id > max_package_id)
4815 			max_package_id = cpus[i].physical_package_id;
4816 
4817 		/* get numa node information */
4818 		cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
4819 		if (cpus[i].physical_node_id > topo.max_node_num)
4820 			topo.max_node_num = cpus[i].physical_node_id;
4821 
4822 		/* get core information */
4823 		cpus[i].physical_core_id = get_core_id(i);
4824 		if (cpus[i].physical_core_id > max_core_id)
4825 			max_core_id = cpus[i].physical_core_id;
4826 
4827 		/* get thread information */
4828 		siblings = get_thread_siblings(&cpus[i]);
4829 		if (siblings > max_siblings)
4830 			max_siblings = siblings;
4831 		if (cpus[i].thread_id == 0)
4832 			topo.num_cores++;
4833 	}
4834 
4835 	topo.cores_per_node = max_core_id + 1;
4836 	if (debug > 1)
4837 		fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
4838 			max_core_id, topo.cores_per_node);
4839 	if (!summary_only && topo.cores_per_node > 1)
4840 		BIC_PRESENT(BIC_Core);
4841 
4842 	topo.num_packages = max_package_id + 1;
4843 	if (debug > 1)
4844 		fprintf(outf, "max_package_id %d, sizing for %d packages\n",
4845 			max_package_id, topo.num_packages);
4846 	if (!summary_only && topo.num_packages > 1)
4847 		BIC_PRESENT(BIC_Package);
4848 
4849 	set_node_data();
4850 	if (debug > 1)
4851 		fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
4852 	if (!summary_only && topo.nodes_per_pkg > 1)
4853 		BIC_PRESENT(BIC_Node);
4854 
4855 	topo.threads_per_core = max_siblings;
4856 	if (debug > 1)
4857 		fprintf(outf, "max_siblings %d\n", max_siblings);
4858 
4859 	if (debug < 1)
4860 		return;
4861 
4862 	for (i = 0; i <= topo.max_cpu_num; ++i) {
4863 		fprintf(outf,
4864 			"cpu %d pkg %d node %d lnode %d core %d thread %d\n",
4865 			i, cpus[i].physical_package_id,
4866 			cpus[i].physical_node_id,
4867 			cpus[i].logical_node_id,
4868 			cpus[i].physical_core_id,
4869 			cpus[i].thread_id);
4870 	}
4871 
4872 }
4873 
4874 void
4875 allocate_counters(struct thread_data **t, struct core_data **c,
4876 		  struct pkg_data **p)
4877 {
4878 	int i;
4879 	int num_cores = topo.cores_per_node * topo.nodes_per_pkg *
4880 			topo.num_packages;
4881 	int num_threads = topo.threads_per_core * num_cores;
4882 
4883 	*t = calloc(num_threads, sizeof(struct thread_data));
4884 	if (*t == NULL)
4885 		goto error;
4886 
4887 	for (i = 0; i < num_threads; i++)
4888 		(*t)[i].cpu_id = -1;
4889 
4890 	*c = calloc(num_cores, sizeof(struct core_data));
4891 	if (*c == NULL)
4892 		goto error;
4893 
4894 	for (i = 0; i < num_cores; i++)
4895 		(*c)[i].core_id = -1;
4896 
4897 	*p = calloc(topo.num_packages, sizeof(struct pkg_data));
4898 	if (*p == NULL)
4899 		goto error;
4900 
4901 	for (i = 0; i < topo.num_packages; i++)
4902 		(*p)[i].package_id = i;
4903 
4904 	return;
4905 error:
4906 	err(1, "calloc counters");
4907 }
4908 /*
4909  * init_counter()
4910  *
4911  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
4912  */
4913 void init_counter(struct thread_data *thread_base, struct core_data *core_base,
4914 	struct pkg_data *pkg_base, int cpu_id)
4915 {
4916 	int pkg_id = cpus[cpu_id].physical_package_id;
4917 	int node_id = cpus[cpu_id].logical_node_id;
4918 	int core_id = cpus[cpu_id].physical_core_id;
4919 	int thread_id = cpus[cpu_id].thread_id;
4920 	struct thread_data *t;
4921 	struct core_data *c;
4922 	struct pkg_data *p;
4923 
4924 
4925 	/* Workaround for systems where physical_node_id==-1
4926 	 * and logical_node_id==(-1 - topo.num_cpus)
4927 	 */
4928 	if (node_id < 0)
4929 		node_id = 0;
4930 
4931 	t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
4932 	c = GET_CORE(core_base, core_id, node_id, pkg_id);
4933 	p = GET_PKG(pkg_base, pkg_id);
4934 
4935 	t->cpu_id = cpu_id;
4936 	if (thread_id == 0) {
4937 		t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
4938 		if (cpu_is_first_core_in_package(cpu_id))
4939 			t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
4940 	}
4941 
4942 	c->core_id = core_id;
4943 	p->package_id = pkg_id;
4944 }
4945 
4946 
4947 int initialize_counters(int cpu_id)
4948 {
4949 	init_counter(EVEN_COUNTERS, cpu_id);
4950 	init_counter(ODD_COUNTERS, cpu_id);
4951 	return 0;
4952 }
4953 
4954 void allocate_output_buffer()
4955 {
4956 	output_buffer = calloc(1, (1 + topo.num_cpus) * 1024);
4957 	outp = output_buffer;
4958 	if (outp == NULL)
4959 		err(-1, "calloc output buffer");
4960 }
4961 void allocate_fd_percpu(void)
4962 {
4963 	fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
4964 	if (fd_percpu == NULL)
4965 		err(-1, "calloc fd_percpu");
4966 }
4967 void allocate_irq_buffers(void)
4968 {
4969 	irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
4970 	if (irq_column_2_cpu == NULL)
4971 		err(-1, "calloc %d", topo.num_cpus);
4972 
4973 	irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
4974 	if (irqs_per_cpu == NULL)
4975 		err(-1, "calloc %d", topo.max_cpu_num + 1);
4976 }
4977 void setup_all_buffers(void)
4978 {
4979 	topology_probe();
4980 	allocate_irq_buffers();
4981 	allocate_fd_percpu();
4982 	allocate_counters(&thread_even, &core_even, &package_even);
4983 	allocate_counters(&thread_odd, &core_odd, &package_odd);
4984 	allocate_output_buffer();
4985 	for_all_proc_cpus(initialize_counters);
4986 }
4987 
4988 void set_base_cpu(void)
4989 {
4990 	base_cpu = sched_getcpu();
4991 	if (base_cpu < 0)
4992 		err(-ENODEV, "No valid cpus found");
4993 
4994 	if (debug > 1)
4995 		fprintf(outf, "base_cpu = %d\n", base_cpu);
4996 }
4997 
4998 void turbostat_init()
4999 {
5000 	setup_all_buffers();
5001 	set_base_cpu();
5002 	check_dev_msr();
5003 	check_permissions();
5004 	process_cpuid();
5005 
5006 
5007 	if (!quiet)
5008 		for_all_cpus(print_hwp, ODD_COUNTERS);
5009 
5010 	if (!quiet)
5011 		for_all_cpus(print_epb, ODD_COUNTERS);
5012 
5013 	if (!quiet)
5014 		for_all_cpus(print_perf_limit, ODD_COUNTERS);
5015 
5016 	if (!quiet)
5017 		for_all_cpus(print_rapl, ODD_COUNTERS);
5018 
5019 	for_all_cpus(set_temperature_target, ODD_COUNTERS);
5020 
5021 	if (!quiet)
5022 		for_all_cpus(print_thermal, ODD_COUNTERS);
5023 
5024 	if (!quiet && do_irtl_snb)
5025 		print_irtl();
5026 }
5027 
5028 int fork_it(char **argv)
5029 {
5030 	pid_t child_pid;
5031 	int status;
5032 
5033 	snapshot_proc_sysfs_files();
5034 	status = for_all_cpus(get_counters, EVEN_COUNTERS);
5035 	first_counter_read = 0;
5036 	if (status)
5037 		exit(status);
5038 	/* clear affinity side-effect of get_counters() */
5039 	sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
5040 	gettimeofday(&tv_even, (struct timezone *)NULL);
5041 
5042 	child_pid = fork();
5043 	if (!child_pid) {
5044 		/* child */
5045 		execvp(argv[0], argv);
5046 		err(errno, "exec %s", argv[0]);
5047 	} else {
5048 
5049 		/* parent */
5050 		if (child_pid == -1)
5051 			err(1, "fork");
5052 
5053 		signal(SIGINT, SIG_IGN);
5054 		signal(SIGQUIT, SIG_IGN);
5055 		if (waitpid(child_pid, &status, 0) == -1)
5056 			err(status, "waitpid");
5057 	}
5058 	/*
5059 	 * n.b. fork_it() does not check for errors from for_all_cpus()
5060 	 * because re-starting is problematic when forking
5061 	 */
5062 	snapshot_proc_sysfs_files();
5063 	for_all_cpus(get_counters, ODD_COUNTERS);
5064 	gettimeofday(&tv_odd, (struct timezone *)NULL);
5065 	timersub(&tv_odd, &tv_even, &tv_delta);
5066 	if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
5067 		fprintf(outf, "%s: Counter reset detected\n", progname);
5068 	else {
5069 		compute_average(EVEN_COUNTERS);
5070 		format_all_counters(EVEN_COUNTERS);
5071 	}
5072 
5073 	fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
5074 
5075 	flush_output_stderr();
5076 
5077 	return status;
5078 }
5079 
5080 int get_and_dump_counters(void)
5081 {
5082 	int status;
5083 
5084 	snapshot_proc_sysfs_files();
5085 	status = for_all_cpus(get_counters, ODD_COUNTERS);
5086 	if (status)
5087 		return status;
5088 
5089 	status = for_all_cpus(dump_counters, ODD_COUNTERS);
5090 	if (status)
5091 		return status;
5092 
5093 	flush_output_stdout();
5094 
5095 	return status;
5096 }
5097 
5098 void print_version() {
5099 	fprintf(outf, "turbostat version 18.07.27"
5100 		" - Len Brown <lenb@kernel.org>\n");
5101 }
5102 
5103 int add_counter(unsigned int msr_num, char *path, char *name,
5104 	unsigned int width, enum counter_scope scope,
5105 	enum counter_type type, enum counter_format format, int flags)
5106 {
5107 	struct msr_counter *msrp;
5108 
5109 	msrp = calloc(1, sizeof(struct msr_counter));
5110 	if (msrp == NULL) {
5111 		perror("calloc");
5112 		exit(1);
5113 	}
5114 
5115 	msrp->msr_num = msr_num;
5116 	strncpy(msrp->name, name, NAME_BYTES);
5117 	if (path)
5118 		strncpy(msrp->path, path, PATH_BYTES);
5119 	msrp->width = width;
5120 	msrp->type = type;
5121 	msrp->format = format;
5122 	msrp->flags = flags;
5123 
5124 	switch (scope) {
5125 
5126 	case SCOPE_CPU:
5127 		msrp->next = sys.tp;
5128 		sys.tp = msrp;
5129 		sys.added_thread_counters++;
5130 		if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) {
5131 			fprintf(stderr, "exceeded max %d added thread counters\n",
5132 				MAX_ADDED_COUNTERS);
5133 			exit(-1);
5134 		}
5135 		break;
5136 
5137 	case SCOPE_CORE:
5138 		msrp->next = sys.cp;
5139 		sys.cp = msrp;
5140 		sys.added_core_counters++;
5141 		if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
5142 			fprintf(stderr, "exceeded max %d added core counters\n",
5143 				MAX_ADDED_COUNTERS);
5144 			exit(-1);
5145 		}
5146 		break;
5147 
5148 	case SCOPE_PACKAGE:
5149 		msrp->next = sys.pp;
5150 		sys.pp = msrp;
5151 		sys.added_package_counters++;
5152 		if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
5153 			fprintf(stderr, "exceeded max %d added package counters\n",
5154 				MAX_ADDED_COUNTERS);
5155 			exit(-1);
5156 		}
5157 		break;
5158 	}
5159 
5160 	return 0;
5161 }
5162 
5163 void parse_add_command(char *add_command)
5164 {
5165 	int msr_num = 0;
5166 	char *path = NULL;
5167 	char name_buffer[NAME_BYTES] = "";
5168 	int width = 64;
5169 	int fail = 0;
5170 	enum counter_scope scope = SCOPE_CPU;
5171 	enum counter_type type = COUNTER_CYCLES;
5172 	enum counter_format format = FORMAT_DELTA;
5173 
5174 	while (add_command) {
5175 
5176 		if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
5177 			goto next;
5178 
5179 		if (sscanf(add_command, "msr%d", &msr_num) == 1)
5180 			goto next;
5181 
5182 		if (*add_command == '/') {
5183 			path = add_command;
5184 			goto next;
5185 		}
5186 
5187 		if (sscanf(add_command, "u%d", &width) == 1) {
5188 			if ((width == 32) || (width == 64))
5189 				goto next;
5190 			width = 64;
5191 		}
5192 		if (!strncmp(add_command, "cpu", strlen("cpu"))) {
5193 			scope = SCOPE_CPU;
5194 			goto next;
5195 		}
5196 		if (!strncmp(add_command, "core", strlen("core"))) {
5197 			scope = SCOPE_CORE;
5198 			goto next;
5199 		}
5200 		if (!strncmp(add_command, "package", strlen("package"))) {
5201 			scope = SCOPE_PACKAGE;
5202 			goto next;
5203 		}
5204 		if (!strncmp(add_command, "cycles", strlen("cycles"))) {
5205 			type = COUNTER_CYCLES;
5206 			goto next;
5207 		}
5208 		if (!strncmp(add_command, "seconds", strlen("seconds"))) {
5209 			type = COUNTER_SECONDS;
5210 			goto next;
5211 		}
5212 		if (!strncmp(add_command, "usec", strlen("usec"))) {
5213 			type = COUNTER_USEC;
5214 			goto next;
5215 		}
5216 		if (!strncmp(add_command, "raw", strlen("raw"))) {
5217 			format = FORMAT_RAW;
5218 			goto next;
5219 		}
5220 		if (!strncmp(add_command, "delta", strlen("delta"))) {
5221 			format = FORMAT_DELTA;
5222 			goto next;
5223 		}
5224 		if (!strncmp(add_command, "percent", strlen("percent"))) {
5225 			format = FORMAT_PERCENT;
5226 			goto next;
5227 		}
5228 
5229 		if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {	/* 18 < NAME_BYTES */
5230 			char *eos;
5231 
5232 			eos = strchr(name_buffer, ',');
5233 			if (eos)
5234 				*eos = '\0';
5235 			goto next;
5236 		}
5237 
5238 next:
5239 		add_command = strchr(add_command, ',');
5240 		if (add_command) {
5241 			*add_command = '\0';
5242 			add_command++;
5243 		}
5244 
5245 	}
5246 	if ((msr_num == 0) && (path == NULL)) {
5247 		fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n");
5248 		fail++;
5249 	}
5250 
5251 	/* generate default column header */
5252 	if (*name_buffer == '\0') {
5253 		if (width == 32)
5254 			sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
5255 		else
5256 			sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
5257 	}
5258 
5259 	if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0))
5260 		fail++;
5261 
5262 	if (fail) {
5263 		help();
5264 		exit(1);
5265 	}
5266 }
5267 
5268 int is_deferred_skip(char *name)
5269 {
5270 	int i;
5271 
5272 	for (i = 0; i < deferred_skip_index; ++i)
5273 		if (!strcmp(name, deferred_skip_names[i]))
5274 			return 1;
5275 	return 0;
5276 }
5277 
5278 void probe_sysfs(void)
5279 {
5280 	char path[64];
5281 	char name_buf[16];
5282 	FILE *input;
5283 	int state;
5284 	char *sp;
5285 
5286 	if (!DO_BIC(BIC_sysfs))
5287 		return;
5288 
5289 	for (state = 10; state >= 0; --state) {
5290 
5291 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
5292 			base_cpu, state);
5293 		input = fopen(path, "r");
5294 		if (input == NULL)
5295 			continue;
5296 		fgets(name_buf, sizeof(name_buf), input);
5297 
5298 		 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
5299 		sp = strchr(name_buf, '-');
5300 		if (!sp)
5301 			sp = strchrnul(name_buf, '\n');
5302 		*sp = '%';
5303 		*(sp + 1) = '\0';
5304 
5305 		fclose(input);
5306 
5307 		sprintf(path, "cpuidle/state%d/time", state);
5308 
5309 		if (is_deferred_skip(name_buf))
5310 			continue;
5311 
5312 		add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC,
5313 				FORMAT_PERCENT, SYSFS_PERCPU);
5314 	}
5315 
5316 	for (state = 10; state >= 0; --state) {
5317 
5318 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
5319 			base_cpu, state);
5320 		input = fopen(path, "r");
5321 		if (input == NULL)
5322 			continue;
5323 		fgets(name_buf, sizeof(name_buf), input);
5324 		 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
5325 		sp = strchr(name_buf, '-');
5326 		if (!sp)
5327 			sp = strchrnul(name_buf, '\n');
5328 		*sp = '\0';
5329 		fclose(input);
5330 
5331 		sprintf(path, "cpuidle/state%d/usage", state);
5332 
5333 		if (is_deferred_skip(name_buf))
5334 			continue;
5335 
5336 		add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS,
5337 				FORMAT_DELTA, SYSFS_PERCPU);
5338 	}
5339 
5340 }
5341 
5342 
5343 /*
5344  * parse cpuset with following syntax
5345  * 1,2,4..6,8-10 and set bits in cpu_subset
5346  */
5347 void parse_cpu_command(char *optarg)
5348 {
5349 	unsigned int start, end;
5350 	char *next;
5351 
5352 	if (!strcmp(optarg, "core")) {
5353 		if (cpu_subset)
5354 			goto error;
5355 		show_core_only++;
5356 		return;
5357 	}
5358 	if (!strcmp(optarg, "package")) {
5359 		if (cpu_subset)
5360 			goto error;
5361 		show_pkg_only++;
5362 		return;
5363 	}
5364 	if (show_core_only || show_pkg_only)
5365 		goto error;
5366 
5367 	cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
5368 	if (cpu_subset == NULL)
5369 		err(3, "CPU_ALLOC");
5370 	cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
5371 
5372 	CPU_ZERO_S(cpu_subset_size, cpu_subset);
5373 
5374 	next = optarg;
5375 
5376 	while (next && *next) {
5377 
5378 		if (*next == '-')	/* no negative cpu numbers */
5379 			goto error;
5380 
5381 		start = strtoul(next, &next, 10);
5382 
5383 		if (start >= CPU_SUBSET_MAXCPUS)
5384 			goto error;
5385 		CPU_SET_S(start, cpu_subset_size, cpu_subset);
5386 
5387 		if (*next == '\0')
5388 			break;
5389 
5390 		if (*next == ',') {
5391 			next += 1;
5392 			continue;
5393 		}
5394 
5395 		if (*next == '-') {
5396 			next += 1;	/* start range */
5397 		} else if (*next == '.') {
5398 			next += 1;
5399 			if (*next == '.')
5400 				next += 1;	/* start range */
5401 			else
5402 				goto error;
5403 		}
5404 
5405 		end = strtoul(next, &next, 10);
5406 		if (end <= start)
5407 			goto error;
5408 
5409 		while (++start <= end) {
5410 			if (start >= CPU_SUBSET_MAXCPUS)
5411 				goto error;
5412 			CPU_SET_S(start, cpu_subset_size, cpu_subset);
5413 		}
5414 
5415 		if (*next == ',')
5416 			next += 1;
5417 		else if (*next != '\0')
5418 			goto error;
5419 	}
5420 
5421 	return;
5422 
5423 error:
5424 	fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
5425 	help();
5426 	exit(-1);
5427 }
5428 
5429 
5430 void cmdline(int argc, char **argv)
5431 {
5432 	int opt;
5433 	int option_index = 0;
5434 	static struct option long_options[] = {
5435 		{"add",		required_argument,	0, 'a'},
5436 		{"cpu",		required_argument,	0, 'c'},
5437 		{"Dump",	no_argument,		0, 'D'},
5438 		{"debug",	no_argument,		0, 'd'},	/* internal, not documented */
5439 		{"enable",	required_argument,	0, 'e'},
5440 		{"interval",	required_argument,	0, 'i'},
5441 		{"num_iterations",	required_argument,	0, 'n'},
5442 		{"help",	no_argument,		0, 'h'},
5443 		{"hide",	required_argument,	0, 'H'},	// meh, -h taken by --help
5444 		{"Joules",	no_argument,		0, 'J'},
5445 		{"list",	no_argument,		0, 'l'},
5446 		{"out",		required_argument,	0, 'o'},
5447 		{"quiet",	no_argument,		0, 'q'},
5448 		{"show",	required_argument,	0, 's'},
5449 		{"Summary",	no_argument,		0, 'S'},
5450 		{"TCC",		required_argument,	0, 'T'},
5451 		{"version",	no_argument,		0, 'v' },
5452 		{0,		0,			0,  0 }
5453 	};
5454 
5455 	progname = argv[0];
5456 
5457 	while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v",
5458 				long_options, &option_index)) != -1) {
5459 		switch (opt) {
5460 		case 'a':
5461 			parse_add_command(optarg);
5462 			break;
5463 		case 'c':
5464 			parse_cpu_command(optarg);
5465 			break;
5466 		case 'D':
5467 			dump_only++;
5468 			break;
5469 		case 'e':
5470 			/* --enable specified counter */
5471 			bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
5472 			break;
5473 		case 'd':
5474 			debug++;
5475 			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5476 			break;
5477 		case 'H':
5478 			/*
5479 			 * --hide: do not show those specified
5480 			 *  multiple invocations simply clear more bits in enabled mask
5481 			 */
5482 			bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
5483 			break;
5484 		case 'h':
5485 		default:
5486 			help();
5487 			exit(1);
5488 		case 'i':
5489 			{
5490 				double interval = strtod(optarg, NULL);
5491 
5492 				if (interval < 0.001) {
5493 					fprintf(outf, "interval %f seconds is too small\n",
5494 						interval);
5495 					exit(2);
5496 				}
5497 
5498 				interval_tv.tv_sec = interval_ts.tv_sec = interval;
5499 				interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
5500 				interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
5501 			}
5502 			break;
5503 		case 'J':
5504 			rapl_joules++;
5505 			break;
5506 		case 'l':
5507 			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5508 			list_header_only++;
5509 			quiet++;
5510 			break;
5511 		case 'o':
5512 			outf = fopen_or_die(optarg, "w");
5513 			break;
5514 		case 'q':
5515 			quiet = 1;
5516 			break;
5517 		case 'n':
5518 			num_iterations = strtod(optarg, NULL);
5519 
5520 			if (num_iterations <= 0) {
5521 				fprintf(outf, "iterations %d should be positive number\n",
5522 					num_iterations);
5523 				exit(2);
5524 			}
5525 			break;
5526 		case 's':
5527 			/*
5528 			 * --show: show only those specified
5529 			 *  The 1st invocation will clear and replace the enabled mask
5530 			 *  subsequent invocations can add to it.
5531 			 */
5532 			if (shown == 0)
5533 				bic_enabled = bic_lookup(optarg, SHOW_LIST);
5534 			else
5535 				bic_enabled |= bic_lookup(optarg, SHOW_LIST);
5536 			shown = 1;
5537 			break;
5538 		case 'S':
5539 			summary_only++;
5540 			break;
5541 		case 'T':
5542 			tcc_activation_temp_override = atoi(optarg);
5543 			break;
5544 		case 'v':
5545 			print_version();
5546 			exit(0);
5547 			break;
5548 		}
5549 	}
5550 }
5551 
5552 int main(int argc, char **argv)
5553 {
5554 	outf = stderr;
5555 	cmdline(argc, argv);
5556 
5557 	if (!quiet)
5558 		print_version();
5559 
5560 	probe_sysfs();
5561 
5562 	turbostat_init();
5563 
5564 	/* dump counters and exit */
5565 	if (dump_only)
5566 		return get_and_dump_counters();
5567 
5568 	/* list header and exit */
5569 	if (list_header_only) {
5570 		print_header(",");
5571 		flush_output_stdout();
5572 		return 0;
5573 	}
5574 
5575 	/*
5576 	 * if any params left, it must be a command to fork
5577 	 */
5578 	if (argc - optind)
5579 		return fork_it(argv + optind);
5580 	else
5581 		turbostat_loop();
5582 
5583 	return 0;
5584 }
5585