1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * turbostat -- show CPU frequency and C-state residency
4  * on modern Intel and AMD processors.
5  *
6  * Copyright (c) 2013 Intel Corporation.
7  * Len Brown <len.brown@intel.com>
8  */
9 
10 #define _GNU_SOURCE
11 #include MSRHEADER
12 #include INTEL_FAMILY_HEADER
13 #include <stdarg.h>
14 #include <stdio.h>
15 #include <err.h>
16 #include <unistd.h>
17 #include <sys/types.h>
18 #include <sys/wait.h>
19 #include <sys/stat.h>
20 #include <sys/select.h>
21 #include <sys/resource.h>
22 #include <fcntl.h>
23 #include <signal.h>
24 #include <sys/time.h>
25 #include <stdlib.h>
26 #include <getopt.h>
27 #include <dirent.h>
28 #include <string.h>
29 #include <ctype.h>
30 #include <sched.h>
31 #include <time.h>
32 #include <cpuid.h>
33 #include <sys/capability.h>
34 #include <errno.h>
35 #include <math.h>
36 
/* Input/output plumbing */
char *proc_stat = "/proc/stat";
FILE *outf;
int *fd_percpu;		/* indexed by cpu number, see get_msr_fd() */

/* Measurement interval: 5 seconds by default, kept in both representations */
struct timeval interval_tv = { .tv_sec = 5, .tv_usec = 0 };
struct timespec interval_ts = { .tv_sec = 5, .tv_nsec = 0 };

/* Run-mode switches and feature flags; all start at 0 unless initialized here */
unsigned int num_iterations;
unsigned int debug;
unsigned int quiet;
unsigned int shown;
unsigned int sums_need_wide_columns;
unsigned int rapl_joules;
unsigned int summary_only;
unsigned int list_header_only;
unsigned int dump_only;
unsigned int do_snb_cstates;
unsigned int do_knl_cstates;
unsigned int do_slm_cstates;
unsigned int use_c1_residency_msr;
unsigned int has_aperf;
unsigned int has_epb;
unsigned int do_irtl_snb;
unsigned int do_irtl_hsw;
unsigned int units = 1000000;	/* MHz etc */
unsigned int genuine_intel;
unsigned int authentic_amd;
unsigned int hygon_genuine;
unsigned int max_level;
unsigned int max_extended_level;
unsigned int has_invariant_tsc;
unsigned int do_nhm_platform_info;
unsigned int no_MSR_MISC_PWR_MGMT;
unsigned int aperf_mperf_multiplier = 1;
double bclk;
double base_hz;
unsigned int has_base_hz;
double tsc_tweak = 1.0;		/* format_counters() multiplies the TSC delta by this */
unsigned int show_pkg_only;
unsigned int show_core_only;

/* Text is accumulated at outp with sprintf() before being written out */
char *output_buffer;
char *outp;

unsigned int do_rapl;		/* mask of RAPL_* bits */
unsigned int do_dts;
unsigned int do_ptm;
unsigned long long gfx_cur_rc6_ms;
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
unsigned int gfx_cur_mhz;
unsigned int tcc_activation_temp;
unsigned int tcc_activation_temp_override;
double rapl_power_units;
double rapl_time_units;
double rapl_dram_energy_units;
double rapl_energy_units;
double rapl_joule_counter_range;
unsigned int do_core_perf_limit_reasons;
unsigned int has_automatic_cstate_conversion;
unsigned int do_gfx_perf_limit_reasons;
unsigned int do_ring_perf_limit_reasons;
unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;

double discover_bclk(unsigned int family, unsigned int model);

/* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES, IA32_HWP_REQUEST, IA32_HWP_STATUS */
unsigned int has_hwp;
/* IA32_HWP_INTERRUPT */
unsigned int has_hwp_notify;
/* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_activity_window;
/* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_epp;
/* IA32_HWP_REQUEST_PKG */
unsigned int has_hwp_pkg;
unsigned int has_misc_feature_control;
unsigned int first_counter_read = 1;
int ignore_stdin;
104 
/*
 * Bits of the do_rapl mask.  Each comment names the MSR(s) the bit stands for.
 */

/* 0x610 MSR_PKG_POWER_LIMIT, 0x611 MSR_PKG_ENERGY_STATUS */
#define RAPL_PKG			(1 << 0)
/* 0x613 MSR_PKG_PERF_STATUS */
#define RAPL_PKG_PERF_STATUS		(1 << 1)
/* 0x614 MSR_PKG_POWER_INFO */
#define RAPL_PKG_POWER_INFO		(1 << 2)

/* 0x618 MSR_DRAM_POWER_LIMIT, 0x619 MSR_DRAM_ENERGY_STATUS */
#define RAPL_DRAM			(1 << 3)
/* 0x61b MSR_DRAM_PERF_STATUS */
#define RAPL_DRAM_PERF_STATUS		(1 << 4)
/* 0x61c MSR_DRAM_POWER_INFO */
#define RAPL_DRAM_POWER_INFO		(1 << 5)

/* 0x638 MSR_PP0_POWER_LIMIT */
#define RAPL_CORES_POWER_LIMIT		(1 << 6)
/* 0x63a MSR_PP0_POLICY */
#define RAPL_CORE_POLICY		(1 << 7)

/* 0x640 MSR_PP1_POWER_LIMIT, 0x641 MSR_PP1_ENERGY_STATUS, 0x642 MSR_PP1_POLICY */
#define RAPL_GFX			(1 << 8)

/* 0x639 MSR_PP0_ENERGY_STATUS */
#define RAPL_CORES_ENERGY_STATUS	(1 << 9)
/* Indicates cores energy collection is per-core, not per-package. */
#define RAPL_PER_CORE_ENERGY		(1 << 10)
/*
 * 0xc0010299 MSR_RAPL_PWR_UNIT
 * 0xc001029a MSR_CORE_ENERGY_STAT
 * 0xc001029b MSR_PKG_ENERGY_STAT
 */
#define RAPL_AMD_F17H			(1 << 11)

/* Both PP0 (cores) capabilities together */
#define RAPL_CORES \
	(RAPL_CORES_POWER_LIMIT | RAPL_CORES_ENERGY_STATUS)

#define TJMAX_DEFAULT			100

/* MSRs that are not yet in the kernel-provided header. */
#define MSR_RAPL_PWR_UNIT		0xc0010299
#define MSR_CORE_ENERGY_STAT		0xc001029a
#define MSR_PKG_ENERGY_STAT		0xc001029b

/* Larger of two values; note that each argument may be evaluated twice. */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
149 
/*
 * buffer size used by sscanf() for added column names
 * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
 */
#define	NAME_BYTES 20	/* size of msr_counter.name[] */
#define PATH_BYTES 128	/* size of msr_counter.path[] */

int backwards_count;
char *progname;

#define CPU_SUBSET_MAXCPUS	1024	/* need to use before probe... */
/*
 * Dynamically sized CPU sets, each paired with its byte size below:
 *   cpu_present_set  - tested by cpu_is_not_present()
 *   cpu_affinity_set - scratch set rebuilt by cpu_migrate()
 *   cpu_subset       - when non-NULL, format_counters() prints only these CPUs
 */
cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
#define MAX_ADDED_COUNTERS 8		/* length of counter[] in core_data and pkg_data */
#define MAX_ADDED_THREAD_COUNTERS 24	/* length of counter[] in thread_data */
#define BITMASK_SIZE 32
166 
/*
 * Per-thread (logical CPU) counters and identification.
 * Two sets are declared, thread_even and thread_odd; EVEN_COUNTERS and
 * ODD_COUNTERS bundle them with the matching core and package sets.
 */
struct thread_data {
	struct timeval tv_begin;	/* format_counters() prints tv_end - tv_begin as "usec" */
	struct timeval tv_end;		/* printed as Time_Of_Day_Seconds */
	struct timeval tv_delta;	/* interval length; divides aperf for Avg_MHz */
	unsigned long long tsc;		/* scaled by tsc_tweak; denominator of Busy% */
	unsigned long long aperf;	/* numerator of Avg_MHz */
	unsigned long long mperf;	/* numerator of Busy% */
	unsigned long long c1;
	unsigned long long  irq_count;
	unsigned int smi_count;
	unsigned int cpu_id;		/* index into cpus[]; tested against cpu_present_set */
	unsigned int apic_id;
	unsigned int x2apic_id;
	unsigned int flags;		/* CPU_IS_FIRST_* bits below */
#define CPU_IS_FIRST_THREAD_IN_CORE	0x2
#define CPU_IS_FIRST_CORE_IN_PACKAGE	0x4
	/* added counters, in the order of the sys.tp list */
	unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
} *thread_even, *thread_odd;
185 
/*
 * Per-core counters.  Two sets are declared, core_even and core_odd.
 */
struct core_data {
	unsigned long long c3;
	unsigned long long c6;
	unsigned long long c7;
	unsigned long long mc6_us;	/* duplicate as per-core for now, even though per module */
	unsigned int core_temp_c;	/* dumped as "DTS: %dC" */
	unsigned int core_energy;	/* MSR_CORE_ENERGY_STAT */
	unsigned int core_id;		/* value shown in the "Core" column */
	/* added counters, in the order of the sys.cp list */
	unsigned long long counter[MAX_ADDED_COUNTERS];
} *core_even, *core_odd;
196 
/*
 * Per-package counters.  Two sets are declared, package_even and package_odd.
 */
struct pkg_data {
	unsigned long long pc2;
	unsigned long long pc3;
	unsigned long long pc6;
	unsigned long long pc7;
	unsigned long long pc8;
	unsigned long long pc9;
	unsigned long long pc10;
	unsigned long long cpu_lpi;
	unsigned long long sys_lpi;
	unsigned long long pkg_wtd_core_c0;		/* dumped as "Weighted cores" */
	unsigned long long pkg_any_core_c0;		/* dumped as "Any cores" */
	unsigned long long pkg_any_gfxe_c0;		/* dumped as "Any GFX" */
	unsigned long long pkg_both_core_gfxe_c0;	/* dumped as "CPU + GFX" */
	long long gfx_rc6_ms;
	unsigned int gfx_mhz;
	unsigned int package_id;	/* value shown in the "Package" column */
	unsigned int energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
	unsigned int energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
	unsigned int energy_cores;	/* MSR_PP0_ENERGY_STATUS */
	unsigned int energy_gfx;	/* MSR_PP1_ENERGY_STATUS */
	unsigned int rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */
	unsigned int rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */
	unsigned int pkg_temp_c;	/* dumped as "PTM: %dC" */
	/* added counters, in the order of the sys.pp list */
	unsigned long long counter[MAX_ADDED_COUNTERS];
} *package_even, *package_odd;
223 
/* Argument triples for functions taking (thread, core, package) base pointers */
#define ODD_COUNTERS thread_odd, core_odd, package_odd
#define EVEN_COUNTERS thread_even, core_even, package_even

/*
 * Address of one thread entry inside a flat array laid out as
 * [package][node][core][thread], using the dimensions stored in topo.
 */
#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)	      \
	((thread_base) +						      \
	 ((pkg_no) *							      \
	  topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
	 ((node_no) * topo.cores_per_node * topo.threads_per_core) +	      \
	 ((core_no) * topo.threads_per_core) +				      \
	 (thread_no))

/* Address of one core entry inside a flat [package][node][core] array. */
#define GET_CORE(core_base, core_no, node_no, pkg_no)			\
	((core_base) +							\
	 ((pkg_no) *  topo.nodes_per_pkg * topo.cores_per_node) +	\
	 ((node_no) * topo.cores_per_node) +				\
	 (core_no))

/*
 * Address of one package entry.  Both arguments are parenthesized so that
 * an expression argument (e.g. "a | b" or "x ? y : z") keeps its meaning.
 */
#define GET_PKG(pkg_base, pkg_no) ((pkg_base) + (pkg_no))
243 
/* Which level of the topology a counter belongs to */
enum counter_scope {
	SCOPE_CPU,
	SCOPE_CORE,
	SCOPE_PACKAGE
};

/* What a counter measures */
enum counter_type {
	COUNTER_ITEMS,
	COUNTER_CYCLES,
	COUNTER_SECONDS,
	COUNTER_USEC
};

/* How a counter is presented; FORMAT_RAW columns get fixed-width headers */
enum counter_format {
	FORMAT_RAW,
	FORMAT_DELTA,
	FORMAT_PERCENT
};
247 
248 struct msr_counter {
249 	unsigned int msr_num;
250 	char name[NAME_BYTES];
251 	char path[PATH_BYTES];
252 	unsigned int width;
253 	enum counter_type type;
254 	enum counter_format format;
255 	struct msr_counter *next;
256 	unsigned int flags;
257 #define	FLAGS_HIDE	(1 << 0)
258 #define	FLAGS_SHOW	(1 << 1)
259 #define	SYSFS_PERCPU	(1 << 1)
260 };
261 
/*
 * Counters added at run time, kept as three singly linked lists.
 * print_header() and dump_counters() walk each list through ->next.
 */
struct sys_counters {
	unsigned int added_thread_counters;
	unsigned int added_core_counters;
	unsigned int added_package_counters;
	struct msr_counter *tp;	/* thread list; entry i pairs with thread_data.counter[i] */
	struct msr_counter *cp;	/* core list; entry i pairs with core_data.counter[i] */
	struct msr_counter *pp;	/* package list; entry i pairs with pkg_data.counter[i] */
} sys;
270 
/*
 * One thread/core/package record for the whole system.
 * format_counters() recognizes &average.threads as the summary row and
 * prints "-" in the topology columns for it.
 */
struct system_summary {
	struct thread_data threads;
	struct core_data cores;
	struct pkg_data packages;
} average;
276 
/*
 * Topology record for one CPU.
 * The cpus array is indexed by thread_data.cpu_id (see the Die and Node
 * columns in format_counters()).
 */
struct cpu_topology {
	int physical_package_id;
	int die_id;		/* value shown in the "Die" column */
	int logical_cpu_id;
	int physical_node_id;	/* value shown in the "Node" column */
	int logical_node_id;	/* 0-based count within the package */
	int physical_core_id;
	int thread_id;
	cpu_set_t *put_ids; /* Processing Unit/Thread IDs */
} *cpus;
287 
/*
 * System-wide topology dimensions.
 * num_packages, nodes_per_pkg, cores_per_node and threads_per_core bound the
 * loops in for_all_cpus(); the last three are also the strides used by
 * GET_THREAD() and GET_CORE().
 */
struct topo_params {
	int num_packages;
	int num_die;
	int num_cpus;
	int num_cores;
	int max_cpu_num;
	int max_node_num;
	int nodes_per_pkg;
	int cores_per_node;
	int threads_per_core;
} topo;
299 
/* Timestamps belonging to the even and odd counter sets, and their difference */
struct timeval tv_even;
struct timeval tv_odd;
struct timeval tv_delta;

/* /proc/interrupts column numbers */
int *irq_column_2_cpu;
/* indexed by cpu_num */
int *irqs_per_cpu;

void setup_all_buffers(void);

/* Candidate locations of the system low-power-idle residency counter */
char *sys_lpi_file;
char *sys_lpi_file_sysfs =
	"/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
char *sys_lpi_file_debugfs =
	"/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
310 
311 int cpu_is_not_present(int cpu)
312 {
313 	return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
314 }
315 /*
316  * run func(thread, core, package) in topology order
317  * skip non-present cpus
318  */
319 
320 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
321 	struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
322 {
323 	int retval, pkg_no, core_no, thread_no, node_no;
324 
325 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
326 		for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) {
327 			for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
328 				for (thread_no = 0; thread_no <
329 					topo.threads_per_core; ++thread_no) {
330 					struct thread_data *t;
331 					struct core_data *c;
332 					struct pkg_data *p;
333 
334 					t = GET_THREAD(thread_base, thread_no,
335 						       core_no, node_no,
336 						       pkg_no);
337 
338 					if (cpu_is_not_present(t->cpu_id))
339 						continue;
340 
341 					c = GET_CORE(core_base, core_no,
342 						     node_no, pkg_no);
343 					p = GET_PKG(pkg_base, pkg_no);
344 
345 					retval = func(t, c, p);
346 					if (retval)
347 						return retval;
348 				}
349 			}
350 		}
351 	}
352 	return 0;
353 }
354 
355 int cpu_migrate(int cpu)
356 {
357 	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
358 	CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
359 	if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
360 		return -1;
361 	else
362 		return 0;
363 }
364 int get_msr_fd(int cpu)
365 {
366 	char pathname[32];
367 	int fd;
368 
369 	fd = fd_percpu[cpu];
370 
371 	if (fd)
372 		return fd;
373 
374 	sprintf(pathname, "/dev/cpu/%d/msr", cpu);
375 	fd = open(pathname, O_RDONLY);
376 	if (fd < 0)
377 		err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
378 
379 	fd_percpu[cpu] = fd;
380 
381 	return fd;
382 }
383 
/*
 * Read the 8-byte MSR at "offset" on "cpu" into *msr.
 * Returns 0 on success; anything other than a full-size read terminates
 * the program through err().
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
	int fd = get_msr_fd(cpu);
	ssize_t nread = pread(fd, msr, sizeof(*msr), offset);

	if (nread != sizeof(*msr))
		err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);

	return 0;
}
395 
/*
 * This list matches the column headers, except
 * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
 * 2. Core and CPU are moved to the end, we can't have strings that contain them
 *    matching on them for --show and --hide.
 *
 * bic_lookup() maps the entry at index i to bit (1ULL << i), so the order
 * here must stay in step with the BIC_* bit definitions.
 */
struct msr_counter bic[] = {
	{ 0x0, "usec" },
	{ 0x0, "Time_Of_Day_Seconds" },
	{ 0x0, "Package" },
	{ 0x0, "Node" },
	{ 0x0, "Avg_MHz" },
	{ 0x0, "Busy%" },
	{ 0x0, "Bzy_MHz" },
	{ 0x0, "TSC_MHz" },
	{ 0x0, "IRQ" },
	{ 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
	{ 0x0, "sysfs" },
	{ 0x0, "CPU%c1" },
	{ 0x0, "CPU%c3" },
	{ 0x0, "CPU%c6" },
	{ 0x0, "CPU%c7" },
	{ 0x0, "ThreadC" },
	{ 0x0, "CoreTmp" },
	{ 0x0, "CoreCnt" },
	{ 0x0, "PkgTmp" },
	{ 0x0, "GFX%rc6" },
	{ 0x0, "GFXMHz" },
	{ 0x0, "Pkg%pc2" },
	{ 0x0, "Pkg%pc3" },
	{ 0x0, "Pkg%pc6" },
	{ 0x0, "Pkg%pc7" },
	{ 0x0, "Pkg%pc8" },
	{ 0x0, "Pkg%pc9" },
	{ 0x0, "Pk%pc10" },
	{ 0x0, "CPU%LPI" },
	{ 0x0, "SYS%LPI" },
	{ 0x0, "PkgWatt" },
	{ 0x0, "CorWatt" },
	{ 0x0, "GFXWatt" },
	{ 0x0, "PkgCnt" },
	{ 0x0, "RAMWatt" },
	{ 0x0, "PKG_%" },
	{ 0x0, "RAM_%" },
	{ 0x0, "Pkg_J" },
	{ 0x0, "Cor_J" },
	{ 0x0, "GFX_J" },
	{ 0x0, "RAM_J" },
	{ 0x0, "Mod%c6" },
	{ 0x0, "Totl%C0" },
	{ 0x0, "Any%C0" },
	{ 0x0, "GFX%C0" },
	{ 0x0, "CPUGFX%" },
	{ 0x0, "Core" },
	{ 0x0, "CPU" },
	{ 0x0, "APIC" },
	{ 0x0, "X2APIC" },
	{ 0x0, "Die" },
};
455 
/* Number of entries in bic[] */
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))

/*
 * One bit per built-in counter.  Bit n corresponds to bic[n], which is how
 * bic_lookup() turns a column name into a mask.
 */
#define	BIC_USEC	(1ULL << 0)
#define	BIC_TOD		(1ULL << 1)
#define	BIC_Package	(1ULL << 2)
#define	BIC_Node	(1ULL << 3)
#define	BIC_Avg_MHz	(1ULL << 4)
#define	BIC_Busy	(1ULL << 5)
#define	BIC_Bzy_MHz	(1ULL << 6)
#define	BIC_TSC_MHz	(1ULL << 7)
#define	BIC_IRQ		(1ULL << 8)
#define	BIC_SMI		(1ULL << 9)
#define	BIC_sysfs	(1ULL << 10)
#define	BIC_CPU_c1	(1ULL << 11)
#define	BIC_CPU_c3	(1ULL << 12)
#define	BIC_CPU_c6	(1ULL << 13)
#define	BIC_CPU_c7	(1ULL << 14)
#define	BIC_ThreadC	(1ULL << 15)
#define	BIC_CoreTmp	(1ULL << 16)
#define	BIC_CoreCnt	(1ULL << 17)
#define	BIC_PkgTmp	(1ULL << 18)
#define	BIC_GFX_rc6	(1ULL << 19)
#define	BIC_GFXMHz	(1ULL << 20)
#define	BIC_Pkgpc2	(1ULL << 21)
#define	BIC_Pkgpc3	(1ULL << 22)
#define	BIC_Pkgpc6	(1ULL << 23)
#define	BIC_Pkgpc7	(1ULL << 24)
#define	BIC_Pkgpc8	(1ULL << 25)
#define	BIC_Pkgpc9	(1ULL << 26)
#define	BIC_Pkgpc10	(1ULL << 27)
#define BIC_CPU_LPI	(1ULL << 28)
#define BIC_SYS_LPI	(1ULL << 29)
#define	BIC_PkgWatt	(1ULL << 30)
#define	BIC_CorWatt	(1ULL << 31)
#define	BIC_GFXWatt	(1ULL << 32)
#define	BIC_PkgCnt	(1ULL << 33)
#define	BIC_RAMWatt	(1ULL << 34)
#define	BIC_PKG__	(1ULL << 35)
#define	BIC_RAM__	(1ULL << 36)
#define	BIC_Pkg_J	(1ULL << 37)
#define	BIC_Cor_J	(1ULL << 38)
#define	BIC_GFX_J	(1ULL << 39)
#define	BIC_RAM_J	(1ULL << 40)
#define	BIC_Mod_c6	(1ULL << 41)
#define	BIC_Totl_c0	(1ULL << 42)
#define	BIC_Any_c0	(1ULL << 43)
#define	BIC_GFX_c0	(1ULL << 44)
#define	BIC_CPUGFX	(1ULL << 45)
#define	BIC_Core	(1ULL << 46)
#define	BIC_CPU		(1ULL << 47)
#define	BIC_APIC	(1ULL << 48)
#define	BIC_X2APIC	(1ULL << 49)
#define	BIC_Die		(1ULL << 50)

/* Columns that stay hidden unless explicitly enabled */
#define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)

/* bic_enabled: columns requested for display; bic_present: columns available */
unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;

/*
 * Mask helpers.  Every argument is parenthesized so that a compound mask
 * such as (BIC_A | BIC_B) is applied as a whole; without that,
 * BIC_NOT_PRESENT(A | B) would expand to "bic_present &= ~A | B".
 */
/* non-zero when any requested bit is both enabled and present */
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & (COUNTER_NAME))
/* non-zero when any requested bit is present, enabled or not */
#define DO_BIC_READ(COUNTER_NAME) (bic_present & (COUNTER_NAME))
#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= (COUNTER_NAME))
#define BIC_PRESENT(COUNTER_BIT) (bic_present |= (COUNTER_BIT))
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~(COUNTER_BIT))
519 
520 
/*
 * Names handed to bic_lookup() that match no built-in counter are parked
 * here; deferred_skip_index is the next free slot.
 */
#define MAX_DEFERRED 16
char *deferred_skip_names[MAX_DEFERRED];
int deferred_skip_index;

/*
 * SHOW_LIST - show this list of counters, hide the rest
 * HIDE_LIST - hide this list of counters, show the rest [default]
 */
enum show_hide_mode {
	SHOW_LIST,
	HIDE_LIST
};

enum show_hide_mode global_show_hide_mode = HIDE_LIST;
530 
531 void help(void)
532 {
533 	fprintf(outf,
534 	"Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
535 	"\n"
536 	"Turbostat forks the specified COMMAND and prints statistics\n"
537 	"when COMMAND completes.\n"
538 	"If no COMMAND is specified, turbostat wakes every 5-seconds\n"
539 	"to print statistics, until interrupted.\n"
540 	"  -a, --add	add a counter\n"
541 	"		  eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
542 	"  -c, --cpu	cpu-set	limit output to summary plus cpu-set:\n"
543 	"		  {core | package | j,k,l..m,n-p }\n"
544 	"  -d, --debug	displays usec, Time_Of_Day_Seconds and more debugging\n"
545 	"  -D, --Dump	displays the raw counter values\n"
546 	"  -e, --enable	[all | column]\n"
547 	"		shows all or the specified disabled column\n"
548 	"  -H, --hide [column|column,column,...]\n"
549 	"		hide the specified column(s)\n"
550 	"  -i, --interval sec.subsec\n"
551 	"		Override default 5-second measurement interval\n"
552 	"  -J, --Joules	displays energy in Joules instead of Watts\n"
553 	"  -l, --list	list column headers only\n"
554 	"  -n, --num_iterations num\n"
555 	"		number of the measurement iterations\n"
556 	"  -o, --out file\n"
557 	"		create or truncate \"file\" for all output\n"
558 	"  -q, --quiet	skip decoding system configuration header\n"
559 	"  -s, --show [column|column,column,...]\n"
560 	"		show only the specified column(s)\n"
561 	"  -S, --Summary\n"
562 	"		limits output to 1-line system summary per interval\n"
563 	"  -T, --TCC temperature\n"
564 	"		sets the Thermal Control Circuit temperature in\n"
565 	"		  degrees Celsius\n"
566 	"  -h, --help	print this help message\n"
567 	"  -v, --version	print version information\n"
568 	"\n"
569 	"For more help, run \"man turbostat\"\n");
570 }
571 
572 /*
573  * bic_lookup
574  * for all the strings in comma separate name_list,
575  * set the approprate bit in return value.
576  */
577 unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
578 {
579 	int i;
580 	unsigned long long retval = 0;
581 
582 	while (name_list) {
583 		char *comma;
584 
585 		comma = strchr(name_list, ',');
586 
587 		if (comma)
588 			*comma = '\0';
589 
590 		if (!strcmp(name_list, "all"))
591 			return ~0;
592 
593 		for (i = 0; i < MAX_BIC; ++i) {
594 			if (!strcmp(name_list, bic[i].name)) {
595 				retval |= (1ULL << i);
596 				break;
597 			}
598 		}
599 		if (i == MAX_BIC) {
600 			if (mode == SHOW_LIST) {
601 				fprintf(stderr, "Invalid counter name: %s\n", name_list);
602 				exit(-1);
603 			}
604 			deferred_skip_names[deferred_skip_index++] = name_list;
605 			if (debug)
606 				fprintf(stderr, "deferred \"%s\"\n", name_list);
607 			if (deferred_skip_index >= MAX_DEFERRED) {
608 				fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
609 					MAX_DEFERRED, name_list);
610 				help();
611 				exit(1);
612 			}
613 		}
614 
615 		name_list = comma;
616 		if (name_list)
617 			name_list++;
618 
619 	}
620 	return retval;
621 }
622 
623 
624 void print_header(char *delim)
625 {
626 	struct msr_counter *mp;
627 	int printed = 0;
628 
629 	if (DO_BIC(BIC_USEC))
630 		outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
631 	if (DO_BIC(BIC_TOD))
632 		outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
633 	if (DO_BIC(BIC_Package))
634 		outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
635 	if (DO_BIC(BIC_Die))
636 		outp += sprintf(outp, "%sDie", (printed++ ? delim : ""));
637 	if (DO_BIC(BIC_Node))
638 		outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
639 	if (DO_BIC(BIC_Core))
640 		outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
641 	if (DO_BIC(BIC_CPU))
642 		outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
643 	if (DO_BIC(BIC_APIC))
644 		outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
645 	if (DO_BIC(BIC_X2APIC))
646 		outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
647 	if (DO_BIC(BIC_Avg_MHz))
648 		outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
649 	if (DO_BIC(BIC_Busy))
650 		outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
651 	if (DO_BIC(BIC_Bzy_MHz))
652 		outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
653 	if (DO_BIC(BIC_TSC_MHz))
654 		outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
655 
656 	if (DO_BIC(BIC_IRQ)) {
657 		if (sums_need_wide_columns)
658 			outp += sprintf(outp, "%s     IRQ", (printed++ ? delim : ""));
659 		else
660 			outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
661 	}
662 
663 	if (DO_BIC(BIC_SMI))
664 		outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
665 
666 	for (mp = sys.tp; mp; mp = mp->next) {
667 
668 		if (mp->format == FORMAT_RAW) {
669 			if (mp->width == 64)
670 				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
671 			else
672 				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
673 		} else {
674 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
675 				outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
676 			else
677 				outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
678 		}
679 	}
680 
681 	if (DO_BIC(BIC_CPU_c1))
682 		outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
683 	if (DO_BIC(BIC_CPU_c3))
684 		outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
685 	if (DO_BIC(BIC_CPU_c6))
686 		outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
687 	if (DO_BIC(BIC_CPU_c7))
688 		outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
689 
690 	if (DO_BIC(BIC_Mod_c6))
691 		outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
692 
693 	if (DO_BIC(BIC_CoreTmp))
694 		outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
695 
696 	if (do_rapl && !rapl_joules) {
697 		if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
698 			outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
699 	} else if (do_rapl && rapl_joules) {
700 		if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
701 			outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
702 	}
703 
704 	for (mp = sys.cp; mp; mp = mp->next) {
705 		if (mp->format == FORMAT_RAW) {
706 			if (mp->width == 64)
707 				outp += sprintf(outp, "%s%18.18s", delim, mp->name);
708 			else
709 				outp += sprintf(outp, "%s%10.10s", delim, mp->name);
710 		} else {
711 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
712 				outp += sprintf(outp, "%s%8s", delim, mp->name);
713 			else
714 				outp += sprintf(outp, "%s%s", delim, mp->name);
715 		}
716 	}
717 
718 	if (DO_BIC(BIC_PkgTmp))
719 		outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
720 
721 	if (DO_BIC(BIC_GFX_rc6))
722 		outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
723 
724 	if (DO_BIC(BIC_GFXMHz))
725 		outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
726 
727 	if (DO_BIC(BIC_Totl_c0))
728 		outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
729 	if (DO_BIC(BIC_Any_c0))
730 		outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
731 	if (DO_BIC(BIC_GFX_c0))
732 		outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
733 	if (DO_BIC(BIC_CPUGFX))
734 		outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
735 
736 	if (DO_BIC(BIC_Pkgpc2))
737 		outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
738 	if (DO_BIC(BIC_Pkgpc3))
739 		outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
740 	if (DO_BIC(BIC_Pkgpc6))
741 		outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
742 	if (DO_BIC(BIC_Pkgpc7))
743 		outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
744 	if (DO_BIC(BIC_Pkgpc8))
745 		outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
746 	if (DO_BIC(BIC_Pkgpc9))
747 		outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
748 	if (DO_BIC(BIC_Pkgpc10))
749 		outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
750 	if (DO_BIC(BIC_CPU_LPI))
751 		outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
752 	if (DO_BIC(BIC_SYS_LPI))
753 		outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
754 
755 	if (do_rapl && !rapl_joules) {
756 		if (DO_BIC(BIC_PkgWatt))
757 			outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
758 		if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
759 			outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
760 		if (DO_BIC(BIC_GFXWatt))
761 			outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
762 		if (DO_BIC(BIC_RAMWatt))
763 			outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
764 		if (DO_BIC(BIC_PKG__))
765 			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
766 		if (DO_BIC(BIC_RAM__))
767 			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
768 	} else if (do_rapl && rapl_joules) {
769 		if (DO_BIC(BIC_Pkg_J))
770 			outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
771 		if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
772 			outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
773 		if (DO_BIC(BIC_GFX_J))
774 			outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
775 		if (DO_BIC(BIC_RAM_J))
776 			outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
777 		if (DO_BIC(BIC_PKG__))
778 			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
779 		if (DO_BIC(BIC_RAM__))
780 			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
781 	}
782 	for (mp = sys.pp; mp; mp = mp->next) {
783 		if (mp->format == FORMAT_RAW) {
784 			if (mp->width == 64)
785 				outp += sprintf(outp, "%s%18.18s", delim, mp->name);
786 			else
787 				outp += sprintf(outp, "%s%10.10s", delim, mp->name);
788 		} else {
789 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
790 				outp += sprintf(outp, "%s%8s", delim, mp->name);
791 			else
792 				outp += sprintf(outp, "%s%s", delim, mp->name);
793 		}
794 	}
795 
796 	outp += sprintf(outp, "\n");
797 }
798 
799 int dump_counters(struct thread_data *t, struct core_data *c,
800 	struct pkg_data *p)
801 {
802 	int i;
803 	struct msr_counter *mp;
804 
805 	outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
806 
807 	if (t) {
808 		outp += sprintf(outp, "CPU: %d flags 0x%x\n",
809 			t->cpu_id, t->flags);
810 		outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
811 		outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
812 		outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
813 		outp += sprintf(outp, "c1: %016llX\n", t->c1);
814 
815 		if (DO_BIC(BIC_IRQ))
816 			outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
817 		if (DO_BIC(BIC_SMI))
818 			outp += sprintf(outp, "SMI: %d\n", t->smi_count);
819 
820 		for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
821 			outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n",
822 				i, mp->msr_num, t->counter[i]);
823 		}
824 	}
825 
826 	if (c) {
827 		outp += sprintf(outp, "core: %d\n", c->core_id);
828 		outp += sprintf(outp, "c3: %016llX\n", c->c3);
829 		outp += sprintf(outp, "c6: %016llX\n", c->c6);
830 		outp += sprintf(outp, "c7: %016llX\n", c->c7);
831 		outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
832 		outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
833 
834 		for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
835 			outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
836 				i, mp->msr_num, c->counter[i]);
837 		}
838 		outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
839 	}
840 
841 	if (p) {
842 		outp += sprintf(outp, "package: %d\n", p->package_id);
843 
844 		outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
845 		outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
846 		outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
847 		outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
848 
849 		outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
850 		if (DO_BIC(BIC_Pkgpc3))
851 			outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
852 		if (DO_BIC(BIC_Pkgpc6))
853 			outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
854 		if (DO_BIC(BIC_Pkgpc7))
855 			outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
856 		outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
857 		outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
858 		outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
859 		outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
860 		outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
861 		outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
862 		outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
863 		outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
864 		outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
865 		outp += sprintf(outp, "Throttle PKG: %0X\n",
866 			p->rapl_pkg_perf_status);
867 		outp += sprintf(outp, "Throttle RAM: %0X\n",
868 			p->rapl_dram_perf_status);
869 		outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
870 
871 		for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
872 			outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n",
873 				i, mp->msr_num, p->counter[i]);
874 		}
875 	}
876 
877 	outp += sprintf(outp, "\n");
878 
879 	return 0;
880 }
881 
882 /*
883  * column formatting convention & formats
884  */
885 int format_counters(struct thread_data *t, struct core_data *c,
886 	struct pkg_data *p)
887 {
888 	double interval_float, tsc;
889 	char *fmt8;
890 	int i;
891 	struct msr_counter *mp;
892 	char *delim = "\t";
893 	int printed = 0;
894 
895 	 /* if showing only 1st thread in core and this isn't one, bail out */
896 	if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
897 		return 0;
898 
899 	 /* if showing only 1st thread in pkg and this isn't one, bail out */
900 	if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
901 		return 0;
902 
903 	/*if not summary line and --cpu is used */
904 	if ((t != &average.threads) &&
905 		(cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
906 		return 0;
907 
908 	if (DO_BIC(BIC_USEC)) {
909 		/* on each row, print how many usec each timestamp took to gather */
910 		struct timeval tv;
911 
912 		timersub(&t->tv_end, &t->tv_begin, &tv);
913 		outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
914 	}
915 
916 	/* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
917 	if (DO_BIC(BIC_TOD))
918 		outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
919 
920 	interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec/1000000.0;
921 
922 	tsc = t->tsc * tsc_tweak;
923 
924 	/* topo columns, print blanks on 1st (average) line */
925 	if (t == &average.threads) {
926 		if (DO_BIC(BIC_Package))
927 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
928 		if (DO_BIC(BIC_Die))
929 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
930 		if (DO_BIC(BIC_Node))
931 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
932 		if (DO_BIC(BIC_Core))
933 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
934 		if (DO_BIC(BIC_CPU))
935 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
936 		if (DO_BIC(BIC_APIC))
937 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
938 		if (DO_BIC(BIC_X2APIC))
939 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
940 	} else {
941 		if (DO_BIC(BIC_Package)) {
942 			if (p)
943 				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
944 			else
945 				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
946 		}
947 		if (DO_BIC(BIC_Die)) {
948 			if (c)
949 				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id);
950 			else
951 				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
952 		}
953 		if (DO_BIC(BIC_Node)) {
954 			if (t)
955 				outp += sprintf(outp, "%s%d",
956 						(printed++ ? delim : ""),
957 					      cpus[t->cpu_id].physical_node_id);
958 			else
959 				outp += sprintf(outp, "%s-",
960 						(printed++ ? delim : ""));
961 		}
962 		if (DO_BIC(BIC_Core)) {
963 			if (c)
964 				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
965 			else
966 				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
967 		}
968 		if (DO_BIC(BIC_CPU))
969 			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
970 		if (DO_BIC(BIC_APIC))
971 			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
972 		if (DO_BIC(BIC_X2APIC))
973 			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
974 	}
975 
976 	if (DO_BIC(BIC_Avg_MHz))
977 		outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
978 			1.0 / units * t->aperf / interval_float);
979 
980 	if (DO_BIC(BIC_Busy))
981 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf/tsc);
982 
983 	if (DO_BIC(BIC_Bzy_MHz)) {
984 		if (has_base_hz)
985 			outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
986 		else
987 			outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
988 				tsc / units * t->aperf / t->mperf / interval_float);
989 	}
990 
991 	if (DO_BIC(BIC_TSC_MHz))
992 		outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float);
993 
994 	/* IRQ */
995 	if (DO_BIC(BIC_IRQ)) {
996 		if (sums_need_wide_columns)
997 			outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
998 		else
999 			outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
1000 	}
1001 
1002 	/* SMI */
1003 	if (DO_BIC(BIC_SMI))
1004 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
1005 
1006 	/* Added counters */
1007 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1008 		if (mp->format == FORMAT_RAW) {
1009 			if (mp->width == 32)
1010 				outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) t->counter[i]);
1011 			else
1012 				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
1013 		} else if (mp->format == FORMAT_DELTA) {
1014 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1015 				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
1016 			else
1017 				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
1018 		} else if (mp->format == FORMAT_PERCENT) {
1019 			if (mp->type == COUNTER_USEC)
1020 				outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), t->counter[i]/interval_float/10000);
1021 			else
1022 				outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i]/tsc);
1023 		}
1024 	}
1025 
1026 	/* C1 */
1027 	if (DO_BIC(BIC_CPU_c1))
1028 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1/tsc);
1029 
1030 
1031 	/* print per-core data only for 1st thread in core */
1032 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1033 		goto done;
1034 
1035 	if (DO_BIC(BIC_CPU_c3))
1036 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc);
1037 	if (DO_BIC(BIC_CPU_c6))
1038 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc);
1039 	if (DO_BIC(BIC_CPU_c7))
1040 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7/tsc);
1041 
1042 	/* Mod%c6 */
1043 	if (DO_BIC(BIC_Mod_c6))
1044 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
1045 
1046 	if (DO_BIC(BIC_CoreTmp))
1047 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
1048 
1049 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1050 		if (mp->format == FORMAT_RAW) {
1051 			if (mp->width == 32)
1052 				outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) c->counter[i]);
1053 			else
1054 				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
1055 		} else if (mp->format == FORMAT_DELTA) {
1056 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1057 				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
1058 			else
1059 				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
1060 		} else if (mp->format == FORMAT_PERCENT) {
1061 			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i]/tsc);
1062 		}
1063 	}
1064 
1065 	/*
1066 	 * If measurement interval exceeds minimum RAPL Joule Counter range,
1067 	 * indicate that results are suspect by printing "**" in fraction place.
1068 	 */
1069 	if (interval_float < rapl_joule_counter_range)
1070 		fmt8 = "%s%.2f";
1071 	else
1072 		fmt8 = "%6.0f**";
1073 
1074 	if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
1075 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
1076 	if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
1077 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
1078 
1079 	/* print per-package data only for 1st core in package */
1080 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1081 		goto done;
1082 
1083 	/* PkgTmp */
1084 	if (DO_BIC(BIC_PkgTmp))
1085 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
1086 
1087 	/* GFXrc6 */
1088 	if (DO_BIC(BIC_GFX_rc6)) {
1089 		if (p->gfx_rc6_ms == -1) {	/* detect GFX counter reset */
1090 			outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
1091 		} else {
1092 			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
1093 				p->gfx_rc6_ms / 10.0 / interval_float);
1094 		}
1095 	}
1096 
1097 	/* GFXMHz */
1098 	if (DO_BIC(BIC_GFXMHz))
1099 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
1100 
1101 	/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
1102 	if (DO_BIC(BIC_Totl_c0))
1103 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
1104 	if (DO_BIC(BIC_Any_c0))
1105 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc);
1106 	if (DO_BIC(BIC_GFX_c0))
1107 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc);
1108 	if (DO_BIC(BIC_CPUGFX))
1109 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc);
1110 
1111 	if (DO_BIC(BIC_Pkgpc2))
1112 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc);
1113 	if (DO_BIC(BIC_Pkgpc3))
1114 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3/tsc);
1115 	if (DO_BIC(BIC_Pkgpc6))
1116 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6/tsc);
1117 	if (DO_BIC(BIC_Pkgpc7))
1118 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7/tsc);
1119 	if (DO_BIC(BIC_Pkgpc8))
1120 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8/tsc);
1121 	if (DO_BIC(BIC_Pkgpc9))
1122 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9/tsc);
1123 	if (DO_BIC(BIC_Pkgpc10))
1124 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc);
1125 
1126 	if (DO_BIC(BIC_CPU_LPI))
1127 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
1128 	if (DO_BIC(BIC_SYS_LPI))
1129 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
1130 
1131 	if (DO_BIC(BIC_PkgWatt))
1132 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
1133 	if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
1134 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
1135 	if (DO_BIC(BIC_GFXWatt))
1136 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
1137 	if (DO_BIC(BIC_RAMWatt))
1138 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float);
1139 	if (DO_BIC(BIC_Pkg_J))
1140 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
1141 	if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
1142 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
1143 	if (DO_BIC(BIC_GFX_J))
1144 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
1145 	if (DO_BIC(BIC_RAM_J))
1146 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
1147 	if (DO_BIC(BIC_PKG__))
1148 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
1149 	if (DO_BIC(BIC_RAM__))
1150 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
1151 
1152 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1153 		if (mp->format == FORMAT_RAW) {
1154 			if (mp->width == 32)
1155 				outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) p->counter[i]);
1156 			else
1157 				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
1158 		} else if (mp->format == FORMAT_DELTA) {
1159 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1160 				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
1161 			else
1162 				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
1163 		} else if (mp->format == FORMAT_PERCENT) {
1164 			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i]/tsc);
1165 		}
1166 	}
1167 
1168 done:
1169 	if (*(outp - 1) != '\n')
1170 		outp += sprintf(outp, "\n");
1171 
1172 	return 0;
1173 }
1174 
1175 void flush_output_stdout(void)
1176 {
1177 	FILE *filep;
1178 
1179 	if (outf == stderr)
1180 		filep = stdout;
1181 	else
1182 		filep = outf;
1183 
1184 	fputs(output_buffer, filep);
1185 	fflush(filep);
1186 
1187 	outp = output_buffer;
1188 }
1189 void flush_output_stderr(void)
1190 {
1191 	fputs(output_buffer, outf);
1192 	fflush(outf);
1193 	outp = output_buffer;
1194 }
1195 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1196 {
1197 	static int printed;
1198 
1199 	if (!printed || !summary_only)
1200 		print_header("\t");
1201 
1202 	format_counters(&average.threads, &average.cores, &average.packages);
1203 
1204 	printed = 1;
1205 
1206 	if (summary_only)
1207 		return;
1208 
1209 	for_all_cpus(format_counters, t, c, p);
1210 }
1211 
/*
 * DELTA_WRAP32(new, old): old = (new - old) modulo 2^32.
 *
 * For 32-bit counters that may wrap between two samples.  The subtraction
 * is done in unsigned 64-bit arithmetic and masked to 32 bits, so:
 *  - new >  old  ->  new - old
 *  - new <  old  ->  2^32 + new - old   (one wrap assumed)
 *  - new == old  ->  0                  (counter did not move)
 *
 * Wrapped in do { } while (0) so it behaves as a single statement and
 * cannot capture a following "else".  Invoke with a trailing semicolon.
 */
#define DELTA_WRAP32(new, old)						\
	do {								\
		(old) = (((unsigned long long)(new) -			\
			  (unsigned long long)(old)) & 0xFFFFFFFFULL);	\
	} while (0)
1218 
1219 int
1220 delta_package(struct pkg_data *new, struct pkg_data *old)
1221 {
1222 	int i;
1223 	struct msr_counter *mp;
1224 
1225 
1226 	if (DO_BIC(BIC_Totl_c0))
1227 		old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
1228 	if (DO_BIC(BIC_Any_c0))
1229 		old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
1230 	if (DO_BIC(BIC_GFX_c0))
1231 		old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
1232 	if (DO_BIC(BIC_CPUGFX))
1233 		old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
1234 
1235 	old->pc2 = new->pc2 - old->pc2;
1236 	if (DO_BIC(BIC_Pkgpc3))
1237 		old->pc3 = new->pc3 - old->pc3;
1238 	if (DO_BIC(BIC_Pkgpc6))
1239 		old->pc6 = new->pc6 - old->pc6;
1240 	if (DO_BIC(BIC_Pkgpc7))
1241 		old->pc7 = new->pc7 - old->pc7;
1242 	old->pc8 = new->pc8 - old->pc8;
1243 	old->pc9 = new->pc9 - old->pc9;
1244 	old->pc10 = new->pc10 - old->pc10;
1245 	old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
1246 	old->sys_lpi = new->sys_lpi - old->sys_lpi;
1247 	old->pkg_temp_c = new->pkg_temp_c;
1248 
1249 	/* flag an error when rc6 counter resets/wraps */
1250 	if (old->gfx_rc6_ms >  new->gfx_rc6_ms)
1251 		old->gfx_rc6_ms = -1;
1252 	else
1253 		old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
1254 
1255 	old->gfx_mhz = new->gfx_mhz;
1256 
1257 	DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
1258 	DELTA_WRAP32(new->energy_cores, old->energy_cores);
1259 	DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
1260 	DELTA_WRAP32(new->energy_dram, old->energy_dram);
1261 	DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
1262 	DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
1263 
1264 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1265 		if (mp->format == FORMAT_RAW)
1266 			old->counter[i] = new->counter[i];
1267 		else
1268 			old->counter[i] = new->counter[i] - old->counter[i];
1269 	}
1270 
1271 	return 0;
1272 }
1273 
1274 void
1275 delta_core(struct core_data *new, struct core_data *old)
1276 {
1277 	int i;
1278 	struct msr_counter *mp;
1279 
1280 	old->c3 = new->c3 - old->c3;
1281 	old->c6 = new->c6 - old->c6;
1282 	old->c7 = new->c7 - old->c7;
1283 	old->core_temp_c = new->core_temp_c;
1284 	old->mc6_us = new->mc6_us - old->mc6_us;
1285 
1286 	DELTA_WRAP32(new->core_energy, old->core_energy);
1287 
1288 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1289 		if (mp->format == FORMAT_RAW)
1290 			old->counter[i] = new->counter[i];
1291 		else
1292 			old->counter[i] = new->counter[i] - old->counter[i];
1293 	}
1294 }
1295 
1296 int soft_c1_residency_display(int bic)
1297 {
1298 	if (!DO_BIC(BIC_CPU_c1) || use_c1_residency_msr)
1299 		return 0;
1300 
1301 	return DO_BIC_READ(bic);
1302 }
1303 
1304 /*
1305  * old = new - old
1306  */
1307 int
1308 delta_thread(struct thread_data *new, struct thread_data *old,
1309 	struct core_data *core_delta)
1310 {
1311 	int i;
1312 	struct msr_counter *mp;
1313 
1314 	/* we run cpuid just the 1st time, copy the results */
1315 	if (DO_BIC(BIC_APIC))
1316 		new->apic_id = old->apic_id;
1317 	if (DO_BIC(BIC_X2APIC))
1318 		new->x2apic_id = old->x2apic_id;
1319 
1320 	/*
1321 	 * the timestamps from start of measurement interval are in "old"
1322 	 * the timestamp from end of measurement interval are in "new"
1323 	 * over-write old w/ new so we can print end of interval values
1324 	 */
1325 
1326 	timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta);
1327 	old->tv_begin = new->tv_begin;
1328 	old->tv_end = new->tv_end;
1329 
1330 	old->tsc = new->tsc - old->tsc;
1331 
1332 	/* check for TSC < 1 Mcycles over interval */
1333 	if (old->tsc < (1000 * 1000))
1334 		errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
1335 		     "You can disable all c-states by booting with \"idle=poll\"\n"
1336 		     "or just the deep ones with \"processor.max_cstate=1\"");
1337 
1338 	old->c1 = new->c1 - old->c1;
1339 
1340 	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) ||
1341 	    soft_c1_residency_display(BIC_Avg_MHz)) {
1342 		if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
1343 			old->aperf = new->aperf - old->aperf;
1344 			old->mperf = new->mperf - old->mperf;
1345 		} else {
1346 			return -1;
1347 		}
1348 	}
1349 
1350 
1351 	if (use_c1_residency_msr) {
1352 		/*
1353 		 * Some models have a dedicated C1 residency MSR,
1354 		 * which should be more accurate than the derivation below.
1355 		 */
1356 	} else {
1357 		/*
1358 		 * As counter collection is not atomic,
1359 		 * it is possible for mperf's non-halted cycles + idle states
1360 		 * to exceed TSC's all cycles: show c1 = 0% in that case.
1361 		 */
1362 		if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
1363 			old->c1 = 0;
1364 		else {
1365 			/* normal case, derive c1 */
1366 			old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
1367 				- core_delta->c6 - core_delta->c7;
1368 		}
1369 	}
1370 
1371 	if (old->mperf == 0) {
1372 		if (debug > 1)
1373 			fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
1374 		old->mperf = 1;	/* divide by 0 protection */
1375 	}
1376 
1377 	if (DO_BIC(BIC_IRQ))
1378 		old->irq_count = new->irq_count - old->irq_count;
1379 
1380 	if (DO_BIC(BIC_SMI))
1381 		old->smi_count = new->smi_count - old->smi_count;
1382 
1383 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1384 		if (mp->format == FORMAT_RAW)
1385 			old->counter[i] = new->counter[i];
1386 		else
1387 			old->counter[i] = new->counter[i] - old->counter[i];
1388 	}
1389 	return 0;
1390 }
1391 
1392 int delta_cpu(struct thread_data *t, struct core_data *c,
1393 	struct pkg_data *p, struct thread_data *t2,
1394 	struct core_data *c2, struct pkg_data *p2)
1395 {
1396 	int retval = 0;
1397 
1398 	/* calculate core delta only for 1st thread in core */
1399 	if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
1400 		delta_core(c, c2);
1401 
1402 	/* always calculate thread delta */
1403 	retval = delta_thread(t, t2, c2);	/* c2 is core delta */
1404 	if (retval)
1405 		return retval;
1406 
1407 	/* calculate package delta only for 1st core in package */
1408 	if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
1409 		retval = delta_package(p, p2);
1410 
1411 	return retval;
1412 }
1413 
1414 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1415 {
1416 	int i;
1417 	struct msr_counter  *mp;
1418 
1419 	t->tv_begin.tv_sec = 0;
1420 	t->tv_begin.tv_usec = 0;
1421 	t->tv_end.tv_sec = 0;
1422 	t->tv_end.tv_usec = 0;
1423 	t->tv_delta.tv_sec = 0;
1424 	t->tv_delta.tv_usec = 0;
1425 
1426 	t->tsc = 0;
1427 	t->aperf = 0;
1428 	t->mperf = 0;
1429 	t->c1 = 0;
1430 
1431 	t->irq_count = 0;
1432 	t->smi_count = 0;
1433 
1434 	/* tells format_counters to dump all fields from this set */
1435 	t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
1436 
1437 	c->c3 = 0;
1438 	c->c6 = 0;
1439 	c->c7 = 0;
1440 	c->mc6_us = 0;
1441 	c->core_temp_c = 0;
1442 	c->core_energy = 0;
1443 
1444 	p->pkg_wtd_core_c0 = 0;
1445 	p->pkg_any_core_c0 = 0;
1446 	p->pkg_any_gfxe_c0 = 0;
1447 	p->pkg_both_core_gfxe_c0 = 0;
1448 
1449 	p->pc2 = 0;
1450 	if (DO_BIC(BIC_Pkgpc3))
1451 		p->pc3 = 0;
1452 	if (DO_BIC(BIC_Pkgpc6))
1453 		p->pc6 = 0;
1454 	if (DO_BIC(BIC_Pkgpc7))
1455 		p->pc7 = 0;
1456 	p->pc8 = 0;
1457 	p->pc9 = 0;
1458 	p->pc10 = 0;
1459 	p->cpu_lpi = 0;
1460 	p->sys_lpi = 0;
1461 
1462 	p->energy_pkg = 0;
1463 	p->energy_dram = 0;
1464 	p->energy_cores = 0;
1465 	p->energy_gfx = 0;
1466 	p->rapl_pkg_perf_status = 0;
1467 	p->rapl_dram_perf_status = 0;
1468 	p->pkg_temp_c = 0;
1469 
1470 	p->gfx_rc6_ms = 0;
1471 	p->gfx_mhz = 0;
1472 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
1473 		t->counter[i] = 0;
1474 
1475 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
1476 		c->counter[i] = 0;
1477 
1478 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
1479 		p->counter[i] = 0;
1480 }
1481 int sum_counters(struct thread_data *t, struct core_data *c,
1482 	struct pkg_data *p)
1483 {
1484 	int i;
1485 	struct msr_counter *mp;
1486 
1487 	/* copy un-changing apic_id's */
1488 	if (DO_BIC(BIC_APIC))
1489 		average.threads.apic_id = t->apic_id;
1490 	if (DO_BIC(BIC_X2APIC))
1491 		average.threads.x2apic_id = t->x2apic_id;
1492 
1493 	/* remember first tv_begin */
1494 	if (average.threads.tv_begin.tv_sec == 0)
1495 		average.threads.tv_begin = t->tv_begin;
1496 
1497 	/* remember last tv_end */
1498 	average.threads.tv_end = t->tv_end;
1499 
1500 	average.threads.tsc += t->tsc;
1501 	average.threads.aperf += t->aperf;
1502 	average.threads.mperf += t->mperf;
1503 	average.threads.c1 += t->c1;
1504 
1505 	average.threads.irq_count += t->irq_count;
1506 	average.threads.smi_count += t->smi_count;
1507 
1508 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1509 		if (mp->format == FORMAT_RAW)
1510 			continue;
1511 		average.threads.counter[i] += t->counter[i];
1512 	}
1513 
1514 	/* sum per-core values only for 1st thread in core */
1515 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1516 		return 0;
1517 
1518 	average.cores.c3 += c->c3;
1519 	average.cores.c6 += c->c6;
1520 	average.cores.c7 += c->c7;
1521 	average.cores.mc6_us += c->mc6_us;
1522 
1523 	average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
1524 
1525 	average.cores.core_energy += c->core_energy;
1526 
1527 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1528 		if (mp->format == FORMAT_RAW)
1529 			continue;
1530 		average.cores.counter[i] += c->counter[i];
1531 	}
1532 
1533 	/* sum per-pkg values only for 1st core in pkg */
1534 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1535 		return 0;
1536 
1537 	if (DO_BIC(BIC_Totl_c0))
1538 		average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
1539 	if (DO_BIC(BIC_Any_c0))
1540 		average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
1541 	if (DO_BIC(BIC_GFX_c0))
1542 		average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
1543 	if (DO_BIC(BIC_CPUGFX))
1544 		average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
1545 
1546 	average.packages.pc2 += p->pc2;
1547 	if (DO_BIC(BIC_Pkgpc3))
1548 		average.packages.pc3 += p->pc3;
1549 	if (DO_BIC(BIC_Pkgpc6))
1550 		average.packages.pc6 += p->pc6;
1551 	if (DO_BIC(BIC_Pkgpc7))
1552 		average.packages.pc7 += p->pc7;
1553 	average.packages.pc8 += p->pc8;
1554 	average.packages.pc9 += p->pc9;
1555 	average.packages.pc10 += p->pc10;
1556 
1557 	average.packages.cpu_lpi = p->cpu_lpi;
1558 	average.packages.sys_lpi = p->sys_lpi;
1559 
1560 	average.packages.energy_pkg += p->energy_pkg;
1561 	average.packages.energy_dram += p->energy_dram;
1562 	average.packages.energy_cores += p->energy_cores;
1563 	average.packages.energy_gfx += p->energy_gfx;
1564 
1565 	average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
1566 	average.packages.gfx_mhz = p->gfx_mhz;
1567 
1568 	average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
1569 
1570 	average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
1571 	average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
1572 
1573 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1574 		if (mp->format == FORMAT_RAW)
1575 			continue;
1576 		average.packages.counter[i] += p->counter[i];
1577 	}
1578 	return 0;
1579 }
1580 /*
1581  * sum the counters for all cpus in the system
1582  * compute the weighted average
1583  */
1584 void compute_average(struct thread_data *t, struct core_data *c,
1585 	struct pkg_data *p)
1586 {
1587 	int i;
1588 	struct msr_counter *mp;
1589 
1590 	clear_counters(&average.threads, &average.cores, &average.packages);
1591 
1592 	for_all_cpus(sum_counters, t, c, p);
1593 
1594 	/* Use the global time delta for the average. */
1595 	average.threads.tv_delta = tv_delta;
1596 
1597 	average.threads.tsc /= topo.num_cpus;
1598 	average.threads.aperf /= topo.num_cpus;
1599 	average.threads.mperf /= topo.num_cpus;
1600 	average.threads.c1 /= topo.num_cpus;
1601 
1602 	if (average.threads.irq_count > 9999999)
1603 		sums_need_wide_columns = 1;
1604 
1605 	average.cores.c3 /= topo.num_cores;
1606 	average.cores.c6 /= topo.num_cores;
1607 	average.cores.c7 /= topo.num_cores;
1608 	average.cores.mc6_us /= topo.num_cores;
1609 
1610 	if (DO_BIC(BIC_Totl_c0))
1611 		average.packages.pkg_wtd_core_c0 /= topo.num_packages;
1612 	if (DO_BIC(BIC_Any_c0))
1613 		average.packages.pkg_any_core_c0 /= topo.num_packages;
1614 	if (DO_BIC(BIC_GFX_c0))
1615 		average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
1616 	if (DO_BIC(BIC_CPUGFX))
1617 		average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
1618 
1619 	average.packages.pc2 /= topo.num_packages;
1620 	if (DO_BIC(BIC_Pkgpc3))
1621 		average.packages.pc3 /= topo.num_packages;
1622 	if (DO_BIC(BIC_Pkgpc6))
1623 		average.packages.pc6 /= topo.num_packages;
1624 	if (DO_BIC(BIC_Pkgpc7))
1625 		average.packages.pc7 /= topo.num_packages;
1626 
1627 	average.packages.pc8 /= topo.num_packages;
1628 	average.packages.pc9 /= topo.num_packages;
1629 	average.packages.pc10 /= topo.num_packages;
1630 
1631 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1632 		if (mp->format == FORMAT_RAW)
1633 			continue;
1634 		if (mp->type == COUNTER_ITEMS) {
1635 			if (average.threads.counter[i] > 9999999)
1636 				sums_need_wide_columns = 1;
1637 			continue;
1638 		}
1639 		average.threads.counter[i] /= topo.num_cpus;
1640 	}
1641 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1642 		if (mp->format == FORMAT_RAW)
1643 			continue;
1644 		if (mp->type == COUNTER_ITEMS) {
1645 			if (average.cores.counter[i] > 9999999)
1646 				sums_need_wide_columns = 1;
1647 		}
1648 		average.cores.counter[i] /= topo.num_cores;
1649 	}
1650 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1651 		if (mp->format == FORMAT_RAW)
1652 			continue;
1653 		if (mp->type == COUNTER_ITEMS) {
1654 			if (average.packages.counter[i] > 9999999)
1655 				sums_need_wide_columns = 1;
1656 		}
1657 		average.packages.counter[i] /= topo.num_packages;
1658 	}
1659 }
1660 
1661 static unsigned long long rdtsc(void)
1662 {
1663 	unsigned int low, high;
1664 
1665 	asm volatile("rdtsc" : "=a" (low), "=d" (high));
1666 
1667 	return low | ((unsigned long long)high) << 32;
1668 }
1669 
/*
 * fopen_or_die()
 *
 * fopen() wrapper that never returns NULL: if the open fails, the
 * process exits with status 1 via err().
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *stream;

	stream = fopen(path, mode);
	if (stream == NULL)
		err(1, "%s: open failed", path);

	return stream;
}
/*
 * snapshot_sysfs_counter()
 *
 * Open the file at path, parse one unsigned decimal integer from it and
 * return that value.
 *
 * Exits with status 1 if the file cannot be opened (via fopen_or_die())
 * or if no integer can be parsed from it.
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
	FILE *fp;
	int retval;
	unsigned long long counter = 0;

	fp = fopen_or_die(path, "r");

	/* %llu matches the unsigned long long destination */
	retval = fscanf(fp, "%llu", &counter);
	if (retval != 1)
		err(1, "snapshot_sysfs_counter(%s)", path);

	fclose(fp);

	return counter;
}
1702 
1703 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
1704 {
1705 	if (mp->msr_num != 0) {
1706 		if (get_msr(cpu, mp->msr_num, counterp))
1707 			return -1;
1708 	} else {
1709 		char path[128 + PATH_BYTES];
1710 
1711 		if (mp->flags & SYSFS_PERCPU) {
1712 			sprintf(path, "/sys/devices/system/cpu/cpu%d/%s",
1713 				 cpu, mp->path);
1714 
1715 			*counterp = snapshot_sysfs_counter(path);
1716 		} else {
1717 			*counterp = snapshot_sysfs_counter(mp->path);
1718 		}
1719 	}
1720 
1721 	return 0;
1722 }
1723 
1724 void get_apic_id(struct thread_data *t)
1725 {
1726 	unsigned int eax, ebx, ecx, edx;
1727 
1728 	if (DO_BIC(BIC_APIC)) {
1729 		eax = ebx = ecx = edx = 0;
1730 		__cpuid(1, eax, ebx, ecx, edx);
1731 
1732 		t->apic_id = (ebx >> 24) & 0xff;
1733 	}
1734 
1735 	if (!DO_BIC(BIC_X2APIC))
1736 		return;
1737 
1738 	if (authentic_amd || hygon_genuine) {
1739 		unsigned int topology_extensions;
1740 
1741 		if (max_extended_level < 0x8000001e)
1742 			return;
1743 
1744 		eax = ebx = ecx = edx = 0;
1745 		__cpuid(0x80000001, eax, ebx, ecx, edx);
1746 			topology_extensions = ecx & (1 << 22);
1747 
1748 		if (topology_extensions == 0)
1749 			return;
1750 
1751 		eax = ebx = ecx = edx = 0;
1752 		__cpuid(0x8000001e, eax, ebx, ecx, edx);
1753 
1754 		t->x2apic_id = eax;
1755 		return;
1756 	}
1757 
1758 	if (!genuine_intel)
1759 		return;
1760 
1761 	if (max_level < 0xb)
1762 		return;
1763 
1764 	ecx = 0;
1765 	__cpuid(0xb, eax, ebx, ecx, edx);
1766 	t->x2apic_id = edx;
1767 
1768 	if (debug && (t->apic_id != (t->x2apic_id & 0xff)))
1769 		fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n",
1770 				t->cpu_id, t->apic_id, t->x2apic_id);
1771 }
1772 
1773 /*
1774  * get_counters(...)
1775  * migrate to cpu
1776  * acquire and record local counters for that cpu
1777  */
1778 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1779 {
1780 	int cpu = t->cpu_id;
1781 	unsigned long long msr;
1782 	int aperf_mperf_retry_count = 0;
1783 	struct msr_counter *mp;
1784 	int i;
1785 
1786 	if (cpu_migrate(cpu)) {
1787 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
1788 		return -1;
1789 	}
1790 
1791 	gettimeofday(&t->tv_begin, (struct timezone *)NULL);
1792 
1793 	if (first_counter_read)
1794 		get_apic_id(t);
1795 retry:
1796 	t->tsc = rdtsc();	/* we are running on local CPU of interest */
1797 
1798 	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) ||
1799 	    soft_c1_residency_display(BIC_Avg_MHz)) {
1800 		unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
1801 
1802 		/*
1803 		 * The TSC, APERF and MPERF must be read together for
1804 		 * APERF/MPERF and MPERF/TSC to give accurate results.
1805 		 *
1806 		 * Unfortunately, APERF and MPERF are read by
1807 		 * individual system call, so delays may occur
1808 		 * between them.  If the time to read them
1809 		 * varies by a large amount, we re-read them.
1810 		 */
1811 
1812 		/*
1813 		 * This initial dummy APERF read has been seen to
1814 		 * reduce jitter in the subsequent reads.
1815 		 */
1816 
1817 		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1818 			return -3;
1819 
1820 		t->tsc = rdtsc();	/* re-read close to APERF */
1821 
1822 		tsc_before = t->tsc;
1823 
1824 		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1825 			return -3;
1826 
1827 		tsc_between = rdtsc();
1828 
1829 		if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
1830 			return -4;
1831 
1832 		tsc_after = rdtsc();
1833 
1834 		aperf_time = tsc_between - tsc_before;
1835 		mperf_time = tsc_after - tsc_between;
1836 
1837 		/*
1838 		 * If the system call latency to read APERF and MPERF
1839 		 * differ by more than 2x, then try again.
1840 		 */
1841 		if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
1842 			aperf_mperf_retry_count++;
1843 			if (aperf_mperf_retry_count < 5)
1844 				goto retry;
1845 			else
1846 				warnx("cpu%d jitter %lld %lld",
1847 					cpu, aperf_time, mperf_time);
1848 		}
1849 		aperf_mperf_retry_count = 0;
1850 
1851 		t->aperf = t->aperf * aperf_mperf_multiplier;
1852 		t->mperf = t->mperf * aperf_mperf_multiplier;
1853 	}
1854 
1855 	if (DO_BIC(BIC_IRQ))
1856 		t->irq_count = irqs_per_cpu[cpu];
1857 	if (DO_BIC(BIC_SMI)) {
1858 		if (get_msr(cpu, MSR_SMI_COUNT, &msr))
1859 			return -5;
1860 		t->smi_count = msr & 0xFFFFFFFF;
1861 	}
1862 	if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
1863 		if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
1864 			return -6;
1865 	}
1866 
1867 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1868 		if (get_mp(cpu, mp, &t->counter[i]))
1869 			return -10;
1870 	}
1871 
1872 	/* collect core counters only for 1st thread in core */
1873 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1874 		goto done;
1875 
1876 	if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
1877 		if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
1878 			return -6;
1879 	}
1880 
1881 	if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !do_knl_cstates) {
1882 		if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
1883 			return -7;
1884 	} else if (do_knl_cstates || soft_c1_residency_display(BIC_CPU_c6)) {
1885 		if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
1886 			return -7;
1887 	}
1888 
1889 	if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7))
1890 		if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
1891 			return -8;
1892 
1893 	if (DO_BIC(BIC_Mod_c6))
1894 		if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
1895 			return -8;
1896 
1897 	if (DO_BIC(BIC_CoreTmp)) {
1898 		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
1899 			return -9;
1900 		c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1901 	}
1902 
1903 	if (do_rapl & RAPL_AMD_F17H) {
1904 		if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
1905 			return -14;
1906 		c->core_energy = msr & 0xFFFFFFFF;
1907 	}
1908 
1909 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1910 		if (get_mp(cpu, mp, &c->counter[i]))
1911 			return -10;
1912 	}
1913 
1914 	/* collect package counters only for 1st core in package */
1915 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1916 		goto done;
1917 
1918 	if (DO_BIC(BIC_Totl_c0)) {
1919 		if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
1920 			return -10;
1921 	}
1922 	if (DO_BIC(BIC_Any_c0)) {
1923 		if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
1924 			return -11;
1925 	}
1926 	if (DO_BIC(BIC_GFX_c0)) {
1927 		if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
1928 			return -12;
1929 	}
1930 	if (DO_BIC(BIC_CPUGFX)) {
1931 		if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
1932 			return -13;
1933 	}
1934 	if (DO_BIC(BIC_Pkgpc3))
1935 		if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
1936 			return -9;
1937 	if (DO_BIC(BIC_Pkgpc6)) {
1938 		if (do_slm_cstates) {
1939 			if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
1940 				return -10;
1941 		} else {
1942 			if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
1943 				return -10;
1944 		}
1945 	}
1946 
1947 	if (DO_BIC(BIC_Pkgpc2))
1948 		if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
1949 			return -11;
1950 	if (DO_BIC(BIC_Pkgpc7))
1951 		if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
1952 			return -12;
1953 	if (DO_BIC(BIC_Pkgpc8))
1954 		if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
1955 			return -13;
1956 	if (DO_BIC(BIC_Pkgpc9))
1957 		if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
1958 			return -13;
1959 	if (DO_BIC(BIC_Pkgpc10))
1960 		if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
1961 			return -13;
1962 
1963 	if (DO_BIC(BIC_CPU_LPI))
1964 		p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
1965 	if (DO_BIC(BIC_SYS_LPI))
1966 		p->sys_lpi = cpuidle_cur_sys_lpi_us;
1967 
1968 	if (do_rapl & RAPL_PKG) {
1969 		if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
1970 			return -13;
1971 		p->energy_pkg = msr & 0xFFFFFFFF;
1972 	}
1973 	if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
1974 		if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
1975 			return -14;
1976 		p->energy_cores = msr & 0xFFFFFFFF;
1977 	}
1978 	if (do_rapl & RAPL_DRAM) {
1979 		if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
1980 			return -15;
1981 		p->energy_dram = msr & 0xFFFFFFFF;
1982 	}
1983 	if (do_rapl & RAPL_GFX) {
1984 		if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
1985 			return -16;
1986 		p->energy_gfx = msr & 0xFFFFFFFF;
1987 	}
1988 	if (do_rapl & RAPL_PKG_PERF_STATUS) {
1989 		if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
1990 			return -16;
1991 		p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
1992 	}
1993 	if (do_rapl & RAPL_DRAM_PERF_STATUS) {
1994 		if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
1995 			return -16;
1996 		p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
1997 	}
1998 	if (do_rapl & RAPL_AMD_F17H) {
1999 		if (get_msr(cpu, MSR_PKG_ENERGY_STAT, &msr))
2000 			return -13;
2001 		p->energy_pkg = msr & 0xFFFFFFFF;
2002 	}
2003 	if (DO_BIC(BIC_PkgTmp)) {
2004 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
2005 			return -17;
2006 		p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
2007 	}
2008 
2009 	if (DO_BIC(BIC_GFX_rc6))
2010 		p->gfx_rc6_ms = gfx_cur_rc6_ms;
2011 
2012 	if (DO_BIC(BIC_GFXMHz))
2013 		p->gfx_mhz = gfx_cur_mhz;
2014 
2015 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
2016 		if (get_mp(cpu, mp, &p->counter[i]))
2017 			return -10;
2018 	}
2019 done:
2020 	gettimeofday(&t->tv_end, (struct timezone *)NULL);
2021 
2022 	return 0;
2023 }
2024 
2025 /*
2026  * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
2027  * If you change the values, note they are used both in comparisons
2028  * (>= PCL__7) and to index pkg_cstate_limit_strings[].
2029  */
2030 
#define PCLUKN 0 /* Unknown */
#define PCLRSV 1 /* Reserved */
#define PCL__0 2 /* PC0 */
#define PCL__1 3 /* PC1 */
#define PCL__2 4 /* PC2 */
#define PCL__3 5 /* PC3 */
#define PCL__4 6 /* PC4 */
#define PCL__6 7 /* PC6 */
#define PCL_6N 8 /* PC6 No Retention */
#define PCL_6R 9 /* PC6 Retention */
#define PCL__7 10 /* PC7 */
#define PCL_7S 11 /* PC7 Shrink */
#define PCL__8 12 /* PC8 */
#define PCL__9 13 /* PC9 */
#define PCL_10 14 /* PC10 */
#define PCLUNL 15 /* Unlimited */

/* Currently decoded package C-state limit; one of the PCL* codes above. */
int pkg_cstate_limit = PCLUKN;

/*
 * Printable name for each PCL* code, indexed by the code itself.
 * Designated initializers keep every name tied to its code, so the
 * table cannot drift out of step with the #defines (the first two
 * entries used to be swapped, printing "reserved" for PCLUKN and
 * "unknown" for PCLRSV).
 */
char *pkg_cstate_limit_strings[] = {
	[PCLUKN] = "unknown",
	[PCLRSV] = "reserved",
	[PCL__0] = "pc0",
	[PCL__1] = "pc1",
	[PCL__2] = "pc2",
	[PCL__3] = "pc3",
	[PCL__4] = "pc4",
	[PCL__6] = "pc6",
	[PCL_6N] = "pc6n",
	[PCL_6R] = "pc6r",
	[PCL__7] = "pc7",
	[PCL_7S] = "pc7s",
	[PCL__8] = "pc8",
	[PCL__9] = "pc9",
	[PCL_10] = "pc10",
	[PCLUNL] = "unlimited",
};
2051 
2052 int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
2053 int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
2054 int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
2055 int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};
2056 int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
2057 int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
2058 int glm_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
2059 int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
2060 
2061 
2062 static void
2063 calculate_tsc_tweak()
2064 {
2065 	tsc_tweak = base_hz / tsc_hz;
2066 }
2067 
2068 static void
2069 dump_nhm_platform_info(void)
2070 {
2071 	unsigned long long msr;
2072 	unsigned int ratio;
2073 
2074 	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
2075 
2076 	fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
2077 
2078 	ratio = (msr >> 40) & 0xFF;
2079 	fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n",
2080 		ratio, bclk, ratio * bclk);
2081 
2082 	ratio = (msr >> 8) & 0xFF;
2083 	fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
2084 		ratio, bclk, ratio * bclk);
2085 
2086 	get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
2087 	fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
2088 		base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
2089 
2090 	return;
2091 }
2092 
2093 static void
2094 dump_hsw_turbo_ratio_limits(void)
2095 {
2096 	unsigned long long msr;
2097 	unsigned int ratio;
2098 
2099 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
2100 
2101 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
2102 
2103 	ratio = (msr >> 8) & 0xFF;
2104 	if (ratio)
2105 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n",
2106 			ratio, bclk, ratio * bclk);
2107 
2108 	ratio = (msr >> 0) & 0xFF;
2109 	if (ratio)
2110 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n",
2111 			ratio, bclk, ratio * bclk);
2112 	return;
2113 }
2114 
2115 static void
2116 dump_ivt_turbo_ratio_limits(void)
2117 {
2118 	unsigned long long msr;
2119 	unsigned int ratio;
2120 
2121 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
2122 
2123 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
2124 
2125 	ratio = (msr >> 56) & 0xFF;
2126 	if (ratio)
2127 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n",
2128 			ratio, bclk, ratio * bclk);
2129 
2130 	ratio = (msr >> 48) & 0xFF;
2131 	if (ratio)
2132 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n",
2133 			ratio, bclk, ratio * bclk);
2134 
2135 	ratio = (msr >> 40) & 0xFF;
2136 	if (ratio)
2137 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n",
2138 			ratio, bclk, ratio * bclk);
2139 
2140 	ratio = (msr >> 32) & 0xFF;
2141 	if (ratio)
2142 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n",
2143 			ratio, bclk, ratio * bclk);
2144 
2145 	ratio = (msr >> 24) & 0xFF;
2146 	if (ratio)
2147 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n",
2148 			ratio, bclk, ratio * bclk);
2149 
2150 	ratio = (msr >> 16) & 0xFF;
2151 	if (ratio)
2152 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n",
2153 			ratio, bclk, ratio * bclk);
2154 
2155 	ratio = (msr >> 8) & 0xFF;
2156 	if (ratio)
2157 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n",
2158 			ratio, bclk, ratio * bclk);
2159 
2160 	ratio = (msr >> 0) & 0xFF;
2161 	if (ratio)
2162 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n",
2163 			ratio, bclk, ratio * bclk);
2164 	return;
2165 }
2166 int has_turbo_ratio_group_limits(int family, int model)
2167 {
2168 
2169 	if (!genuine_intel)
2170 		return 0;
2171 
2172 	switch (model) {
2173 	case INTEL_FAM6_ATOM_GOLDMONT:
2174 	case INTEL_FAM6_SKYLAKE_X:
2175 	case INTEL_FAM6_ATOM_GOLDMONT_D:
2176 		return 1;
2177 	}
2178 	return 0;
2179 }
2180 
2181 static void
2182 dump_turbo_ratio_limits(int family, int model)
2183 {
2184 	unsigned long long msr, core_counts;
2185 	unsigned int ratio, group_size;
2186 
2187 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2188 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
2189 
2190 	if (has_turbo_ratio_group_limits(family, model)) {
2191 		get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
2192 		fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
2193 	} else {
2194 		core_counts = 0x0807060504030201;
2195 	}
2196 
2197 	ratio = (msr >> 56) & 0xFF;
2198 	group_size = (core_counts >> 56) & 0xFF;
2199 	if (ratio)
2200 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2201 			ratio, bclk, ratio * bclk, group_size);
2202 
2203 	ratio = (msr >> 48) & 0xFF;
2204 	group_size = (core_counts >> 48) & 0xFF;
2205 	if (ratio)
2206 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2207 			ratio, bclk, ratio * bclk, group_size);
2208 
2209 	ratio = (msr >> 40) & 0xFF;
2210 	group_size = (core_counts >> 40) & 0xFF;
2211 	if (ratio)
2212 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2213 			ratio, bclk, ratio * bclk, group_size);
2214 
2215 	ratio = (msr >> 32) & 0xFF;
2216 	group_size = (core_counts >> 32) & 0xFF;
2217 	if (ratio)
2218 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2219 			ratio, bclk, ratio * bclk, group_size);
2220 
2221 	ratio = (msr >> 24) & 0xFF;
2222 	group_size = (core_counts >> 24) & 0xFF;
2223 	if (ratio)
2224 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2225 			ratio, bclk, ratio * bclk, group_size);
2226 
2227 	ratio = (msr >> 16) & 0xFF;
2228 	group_size = (core_counts >> 16) & 0xFF;
2229 	if (ratio)
2230 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2231 			ratio, bclk, ratio * bclk, group_size);
2232 
2233 	ratio = (msr >> 8) & 0xFF;
2234 	group_size = (core_counts >> 8) & 0xFF;
2235 	if (ratio)
2236 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2237 			ratio, bclk, ratio * bclk, group_size);
2238 
2239 	ratio = (msr >> 0) & 0xFF;
2240 	group_size = (core_counts >> 0) & 0xFF;
2241 	if (ratio)
2242 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2243 			ratio, bclk, ratio * bclk, group_size);
2244 	return;
2245 }
2246 
2247 static void
2248 dump_atom_turbo_ratio_limits(void)
2249 {
2250 	unsigned long long msr;
2251 	unsigned int ratio;
2252 
2253 	get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
2254 	fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2255 
2256 	ratio = (msr >> 0) & 0x3F;
2257 	if (ratio)
2258 		fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n",
2259 			ratio, bclk, ratio * bclk);
2260 
2261 	ratio = (msr >> 8) & 0x3F;
2262 	if (ratio)
2263 		fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n",
2264 			ratio, bclk, ratio * bclk);
2265 
2266 	ratio = (msr >> 16) & 0x3F;
2267 	if (ratio)
2268 		fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
2269 			ratio, bclk, ratio * bclk);
2270 
2271 	get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
2272 	fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2273 
2274 	ratio = (msr >> 24) & 0x3F;
2275 	if (ratio)
2276 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n",
2277 			ratio, bclk, ratio * bclk);
2278 
2279 	ratio = (msr >> 16) & 0x3F;
2280 	if (ratio)
2281 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n",
2282 			ratio, bclk, ratio * bclk);
2283 
2284 	ratio = (msr >> 8) & 0x3F;
2285 	if (ratio)
2286 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n",
2287 			ratio, bclk, ratio * bclk);
2288 
2289 	ratio = (msr >> 0) & 0x3F;
2290 	if (ratio)
2291 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n",
2292 			ratio, bclk, ratio * bclk);
2293 }
2294 
2295 static void
2296 dump_knl_turbo_ratio_limits(void)
2297 {
2298 	const unsigned int buckets_no = 7;
2299 
2300 	unsigned long long msr;
2301 	int delta_cores, delta_ratio;
2302 	int i, b_nr;
2303 	unsigned int cores[buckets_no];
2304 	unsigned int ratio[buckets_no];
2305 
2306 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2307 
2308 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
2309 		base_cpu, msr);
2310 
2311 	/**
2312 	 * Turbo encoding in KNL is as follows:
2313 	 * [0] -- Reserved
2314 	 * [7:1] -- Base value of number of active cores of bucket 1.
2315 	 * [15:8] -- Base value of freq ratio of bucket 1.
2316 	 * [20:16] -- +ve delta of number of active cores of bucket 2.
2317 	 * i.e. active cores of bucket 2 =
2318 	 * active cores of bucket 1 + delta
2319 	 * [23:21] -- Negative delta of freq ratio of bucket 2.
2320 	 * i.e. freq ratio of bucket 2 =
2321 	 * freq ratio of bucket 1 - delta
2322 	 * [28:24]-- +ve delta of number of active cores of bucket 3.
2323 	 * [31:29]-- -ve delta of freq ratio of bucket 3.
2324 	 * [36:32]-- +ve delta of number of active cores of bucket 4.
2325 	 * [39:37]-- -ve delta of freq ratio of bucket 4.
2326 	 * [44:40]-- +ve delta of number of active cores of bucket 5.
2327 	 * [47:45]-- -ve delta of freq ratio of bucket 5.
2328 	 * [52:48]-- +ve delta of number of active cores of bucket 6.
2329 	 * [55:53]-- -ve delta of freq ratio of bucket 6.
2330 	 * [60:56]-- +ve delta of number of active cores of bucket 7.
2331 	 * [63:61]-- -ve delta of freq ratio of bucket 7.
2332 	 */
2333 
2334 	b_nr = 0;
2335 	cores[b_nr] = (msr & 0xFF) >> 1;
2336 	ratio[b_nr] = (msr >> 8) & 0xFF;
2337 
2338 	for (i = 16; i < 64; i += 8) {
2339 		delta_cores = (msr >> i) & 0x1F;
2340 		delta_ratio = (msr >> (i + 5)) & 0x7;
2341 
2342 		cores[b_nr + 1] = cores[b_nr] + delta_cores;
2343 		ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
2344 		b_nr++;
2345 	}
2346 
2347 	for (i = buckets_no - 1; i >= 0; i--)
2348 		if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
2349 			fprintf(outf,
2350 				"%d * %.1f = %.1f MHz max turbo %d active cores\n",
2351 				ratio[i], bclk, ratio[i] * bclk, cores[i]);
2352 }
2353 
2354 static void
2355 dump_nhm_cst_cfg(void)
2356 {
2357 	unsigned long long msr;
2358 
2359 	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2360 
2361 	fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
2362 
2363 	fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
2364 		(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
2365 		(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
2366 		(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
2367 		(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
2368 		(msr & (1 << 15)) ? "" : "UN",
2369 		(unsigned int)msr & 0xF,
2370 		pkg_cstate_limit_strings[pkg_cstate_limit]);
2371 
2372 #define AUTOMATIC_CSTATE_CONVERSION		(1UL << 16)
2373 	if (has_automatic_cstate_conversion) {
2374 		fprintf(outf, ", automatic c-state conversion=%s",
2375 			(msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
2376 	}
2377 
2378 	fprintf(outf, ")\n");
2379 
2380 	return;
2381 }
2382 
2383 static void
2384 dump_config_tdp(void)
2385 {
2386 	unsigned long long msr;
2387 
2388 	get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
2389 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
2390 	fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
2391 
2392 	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
2393 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
2394 	if (msr) {
2395 		fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2396 		fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2397 		fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2398 		fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
2399 	}
2400 	fprintf(outf, ")\n");
2401 
2402 	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
2403 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
2404 	if (msr) {
2405 		fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2406 		fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2407 		fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2408 		fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
2409 	}
2410 	fprintf(outf, ")\n");
2411 
2412 	get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
2413 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
2414 	if ((msr) & 0x3)
2415 		fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
2416 	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2417 	fprintf(outf, ")\n");
2418 
2419 	get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
2420 	fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
2421 	fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
2422 	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2423 	fprintf(outf, ")\n");
2424 }
2425 
2426 unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
2427 
2428 void print_irtl(void)
2429 {
2430 	unsigned long long msr;
2431 
2432 	get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
2433 	fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
2434 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2435 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2436 
2437 	get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
2438 	fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
2439 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2440 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2441 
2442 	get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
2443 	fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
2444 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2445 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2446 
2447 	if (!do_irtl_hsw)
2448 		return;
2449 
2450 	get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
2451 	fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
2452 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2453 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2454 
2455 	get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
2456 	fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
2457 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2458 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2459 
2460 	get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
2461 	fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
2462 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2463 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2464 
2465 }
2466 void free_fd_percpu(void)
2467 {
2468 	int i;
2469 
2470 	for (i = 0; i < topo.max_cpu_num + 1; ++i) {
2471 		if (fd_percpu[i] != 0)
2472 			close(fd_percpu[i]);
2473 	}
2474 
2475 	free(fd_percpu);
2476 }
2477 
2478 void free_all_buffers(void)
2479 {
2480 	int i;
2481 
2482 	CPU_FREE(cpu_present_set);
2483 	cpu_present_set = NULL;
2484 	cpu_present_setsize = 0;
2485 
2486 	CPU_FREE(cpu_affinity_set);
2487 	cpu_affinity_set = NULL;
2488 	cpu_affinity_setsize = 0;
2489 
2490 	free(thread_even);
2491 	free(core_even);
2492 	free(package_even);
2493 
2494 	thread_even = NULL;
2495 	core_even = NULL;
2496 	package_even = NULL;
2497 
2498 	free(thread_odd);
2499 	free(core_odd);
2500 	free(package_odd);
2501 
2502 	thread_odd = NULL;
2503 	core_odd = NULL;
2504 	package_odd = NULL;
2505 
2506 	free(output_buffer);
2507 	output_buffer = NULL;
2508 	outp = NULL;
2509 
2510 	free_fd_percpu();
2511 
2512 	free(irq_column_2_cpu);
2513 	free(irqs_per_cpu);
2514 
2515 	for (i = 0; i <= topo.max_cpu_num; ++i) {
2516 		if (cpus[i].put_ids)
2517 			CPU_FREE(cpus[i].put_ids);
2518 	}
2519 	free(cpus);
2520 }
2521 
2522 
/*
 * Read one decimal int from the file whose name is built printf-style
 * from fmt and the following arguments.
 *
 * Returns the parsed value, or 0 when the file cannot be opened (so a
 * missing file and a file containing 0 look the same to the caller).
 * If the file opens but does not start with an int, the program exits
 * through err().
 */
int parse_int_file(const char *fmt, ...)
{
	char filename[PATH_MAX];
	va_list ap;
	FILE *fp;
	int parsed;

	va_start(ap, fmt);
	vsnprintf(filename, sizeof(filename), fmt, ap);
	va_end(ap);

	fp = fopen(filename, "r");
	if (fp == NULL)
		return 0;

	if (fscanf(fp, "%d", &parsed) != 1)
		err(1, "%s: failed to parse number from file", filename);

	fclose(fp);

	return parsed;
}
2546 
/*
 * cpu_is_first_core_in_package(cpu)
 * Return 1 when the first integer in this cpu's sysfs
 * topology/core_siblings_list equals cpu, else 0.
 * parse_int_file() yields 0 for an unreadable file, so cpu 0 also
 * reports 1 in that case.
 */
int cpu_is_first_core_in_package(int cpu)
{
	int first_sibling;

	first_sibling = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);

	return first_sibling == cpu;
}
2555 
/*
 * Return the integer stored in this cpu's sysfs
 * topology/physical_package_id (0 if the file cannot be opened).
 */
int get_physical_package_id(int cpu)
{
	int pkg_id;

	pkg_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);

	return pkg_id;
}
2560 
/*
 * Return the integer stored in this cpu's sysfs topology/die_id
 * (0 if the file cannot be opened).
 */
int get_die_id(int cpu)
{
	int die_id;

	die_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu);

	return die_id;
}
2565 
/*
 * Return the integer stored in this cpu's sysfs topology/core_id
 * (0 if the file cannot be opened).
 */
int get_core_id(int cpu)
{
	int core_id;

	core_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);

	return core_id;
}
2570 
2571 void set_node_data(void)
2572 {
2573 	int pkg, node, lnode, cpu, cpux;
2574 	int cpu_count;
2575 
2576 	/* initialize logical_node_id */
2577 	for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
2578 		cpus[cpu].logical_node_id = -1;
2579 
2580 	cpu_count = 0;
2581 	for (pkg = 0; pkg < topo.num_packages; pkg++) {
2582 		lnode = 0;
2583 		for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
2584 			if (cpus[cpu].physical_package_id != pkg)
2585 				continue;
2586 			/* find a cpu with an unset logical_node_id */
2587 			if (cpus[cpu].logical_node_id != -1)
2588 				continue;
2589 			cpus[cpu].logical_node_id = lnode;
2590 			node = cpus[cpu].physical_node_id;
2591 			cpu_count++;
2592 			/*
2593 			 * find all matching cpus on this pkg and set
2594 			 * the logical_node_id
2595 			 */
2596 			for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
2597 				if ((cpus[cpux].physical_package_id == pkg) &&
2598 				   (cpus[cpux].physical_node_id == node)) {
2599 					cpus[cpux].logical_node_id = lnode;
2600 					cpu_count++;
2601 				}
2602 			}
2603 			lnode++;
2604 			if (lnode > topo.nodes_per_pkg)
2605 				topo.nodes_per_pkg = lnode;
2606 		}
2607 		if (cpu_count >= topo.max_cpu_num)
2608 			break;
2609 	}
2610 }
2611 
2612 int get_physical_node_id(struct cpu_topology *thiscpu)
2613 {
2614 	char path[80];
2615 	FILE *filep;
2616 	int i;
2617 	int cpu = thiscpu->logical_cpu_id;
2618 
2619 	for (i = 0; i <= topo.max_cpu_num; i++) {
2620 		sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist",
2621 			cpu, i);
2622 		filep = fopen(path, "r");
2623 		if (!filep)
2624 			continue;
2625 		fclose(filep);
2626 		return i;
2627 	}
2628 	return -1;
2629 }
2630 
2631 int get_thread_siblings(struct cpu_topology *thiscpu)
2632 {
2633 	char path[80], character;
2634 	FILE *filep;
2635 	unsigned long map;
2636 	int so, shift, sib_core;
2637 	int cpu = thiscpu->logical_cpu_id;
2638 	int offset = topo.max_cpu_num + 1;
2639 	size_t size;
2640 	int thread_id = 0;
2641 
2642 	thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
2643 	if (thiscpu->thread_id < 0)
2644 		thiscpu->thread_id = thread_id++;
2645 	if (!thiscpu->put_ids)
2646 		return -1;
2647 
2648 	size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
2649 	CPU_ZERO_S(size, thiscpu->put_ids);
2650 
2651 	sprintf(path,
2652 		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
2653 	filep = fopen_or_die(path, "r");
2654 	do {
2655 		offset -= BITMASK_SIZE;
2656 		if (fscanf(filep, "%lx%c", &map, &character) != 2)
2657 			err(1, "%s: failed to parse file", path);
2658 		for (shift = 0; shift < BITMASK_SIZE; shift++) {
2659 			if ((map >> shift) & 0x1) {
2660 				so = shift + offset;
2661 				sib_core = get_core_id(so);
2662 				if (sib_core == thiscpu->physical_core_id) {
2663 					CPU_SET_S(so, size, thiscpu->put_ids);
2664 					if ((so != cpu) &&
2665 					    (cpus[so].thread_id < 0))
2666 						cpus[so].thread_id =
2667 								    thread_id++;
2668 				}
2669 			}
2670 		}
2671 	} while (!strncmp(&character, ",", 1));
2672 	fclose(filep);
2673 
2674 	return CPU_COUNT_S(size, thiscpu->put_ids);
2675 }
2676 
2677 /*
2678  * run func(thread, core, package) in topology order
2679  * skip non-present cpus
2680  */
2681 
2682 int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
2683 	struct pkg_data *, struct thread_data *, struct core_data *,
2684 	struct pkg_data *), struct thread_data *thread_base,
2685 	struct core_data *core_base, struct pkg_data *pkg_base,
2686 	struct thread_data *thread_base2, struct core_data *core_base2,
2687 	struct pkg_data *pkg_base2)
2688 {
2689 	int retval, pkg_no, node_no, core_no, thread_no;
2690 
2691 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
2692 		for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
2693 			for (core_no = 0; core_no < topo.cores_per_node;
2694 			     ++core_no) {
2695 				for (thread_no = 0; thread_no <
2696 					topo.threads_per_core; ++thread_no) {
2697 					struct thread_data *t, *t2;
2698 					struct core_data *c, *c2;
2699 					struct pkg_data *p, *p2;
2700 
2701 					t = GET_THREAD(thread_base, thread_no,
2702 						       core_no, node_no,
2703 						       pkg_no);
2704 
2705 					if (cpu_is_not_present(t->cpu_id))
2706 						continue;
2707 
2708 					t2 = GET_THREAD(thread_base2, thread_no,
2709 							core_no, node_no,
2710 							pkg_no);
2711 
2712 					c = GET_CORE(core_base, core_no,
2713 						     node_no, pkg_no);
2714 					c2 = GET_CORE(core_base2, core_no,
2715 						      node_no,
2716 						      pkg_no);
2717 
2718 					p = GET_PKG(pkg_base, pkg_no);
2719 					p2 = GET_PKG(pkg_base2, pkg_no);
2720 
2721 					retval = func(t, c, p, t2, c2, p2);
2722 					if (retval)
2723 						return retval;
2724 				}
2725 			}
2726 		}
2727 	}
2728 	return 0;
2729 }
2730 
2731 /*
2732  * run func(cpu) on every cpu in /proc/stat
2733  * return max_cpu number
2734  */
2735 int for_all_proc_cpus(int (func)(int))
2736 {
2737 	FILE *fp;
2738 	int cpu_num;
2739 	int retval;
2740 
2741 	fp = fopen_or_die(proc_stat, "r");
2742 
2743 	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
2744 	if (retval != 0)
2745 		err(1, "%s: failed to parse format", proc_stat);
2746 
2747 	while (1) {
2748 		retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
2749 		if (retval != 1)
2750 			break;
2751 
2752 		retval = func(cpu_num);
2753 		if (retval) {
2754 			fclose(fp);
2755 			return(retval);
2756 		}
2757 	}
2758 	fclose(fp);
2759 	return 0;
2760 }
2761 
2762 void re_initialize(void)
2763 {
2764 	free_all_buffers();
2765 	setup_all_buffers();
2766 	printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
2767 }
2768 
2769 void set_max_cpu_num(void)
2770 {
2771 	FILE *filep;
2772 	unsigned long dummy;
2773 
2774 	topo.max_cpu_num = 0;
2775 	filep = fopen_or_die(
2776 			"/sys/devices/system/cpu/cpu0/topology/thread_siblings",
2777 			"r");
2778 	while (fscanf(filep, "%lx,", &dummy) == 1)
2779 		topo.max_cpu_num += BITMASK_SIZE;
2780 	fclose(filep);
2781 	topo.max_cpu_num--; /* 0 based */
2782 }
2783 
2784 /*
2785  * count_cpus()
2786  * remember the last one seen, it will be the max
2787  */
2788 int count_cpus(int cpu)
2789 {
2790 	topo.num_cpus++;
2791 	return 0;
2792 }
2793 int mark_cpu_present(int cpu)
2794 {
2795 	CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
2796 	return 0;
2797 }
2798 
2799 int init_thread_id(int cpu)
2800 {
2801 	cpus[cpu].thread_id = -1;
2802 	return 0;
2803 }
2804 
2805 /*
2806  * snapshot_proc_interrupts()
2807  *
2808  * read and record summary of /proc/interrupts
2809  *
2810  * return 1 if config change requires a restart, else return 0
2811  */
2812 int snapshot_proc_interrupts(void)
2813 {
2814 	static FILE *fp;
2815 	int column, retval;
2816 
2817 	if (fp == NULL)
2818 		fp = fopen_or_die("/proc/interrupts", "r");
2819 	else
2820 		rewind(fp);
2821 
2822 	/* read 1st line of /proc/interrupts to get cpu* name for each column */
2823 	for (column = 0; column < topo.num_cpus; ++column) {
2824 		int cpu_number;
2825 
2826 		retval = fscanf(fp, " CPU%d", &cpu_number);
2827 		if (retval != 1)
2828 			break;
2829 
2830 		if (cpu_number > topo.max_cpu_num) {
2831 			warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
2832 			return 1;
2833 		}
2834 
2835 		irq_column_2_cpu[column] = cpu_number;
2836 		irqs_per_cpu[cpu_number] = 0;
2837 	}
2838 
2839 	/* read /proc/interrupt count lines and sum up irqs per cpu */
2840 	while (1) {
2841 		int column;
2842 		char buf[64];
2843 
2844 		retval = fscanf(fp, " %s:", buf);	/* flush irq# "N:" */
2845 		if (retval != 1)
2846 			break;
2847 
2848 		/* read the count per cpu */
2849 		for (column = 0; column < topo.num_cpus; ++column) {
2850 
2851 			int cpu_number, irq_count;
2852 
2853 			retval = fscanf(fp, " %d", &irq_count);
2854 			if (retval != 1)
2855 				break;
2856 
2857 			cpu_number = irq_column_2_cpu[column];
2858 			irqs_per_cpu[cpu_number] += irq_count;
2859 
2860 		}
2861 
2862 		while (getc(fp) != '\n')
2863 			;	/* flush interrupt description */
2864 
2865 	}
2866 	return 0;
2867 }
2868 /*
2869  * snapshot_gfx_rc6_ms()
2870  *
2871  * record snapshot of
2872  * /sys/class/drm/card0/power/rc6_residency_ms
2873  *
2874  * return 1 if config change requires a restart, else return 0
2875  */
2876 int snapshot_gfx_rc6_ms(void)
2877 {
2878 	FILE *fp;
2879 	int retval;
2880 
2881 	fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
2882 
2883 	retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
2884 	if (retval != 1)
2885 		err(1, "GFX rc6");
2886 
2887 	fclose(fp);
2888 
2889 	return 0;
2890 }
2891 /*
2892  * snapshot_gfx_mhz()
2893  *
2894  * record snapshot of
2895  * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
2896  *
2897  * return 1 if config change requires a restart, else return 0
2898  */
2899 int snapshot_gfx_mhz(void)
2900 {
2901 	static FILE *fp;
2902 	int retval;
2903 
2904 	if (fp == NULL)
2905 		fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
2906 	else {
2907 		rewind(fp);
2908 		fflush(fp);
2909 	}
2910 
2911 	retval = fscanf(fp, "%d", &gfx_cur_mhz);
2912 	if (retval != 1)
2913 		err(1, "GFX MHz");
2914 
2915 	return 0;
2916 }
2917 
2918 /*
 * snapshot_cpu_lpi_us()
2920  *
2921  * record snapshot of
2922  * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
2923  */
2924 int snapshot_cpu_lpi_us(void)
2925 {
2926 	FILE *fp;
2927 	int retval;
2928 
2929 	fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
2930 
2931 	retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
2932 	if (retval != 1) {
2933 		fprintf(stderr, "Disabling Low Power Idle CPU output\n");
2934 		BIC_NOT_PRESENT(BIC_CPU_LPI);
2935 		fclose(fp);
2936 		return -1;
2937 	}
2938 
2939 	fclose(fp);
2940 
2941 	return 0;
2942 }
2943 /*
 * snapshot_sys_lpi_us()
2945  *
2946  * record snapshot of sys_lpi_file
2947  */
2948 int snapshot_sys_lpi_us(void)
2949 {
2950 	FILE *fp;
2951 	int retval;
2952 
2953 	fp = fopen_or_die(sys_lpi_file, "r");
2954 
2955 	retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
2956 	if (retval != 1) {
2957 		fprintf(stderr, "Disabling Low Power Idle System output\n");
2958 		BIC_NOT_PRESENT(BIC_SYS_LPI);
2959 		fclose(fp);
2960 		return -1;
2961 	}
2962 	fclose(fp);
2963 
2964 	return 0;
2965 }
2966 /*
2967  * snapshot /proc and /sys files
2968  *
2969  * return 1 if configuration restart needed, else return 0
2970  */
2971 int snapshot_proc_sysfs_files(void)
2972 {
2973 	if (DO_BIC(BIC_IRQ))
2974 		if (snapshot_proc_interrupts())
2975 			return 1;
2976 
2977 	if (DO_BIC(BIC_GFX_rc6))
2978 		snapshot_gfx_rc6_ms();
2979 
2980 	if (DO_BIC(BIC_GFXMHz))
2981 		snapshot_gfx_mhz();
2982 
2983 	if (DO_BIC(BIC_CPU_LPI))
2984 		snapshot_cpu_lpi_us();
2985 
2986 	if (DO_BIC(BIC_SYS_LPI))
2987 		snapshot_sys_lpi_us();
2988 
2989 	return 0;
2990 }
2991 
2992 int exit_requested;
2993 
2994 static void signal_handler (int signal)
2995 {
2996 	switch (signal) {
2997 	case SIGINT:
2998 		exit_requested = 1;
2999 		if (debug)
3000 			fprintf(stderr, " SIGINT\n");
3001 		break;
3002 	case SIGUSR1:
3003 		if (debug > 1)
3004 			fprintf(stderr, "SIGUSR1\n");
3005 		break;
3006 	}
3007 }
3008 
3009 void setup_signal_handler(void)
3010 {
3011 	struct sigaction sa;
3012 
3013 	memset(&sa, 0, sizeof(sa));
3014 
3015 	sa.sa_handler = &signal_handler;
3016 
3017 	if (sigaction(SIGINT, &sa, NULL) < 0)
3018 		err(1, "sigaction SIGINT");
3019 	if (sigaction(SIGUSR1, &sa, NULL) < 0)
3020 		err(1, "sigaction SIGUSR1");
3021 }
3022 
3023 void do_sleep(void)
3024 {
3025 	struct timeval tout;
3026 	struct timespec rest;
3027 	fd_set readfds;
3028 	int retval;
3029 
3030 	FD_ZERO(&readfds);
3031 	FD_SET(0, &readfds);
3032 
3033 	if (ignore_stdin) {
3034 		nanosleep(&interval_ts, NULL);
3035 		return;
3036 	}
3037 
3038 	tout = interval_tv;
3039 	retval = select(1, &readfds, NULL, NULL, &tout);
3040 
3041 	if (retval == 1) {
3042 		switch (getc(stdin)) {
3043 		case 'q':
3044 			exit_requested = 1;
3045 			break;
3046 		case EOF:
3047 			/*
3048 			 * 'stdin' is a pipe closed on the other end. There
3049 			 * won't be any further input.
3050 			 */
3051 			ignore_stdin = 1;
3052 			/* Sleep the rest of the time */
3053 			rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000);
3054 			rest.tv_nsec = (tout.tv_usec % 1000000) * 1000;
3055 			nanosleep(&rest, NULL);
3056 		}
3057 	}
3058 }
3059 
3060 
/*
 * turbostat_loop()
 *
 * Main periodic measurement loop.  Counters are collected alternately
 * into the EVEN and ODD sets; after each collection the delta against
 * the other set is computed, averaged, formatted and flushed.
 *
 * The loop ends when exit_requested is set or when num_iterations
 * (if non-zero) reports have been produced.  Several failure paths
 * jump back to the "restart" label, most of them after calling
 * re_initialize().
 */
void turbostat_loop()
{
	int retval;
	int restarted = 0;	/* consecutive restarts without a good baseline */
	int done_iters = 0;	/* reports printed since the last good baseline */

	setup_signal_handler();

restart:
	restarted++;

	/* baseline snapshot; the restart indication is not checked here */
	snapshot_proc_sysfs_files();
	retval = for_all_cpus(get_counters, EVEN_COUNTERS);
	first_counter_read = 0;
	if (retval < -1) {
		/* values below -1 are treated as fatal */
		exit(retval);
	} else if (retval == -1) {
		/* -1 gets one re-initialize-and-retry; a second one in a row exits */
		if (restarted > 1) {
			exit(retval);
		}
		re_initialize();
		goto restart;
	}
	restarted = 0;
	done_iters = 0;
	gettimeofday(&tv_even, (struct timezone *)NULL);

	while (1) {
		/* non-zero result here triggers a re-initialize and fresh baseline */
		if (for_all_proc_cpus(cpu_is_not_present)) {
			re_initialize();
			goto restart;
		}
		do_sleep();
		/* note: this restart path does not call re_initialize() */
		if (snapshot_proc_sysfs_files())
			goto restart;
		retval = for_all_cpus(get_counters, ODD_COUNTERS);
		if (retval < -1) {
			exit(retval);
		} else if (retval == -1) {
			re_initialize();
			goto restart;
		}
		gettimeofday(&tv_odd, (struct timezone *)NULL);
		/* tv_delta = tv_odd - tv_even */
		timersub(&tv_odd, &tv_even, &tv_delta);
		if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
			re_initialize();
			goto restart;
		}
		/* presumably the delta lands in the EVEN set, which is then reported -- TODO confirm in delta_cpu */
		compute_average(EVEN_COUNTERS);
		format_all_counters(EVEN_COUNTERS);
		flush_output_stdout();
		if (exit_requested)
			break;
		if (num_iterations && ++done_iters >= num_iterations)
			break;
		/* second half of the iteration: same steps with the sets swapped */
		do_sleep();
		if (snapshot_proc_sysfs_files())
			goto restart;
		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
		if (retval < -1) {
			exit(retval);
		} else if (retval == -1) {
			re_initialize();
			goto restart;
		}
		gettimeofday(&tv_even, (struct timezone *)NULL);
		/* tv_delta = tv_even - tv_odd */
		timersub(&tv_even, &tv_odd, &tv_delta);
		if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
			re_initialize();
			goto restart;
		}
		compute_average(ODD_COUNTERS);
		format_all_counters(ODD_COUNTERS);
		flush_output_stdout();
		if (exit_requested)
			break;
		if (num_iterations && ++done_iters >= num_iterations)
			break;
	}
}
3141 
3142 void check_dev_msr()
3143 {
3144 	struct stat sb;
3145 	char pathname[32];
3146 
3147 	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3148 	if (stat(pathname, &sb))
3149  		if (system("/sbin/modprobe msr > /dev/null 2>&1"))
3150 			err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
3151 }
3152 
3153 /*
3154  * check for CAP_SYS_RAWIO
3155  * return 0 on success
3156  * return 1 on fail
3157  */
3158 int check_for_cap_sys_rawio(void)
3159 {
3160 	cap_t caps;
3161 	cap_flag_value_t cap_flag_value;
3162 
3163 	caps = cap_get_proc();
3164 	if (caps == NULL)
3165 		err(-6, "cap_get_proc\n");
3166 
3167 	if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value))
3168 		err(-6, "cap_get\n");
3169 
3170 	if (cap_flag_value != CAP_SET) {
3171 		warnx("capget(CAP_SYS_RAWIO) failed,"
3172 			" try \"# setcap cap_sys_rawio=ep %s\"", progname);
3173 		return 1;
3174 	}
3175 
3176 	if (cap_free(caps) == -1)
3177 		err(-6, "cap_free\n");
3178 
3179 	return 0;
3180 }
3181 void check_permissions(void)
3182 {
3183 	int do_exit = 0;
3184 	char pathname[32];
3185 
3186 	/* check for CAP_SYS_RAWIO */
3187 	do_exit += check_for_cap_sys_rawio();
3188 
3189 	/* test file permissions */
3190 	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3191 	if (euidaccess(pathname, R_OK)) {
3192 		do_exit++;
3193 		warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
3194 	}
3195 
3196 	/* if all else fails, thell them to be root */
3197 	if (do_exit)
3198 		if (getuid() != 0)
3199 			warnx("... or simply run as root");
3200 
3201 	if (do_exit)
3202 		exit(-6);
3203 }
3204 
3205 /*
3206  * NHM adds support for additional MSRs:
3207  *
3208  * MSR_SMI_COUNT                   0x00000034
3209  *
3210  * MSR_PLATFORM_INFO               0x000000ce
3211  * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
3212  *
3213  * MSR_MISC_PWR_MGMT               0x000001aa
3214  *
3215  * MSR_PKG_C3_RESIDENCY            0x000003f8
3216  * MSR_PKG_C6_RESIDENCY            0x000003f9
3217  * MSR_CORE_C3_RESIDENCY           0x000003fc
3218  * MSR_CORE_C6_RESIDENCY           0x000003fd
3219  *
3220  * Side effect:
3221  * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
3222  * sets has_misc_feature_control
3223  */
/*
 * Select the package C-state limit table for this CPU model and decode
 * the base frequency.
 *
 * Returns 0 without reading any MSR for non-Intel, non-family-6 or
 * unlisted models; otherwise returns 1 after setting pkg_cstate_limit,
 * base_hz and has_base_hz.  bclk is set for every genuine-Intel
 * family-6 part, even when the model is not listed below.
 */
int probe_nhm_msrs(unsigned int family, unsigned int model)
{
	unsigned long long msr;
	unsigned int base_ratio;
	int *pkg_cstate_limits;

	if (!genuine_intel)
		return 0;

	if (family != 6)
		return 0;

	bclk = discover_bclk(family, model);

	switch (model) {
	case INTEL_FAM6_NEHALEM:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
		pkg_cstate_limits = nhm_pkg_cstate_limits;
		break;
	case INTEL_FAM6_SANDYBRIDGE:	/* SNB */
	case INTEL_FAM6_SANDYBRIDGE_X:	/* SNB Xeon */
	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
		pkg_cstate_limits = snb_pkg_cstate_limits;
		has_misc_feature_control = 1;
		break;
	case INTEL_FAM6_HASWELL:	/* HSW */
	case INTEL_FAM6_HASWELL_G:	/* HSW */
	case INTEL_FAM6_HASWELL_X:	/* HSX */
	case INTEL_FAM6_HASWELL_L:	/* HSW */
	case INTEL_FAM6_BROADWELL:	/* BDW */
	case INTEL_FAM6_BROADWELL_G:	/* BDW */
	case INTEL_FAM6_BROADWELL_X:	/* BDX */
	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
		pkg_cstate_limits = hsw_pkg_cstate_limits;
		has_misc_feature_control = 1;
		break;
	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
		pkg_cstate_limits = skx_pkg_cstate_limits;
		has_misc_feature_control = 1;
		break;
	case INTEL_FAM6_ATOM_SILVERMONT:	/* BYT */
		no_MSR_MISC_PWR_MGMT = 1;
		/* fall through: BYT uses the same limit table as AVN */
	case INTEL_FAM6_ATOM_SILVERMONT_D:	/* AVN */
		pkg_cstate_limits = slv_pkg_cstate_limits;
		break;
	case INTEL_FAM6_ATOM_AIRMONT:	/* AMT */
		pkg_cstate_limits = amt_pkg_cstate_limits;
		no_MSR_MISC_PWR_MGMT = 1;
		break;
	case INTEL_FAM6_XEON_PHI_KNL:	/* PHI */
		pkg_cstate_limits = phi_pkg_cstate_limits;
		break;
	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
	case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
	case INTEL_FAM6_ATOM_TREMONT:	/* EHL */
		pkg_cstate_limits = glm_pkg_cstate_limits;
		break;
	default:
		return 0;
	}
	/* NOTE(review): get_msr() results are not checked here -- confirm it cannot return with msr unset */
	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
	/* low 4 bits of the MSR index into the model's limit table */
	pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];

	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
	/* bits 15:8 are used as the base ratio */
	base_ratio = (msr >> 8) & 0xFF;

	/* ratio * bclk, scaled by 1000000 (bclk presumably in MHz -- confirm in discover_bclk) */
	base_hz = base_ratio * bclk * 1000000;
	has_base_hz = 1;
	return 1;
}
3297 /*
3298  * SLV client has support for unique MSRs:
3299  *
3300  * MSR_CC6_DEMOTION_POLICY_CONFIG
3301  * MSR_MC6_DEMOTION_POLICY_CONFIG
3302  */
3303 
3304 int has_slv_msrs(unsigned int family, unsigned int model)
3305 {
3306 	if (!genuine_intel)
3307 		return 0;
3308 
3309 	switch (model) {
3310 	case INTEL_FAM6_ATOM_SILVERMONT:
3311 	case INTEL_FAM6_ATOM_SILVERMONT_MID:
3312 	case INTEL_FAM6_ATOM_AIRMONT_MID:
3313 		return 1;
3314 	}
3315 	return 0;
3316 }
3317 int is_dnv(unsigned int family, unsigned int model)
3318 {
3319 
3320 	if (!genuine_intel)
3321 		return 0;
3322 
3323 	switch (model) {
3324 	case INTEL_FAM6_ATOM_GOLDMONT_D:
3325 		return 1;
3326 	}
3327 	return 0;
3328 }
3329 int is_bdx(unsigned int family, unsigned int model)
3330 {
3331 
3332 	if (!genuine_intel)
3333 		return 0;
3334 
3335 	switch (model) {
3336 	case INTEL_FAM6_BROADWELL_X:
3337 		return 1;
3338 	}
3339 	return 0;
3340 }
3341 int is_skx(unsigned int family, unsigned int model)
3342 {
3343 
3344 	if (!genuine_intel)
3345 		return 0;
3346 
3347 	switch (model) {
3348 	case INTEL_FAM6_SKYLAKE_X:
3349 		return 1;
3350 	}
3351 	return 0;
3352 }
3353 int is_ehl(unsigned int family, unsigned int model)
3354 {
3355 	if (!genuine_intel)
3356 		return 0;
3357 
3358 	switch (model) {
3359 	case INTEL_FAM6_ATOM_TREMONT:
3360 		return 1;
3361 	}
3362 	return 0;
3363 }
3364 
3365 int has_turbo_ratio_limit(unsigned int family, unsigned int model)
3366 {
3367 	if (has_slv_msrs(family, model))
3368 		return 0;
3369 
3370 	switch (model) {
3371 	/* Nehalem compatible, but do not include turbo-ratio limit support */
3372 	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
3373 	case INTEL_FAM6_XEON_PHI_KNL:	/* PHI - Knights Landing (different MSR definition) */
3374 		return 0;
3375 	default:
3376 		return 1;
3377 	}
3378 }
/* Return 1 exactly when has_slv_msrs() accepts this family/model, else 0. */
int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	return has_slv_msrs(family, model) ? 1 : 0;
}
3386 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
3387 {
3388 	if (!genuine_intel)
3389 		return 0;
3390 
3391 	if (family != 6)
3392 		return 0;
3393 
3394 	switch (model) {
3395 	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
3396 	case INTEL_FAM6_HASWELL_X:	/* HSW Xeon */
3397 		return 1;
3398 	default:
3399 		return 0;
3400 	}
3401 }
3402 int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
3403 {
3404 	if (!genuine_intel)
3405 		return 0;
3406 
3407 	if (family != 6)
3408 		return 0;
3409 
3410 	switch (model) {
3411 	case INTEL_FAM6_HASWELL_X:	/* HSW Xeon */
3412 		return 1;
3413 	default:
3414 		return 0;
3415 	}
3416 }
3417 
3418 int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
3419 {
3420 	if (!genuine_intel)
3421 		return 0;
3422 
3423 	if (family != 6)
3424 		return 0;
3425 
3426 	switch (model) {
3427 	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
3428 		return 1;
3429 	default:
3430 		return 0;
3431 	}
3432 }
3433 int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
3434 {
3435 	if (!genuine_intel)
3436 		return 0;
3437 
3438 	if (family != 6)
3439 		return 0;
3440 
3441 	switch (model) {
3442 	case INTEL_FAM6_ATOM_GOLDMONT:
3443 	case INTEL_FAM6_SKYLAKE_X:
3444 		return 1;
3445 	default:
3446 		return 0;
3447 	}
3448 }
3449 int has_config_tdp(unsigned int family, unsigned int model)
3450 {
3451 	if (!genuine_intel)
3452 		return 0;
3453 
3454 	if (family != 6)
3455 		return 0;
3456 
3457 	switch (model) {
3458 	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
3459 	case INTEL_FAM6_HASWELL:	/* HSW */
3460 	case INTEL_FAM6_HASWELL_X:	/* HSX */
3461 	case INTEL_FAM6_HASWELL_L:	/* HSW */
3462 	case INTEL_FAM6_HASWELL_G:	/* HSW */
3463 	case INTEL_FAM6_BROADWELL:	/* BDW */
3464 	case INTEL_FAM6_BROADWELL_G:	/* BDW */
3465 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
3466 	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
3467 	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
3468 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
3469 
3470 	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
3471 		return 1;
3472 	default:
3473 		return 0;
3474 	}
3475 }
3476 
/*
 * Dump the platform-info, turbo-ratio-limit, config-TDP and C-state
 * configuration registers that apply to this family/model.
 *
 * Does nothing unless do_nhm_platform_info is set.  Each optional dump
 * is gated by its matching has_*() predicate; the calls are made in
 * the fixed order shown.
 */
static void
dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
{
	if (!do_nhm_platform_info)
		return;

	dump_nhm_platform_info();

	/* more than one of the predicates below can hold for a given model */
	if (has_hsw_turbo_ratio_limit(family, model))
		dump_hsw_turbo_ratio_limits();

	if (has_ivt_turbo_ratio_limit(family, model))
		dump_ivt_turbo_ratio_limits();

	if (has_turbo_ratio_limit(family, model))
		dump_turbo_ratio_limits(family, model);

	if (has_atom_turbo_ratio_limit(family, model))
		dump_atom_turbo_ratio_limits();

	if (has_knl_turbo_ratio_limit(family, model))
		dump_knl_turbo_ratio_limits();

	if (has_config_tdp(family, model))
		dump_config_tdp();

	dump_nhm_cst_cfg();
}
3505 
3506 static void dump_sysfs_file(char *path)
3507 {
3508 	FILE *input;
3509 	char cpuidle_buf[64];
3510 
3511 	input = fopen(path, "r");
3512 	if (input == NULL) {
3513 		if (debug)
3514 			fprintf(outf, "NSFOD %s\n", path);
3515 		return;
3516 	}
3517 	if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
3518 		err(1, "%s: failed to read file", path);
3519 	fclose(input);
3520 
3521 	fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
3522 }
3523 static void
3524 dump_sysfs_cstate_config(void)
3525 {
3526 	char path[64];
3527 	char name_buf[16];
3528 	char desc[64];
3529 	FILE *input;
3530 	int state;
3531 	char *sp;
3532 
3533 	if (!DO_BIC(BIC_sysfs))
3534 		return;
3535 
3536 	if (access("/sys/devices/system/cpu/cpuidle", R_OK)) {
3537 		fprintf(outf, "cpuidle not loaded\n");
3538 		return;
3539 	}
3540 
3541 	dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver");
3542 	dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor");
3543 	dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro");
3544 
3545 	for (state = 0; state < 10; ++state) {
3546 
3547 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
3548 			base_cpu, state);
3549 		input = fopen(path, "r");
3550 		if (input == NULL)
3551 			continue;
3552 		if (!fgets(name_buf, sizeof(name_buf), input))
3553 			err(1, "%s: failed to read file", path);
3554 
3555 		 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
3556 		sp = strchr(name_buf, '-');
3557 		if (!sp)
3558 			sp = strchrnul(name_buf, '\n');
3559 		*sp = '\0';
3560 		fclose(input);
3561 
3562 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc",
3563 			base_cpu, state);
3564 		input = fopen(path, "r");
3565 		if (input == NULL)
3566 			continue;
3567 		if (!fgets(desc, sizeof(desc), input))
3568 			err(1, "%s: failed to read file", path);
3569 
3570 		fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
3571 		fclose(input);
3572 	}
3573 }
3574 static void
3575 dump_sysfs_pstate_config(void)
3576 {
3577 	char path[64];
3578 	char driver_buf[64];
3579 	char governor_buf[64];
3580 	FILE *input;
3581 	int turbo;
3582 
3583 	sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver",
3584 			base_cpu);
3585 	input = fopen(path, "r");
3586 	if (input == NULL) {
3587 		fprintf(outf, "NSFOD %s\n", path);
3588 		return;
3589 	}
3590 	if (!fgets(driver_buf, sizeof(driver_buf), input))
3591 		err(1, "%s: failed to read file", path);
3592 	fclose(input);
3593 
3594 	sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor",
3595 			base_cpu);
3596 	input = fopen(path, "r");
3597 	if (input == NULL) {
3598 		fprintf(outf, "NSFOD %s\n", path);
3599 		return;
3600 	}
3601 	if (!fgets(governor_buf, sizeof(governor_buf), input))
3602 		err(1, "%s: failed to read file", path);
3603 	fclose(input);
3604 
3605 	fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
3606 	fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);
3607 
3608 	sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
3609 	input = fopen(path, "r");
3610 	if (input != NULL) {
3611 		if (fscanf(input, "%d", &turbo) != 1)
3612 			err(1, "%s: failed to parse number from file", path);
3613 		fprintf(outf, "cpufreq boost: %d\n", turbo);
3614 		fclose(input);
3615 	}
3616 
3617 	sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
3618 	input = fopen(path, "r");
3619 	if (input != NULL) {
3620 		if (fscanf(input, "%d", &turbo) != 1)
3621 			err(1, "%s: failed to parse number from file", path);
3622 		fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
3623 		fclose(input);
3624 	}
3625 }
3626 
3627 
3628 /*
3629  * print_epb()
3630  * Decode the ENERGY_PERF_BIAS MSR
3631  */
3632 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3633 {
3634 	unsigned long long msr;
3635 	char *epb_string;
3636 	int cpu;
3637 
3638 	if (!has_epb)
3639 		return 0;
3640 
3641 	cpu = t->cpu_id;
3642 
3643 	/* EPB is per-package */
3644 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3645 		return 0;
3646 
3647 	if (cpu_migrate(cpu)) {
3648 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3649 		return -1;
3650 	}
3651 
3652 	if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
3653 		return 0;
3654 
3655 	switch (msr & 0xF) {
3656 	case ENERGY_PERF_BIAS_PERFORMANCE:
3657 		epb_string = "performance";
3658 		break;
3659 	case ENERGY_PERF_BIAS_NORMAL:
3660 		epb_string = "balanced";
3661 		break;
3662 	case ENERGY_PERF_BIAS_POWERSAVE:
3663 		epb_string = "powersave";
3664 		break;
3665 	default:
3666 		epb_string = "custom";
3667 		break;
3668 	}
3669 	fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
3670 
3671 	return 0;
3672 }
3673 /*
3674  * print_hwp()
3675  * Decode the MSR_HWP_CAPABILITIES
3676  */
3677 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3678 {
3679 	unsigned long long msr;
3680 	int cpu;
3681 
3682 	if (!has_hwp)
3683 		return 0;
3684 
3685 	cpu = t->cpu_id;
3686 
3687 	/* MSR_HWP_CAPABILITIES is per-package */
3688 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3689 		return 0;
3690 
3691 	if (cpu_migrate(cpu)) {
3692 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3693 		return -1;
3694 	}
3695 
3696 	if (get_msr(cpu, MSR_PM_ENABLE, &msr))
3697 		return 0;
3698 
3699 	fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
3700 		cpu, msr, (msr & (1 << 0)) ? "" : "No-");
3701 
3702 	/* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
3703 	if ((msr & (1 << 0)) == 0)
3704 		return 0;
3705 
3706 	if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
3707 		return 0;
3708 
3709 	fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
3710 			"(high %d guar %d eff %d low %d)\n",
3711 			cpu, msr,
3712 			(unsigned int)HWP_HIGHEST_PERF(msr),
3713 			(unsigned int)HWP_GUARANTEED_PERF(msr),
3714 			(unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
3715 			(unsigned int)HWP_LOWEST_PERF(msr));
3716 
3717 	if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
3718 		return 0;
3719 
3720 	fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
3721 			"(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
3722 			cpu, msr,
3723 			(unsigned int)(((msr) >> 0) & 0xff),
3724 			(unsigned int)(((msr) >> 8) & 0xff),
3725 			(unsigned int)(((msr) >> 16) & 0xff),
3726 			(unsigned int)(((msr) >> 24) & 0xff),
3727 			(unsigned int)(((msr) >> 32) & 0xff3),
3728 			(unsigned int)(((msr) >> 42) & 0x1));
3729 
3730 	if (has_hwp_pkg) {
3731 		if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
3732 			return 0;
3733 
3734 		fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
3735 			"(min %d max %d des %d epp 0x%x window 0x%x)\n",
3736 			cpu, msr,
3737 			(unsigned int)(((msr) >> 0) & 0xff),
3738 			(unsigned int)(((msr) >> 8) & 0xff),
3739 			(unsigned int)(((msr) >> 16) & 0xff),
3740 			(unsigned int)(((msr) >> 24) & 0xff),
3741 			(unsigned int)(((msr) >> 32) & 0xff3));
3742 	}
3743 	if (has_hwp_notify) {
3744 		if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
3745 			return 0;
3746 
3747 		fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
3748 			"(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
3749 			cpu, msr,
3750 			((msr) & 0x1) ? "EN" : "Dis",
3751 			((msr) & 0x2) ? "EN" : "Dis");
3752 	}
3753 	if (get_msr(cpu, MSR_HWP_STATUS, &msr))
3754 		return 0;
3755 
3756 	fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
3757 			"(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
3758 			cpu, msr,
3759 			((msr) & 0x1) ? "" : "No-",
3760 			((msr) & 0x2) ? "" : "No-");
3761 
3762 	return 0;
3763 }
3764 
3765 /*
3766  * print_perf_limit()
3767  */
3768 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3769 {
3770 	unsigned long long msr;
3771 	int cpu;
3772 
3773 	cpu = t->cpu_id;
3774 
3775 	/* per-package */
3776 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3777 		return 0;
3778 
3779 	if (cpu_migrate(cpu)) {
3780 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3781 		return -1;
3782 	}
3783 
3784 	if (do_core_perf_limit_reasons) {
3785 		get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
3786 		fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3787 		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
3788 			(msr & 1 << 15) ? "bit15, " : "",
3789 			(msr & 1 << 14) ? "bit14, " : "",
3790 			(msr & 1 << 13) ? "Transitions, " : "",
3791 			(msr & 1 << 12) ? "MultiCoreTurbo, " : "",
3792 			(msr & 1 << 11) ? "PkgPwrL2, " : "",
3793 			(msr & 1 << 10) ? "PkgPwrL1, " : "",
3794 			(msr & 1 << 9) ? "CorePwr, " : "",
3795 			(msr & 1 << 8) ? "Amps, " : "",
3796 			(msr & 1 << 6) ? "VR-Therm, " : "",
3797 			(msr & 1 << 5) ? "Auto-HWP, " : "",
3798 			(msr & 1 << 4) ? "Graphics, " : "",
3799 			(msr & 1 << 2) ? "bit2, " : "",
3800 			(msr & 1 << 1) ? "ThermStatus, " : "",
3801 			(msr & 1 << 0) ? "PROCHOT, " : "");
3802 		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
3803 			(msr & 1 << 31) ? "bit31, " : "",
3804 			(msr & 1 << 30) ? "bit30, " : "",
3805 			(msr & 1 << 29) ? "Transitions, " : "",
3806 			(msr & 1 << 28) ? "MultiCoreTurbo, " : "",
3807 			(msr & 1 << 27) ? "PkgPwrL2, " : "",
3808 			(msr & 1 << 26) ? "PkgPwrL1, " : "",
3809 			(msr & 1 << 25) ? "CorePwr, " : "",
3810 			(msr & 1 << 24) ? "Amps, " : "",
3811 			(msr & 1 << 22) ? "VR-Therm, " : "",
3812 			(msr & 1 << 21) ? "Auto-HWP, " : "",
3813 			(msr & 1 << 20) ? "Graphics, " : "",
3814 			(msr & 1 << 18) ? "bit18, " : "",
3815 			(msr & 1 << 17) ? "ThermStatus, " : "",
3816 			(msr & 1 << 16) ? "PROCHOT, " : "");
3817 
3818 	}
3819 	if (do_gfx_perf_limit_reasons) {
3820 		get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
3821 		fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3822 		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
3823 			(msr & 1 << 0) ? "PROCHOT, " : "",
3824 			(msr & 1 << 1) ? "ThermStatus, " : "",
3825 			(msr & 1 << 4) ? "Graphics, " : "",
3826 			(msr & 1 << 6) ? "VR-Therm, " : "",
3827 			(msr & 1 << 8) ? "Amps, " : "",
3828 			(msr & 1 << 9) ? "GFXPwr, " : "",
3829 			(msr & 1 << 10) ? "PkgPwrL1, " : "",
3830 			(msr & 1 << 11) ? "PkgPwrL2, " : "");
3831 		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
3832 			(msr & 1 << 16) ? "PROCHOT, " : "",
3833 			(msr & 1 << 17) ? "ThermStatus, " : "",
3834 			(msr & 1 << 20) ? "Graphics, " : "",
3835 			(msr & 1 << 22) ? "VR-Therm, " : "",
3836 			(msr & 1 << 24) ? "Amps, " : "",
3837 			(msr & 1 << 25) ? "GFXPwr, " : "",
3838 			(msr & 1 << 26) ? "PkgPwrL1, " : "",
3839 			(msr & 1 << 27) ? "PkgPwrL2, " : "");
3840 	}
3841 	if (do_ring_perf_limit_reasons) {
3842 		get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
3843 		fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3844 		fprintf(outf, " (Active: %s%s%s%s%s%s)",
3845 			(msr & 1 << 0) ? "PROCHOT, " : "",
3846 			(msr & 1 << 1) ? "ThermStatus, " : "",
3847 			(msr & 1 << 6) ? "VR-Therm, " : "",
3848 			(msr & 1 << 8) ? "Amps, " : "",
3849 			(msr & 1 << 10) ? "PkgPwrL1, " : "",
3850 			(msr & 1 << 11) ? "PkgPwrL2, " : "");
3851 		fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
3852 			(msr & 1 << 16) ? "PROCHOT, " : "",
3853 			(msr & 1 << 17) ? "ThermStatus, " : "",
3854 			(msr & 1 << 22) ? "VR-Therm, " : "",
3855 			(msr & 1 << 24) ? "Amps, " : "",
3856 			(msr & 1 << 26) ? "PkgPwrL1, " : "",
3857 			(msr & 1 << 27) ? "PkgPwrL2, " : "");
3858 	}
3859 	return 0;
3860 }
3861 
#define	RAPL_POWER_GRANULARITY	0x7FFF	/* mask: low 15 bits, the power field width */
#define	RAPL_TIME_GRANULARITY	0x3F /* mask: low 6 bits, the time field width */
3864 
3865 double get_tdp_intel(unsigned int model)
3866 {
3867 	unsigned long long msr;
3868 
3869 	if (do_rapl & RAPL_PKG_POWER_INFO)
3870 		if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
3871 			return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
3872 
3873 	switch (model) {
3874 	case INTEL_FAM6_ATOM_SILVERMONT:
3875 	case INTEL_FAM6_ATOM_SILVERMONT_D:
3876 		return 30.0;
3877 	default:
3878 		return 135.0;
3879 	}
3880 }
3881 
/*
 * Return a TDP estimate for an AMD part.
 *
 * Every family currently maps to the same value, so the argument does
 * not influence the result.
 */
double get_tdp_amd(unsigned int family)
{
	(void)family;

	/* This is the max stock TDP of HEDT/Server Fam17h chips */
	return 250.0;
}
3892 
3893 /*
3894  * rapl_dram_energy_units_probe()
3895  * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
3896  */
3897 static double
3898 rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
3899 {
3900 	/* only called for genuine_intel, family 6 */
3901 
3902 	switch (model) {
3903 	case INTEL_FAM6_HASWELL_X:	/* HSX */
3904 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
3905 	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
3906 		return (rapl_dram_energy_units = 15.3 / 1000000);
3907 	default:
3908 		return (rapl_energy_units);
3909 	}
3910 }
3911 
3912 void rapl_probe_intel(unsigned int family, unsigned int model)
3913 {
3914 	unsigned long long msr;
3915 	unsigned int time_unit;
3916 	double tdp;
3917 
3918 	if (family != 6)
3919 		return;
3920 
3921 	switch (model) {
3922 	case INTEL_FAM6_SANDYBRIDGE:
3923 	case INTEL_FAM6_IVYBRIDGE:
3924 	case INTEL_FAM6_HASWELL:	/* HSW */
3925 	case INTEL_FAM6_HASWELL_L:	/* HSW */
3926 	case INTEL_FAM6_HASWELL_G:	/* HSW */
3927 	case INTEL_FAM6_BROADWELL:	/* BDW */
3928 	case INTEL_FAM6_BROADWELL_G:	/* BDW */
3929 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
3930 		if (rapl_joules) {
3931 			BIC_PRESENT(BIC_Pkg_J);
3932 			BIC_PRESENT(BIC_Cor_J);
3933 			BIC_PRESENT(BIC_GFX_J);
3934 		} else {
3935 			BIC_PRESENT(BIC_PkgWatt);
3936 			BIC_PRESENT(BIC_CorWatt);
3937 			BIC_PRESENT(BIC_GFXWatt);
3938 		}
3939 		break;
3940 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
3941 	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3942 		do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
3943 		if (rapl_joules)
3944 			BIC_PRESENT(BIC_Pkg_J);
3945 		else
3946 			BIC_PRESENT(BIC_PkgWatt);
3947 		break;
3948 	case INTEL_FAM6_ATOM_TREMONT:	/* EHL */
3949 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
3950 		if (rapl_joules) {
3951 			BIC_PRESENT(BIC_Pkg_J);
3952 			BIC_PRESENT(BIC_Cor_J);
3953 			BIC_PRESENT(BIC_RAM_J);
3954 			BIC_PRESENT(BIC_GFX_J);
3955 		} else {
3956 			BIC_PRESENT(BIC_PkgWatt);
3957 			BIC_PRESENT(BIC_CorWatt);
3958 			BIC_PRESENT(BIC_RAMWatt);
3959 			BIC_PRESENT(BIC_GFXWatt);
3960 		}
3961 		break;
3962 	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
3963 	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
3964 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
3965 		BIC_PRESENT(BIC_PKG__);
3966 		BIC_PRESENT(BIC_RAM__);
3967 		if (rapl_joules) {
3968 			BIC_PRESENT(BIC_Pkg_J);
3969 			BIC_PRESENT(BIC_Cor_J);
3970 			BIC_PRESENT(BIC_RAM_J);
3971 			BIC_PRESENT(BIC_GFX_J);
3972 		} else {
3973 			BIC_PRESENT(BIC_PkgWatt);
3974 			BIC_PRESENT(BIC_CorWatt);
3975 			BIC_PRESENT(BIC_RAMWatt);
3976 			BIC_PRESENT(BIC_GFXWatt);
3977 		}
3978 		break;
3979 	case INTEL_FAM6_HASWELL_X:	/* HSX */
3980 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
3981 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
3982 	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
3983 		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
3984 		BIC_PRESENT(BIC_PKG__);
3985 		BIC_PRESENT(BIC_RAM__);
3986 		if (rapl_joules) {
3987 			BIC_PRESENT(BIC_Pkg_J);
3988 			BIC_PRESENT(BIC_RAM_J);
3989 		} else {
3990 			BIC_PRESENT(BIC_PkgWatt);
3991 			BIC_PRESENT(BIC_RAMWatt);
3992 		}
3993 		break;
3994 	case INTEL_FAM6_SANDYBRIDGE_X:
3995 	case INTEL_FAM6_IVYBRIDGE_X:
3996 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
3997 		BIC_PRESENT(BIC_PKG__);
3998 		BIC_PRESENT(BIC_RAM__);
3999 		if (rapl_joules) {
4000 			BIC_PRESENT(BIC_Pkg_J);
4001 			BIC_PRESENT(BIC_Cor_J);
4002 			BIC_PRESENT(BIC_RAM_J);
4003 		} else {
4004 			BIC_PRESENT(BIC_PkgWatt);
4005 			BIC_PRESENT(BIC_CorWatt);
4006 			BIC_PRESENT(BIC_RAMWatt);
4007 		}
4008 		break;
4009 	case INTEL_FAM6_ATOM_SILVERMONT:	/* BYT */
4010 	case INTEL_FAM6_ATOM_SILVERMONT_D:	/* AVN */
4011 		do_rapl = RAPL_PKG | RAPL_CORES;
4012 		if (rapl_joules) {
4013 			BIC_PRESENT(BIC_Pkg_J);
4014 			BIC_PRESENT(BIC_Cor_J);
4015 		} else {
4016 			BIC_PRESENT(BIC_PkgWatt);
4017 			BIC_PRESENT(BIC_CorWatt);
4018 		}
4019 		break;
4020 	case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
4021 		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
4022 		BIC_PRESENT(BIC_PKG__);
4023 		BIC_PRESENT(BIC_RAM__);
4024 		if (rapl_joules) {
4025 			BIC_PRESENT(BIC_Pkg_J);
4026 			BIC_PRESENT(BIC_Cor_J);
4027 			BIC_PRESENT(BIC_RAM_J);
4028 		} else {
4029 			BIC_PRESENT(BIC_PkgWatt);
4030 			BIC_PRESENT(BIC_CorWatt);
4031 			BIC_PRESENT(BIC_RAMWatt);
4032 		}
4033 		break;
4034 	default:
4035 		return;
4036 	}
4037 
4038 	/* units on package 0, verify later other packages match */
4039 	if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
4040 		return;
4041 
4042 	rapl_power_units = 1.0 / (1 << (msr & 0xF));
4043 	if (model == INTEL_FAM6_ATOM_SILVERMONT)
4044 		rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
4045 	else
4046 		rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
4047 
4048 	rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
4049 
4050 	time_unit = msr >> 16 & 0xF;
4051 	if (time_unit == 0)
4052 		time_unit = 0xA;
4053 
4054 	rapl_time_units = 1.0 / (1 << (time_unit));
4055 
4056 	tdp = get_tdp_intel(model);
4057 
4058 	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
4059 	if (!quiet)
4060 		fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
4061 }
4062 
4063 void rapl_probe_amd(unsigned int family, unsigned int model)
4064 {
4065 	unsigned long long msr;
4066 	unsigned int eax, ebx, ecx, edx;
4067 	unsigned int has_rapl = 0;
4068 	double tdp;
4069 
4070 	if (max_extended_level >= 0x80000007) {
4071 		__cpuid(0x80000007, eax, ebx, ecx, edx);
4072 		/* RAPL (Fam 17h) */
4073 		has_rapl = edx & (1 << 14);
4074 	}
4075 
4076 	if (!has_rapl)
4077 		return;
4078 
4079 	switch (family) {
4080 	case 0x17: /* Zen, Zen+ */
4081 	case 0x18: /* Hygon Dhyana */
4082 		do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
4083 		if (rapl_joules) {
4084 			BIC_PRESENT(BIC_Pkg_J);
4085 			BIC_PRESENT(BIC_Cor_J);
4086 		} else {
4087 			BIC_PRESENT(BIC_PkgWatt);
4088 			BIC_PRESENT(BIC_CorWatt);
4089 		}
4090 		break;
4091 	default:
4092 		return;
4093 	}
4094 
4095 	if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
4096 		return;
4097 
4098 	rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf));
4099 	rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
4100 	rapl_power_units = ldexp(1.0, -(msr & 0xf));
4101 
4102 	tdp = get_tdp_amd(family);
4103 
4104 	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
4105 	if (!quiet)
4106 		fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
4107 }
4108 
4109 /*
4110  * rapl_probe()
4111  *
4112  * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
4113  */
4114 void rapl_probe(unsigned int family, unsigned int model)
4115 {
4116 	if (genuine_intel)
4117 		rapl_probe_intel(family, model);
4118 	if (authentic_amd || hygon_genuine)
4119 		rapl_probe_amd(family, model);
4120 }
4121 
4122 void perf_limit_reasons_probe(unsigned int family, unsigned int model)
4123 {
4124 	if (!genuine_intel)
4125 		return;
4126 
4127 	if (family != 6)
4128 		return;
4129 
4130 	switch (model) {
4131 	case INTEL_FAM6_HASWELL:	/* HSW */
4132 	case INTEL_FAM6_HASWELL_L:	/* HSW */
4133 	case INTEL_FAM6_HASWELL_G:	/* HSW */
4134 		do_gfx_perf_limit_reasons = 1;
4135 	case INTEL_FAM6_HASWELL_X:	/* HSX */
4136 		do_core_perf_limit_reasons = 1;
4137 		do_ring_perf_limit_reasons = 1;
4138 	default:
4139 		return;
4140 	}
4141 }
4142 
4143 void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
4144 {
4145 	if (is_skx(family, model) || is_bdx(family, model))
4146 		has_automatic_cstate_conversion = 1;
4147 }
4148 
4149 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4150 {
4151 	unsigned long long msr;
4152 	unsigned int dts, dts2;
4153 	int cpu;
4154 
4155 	if (!(do_dts || do_ptm))
4156 		return 0;
4157 
4158 	cpu = t->cpu_id;
4159 
4160 	/* DTS is per-core, no need to print for each thread */
4161 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
4162 		return 0;
4163 
4164 	if (cpu_migrate(cpu)) {
4165 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4166 		return -1;
4167 	}
4168 
4169 	if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
4170 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
4171 			return 0;
4172 
4173 		dts = (msr >> 16) & 0x7F;
4174 		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
4175 			cpu, msr, tcc_activation_temp - dts);
4176 
4177 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
4178 			return 0;
4179 
4180 		dts = (msr >> 16) & 0x7F;
4181 		dts2 = (msr >> 8) & 0x7F;
4182 		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
4183 			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
4184 	}
4185 
4186 
4187 	if (do_dts && debug) {
4188 		unsigned int resolution;
4189 
4190 		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
4191 			return 0;
4192 
4193 		dts = (msr >> 16) & 0x7F;
4194 		resolution = (msr >> 27) & 0xF;
4195 		fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
4196 			cpu, msr, tcc_activation_temp - dts, resolution);
4197 
4198 		if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
4199 			return 0;
4200 
4201 		dts = (msr >> 16) & 0x7F;
4202 		dts2 = (msr >> 8) & 0x7F;
4203 		fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
4204 			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
4205 	}
4206 
4207 	return 0;
4208 }
4209 
4210 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
4211 {
4212 	fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
4213 		cpu, label,
4214 		((msr >> 15) & 1) ? "EN" : "DIS",
4215 		((msr >> 0) & 0x7FFF) * rapl_power_units,
4216 		(1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
4217 		(((msr >> 16) & 1) ? "EN" : "DIS"));
4218 
4219 	return;
4220 }
4221 
4222 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4223 {
4224 	unsigned long long msr;
4225 	const char *msr_name;
4226 	int cpu;
4227 
4228 	if (!do_rapl)
4229 		return 0;
4230 
4231 	/* RAPL counters are per package, so print only for 1st thread/package */
4232 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4233 		return 0;
4234 
4235 	cpu = t->cpu_id;
4236 	if (cpu_migrate(cpu)) {
4237 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4238 		return -1;
4239 	}
4240 
4241 	if (do_rapl & RAPL_AMD_F17H) {
4242 		msr_name = "MSR_RAPL_PWR_UNIT";
4243 		if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
4244 			return -1;
4245 	} else {
4246 		msr_name = "MSR_RAPL_POWER_UNIT";
4247 		if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
4248 			return -1;
4249 	}
4250 
4251 	fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr,
4252 		rapl_power_units, rapl_energy_units, rapl_time_units);
4253 
4254 	if (do_rapl & RAPL_PKG_POWER_INFO) {
4255 
4256 		if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
4257                 	return -5;
4258 
4259 
4260 		fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4261 			cpu, msr,
4262 			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4263 			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4264 			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4265 			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4266 
4267 	}
4268 	if (do_rapl & RAPL_PKG) {
4269 
4270 		if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
4271 			return -9;
4272 
4273 		fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
4274 			cpu, msr, (msr >> 63) & 1 ? "" : "UN");
4275 
4276 		print_power_limit_msr(cpu, msr, "PKG Limit #1");
4277 		fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
4278 			cpu,
4279 			((msr >> 47) & 1) ? "EN" : "DIS",
4280 			((msr >> 32) & 0x7FFF) * rapl_power_units,
4281 			(1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
4282 			((msr >> 48) & 1) ? "EN" : "DIS");
4283 	}
4284 
4285 	if (do_rapl & RAPL_DRAM_POWER_INFO) {
4286 		if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
4287                 	return -6;
4288 
4289 		fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4290 			cpu, msr,
4291 			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4292 			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4293 			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4294 			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4295 	}
4296 	if (do_rapl & RAPL_DRAM) {
4297 		if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
4298 			return -9;
4299 		fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
4300 				cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4301 
4302 		print_power_limit_msr(cpu, msr, "DRAM Limit");
4303 	}
4304 	if (do_rapl & RAPL_CORE_POLICY) {
4305 		if (get_msr(cpu, MSR_PP0_POLICY, &msr))
4306 			return -7;
4307 
4308 		fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
4309 	}
4310 	if (do_rapl & RAPL_CORES_POWER_LIMIT) {
4311 		if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
4312 			return -9;
4313 		fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
4314 				cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4315 		print_power_limit_msr(cpu, msr, "Cores Limit");
4316 	}
4317 	if (do_rapl & RAPL_GFX) {
4318 		if (get_msr(cpu, MSR_PP1_POLICY, &msr))
4319 			return -8;
4320 
4321 		fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
4322 
4323 		if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
4324 			return -9;
4325 		fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
4326 				cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4327 		print_power_limit_msr(cpu, msr, "GFX Limit");
4328 	}
4329 	return 0;
4330 }
4331 
4332 /*
4333  * SNB adds support for additional MSRs:
4334  *
4335  * MSR_PKG_C7_RESIDENCY            0x000003fa
4336  * MSR_CORE_C7_RESIDENCY           0x000003fe
4337  * MSR_PKG_C2_RESIDENCY            0x0000060d
4338  */
4339 
4340 int has_snb_msrs(unsigned int family, unsigned int model)
4341 {
4342 	if (!genuine_intel)
4343 		return 0;
4344 
4345 	switch (model) {
4346 	case INTEL_FAM6_SANDYBRIDGE:
4347 	case INTEL_FAM6_SANDYBRIDGE_X:
4348 	case INTEL_FAM6_IVYBRIDGE:		/* IVB */
4349 	case INTEL_FAM6_IVYBRIDGE_X:		/* IVB Xeon */
4350 	case INTEL_FAM6_HASWELL:		/* HSW */
4351 	case INTEL_FAM6_HASWELL_X:		/* HSW */
4352 	case INTEL_FAM6_HASWELL_L:		/* HSW */
4353 	case INTEL_FAM6_HASWELL_G:		/* HSW */
4354 	case INTEL_FAM6_BROADWELL:		/* BDW */
4355 	case INTEL_FAM6_BROADWELL_G:		/* BDW */
4356 	case INTEL_FAM6_BROADWELL_X:		/* BDX */
4357 	case INTEL_FAM6_SKYLAKE_L:		/* SKL */
4358 	case INTEL_FAM6_CANNONLAKE_L:		/* CNL */
4359 	case INTEL_FAM6_SKYLAKE_X:		/* SKX */
4360 	case INTEL_FAM6_ATOM_GOLDMONT:		/* BXT */
4361 	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4362 	case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
4363 	case INTEL_FAM6_ATOM_TREMONT:		/* EHL */
4364 		return 1;
4365 	}
4366 	return 0;
4367 }
4368 
4369 /*
4370  * HSW ULT added support for C8/C9/C10 MSRs:
4371  *
4372  * MSR_PKG_C8_RESIDENCY		0x00000630
4373  * MSR_PKG_C9_RESIDENCY		0x00000631
4374  * MSR_PKG_C10_RESIDENCY	0x00000632
4375  *
4376  * MSR_PKGC8_IRTL		0x00000633
4377  * MSR_PKGC9_IRTL		0x00000634
4378  * MSR_PKGC10_IRTL		0x00000635
4379  *
4380  */
4381 int has_c8910_msrs(unsigned int family, unsigned int model)
4382 {
4383 	if (!genuine_intel)
4384 		return 0;
4385 
4386 	switch (model) {
4387 	case INTEL_FAM6_HASWELL_L:	/* HSW */
4388 	case INTEL_FAM6_BROADWELL:	/* BDW */
4389 	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
4390 	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
4391 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
4392 	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4393 	case INTEL_FAM6_ATOM_TREMONT:	/* EHL */
4394 		return 1;
4395 	}
4396 	return 0;
4397 }
4398 
4399 /*
4400  * SKL adds support for additional MSRS:
4401  *
4402  * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
4403  * MSR_PKG_ANY_CORE_C0_RES         0x00000659
4404  * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
4405  * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
4406  */
4407 int has_skl_msrs(unsigned int family, unsigned int model)
4408 {
4409 	if (!genuine_intel)
4410 		return 0;
4411 
4412 	switch (model) {
4413 	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
4414 	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
4415 		return 1;
4416 	}
4417 	return 0;
4418 }
4419 
4420 int is_slm(unsigned int family, unsigned int model)
4421 {
4422 	if (!genuine_intel)
4423 		return 0;
4424 	switch (model) {
4425 	case INTEL_FAM6_ATOM_SILVERMONT:	/* BYT */
4426 	case INTEL_FAM6_ATOM_SILVERMONT_D:	/* AVN */
4427 		return 1;
4428 	}
4429 	return 0;
4430 }
4431 
4432 int is_knl(unsigned int family, unsigned int model)
4433 {
4434 	if (!genuine_intel)
4435 		return 0;
4436 	switch (model) {
4437 	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
4438 		return 1;
4439 	}
4440 	return 0;
4441 }
4442 
4443 int is_cnl(unsigned int family, unsigned int model)
4444 {
4445 	if (!genuine_intel)
4446 		return 0;
4447 
4448 	switch (model) {
4449 	case INTEL_FAM6_CANNONLAKE_L: /* CNL */
4450 		return 1;
4451 	}
4452 
4453 	return 0;
4454 }
4455 
/*
 * Returns the APERF/MPERF multiplier for this processor:
 * 1024 when is_knl() matches, 1 for everything else.
 */
unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
{
	return is_knl(family, model) ? 1024 : 1;
}
4462 
4463 #define SLM_BCLK_FREQS 5
4464 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
4465 
4466 double slm_bclk(void)
4467 {
4468 	unsigned long long msr = 3;
4469 	unsigned int i;
4470 	double freq;
4471 
4472 	if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
4473 		fprintf(outf, "SLM BCLK: unknown\n");
4474 
4475 	i = msr & 0xf;
4476 	if (i >= SLM_BCLK_FREQS) {
4477 		fprintf(outf, "SLM BCLK[%d] invalid\n", i);
4478 		i = 3;
4479 	}
4480 	freq = slm_freq_table[i];
4481 
4482 	if (!quiet)
4483 		fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
4484 
4485 	return freq;
4486 }
4487 
/*
 * discover_bclk()
 *
 * Returns the bus clock in MHz: 100 when has_snb_msrs() or is_knl()
 * matches, the value from slm_bclk() when is_slm() matches, and 133.33
 * otherwise.
 */
double discover_bclk(unsigned int family, unsigned int model)
{
	if (has_snb_msrs(family, model) || is_knl(family, model))
		return 100.00;

	if (is_slm(family, model))
		return slm_bclk();

	return 133.33;
}
4497 
4498 /*
4499  * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
4500  * the Thermal Control Circuit (TCC) activates.
4501  * This is usually equal to tjMax.
4502  *
4503  * Older processors do not have this MSR, so there we guess,
4504  * but also allow cmdline over-ride with -T.
4505  *
4506  * Several MSR temperature values are in units of degrees-C
4507  * below this value, including the Digital Thermal Sensor (DTS),
4508  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
4509  */
4510 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4511 {
4512 	unsigned long long msr;
4513 	unsigned int target_c_local;
4514 	int cpu;
4515 
4516 	/* tcc_activation_temp is used only for dts or ptm */
4517 	if (!(do_dts || do_ptm))
4518 		return 0;
4519 
4520 	/* this is a per-package concept */
4521 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4522 		return 0;
4523 
4524 	cpu = t->cpu_id;
4525 	if (cpu_migrate(cpu)) {
4526 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4527 		return -1;
4528 	}
4529 
4530 	if (tcc_activation_temp_override != 0) {
4531 		tcc_activation_temp = tcc_activation_temp_override;
4532 		fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
4533 			cpu, tcc_activation_temp);
4534 		return 0;
4535 	}
4536 
4537 	/* Temperature Target MSR is Nehalem and newer only */
4538 	if (!do_nhm_platform_info)
4539 		goto guess;
4540 
4541 	if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
4542 		goto guess;
4543 
4544 	target_c_local = (msr >> 16) & 0xFF;
4545 
4546 	if (!quiet)
4547 		fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
4548 			cpu, msr, target_c_local);
4549 
4550 	if (!target_c_local)
4551 		goto guess;
4552 
4553 	tcc_activation_temp = target_c_local;
4554 
4555 	return 0;
4556 
4557 guess:
4558 	tcc_activation_temp = TJMAX_DEFAULT;
4559 	fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
4560 		cpu, tcc_activation_temp);
4561 
4562 	return 0;
4563 }
4564 
4565 void decode_feature_control_msr(void)
4566 {
4567 	unsigned long long msr;
4568 
4569 	if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr))
4570 		fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
4571 			base_cpu, msr,
4572 			msr & FEAT_CTL_LOCKED ? "" : "UN-",
4573 			msr & (1 << 18) ? "SGX" : "");
4574 }
4575 
4576 void decode_misc_enable_msr(void)
4577 {
4578 	unsigned long long msr;
4579 
4580 	if (!genuine_intel)
4581 		return;
4582 
4583 	if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
4584 		fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
4585 			base_cpu, msr,
4586 			msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
4587 			msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
4588 			msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
4589 			msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
4590 			msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
4591 }
4592 
4593 void decode_misc_feature_control(void)
4594 {
4595 	unsigned long long msr;
4596 
4597 	if (!has_misc_feature_control)
4598 		return;
4599 
4600 	if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
4601 		fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
4602 			base_cpu, msr,
4603 			msr & (0 << 0) ? "No-" : "",
4604 			msr & (1 << 0) ? "No-" : "",
4605 			msr & (2 << 0) ? "No-" : "",
4606 			msr & (3 << 0) ? "No-" : "");
4607 }
4608 /*
4609  * Decode MSR_MISC_PWR_MGMT
4610  *
4611  * Decode the bits according to the Nehalem documentation
4612  * bit[0] seems to continue to have same meaning going forward
4613  * bit[1] less so...
4614  */
4615 void decode_misc_pwr_mgmt_msr(void)
4616 {
4617 	unsigned long long msr;
4618 
4619 	if (!do_nhm_platform_info)
4620 		return;
4621 
4622 	if (no_MSR_MISC_PWR_MGMT)
4623 		return;
4624 
4625 	if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
4626 		fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
4627 			base_cpu, msr,
4628 			msr & (1 << 0) ? "DIS" : "EN",
4629 			msr & (1 << 1) ? "EN" : "DIS",
4630 			msr & (1 << 8) ? "EN" : "DIS");
4631 }
4632 /*
4633  * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
4634  *
4635  * This MSRs are present on Silvermont processors,
4636  * Intel Atom processor E3000 series (Baytrail), and friends.
4637  */
4638 void decode_c6_demotion_policy_msr(void)
4639 {
4640 	unsigned long long msr;
4641 
4642 	if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
4643 		fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
4644 			base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
4645 
4646 	if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
4647 		fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
4648 			base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
4649 }
4650 
4651 /*
4652  * When models are the same, for the purpose of turbostat, reuse
4653  */
4654 unsigned int intel_model_duplicates(unsigned int model)
4655 {
4656 
4657 	switch(model) {
4658 	case INTEL_FAM6_NEHALEM_EP:	/* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
4659 	case INTEL_FAM6_NEHALEM:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
4660 	case 0x1F:	/* Core i7 and i5 Processor - Nehalem */
4661 	case INTEL_FAM6_WESTMERE:	/* Westmere Client - Clarkdale, Arrandale */
4662 	case INTEL_FAM6_WESTMERE_EP:	/* Westmere EP - Gulftown */
4663 		return INTEL_FAM6_NEHALEM;
4664 
4665 	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
4666 	case INTEL_FAM6_WESTMERE_EX:	/* Westmere-EX Xeon - Eagleton */
4667 		return INTEL_FAM6_NEHALEM_EX;
4668 
4669 	case INTEL_FAM6_XEON_PHI_KNM:
4670 		return INTEL_FAM6_XEON_PHI_KNL;
4671 
4672 	case INTEL_FAM6_BROADWELL_X:
4673 	case INTEL_FAM6_BROADWELL_D:	/* BDX-DE */
4674 		return INTEL_FAM6_BROADWELL_X;
4675 
4676 	case INTEL_FAM6_SKYLAKE_L:
4677 	case INTEL_FAM6_SKYLAKE:
4678 	case INTEL_FAM6_KABYLAKE_L:
4679 	case INTEL_FAM6_KABYLAKE:
4680 	case INTEL_FAM6_COMETLAKE_L:
4681 	case INTEL_FAM6_COMETLAKE:
4682 		return INTEL_FAM6_SKYLAKE_L;
4683 
4684 	case INTEL_FAM6_ICELAKE_L:
4685 	case INTEL_FAM6_ICELAKE_NNPI:
4686 	case INTEL_FAM6_TIGERLAKE_L:
4687 	case INTEL_FAM6_TIGERLAKE:
4688 		return INTEL_FAM6_CANNONLAKE_L;
4689 
4690 	case INTEL_FAM6_ATOM_TREMONT_D:
4691 		return INTEL_FAM6_ATOM_GOLDMONT_D;
4692 
4693 	case INTEL_FAM6_ATOM_TREMONT_L:
4694 		return INTEL_FAM6_ATOM_TREMONT;
4695 
4696 	case INTEL_FAM6_ICELAKE_X:
4697 		return INTEL_FAM6_SKYLAKE_X;
4698 	}
4699 	return model;
4700 }
4701 void process_cpuid()
4702 {
4703 	unsigned int eax, ebx, ecx, edx;
4704 	unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
4705 	unsigned int has_turbo;
4706 
4707 	eax = ebx = ecx = edx = 0;
4708 
4709 	__cpuid(0, max_level, ebx, ecx, edx);
4710 
4711 	if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
4712 		genuine_intel = 1;
4713 	else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
4714 		authentic_amd = 1;
4715 	else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e)
4716 		hygon_genuine = 1;
4717 
4718 	if (!quiet)
4719 		fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
4720 			(char *)&ebx, (char *)&edx, (char *)&ecx);
4721 
4722 	__cpuid(1, fms, ebx, ecx, edx);
4723 	family = (fms >> 8) & 0xf;
4724 	model = (fms >> 4) & 0xf;
4725 	stepping = fms & 0xf;
4726 	if (family == 0xf)
4727 		family += (fms >> 20) & 0xff;
4728 	if (family >= 6)
4729 		model += ((fms >> 16) & 0xf) << 4;
4730 	ecx_flags = ecx;
4731 	edx_flags = edx;
4732 
4733 	/*
4734 	 * check max extended function levels of CPUID.
4735 	 * This is needed to check for invariant TSC.
4736 	 * This check is valid for both Intel and AMD.
4737 	 */
4738 	ebx = ecx = edx = 0;
4739 	__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
4740 
4741 	if (!quiet) {
4742 		fprintf(outf, "0x%x CPUID levels; 0x%x xlevels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
4743 			max_level, max_extended_level, family, model, stepping, family, model, stepping);
4744 		fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
4745 			ecx_flags & (1 << 0) ? "SSE3" : "-",
4746 			ecx_flags & (1 << 3) ? "MONITOR" : "-",
4747 			ecx_flags & (1 << 6) ? "SMX" : "-",
4748 			ecx_flags & (1 << 7) ? "EIST" : "-",
4749 			ecx_flags & (1 << 8) ? "TM2" : "-",
4750 			edx_flags & (1 << 4) ? "TSC" : "-",
4751 			edx_flags & (1 << 5) ? "MSR" : "-",
4752 			edx_flags & (1 << 22) ? "ACPI-TM" : "-",
4753 			edx_flags & (1 << 28) ? "HT" : "-",
4754 			edx_flags & (1 << 29) ? "TM" : "-");
4755 	}
4756 	if (genuine_intel)
4757 		model = intel_model_duplicates(model);
4758 
4759 	if (!(edx_flags & (1 << 5)))
4760 		errx(1, "CPUID: no MSR");
4761 
4762 	if (max_extended_level >= 0x80000007) {
4763 
4764 		/*
4765 		 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
4766 		 * this check is valid for both Intel and AMD
4767 		 */
4768 		__cpuid(0x80000007, eax, ebx, ecx, edx);
4769 		has_invariant_tsc = edx & (1 << 8);
4770 	}
4771 
4772 	/*
4773 	 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
4774 	 * this check is valid for both Intel and AMD
4775 	 */
4776 
4777 	__cpuid(0x6, eax, ebx, ecx, edx);
4778 	has_aperf = ecx & (1 << 0);
4779 	if (has_aperf) {
4780 		BIC_PRESENT(BIC_Avg_MHz);
4781 		BIC_PRESENT(BIC_Busy);
4782 		BIC_PRESENT(BIC_Bzy_MHz);
4783 	}
4784 	do_dts = eax & (1 << 0);
4785 	if (do_dts)
4786 		BIC_PRESENT(BIC_CoreTmp);
4787 	has_turbo = eax & (1 << 1);
4788 	do_ptm = eax & (1 << 6);
4789 	if (do_ptm)
4790 		BIC_PRESENT(BIC_PkgTmp);
4791 	has_hwp = eax & (1 << 7);
4792 	has_hwp_notify = eax & (1 << 8);
4793 	has_hwp_activity_window = eax & (1 << 9);
4794 	has_hwp_epp = eax & (1 << 10);
4795 	has_hwp_pkg = eax & (1 << 11);
4796 	has_epb = ecx & (1 << 3);
4797 
4798 	if (!quiet)
4799 		fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
4800 			"%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
4801 			has_aperf ? "" : "No-",
4802 			has_turbo ? "" : "No-",
4803 			do_dts ? "" : "No-",
4804 			do_ptm ? "" : "No-",
4805 			has_hwp ? "" : "No-",
4806 			has_hwp_notify ? "" : "No-",
4807 			has_hwp_activity_window ? "" : "No-",
4808 			has_hwp_epp ? "" : "No-",
4809 			has_hwp_pkg ? "" : "No-",
4810 			has_epb ? "" : "No-");
4811 
4812 	if (!quiet)
4813 		decode_misc_enable_msr();
4814 
4815 
4816 	if (max_level >= 0x7 && !quiet) {
4817 		int has_sgx;
4818 
4819 		ecx = 0;
4820 
4821 		__cpuid_count(0x7, 0, eax, ebx, ecx, edx);
4822 
4823 		has_sgx = ebx & (1 << 2);
4824 		fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
4825 
4826 		if (has_sgx)
4827 			decode_feature_control_msr();
4828 	}
4829 
4830 	if (max_level >= 0x15) {
4831 		unsigned int eax_crystal;
4832 		unsigned int ebx_tsc;
4833 
4834 		/*
4835 		 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
4836 		 */
4837 		eax_crystal = ebx_tsc = crystal_hz = edx = 0;
4838 		__cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
4839 
4840 		if (ebx_tsc != 0) {
4841 
4842 			if (!quiet && (ebx != 0))
4843 				fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
4844 					eax_crystal, ebx_tsc, crystal_hz);
4845 
4846 			if (crystal_hz == 0)
4847 				switch(model) {
4848 				case INTEL_FAM6_SKYLAKE_L:	/* SKL */
4849 					crystal_hz = 24000000;	/* 24.0 MHz */
4850 					break;
4851 				case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
4852 					crystal_hz = 25000000;	/* 25.0 MHz */
4853 					break;
4854 				case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
4855 				case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4856 					crystal_hz = 19200000;	/* 19.2 MHz */
4857 					break;
4858 				default:
4859 					crystal_hz = 0;
4860 			}
4861 
4862 			if (crystal_hz) {
4863 				tsc_hz =  (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
4864 				if (!quiet)
4865 					fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
4866 						tsc_hz / 1000000, crystal_hz, ebx_tsc,  eax_crystal);
4867 			}
4868 		}
4869 	}
4870 	if (max_level >= 0x16) {
4871 		unsigned int base_mhz, max_mhz, bus_mhz, edx;
4872 
4873 		/*
4874 		 * CPUID 16H Base MHz, Max MHz, Bus MHz
4875 		 */
4876 		base_mhz = max_mhz = bus_mhz = edx = 0;
4877 
4878 		__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
4879 		if (!quiet)
4880 			fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
4881 				base_mhz, max_mhz, bus_mhz);
4882 	}
4883 
4884 	if (has_aperf)
4885 		aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
4886 
4887 	BIC_PRESENT(BIC_IRQ);
4888 	BIC_PRESENT(BIC_TSC_MHz);
4889 
4890 	if (probe_nhm_msrs(family, model)) {
4891 		do_nhm_platform_info = 1;
4892 		BIC_PRESENT(BIC_CPU_c1);
4893 		BIC_PRESENT(BIC_CPU_c3);
4894 		BIC_PRESENT(BIC_CPU_c6);
4895 		BIC_PRESENT(BIC_SMI);
4896 	}
4897 	do_snb_cstates = has_snb_msrs(family, model);
4898 
4899 	if (do_snb_cstates)
4900 		BIC_PRESENT(BIC_CPU_c7);
4901 
4902 	do_irtl_snb = has_snb_msrs(family, model);
4903 	if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
4904 		BIC_PRESENT(BIC_Pkgpc2);
4905 	if (pkg_cstate_limit >= PCL__3)
4906 		BIC_PRESENT(BIC_Pkgpc3);
4907 	if (pkg_cstate_limit >= PCL__6)
4908 		BIC_PRESENT(BIC_Pkgpc6);
4909 	if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
4910 		BIC_PRESENT(BIC_Pkgpc7);
4911 	if (has_slv_msrs(family, model)) {
4912 		BIC_NOT_PRESENT(BIC_Pkgpc2);
4913 		BIC_NOT_PRESENT(BIC_Pkgpc3);
4914 		BIC_PRESENT(BIC_Pkgpc6);
4915 		BIC_NOT_PRESENT(BIC_Pkgpc7);
4916 		BIC_PRESENT(BIC_Mod_c6);
4917 		use_c1_residency_msr = 1;
4918 	}
4919 	if (is_dnv(family, model)) {
4920 		BIC_PRESENT(BIC_CPU_c1);
4921 		BIC_NOT_PRESENT(BIC_CPU_c3);
4922 		BIC_NOT_PRESENT(BIC_Pkgpc3);
4923 		BIC_NOT_PRESENT(BIC_CPU_c7);
4924 		BIC_NOT_PRESENT(BIC_Pkgpc7);
4925 		use_c1_residency_msr = 1;
4926 	}
4927 	if (is_skx(family, model)) {
4928 		BIC_NOT_PRESENT(BIC_CPU_c3);
4929 		BIC_NOT_PRESENT(BIC_Pkgpc3);
4930 		BIC_NOT_PRESENT(BIC_CPU_c7);
4931 		BIC_NOT_PRESENT(BIC_Pkgpc7);
4932 	}
4933 	if (is_bdx(family, model)) {
4934 		BIC_NOT_PRESENT(BIC_CPU_c7);
4935 		BIC_NOT_PRESENT(BIC_Pkgpc7);
4936 	}
4937 	if (has_c8910_msrs(family, model)) {
4938 		BIC_PRESENT(BIC_Pkgpc8);
4939 		BIC_PRESENT(BIC_Pkgpc9);
4940 		BIC_PRESENT(BIC_Pkgpc10);
4941 	}
4942 	do_irtl_hsw = has_c8910_msrs(family, model);
4943 	if (has_skl_msrs(family, model)) {
4944 		BIC_PRESENT(BIC_Totl_c0);
4945 		BIC_PRESENT(BIC_Any_c0);
4946 		BIC_PRESENT(BIC_GFX_c0);
4947 		BIC_PRESENT(BIC_CPUGFX);
4948 	}
4949 	do_slm_cstates = is_slm(family, model);
4950 	do_knl_cstates  = is_knl(family, model);
4951 
4952 	if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) ||
4953 	    is_ehl(family, model))
4954 		BIC_NOT_PRESENT(BIC_CPU_c3);
4955 
4956 	if (!quiet)
4957 		decode_misc_pwr_mgmt_msr();
4958 
4959 	if (!quiet && has_slv_msrs(family, model))
4960 		decode_c6_demotion_policy_msr();
4961 
4962 	rapl_probe(family, model);
4963 	perf_limit_reasons_probe(family, model);
4964 	automatic_cstate_conversion_probe(family, model);
4965 
4966 	if (!quiet)
4967 		dump_cstate_pstate_config_info(family, model);
4968 
4969 	if (!quiet)
4970 		dump_sysfs_cstate_config();
4971 	if (!quiet)
4972 		dump_sysfs_pstate_config();
4973 
4974 	if (has_skl_msrs(family, model))
4975 		calculate_tsc_tweak();
4976 
4977 	if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
4978 		BIC_PRESENT(BIC_GFX_rc6);
4979 
4980 	if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
4981 		BIC_PRESENT(BIC_GFXMHz);
4982 
4983 	if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
4984 		BIC_PRESENT(BIC_CPU_LPI);
4985 	else
4986 		BIC_NOT_PRESENT(BIC_CPU_LPI);
4987 
4988 	if (!access(sys_lpi_file_sysfs, R_OK)) {
4989 		sys_lpi_file = sys_lpi_file_sysfs;
4990 		BIC_PRESENT(BIC_SYS_LPI);
4991 	} else if (!access(sys_lpi_file_debugfs, R_OK)) {
4992 		sys_lpi_file = sys_lpi_file_debugfs;
4993 		BIC_PRESENT(BIC_SYS_LPI);
4994 	} else {
4995 		sys_lpi_file_sysfs = NULL;
4996 		BIC_NOT_PRESENT(BIC_SYS_LPI);
4997 	}
4998 
4999 	if (!quiet)
5000 		decode_misc_feature_control();
5001 
5002 	return;
5003 }
5004 
/*
 * in /dev/cpu/ return success for names that are numbers
 * i.e. filter out ".", "..", "microcode".
 *
 * Only the first character of the name is examined.
 * Returns 1 if it is a decimal digit, 0 otherwise.
 */
int dir_filter(const struct dirent *dirp)
{
	/*
	 * isdigit() is only defined for EOF and values representable as
	 * unsigned char; d_name is plain char, which may be negative.
	 */
	return isdigit((unsigned char)dirp->d_name[0]) ? 1 : 0;
}
5016 
/*
 * Placeholder: ignores its argument and always returns 0.
 */
int open_dev_cpu_msr(int dummy1)
{
	(void)dummy1;

	return 0;
}
5021 
5022 void topology_probe()
5023 {
5024 	int i;
5025 	int max_core_id = 0;
5026 	int max_package_id = 0;
5027 	int max_die_id = 0;
5028 	int max_siblings = 0;
5029 
5030 	/* Initialize num_cpus, max_cpu_num */
5031 	set_max_cpu_num();
5032 	topo.num_cpus = 0;
5033 	for_all_proc_cpus(count_cpus);
5034 	if (!summary_only && topo.num_cpus > 1)
5035 		BIC_PRESENT(BIC_CPU);
5036 
5037 	if (debug > 1)
5038 		fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
5039 
5040 	cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
5041 	if (cpus == NULL)
5042 		err(1, "calloc cpus");
5043 
5044 	/*
5045 	 * Allocate and initialize cpu_present_set
5046 	 */
5047 	cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
5048 	if (cpu_present_set == NULL)
5049 		err(3, "CPU_ALLOC");
5050 	cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
5051 	CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
5052 	for_all_proc_cpus(mark_cpu_present);
5053 
5054 	/*
5055 	 * Validate that all cpus in cpu_subset are also in cpu_present_set
5056 	 */
5057 	for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
5058 		if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
5059 			if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
5060 				err(1, "cpu%d not present", i);
5061 	}
5062 
5063 	/*
5064 	 * Allocate and initialize cpu_affinity_set
5065 	 */
5066 	cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
5067 	if (cpu_affinity_set == NULL)
5068 		err(3, "CPU_ALLOC");
5069 	cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
5070 	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
5071 
5072 	for_all_proc_cpus(init_thread_id);
5073 
5074 	/*
5075 	 * For online cpus
5076 	 * find max_core_id, max_package_id
5077 	 */
5078 	for (i = 0; i <= topo.max_cpu_num; ++i) {
5079 		int siblings;
5080 
5081 		if (cpu_is_not_present(i)) {
5082 			if (debug > 1)
5083 				fprintf(outf, "cpu%d NOT PRESENT\n", i);
5084 			continue;
5085 		}
5086 
5087 		cpus[i].logical_cpu_id = i;
5088 
5089 		/* get package information */
5090 		cpus[i].physical_package_id = get_physical_package_id(i);
5091 		if (cpus[i].physical_package_id > max_package_id)
5092 			max_package_id = cpus[i].physical_package_id;
5093 
5094 		/* get die information */
5095 		cpus[i].die_id = get_die_id(i);
5096 		if (cpus[i].die_id > max_die_id)
5097 			max_die_id = cpus[i].die_id;
5098 
5099 		/* get numa node information */
5100 		cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
5101 		if (cpus[i].physical_node_id > topo.max_node_num)
5102 			topo.max_node_num = cpus[i].physical_node_id;
5103 
5104 		/* get core information */
5105 		cpus[i].physical_core_id = get_core_id(i);
5106 		if (cpus[i].physical_core_id > max_core_id)
5107 			max_core_id = cpus[i].physical_core_id;
5108 
5109 		/* get thread information */
5110 		siblings = get_thread_siblings(&cpus[i]);
5111 		if (siblings > max_siblings)
5112 			max_siblings = siblings;
5113 		if (cpus[i].thread_id == 0)
5114 			topo.num_cores++;
5115 	}
5116 
5117 	topo.cores_per_node = max_core_id + 1;
5118 	if (debug > 1)
5119 		fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
5120 			max_core_id, topo.cores_per_node);
5121 	if (!summary_only && topo.cores_per_node > 1)
5122 		BIC_PRESENT(BIC_Core);
5123 
5124 	topo.num_die = max_die_id + 1;
5125 	if (debug > 1)
5126 		fprintf(outf, "max_die_id %d, sizing for %d die\n",
5127 				max_die_id, topo.num_die);
5128 	if (!summary_only && topo.num_die > 1)
5129 		BIC_PRESENT(BIC_Die);
5130 
5131 	topo.num_packages = max_package_id + 1;
5132 	if (debug > 1)
5133 		fprintf(outf, "max_package_id %d, sizing for %d packages\n",
5134 			max_package_id, topo.num_packages);
5135 	if (!summary_only && topo.num_packages > 1)
5136 		BIC_PRESENT(BIC_Package);
5137 
5138 	set_node_data();
5139 	if (debug > 1)
5140 		fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
5141 	if (!summary_only && topo.nodes_per_pkg > 1)
5142 		BIC_PRESENT(BIC_Node);
5143 
5144 	topo.threads_per_core = max_siblings;
5145 	if (debug > 1)
5146 		fprintf(outf, "max_siblings %d\n", max_siblings);
5147 
5148 	if (debug < 1)
5149 		return;
5150 
5151 	for (i = 0; i <= topo.max_cpu_num; ++i) {
5152 		if (cpu_is_not_present(i))
5153 			continue;
5154 		fprintf(outf,
5155 			"cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n",
5156 			i, cpus[i].physical_package_id, cpus[i].die_id,
5157 			cpus[i].physical_node_id,
5158 			cpus[i].logical_node_id,
5159 			cpus[i].physical_core_id,
5160 			cpus[i].thread_id);
5161 	}
5162 
5163 }
5164 
5165 void
5166 allocate_counters(struct thread_data **t, struct core_data **c,
5167 		  struct pkg_data **p)
5168 {
5169 	int i;
5170 	int num_cores = topo.cores_per_node * topo.nodes_per_pkg *
5171 			topo.num_packages;
5172 	int num_threads = topo.threads_per_core * num_cores;
5173 
5174 	*t = calloc(num_threads, sizeof(struct thread_data));
5175 	if (*t == NULL)
5176 		goto error;
5177 
5178 	for (i = 0; i < num_threads; i++)
5179 		(*t)[i].cpu_id = -1;
5180 
5181 	*c = calloc(num_cores, sizeof(struct core_data));
5182 	if (*c == NULL)
5183 		goto error;
5184 
5185 	for (i = 0; i < num_cores; i++)
5186 		(*c)[i].core_id = -1;
5187 
5188 	*p = calloc(topo.num_packages, sizeof(struct pkg_data));
5189 	if (*p == NULL)
5190 		goto error;
5191 
5192 	for (i = 0; i < topo.num_packages; i++)
5193 		(*p)[i].package_id = i;
5194 
5195 	return;
5196 error:
5197 	err(1, "calloc counters");
5198 }
5199 /*
5200  * init_counter()
5201  *
5202  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
5203  */
5204 void init_counter(struct thread_data *thread_base, struct core_data *core_base,
5205 	struct pkg_data *pkg_base, int cpu_id)
5206 {
5207 	int pkg_id = cpus[cpu_id].physical_package_id;
5208 	int node_id = cpus[cpu_id].logical_node_id;
5209 	int core_id = cpus[cpu_id].physical_core_id;
5210 	int thread_id = cpus[cpu_id].thread_id;
5211 	struct thread_data *t;
5212 	struct core_data *c;
5213 	struct pkg_data *p;
5214 
5215 
5216 	/* Workaround for systems where physical_node_id==-1
5217 	 * and logical_node_id==(-1 - topo.num_cpus)
5218 	 */
5219 	if (node_id < 0)
5220 		node_id = 0;
5221 
5222 	t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
5223 	c = GET_CORE(core_base, core_id, node_id, pkg_id);
5224 	p = GET_PKG(pkg_base, pkg_id);
5225 
5226 	t->cpu_id = cpu_id;
5227 	if (thread_id == 0) {
5228 		t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
5229 		if (cpu_is_first_core_in_package(cpu_id))
5230 			t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
5231 	}
5232 
5233 	c->core_id = core_id;
5234 	p->package_id = pkg_id;
5235 }
5236 
5237 
/*
 * initialize_counters()
 *
 * Per-cpu callback (run via for_all_proc_cpus() in setup_all_buffers()):
 * run init_counter() for cpu_id on both the EVEN and the ODD counter sets.
 * Always returns 0.
 */
int initialize_counters(int cpu_id)
{
	init_counter(EVEN_COUNTERS, cpu_id);
	init_counter(ODD_COUNTERS, cpu_id);
	return 0;
}
5244 
5245 void allocate_output_buffer()
5246 {
5247 	output_buffer = calloc(1, (1 + topo.num_cpus) * 2048);
5248 	outp = output_buffer;
5249 	if (outp == NULL)
5250 		err(-1, "calloc output buffer");
5251 }
5252 void allocate_fd_percpu(void)
5253 {
5254 	fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
5255 	if (fd_percpu == NULL)
5256 		err(-1, "calloc fd_percpu");
5257 }
5258 void allocate_irq_buffers(void)
5259 {
5260 	irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
5261 	if (irq_column_2_cpu == NULL)
5262 		err(-1, "calloc %d", topo.num_cpus);
5263 
5264 	irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
5265 	if (irqs_per_cpu == NULL)
5266 		err(-1, "calloc %d", topo.max_cpu_num + 1);
5267 }
/*
 * setup_all_buffers()
 *
 * Probe the topology, then allocate every buffer that is sized from it:
 * irq arrays, per-cpu fd array, the EVEN and ODD counter sets and the
 * output buffer.  Finally bind each cpu to its counter slots.
 */
void setup_all_buffers(void)
{
	/* must run first: the allocations below are sized from topo */
	topology_probe();
	allocate_irq_buffers();
	allocate_fd_percpu();
	allocate_counters(&thread_even, &core_even, &package_even);
	allocate_counters(&thread_odd, &core_odd, &package_odd);
	allocate_output_buffer();
	for_all_proc_cpus(initialize_counters);
}
5278 
5279 void set_base_cpu(void)
5280 {
5281 	base_cpu = sched_getcpu();
5282 	if (base_cpu < 0)
5283 		err(-ENODEV, "No valid cpus found");
5284 
5285 	if (debug > 1)
5286 		fprintf(outf, "base_cpu = %d\n", base_cpu);
5287 }
5288 
5289 void turbostat_init()
5290 {
5291 	setup_all_buffers();
5292 	set_base_cpu();
5293 	check_dev_msr();
5294 	check_permissions();
5295 	process_cpuid();
5296 
5297 
5298 	if (!quiet)
5299 		for_all_cpus(print_hwp, ODD_COUNTERS);
5300 
5301 	if (!quiet)
5302 		for_all_cpus(print_epb, ODD_COUNTERS);
5303 
5304 	if (!quiet)
5305 		for_all_cpus(print_perf_limit, ODD_COUNTERS);
5306 
5307 	if (!quiet)
5308 		for_all_cpus(print_rapl, ODD_COUNTERS);
5309 
5310 	for_all_cpus(set_temperature_target, ODD_COUNTERS);
5311 
5312 	if (!quiet)
5313 		for_all_cpus(print_thermal, ODD_COUNTERS);
5314 
5315 	if (!quiet && do_irtl_snb)
5316 		print_irtl();
5317 }
5318 
5319 int fork_it(char **argv)
5320 {
5321 	pid_t child_pid;
5322 	int status;
5323 
5324 	snapshot_proc_sysfs_files();
5325 	status = for_all_cpus(get_counters, EVEN_COUNTERS);
5326 	first_counter_read = 0;
5327 	if (status)
5328 		exit(status);
5329 	/* clear affinity side-effect of get_counters() */
5330 	sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
5331 	gettimeofday(&tv_even, (struct timezone *)NULL);
5332 
5333 	child_pid = fork();
5334 	if (!child_pid) {
5335 		/* child */
5336 		execvp(argv[0], argv);
5337 		err(errno, "exec %s", argv[0]);
5338 	} else {
5339 
5340 		/* parent */
5341 		if (child_pid == -1)
5342 			err(1, "fork");
5343 
5344 		signal(SIGINT, SIG_IGN);
5345 		signal(SIGQUIT, SIG_IGN);
5346 		if (waitpid(child_pid, &status, 0) == -1)
5347 			err(status, "waitpid");
5348 
5349 		if (WIFEXITED(status))
5350 			status = WEXITSTATUS(status);
5351 	}
5352 	/*
5353 	 * n.b. fork_it() does not check for errors from for_all_cpus()
5354 	 * because re-starting is problematic when forking
5355 	 */
5356 	snapshot_proc_sysfs_files();
5357 	for_all_cpus(get_counters, ODD_COUNTERS);
5358 	gettimeofday(&tv_odd, (struct timezone *)NULL);
5359 	timersub(&tv_odd, &tv_even, &tv_delta);
5360 	if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
5361 		fprintf(outf, "%s: Counter reset detected\n", progname);
5362 	else {
5363 		compute_average(EVEN_COUNTERS);
5364 		format_all_counters(EVEN_COUNTERS);
5365 	}
5366 
5367 	fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
5368 
5369 	flush_output_stderr();
5370 
5371 	return status;
5372 }
5373 
5374 int get_and_dump_counters(void)
5375 {
5376 	int status;
5377 
5378 	snapshot_proc_sysfs_files();
5379 	status = for_all_cpus(get_counters, ODD_COUNTERS);
5380 	if (status)
5381 		return status;
5382 
5383 	status = for_all_cpus(dump_counters, ODD_COUNTERS);
5384 	if (status)
5385 		return status;
5386 
5387 	flush_output_stdout();
5388 
5389 	return status;
5390 }
5391 
5392 void print_version() {
5393 	fprintf(outf, "turbostat version 20.03.20"
5394 		" - Len Brown <lenb@kernel.org>\n");
5395 }
5396 
5397 int add_counter(unsigned int msr_num, char *path, char *name,
5398 	unsigned int width, enum counter_scope scope,
5399 	enum counter_type type, enum counter_format format, int flags)
5400 {
5401 	struct msr_counter *msrp;
5402 
5403 	msrp = calloc(1, sizeof(struct msr_counter));
5404 	if (msrp == NULL) {
5405 		perror("calloc");
5406 		exit(1);
5407 	}
5408 
5409 	msrp->msr_num = msr_num;
5410 	strncpy(msrp->name, name, NAME_BYTES - 1);
5411 	if (path)
5412 		strncpy(msrp->path, path, PATH_BYTES - 1);
5413 	msrp->width = width;
5414 	msrp->type = type;
5415 	msrp->format = format;
5416 	msrp->flags = flags;
5417 
5418 	switch (scope) {
5419 
5420 	case SCOPE_CPU:
5421 		msrp->next = sys.tp;
5422 		sys.tp = msrp;
5423 		sys.added_thread_counters++;
5424 		if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) {
5425 			fprintf(stderr, "exceeded max %d added thread counters\n",
5426 				MAX_ADDED_COUNTERS);
5427 			exit(-1);
5428 		}
5429 		break;
5430 
5431 	case SCOPE_CORE:
5432 		msrp->next = sys.cp;
5433 		sys.cp = msrp;
5434 		sys.added_core_counters++;
5435 		if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
5436 			fprintf(stderr, "exceeded max %d added core counters\n",
5437 				MAX_ADDED_COUNTERS);
5438 			exit(-1);
5439 		}
5440 		break;
5441 
5442 	case SCOPE_PACKAGE:
5443 		msrp->next = sys.pp;
5444 		sys.pp = msrp;
5445 		sys.added_package_counters++;
5446 		if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
5447 			fprintf(stderr, "exceeded max %d added package counters\n",
5448 				MAX_ADDED_COUNTERS);
5449 			exit(-1);
5450 		}
5451 		break;
5452 	}
5453 
5454 	return 0;
5455 }
5456 
5457 void parse_add_command(char *add_command)
5458 {
5459 	int msr_num = 0;
5460 	char *path = NULL;
5461 	char name_buffer[NAME_BYTES] = "";
5462 	int width = 64;
5463 	int fail = 0;
5464 	enum counter_scope scope = SCOPE_CPU;
5465 	enum counter_type type = COUNTER_CYCLES;
5466 	enum counter_format format = FORMAT_DELTA;
5467 
5468 	while (add_command) {
5469 
5470 		if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
5471 			goto next;
5472 
5473 		if (sscanf(add_command, "msr%d", &msr_num) == 1)
5474 			goto next;
5475 
5476 		if (*add_command == '/') {
5477 			path = add_command;
5478 			goto next;
5479 		}
5480 
5481 		if (sscanf(add_command, "u%d", &width) == 1) {
5482 			if ((width == 32) || (width == 64))
5483 				goto next;
5484 			width = 64;
5485 		}
5486 		if (!strncmp(add_command, "cpu", strlen("cpu"))) {
5487 			scope = SCOPE_CPU;
5488 			goto next;
5489 		}
5490 		if (!strncmp(add_command, "core", strlen("core"))) {
5491 			scope = SCOPE_CORE;
5492 			goto next;
5493 		}
5494 		if (!strncmp(add_command, "package", strlen("package"))) {
5495 			scope = SCOPE_PACKAGE;
5496 			goto next;
5497 		}
5498 		if (!strncmp(add_command, "cycles", strlen("cycles"))) {
5499 			type = COUNTER_CYCLES;
5500 			goto next;
5501 		}
5502 		if (!strncmp(add_command, "seconds", strlen("seconds"))) {
5503 			type = COUNTER_SECONDS;
5504 			goto next;
5505 		}
5506 		if (!strncmp(add_command, "usec", strlen("usec"))) {
5507 			type = COUNTER_USEC;
5508 			goto next;
5509 		}
5510 		if (!strncmp(add_command, "raw", strlen("raw"))) {
5511 			format = FORMAT_RAW;
5512 			goto next;
5513 		}
5514 		if (!strncmp(add_command, "delta", strlen("delta"))) {
5515 			format = FORMAT_DELTA;
5516 			goto next;
5517 		}
5518 		if (!strncmp(add_command, "percent", strlen("percent"))) {
5519 			format = FORMAT_PERCENT;
5520 			goto next;
5521 		}
5522 
5523 		if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {	/* 18 < NAME_BYTES */
5524 			char *eos;
5525 
5526 			eos = strchr(name_buffer, ',');
5527 			if (eos)
5528 				*eos = '\0';
5529 			goto next;
5530 		}
5531 
5532 next:
5533 		add_command = strchr(add_command, ',');
5534 		if (add_command) {
5535 			*add_command = '\0';
5536 			add_command++;
5537 		}
5538 
5539 	}
5540 	if ((msr_num == 0) && (path == NULL)) {
5541 		fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n");
5542 		fail++;
5543 	}
5544 
5545 	/* generate default column header */
5546 	if (*name_buffer == '\0') {
5547 		if (width == 32)
5548 			sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
5549 		else
5550 			sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
5551 	}
5552 
5553 	if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0))
5554 		fail++;
5555 
5556 	if (fail) {
5557 		help();
5558 		exit(1);
5559 	}
5560 }
5561 
5562 int is_deferred_skip(char *name)
5563 {
5564 	int i;
5565 
5566 	for (i = 0; i < deferred_skip_index; ++i)
5567 		if (!strcmp(name, deferred_skip_names[i]))
5568 			return 1;
5569 	return 0;
5570 }
5571 
5572 void probe_sysfs(void)
5573 {
5574 	char path[64];
5575 	char name_buf[16];
5576 	FILE *input;
5577 	int state;
5578 	char *sp;
5579 
5580 	if (!DO_BIC(BIC_sysfs))
5581 		return;
5582 
5583 	for (state = 10; state >= 0; --state) {
5584 
5585 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
5586 			base_cpu, state);
5587 		input = fopen(path, "r");
5588 		if (input == NULL)
5589 			continue;
5590 		if (!fgets(name_buf, sizeof(name_buf), input))
5591 			err(1, "%s: failed to read file", path);
5592 
5593 		 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
5594 		sp = strchr(name_buf, '-');
5595 		if (!sp)
5596 			sp = strchrnul(name_buf, '\n');
5597 		*sp = '%';
5598 		*(sp + 1) = '\0';
5599 
5600 		fclose(input);
5601 
5602 		sprintf(path, "cpuidle/state%d/time", state);
5603 
5604 		if (is_deferred_skip(name_buf))
5605 			continue;
5606 
5607 		add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC,
5608 				FORMAT_PERCENT, SYSFS_PERCPU);
5609 	}
5610 
5611 	for (state = 10; state >= 0; --state) {
5612 
5613 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
5614 			base_cpu, state);
5615 		input = fopen(path, "r");
5616 		if (input == NULL)
5617 			continue;
5618 		if (!fgets(name_buf, sizeof(name_buf), input))
5619 			err(1, "%s: failed to read file", path);
5620 		 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
5621 		sp = strchr(name_buf, '-');
5622 		if (!sp)
5623 			sp = strchrnul(name_buf, '\n');
5624 		*sp = '\0';
5625 		fclose(input);
5626 
5627 		sprintf(path, "cpuidle/state%d/usage", state);
5628 
5629 		if (is_deferred_skip(name_buf))
5630 			continue;
5631 
5632 		add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS,
5633 				FORMAT_DELTA, SYSFS_PERCPU);
5634 	}
5635 
5636 }
5637 
5638 
5639 /*
5640  * parse cpuset with following syntax
5641  * 1,2,4..6,8-10 and set bits in cpu_subset
5642  */
5643 void parse_cpu_command(char *optarg)
5644 {
5645 	unsigned int start, end;
5646 	char *next;
5647 
5648 	if (!strcmp(optarg, "core")) {
5649 		if (cpu_subset)
5650 			goto error;
5651 		show_core_only++;
5652 		return;
5653 	}
5654 	if (!strcmp(optarg, "package")) {
5655 		if (cpu_subset)
5656 			goto error;
5657 		show_pkg_only++;
5658 		return;
5659 	}
5660 	if (show_core_only || show_pkg_only)
5661 		goto error;
5662 
5663 	cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
5664 	if (cpu_subset == NULL)
5665 		err(3, "CPU_ALLOC");
5666 	cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
5667 
5668 	CPU_ZERO_S(cpu_subset_size, cpu_subset);
5669 
5670 	next = optarg;
5671 
5672 	while (next && *next) {
5673 
5674 		if (*next == '-')	/* no negative cpu numbers */
5675 			goto error;
5676 
5677 		start = strtoul(next, &next, 10);
5678 
5679 		if (start >= CPU_SUBSET_MAXCPUS)
5680 			goto error;
5681 		CPU_SET_S(start, cpu_subset_size, cpu_subset);
5682 
5683 		if (*next == '\0')
5684 			break;
5685 
5686 		if (*next == ',') {
5687 			next += 1;
5688 			continue;
5689 		}
5690 
5691 		if (*next == '-') {
5692 			next += 1;	/* start range */
5693 		} else if (*next == '.') {
5694 			next += 1;
5695 			if (*next == '.')
5696 				next += 1;	/* start range */
5697 			else
5698 				goto error;
5699 		}
5700 
5701 		end = strtoul(next, &next, 10);
5702 		if (end <= start)
5703 			goto error;
5704 
5705 		while (++start <= end) {
5706 			if (start >= CPU_SUBSET_MAXCPUS)
5707 				goto error;
5708 			CPU_SET_S(start, cpu_subset_size, cpu_subset);
5709 		}
5710 
5711 		if (*next == ',')
5712 			next += 1;
5713 		else if (*next != '\0')
5714 			goto error;
5715 	}
5716 
5717 	return;
5718 
5719 error:
5720 	fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
5721 	help();
5722 	exit(-1);
5723 }
5724 
5725 
5726 void cmdline(int argc, char **argv)
5727 {
5728 	int opt;
5729 	int option_index = 0;
5730 	static struct option long_options[] = {
5731 		{"add",		required_argument,	0, 'a'},
5732 		{"cpu",		required_argument,	0, 'c'},
5733 		{"Dump",	no_argument,		0, 'D'},
5734 		{"debug",	no_argument,		0, 'd'},	/* internal, not documented */
5735 		{"enable",	required_argument,	0, 'e'},
5736 		{"interval",	required_argument,	0, 'i'},
5737 		{"num_iterations",	required_argument,	0, 'n'},
5738 		{"help",	no_argument,		0, 'h'},
5739 		{"hide",	required_argument,	0, 'H'},	// meh, -h taken by --help
5740 		{"Joules",	no_argument,		0, 'J'},
5741 		{"list",	no_argument,		0, 'l'},
5742 		{"out",		required_argument,	0, 'o'},
5743 		{"quiet",	no_argument,		0, 'q'},
5744 		{"show",	required_argument,	0, 's'},
5745 		{"Summary",	no_argument,		0, 'S'},
5746 		{"TCC",		required_argument,	0, 'T'},
5747 		{"version",	no_argument,		0, 'v' },
5748 		{0,		0,			0,  0 }
5749 	};
5750 
5751 	progname = argv[0];
5752 
5753 	while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v",
5754 				long_options, &option_index)) != -1) {
5755 		switch (opt) {
5756 		case 'a':
5757 			parse_add_command(optarg);
5758 			break;
5759 		case 'c':
5760 			parse_cpu_command(optarg);
5761 			break;
5762 		case 'D':
5763 			dump_only++;
5764 			break;
5765 		case 'e':
5766 			/* --enable specified counter */
5767 			bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
5768 			break;
5769 		case 'd':
5770 			debug++;
5771 			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5772 			break;
5773 		case 'H':
5774 			/*
5775 			 * --hide: do not show those specified
5776 			 *  multiple invocations simply clear more bits in enabled mask
5777 			 */
5778 			bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
5779 			break;
5780 		case 'h':
5781 		default:
5782 			help();
5783 			exit(1);
5784 		case 'i':
5785 			{
5786 				double interval = strtod(optarg, NULL);
5787 
5788 				if (interval < 0.001) {
5789 					fprintf(outf, "interval %f seconds is too small\n",
5790 						interval);
5791 					exit(2);
5792 				}
5793 
5794 				interval_tv.tv_sec = interval_ts.tv_sec = interval;
5795 				interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
5796 				interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
5797 			}
5798 			break;
5799 		case 'J':
5800 			rapl_joules++;
5801 			break;
5802 		case 'l':
5803 			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5804 			list_header_only++;
5805 			quiet++;
5806 			break;
5807 		case 'o':
5808 			outf = fopen_or_die(optarg, "w");
5809 			break;
5810 		case 'q':
5811 			quiet = 1;
5812 			break;
5813 		case 'n':
5814 			num_iterations = strtod(optarg, NULL);
5815 
5816 			if (num_iterations <= 0) {
5817 				fprintf(outf, "iterations %d should be positive number\n",
5818 					num_iterations);
5819 				exit(2);
5820 			}
5821 			break;
5822 		case 's':
5823 			/*
5824 			 * --show: show only those specified
5825 			 *  The 1st invocation will clear and replace the enabled mask
5826 			 *  subsequent invocations can add to it.
5827 			 */
5828 			if (shown == 0)
5829 				bic_enabled = bic_lookup(optarg, SHOW_LIST);
5830 			else
5831 				bic_enabled |= bic_lookup(optarg, SHOW_LIST);
5832 			shown = 1;
5833 			break;
5834 		case 'S':
5835 			summary_only++;
5836 			break;
5837 		case 'T':
5838 			tcc_activation_temp_override = atoi(optarg);
5839 			break;
5840 		case 'v':
5841 			print_version();
5842 			exit(0);
5843 			break;
5844 		}
5845 	}
5846 }
5847 
5848 int main(int argc, char **argv)
5849 {
5850 	outf = stderr;
5851 	cmdline(argc, argv);
5852 
5853 	if (!quiet)
5854 		print_version();
5855 
5856 	probe_sysfs();
5857 
5858 	turbostat_init();
5859 
5860 	/* dump counters and exit */
5861 	if (dump_only)
5862 		return get_and_dump_counters();
5863 
5864 	/* list header and exit */
5865 	if (list_header_only) {
5866 		print_header(",");
5867 		flush_output_stdout();
5868 		return 0;
5869 	}
5870 
5871 	/*
5872 	 * if any params left, it must be a command to fork
5873 	 */
5874 	if (argc - optind)
5875 		return fork_it(argv + optind);
5876 	else
5877 		turbostat_loop();
5878 
5879 	return 0;
5880 }
5881