xref: /openbmc/linux/tools/power/x86/turbostat/turbostat.c (revision 0e2d8f058f9924c373ee7061064936cd582bcbe7)
1 /*
2  * turbostat -- show CPU frequency and C-state residency
3  * on modern Intel turbo-capable processors.
4  *
5  * Copyright (c) 2013 Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21 
22 #define _GNU_SOURCE
23 #include MSRHEADER
24 #include INTEL_FAMILY_HEADER
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <err.h>
28 #include <unistd.h>
29 #include <sys/types.h>
30 #include <sys/wait.h>
31 #include <sys/stat.h>
32 #include <sys/select.h>
33 #include <sys/resource.h>
34 #include <fcntl.h>
35 #include <signal.h>
36 #include <sys/time.h>
37 #include <stdlib.h>
38 #include <getopt.h>
39 #include <dirent.h>
40 #include <string.h>
41 #include <ctype.h>
42 #include <sched.h>
43 #include <time.h>
44 #include <cpuid.h>
45 #include <linux/capability.h>
46 #include <errno.h>
47 
/*
 * File-scope configuration and state shared by the whole program.
 * Most of these are flags; only the ones whose use is visible nearby
 * are annotated.
 */
char *proc_stat = "/proc/stat";
FILE *outf;			/* stream help() writes to */
int *fd_percpu;			/* indexed by cpu; 0 means "not opened yet", see get_msr_fd() */
struct timeval interval_tv = {5, 0};	/* 5 seconds, the default interval named in help() */
struct timespec interval_ts = {5, 0};
struct timespec one_msec = {0, 1000000};
unsigned int num_iterations;
unsigned int debug;
unsigned int quiet;
unsigned int shown;
unsigned int sums_need_wide_columns;	/* widens IRQ and COUNTER_ITEMS columns */
unsigned int rapl_joules;	/* print_header(): Joules columns instead of Watts */
unsigned int summary_only;
unsigned int list_header_only;
unsigned int dump_only;
unsigned int do_snb_cstates;
unsigned int do_knl_cstates;	/* when set, the CPU%c3 column is suppressed */
unsigned int do_slm_cstates;	/* when set, the CPU%c3 column is suppressed */
unsigned int do_cnl_cstates;	/* when set, the CPU%c3 column is suppressed */
unsigned int use_c1_residency_msr;
unsigned int has_aperf;
unsigned int has_epb;
unsigned int do_irtl_snb;
unsigned int do_irtl_hsw;
unsigned int units = 1000000;	/* MHz etc */
unsigned int genuine_intel;
unsigned int has_invariant_tsc;
unsigned int do_nhm_platform_info;
unsigned int no_MSR_MISC_PWR_MGMT;
unsigned int aperf_mperf_multiplier = 1;
double bclk;
double base_hz;
unsigned int has_base_hz;	/* when set, Bzy_MHz is computed from base_hz */
double tsc_tweak = 1.0;		/* format_counters() scales the raw TSC delta by this */
unsigned int show_pkg_only;	/* format_counters(): only rows for the 1st core in a package */
unsigned int show_core_only;	/* format_counters(): only rows for the 1st thread in a core */
char *output_buffer, *outp;	/* outp is the write cursor, advanced by every sprintf() */
unsigned int do_rapl;
unsigned int do_dts;
unsigned int do_ptm;
unsigned long long  gfx_cur_rc6_ms;
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
unsigned int gfx_cur_mhz;
unsigned int tcc_activation_temp;
unsigned int tcc_activation_temp_override;
double rapl_power_units, rapl_time_units;
double rapl_dram_energy_units, rapl_energy_units;
double rapl_joule_counter_range;
unsigned int do_core_perf_limit_reasons;
unsigned int has_automatic_cstate_conversion;
unsigned int do_gfx_perf_limit_reasons;
unsigned int do_ring_perf_limit_reasons;
unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;
double discover_bclk(unsigned int family, unsigned int model);
unsigned int has_hwp;	/* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
			/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
unsigned int has_hwp_notify;		/* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp;		/* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg;		/* IA32_HWP_REQUEST_PKG */
unsigned int has_misc_feature_control;
112 
/*
 * RAPL capability bits, one per group of MSRs; the comment under each
 * define lists the MSRs that bit stands for.
 * NOTE(review): presumably OR-ed into do_rapl -- confirm against the
 * probing code, which is not visible here.
 */
#define RAPL_PKG		(1 << 0)
					/* 0x610 MSR_PKG_POWER_LIMIT */
					/* 0x611 MSR_PKG_ENERGY_STATUS */
#define RAPL_PKG_PERF_STATUS	(1 << 1)
					/* 0x613 MSR_PKG_PERF_STATUS */
#define RAPL_PKG_POWER_INFO	(1 << 2)
					/* 0x614 MSR_PKG_POWER_INFO */

#define RAPL_DRAM		(1 << 3)
					/* 0x618 MSR_DRAM_POWER_LIMIT */
					/* 0x619 MSR_DRAM_ENERGY_STATUS */
#define RAPL_DRAM_PERF_STATUS	(1 << 4)
					/* 0x61b MSR_DRAM_PERF_STATUS */
#define RAPL_DRAM_POWER_INFO	(1 << 5)
					/* 0x61c MSR_DRAM_POWER_INFO */

#define RAPL_CORES_POWER_LIMIT	(1 << 6)
					/* 0x638 MSR_PP0_POWER_LIMIT */
#define RAPL_CORE_POLICY	(1 << 7)
					/* 0x63a MSR_PP0_POLICY */

#define RAPL_GFX		(1 << 8)
					/* 0x640 MSR_PP1_POWER_LIMIT */
					/* 0x641 MSR_PP1_ENERGY_STATUS */
					/* 0x642 MSR_PP1_POLICY */

#define RAPL_CORES_ENERGY_STATUS	(1 << 9)
					/* 0x639 MSR_PP0_ENERGY_STATUS */
/* both PP0 bits together */
#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
/* NOTE(review): presumably degrees C, the fallback for tcc_activation_temp -- confirm */
#define	TJMAX_DEFAULT	100

/* larger of a and b; the winning argument is evaluated twice, so avoid side effects */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
145 
/*
 * buffer size used by sscanf() for added column names
 * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
 */
#define	NAME_BYTES 20	/* size of msr_counter.name[] */
#define PATH_BYTES 128	/* size of msr_counter.path[] */

int backwards_count;
char *progname;

#define CPU_SUBSET_MAXCPUS	1024	/* need to use before probe... */
/*
 * Dynamically sized cpu sets and their byte sizes, as taken by the
 * CPU_*_S() macros: present cpus, scratch set for cpu_migrate(), and
 * the optional --cpu subset checked in format_counters().
 */
cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
#define MAX_ADDED_COUNTERS 8		/* size of counter[] in core_data and pkg_data */
#define MAX_ADDED_THREAD_COUNTERS 24	/* size of counter[] in thread_data */
#define BITMASK_SIZE 32
162 
/*
 * Per-thread (logical cpu) counter snapshot.
 * Two arrays of these exist, thread_even and thread_odd.
 */
struct thread_data {
	struct timeval tv_begin;	/* tv_end - tv_begin is printed as the "usec" column */
	struct timeval tv_end;		/* printed as Time_Of_Day_Seconds */
	unsigned long long tsc;
	unsigned long long aperf;
	unsigned long long mperf;
	unsigned long long c1;
	unsigned long long  irq_count;
	unsigned int smi_count;
	unsigned int cpu_id;		/* tested against cpu_present_set and cpu_subset */
	unsigned int flags;
#define CPU_IS_FIRST_THREAD_IN_CORE	0x2	/* this row also carries the per-core columns */
#define CPU_IS_FIRST_CORE_IN_PACKAGE	0x4	/* this row also carries the per-package columns */
	unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];	/* slot i belongs to the i-th entry of sys.tp */
} *thread_even, *thread_odd;
178 
/*
 * Per-core counter snapshot.
 * Two arrays of these exist, core_even and core_odd.
 */
struct core_data {
	unsigned long long c3;
	unsigned long long c6;
	unsigned long long c7;
	unsigned long long mc6_us;	/* duplicate as per-core for now, even though per module */
	unsigned int core_temp_c;	/* printed as the CoreTmp column */
	unsigned int core_id;
	unsigned long long counter[MAX_ADDED_COUNTERS];	/* slot i belongs to the i-th entry of sys.cp */
} *core_even, *core_odd;
188 
/*
 * Per-package counter snapshot.
 * Two arrays of these exist, package_even and package_odd.
 */
struct pkg_data {
	unsigned long long pc2;
	unsigned long long pc3;
	unsigned long long pc6;
	unsigned long long pc7;
	unsigned long long pc8;
	unsigned long long pc9;
	unsigned long long pc10;
	unsigned long long cpu_lpi;
	unsigned long long sys_lpi;
	unsigned long long pkg_wtd_core_c0;
	unsigned long long pkg_any_core_c0;
	unsigned long long pkg_any_gfxe_c0;
	unsigned long long pkg_both_core_gfxe_c0;
	long long gfx_rc6_ms;		/* signed: format_counters() treats -1 as "GFX counter reset" */
	unsigned int gfx_mhz;
	unsigned int package_id;
	unsigned int energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
	unsigned int energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
	unsigned int energy_cores;	/* MSR_PP0_ENERGY_STATUS */
	unsigned int energy_gfx;	/* MSR_PP1_ENERGY_STATUS */
	unsigned int rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */
	unsigned int rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */
	unsigned int pkg_temp_c;	/* printed as the PkgTmp column */
	unsigned long long counter[MAX_ADDED_COUNTERS];	/* slot i belongs to the i-th entry of sys.pp */
} *package_even, *package_odd;
215 
/* The (thread, core, package) base pointers of each counter set, as one argument list. */
#define ODD_COUNTERS thread_odd, core_odd, package_odd
#define EVEN_COUNTERS thread_even, core_even, package_even

/*
 * Address the element for a given topology position inside a flat array.
 * Threads are laid out package-major, then core, then thread; cores are
 * laid out package-major, then core.  The strides come from the global
 * "topo".  Every macro argument is parenthesized so that an expression
 * argument (e.g. "a | b", "cond ? x : y") keeps its intended meaning.
 */
#define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \
	((thread_base) + (pkg_no) * topo.num_cores_per_pkg * \
		topo.num_threads_per_core + \
		(core_no) * topo.num_threads_per_core + (thread_no))
#define GET_CORE(core_base, core_no, pkg_no) \
	((core_base) + (pkg_no) * topo.num_cores_per_pkg + (core_no))
#define GET_PKG(pkg_base, pkg_no) ((pkg_base) + (pkg_no))
226 
/* Classification of a counter: what it is attached to, what it counts, how it is printed. */
enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
enum counter_type {COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC};
/* RAW prints hex, DELTA prints a decimal count, PERCENT prints a two-decimal percentage */
enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT};
230 
231 struct msr_counter {
232 	unsigned int msr_num;
233 	char name[NAME_BYTES];
234 	char path[PATH_BYTES];
235 	unsigned int width;
236 	enum counter_type type;
237 	enum counter_format format;
238 	struct msr_counter *next;
239 	unsigned int flags;
240 #define	FLAGS_HIDE	(1 << 0)
241 #define	FLAGS_SHOW	(1 << 1)
242 #define	SYSFS_PERCPU	(1 << 1)
243 };
244 
/*
 * The counters added at run time, kept as three singly linked lists
 * (one per scope) together with a count for each list.
 */
struct sys_counters {
	unsigned int added_thread_counters;
	unsigned int added_core_counters;
	unsigned int added_package_counters;
	struct msr_counter *tp;		/* thread-scope list, values land in thread_data.counter[] */
	struct msr_counter *cp;		/* core-scope list, values land in core_data.counter[] */
	struct msr_counter *pp;		/* package-scope list, values land in pkg_data.counter[] */
} sys;
253 
/*
 * One thread/core/package record for the summary row.
 * format_counters() recognizes the summary row by comparing its thread
 * pointer against &average.threads.
 */
struct system_summary {
	struct thread_data threads;
	struct core_data cores;
	struct pkg_data packages;
} average;
259 
/*
 * Topology identifiers recorded for a cpu.
 * NOTE(review): "cpus" is presumably an array indexed by logical cpu
 * number -- confirm where it is allocated.
 */
struct cpu_topology {
	int physical_package_id;
	int logical_cpu_id;
	int node_id;
	int physical_core_id;
	cpu_set_t *put_ids; /* Processing Unit/Thread IDs */
} *cpus;
267 
/*
 * System-wide topology dimensions.
 * num_packages, num_cores_per_pkg and num_threads_per_core are the loop
 * bounds of for_all_cpus() and the strides used by GET_THREAD()/GET_CORE().
 */
struct topo_params {
	int num_packages;
	int num_cpus;
	int num_cores;
	int max_cpu_num;
	int num_cores_per_pkg;
	int num_threads_per_core;
} topo;
276 
/* tv_delta is the measurement interval that format_counters() converts to seconds */
struct timeval tv_even, tv_odd, tv_delta;

int *irq_column_2_cpu;	/* /proc/interrupts column numbers */
int *irqs_per_cpu;		/* indexed by cpu_num */

/* defined later in the file */
void setup_all_buffers(void);
283 
284 int cpu_is_not_present(int cpu)
285 {
286 	return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
287 }
288 /*
289  * run func(thread, core, package) in topology order
290  * skip non-present cpus
291  */
292 
293 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
294 	struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
295 {
296 	int retval, pkg_no, core_no, thread_no;
297 
298 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
299 		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
300 			for (thread_no = 0; thread_no <
301 				topo.num_threads_per_core; ++thread_no) {
302 				struct thread_data *t;
303 				struct core_data *c;
304 				struct pkg_data *p;
305 
306 				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
307 
308 				if (cpu_is_not_present(t->cpu_id))
309 					continue;
310 
311 				c = GET_CORE(core_base, core_no, pkg_no);
312 				p = GET_PKG(pkg_base, pkg_no);
313 
314 				retval = func(t, c, p);
315 				if (retval)
316 					return retval;
317 			}
318 		}
319 	}
320 	return 0;
321 }
322 
323 int cpu_migrate(int cpu)
324 {
325 	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
326 	CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
327 	if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
328 		return -1;
329 	else
330 		return 0;
331 }
332 int get_msr_fd(int cpu)
333 {
334 	char pathname[32];
335 	int fd;
336 
337 	fd = fd_percpu[cpu];
338 
339 	if (fd)
340 		return fd;
341 
342 	sprintf(pathname, "/dev/cpu/%d/msr", cpu);
343 	fd = open(pathname, O_RDONLY);
344 	if (fd < 0)
345 		err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
346 
347 	fd_percpu[cpu] = fd;
348 
349 	return fd;
350 }
351 
/*
 * Read the 8-byte MSR at @offset on @cpu into *@msr.
 * Always returns 0: a short or failed read terminates the program via err().
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
	int fd = get_msr_fd(cpu);
	ssize_t nread = pread(fd, msr, sizeof(*msr), offset);

	if (nread != sizeof *msr)
		err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);

	return 0;
}
363 
/*
 * Each string in this array is compared in --show and --hide cmdline.
 * Thus, strings that are proper sub-sets must follow their more specific peers.
 *
 * The position of a name in this table is significant: bic_lookup()
 * turns a match at index i into the bit (1ULL << i), so the BIC_* bit
 * masks have to follow the same order as the entries below.
 */
struct msr_counter bic[] = {
	{ 0x0, "usec" },
	{ 0x0, "Time_Of_Day_Seconds" },
	{ 0x0, "Package" },
	{ 0x0, "Avg_MHz" },
	{ 0x0, "Bzy_MHz" },
	{ 0x0, "TSC_MHz" },
	{ 0x0, "IRQ" },
	{ 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
	{ 0x0, "Busy%" },
	{ 0x0, "CPU%c1" },
	{ 0x0, "CPU%c3" },
	{ 0x0, "CPU%c6" },
	{ 0x0, "CPU%c7" },
	{ 0x0, "ThreadC" },
	{ 0x0, "CoreTmp" },
	{ 0x0, "CoreCnt" },
	{ 0x0, "PkgTmp" },
	{ 0x0, "GFX%rc6" },
	{ 0x0, "GFXMHz" },
	{ 0x0, "Pkg%pc2" },
	{ 0x0, "Pkg%pc3" },
	{ 0x0, "Pkg%pc6" },
	{ 0x0, "Pkg%pc7" },
	{ 0x0, "Pkg%pc8" },
	{ 0x0, "Pkg%pc9" },
	{ 0x0, "Pk%pc10" },
	{ 0x0, "CPU%LPI" },
	{ 0x0, "SYS%LPI" },
	{ 0x0, "PkgWatt" },
	{ 0x0, "CorWatt" },
	{ 0x0, "GFXWatt" },
	{ 0x0, "PkgCnt" },
	{ 0x0, "RAMWatt" },
	{ 0x0, "PKG_%" },
	{ 0x0, "RAM_%" },
	{ 0x0, "Pkg_J" },
	{ 0x0, "Cor_J" },
	{ 0x0, "GFX_J" },
	{ 0x0, "RAM_J" },
	{ 0x0, "Core" },
	{ 0x0, "CPU" },
	{ 0x0, "Mod%c6" },
	{ 0x0, "sysfs" },
	{ 0x0, "Totl%C0" },
	{ 0x0, "Any%C0" },
	{ 0x0, "GFX%C0" },
	{ 0x0, "CPUGFX%" },
	{ 0x0, "Node%" },
};
418 
419 
420 
/*
 * Built-in counter (BIC) bit masks.
 *
 * Bit N must be the index of the matching name in bic[], because
 * bic_lookup() converts a name found at index i into (1ULL << i).
 * Every mask therefore has to be unique and in table order:
 * "CPU%LPI" and "SYS%LPI" sit at indices 26 and 27, so "PkgWatt"
 * starts at 28.
 */
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
#define	BIC_USEC	(1ULL << 0)
#define	BIC_TOD		(1ULL << 1)
#define	BIC_Package	(1ULL << 2)
#define	BIC_Avg_MHz	(1ULL << 3)
#define	BIC_Bzy_MHz	(1ULL << 4)
#define	BIC_TSC_MHz	(1ULL << 5)
#define	BIC_IRQ		(1ULL << 6)
#define	BIC_SMI		(1ULL << 7)
#define	BIC_Busy	(1ULL << 8)
#define	BIC_CPU_c1	(1ULL << 9)
#define	BIC_CPU_c3	(1ULL << 10)
#define	BIC_CPU_c6	(1ULL << 11)
#define	BIC_CPU_c7	(1ULL << 12)
#define	BIC_ThreadC	(1ULL << 13)
#define	BIC_CoreTmp	(1ULL << 14)
#define	BIC_CoreCnt	(1ULL << 15)
#define	BIC_PkgTmp	(1ULL << 16)
#define	BIC_GFX_rc6	(1ULL << 17)
#define	BIC_GFXMHz	(1ULL << 18)
#define	BIC_Pkgpc2	(1ULL << 19)
#define	BIC_Pkgpc3	(1ULL << 20)
#define	BIC_Pkgpc6	(1ULL << 21)
#define	BIC_Pkgpc7	(1ULL << 22)
#define	BIC_Pkgpc8	(1ULL << 23)
#define	BIC_Pkgpc9	(1ULL << 24)
#define	BIC_Pkgpc10	(1ULL << 25)
#define	BIC_CPU_LPI	(1ULL << 26)
#define	BIC_SYS_LPI	(1ULL << 27)
#define	BIC_PkgWatt	(1ULL << 28)
#define	BIC_CorWatt	(1ULL << 29)
#define	BIC_GFXWatt	(1ULL << 30)
#define	BIC_PkgCnt	(1ULL << 31)
#define	BIC_RAMWatt	(1ULL << 32)
#define	BIC_PKG__	(1ULL << 33)
#define	BIC_RAM__	(1ULL << 34)
#define	BIC_Pkg_J	(1ULL << 35)
#define	BIC_Cor_J	(1ULL << 36)
#define	BIC_GFX_J	(1ULL << 37)
#define	BIC_RAM_J	(1ULL << 38)
#define	BIC_Core	(1ULL << 39)
#define	BIC_CPU		(1ULL << 40)
#define	BIC_Mod_c6	(1ULL << 41)
#define	BIC_sysfs	(1ULL << 42)
#define	BIC_Totl_c0	(1ULL << 43)
#define	BIC_Any_c0	(1ULL << 44)
#define	BIC_GFX_c0	(1ULL << 45)
#define	BIC_CPUGFX	(1ULL << 46)

#define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD)

/* columns the user wants: everything except the default-off ones */
unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
/* columns available to print; only these three are marked present at start */
unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs;

/*
 * DO_BIC() is non-zero when a column is both enabled and present.
 * Arguments are parenthesized so that a combined mask such as
 * (BIC_A | BIC_B) is handled as one operand, notably under the "~"
 * in BIC_NOT_PRESENT().
 */
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & (COUNTER_NAME))
#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= (COUNTER_NAME))
#define BIC_PRESENT(COUNTER_BIT) (bic_present |= (COUNTER_BIT))
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~(COUNTER_BIT))
479 
480 
/*
 * Names that bic_lookup() could not match against bic[] while in
 * HIDE_LIST mode are parked here; deferred_skip_index counts them.
 * The pointers refer into the caller's name list, not to copies.
 */
#define MAX_DEFERRED 16
char *deferred_skip_names[MAX_DEFERRED];
int deferred_skip_index;

/*
 * HIDE_LIST - hide this list of counters, show the rest [default]
 * SHOW_LIST - show this list of counters, hide the rest
 */
enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;
490 
491 void help(void)
492 {
493 	fprintf(outf,
494 	"Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
495 	"\n"
496 	"Turbostat forks the specified COMMAND and prints statistics\n"
497 	"when COMMAND completes.\n"
498 	"If no COMMAND is specified, turbostat wakes every 5-seconds\n"
499 	"to print statistics, until interrupted.\n"
500 	"--add		add a counter\n"
501 	"		eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
502 	"--cpu	cpu-set	limit output to summary plus cpu-set:\n"
503 	"		{core | package | j,k,l..m,n-p }\n"
504 	"--quiet	skip decoding system configuration header\n"
505 	"--interval sec.subsec	Override default 5-second measurement interval\n"
506 	"--help		print this help message\n"
507 	"--list		list column headers only\n"
508 	"--num_iterations num   number of the measurement iterations\n"
509 	"--out file	create or truncate \"file\" for all output\n"
510 	"--version	print version information\n"
511 	"\n"
512 	"For more help, run \"man turbostat\"\n");
513 }
514 
515 /*
516  * bic_lookup
517  * for all the strings in comma separate name_list,
518  * set the approprate bit in return value.
519  */
520 unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
521 {
522 	int i;
523 	unsigned long long retval = 0;
524 
525 	while (name_list) {
526 		char *comma;
527 
528 		comma = strchr(name_list, ',');
529 
530 		if (comma)
531 			*comma = '\0';
532 
533 		if (!strcmp(name_list, "all"))
534 			return ~0;
535 
536 		for (i = 0; i < MAX_BIC; ++i) {
537 			if (!strcmp(name_list, bic[i].name)) {
538 				retval |= (1ULL << i);
539 				break;
540 			}
541 		}
542 		if (i == MAX_BIC) {
543 			if (mode == SHOW_LIST) {
544 				fprintf(stderr, "Invalid counter name: %s\n", name_list);
545 				exit(-1);
546 			}
547 			deferred_skip_names[deferred_skip_index++] = name_list;
548 			if (debug)
549 				fprintf(stderr, "deferred \"%s\"\n", name_list);
550 			if (deferred_skip_index >= MAX_DEFERRED) {
551 				fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
552 					MAX_DEFERRED, name_list);
553 				help();
554 				exit(1);
555 			}
556 		}
557 
558 		name_list = comma;
559 		if (name_list)
560 			name_list++;
561 
562 	}
563 	return retval;
564 }
565 
566 
567 void print_header(char *delim)
568 {
569 	struct msr_counter *mp;
570 	int printed = 0;
571 
572 	if (DO_BIC(BIC_USEC))
573 		outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
574 	if (DO_BIC(BIC_TOD))
575 		outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
576 	if (DO_BIC(BIC_Package))
577 		outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
578 	if (DO_BIC(BIC_Core))
579 		outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
580 	if (DO_BIC(BIC_CPU))
581 		outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
582 	if (DO_BIC(BIC_Avg_MHz))
583 		outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
584 	if (DO_BIC(BIC_Busy))
585 		outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
586 	if (DO_BIC(BIC_Bzy_MHz))
587 		outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
588 	if (DO_BIC(BIC_TSC_MHz))
589 		outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
590 
591 	if (DO_BIC(BIC_IRQ)) {
592 		if (sums_need_wide_columns)
593 			outp += sprintf(outp, "%s     IRQ", (printed++ ? delim : ""));
594 		else
595 			outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
596 	}
597 
598 	if (DO_BIC(BIC_SMI))
599 		outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
600 
601 	for (mp = sys.tp; mp; mp = mp->next) {
602 
603 		if (mp->format == FORMAT_RAW) {
604 			if (mp->width == 64)
605 				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
606 			else
607 				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
608 		} else {
609 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
610 				outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
611 			else
612 				outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
613 		}
614 	}
615 
616 	if (DO_BIC(BIC_CPU_c1))
617 		outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
618 	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
619 		outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
620 	if (DO_BIC(BIC_CPU_c6))
621 		outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
622 	if (DO_BIC(BIC_CPU_c7))
623 		outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
624 
625 	if (DO_BIC(BIC_Mod_c6))
626 		outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
627 
628 	if (DO_BIC(BIC_CoreTmp))
629 		outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
630 
631 	for (mp = sys.cp; mp; mp = mp->next) {
632 		if (mp->format == FORMAT_RAW) {
633 			if (mp->width == 64)
634 				outp += sprintf(outp, "%s%18.18s", delim, mp->name);
635 			else
636 				outp += sprintf(outp, "%s%10.10s", delim, mp->name);
637 		} else {
638 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
639 				outp += sprintf(outp, "%s%8s", delim, mp->name);
640 			else
641 				outp += sprintf(outp, "%s%s", delim, mp->name);
642 		}
643 	}
644 
645 	if (DO_BIC(BIC_PkgTmp))
646 		outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
647 
648 	if (DO_BIC(BIC_GFX_rc6))
649 		outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
650 
651 	if (DO_BIC(BIC_GFXMHz))
652 		outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
653 
654 	if (DO_BIC(BIC_Totl_c0))
655 		outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
656 	if (DO_BIC(BIC_Any_c0))
657 		outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
658 	if (DO_BIC(BIC_GFX_c0))
659 		outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
660 	if (DO_BIC(BIC_CPUGFX))
661 		outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
662 
663 	if (DO_BIC(BIC_Pkgpc2))
664 		outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
665 	if (DO_BIC(BIC_Pkgpc3))
666 		outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
667 	if (DO_BIC(BIC_Pkgpc6))
668 		outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
669 	if (DO_BIC(BIC_Pkgpc7))
670 		outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
671 	if (DO_BIC(BIC_Pkgpc8))
672 		outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
673 	if (DO_BIC(BIC_Pkgpc9))
674 		outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
675 	if (DO_BIC(BIC_Pkgpc10))
676 		outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
677 	if (DO_BIC(BIC_CPU_LPI))
678 		outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
679 	if (DO_BIC(BIC_SYS_LPI))
680 		outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
681 
682 	if (do_rapl && !rapl_joules) {
683 		if (DO_BIC(BIC_PkgWatt))
684 			outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
685 		if (DO_BIC(BIC_CorWatt))
686 			outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
687 		if (DO_BIC(BIC_GFXWatt))
688 			outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
689 		if (DO_BIC(BIC_RAMWatt))
690 			outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
691 		if (DO_BIC(BIC_PKG__))
692 			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
693 		if (DO_BIC(BIC_RAM__))
694 			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
695 	} else if (do_rapl && rapl_joules) {
696 		if (DO_BIC(BIC_Pkg_J))
697 			outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
698 		if (DO_BIC(BIC_Cor_J))
699 			outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
700 		if (DO_BIC(BIC_GFX_J))
701 			outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
702 		if (DO_BIC(BIC_RAM_J))
703 			outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
704 		if (DO_BIC(BIC_PKG__))
705 			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
706 		if (DO_BIC(BIC_RAM__))
707 			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
708 	}
709 	for (mp = sys.pp; mp; mp = mp->next) {
710 		if (mp->format == FORMAT_RAW) {
711 			if (mp->width == 64)
712 				outp += sprintf(outp, "%s%18.18s", delim, mp->name);
713 			else
714 				outp += sprintf(outp, "%s%10.10s", delim, mp->name);
715 		} else {
716 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
717 				outp += sprintf(outp, "%s%8s", delim, mp->name);
718 			else
719 				outp += sprintf(outp, "%s%s", delim, mp->name);
720 		}
721 	}
722 
723 	outp += sprintf(outp, "\n");
724 }
725 
726 int dump_counters(struct thread_data *t, struct core_data *c,
727 	struct pkg_data *p)
728 {
729 	int i;
730 	struct msr_counter *mp;
731 
732 	outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
733 
734 	if (t) {
735 		outp += sprintf(outp, "CPU: %d flags 0x%x\n",
736 			t->cpu_id, t->flags);
737 		outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
738 		outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
739 		outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
740 		outp += sprintf(outp, "c1: %016llX\n", t->c1);
741 
742 		if (DO_BIC(BIC_IRQ))
743 			outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
744 		if (DO_BIC(BIC_SMI))
745 			outp += sprintf(outp, "SMI: %d\n", t->smi_count);
746 
747 		for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
748 			outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n",
749 				i, mp->msr_num, t->counter[i]);
750 		}
751 	}
752 
753 	if (c) {
754 		outp += sprintf(outp, "core: %d\n", c->core_id);
755 		outp += sprintf(outp, "c3: %016llX\n", c->c3);
756 		outp += sprintf(outp, "c6: %016llX\n", c->c6);
757 		outp += sprintf(outp, "c7: %016llX\n", c->c7);
758 		outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
759 
760 		for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
761 			outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
762 				i, mp->msr_num, c->counter[i]);
763 		}
764 		outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
765 	}
766 
767 	if (p) {
768 		outp += sprintf(outp, "package: %d\n", p->package_id);
769 
770 		outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
771 		outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
772 		outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
773 		outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
774 
775 		outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
776 		if (DO_BIC(BIC_Pkgpc3))
777 			outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
778 		if (DO_BIC(BIC_Pkgpc6))
779 			outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
780 		if (DO_BIC(BIC_Pkgpc7))
781 			outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
782 		outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
783 		outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
784 		outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
785 		outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
786 		outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
787 		outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
788 		outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
789 		outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
790 		outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
791 		outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
792 		outp += sprintf(outp, "Throttle PKG: %0X\n",
793 			p->rapl_pkg_perf_status);
794 		outp += sprintf(outp, "Throttle RAM: %0X\n",
795 			p->rapl_dram_perf_status);
796 		outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
797 
798 		for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
799 			outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n",
800 				i, mp->msr_num, p->counter[i]);
801 		}
802 	}
803 
804 	outp += sprintf(outp, "\n");
805 
806 	return 0;
807 }
808 
809 /*
810  * column formatting convention & formats
811  */
812 int format_counters(struct thread_data *t, struct core_data *c,
813 	struct pkg_data *p)
814 {
815 	double interval_float, tsc;
816 	char *fmt8;
817 	int i;
818 	struct msr_counter *mp;
819 	char *delim = "\t";
820 	int printed = 0;
821 
822 	 /* if showing only 1st thread in core and this isn't one, bail out */
823 	if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
824 		return 0;
825 
826 	 /* if showing only 1st thread in pkg and this isn't one, bail out */
827 	if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
828 		return 0;
829 
830 	/*if not summary line and --cpu is used */
831 	if ((t != &average.threads) &&
832 		(cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
833 		return 0;
834 
835 	if (DO_BIC(BIC_USEC)) {
836 		/* on each row, print how many usec each timestamp took to gather */
837 		struct timeval tv;
838 
839 		timersub(&t->tv_end, &t->tv_begin, &tv);
840 		outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
841 	}
842 
843 	/* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
844 	if (DO_BIC(BIC_TOD))
845 		outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
846 
847 	interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
848 
849 	tsc = t->tsc * tsc_tweak;
850 
851 	/* topo columns, print blanks on 1st (average) line */
852 	if (t == &average.threads) {
853 		if (DO_BIC(BIC_Package))
854 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
855 		if (DO_BIC(BIC_Core))
856 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
857 		if (DO_BIC(BIC_CPU))
858 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
859 	} else {
860 		if (DO_BIC(BIC_Package)) {
861 			if (p)
862 				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
863 			else
864 				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
865 		}
866 		if (DO_BIC(BIC_Core)) {
867 			if (c)
868 				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
869 			else
870 				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
871 		}
872 		if (DO_BIC(BIC_CPU))
873 			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
874 	}
875 
876 	if (DO_BIC(BIC_Avg_MHz))
877 		outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
878 			1.0 / units * t->aperf / interval_float);
879 
880 	if (DO_BIC(BIC_Busy))
881 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf/tsc);
882 
883 	if (DO_BIC(BIC_Bzy_MHz)) {
884 		if (has_base_hz)
885 			outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
886 		else
887 			outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
888 				tsc / units * t->aperf / t->mperf / interval_float);
889 	}
890 
891 	if (DO_BIC(BIC_TSC_MHz))
892 		outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float);
893 
894 	/* IRQ */
895 	if (DO_BIC(BIC_IRQ)) {
896 		if (sums_need_wide_columns)
897 			outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
898 		else
899 			outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
900 	}
901 
902 	/* SMI */
903 	if (DO_BIC(BIC_SMI))
904 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
905 
906 	/* Added counters */
907 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
908 		if (mp->format == FORMAT_RAW) {
909 			if (mp->width == 32)
910 				outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) t->counter[i]);
911 			else
912 				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
913 		} else if (mp->format == FORMAT_DELTA) {
914 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
915 				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
916 			else
917 				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
918 		} else if (mp->format == FORMAT_PERCENT) {
919 			if (mp->type == COUNTER_USEC)
920 				outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), t->counter[i]/interval_float/10000);
921 			else
922 				outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i]/tsc);
923 		}
924 	}
925 
926 	/* C1 */
927 	if (DO_BIC(BIC_CPU_c1))
928 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1/tsc);
929 
930 
931 	/* print per-core data only for 1st thread in core */
932 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
933 		goto done;
934 
935 	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
936 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc);
937 	if (DO_BIC(BIC_CPU_c6))
938 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc);
939 	if (DO_BIC(BIC_CPU_c7))
940 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7/tsc);
941 
942 	/* Mod%c6 */
943 	if (DO_BIC(BIC_Mod_c6))
944 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
945 
946 	if (DO_BIC(BIC_CoreTmp))
947 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
948 
949 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
950 		if (mp->format == FORMAT_RAW) {
951 			if (mp->width == 32)
952 				outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) c->counter[i]);
953 			else
954 				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
955 		} else if (mp->format == FORMAT_DELTA) {
956 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
957 				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
958 			else
959 				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
960 		} else if (mp->format == FORMAT_PERCENT) {
961 			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i]/tsc);
962 		}
963 	}
964 
965 	/* print per-package data only for 1st core in package */
966 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
967 		goto done;
968 
969 	/* PkgTmp */
970 	if (DO_BIC(BIC_PkgTmp))
971 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
972 
973 	/* GFXrc6 */
974 	if (DO_BIC(BIC_GFX_rc6)) {
975 		if (p->gfx_rc6_ms == -1) {	/* detect GFX counter reset */
976 			outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
977 		} else {
978 			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
979 				p->gfx_rc6_ms / 10.0 / interval_float);
980 		}
981 	}
982 
983 	/* GFXMHz */
984 	if (DO_BIC(BIC_GFXMHz))
985 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
986 
987 	/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
988 	if (DO_BIC(BIC_Totl_c0))
989 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
990 	if (DO_BIC(BIC_Any_c0))
991 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc);
992 	if (DO_BIC(BIC_GFX_c0))
993 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc);
994 	if (DO_BIC(BIC_CPUGFX))
995 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc);
996 
997 	if (DO_BIC(BIC_Pkgpc2))
998 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc);
999 	if (DO_BIC(BIC_Pkgpc3))
1000 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3/tsc);
1001 	if (DO_BIC(BIC_Pkgpc6))
1002 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6/tsc);
1003 	if (DO_BIC(BIC_Pkgpc7))
1004 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7/tsc);
1005 	if (DO_BIC(BIC_Pkgpc8))
1006 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8/tsc);
1007 	if (DO_BIC(BIC_Pkgpc9))
1008 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9/tsc);
1009 	if (DO_BIC(BIC_Pkgpc10))
1010 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc);
1011 
1012 	if (DO_BIC(BIC_CPU_LPI))
1013 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
1014 	if (DO_BIC(BIC_SYS_LPI))
1015 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
1016 
1017 	/*
1018  	 * If measurement interval exceeds minimum RAPL Joule Counter range,
1019  	 * indicate that results are suspect by printing "**" in fraction place.
1020  	 */
1021 	if (interval_float < rapl_joule_counter_range)
1022 		fmt8 = "%s%.2f";
1023 	else
1024 		fmt8 = "%6.0f**";
1025 
1026 	if (DO_BIC(BIC_PkgWatt))
1027 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
1028 	if (DO_BIC(BIC_CorWatt))
1029 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
1030 	if (DO_BIC(BIC_GFXWatt))
1031 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
1032 	if (DO_BIC(BIC_RAMWatt))
1033 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float);
1034 	if (DO_BIC(BIC_Pkg_J))
1035 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
1036 	if (DO_BIC(BIC_Cor_J))
1037 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
1038 	if (DO_BIC(BIC_GFX_J))
1039 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
1040 	if (DO_BIC(BIC_RAM_J))
1041 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
1042 	if (DO_BIC(BIC_PKG__))
1043 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
1044 	if (DO_BIC(BIC_RAM__))
1045 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
1046 
1047 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1048 		if (mp->format == FORMAT_RAW) {
1049 			if (mp->width == 32)
1050 				outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) p->counter[i]);
1051 			else
1052 				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
1053 		} else if (mp->format == FORMAT_DELTA) {
1054 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1055 				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
1056 			else
1057 				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
1058 		} else if (mp->format == FORMAT_PERCENT) {
1059 			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i]/tsc);
1060 		}
1061 	}
1062 
1063 done:
1064 	if (*(outp - 1) != '\n')
1065 		outp += sprintf(outp, "\n");
1066 
1067 	return 0;
1068 }
1069 
1070 void flush_output_stdout(void)
1071 {
1072 	FILE *filep;
1073 
1074 	if (outf == stderr)
1075 		filep = stdout;
1076 	else
1077 		filep = outf;
1078 
1079 	fputs(output_buffer, filep);
1080 	fflush(filep);
1081 
1082 	outp = output_buffer;
1083 }
1084 void flush_output_stderr(void)
1085 {
1086 	fputs(output_buffer, outf);
1087 	fflush(outf);
1088 	outp = output_buffer;
1089 }
1090 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1091 {
1092 	static int printed;
1093 
1094 	if (!printed || !summary_only)
1095 		print_header("\t");
1096 
1097 	if (topo.num_cpus > 1)
1098 		format_counters(&average.threads, &average.cores,
1099 			&average.packages);
1100 
1101 	printed = 1;
1102 
1103 	if (summary_only)
1104 		return;
1105 
1106 	for_all_cpus(format_counters, t, c, p);
1107 }
1108 
/*
 * DELTA_WRAP32(new, old)
 *
 * Replace 'old' with the distance from 'old' to 'new' for a counter that
 * is 32 bits wide and has wrapped at most once during the interval:
 *   new >= old:  old = new - old
 *   new <  old:  old = 2^32 + new - old
 *
 * Equal readings yield 0 (no progress) regardless of the width of the
 * variable that receives the result.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (safe inside an un-braced if/else), and every use of
 * an argument is parenthesized.  Both arguments are evaluated more than
 * once, so they must not have side effects; 'old' must be an lvalue.
 */
#define DELTA_WRAP32(new, old)					\
	do {							\
		if ((new) >= (old))				\
			(old) = (new) - (old);			\
		else						\
			(old) = 0x100000000 + (new) - (old);	\
	} while (0)
1115 
1116 int
1117 delta_package(struct pkg_data *new, struct pkg_data *old)
1118 {
1119 	int i;
1120 	struct msr_counter *mp;
1121 
1122 
1123 	if (DO_BIC(BIC_Totl_c0))
1124 		old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
1125 	if (DO_BIC(BIC_Any_c0))
1126 		old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
1127 	if (DO_BIC(BIC_GFX_c0))
1128 		old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
1129 	if (DO_BIC(BIC_CPUGFX))
1130 		old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
1131 
1132 	old->pc2 = new->pc2 - old->pc2;
1133 	if (DO_BIC(BIC_Pkgpc3))
1134 		old->pc3 = new->pc3 - old->pc3;
1135 	if (DO_BIC(BIC_Pkgpc6))
1136 		old->pc6 = new->pc6 - old->pc6;
1137 	if (DO_BIC(BIC_Pkgpc7))
1138 		old->pc7 = new->pc7 - old->pc7;
1139 	old->pc8 = new->pc8 - old->pc8;
1140 	old->pc9 = new->pc9 - old->pc9;
1141 	old->pc10 = new->pc10 - old->pc10;
1142 	old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
1143 	old->sys_lpi = new->sys_lpi - old->sys_lpi;
1144 	old->pkg_temp_c = new->pkg_temp_c;
1145 
1146 	/* flag an error when rc6 counter resets/wraps */
1147 	if (old->gfx_rc6_ms >  new->gfx_rc6_ms)
1148 		old->gfx_rc6_ms = -1;
1149 	else
1150 		old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
1151 
1152 	old->gfx_mhz = new->gfx_mhz;
1153 
1154 	DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
1155 	DELTA_WRAP32(new->energy_cores, old->energy_cores);
1156 	DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
1157 	DELTA_WRAP32(new->energy_dram, old->energy_dram);
1158 	DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
1159 	DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
1160 
1161 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1162 		if (mp->format == FORMAT_RAW)
1163 			old->counter[i] = new->counter[i];
1164 		else
1165 			old->counter[i] = new->counter[i] - old->counter[i];
1166 	}
1167 
1168 	return 0;
1169 }
1170 
1171 void
1172 delta_core(struct core_data *new, struct core_data *old)
1173 {
1174 	int i;
1175 	struct msr_counter *mp;
1176 
1177 	old->c3 = new->c3 - old->c3;
1178 	old->c6 = new->c6 - old->c6;
1179 	old->c7 = new->c7 - old->c7;
1180 	old->core_temp_c = new->core_temp_c;
1181 	old->mc6_us = new->mc6_us - old->mc6_us;
1182 
1183 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1184 		if (mp->format == FORMAT_RAW)
1185 			old->counter[i] = new->counter[i];
1186 		else
1187 			old->counter[i] = new->counter[i] - old->counter[i];
1188 	}
1189 }
1190 
1191 /*
1192  * old = new - old
1193  */
1194 int
1195 delta_thread(struct thread_data *new, struct thread_data *old,
1196 	struct core_data *core_delta)
1197 {
1198 	int i;
1199 	struct msr_counter *mp;
1200 
1201 	/*
1202 	 * the timestamps from start of measurement interval are in "old"
1203 	 * the timestamp from end of measurement interval are in "new"
1204 	 * over-write old w/ new so we can print end of interval values
1205 	 */
1206 
1207 	old->tv_begin = new->tv_begin;
1208 	old->tv_end = new->tv_end;
1209 
1210 	old->tsc = new->tsc - old->tsc;
1211 
1212 	/* check for TSC < 1 Mcycles over interval */
1213 	if (old->tsc < (1000 * 1000))
1214 		errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
1215 		     "You can disable all c-states by booting with \"idle=poll\"\n"
1216 		     "or just the deep ones with \"processor.max_cstate=1\"");
1217 
1218 	old->c1 = new->c1 - old->c1;
1219 
1220 	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1221 		if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
1222 			old->aperf = new->aperf - old->aperf;
1223 			old->mperf = new->mperf - old->mperf;
1224 		} else {
1225 			return -1;
1226 		}
1227 	}
1228 
1229 
1230 	if (use_c1_residency_msr) {
1231 		/*
1232 		 * Some models have a dedicated C1 residency MSR,
1233 		 * which should be more accurate than the derivation below.
1234 		 */
1235 	} else {
1236 		/*
1237 		 * As counter collection is not atomic,
1238 		 * it is possible for mperf's non-halted cycles + idle states
1239 		 * to exceed TSC's all cycles: show c1 = 0% in that case.
1240 		 */
1241 		if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
1242 			old->c1 = 0;
1243 		else {
1244 			/* normal case, derive c1 */
1245 			old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
1246 				- core_delta->c6 - core_delta->c7;
1247 		}
1248 	}
1249 
1250 	if (old->mperf == 0) {
1251 		if (debug > 1)
1252 			fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
1253 		old->mperf = 1;	/* divide by 0 protection */
1254 	}
1255 
1256 	if (DO_BIC(BIC_IRQ))
1257 		old->irq_count = new->irq_count - old->irq_count;
1258 
1259 	if (DO_BIC(BIC_SMI))
1260 		old->smi_count = new->smi_count - old->smi_count;
1261 
1262 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1263 		if (mp->format == FORMAT_RAW)
1264 			old->counter[i] = new->counter[i];
1265 		else
1266 			old->counter[i] = new->counter[i] - old->counter[i];
1267 	}
1268 	return 0;
1269 }
1270 
1271 int delta_cpu(struct thread_data *t, struct core_data *c,
1272 	struct pkg_data *p, struct thread_data *t2,
1273 	struct core_data *c2, struct pkg_data *p2)
1274 {
1275 	int retval = 0;
1276 
1277 	/* calculate core delta only for 1st thread in core */
1278 	if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
1279 		delta_core(c, c2);
1280 
1281 	/* always calculate thread delta */
1282 	retval = delta_thread(t, t2, c2);	/* c2 is core delta */
1283 	if (retval)
1284 		return retval;
1285 
1286 	/* calculate package delta only for 1st core in package */
1287 	if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
1288 		retval = delta_package(p, p2);
1289 
1290 	return retval;
1291 }
1292 
1293 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1294 {
1295 	int i;
1296 	struct msr_counter  *mp;
1297 
1298 	t->tv_begin.tv_sec = 0;
1299 	t->tv_begin.tv_usec = 0;
1300 	t->tv_end.tv_sec = 0;
1301 	t->tv_end.tv_usec = 0;
1302 
1303 	t->tsc = 0;
1304 	t->aperf = 0;
1305 	t->mperf = 0;
1306 	t->c1 = 0;
1307 
1308 	t->irq_count = 0;
1309 	t->smi_count = 0;
1310 
1311 	/* tells format_counters to dump all fields from this set */
1312 	t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
1313 
1314 	c->c3 = 0;
1315 	c->c6 = 0;
1316 	c->c7 = 0;
1317 	c->mc6_us = 0;
1318 	c->core_temp_c = 0;
1319 
1320 	p->pkg_wtd_core_c0 = 0;
1321 	p->pkg_any_core_c0 = 0;
1322 	p->pkg_any_gfxe_c0 = 0;
1323 	p->pkg_both_core_gfxe_c0 = 0;
1324 
1325 	p->pc2 = 0;
1326 	if (DO_BIC(BIC_Pkgpc3))
1327 		p->pc3 = 0;
1328 	if (DO_BIC(BIC_Pkgpc6))
1329 		p->pc6 = 0;
1330 	if (DO_BIC(BIC_Pkgpc7))
1331 		p->pc7 = 0;
1332 	p->pc8 = 0;
1333 	p->pc9 = 0;
1334 	p->pc10 = 0;
1335 	p->cpu_lpi = 0;
1336 	p->sys_lpi = 0;
1337 
1338 	p->energy_pkg = 0;
1339 	p->energy_dram = 0;
1340 	p->energy_cores = 0;
1341 	p->energy_gfx = 0;
1342 	p->rapl_pkg_perf_status = 0;
1343 	p->rapl_dram_perf_status = 0;
1344 	p->pkg_temp_c = 0;
1345 
1346 	p->gfx_rc6_ms = 0;
1347 	p->gfx_mhz = 0;
1348 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
1349 		t->counter[i] = 0;
1350 
1351 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
1352 		c->counter[i] = 0;
1353 
1354 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
1355 		p->counter[i] = 0;
1356 }
1357 int sum_counters(struct thread_data *t, struct core_data *c,
1358 	struct pkg_data *p)
1359 {
1360 	int i;
1361 	struct msr_counter *mp;
1362 
1363 	/* remember first tv_begin */
1364 	if (average.threads.tv_begin.tv_sec == 0)
1365 		average.threads.tv_begin = t->tv_begin;
1366 
1367 	/* remember last tv_end */
1368 	average.threads.tv_end = t->tv_end;
1369 
1370 	average.threads.tsc += t->tsc;
1371 	average.threads.aperf += t->aperf;
1372 	average.threads.mperf += t->mperf;
1373 	average.threads.c1 += t->c1;
1374 
1375 	average.threads.irq_count += t->irq_count;
1376 	average.threads.smi_count += t->smi_count;
1377 
1378 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1379 		if (mp->format == FORMAT_RAW)
1380 			continue;
1381 		average.threads.counter[i] += t->counter[i];
1382 	}
1383 
1384 	/* sum per-core values only for 1st thread in core */
1385 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1386 		return 0;
1387 
1388 	average.cores.c3 += c->c3;
1389 	average.cores.c6 += c->c6;
1390 	average.cores.c7 += c->c7;
1391 	average.cores.mc6_us += c->mc6_us;
1392 
1393 	average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
1394 
1395 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1396 		if (mp->format == FORMAT_RAW)
1397 			continue;
1398 		average.cores.counter[i] += c->counter[i];
1399 	}
1400 
1401 	/* sum per-pkg values only for 1st core in pkg */
1402 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1403 		return 0;
1404 
1405 	if (DO_BIC(BIC_Totl_c0))
1406 		average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
1407 	if (DO_BIC(BIC_Any_c0))
1408 		average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
1409 	if (DO_BIC(BIC_GFX_c0))
1410 		average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
1411 	if (DO_BIC(BIC_CPUGFX))
1412 		average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
1413 
1414 	average.packages.pc2 += p->pc2;
1415 	if (DO_BIC(BIC_Pkgpc3))
1416 		average.packages.pc3 += p->pc3;
1417 	if (DO_BIC(BIC_Pkgpc6))
1418 		average.packages.pc6 += p->pc6;
1419 	if (DO_BIC(BIC_Pkgpc7))
1420 		average.packages.pc7 += p->pc7;
1421 	average.packages.pc8 += p->pc8;
1422 	average.packages.pc9 += p->pc9;
1423 	average.packages.pc10 += p->pc10;
1424 
1425 	average.packages.cpu_lpi = p->cpu_lpi;
1426 	average.packages.sys_lpi = p->sys_lpi;
1427 
1428 	average.packages.energy_pkg += p->energy_pkg;
1429 	average.packages.energy_dram += p->energy_dram;
1430 	average.packages.energy_cores += p->energy_cores;
1431 	average.packages.energy_gfx += p->energy_gfx;
1432 
1433 	average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
1434 	average.packages.gfx_mhz = p->gfx_mhz;
1435 
1436 	average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
1437 
1438 	average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
1439 	average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
1440 
1441 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1442 		if (mp->format == FORMAT_RAW)
1443 			continue;
1444 		average.packages.counter[i] += p->counter[i];
1445 	}
1446 	return 0;
1447 }
1448 /*
1449  * sum the counters for all cpus in the system
1450  * compute the weighted average
1451  */
1452 void compute_average(struct thread_data *t, struct core_data *c,
1453 	struct pkg_data *p)
1454 {
1455 	int i;
1456 	struct msr_counter *mp;
1457 
1458 	clear_counters(&average.threads, &average.cores, &average.packages);
1459 
1460 	for_all_cpus(sum_counters, t, c, p);
1461 
1462 	average.threads.tsc /= topo.num_cpus;
1463 	average.threads.aperf /= topo.num_cpus;
1464 	average.threads.mperf /= topo.num_cpus;
1465 	average.threads.c1 /= topo.num_cpus;
1466 
1467 	if (average.threads.irq_count > 9999999)
1468 		sums_need_wide_columns = 1;
1469 
1470 	average.cores.c3 /= topo.num_cores;
1471 	average.cores.c6 /= topo.num_cores;
1472 	average.cores.c7 /= topo.num_cores;
1473 	average.cores.mc6_us /= topo.num_cores;
1474 
1475 	if (DO_BIC(BIC_Totl_c0))
1476 		average.packages.pkg_wtd_core_c0 /= topo.num_packages;
1477 	if (DO_BIC(BIC_Any_c0))
1478 		average.packages.pkg_any_core_c0 /= topo.num_packages;
1479 	if (DO_BIC(BIC_GFX_c0))
1480 		average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
1481 	if (DO_BIC(BIC_CPUGFX))
1482 		average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
1483 
1484 	average.packages.pc2 /= topo.num_packages;
1485 	if (DO_BIC(BIC_Pkgpc3))
1486 		average.packages.pc3 /= topo.num_packages;
1487 	if (DO_BIC(BIC_Pkgpc6))
1488 		average.packages.pc6 /= topo.num_packages;
1489 	if (DO_BIC(BIC_Pkgpc7))
1490 		average.packages.pc7 /= topo.num_packages;
1491 
1492 	average.packages.pc8 /= topo.num_packages;
1493 	average.packages.pc9 /= topo.num_packages;
1494 	average.packages.pc10 /= topo.num_packages;
1495 
1496 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1497 		if (mp->format == FORMAT_RAW)
1498 			continue;
1499 		if (mp->type == COUNTER_ITEMS) {
1500 			if (average.threads.counter[i] > 9999999)
1501 				sums_need_wide_columns = 1;
1502 			continue;
1503 		}
1504 		average.threads.counter[i] /= topo.num_cpus;
1505 	}
1506 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1507 		if (mp->format == FORMAT_RAW)
1508 			continue;
1509 		if (mp->type == COUNTER_ITEMS) {
1510 			if (average.cores.counter[i] > 9999999)
1511 				sums_need_wide_columns = 1;
1512 		}
1513 		average.cores.counter[i] /= topo.num_cores;
1514 	}
1515 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1516 		if (mp->format == FORMAT_RAW)
1517 			continue;
1518 		if (mp->type == COUNTER_ITEMS) {
1519 			if (average.packages.counter[i] > 9999999)
1520 				sums_need_wide_columns = 1;
1521 		}
1522 		average.packages.counter[i] /= topo.num_packages;
1523 	}
1524 }
1525 
/*
 * rdtsc()
 *
 * Execute the x86 RDTSC instruction on the current CPU and return the
 * 64-bit result assembled from EDX (upper half) and EAX (lower half).
 */
static unsigned long long rdtsc(void)
{
	unsigned int lo32, hi32;
	unsigned long long stamp;

	__asm__ __volatile__("rdtsc" : "=a" (lo32), "=d" (hi32));

	stamp = (unsigned long long)hi32 << 32;
	stamp |= lo32;

	return stamp;
}
1534 
/*
 * fopen_or_die()
 *
 * fopen() wrapper that never returns NULL: when the file cannot be
 * opened the program exits with status 1 via err(), which reports the
 * path together with the errno text.
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *stream = fopen(path, mode);

	if (stream != NULL)
		return stream;

	err(1, "%s: open failed", path);
}
/*
 * snapshot_sysfs_counter()
 *
 * Open 'path', parse one unsigned decimal integer from it and return
 * that value.
 *
 * The program exits with status 1 when the file cannot be opened
 * (via fopen_or_die()) or when no integer can be parsed from it.
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
	FILE *fp;
	int retval;
	unsigned long long counter;

	fp = fopen_or_die(path, "r");

	/*
	 * "%llu" is the conversion that matches unsigned long long;
	 * the signed "%lld" does not match the destination type.
	 */
	retval = fscanf(fp, "%llu", &counter);
	if (retval != 1)
		err(1, "snapshot_sysfs_counter(%s)", path);

	fclose(fp);

	return counter;
}
1567 
1568 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
1569 {
1570 	if (mp->msr_num != 0) {
1571 		if (get_msr(cpu, mp->msr_num, counterp))
1572 			return -1;
1573 	} else {
1574 		char path[128 + PATH_BYTES];
1575 
1576 		if (mp->flags & SYSFS_PERCPU) {
1577 			sprintf(path, "/sys/devices/system/cpu/cpu%d/%s",
1578 				 cpu, mp->path);
1579 
1580 			*counterp = snapshot_sysfs_counter(path);
1581 		} else {
1582 			*counterp = snapshot_sysfs_counter(mp->path);
1583 		}
1584 	}
1585 
1586 	return 0;
1587 }
1588 
1589 /*
1590  * get_counters(...)
1591  * migrate to cpu
1592  * acquire and record local counters for that cpu
1593  */
1594 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1595 {
1596 	int cpu = t->cpu_id;
1597 	unsigned long long msr;
1598 	int aperf_mperf_retry_count = 0;
1599 	struct msr_counter *mp;
1600 	int i;
1601 
1602 
1603 	gettimeofday(&t->tv_begin, (struct timezone *)NULL);
1604 
1605 	if (cpu_migrate(cpu)) {
1606 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
1607 		return -1;
1608 	}
1609 
1610 retry:
1611 	t->tsc = rdtsc();	/* we are running on local CPU of interest */
1612 
1613 	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
1614 		unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
1615 
1616 		/*
1617 		 * The TSC, APERF and MPERF must be read together for
1618 		 * APERF/MPERF and MPERF/TSC to give accurate results.
1619 		 *
1620 		 * Unfortunately, APERF and MPERF are read by
1621 		 * individual system call, so delays may occur
1622 		 * between them.  If the time to read them
1623 		 * varies by a large amount, we re-read them.
1624 		 */
1625 
1626 		/*
1627 		 * This initial dummy APERF read has been seen to
1628 		 * reduce jitter in the subsequent reads.
1629 		 */
1630 
1631 		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1632 			return -3;
1633 
1634 		t->tsc = rdtsc();	/* re-read close to APERF */
1635 
1636 		tsc_before = t->tsc;
1637 
1638 		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1639 			return -3;
1640 
1641 		tsc_between = rdtsc();
1642 
1643 		if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
1644 			return -4;
1645 
1646 		tsc_after = rdtsc();
1647 
1648 		aperf_time = tsc_between - tsc_before;
1649 		mperf_time = tsc_after - tsc_between;
1650 
1651 		/*
1652 		 * If the system call latency to read APERF and MPERF
1653 		 * differ by more than 2x, then try again.
1654 		 */
1655 		if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
1656 			aperf_mperf_retry_count++;
1657 			if (aperf_mperf_retry_count < 5)
1658 				goto retry;
1659 			else
1660 				warnx("cpu%d jitter %lld %lld",
1661 					cpu, aperf_time, mperf_time);
1662 		}
1663 		aperf_mperf_retry_count = 0;
1664 
1665 		t->aperf = t->aperf * aperf_mperf_multiplier;
1666 		t->mperf = t->mperf * aperf_mperf_multiplier;
1667 	}
1668 
1669 	if (DO_BIC(BIC_IRQ))
1670 		t->irq_count = irqs_per_cpu[cpu];
1671 	if (DO_BIC(BIC_SMI)) {
1672 		if (get_msr(cpu, MSR_SMI_COUNT, &msr))
1673 			return -5;
1674 		t->smi_count = msr & 0xFFFFFFFF;
1675 	}
1676 	if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
1677 		if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
1678 			return -6;
1679 	}
1680 
1681 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1682 		if (get_mp(cpu, mp, &t->counter[i]))
1683 			return -10;
1684 	}
1685 
1686 	/* collect core counters only for 1st thread in core */
1687 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1688 		goto done;
1689 
1690 	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) {
1691 		if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
1692 			return -6;
1693 	}
1694 
1695 	if (DO_BIC(BIC_CPU_c6) && !do_knl_cstates) {
1696 		if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
1697 			return -7;
1698 	} else if (do_knl_cstates) {
1699 		if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
1700 			return -7;
1701 	}
1702 
1703 	if (DO_BIC(BIC_CPU_c7))
1704 		if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
1705 			return -8;
1706 
1707 	if (DO_BIC(BIC_Mod_c6))
1708 		if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
1709 			return -8;
1710 
1711 	if (DO_BIC(BIC_CoreTmp)) {
1712 		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
1713 			return -9;
1714 		c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1715 	}
1716 
1717 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1718 		if (get_mp(cpu, mp, &c->counter[i]))
1719 			return -10;
1720 	}
1721 
1722 	/* collect package counters only for 1st core in package */
1723 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1724 		goto done;
1725 
1726 	if (DO_BIC(BIC_Totl_c0)) {
1727 		if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
1728 			return -10;
1729 	}
1730 	if (DO_BIC(BIC_Any_c0)) {
1731 		if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
1732 			return -11;
1733 	}
1734 	if (DO_BIC(BIC_GFX_c0)) {
1735 		if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
1736 			return -12;
1737 	}
1738 	if (DO_BIC(BIC_CPUGFX)) {
1739 		if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
1740 			return -13;
1741 	}
1742 	if (DO_BIC(BIC_Pkgpc3))
1743 		if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
1744 			return -9;
1745 	if (DO_BIC(BIC_Pkgpc6)) {
1746 		if (do_slm_cstates) {
1747 			if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
1748 				return -10;
1749 		} else {
1750 			if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
1751 				return -10;
1752 		}
1753 	}
1754 
1755 	if (DO_BIC(BIC_Pkgpc2))
1756 		if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
1757 			return -11;
1758 	if (DO_BIC(BIC_Pkgpc7))
1759 		if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
1760 			return -12;
1761 	if (DO_BIC(BIC_Pkgpc8))
1762 		if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
1763 			return -13;
1764 	if (DO_BIC(BIC_Pkgpc9))
1765 		if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
1766 			return -13;
1767 	if (DO_BIC(BIC_Pkgpc10))
1768 		if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
1769 			return -13;
1770 
1771 	if (DO_BIC(BIC_CPU_LPI))
1772 		p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
1773 	if (DO_BIC(BIC_SYS_LPI))
1774 		p->sys_lpi = cpuidle_cur_sys_lpi_us;
1775 
1776 	if (do_rapl & RAPL_PKG) {
1777 		if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
1778 			return -13;
1779 		p->energy_pkg = msr & 0xFFFFFFFF;
1780 	}
1781 	if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
1782 		if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
1783 			return -14;
1784 		p->energy_cores = msr & 0xFFFFFFFF;
1785 	}
1786 	if (do_rapl & RAPL_DRAM) {
1787 		if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
1788 			return -15;
1789 		p->energy_dram = msr & 0xFFFFFFFF;
1790 	}
1791 	if (do_rapl & RAPL_GFX) {
1792 		if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
1793 			return -16;
1794 		p->energy_gfx = msr & 0xFFFFFFFF;
1795 	}
1796 	if (do_rapl & RAPL_PKG_PERF_STATUS) {
1797 		if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
1798 			return -16;
1799 		p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
1800 	}
1801 	if (do_rapl & RAPL_DRAM_PERF_STATUS) {
1802 		if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
1803 			return -16;
1804 		p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
1805 	}
1806 	if (DO_BIC(BIC_PkgTmp)) {
1807 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
1808 			return -17;
1809 		p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1810 	}
1811 
1812 	if (DO_BIC(BIC_GFX_rc6))
1813 		p->gfx_rc6_ms = gfx_cur_rc6_ms;
1814 
1815 	if (DO_BIC(BIC_GFXMHz))
1816 		p->gfx_mhz = gfx_cur_mhz;
1817 
1818 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1819 		if (get_mp(cpu, mp, &p->counter[i]))
1820 			return -10;
1821 	}
1822 done:
1823 	gettimeofday(&t->tv_end, (struct timezone *)NULL);
1824 
1825 	return 0;
1826 }
1827 
/*
 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
 * If you change the values, note they are used both in comparisons
 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
 */

#define PCLUKN 0 /* Unknown */
#define PCLRSV 1 /* Reserved */
#define PCL__0 2 /* PC0 */
#define PCL__1 3 /* PC1 */
#define PCL__2 4 /* PC2 */
#define PCL__3 5 /* PC3 */
#define PCL__4 6 /* PC4 */
#define PCL__6 7 /* PC6 */
#define PCL_6N 8 /* PC6 No Retention */
#define PCL_6R 9 /* PC6 Retention */
#define PCL__7 10 /* PC7 */
#define PCL_7S 11 /* PC7 Shrink */
#define PCL__8 12 /* PC8 */
#define PCL__9 13 /* PC9 */
#define PCLUNL 14 /* Unlimited */

int pkg_cstate_limit = PCLUKN;

/*
 * Printable name for each PCL* value.  Designated initializers tie every
 * string to its constant, so that PCLUKN prints "unknown" and PCLRSV
 * prints "reserved" and the table cannot drift out of step with the
 * #defines above.
 */
char *pkg_cstate_limit_strings[] = {
	[PCLUKN] = "unknown",
	[PCLRSV] = "reserved",
	[PCL__0] = "pc0",
	[PCL__1] = "pc1",
	[PCL__2] = "pc2",
	[PCL__3] = "pc3",
	[PCL__4] = "pc4",
	[PCL__6] = "pc6",
	[PCL_6N] = "pc6n",
	[PCL_6R] = "pc6r",
	[PCL__7] = "pc7",
	[PCL_7S] = "pc7s",
	[PCL__8] = "pc8",
	[PCL__9] = "pc9",
	[PCLUNL] = "unlimited",
};

/*
 * Per-platform tables, 16 entries each, holding PCL* values.
 * NOTE(review): presumably indexed by the limit field read from
 * MSR_PKG_CST_CONFIG_CONTROL -- the lookup is not in this part of the
 * file; confirm against the code that uses them.
 */
int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};
int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1862 
1863 
1864 static void
1865 calculate_tsc_tweak()
1866 {
1867 	tsc_tweak = base_hz / tsc_hz;
1868 }
1869 
1870 static void
1871 dump_nhm_platform_info(void)
1872 {
1873 	unsigned long long msr;
1874 	unsigned int ratio;
1875 
1876 	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
1877 
1878 	fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
1879 
1880 	ratio = (msr >> 40) & 0xFF;
1881 	fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n",
1882 		ratio, bclk, ratio * bclk);
1883 
1884 	ratio = (msr >> 8) & 0xFF;
1885 	fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
1886 		ratio, bclk, ratio * bclk);
1887 
1888 	get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
1889 	fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
1890 		base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
1891 
1892 	return;
1893 }
1894 
1895 static void
1896 dump_hsw_turbo_ratio_limits(void)
1897 {
1898 	unsigned long long msr;
1899 	unsigned int ratio;
1900 
1901 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
1902 
1903 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
1904 
1905 	ratio = (msr >> 8) & 0xFF;
1906 	if (ratio)
1907 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n",
1908 			ratio, bclk, ratio * bclk);
1909 
1910 	ratio = (msr >> 0) & 0xFF;
1911 	if (ratio)
1912 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n",
1913 			ratio, bclk, ratio * bclk);
1914 	return;
1915 }
1916 
1917 static void
1918 dump_ivt_turbo_ratio_limits(void)
1919 {
1920 	unsigned long long msr;
1921 	unsigned int ratio;
1922 
1923 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
1924 
1925 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
1926 
1927 	ratio = (msr >> 56) & 0xFF;
1928 	if (ratio)
1929 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n",
1930 			ratio, bclk, ratio * bclk);
1931 
1932 	ratio = (msr >> 48) & 0xFF;
1933 	if (ratio)
1934 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n",
1935 			ratio, bclk, ratio * bclk);
1936 
1937 	ratio = (msr >> 40) & 0xFF;
1938 	if (ratio)
1939 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n",
1940 			ratio, bclk, ratio * bclk);
1941 
1942 	ratio = (msr >> 32) & 0xFF;
1943 	if (ratio)
1944 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n",
1945 			ratio, bclk, ratio * bclk);
1946 
1947 	ratio = (msr >> 24) & 0xFF;
1948 	if (ratio)
1949 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n",
1950 			ratio, bclk, ratio * bclk);
1951 
1952 	ratio = (msr >> 16) & 0xFF;
1953 	if (ratio)
1954 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n",
1955 			ratio, bclk, ratio * bclk);
1956 
1957 	ratio = (msr >> 8) & 0xFF;
1958 	if (ratio)
1959 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n",
1960 			ratio, bclk, ratio * bclk);
1961 
1962 	ratio = (msr >> 0) & 0xFF;
1963 	if (ratio)
1964 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n",
1965 			ratio, bclk, ratio * bclk);
1966 	return;
1967 }
1968 int has_turbo_ratio_group_limits(int family, int model)
1969 {
1970 
1971 	if (!genuine_intel)
1972 		return 0;
1973 
1974 	switch (model) {
1975 	case INTEL_FAM6_ATOM_GOLDMONT:
1976 	case INTEL_FAM6_SKYLAKE_X:
1977 	case INTEL_FAM6_ATOM_DENVERTON:
1978 		return 1;
1979 	}
1980 	return 0;
1981 }
1982 
1983 static void
1984 dump_turbo_ratio_limits(int family, int model)
1985 {
1986 	unsigned long long msr, core_counts;
1987 	unsigned int ratio, group_size;
1988 
1989 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
1990 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
1991 
1992 	if (has_turbo_ratio_group_limits(family, model)) {
1993 		get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
1994 		fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
1995 	} else {
1996 		core_counts = 0x0807060504030201;
1997 	}
1998 
1999 	ratio = (msr >> 56) & 0xFF;
2000 	group_size = (core_counts >> 56) & 0xFF;
2001 	if (ratio)
2002 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2003 			ratio, bclk, ratio * bclk, group_size);
2004 
2005 	ratio = (msr >> 48) & 0xFF;
2006 	group_size = (core_counts >> 48) & 0xFF;
2007 	if (ratio)
2008 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2009 			ratio, bclk, ratio * bclk, group_size);
2010 
2011 	ratio = (msr >> 40) & 0xFF;
2012 	group_size = (core_counts >> 40) & 0xFF;
2013 	if (ratio)
2014 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2015 			ratio, bclk, ratio * bclk, group_size);
2016 
2017 	ratio = (msr >> 32) & 0xFF;
2018 	group_size = (core_counts >> 32) & 0xFF;
2019 	if (ratio)
2020 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2021 			ratio, bclk, ratio * bclk, group_size);
2022 
2023 	ratio = (msr >> 24) & 0xFF;
2024 	group_size = (core_counts >> 24) & 0xFF;
2025 	if (ratio)
2026 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2027 			ratio, bclk, ratio * bclk, group_size);
2028 
2029 	ratio = (msr >> 16) & 0xFF;
2030 	group_size = (core_counts >> 16) & 0xFF;
2031 	if (ratio)
2032 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2033 			ratio, bclk, ratio * bclk, group_size);
2034 
2035 	ratio = (msr >> 8) & 0xFF;
2036 	group_size = (core_counts >> 8) & 0xFF;
2037 	if (ratio)
2038 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2039 			ratio, bclk, ratio * bclk, group_size);
2040 
2041 	ratio = (msr >> 0) & 0xFF;
2042 	group_size = (core_counts >> 0) & 0xFF;
2043 	if (ratio)
2044 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2045 			ratio, bclk, ratio * bclk, group_size);
2046 	return;
2047 }
2048 
2049 static void
2050 dump_atom_turbo_ratio_limits(void)
2051 {
2052 	unsigned long long msr;
2053 	unsigned int ratio;
2054 
2055 	get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
2056 	fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2057 
2058 	ratio = (msr >> 0) & 0x3F;
2059 	if (ratio)
2060 		fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n",
2061 			ratio, bclk, ratio * bclk);
2062 
2063 	ratio = (msr >> 8) & 0x3F;
2064 	if (ratio)
2065 		fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n",
2066 			ratio, bclk, ratio * bclk);
2067 
2068 	ratio = (msr >> 16) & 0x3F;
2069 	if (ratio)
2070 		fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
2071 			ratio, bclk, ratio * bclk);
2072 
2073 	get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
2074 	fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2075 
2076 	ratio = (msr >> 24) & 0x3F;
2077 	if (ratio)
2078 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n",
2079 			ratio, bclk, ratio * bclk);
2080 
2081 	ratio = (msr >> 16) & 0x3F;
2082 	if (ratio)
2083 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n",
2084 			ratio, bclk, ratio * bclk);
2085 
2086 	ratio = (msr >> 8) & 0x3F;
2087 	if (ratio)
2088 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n",
2089 			ratio, bclk, ratio * bclk);
2090 
2091 	ratio = (msr >> 0) & 0x3F;
2092 	if (ratio)
2093 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n",
2094 			ratio, bclk, ratio * bclk);
2095 }
2096 
2097 static void
2098 dump_knl_turbo_ratio_limits(void)
2099 {
2100 	const unsigned int buckets_no = 7;
2101 
2102 	unsigned long long msr;
2103 	int delta_cores, delta_ratio;
2104 	int i, b_nr;
2105 	unsigned int cores[buckets_no];
2106 	unsigned int ratio[buckets_no];
2107 
2108 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2109 
2110 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
2111 		base_cpu, msr);
2112 
2113 	/**
2114 	 * Turbo encoding in KNL is as follows:
2115 	 * [0] -- Reserved
2116 	 * [7:1] -- Base value of number of active cores of bucket 1.
2117 	 * [15:8] -- Base value of freq ratio of bucket 1.
2118 	 * [20:16] -- +ve delta of number of active cores of bucket 2.
2119 	 * i.e. active cores of bucket 2 =
2120 	 * active cores of bucket 1 + delta
2121 	 * [23:21] -- Negative delta of freq ratio of bucket 2.
2122 	 * i.e. freq ratio of bucket 2 =
2123 	 * freq ratio of bucket 1 - delta
2124 	 * [28:24]-- +ve delta of number of active cores of bucket 3.
2125 	 * [31:29]-- -ve delta of freq ratio of bucket 3.
2126 	 * [36:32]-- +ve delta of number of active cores of bucket 4.
2127 	 * [39:37]-- -ve delta of freq ratio of bucket 4.
2128 	 * [44:40]-- +ve delta of number of active cores of bucket 5.
2129 	 * [47:45]-- -ve delta of freq ratio of bucket 5.
2130 	 * [52:48]-- +ve delta of number of active cores of bucket 6.
2131 	 * [55:53]-- -ve delta of freq ratio of bucket 6.
2132 	 * [60:56]-- +ve delta of number of active cores of bucket 7.
2133 	 * [63:61]-- -ve delta of freq ratio of bucket 7.
2134 	 */
2135 
2136 	b_nr = 0;
2137 	cores[b_nr] = (msr & 0xFF) >> 1;
2138 	ratio[b_nr] = (msr >> 8) & 0xFF;
2139 
2140 	for (i = 16; i < 64; i += 8) {
2141 		delta_cores = (msr >> i) & 0x1F;
2142 		delta_ratio = (msr >> (i + 5)) & 0x7;
2143 
2144 		cores[b_nr + 1] = cores[b_nr] + delta_cores;
2145 		ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
2146 		b_nr++;
2147 	}
2148 
2149 	for (i = buckets_no - 1; i >= 0; i--)
2150 		if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
2151 			fprintf(outf,
2152 				"%d * %.1f = %.1f MHz max turbo %d active cores\n",
2153 				ratio[i], bclk, ratio[i] * bclk, cores[i]);
2154 }
2155 
2156 static void
2157 dump_nhm_cst_cfg(void)
2158 {
2159 	unsigned long long msr;
2160 
2161 	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2162 
2163 	fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
2164 
2165 	fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
2166 		(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
2167 		(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
2168 		(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
2169 		(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
2170 		(msr & (1 << 15)) ? "" : "UN",
2171 		(unsigned int)msr & 0xF,
2172 		pkg_cstate_limit_strings[pkg_cstate_limit]);
2173 
2174 #define AUTOMATIC_CSTATE_CONVERSION		(1UL << 16)
2175 	if (has_automatic_cstate_conversion) {
2176 		fprintf(outf, ", automatic c-state conversion=%s",
2177 			(msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
2178 	}
2179 
2180 	fprintf(outf, ")\n");
2181 
2182 	return;
2183 }
2184 
2185 static void
2186 dump_config_tdp(void)
2187 {
2188 	unsigned long long msr;
2189 
2190 	get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
2191 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
2192 	fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
2193 
2194 	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
2195 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
2196 	if (msr) {
2197 		fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2198 		fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2199 		fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2200 		fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
2201 	}
2202 	fprintf(outf, ")\n");
2203 
2204 	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
2205 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
2206 	if (msr) {
2207 		fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2208 		fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2209 		fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2210 		fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
2211 	}
2212 	fprintf(outf, ")\n");
2213 
2214 	get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
2215 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
2216 	if ((msr) & 0x3)
2217 		fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
2218 	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2219 	fprintf(outf, ")\n");
2220 
2221 	get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
2222 	fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
2223 	fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
2224 	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2225 	fprintf(outf, ")\n");
2226 }
2227 
2228 unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
2229 
2230 void print_irtl(void)
2231 {
2232 	unsigned long long msr;
2233 
2234 	get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
2235 	fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
2236 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2237 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2238 
2239 	get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
2240 	fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
2241 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2242 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2243 
2244 	get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
2245 	fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
2246 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2247 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2248 
2249 	if (!do_irtl_hsw)
2250 		return;
2251 
2252 	get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
2253 	fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
2254 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2255 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2256 
2257 	get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
2258 	fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
2259 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2260 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2261 
2262 	get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
2263 	fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
2264 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2265 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2266 
2267 }
2268 void free_fd_percpu(void)
2269 {
2270 	int i;
2271 
2272 	for (i = 0; i < topo.max_cpu_num + 1; ++i) {
2273 		if (fd_percpu[i] != 0)
2274 			close(fd_percpu[i]);
2275 	}
2276 
2277 	free(fd_percpu);
2278 }
2279 
2280 void free_all_buffers(void)
2281 {
2282 	int i;
2283 
2284 	CPU_FREE(cpu_present_set);
2285 	cpu_present_set = NULL;
2286 	cpu_present_setsize = 0;
2287 
2288 	CPU_FREE(cpu_affinity_set);
2289 	cpu_affinity_set = NULL;
2290 	cpu_affinity_setsize = 0;
2291 
2292 	free(thread_even);
2293 	free(core_even);
2294 	free(package_even);
2295 
2296 	thread_even = NULL;
2297 	core_even = NULL;
2298 	package_even = NULL;
2299 
2300 	free(thread_odd);
2301 	free(core_odd);
2302 	free(package_odd);
2303 
2304 	thread_odd = NULL;
2305 	core_odd = NULL;
2306 	package_odd = NULL;
2307 
2308 	free(output_buffer);
2309 	output_buffer = NULL;
2310 	outp = NULL;
2311 
2312 	free_fd_percpu();
2313 
2314 	free(irq_column_2_cpu);
2315 	free(irqs_per_cpu);
2316 
2317 	for (i = 0; i <= topo.max_cpu_num; ++i) {
2318 		if (cpus[i].put_ids)
2319 			CPU_FREE(cpus[i].put_ids);
2320 	}
2321 	free(cpus);
2322 }
2323 
2324 
2325 /*
2326  * Parse a file containing a single int.
2327  */
2328 int parse_int_file(const char *fmt, ...)
2329 {
2330 	va_list args;
2331 	char path[PATH_MAX];
2332 	FILE *filep;
2333 	int value;
2334 
2335 	va_start(args, fmt);
2336 	vsnprintf(path, sizeof(path), fmt, args);
2337 	va_end(args);
2338 	filep = fopen_or_die(path, "r");
2339 	if (fscanf(filep, "%d", &value) != 1)
2340 		err(1, "%s: failed to parse number from file", path);
2341 	fclose(filep);
2342 	return value;
2343 }
2344 
2345 /*
2346  * get_cpu_position_in_core(cpu)
2347  * return the position of the CPU among its HT siblings in the core
2348  * return -1 if the sibling is not in list
2349  */
2350 int get_cpu_position_in_core(int cpu)
2351 {
2352 	char path[64];
2353 	FILE *filep;
2354 	int this_cpu;
2355 	char character;
2356 	int i;
2357 
2358 	sprintf(path,
2359 		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
2360 		cpu);
2361 	filep = fopen(path, "r");
2362 	if (filep == NULL) {
2363 		perror(path);
2364 		exit(1);
2365 	}
2366 
2367 	for (i = 0; i < topo.num_threads_per_core; i++) {
2368 		fscanf(filep, "%d", &this_cpu);
2369 		if (this_cpu == cpu) {
2370 			fclose(filep);
2371 			return i;
2372 		}
2373 
2374 		/* Account for no separator after last thread*/
2375 		if (i != (topo.num_threads_per_core - 1))
2376 			fscanf(filep, "%c", &character);
2377 	}
2378 
2379 	fclose(filep);
2380 	return -1;
2381 }
2382 
/*
 * cpu_is_first_core_in_package(cpu)
 * return 1 if cpu equals the first number listed in its
 * core_siblings_list file, else 0
 */
int cpu_is_first_core_in_package(int cpu)
{
	int first_sibling;

	first_sibling = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);

	return cpu == first_sibling;
}
2391 
/*
 * Return the integer stored in the cpu's topology/physical_package_id file.
 */
int get_physical_package_id(int cpu)
{
	int pkg_id;

	pkg_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
	return pkg_id;
}
2396 
/*
 * Return the integer stored in the cpu's topology/core_id file.
 */
int get_core_id(int cpu)
{
	int core_id;

	core_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
	return core_id;
}
2401 
2402 int get_node_id(struct cpu_topology *thiscpu)
2403 {
2404 	char path[80];
2405 	FILE *filep;
2406 	int i;
2407 	int cpu = thiscpu->logical_cpu_id;
2408 
2409 	for (i = 0; i <= topo.max_cpu_num; i++) {
2410 		sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist",
2411 			cpu, i);
2412 		filep = fopen(path, "r");
2413 		if (!filep)
2414 			continue;
2415 		fclose(filep);
2416 		return i;
2417 	}
2418 	return -1;
2419 }
2420 
2421 int get_thread_siblings(struct cpu_topology *thiscpu)
2422 {
2423 	char path[80], character;
2424 	FILE *filep;
2425 	unsigned long map;
2426 	int shift, sib_core;
2427 	int cpu = thiscpu->logical_cpu_id;
2428 	int offset = topo.max_cpu_num + 1;
2429 	size_t size;
2430 
2431 	thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
2432 	if (!thiscpu->put_ids)
2433 		return -1;
2434 
2435 	size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
2436 	CPU_ZERO_S(size, thiscpu->put_ids);
2437 
2438 	sprintf(path,
2439 		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
2440 	filep = fopen_or_die(path, "r");
2441 	do {
2442 		offset -= BITMASK_SIZE;
2443 		fscanf(filep, "%lx%c", &map, &character);
2444 		for (shift = 0; shift < BITMASK_SIZE; shift++) {
2445 			if ((map >> shift) & 0x1) {
2446 				sib_core = get_core_id(shift + offset);
2447 				if (sib_core == thiscpu->physical_core_id)
2448 					CPU_SET_S(shift + offset, size,
2449 						thiscpu->put_ids);
2450 			}
2451 		}
2452 	} while (!strncmp(&character, ",", 1));
2453 	fclose(filep);
2454 
2455 	return CPU_COUNT_S(size, thiscpu->put_ids);
2456 }
2457 
2458 /*
2459  * run func(thread, core, package) in topology order
2460  * skip non-present cpus
2461  */
2462 
2463 int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
2464 	struct pkg_data *, struct thread_data *, struct core_data *,
2465 	struct pkg_data *), struct thread_data *thread_base,
2466 	struct core_data *core_base, struct pkg_data *pkg_base,
2467 	struct thread_data *thread_base2, struct core_data *core_base2,
2468 	struct pkg_data *pkg_base2)
2469 {
2470 	int retval, pkg_no, core_no, thread_no;
2471 
2472 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
2473 		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
2474 			for (thread_no = 0; thread_no <
2475 				topo.num_threads_per_core; ++thread_no) {
2476 				struct thread_data *t, *t2;
2477 				struct core_data *c, *c2;
2478 				struct pkg_data *p, *p2;
2479 
2480 				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
2481 
2482 				if (cpu_is_not_present(t->cpu_id))
2483 					continue;
2484 
2485 				t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no);
2486 
2487 				c = GET_CORE(core_base, core_no, pkg_no);
2488 				c2 = GET_CORE(core_base2, core_no, pkg_no);
2489 
2490 				p = GET_PKG(pkg_base, pkg_no);
2491 				p2 = GET_PKG(pkg_base2, pkg_no);
2492 
2493 				retval = func(t, c, p, t2, c2, p2);
2494 				if (retval)
2495 					return retval;
2496 			}
2497 		}
2498 	}
2499 	return 0;
2500 }
2501 
2502 /*
2503  * run func(cpu) on every cpu in /proc/stat
2504  * return max_cpu number
2505  */
2506 int for_all_proc_cpus(int (func)(int))
2507 {
2508 	FILE *fp;
2509 	int cpu_num;
2510 	int retval;
2511 
2512 	fp = fopen_or_die(proc_stat, "r");
2513 
2514 	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
2515 	if (retval != 0)
2516 		err(1, "%s: failed to parse format", proc_stat);
2517 
2518 	while (1) {
2519 		retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
2520 		if (retval != 1)
2521 			break;
2522 
2523 		retval = func(cpu_num);
2524 		if (retval) {
2525 			fclose(fp);
2526 			return(retval);
2527 		}
2528 	}
2529 	fclose(fp);
2530 	return 0;
2531 }
2532 
2533 void re_initialize(void)
2534 {
2535 	free_all_buffers();
2536 	setup_all_buffers();
2537 	printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
2538 }
2539 
2540 void set_max_cpu_num(void)
2541 {
2542 	FILE *filep;
2543 	unsigned long dummy;
2544 
2545 	topo.max_cpu_num = 0;
2546 	filep = fopen_or_die(
2547 			"/sys/devices/system/cpu/cpu0/topology/thread_siblings",
2548 			"r");
2549 	while (fscanf(filep, "%lx,", &dummy) == 1)
2550 		topo.max_cpu_num += BITMASK_SIZE;
2551 	fclose(filep);
2552 	topo.max_cpu_num--; /* 0 based */
2553 }
2554 
2555 /*
2556  * count_cpus()
2557  * remember the last one seen, it will be the max
2558  */
2559 int count_cpus(int cpu)
2560 {
2561 	topo.num_cpus++;
2562 	return 0;
2563 }
2564 int mark_cpu_present(int cpu)
2565 {
2566 	CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
2567 	return 0;
2568 }
2569 
2570 /*
2571  * snapshot_proc_interrupts()
2572  *
2573  * read and record summary of /proc/interrupts
2574  *
2575  * return 1 if config change requires a restart, else return 0
2576  */
2577 int snapshot_proc_interrupts(void)
2578 {
2579 	static FILE *fp;
2580 	int column, retval;
2581 
2582 	if (fp == NULL)
2583 		fp = fopen_or_die("/proc/interrupts", "r");
2584 	else
2585 		rewind(fp);
2586 
2587 	/* read 1st line of /proc/interrupts to get cpu* name for each column */
2588 	for (column = 0; column < topo.num_cpus; ++column) {
2589 		int cpu_number;
2590 
2591 		retval = fscanf(fp, " CPU%d", &cpu_number);
2592 		if (retval != 1)
2593 			break;
2594 
2595 		if (cpu_number > topo.max_cpu_num) {
2596 			warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
2597 			return 1;
2598 		}
2599 
2600 		irq_column_2_cpu[column] = cpu_number;
2601 		irqs_per_cpu[cpu_number] = 0;
2602 	}
2603 
2604 	/* read /proc/interrupt count lines and sum up irqs per cpu */
2605 	while (1) {
2606 		int column;
2607 		char buf[64];
2608 
2609 		retval = fscanf(fp, " %s:", buf);	/* flush irq# "N:" */
2610 		if (retval != 1)
2611 			break;
2612 
2613 		/* read the count per cpu */
2614 		for (column = 0; column < topo.num_cpus; ++column) {
2615 
2616 			int cpu_number, irq_count;
2617 
2618 			retval = fscanf(fp, " %d", &irq_count);
2619 			if (retval != 1)
2620 				break;
2621 
2622 			cpu_number = irq_column_2_cpu[column];
2623 			irqs_per_cpu[cpu_number] += irq_count;
2624 
2625 		}
2626 
2627 		while (getc(fp) != '\n')
2628 			;	/* flush interrupt description */
2629 
2630 	}
2631 	return 0;
2632 }
2633 /*
2634  * snapshot_gfx_rc6_ms()
2635  *
2636  * record snapshot of
2637  * /sys/class/drm/card0/power/rc6_residency_ms
2638  *
2639  * return 1 if config change requires a restart, else return 0
2640  */
2641 int snapshot_gfx_rc6_ms(void)
2642 {
2643 	FILE *fp;
2644 	int retval;
2645 
2646 	fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
2647 
2648 	retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
2649 	if (retval != 1)
2650 		err(1, "GFX rc6");
2651 
2652 	fclose(fp);
2653 
2654 	return 0;
2655 }
2656 /*
2657  * snapshot_gfx_mhz()
2658  *
2659  * record snapshot of
2660  * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
2661  *
2662  * return 1 if config change requires a restart, else return 0
2663  */
2664 int snapshot_gfx_mhz(void)
2665 {
2666 	static FILE *fp;
2667 	int retval;
2668 
2669 	if (fp == NULL)
2670 		fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
2671 	else {
2672 		rewind(fp);
2673 		fflush(fp);
2674 	}
2675 
2676 	retval = fscanf(fp, "%d", &gfx_cur_mhz);
2677 	if (retval != 1)
2678 		err(1, "GFX MHz");
2679 
2680 	return 0;
2681 }
2682 
2683 /*
2684  * snapshot_cpu_lpi()
2685  *
2686  * record snapshot of
2687  * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
2688  *
2689  * return 1 if config change requires a restart, else return 0
2690  */
2691 int snapshot_cpu_lpi_us(void)
2692 {
2693 	FILE *fp;
2694 	int retval;
2695 
2696 	fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
2697 
2698 	retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
2699 	if (retval != 1)
2700 		err(1, "CPU LPI");
2701 
2702 	fclose(fp);
2703 
2704 	return 0;
2705 }
2706 /*
2707  * snapshot_sys_lpi()
2708  *
2709  * record snapshot of
2710  * /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us
2711  *
2712  * return 1 if config change requires a restart, else return 0
2713  */
2714 int snapshot_sys_lpi_us(void)
2715 {
2716 	FILE *fp;
2717 	int retval;
2718 
2719 	fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r");
2720 
2721 	retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
2722 	if (retval != 1)
2723 		err(1, "SYS LPI");
2724 
2725 	fclose(fp);
2726 
2727 	return 0;
2728 }
2729 /*
2730  * snapshot /proc and /sys files
2731  *
2732  * return 1 if configuration restart needed, else return 0
2733  */
2734 int snapshot_proc_sysfs_files(void)
2735 {
2736 	if (DO_BIC(BIC_IRQ))
2737 		if (snapshot_proc_interrupts())
2738 			return 1;
2739 
2740 	if (DO_BIC(BIC_GFX_rc6))
2741 		snapshot_gfx_rc6_ms();
2742 
2743 	if (DO_BIC(BIC_GFXMHz))
2744 		snapshot_gfx_mhz();
2745 
2746 	if (DO_BIC(BIC_CPU_LPI))
2747 		snapshot_cpu_lpi_us();
2748 
2749 	if (DO_BIC(BIC_SYS_LPI))
2750 		snapshot_sys_lpi_us();
2751 
2752 	return 0;
2753 }
2754 
2755 int exit_requested;
2756 
2757 static void signal_handler (int signal)
2758 {
2759 	switch (signal) {
2760 	case SIGINT:
2761 		exit_requested = 1;
2762 		if (debug)
2763 			fprintf(stderr, " SIGINT\n");
2764 		break;
2765 	case SIGUSR1:
2766 		if (debug > 1)
2767 			fprintf(stderr, "SIGUSR1\n");
2768 		break;
2769 	}
2770 	/* make sure this manually-invoked interval is at least 1ms long */
2771 	nanosleep(&one_msec, NULL);
2772 }
2773 
2774 void setup_signal_handler(void)
2775 {
2776 	struct sigaction sa;
2777 
2778 	memset(&sa, 0, sizeof(sa));
2779 
2780 	sa.sa_handler = &signal_handler;
2781 
2782 	if (sigaction(SIGINT, &sa, NULL) < 0)
2783 		err(1, "sigaction SIGINT");
2784 	if (sigaction(SIGUSR1, &sa, NULL) < 0)
2785 		err(1, "sigaction SIGUSR1");
2786 }
2787 
2788 void do_sleep(void)
2789 {
2790 	struct timeval select_timeout;
2791 	fd_set readfds;
2792 	int retval;
2793 
2794 	FD_ZERO(&readfds);
2795 	FD_SET(0, &readfds);
2796 
2797 	if (!isatty(fileno(stdin))) {
2798 		nanosleep(&interval_ts, NULL);
2799 		return;
2800 	}
2801 
2802 	select_timeout = interval_tv;
2803 	retval = select(1, &readfds, NULL, NULL, &select_timeout);
2804 
2805 	if (retval == 1) {
2806 		switch (getc(stdin)) {
2807 		case 'q':
2808 			exit_requested = 1;
2809 			break;
2810 		}
2811 		/* make sure this manually-invoked interval is at least 1ms long */
2812 		nanosleep(&one_msec, NULL);
2813 	}
2814 }
2815 
2816 void turbostat_loop()
2817 {
2818 	int retval;
2819 	int restarted = 0;
2820 	int done_iters = 0;
2821 
2822 	setup_signal_handler();
2823 
2824 restart:
2825 	restarted++;
2826 
2827 	snapshot_proc_sysfs_files();
2828 	retval = for_all_cpus(get_counters, EVEN_COUNTERS);
2829 	if (retval < -1) {
2830 		exit(retval);
2831 	} else if (retval == -1) {
2832 		if (restarted > 1) {
2833 			exit(retval);
2834 		}
2835 		re_initialize();
2836 		goto restart;
2837 	}
2838 	restarted = 0;
2839 	done_iters = 0;
2840 	gettimeofday(&tv_even, (struct timezone *)NULL);
2841 
2842 	while (1) {
2843 		if (for_all_proc_cpus(cpu_is_not_present)) {
2844 			re_initialize();
2845 			goto restart;
2846 		}
2847 		do_sleep();
2848 		if (snapshot_proc_sysfs_files())
2849 			goto restart;
2850 		retval = for_all_cpus(get_counters, ODD_COUNTERS);
2851 		if (retval < -1) {
2852 			exit(retval);
2853 		} else if (retval == -1) {
2854 			re_initialize();
2855 			goto restart;
2856 		}
2857 		gettimeofday(&tv_odd, (struct timezone *)NULL);
2858 		timersub(&tv_odd, &tv_even, &tv_delta);
2859 		if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
2860 			re_initialize();
2861 			goto restart;
2862 		}
2863 		compute_average(EVEN_COUNTERS);
2864 		format_all_counters(EVEN_COUNTERS);
2865 		flush_output_stdout();
2866 		if (exit_requested)
2867 			break;
2868 		if (num_iterations && ++done_iters >= num_iterations)
2869 			break;
2870 		do_sleep();
2871 		if (snapshot_proc_sysfs_files())
2872 			goto restart;
2873 		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
2874 		if (retval < -1) {
2875 			exit(retval);
2876 		} else if (retval == -1) {
2877 			re_initialize();
2878 			goto restart;
2879 		}
2880 		gettimeofday(&tv_even, (struct timezone *)NULL);
2881 		timersub(&tv_even, &tv_odd, &tv_delta);
2882 		if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
2883 			re_initialize();
2884 			goto restart;
2885 		}
2886 		compute_average(ODD_COUNTERS);
2887 		format_all_counters(ODD_COUNTERS);
2888 		flush_output_stdout();
2889 		if (exit_requested)
2890 			break;
2891 		if (num_iterations && ++done_iters >= num_iterations)
2892 			break;
2893 	}
2894 }
2895 
2896 void check_dev_msr()
2897 {
2898 	struct stat sb;
2899 	char pathname[32];
2900 
2901 	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
2902 	if (stat(pathname, &sb))
2903  		if (system("/sbin/modprobe msr > /dev/null 2>&1"))
2904 			err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
2905 }
2906 
2907 void check_permissions()
2908 {
2909 	struct __user_cap_header_struct cap_header_data;
2910 	cap_user_header_t cap_header = &cap_header_data;
2911 	struct __user_cap_data_struct cap_data_data;
2912 	cap_user_data_t cap_data = &cap_data_data;
2913 	extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
2914 	int do_exit = 0;
2915 	char pathname[32];
2916 
2917 	/* check for CAP_SYS_RAWIO */
2918 	cap_header->pid = getpid();
2919 	cap_header->version = _LINUX_CAPABILITY_VERSION;
2920 	if (capget(cap_header, cap_data) < 0)
2921 		err(-6, "capget(2) failed");
2922 
2923 	if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
2924 		do_exit++;
2925 		warnx("capget(CAP_SYS_RAWIO) failed,"
2926 			" try \"# setcap cap_sys_rawio=ep %s\"", progname);
2927 	}
2928 
2929 	/* test file permissions */
2930 	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
2931 	if (euidaccess(pathname, R_OK)) {
2932 		do_exit++;
2933 		warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
2934 	}
2935 
2936 	/* if all else fails, thell them to be root */
2937 	if (do_exit)
2938 		if (getuid() != 0)
2939 			warnx("... or simply run as root");
2940 
2941 	if (do_exit)
2942 		exit(-6);
2943 }
2944 
2945 /*
2946  * NHM adds support for additional MSRs:
2947  *
2948  * MSR_SMI_COUNT                   0x00000034
2949  *
2950  * MSR_PLATFORM_INFO               0x000000ce
2951  * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
2952  *
2953  * MSR_MISC_PWR_MGMT               0x000001aa
2954  *
2955  * MSR_PKG_C3_RESIDENCY            0x000003f8
2956  * MSR_PKG_C6_RESIDENCY            0x000003f9
2957  * MSR_CORE_C3_RESIDENCY           0x000003fc
2958  * MSR_CORE_C6_RESIDENCY           0x000003fd
2959  *
2960  * Side effect:
2961  * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
2962  * sets has_misc_feature_control
2963  */
2964 int probe_nhm_msrs(unsigned int family, unsigned int model)
2965 {
2966 	unsigned long long msr;
2967 	unsigned int base_ratio;
2968 	int *pkg_cstate_limits;
2969 
2970 	if (!genuine_intel)
2971 		return 0;
2972 
2973 	if (family != 6)
2974 		return 0;
2975 
2976 	bclk = discover_bclk(family, model);
2977 
2978 	switch (model) {
2979 	case INTEL_FAM6_NEHALEM_EP:	/* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
2980 	case INTEL_FAM6_NEHALEM:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
2981 	case 0x1F:	/* Core i7 and i5 Processor - Nehalem */
2982 	case INTEL_FAM6_WESTMERE:	/* Westmere Client - Clarkdale, Arrandale */
2983 	case INTEL_FAM6_WESTMERE_EP:	/* Westmere EP - Gulftown */
2984 	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
2985 	case INTEL_FAM6_WESTMERE_EX:	/* Westmere-EX Xeon - Eagleton */
2986 		pkg_cstate_limits = nhm_pkg_cstate_limits;
2987 		break;
2988 	case INTEL_FAM6_SANDYBRIDGE:	/* SNB */
2989 	case INTEL_FAM6_SANDYBRIDGE_X:	/* SNB Xeon */
2990 	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
2991 	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
2992 		pkg_cstate_limits = snb_pkg_cstate_limits;
2993 		has_misc_feature_control = 1;
2994 		break;
2995 	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
2996 	case INTEL_FAM6_HASWELL_X:	/* HSX */
2997 	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
2998 	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
2999 	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
3000 	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
3001 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
3002 	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
3003 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
3004 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
3005 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
3006 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
3007 	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
3008 		pkg_cstate_limits = hsw_pkg_cstate_limits;
3009 		has_misc_feature_control = 1;
3010 		break;
3011 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
3012 		pkg_cstate_limits = skx_pkg_cstate_limits;
3013 		has_misc_feature_control = 1;
3014 		break;
3015 	case INTEL_FAM6_ATOM_SILVERMONT1:	/* BYT */
3016 		no_MSR_MISC_PWR_MGMT = 1;
3017 	case INTEL_FAM6_ATOM_SILVERMONT2:	/* AVN */
3018 		pkg_cstate_limits = slv_pkg_cstate_limits;
3019 		break;
3020 	case INTEL_FAM6_ATOM_AIRMONT:	/* AMT */
3021 		pkg_cstate_limits = amt_pkg_cstate_limits;
3022 		no_MSR_MISC_PWR_MGMT = 1;
3023 		break;
3024 	case INTEL_FAM6_XEON_PHI_KNL:	/* PHI */
3025 	case INTEL_FAM6_XEON_PHI_KNM:
3026 		pkg_cstate_limits = phi_pkg_cstate_limits;
3027 		break;
3028 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
3029 	case INTEL_FAM6_ATOM_GEMINI_LAKE:
3030 	case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
3031 		pkg_cstate_limits = bxt_pkg_cstate_limits;
3032 		break;
3033 	default:
3034 		return 0;
3035 	}
3036 	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
3037 	pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
3038 
3039 	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
3040 	base_ratio = (msr >> 8) & 0xFF;
3041 
3042 	base_hz = base_ratio * bclk * 1000000;
3043 	has_base_hz = 1;
3044 	return 1;
3045 }
3046 /*
3047  * SLV client has support for unique MSRs:
3048  *
3049  * MSR_CC6_DEMOTION_POLICY_CONFIG
3050  * MSR_MC6_DEMOTION_POLICY_CONFIG
3051  */
3052 
3053 int has_slv_msrs(unsigned int family, unsigned int model)
3054 {
3055 	if (!genuine_intel)
3056 		return 0;
3057 
3058 	switch (model) {
3059 	case INTEL_FAM6_ATOM_SILVERMONT1:
3060 	case INTEL_FAM6_ATOM_MERRIFIELD:
3061 	case INTEL_FAM6_ATOM_MOOREFIELD:
3062 		return 1;
3063 	}
3064 	return 0;
3065 }
3066 int is_dnv(unsigned int family, unsigned int model)
3067 {
3068 
3069 	if (!genuine_intel)
3070 		return 0;
3071 
3072 	switch (model) {
3073 	case INTEL_FAM6_ATOM_DENVERTON:
3074 		return 1;
3075 	}
3076 	return 0;
3077 }
3078 int is_bdx(unsigned int family, unsigned int model)
3079 {
3080 
3081 	if (!genuine_intel)
3082 		return 0;
3083 
3084 	switch (model) {
3085 	case INTEL_FAM6_BROADWELL_X:
3086 	case INTEL_FAM6_BROADWELL_XEON_D:
3087 		return 1;
3088 	}
3089 	return 0;
3090 }
3091 int is_skx(unsigned int family, unsigned int model)
3092 {
3093 
3094 	if (!genuine_intel)
3095 		return 0;
3096 
3097 	switch (model) {
3098 	case INTEL_FAM6_SKYLAKE_X:
3099 		return 1;
3100 	}
3101 	return 0;
3102 }
3103 
3104 int has_turbo_ratio_limit(unsigned int family, unsigned int model)
3105 {
3106 	if (has_slv_msrs(family, model))
3107 		return 0;
3108 
3109 	switch (model) {
3110 	/* Nehalem compatible, but do not include turbo-ratio limit support */
3111 	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
3112 	case INTEL_FAM6_WESTMERE_EX:	/* Westmere-EX Xeon - Eagleton */
3113 	case INTEL_FAM6_XEON_PHI_KNL:	/* PHI - Knights Landing (different MSR definition) */
3114 	case INTEL_FAM6_XEON_PHI_KNM:
3115 		return 0;
3116 	default:
3117 		return 1;
3118 	}
3119 }
/*
 * has_atom_turbo_ratio_limit()
 * Return 1 exactly when has_slv_msrs() reports true for this CPU, else 0.
 */
int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	return has_slv_msrs(family, model) ? 1 : 0;
}
3127 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
3128 {
3129 	if (!genuine_intel)
3130 		return 0;
3131 
3132 	if (family != 6)
3133 		return 0;
3134 
3135 	switch (model) {
3136 	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
3137 	case INTEL_FAM6_HASWELL_X:	/* HSW Xeon */
3138 		return 1;
3139 	default:
3140 		return 0;
3141 	}
3142 }
3143 int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
3144 {
3145 	if (!genuine_intel)
3146 		return 0;
3147 
3148 	if (family != 6)
3149 		return 0;
3150 
3151 	switch (model) {
3152 	case INTEL_FAM6_HASWELL_X:	/* HSW Xeon */
3153 		return 1;
3154 	default:
3155 		return 0;
3156 	}
3157 }
3158 
3159 int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
3160 {
3161 	if (!genuine_intel)
3162 		return 0;
3163 
3164 	if (family != 6)
3165 		return 0;
3166 
3167 	switch (model) {
3168 	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
3169 	case INTEL_FAM6_XEON_PHI_KNM:
3170 		return 1;
3171 	default:
3172 		return 0;
3173 	}
3174 }
3175 int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
3176 {
3177 	if (!genuine_intel)
3178 		return 0;
3179 
3180 	if (family != 6)
3181 		return 0;
3182 
3183 	switch (model) {
3184 	case INTEL_FAM6_ATOM_GOLDMONT:
3185 	case INTEL_FAM6_SKYLAKE_X:
3186 		return 1;
3187 	default:
3188 		return 0;
3189 	}
3190 }
3191 int has_config_tdp(unsigned int family, unsigned int model)
3192 {
3193 	if (!genuine_intel)
3194 		return 0;
3195 
3196 	if (family != 6)
3197 		return 0;
3198 
3199 	switch (model) {
3200 	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
3201 	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
3202 	case INTEL_FAM6_HASWELL_X:	/* HSX */
3203 	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
3204 	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
3205 	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
3206 	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
3207 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
3208 	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
3209 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
3210 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
3211 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
3212 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
3213 	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
3214 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
3215 
3216 	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
3217 	case INTEL_FAM6_XEON_PHI_KNM:
3218 		return 1;
3219 	default:
3220 		return 0;
3221 	}
3222 }
3223 
3224 static void
3225 dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
3226 {
3227 	if (!do_nhm_platform_info)
3228 		return;
3229 
3230 	dump_nhm_platform_info();
3231 
3232 	if (has_hsw_turbo_ratio_limit(family, model))
3233 		dump_hsw_turbo_ratio_limits();
3234 
3235 	if (has_ivt_turbo_ratio_limit(family, model))
3236 		dump_ivt_turbo_ratio_limits();
3237 
3238 	if (has_turbo_ratio_limit(family, model))
3239 		dump_turbo_ratio_limits(family, model);
3240 
3241 	if (has_atom_turbo_ratio_limit(family, model))
3242 		dump_atom_turbo_ratio_limits();
3243 
3244 	if (has_knl_turbo_ratio_limit(family, model))
3245 		dump_knl_turbo_ratio_limits();
3246 
3247 	if (has_config_tdp(family, model))
3248 		dump_config_tdp();
3249 
3250 	dump_nhm_cst_cfg();
3251 }
3252 
3253 static void
3254 dump_sysfs_cstate_config(void)
3255 {
3256 	char path[64];
3257 	char name_buf[16];
3258 	char desc[64];
3259 	FILE *input;
3260 	int state;
3261 	char *sp;
3262 
3263 	if (!DO_BIC(BIC_sysfs))
3264 		return;
3265 
3266 	for (state = 0; state < 10; ++state) {
3267 
3268 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
3269 			base_cpu, state);
3270 		input = fopen(path, "r");
3271 		if (input == NULL)
3272 			continue;
3273 		fgets(name_buf, sizeof(name_buf), input);
3274 
3275 		 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
3276 		sp = strchr(name_buf, '-');
3277 		if (!sp)
3278 			sp = strchrnul(name_buf, '\n');
3279 		*sp = '\0';
3280 
3281 		fclose(input);
3282 
3283 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc",
3284 			base_cpu, state);
3285 		input = fopen(path, "r");
3286 		if (input == NULL)
3287 			continue;
3288 		fgets(desc, sizeof(desc), input);
3289 
3290 		fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
3291 		fclose(input);
3292 	}
3293 }
3294 static void
3295 dump_sysfs_pstate_config(void)
3296 {
3297 	char path[64];
3298 	char driver_buf[64];
3299 	char governor_buf[64];
3300 	FILE *input;
3301 	int turbo;
3302 
3303 	sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver",
3304 			base_cpu);
3305 	input = fopen(path, "r");
3306 	if (input == NULL) {
3307 		fprintf(stderr, "NSFOD %s\n", path);
3308 		return;
3309 	}
3310 	fgets(driver_buf, sizeof(driver_buf), input);
3311 	fclose(input);
3312 
3313 	sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor",
3314 			base_cpu);
3315 	input = fopen(path, "r");
3316 	if (input == NULL) {
3317 		fprintf(stderr, "NSFOD %s\n", path);
3318 		return;
3319 	}
3320 	fgets(governor_buf, sizeof(governor_buf), input);
3321 	fclose(input);
3322 
3323 	fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
3324 	fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);
3325 
3326 	sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
3327 	input = fopen(path, "r");
3328 	if (input != NULL) {
3329 		fscanf(input, "%d", &turbo);
3330 		fprintf(outf, "cpufreq boost: %d\n", turbo);
3331 		fclose(input);
3332 	}
3333 
3334 	sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
3335 	input = fopen(path, "r");
3336 	if (input != NULL) {
3337 		fscanf(input, "%d", &turbo);
3338 		fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
3339 		fclose(input);
3340 	}
3341 }
3342 
3343 
3344 /*
3345  * print_epb()
3346  * Decode the ENERGY_PERF_BIAS MSR
3347  */
3348 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3349 {
3350 	unsigned long long msr;
3351 	char *epb_string;
3352 	int cpu;
3353 
3354 	if (!has_epb)
3355 		return 0;
3356 
3357 	cpu = t->cpu_id;
3358 
3359 	/* EPB is per-package */
3360 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3361 		return 0;
3362 
3363 	if (cpu_migrate(cpu)) {
3364 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3365 		return -1;
3366 	}
3367 
3368 	if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
3369 		return 0;
3370 
3371 	switch (msr & 0xF) {
3372 	case ENERGY_PERF_BIAS_PERFORMANCE:
3373 		epb_string = "performance";
3374 		break;
3375 	case ENERGY_PERF_BIAS_NORMAL:
3376 		epb_string = "balanced";
3377 		break;
3378 	case ENERGY_PERF_BIAS_POWERSAVE:
3379 		epb_string = "powersave";
3380 		break;
3381 	default:
3382 		epb_string = "custom";
3383 		break;
3384 	}
3385 	fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
3386 
3387 	return 0;
3388 }
3389 /*
3390  * print_hwp()
3391  * Decode the MSR_HWP_CAPABILITIES
3392  */
3393 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3394 {
3395 	unsigned long long msr;
3396 	int cpu;
3397 
3398 	if (!has_hwp)
3399 		return 0;
3400 
3401 	cpu = t->cpu_id;
3402 
3403 	/* MSR_HWP_CAPABILITIES is per-package */
3404 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3405 		return 0;
3406 
3407 	if (cpu_migrate(cpu)) {
3408 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3409 		return -1;
3410 	}
3411 
3412 	if (get_msr(cpu, MSR_PM_ENABLE, &msr))
3413 		return 0;
3414 
3415 	fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
3416 		cpu, msr, (msr & (1 << 0)) ? "" : "No-");
3417 
3418 	/* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
3419 	if ((msr & (1 << 0)) == 0)
3420 		return 0;
3421 
3422 	if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
3423 		return 0;
3424 
3425 	fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
3426 			"(high %d guar %d eff %d low %d)\n",
3427 			cpu, msr,
3428 			(unsigned int)HWP_HIGHEST_PERF(msr),
3429 			(unsigned int)HWP_GUARANTEED_PERF(msr),
3430 			(unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
3431 			(unsigned int)HWP_LOWEST_PERF(msr));
3432 
3433 	if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
3434 		return 0;
3435 
3436 	fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
3437 			"(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
3438 			cpu, msr,
3439 			(unsigned int)(((msr) >> 0) & 0xff),
3440 			(unsigned int)(((msr) >> 8) & 0xff),
3441 			(unsigned int)(((msr) >> 16) & 0xff),
3442 			(unsigned int)(((msr) >> 24) & 0xff),
3443 			(unsigned int)(((msr) >> 32) & 0xff3),
3444 			(unsigned int)(((msr) >> 42) & 0x1));
3445 
3446 	if (has_hwp_pkg) {
3447 		if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
3448 			return 0;
3449 
3450 		fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
3451 			"(min %d max %d des %d epp 0x%x window 0x%x)\n",
3452 			cpu, msr,
3453 			(unsigned int)(((msr) >> 0) & 0xff),
3454 			(unsigned int)(((msr) >> 8) & 0xff),
3455 			(unsigned int)(((msr) >> 16) & 0xff),
3456 			(unsigned int)(((msr) >> 24) & 0xff),
3457 			(unsigned int)(((msr) >> 32) & 0xff3));
3458 	}
3459 	if (has_hwp_notify) {
3460 		if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
3461 			return 0;
3462 
3463 		fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
3464 			"(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
3465 			cpu, msr,
3466 			((msr) & 0x1) ? "EN" : "Dis",
3467 			((msr) & 0x2) ? "EN" : "Dis");
3468 	}
3469 	if (get_msr(cpu, MSR_HWP_STATUS, &msr))
3470 		return 0;
3471 
3472 	fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
3473 			"(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
3474 			cpu, msr,
3475 			((msr) & 0x1) ? "" : "No-",
3476 			((msr) & 0x2) ? "" : "No-");
3477 
3478 	return 0;
3479 }
3480 
3481 /*
3482  * print_perf_limit()
3483  */
3484 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3485 {
3486 	unsigned long long msr;
3487 	int cpu;
3488 
3489 	cpu = t->cpu_id;
3490 
3491 	/* per-package */
3492 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3493 		return 0;
3494 
3495 	if (cpu_migrate(cpu)) {
3496 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3497 		return -1;
3498 	}
3499 
3500 	if (do_core_perf_limit_reasons) {
3501 		get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
3502 		fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3503 		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
3504 			(msr & 1 << 15) ? "bit15, " : "",
3505 			(msr & 1 << 14) ? "bit14, " : "",
3506 			(msr & 1 << 13) ? "Transitions, " : "",
3507 			(msr & 1 << 12) ? "MultiCoreTurbo, " : "",
3508 			(msr & 1 << 11) ? "PkgPwrL2, " : "",
3509 			(msr & 1 << 10) ? "PkgPwrL1, " : "",
3510 			(msr & 1 << 9) ? "CorePwr, " : "",
3511 			(msr & 1 << 8) ? "Amps, " : "",
3512 			(msr & 1 << 6) ? "VR-Therm, " : "",
3513 			(msr & 1 << 5) ? "Auto-HWP, " : "",
3514 			(msr & 1 << 4) ? "Graphics, " : "",
3515 			(msr & 1 << 2) ? "bit2, " : "",
3516 			(msr & 1 << 1) ? "ThermStatus, " : "",
3517 			(msr & 1 << 0) ? "PROCHOT, " : "");
3518 		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
3519 			(msr & 1 << 31) ? "bit31, " : "",
3520 			(msr & 1 << 30) ? "bit30, " : "",
3521 			(msr & 1 << 29) ? "Transitions, " : "",
3522 			(msr & 1 << 28) ? "MultiCoreTurbo, " : "",
3523 			(msr & 1 << 27) ? "PkgPwrL2, " : "",
3524 			(msr & 1 << 26) ? "PkgPwrL1, " : "",
3525 			(msr & 1 << 25) ? "CorePwr, " : "",
3526 			(msr & 1 << 24) ? "Amps, " : "",
3527 			(msr & 1 << 22) ? "VR-Therm, " : "",
3528 			(msr & 1 << 21) ? "Auto-HWP, " : "",
3529 			(msr & 1 << 20) ? "Graphics, " : "",
3530 			(msr & 1 << 18) ? "bit18, " : "",
3531 			(msr & 1 << 17) ? "ThermStatus, " : "",
3532 			(msr & 1 << 16) ? "PROCHOT, " : "");
3533 
3534 	}
3535 	if (do_gfx_perf_limit_reasons) {
3536 		get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
3537 		fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3538 		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
3539 			(msr & 1 << 0) ? "PROCHOT, " : "",
3540 			(msr & 1 << 1) ? "ThermStatus, " : "",
3541 			(msr & 1 << 4) ? "Graphics, " : "",
3542 			(msr & 1 << 6) ? "VR-Therm, " : "",
3543 			(msr & 1 << 8) ? "Amps, " : "",
3544 			(msr & 1 << 9) ? "GFXPwr, " : "",
3545 			(msr & 1 << 10) ? "PkgPwrL1, " : "",
3546 			(msr & 1 << 11) ? "PkgPwrL2, " : "");
3547 		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
3548 			(msr & 1 << 16) ? "PROCHOT, " : "",
3549 			(msr & 1 << 17) ? "ThermStatus, " : "",
3550 			(msr & 1 << 20) ? "Graphics, " : "",
3551 			(msr & 1 << 22) ? "VR-Therm, " : "",
3552 			(msr & 1 << 24) ? "Amps, " : "",
3553 			(msr & 1 << 25) ? "GFXPwr, " : "",
3554 			(msr & 1 << 26) ? "PkgPwrL1, " : "",
3555 			(msr & 1 << 27) ? "PkgPwrL2, " : "");
3556 	}
3557 	if (do_ring_perf_limit_reasons) {
3558 		get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
3559 		fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
3560 		fprintf(outf, " (Active: %s%s%s%s%s%s)",
3561 			(msr & 1 << 0) ? "PROCHOT, " : "",
3562 			(msr & 1 << 1) ? "ThermStatus, " : "",
3563 			(msr & 1 << 6) ? "VR-Therm, " : "",
3564 			(msr & 1 << 8) ? "Amps, " : "",
3565 			(msr & 1 << 10) ? "PkgPwrL1, " : "",
3566 			(msr & 1 << 11) ? "PkgPwrL2, " : "");
3567 		fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
3568 			(msr & 1 << 16) ? "PROCHOT, " : "",
3569 			(msr & 1 << 17) ? "ThermStatus, " : "",
3570 			(msr & 1 << 22) ? "VR-Therm, " : "",
3571 			(msr & 1 << 24) ? "Amps, " : "",
3572 			(msr & 1 << 26) ? "PkgPwrL1, " : "",
3573 			(msr & 1 << 27) ? "PkgPwrL2, " : "");
3574 	}
3575 	return 0;
3576 }
3577 
3578 #define	RAPL_POWER_GRANULARITY	0x7FFF	/* 15 bit power granularity */
3579 #define	RAPL_TIME_GRANULARITY	0x3F /* 6 bit time granularity */
3580 
3581 double get_tdp(unsigned int model)
3582 {
3583 	unsigned long long msr;
3584 
3585 	if (do_rapl & RAPL_PKG_POWER_INFO)
3586 		if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
3587 			return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
3588 
3589 	switch (model) {
3590 	case INTEL_FAM6_ATOM_SILVERMONT1:
3591 	case INTEL_FAM6_ATOM_SILVERMONT2:
3592 		return 30.0;
3593 	default:
3594 		return 135.0;
3595 	}
3596 }
3597 
3598 /*
3599  * rapl_dram_energy_units_probe()
3600  * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
3601  */
3602 static double
3603 rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
3604 {
3605 	/* only called for genuine_intel, family 6 */
3606 
3607 	switch (model) {
3608 	case INTEL_FAM6_HASWELL_X:	/* HSX */
3609 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
3610 	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
3611 	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
3612 	case INTEL_FAM6_XEON_PHI_KNM:
3613 		return (rapl_dram_energy_units = 15.3 / 1000000);
3614 	default:
3615 		return (rapl_energy_units);
3616 	}
3617 }
3618 
3619 
3620 /*
3621  * rapl_probe()
3622  *
3623  * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
3624  */
3625 void rapl_probe(unsigned int family, unsigned int model)
3626 {
3627 	unsigned long long msr;
3628 	unsigned int time_unit;
3629 	double tdp;
3630 
3631 	if (!genuine_intel)
3632 		return;
3633 
3634 	if (family != 6)
3635 		return;
3636 
3637 	switch (model) {
3638 	case INTEL_FAM6_SANDYBRIDGE:
3639 	case INTEL_FAM6_IVYBRIDGE:
3640 	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
3641 	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
3642 	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
3643 	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
3644 	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
3645 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
3646 		if (rapl_joules) {
3647 			BIC_PRESENT(BIC_Pkg_J);
3648 			BIC_PRESENT(BIC_Cor_J);
3649 			BIC_PRESENT(BIC_GFX_J);
3650 		} else {
3651 			BIC_PRESENT(BIC_PkgWatt);
3652 			BIC_PRESENT(BIC_CorWatt);
3653 			BIC_PRESENT(BIC_GFXWatt);
3654 		}
3655 		break;
3656 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
3657 	case INTEL_FAM6_ATOM_GEMINI_LAKE:
3658 		do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
3659 		if (rapl_joules)
3660 			BIC_PRESENT(BIC_Pkg_J);
3661 		else
3662 			BIC_PRESENT(BIC_PkgWatt);
3663 		break;
3664 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
3665 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
3666 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
3667 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
3668 	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
3669 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
3670 		BIC_PRESENT(BIC_PKG__);
3671 		BIC_PRESENT(BIC_RAM__);
3672 		if (rapl_joules) {
3673 			BIC_PRESENT(BIC_Pkg_J);
3674 			BIC_PRESENT(BIC_Cor_J);
3675 			BIC_PRESENT(BIC_RAM_J);
3676 			BIC_PRESENT(BIC_GFX_J);
3677 		} else {
3678 			BIC_PRESENT(BIC_PkgWatt);
3679 			BIC_PRESENT(BIC_CorWatt);
3680 			BIC_PRESENT(BIC_RAMWatt);
3681 			BIC_PRESENT(BIC_GFXWatt);
3682 		}
3683 		break;
3684 	case INTEL_FAM6_HASWELL_X:	/* HSX */
3685 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
3686 	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
3687 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
3688 	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
3689 	case INTEL_FAM6_XEON_PHI_KNM:
3690 		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
3691 		BIC_PRESENT(BIC_PKG__);
3692 		BIC_PRESENT(BIC_RAM__);
3693 		if (rapl_joules) {
3694 			BIC_PRESENT(BIC_Pkg_J);
3695 			BIC_PRESENT(BIC_RAM_J);
3696 		} else {
3697 			BIC_PRESENT(BIC_PkgWatt);
3698 			BIC_PRESENT(BIC_RAMWatt);
3699 		}
3700 		break;
3701 	case INTEL_FAM6_SANDYBRIDGE_X:
3702 	case INTEL_FAM6_IVYBRIDGE_X:
3703 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
3704 		BIC_PRESENT(BIC_PKG__);
3705 		BIC_PRESENT(BIC_RAM__);
3706 		if (rapl_joules) {
3707 			BIC_PRESENT(BIC_Pkg_J);
3708 			BIC_PRESENT(BIC_Cor_J);
3709 			BIC_PRESENT(BIC_RAM_J);
3710 		} else {
3711 			BIC_PRESENT(BIC_PkgWatt);
3712 			BIC_PRESENT(BIC_CorWatt);
3713 			BIC_PRESENT(BIC_RAMWatt);
3714 		}
3715 		break;
3716 	case INTEL_FAM6_ATOM_SILVERMONT1:	/* BYT */
3717 	case INTEL_FAM6_ATOM_SILVERMONT2:	/* AVN */
3718 		do_rapl = RAPL_PKG | RAPL_CORES;
3719 		if (rapl_joules) {
3720 			BIC_PRESENT(BIC_Pkg_J);
3721 			BIC_PRESENT(BIC_Cor_J);
3722 		} else {
3723 			BIC_PRESENT(BIC_PkgWatt);
3724 			BIC_PRESENT(BIC_CorWatt);
3725 		}
3726 		break;
3727 	case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
3728 		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
3729 		BIC_PRESENT(BIC_PKG__);
3730 		BIC_PRESENT(BIC_RAM__);
3731 		if (rapl_joules) {
3732 			BIC_PRESENT(BIC_Pkg_J);
3733 			BIC_PRESENT(BIC_Cor_J);
3734 			BIC_PRESENT(BIC_RAM_J);
3735 		} else {
3736 			BIC_PRESENT(BIC_PkgWatt);
3737 			BIC_PRESENT(BIC_CorWatt);
3738 			BIC_PRESENT(BIC_RAMWatt);
3739 		}
3740 		break;
3741 	default:
3742 		return;
3743 	}
3744 
3745 	/* units on package 0, verify later other packages match */
3746 	if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
3747 		return;
3748 
3749 	rapl_power_units = 1.0 / (1 << (msr & 0xF));
3750 	if (model == INTEL_FAM6_ATOM_SILVERMONT1)
3751 		rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
3752 	else
3753 		rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
3754 
3755 	rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
3756 
3757 	time_unit = msr >> 16 & 0xF;
3758 	if (time_unit == 0)
3759 		time_unit = 0xA;
3760 
3761 	rapl_time_units = 1.0 / (1 << (time_unit));
3762 
3763 	tdp = get_tdp(model);
3764 
3765 	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
3766 	if (!quiet)
3767 		fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
3768 
3769 	return;
3770 }
3771 
3772 void perf_limit_reasons_probe(unsigned int family, unsigned int model)
3773 {
3774 	if (!genuine_intel)
3775 		return;
3776 
3777 	if (family != 6)
3778 		return;
3779 
3780 	switch (model) {
3781 	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
3782 	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
3783 	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
3784 		do_gfx_perf_limit_reasons = 1;
3785 	case INTEL_FAM6_HASWELL_X:	/* HSX */
3786 		do_core_perf_limit_reasons = 1;
3787 		do_ring_perf_limit_reasons = 1;
3788 	default:
3789 		return;
3790 	}
3791 }
3792 
3793 void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
3794 {
3795 	if (is_skx(family, model) || is_bdx(family, model))
3796 		has_automatic_cstate_conversion = 1;
3797 }
3798 
3799 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3800 {
3801 	unsigned long long msr;
3802 	unsigned int dts, dts2;
3803 	int cpu;
3804 
3805 	if (!(do_dts || do_ptm))
3806 		return 0;
3807 
3808 	cpu = t->cpu_id;
3809 
3810 	/* DTS is per-core, no need to print for each thread */
3811 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
3812 		return 0;
3813 
3814 	if (cpu_migrate(cpu)) {
3815 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3816 		return -1;
3817 	}
3818 
3819 	if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
3820 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
3821 			return 0;
3822 
3823 		dts = (msr >> 16) & 0x7F;
3824 		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
3825 			cpu, msr, tcc_activation_temp - dts);
3826 
3827 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
3828 			return 0;
3829 
3830 		dts = (msr >> 16) & 0x7F;
3831 		dts2 = (msr >> 8) & 0x7F;
3832 		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
3833 			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
3834 	}
3835 
3836 
3837 	if (do_dts && debug) {
3838 		unsigned int resolution;
3839 
3840 		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
3841 			return 0;
3842 
3843 		dts = (msr >> 16) & 0x7F;
3844 		resolution = (msr >> 27) & 0xF;
3845 		fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
3846 			cpu, msr, tcc_activation_temp - dts, resolution);
3847 
3848 		if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
3849 			return 0;
3850 
3851 		dts = (msr >> 16) & 0x7F;
3852 		dts2 = (msr >> 8) & 0x7F;
3853 		fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
3854 			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
3855 	}
3856 
3857 	return 0;
3858 }
3859 
3860 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
3861 {
3862 	fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
3863 		cpu, label,
3864 		((msr >> 15) & 1) ? "EN" : "DIS",
3865 		((msr >> 0) & 0x7FFF) * rapl_power_units,
3866 		(1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
3867 		(((msr >> 16) & 1) ? "EN" : "DIS"));
3868 
3869 	return;
3870 }
3871 
3872 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3873 {
3874 	unsigned long long msr;
3875 	int cpu;
3876 
3877 	if (!do_rapl)
3878 		return 0;
3879 
3880 	/* RAPL counters are per package, so print only for 1st thread/package */
3881 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
3882 		return 0;
3883 
3884 	cpu = t->cpu_id;
3885 	if (cpu_migrate(cpu)) {
3886 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
3887 		return -1;
3888 	}
3889 
3890 	if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
3891 		return -1;
3892 
3893 	fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr,
3894 		rapl_power_units, rapl_energy_units, rapl_time_units);
3895 
3896 	if (do_rapl & RAPL_PKG_POWER_INFO) {
3897 
3898 		if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
3899                 	return -5;
3900 
3901 
3902 		fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
3903 			cpu, msr,
3904 			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3905 			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3906 			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3907 			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
3908 
3909 	}
3910 	if (do_rapl & RAPL_PKG) {
3911 
3912 		if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
3913 			return -9;
3914 
3915 		fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
3916 			cpu, msr, (msr >> 63) & 1 ? "" : "UN");
3917 
3918 		print_power_limit_msr(cpu, msr, "PKG Limit #1");
3919 		fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
3920 			cpu,
3921 			((msr >> 47) & 1) ? "EN" : "DIS",
3922 			((msr >> 32) & 0x7FFF) * rapl_power_units,
3923 			(1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
3924 			((msr >> 48) & 1) ? "EN" : "DIS");
3925 	}
3926 
3927 	if (do_rapl & RAPL_DRAM_POWER_INFO) {
3928 		if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
3929                 	return -6;
3930 
3931 		fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
3932 			cpu, msr,
3933 			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3934 			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3935 			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
3936 			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
3937 	}
3938 	if (do_rapl & RAPL_DRAM) {
3939 		if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
3940 			return -9;
3941 		fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
3942 				cpu, msr, (msr >> 31) & 1 ? "" : "UN");
3943 
3944 		print_power_limit_msr(cpu, msr, "DRAM Limit");
3945 	}
3946 	if (do_rapl & RAPL_CORE_POLICY) {
3947 		if (get_msr(cpu, MSR_PP0_POLICY, &msr))
3948 			return -7;
3949 
3950 		fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
3951 	}
3952 	if (do_rapl & RAPL_CORES_POWER_LIMIT) {
3953 		if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
3954 			return -9;
3955 		fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
3956 				cpu, msr, (msr >> 31) & 1 ? "" : "UN");
3957 		print_power_limit_msr(cpu, msr, "Cores Limit");
3958 	}
3959 	if (do_rapl & RAPL_GFX) {
3960 		if (get_msr(cpu, MSR_PP1_POLICY, &msr))
3961 			return -8;
3962 
3963 		fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
3964 
3965 		if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
3966 			return -9;
3967 		fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
3968 				cpu, msr, (msr >> 31) & 1 ? "" : "UN");
3969 		print_power_limit_msr(cpu, msr, "GFX Limit");
3970 	}
3971 	return 0;
3972 }
3973 
3974 /*
3975  * SNB adds support for additional MSRs:
3976  *
3977  * MSR_PKG_C7_RESIDENCY            0x000003fa
3978  * MSR_CORE_C7_RESIDENCY           0x000003fe
3979  * MSR_PKG_C2_RESIDENCY            0x0000060d
3980  */
3981 
3982 int has_snb_msrs(unsigned int family, unsigned int model)
3983 {
3984 	if (!genuine_intel)
3985 		return 0;
3986 
3987 	switch (model) {
3988 	case INTEL_FAM6_SANDYBRIDGE:
3989 	case INTEL_FAM6_SANDYBRIDGE_X:
3990 	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
3991 	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
3992 	case INTEL_FAM6_HASWELL_CORE:	/* HSW */
3993 	case INTEL_FAM6_HASWELL_X:	/* HSW */
3994 	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
3995 	case INTEL_FAM6_HASWELL_GT3E:	/* HSW */
3996 	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
3997 	case INTEL_FAM6_BROADWELL_GT3E:	/* BDW */
3998 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
3999 	case INTEL_FAM6_BROADWELL_XEON_D:	/* BDX-DE */
4000 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
4001 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
4002 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
4003 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
4004 	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
4005 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
4006 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
4007 	case INTEL_FAM6_ATOM_GEMINI_LAKE:
4008 	case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
4009 		return 1;
4010 	}
4011 	return 0;
4012 }
4013 
4014 /*
4015  * HSW adds support for additional MSRs:
4016  *
4017  * MSR_PKG_C8_RESIDENCY		0x00000630
4018  * MSR_PKG_C9_RESIDENCY		0x00000631
4019  * MSR_PKG_C10_RESIDENCY	0x00000632
4020  *
4021  * MSR_PKGC8_IRTL		0x00000633
4022  * MSR_PKGC9_IRTL		0x00000634
4023  * MSR_PKGC10_IRTL		0x00000635
4024  *
4025  */
4026 int has_hsw_msrs(unsigned int family, unsigned int model)
4027 {
4028 	if (!genuine_intel)
4029 		return 0;
4030 
4031 	switch (model) {
4032 	case INTEL_FAM6_HASWELL_ULT:	/* HSW */
4033 	case INTEL_FAM6_BROADWELL_CORE:	/* BDW */
4034 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
4035 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
4036 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
4037 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
4038 	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
4039 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
4040 	case INTEL_FAM6_ATOM_GEMINI_LAKE:
4041 		return 1;
4042 	}
4043 	return 0;
4044 }
4045 
4046 /*
4047  * SKL adds support for additional MSRS:
4048  *
4049  * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
4050  * MSR_PKG_ANY_CORE_C0_RES         0x00000659
4051  * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
4052  * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
4053  */
4054 int has_skl_msrs(unsigned int family, unsigned int model)
4055 {
4056 	if (!genuine_intel)
4057 		return 0;
4058 
4059 	switch (model) {
4060 	case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
4061 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
4062 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
4063 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
4064 	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
4065 		return 1;
4066 	}
4067 	return 0;
4068 }
4069 
4070 int is_slm(unsigned int family, unsigned int model)
4071 {
4072 	if (!genuine_intel)
4073 		return 0;
4074 	switch (model) {
4075 	case INTEL_FAM6_ATOM_SILVERMONT1:	/* BYT */
4076 	case INTEL_FAM6_ATOM_SILVERMONT2:	/* AVN */
4077 		return 1;
4078 	}
4079 	return 0;
4080 }
4081 
4082 int is_knl(unsigned int family, unsigned int model)
4083 {
4084 	if (!genuine_intel)
4085 		return 0;
4086 	switch (model) {
4087 	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
4088 	case INTEL_FAM6_XEON_PHI_KNM:
4089 		return 1;
4090 	}
4091 	return 0;
4092 }
4093 
4094 int is_cnl(unsigned int family, unsigned int model)
4095 {
4096 	if (!genuine_intel)
4097 		return 0;
4098 
4099 	switch (model) {
4100 	case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
4101 		return 1;
4102 	}
4103 
4104 	return 0;
4105 }
4106 
/*
 * Returns the multiplier to use for APERF/MPERF values:
 * 1024 when is_knl() matches the given family/model, 1 otherwise.
 */
unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
{
	return is_knl(family, model) ? 1024 : 1;
}
4113 
4114 #define SLM_BCLK_FREQS 5
4115 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
4116 
4117 double slm_bclk(void)
4118 {
4119 	unsigned long long msr = 3;
4120 	unsigned int i;
4121 	double freq;
4122 
4123 	if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
4124 		fprintf(outf, "SLM BCLK: unknown\n");
4125 
4126 	i = msr & 0xf;
4127 	if (i >= SLM_BCLK_FREQS) {
4128 		fprintf(outf, "SLM BCLK[%d] invalid\n", i);
4129 		i = 3;
4130 	}
4131 	freq = slm_freq_table[i];
4132 
4133 	if (!quiet)
4134 		fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
4135 
4136 	return freq;
4137 }
4138 
/*
 * Returns the bus clock in MHz for the given family/model:
 *   100.00  when has_snb_msrs() or is_knl() matches,
 *   the value from slm_bclk() when is_slm() matches,
 *   133.33  otherwise.
 */
double discover_bclk(unsigned int family, unsigned int model)
{
	if (has_snb_msrs(family, model) || is_knl(family, model))
		return 100.00;

	if (is_slm(family, model))
		return slm_bclk();

	return 133.33;
}
4148 
4149 /*
4150  * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
4151  * the Thermal Control Circuit (TCC) activates.
4152  * This is usually equal to tjMax.
4153  *
4154  * Older processors do not have this MSR, so there we guess,
4155  * but also allow cmdline over-ride with -T.
4156  *
4157  * Several MSR temperature values are in units of degrees-C
4158  * below this value, including the Digital Thermal Sensor (DTS),
4159  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
4160  */
4161 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4162 {
4163 	unsigned long long msr;
4164 	unsigned int target_c_local;
4165 	int cpu;
4166 
4167 	/* tcc_activation_temp is used only for dts or ptm */
4168 	if (!(do_dts || do_ptm))
4169 		return 0;
4170 
4171 	/* this is a per-package concept */
4172 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4173 		return 0;
4174 
4175 	cpu = t->cpu_id;
4176 	if (cpu_migrate(cpu)) {
4177 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
4178 		return -1;
4179 	}
4180 
4181 	if (tcc_activation_temp_override != 0) {
4182 		tcc_activation_temp = tcc_activation_temp_override;
4183 		fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
4184 			cpu, tcc_activation_temp);
4185 		return 0;
4186 	}
4187 
4188 	/* Temperature Target MSR is Nehalem and newer only */
4189 	if (!do_nhm_platform_info)
4190 		goto guess;
4191 
4192 	if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
4193 		goto guess;
4194 
4195 	target_c_local = (msr >> 16) & 0xFF;
4196 
4197 	if (!quiet)
4198 		fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
4199 			cpu, msr, target_c_local);
4200 
4201 	if (!target_c_local)
4202 		goto guess;
4203 
4204 	tcc_activation_temp = target_c_local;
4205 
4206 	return 0;
4207 
4208 guess:
4209 	tcc_activation_temp = TJMAX_DEFAULT;
4210 	fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
4211 		cpu, tcc_activation_temp);
4212 
4213 	return 0;
4214 }
4215 
4216 void decode_feature_control_msr(void)
4217 {
4218 	unsigned long long msr;
4219 
4220 	if (!get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr))
4221 		fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
4222 			base_cpu, msr,
4223 			msr & FEATURE_CONTROL_LOCKED ? "" : "UN-",
4224 			msr & (1 << 18) ? "SGX" : "");
4225 }
4226 
4227 void decode_misc_enable_msr(void)
4228 {
4229 	unsigned long long msr;
4230 
4231 	if (!genuine_intel)
4232 		return;
4233 
4234 	if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
4235 		fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
4236 			base_cpu, msr,
4237 			msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
4238 			msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
4239 			msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
4240 			msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
4241 			msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
4242 }
4243 
4244 void decode_misc_feature_control(void)
4245 {
4246 	unsigned long long msr;
4247 
4248 	if (!has_misc_feature_control)
4249 		return;
4250 
4251 	if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
4252 		fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
4253 			base_cpu, msr,
4254 			msr & (0 << 0) ? "No-" : "",
4255 			msr & (1 << 0) ? "No-" : "",
4256 			msr & (2 << 0) ? "No-" : "",
4257 			msr & (3 << 0) ? "No-" : "");
4258 }
4259 /*
4260  * Decode MSR_MISC_PWR_MGMT
4261  *
4262  * Decode the bits according to the Nehalem documentation
4263  * bit[0] seems to continue to have same meaning going forward
4264  * bit[1] less so...
4265  */
4266 void decode_misc_pwr_mgmt_msr(void)
4267 {
4268 	unsigned long long msr;
4269 
4270 	if (!do_nhm_platform_info)
4271 		return;
4272 
4273 	if (no_MSR_MISC_PWR_MGMT)
4274 		return;
4275 
4276 	if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
4277 		fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
4278 			base_cpu, msr,
4279 			msr & (1 << 0) ? "DIS" : "EN",
4280 			msr & (1 << 1) ? "EN" : "DIS",
4281 			msr & (1 << 8) ? "EN" : "DIS");
4282 }
4283 /*
4284  * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
4285  *
4286  * This MSRs are present on Silvermont processors,
4287  * Intel Atom processor E3000 series (Baytrail), and friends.
4288  */
4289 void decode_c6_demotion_policy_msr(void)
4290 {
4291 	unsigned long long msr;
4292 
4293 	if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
4294 		fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
4295 			base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
4296 
4297 	if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
4298 		fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
4299 			base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
4300 }
4301 
4302 void process_cpuid()
4303 {
4304 	unsigned int eax, ebx, ecx, edx, max_level, max_extended_level;
4305 	unsigned int fms, family, model, stepping;
4306 	unsigned int has_turbo;
4307 
4308 	eax = ebx = ecx = edx = 0;
4309 
4310 	__cpuid(0, max_level, ebx, ecx, edx);
4311 
4312 	if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
4313 		genuine_intel = 1;
4314 
4315 	if (!quiet)
4316 		fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
4317 			(char *)&ebx, (char *)&edx, (char *)&ecx);
4318 
4319 	__cpuid(1, fms, ebx, ecx, edx);
4320 	family = (fms >> 8) & 0xf;
4321 	model = (fms >> 4) & 0xf;
4322 	stepping = fms & 0xf;
4323 	if (family == 6 || family == 0xf)
4324 		model += ((fms >> 16) & 0xf) << 4;
4325 
4326 	if (!quiet) {
4327 		fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
4328 			max_level, family, model, stepping, family, model, stepping);
4329 		fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n",
4330 			ecx & (1 << 0) ? "SSE3" : "-",
4331 			ecx & (1 << 3) ? "MONITOR" : "-",
4332 			ecx & (1 << 6) ? "SMX" : "-",
4333 			ecx & (1 << 7) ? "EIST" : "-",
4334 			ecx & (1 << 8) ? "TM2" : "-",
4335 			edx & (1 << 4) ? "TSC" : "-",
4336 			edx & (1 << 5) ? "MSR" : "-",
4337 			edx & (1 << 22) ? "ACPI-TM" : "-",
4338 			edx & (1 << 29) ? "TM" : "-");
4339 	}
4340 
4341 	if (!(edx & (1 << 5)))
4342 		errx(1, "CPUID: no MSR");
4343 
4344 	/*
4345 	 * check max extended function levels of CPUID.
4346 	 * This is needed to check for invariant TSC.
4347 	 * This check is valid for both Intel and AMD.
4348 	 */
4349 	ebx = ecx = edx = 0;
4350 	__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
4351 
4352 	if (max_extended_level >= 0x80000007) {
4353 
4354 		/*
4355 		 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
4356 		 * this check is valid for both Intel and AMD
4357 		 */
4358 		__cpuid(0x80000007, eax, ebx, ecx, edx);
4359 		has_invariant_tsc = edx & (1 << 8);
4360 	}
4361 
4362 	/*
4363 	 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
4364 	 * this check is valid for both Intel and AMD
4365 	 */
4366 
4367 	__cpuid(0x6, eax, ebx, ecx, edx);
4368 	has_aperf = ecx & (1 << 0);
4369 	if (has_aperf) {
4370 		BIC_PRESENT(BIC_Avg_MHz);
4371 		BIC_PRESENT(BIC_Busy);
4372 		BIC_PRESENT(BIC_Bzy_MHz);
4373 	}
4374 	do_dts = eax & (1 << 0);
4375 	if (do_dts)
4376 		BIC_PRESENT(BIC_CoreTmp);
4377 	has_turbo = eax & (1 << 1);
4378 	do_ptm = eax & (1 << 6);
4379 	if (do_ptm)
4380 		BIC_PRESENT(BIC_PkgTmp);
4381 	has_hwp = eax & (1 << 7);
4382 	has_hwp_notify = eax & (1 << 8);
4383 	has_hwp_activity_window = eax & (1 << 9);
4384 	has_hwp_epp = eax & (1 << 10);
4385 	has_hwp_pkg = eax & (1 << 11);
4386 	has_epb = ecx & (1 << 3);
4387 
4388 	if (!quiet)
4389 		fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
4390 			"%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
4391 			has_aperf ? "" : "No-",
4392 			has_turbo ? "" : "No-",
4393 			do_dts ? "" : "No-",
4394 			do_ptm ? "" : "No-",
4395 			has_hwp ? "" : "No-",
4396 			has_hwp_notify ? "" : "No-",
4397 			has_hwp_activity_window ? "" : "No-",
4398 			has_hwp_epp ? "" : "No-",
4399 			has_hwp_pkg ? "" : "No-",
4400 			has_epb ? "" : "No-");
4401 
4402 	if (!quiet)
4403 		decode_misc_enable_msr();
4404 
4405 
4406 	if (max_level >= 0x7 && !quiet) {
4407 		int has_sgx;
4408 
4409 		ecx = 0;
4410 
4411 		__cpuid_count(0x7, 0, eax, ebx, ecx, edx);
4412 
4413 		has_sgx = ebx & (1 << 2);
4414 		fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
4415 
4416 		if (has_sgx)
4417 			decode_feature_control_msr();
4418 	}
4419 
4420 	if (max_level >= 0x15) {
4421 		unsigned int eax_crystal;
4422 		unsigned int ebx_tsc;
4423 
4424 		/*
4425 		 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
4426 		 */
4427 		eax_crystal = ebx_tsc = crystal_hz = edx = 0;
4428 		__cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
4429 
4430 		if (ebx_tsc != 0) {
4431 
4432 			if (!quiet && (ebx != 0))
4433 				fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
4434 					eax_crystal, ebx_tsc, crystal_hz);
4435 
4436 			if (crystal_hz == 0)
4437 				switch(model) {
4438 				case INTEL_FAM6_SKYLAKE_MOBILE:	/* SKL */
4439 				case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
4440 				case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
4441 				case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
4442 					crystal_hz = 24000000;	/* 24.0 MHz */
4443 					break;
4444 				case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
4445 					crystal_hz = 25000000;	/* 25.0 MHz */
4446 					break;
4447 				case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
4448 				case INTEL_FAM6_ATOM_GEMINI_LAKE:
4449 					crystal_hz = 19200000;	/* 19.2 MHz */
4450 					break;
4451 				default:
4452 					crystal_hz = 0;
4453 			}
4454 
4455 			if (crystal_hz) {
4456 				tsc_hz =  (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
4457 				if (!quiet)
4458 					fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
4459 						tsc_hz / 1000000, crystal_hz, ebx_tsc,  eax_crystal);
4460 			}
4461 		}
4462 	}
4463 	if (max_level >= 0x16) {
4464 		unsigned int base_mhz, max_mhz, bus_mhz, edx;
4465 
4466 		/*
4467 		 * CPUID 16H Base MHz, Max MHz, Bus MHz
4468 		 */
4469 		base_mhz = max_mhz = bus_mhz = edx = 0;
4470 
4471 		__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
4472 		if (!quiet)
4473 			fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
4474 				base_mhz, max_mhz, bus_mhz);
4475 	}
4476 
4477 	if (has_aperf)
4478 		aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
4479 
4480 	BIC_PRESENT(BIC_IRQ);
4481 	BIC_PRESENT(BIC_TSC_MHz);
4482 
4483 	if (probe_nhm_msrs(family, model)) {
4484 		do_nhm_platform_info = 1;
4485 		BIC_PRESENT(BIC_CPU_c1);
4486 		BIC_PRESENT(BIC_CPU_c3);
4487 		BIC_PRESENT(BIC_CPU_c6);
4488 		BIC_PRESENT(BIC_SMI);
4489 	}
4490 	do_snb_cstates = has_snb_msrs(family, model);
4491 
4492 	if (do_snb_cstates)
4493 		BIC_PRESENT(BIC_CPU_c7);
4494 
4495 	do_irtl_snb = has_snb_msrs(family, model);
4496 	if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
4497 		BIC_PRESENT(BIC_Pkgpc2);
4498 	if (pkg_cstate_limit >= PCL__3)
4499 		BIC_PRESENT(BIC_Pkgpc3);
4500 	if (pkg_cstate_limit >= PCL__6)
4501 		BIC_PRESENT(BIC_Pkgpc6);
4502 	if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
4503 		BIC_PRESENT(BIC_Pkgpc7);
4504 	if (has_slv_msrs(family, model)) {
4505 		BIC_NOT_PRESENT(BIC_Pkgpc2);
4506 		BIC_NOT_PRESENT(BIC_Pkgpc3);
4507 		BIC_PRESENT(BIC_Pkgpc6);
4508 		BIC_NOT_PRESENT(BIC_Pkgpc7);
4509 		BIC_PRESENT(BIC_Mod_c6);
4510 		use_c1_residency_msr = 1;
4511 	}
4512 	if (is_dnv(family, model)) {
4513 		BIC_PRESENT(BIC_CPU_c1);
4514 		BIC_NOT_PRESENT(BIC_CPU_c3);
4515 		BIC_NOT_PRESENT(BIC_Pkgpc3);
4516 		BIC_NOT_PRESENT(BIC_CPU_c7);
4517 		BIC_NOT_PRESENT(BIC_Pkgpc7);
4518 		use_c1_residency_msr = 1;
4519 	}
4520 	if (is_skx(family, model)) {
4521 		BIC_NOT_PRESENT(BIC_CPU_c3);
4522 		BIC_NOT_PRESENT(BIC_Pkgpc3);
4523 		BIC_NOT_PRESENT(BIC_CPU_c7);
4524 		BIC_NOT_PRESENT(BIC_Pkgpc7);
4525 	}
4526 	if (is_bdx(family, model)) {
4527 		BIC_NOT_PRESENT(BIC_CPU_c7);
4528 		BIC_NOT_PRESENT(BIC_Pkgpc7);
4529 	}
4530 	if (has_hsw_msrs(family, model)) {
4531 		BIC_PRESENT(BIC_Pkgpc8);
4532 		BIC_PRESENT(BIC_Pkgpc9);
4533 		BIC_PRESENT(BIC_Pkgpc10);
4534 	}
4535 	do_irtl_hsw = has_hsw_msrs(family, model);
4536 	if (has_skl_msrs(family, model)) {
4537 		BIC_PRESENT(BIC_Totl_c0);
4538 		BIC_PRESENT(BIC_Any_c0);
4539 		BIC_PRESENT(BIC_GFX_c0);
4540 		BIC_PRESENT(BIC_CPUGFX);
4541 	}
4542 	do_slm_cstates = is_slm(family, model);
4543 	do_knl_cstates  = is_knl(family, model);
4544 	do_cnl_cstates = is_cnl(family, model);
4545 
4546 	if (!quiet)
4547 		decode_misc_pwr_mgmt_msr();
4548 
4549 	if (!quiet && has_slv_msrs(family, model))
4550 		decode_c6_demotion_policy_msr();
4551 
4552 	rapl_probe(family, model);
4553 	perf_limit_reasons_probe(family, model);
4554 	automatic_cstate_conversion_probe(family, model);
4555 
4556 	if (!quiet)
4557 		dump_cstate_pstate_config_info(family, model);
4558 
4559 	if (!quiet)
4560 		dump_sysfs_cstate_config();
4561 	if (!quiet)
4562 		dump_sysfs_pstate_config();
4563 
4564 	if (has_skl_msrs(family, model))
4565 		calculate_tsc_tweak();
4566 
4567 	if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
4568 		BIC_PRESENT(BIC_GFX_rc6);
4569 
4570 	if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
4571 		BIC_PRESENT(BIC_GFXMHz);
4572 
4573 	if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
4574 		BIC_PRESENT(BIC_CPU_LPI);
4575 	else
4576 		BIC_NOT_PRESENT(BIC_CPU_LPI);
4577 
4578 	if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", R_OK))
4579 		BIC_PRESENT(BIC_SYS_LPI);
4580 	else
4581 		BIC_NOT_PRESENT(BIC_SYS_LPI);
4582 
4583 	if (!quiet)
4584 		decode_misc_feature_control();
4585 
4586 	return;
4587 }
4588 
4589 
/*
 * Directory-entry filter: return 1 for entries whose name starts with a
 * decimal digit, 0 otherwise.  In /dev/cpu/ this keeps the numbered cpu
 * directories and filters out ".", "..", "microcode".
 */
int dir_filter(const struct dirent *dirp)
{
	/*
	 * <ctype.h> functions require a value representable as unsigned char;
	 * passing a negative plain char is undefined behavior.
	 */
	return isdigit((unsigned char)dirp->d_name[0]) ? 1 : 0;
}
4601 
/*
 * Placeholder: ignores its argument and always reports success (0).
 */
int open_dev_cpu_msr(int dummy1)
{
	(void)dummy1;	/* intentionally unused */

	return 0;
}
4606 
4607 void topology_probe()
4608 {
4609 	int i;
4610 	int max_core_id = 0;
4611 	int max_package_id = 0;
4612 	int max_siblings = 0;
4613 
4614 	/* Initialize num_cpus, max_cpu_num */
4615 	set_max_cpu_num();
4616 	topo.num_cpus = 0;
4617 	for_all_proc_cpus(count_cpus);
4618 	if (!summary_only && topo.num_cpus > 1)
4619 		BIC_PRESENT(BIC_CPU);
4620 
4621 	if (debug > 1)
4622 		fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
4623 
4624 	cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
4625 	if (cpus == NULL)
4626 		err(1, "calloc cpus");
4627 
4628 	/*
4629 	 * Allocate and initialize cpu_present_set
4630 	 */
4631 	cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
4632 	if (cpu_present_set == NULL)
4633 		err(3, "CPU_ALLOC");
4634 	cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
4635 	CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
4636 	for_all_proc_cpus(mark_cpu_present);
4637 
4638 	/*
4639 	 * Validate that all cpus in cpu_subset are also in cpu_present_set
4640 	 */
4641 	for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
4642 		if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
4643 			if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
4644 				err(1, "cpu%d not present", i);
4645 	}
4646 
4647 	/*
4648 	 * Allocate and initialize cpu_affinity_set
4649 	 */
4650 	cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
4651 	if (cpu_affinity_set == NULL)
4652 		err(3, "CPU_ALLOC");
4653 	cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
4654 	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
4655 
4656 
4657 	/*
4658 	 * For online cpus
4659 	 * find max_core_id, max_package_id
4660 	 */
4661 	for (i = 0; i <= topo.max_cpu_num; ++i) {
4662 		int siblings;
4663 
4664 		if (cpu_is_not_present(i)) {
4665 			if (debug > 1)
4666 				fprintf(outf, "cpu%d NOT PRESENT\n", i);
4667 			continue;
4668 		}
4669 
4670 		cpus[i].logical_cpu_id = i;
4671 
4672 		/* get package information */
4673 		cpus[i].physical_package_id = get_physical_package_id(i);
4674 		if (cpus[i].physical_package_id > max_package_id)
4675 			max_package_id = cpus[i].physical_package_id;
4676 
4677 		/* get numa node information */
4678 		cpus[i].node_id = get_node_id(&cpus[i]);
4679 
4680 		/* get core information */
4681 		cpus[i].physical_core_id = get_core_id(i);
4682 		if (cpus[i].physical_core_id > max_core_id)
4683 			max_core_id = cpus[i].physical_core_id;
4684 
4685 		/* get thread information */
4686 		siblings = get_thread_siblings(&cpus[i]);
4687 		if (siblings > max_siblings)
4688 			max_siblings = siblings;
4689 
4690 		if (debug > 1)
4691 			fprintf(outf, "cpu %d pkg %d node %d core %d\n",
4692 				i, cpus[i].physical_package_id,
4693 				cpus[i].node_id,
4694 				cpus[i].physical_core_id);
4695 	}
4696 	topo.num_cores_per_pkg = max_core_id + 1;
4697 	if (debug > 1)
4698 		fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
4699 			max_core_id, topo.num_cores_per_pkg);
4700 	if (!summary_only && topo.num_cores_per_pkg > 1)
4701 		BIC_PRESENT(BIC_Core);
4702 
4703 	topo.num_packages = max_package_id + 1;
4704 	if (debug > 1)
4705 		fprintf(outf, "max_package_id %d, sizing for %d packages\n",
4706 			max_package_id, topo.num_packages);
4707 	if (!summary_only && topo.num_packages > 1)
4708 		BIC_PRESENT(BIC_Package);
4709 
4710 	topo.num_threads_per_core = max_siblings;
4711 	if (debug > 1)
4712 		fprintf(outf, "max_siblings %d\n", max_siblings);
4713 }
4714 
4715 void
4716 allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
4717 {
4718 	int i;
4719 
4720 	*t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg *
4721 		topo.num_packages, sizeof(struct thread_data));
4722 	if (*t == NULL)
4723 		goto error;
4724 
4725 	for (i = 0; i < topo.num_threads_per_core *
4726 		topo.num_cores_per_pkg * topo.num_packages; i++)
4727 		(*t)[i].cpu_id = -1;
4728 
4729 	*c = calloc(topo.num_cores_per_pkg * topo.num_packages,
4730 		sizeof(struct core_data));
4731 	if (*c == NULL)
4732 		goto error;
4733 
4734 	for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++)
4735 		(*c)[i].core_id = -1;
4736 
4737 	*p = calloc(topo.num_packages, sizeof(struct pkg_data));
4738 	if (*p == NULL)
4739 		goto error;
4740 
4741 	for (i = 0; i < topo.num_packages; i++)
4742 		(*p)[i].package_id = i;
4743 
4744 	return;
4745 error:
4746 	err(1, "calloc counters");
4747 }
4748 /*
4749  * init_counter()
4750  *
4751  * set cpu_id, core_num, pkg_num
4752  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
4753  *
4754  * increment topo.num_cores when 1st core in pkg seen
4755  */
4756 void init_counter(struct thread_data *thread_base, struct core_data *core_base,
4757 	struct pkg_data *pkg_base, int thread_num, int core_num,
4758 	int pkg_num, int cpu_id)
4759 {
4760 	struct thread_data *t;
4761 	struct core_data *c;
4762 	struct pkg_data *p;
4763 
4764 	t = GET_THREAD(thread_base, thread_num, core_num, pkg_num);
4765 	c = GET_CORE(core_base, core_num, pkg_num);
4766 	p = GET_PKG(pkg_base, pkg_num);
4767 
4768 	t->cpu_id = cpu_id;
4769 	if (thread_num == 0) {
4770 		t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
4771 		if (cpu_is_first_core_in_package(cpu_id))
4772 			t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
4773 	}
4774 
4775 	c->core_id = core_num;
4776 	p->package_id = pkg_num;
4777 }
4778 
4779 
4780 int initialize_counters(int cpu_id)
4781 {
4782 	int my_thread_id, my_core_id, my_package_id;
4783 
4784 	my_package_id = get_physical_package_id(cpu_id);
4785 	my_core_id = get_core_id(cpu_id);
4786 	my_thread_id = get_cpu_position_in_core(cpu_id);
4787 	if (!my_thread_id)
4788 		topo.num_cores++;
4789 
4790 	init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
4791 	init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
4792 	return 0;
4793 }
4794 
4795 void allocate_output_buffer()
4796 {
4797 	output_buffer = calloc(1, (1 + topo.num_cpus) * 1024);
4798 	outp = output_buffer;
4799 	if (outp == NULL)
4800 		err(-1, "calloc output buffer");
4801 }
4802 void allocate_fd_percpu(void)
4803 {
4804 	fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
4805 	if (fd_percpu == NULL)
4806 		err(-1, "calloc fd_percpu");
4807 }
4808 void allocate_irq_buffers(void)
4809 {
4810 	irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
4811 	if (irq_column_2_cpu == NULL)
4812 		err(-1, "calloc %d", topo.num_cpus);
4813 
4814 	irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
4815 	if (irqs_per_cpu == NULL)
4816 		err(-1, "calloc %d", topo.max_cpu_num + 1);
4817 }
4818 void setup_all_buffers(void)
4819 {
4820 	topology_probe();
4821 	allocate_irq_buffers();
4822 	allocate_fd_percpu();
4823 	allocate_counters(&thread_even, &core_even, &package_even);
4824 	allocate_counters(&thread_odd, &core_odd, &package_odd);
4825 	allocate_output_buffer();
4826 	for_all_proc_cpus(initialize_counters);
4827 }
4828 
4829 void set_base_cpu(void)
4830 {
4831 	base_cpu = sched_getcpu();
4832 	if (base_cpu < 0)
4833 		err(-ENODEV, "No valid cpus found");
4834 
4835 	if (debug > 1)
4836 		fprintf(outf, "base_cpu = %d\n", base_cpu);
4837 }
4838 
4839 void turbostat_init()
4840 {
4841 	setup_all_buffers();
4842 	set_base_cpu();
4843 	check_dev_msr();
4844 	check_permissions();
4845 	process_cpuid();
4846 
4847 
4848 	if (!quiet)
4849 		for_all_cpus(print_hwp, ODD_COUNTERS);
4850 
4851 	if (!quiet)
4852 		for_all_cpus(print_epb, ODD_COUNTERS);
4853 
4854 	if (!quiet)
4855 		for_all_cpus(print_perf_limit, ODD_COUNTERS);
4856 
4857 	if (!quiet)
4858 		for_all_cpus(print_rapl, ODD_COUNTERS);
4859 
4860 	for_all_cpus(set_temperature_target, ODD_COUNTERS);
4861 
4862 	if (!quiet)
4863 		for_all_cpus(print_thermal, ODD_COUNTERS);
4864 
4865 	if (!quiet && do_irtl_snb)
4866 		print_irtl();
4867 }
4868 
4869 int fork_it(char **argv)
4870 {
4871 	pid_t child_pid;
4872 	int status;
4873 
4874 	snapshot_proc_sysfs_files();
4875 	status = for_all_cpus(get_counters, EVEN_COUNTERS);
4876 	if (status)
4877 		exit(status);
4878 	/* clear affinity side-effect of get_counters() */
4879 	sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
4880 	gettimeofday(&tv_even, (struct timezone *)NULL);
4881 
4882 	child_pid = fork();
4883 	if (!child_pid) {
4884 		/* child */
4885 		execvp(argv[0], argv);
4886 		err(errno, "exec %s", argv[0]);
4887 	} else {
4888 
4889 		/* parent */
4890 		if (child_pid == -1)
4891 			err(1, "fork");
4892 
4893 		signal(SIGINT, SIG_IGN);
4894 		signal(SIGQUIT, SIG_IGN);
4895 		if (waitpid(child_pid, &status, 0) == -1)
4896 			err(status, "waitpid");
4897 	}
4898 	/*
4899 	 * n.b. fork_it() does not check for errors from for_all_cpus()
4900 	 * because re-starting is problematic when forking
4901 	 */
4902 	snapshot_proc_sysfs_files();
4903 	for_all_cpus(get_counters, ODD_COUNTERS);
4904 	gettimeofday(&tv_odd, (struct timezone *)NULL);
4905 	timersub(&tv_odd, &tv_even, &tv_delta);
4906 	if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
4907 		fprintf(outf, "%s: Counter reset detected\n", progname);
4908 	else {
4909 		compute_average(EVEN_COUNTERS);
4910 		format_all_counters(EVEN_COUNTERS);
4911 	}
4912 
4913 	fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
4914 
4915 	flush_output_stderr();
4916 
4917 	return status;
4918 }
4919 
4920 int get_and_dump_counters(void)
4921 {
4922 	int status;
4923 
4924 	snapshot_proc_sysfs_files();
4925 	status = for_all_cpus(get_counters, ODD_COUNTERS);
4926 	if (status)
4927 		return status;
4928 
4929 	status = for_all_cpus(dump_counters, ODD_COUNTERS);
4930 	if (status)
4931 		return status;
4932 
4933 	flush_output_stdout();
4934 
4935 	return status;
4936 }
4937 
4938 void print_version() {
4939 	fprintf(outf, "turbostat version 17.06.23"
4940 		" - Len Brown <lenb@kernel.org>\n");
4941 }
4942 
4943 int add_counter(unsigned int msr_num, char *path, char *name,
4944 	unsigned int width, enum counter_scope scope,
4945 	enum counter_type type, enum counter_format format, int flags)
4946 {
4947 	struct msr_counter *msrp;
4948 
4949 	msrp = calloc(1, sizeof(struct msr_counter));
4950 	if (msrp == NULL) {
4951 		perror("calloc");
4952 		exit(1);
4953 	}
4954 
4955 	msrp->msr_num = msr_num;
4956 	strncpy(msrp->name, name, NAME_BYTES);
4957 	if (path)
4958 		strncpy(msrp->path, path, PATH_BYTES);
4959 	msrp->width = width;
4960 	msrp->type = type;
4961 	msrp->format = format;
4962 	msrp->flags = flags;
4963 
4964 	switch (scope) {
4965 
4966 	case SCOPE_CPU:
4967 		msrp->next = sys.tp;
4968 		sys.tp = msrp;
4969 		sys.added_thread_counters++;
4970 		if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) {
4971 			fprintf(stderr, "exceeded max %d added thread counters\n",
4972 				MAX_ADDED_COUNTERS);
4973 			exit(-1);
4974 		}
4975 		break;
4976 
4977 	case SCOPE_CORE:
4978 		msrp->next = sys.cp;
4979 		sys.cp = msrp;
4980 		sys.added_core_counters++;
4981 		if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
4982 			fprintf(stderr, "exceeded max %d added core counters\n",
4983 				MAX_ADDED_COUNTERS);
4984 			exit(-1);
4985 		}
4986 		break;
4987 
4988 	case SCOPE_PACKAGE:
4989 		msrp->next = sys.pp;
4990 		sys.pp = msrp;
4991 		sys.added_package_counters++;
4992 		if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
4993 			fprintf(stderr, "exceeded max %d added package counters\n",
4994 				MAX_ADDED_COUNTERS);
4995 			exit(-1);
4996 		}
4997 		break;
4998 	}
4999 
5000 	return 0;
5001 }
5002 
5003 void parse_add_command(char *add_command)
5004 {
5005 	int msr_num = 0;
5006 	char *path = NULL;
5007 	char name_buffer[NAME_BYTES] = "";
5008 	int width = 64;
5009 	int fail = 0;
5010 	enum counter_scope scope = SCOPE_CPU;
5011 	enum counter_type type = COUNTER_CYCLES;
5012 	enum counter_format format = FORMAT_DELTA;
5013 
5014 	while (add_command) {
5015 
5016 		if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
5017 			goto next;
5018 
5019 		if (sscanf(add_command, "msr%d", &msr_num) == 1)
5020 			goto next;
5021 
5022 		if (*add_command == '/') {
5023 			path = add_command;
5024 			goto next;
5025 		}
5026 
5027 		if (sscanf(add_command, "u%d", &width) == 1) {
5028 			if ((width == 32) || (width == 64))
5029 				goto next;
5030 			width = 64;
5031 		}
5032 		if (!strncmp(add_command, "cpu", strlen("cpu"))) {
5033 			scope = SCOPE_CPU;
5034 			goto next;
5035 		}
5036 		if (!strncmp(add_command, "core", strlen("core"))) {
5037 			scope = SCOPE_CORE;
5038 			goto next;
5039 		}
5040 		if (!strncmp(add_command, "package", strlen("package"))) {
5041 			scope = SCOPE_PACKAGE;
5042 			goto next;
5043 		}
5044 		if (!strncmp(add_command, "cycles", strlen("cycles"))) {
5045 			type = COUNTER_CYCLES;
5046 			goto next;
5047 		}
5048 		if (!strncmp(add_command, "seconds", strlen("seconds"))) {
5049 			type = COUNTER_SECONDS;
5050 			goto next;
5051 		}
5052 		if (!strncmp(add_command, "usec", strlen("usec"))) {
5053 			type = COUNTER_USEC;
5054 			goto next;
5055 		}
5056 		if (!strncmp(add_command, "raw", strlen("raw"))) {
5057 			format = FORMAT_RAW;
5058 			goto next;
5059 		}
5060 		if (!strncmp(add_command, "delta", strlen("delta"))) {
5061 			format = FORMAT_DELTA;
5062 			goto next;
5063 		}
5064 		if (!strncmp(add_command, "percent", strlen("percent"))) {
5065 			format = FORMAT_PERCENT;
5066 			goto next;
5067 		}
5068 
5069 		if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {	/* 18 < NAME_BYTES */
5070 			char *eos;
5071 
5072 			eos = strchr(name_buffer, ',');
5073 			if (eos)
5074 				*eos = '\0';
5075 			goto next;
5076 		}
5077 
5078 next:
5079 		add_command = strchr(add_command, ',');
5080 		if (add_command) {
5081 			*add_command = '\0';
5082 			add_command++;
5083 		}
5084 
5085 	}
5086 	if ((msr_num == 0) && (path == NULL)) {
5087 		fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n");
5088 		fail++;
5089 	}
5090 
5091 	/* generate default column header */
5092 	if (*name_buffer == '\0') {
5093 		if (width == 32)
5094 			sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
5095 		else
5096 			sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
5097 	}
5098 
5099 	if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0))
5100 		fail++;
5101 
5102 	if (fail) {
5103 		help();
5104 		exit(1);
5105 	}
5106 }
5107 
5108 int is_deferred_skip(char *name)
5109 {
5110 	int i;
5111 
5112 	for (i = 0; i < deferred_skip_index; ++i)
5113 		if (!strcmp(name, deferred_skip_names[i]))
5114 			return 1;
5115 	return 0;
5116 }
5117 
5118 void probe_sysfs(void)
5119 {
5120 	char path[64];
5121 	char name_buf[16];
5122 	FILE *input;
5123 	int state;
5124 	char *sp;
5125 
5126 	if (!DO_BIC(BIC_sysfs))
5127 		return;
5128 
5129 	for (state = 10; state >= 0; --state) {
5130 
5131 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
5132 			base_cpu, state);
5133 		input = fopen(path, "r");
5134 		if (input == NULL)
5135 			continue;
5136 		fgets(name_buf, sizeof(name_buf), input);
5137 
5138 		 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
5139 		sp = strchr(name_buf, '-');
5140 		if (!sp)
5141 			sp = strchrnul(name_buf, '\n');
5142 		*sp = '%';
5143 		*(sp + 1) = '\0';
5144 
5145 		fclose(input);
5146 
5147 		sprintf(path, "cpuidle/state%d/time", state);
5148 
5149 		if (is_deferred_skip(name_buf))
5150 			continue;
5151 
5152 		add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC,
5153 				FORMAT_PERCENT, SYSFS_PERCPU);
5154 	}
5155 
5156 	for (state = 10; state >= 0; --state) {
5157 
5158 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
5159 			base_cpu, state);
5160 		input = fopen(path, "r");
5161 		if (input == NULL)
5162 			continue;
5163 		fgets(name_buf, sizeof(name_buf), input);
5164 		 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
5165 		sp = strchr(name_buf, '-');
5166 		if (!sp)
5167 			sp = strchrnul(name_buf, '\n');
5168 		*sp = '\0';
5169 		fclose(input);
5170 
5171 		sprintf(path, "cpuidle/state%d/usage", state);
5172 
5173 		if (is_deferred_skip(name_buf))
5174 			continue;
5175 
5176 		add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS,
5177 				FORMAT_DELTA, SYSFS_PERCPU);
5178 	}
5179 
5180 }
5181 
5182 
5183 /*
5184  * parse cpuset with following syntax
5185  * 1,2,4..6,8-10 and set bits in cpu_subset
5186  */
5187 void parse_cpu_command(char *optarg)
5188 {
5189 	unsigned int start, end;
5190 	char *next;
5191 
5192 	if (!strcmp(optarg, "core")) {
5193 		if (cpu_subset)
5194 			goto error;
5195 		show_core_only++;
5196 		return;
5197 	}
5198 	if (!strcmp(optarg, "package")) {
5199 		if (cpu_subset)
5200 			goto error;
5201 		show_pkg_only++;
5202 		return;
5203 	}
5204 	if (show_core_only || show_pkg_only)
5205 		goto error;
5206 
5207 	cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
5208 	if (cpu_subset == NULL)
5209 		err(3, "CPU_ALLOC");
5210 	cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
5211 
5212 	CPU_ZERO_S(cpu_subset_size, cpu_subset);
5213 
5214 	next = optarg;
5215 
5216 	while (next && *next) {
5217 
5218 		if (*next == '-')	/* no negative cpu numbers */
5219 			goto error;
5220 
5221 		start = strtoul(next, &next, 10);
5222 
5223 		if (start >= CPU_SUBSET_MAXCPUS)
5224 			goto error;
5225 		CPU_SET_S(start, cpu_subset_size, cpu_subset);
5226 
5227 		if (*next == '\0')
5228 			break;
5229 
5230 		if (*next == ',') {
5231 			next += 1;
5232 			continue;
5233 		}
5234 
5235 		if (*next == '-') {
5236 			next += 1;	/* start range */
5237 		} else if (*next == '.') {
5238 			next += 1;
5239 			if (*next == '.')
5240 				next += 1;	/* start range */
5241 			else
5242 				goto error;
5243 		}
5244 
5245 		end = strtoul(next, &next, 10);
5246 		if (end <= start)
5247 			goto error;
5248 
5249 		while (++start <= end) {
5250 			if (start >= CPU_SUBSET_MAXCPUS)
5251 				goto error;
5252 			CPU_SET_S(start, cpu_subset_size, cpu_subset);
5253 		}
5254 
5255 		if (*next == ',')
5256 			next += 1;
5257 		else if (*next != '\0')
5258 			goto error;
5259 	}
5260 
5261 	return;
5262 
5263 error:
5264 	fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
5265 	help();
5266 	exit(-1);
5267 }
5268 
5269 
5270 void cmdline(int argc, char **argv)
5271 {
5272 	int opt;
5273 	int option_index = 0;
5274 	static struct option long_options[] = {
5275 		{"add",		required_argument,	0, 'a'},
5276 		{"cpu",		required_argument,	0, 'c'},
5277 		{"Dump",	no_argument,		0, 'D'},
5278 		{"debug",	no_argument,		0, 'd'},	/* internal, not documented */
5279 		{"enable",	required_argument,	0, 'e'},
5280 		{"interval",	required_argument,	0, 'i'},
5281 		{"num_iterations",	required_argument,	0, 'n'},
5282 		{"help",	no_argument,		0, 'h'},
5283 		{"hide",	required_argument,	0, 'H'},	// meh, -h taken by --help
5284 		{"Joules",	no_argument,		0, 'J'},
5285 		{"list",	no_argument,		0, 'l'},
5286 		{"out",		required_argument,	0, 'o'},
5287 		{"quiet",	no_argument,		0, 'q'},
5288 		{"show",	required_argument,	0, 's'},
5289 		{"Summary",	no_argument,		0, 'S'},
5290 		{"TCC",		required_argument,	0, 'T'},
5291 		{"version",	no_argument,		0, 'v' },
5292 		{0,		0,			0,  0 }
5293 	};
5294 
5295 	progname = argv[0];
5296 
5297 	while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v",
5298 				long_options, &option_index)) != -1) {
5299 		switch (opt) {
5300 		case 'a':
5301 			parse_add_command(optarg);
5302 			break;
5303 		case 'c':
5304 			parse_cpu_command(optarg);
5305 			break;
5306 		case 'D':
5307 			dump_only++;
5308 			break;
5309 		case 'e':
5310 			/* --enable specified counter */
5311 			bic_enabled |= bic_lookup(optarg, SHOW_LIST);
5312 			break;
5313 		case 'd':
5314 			debug++;
5315 			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5316 			break;
5317 		case 'H':
5318 			/*
5319 			 * --hide: do not show those specified
5320 			 *  multiple invocations simply clear more bits in enabled mask
5321 			 */
5322 			bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
5323 			break;
5324 		case 'h':
5325 		default:
5326 			help();
5327 			exit(1);
5328 		case 'i':
5329 			{
5330 				double interval = strtod(optarg, NULL);
5331 
5332 				if (interval < 0.001) {
5333 					fprintf(outf, "interval %f seconds is too small\n",
5334 						interval);
5335 					exit(2);
5336 				}
5337 
5338 				interval_tv.tv_sec = interval_ts.tv_sec = interval;
5339 				interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
5340 				interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
5341 			}
5342 			break;
5343 		case 'J':
5344 			rapl_joules++;
5345 			break;
5346 		case 'l':
5347 			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
5348 			list_header_only++;
5349 			quiet++;
5350 			break;
5351 		case 'o':
5352 			outf = fopen_or_die(optarg, "w");
5353 			break;
5354 		case 'q':
5355 			quiet = 1;
5356 			break;
5357 		case 'n':
5358 			num_iterations = strtod(optarg, NULL);
5359 
5360 			if (num_iterations <= 0) {
5361 				fprintf(outf, "iterations %d should be positive number\n",
5362 					num_iterations);
5363 				exit(2);
5364 			}
5365 			break;
5366 		case 's':
5367 			/*
5368 			 * --show: show only those specified
5369 			 *  The 1st invocation will clear and replace the enabled mask
5370 			 *  subsequent invocations can add to it.
5371 			 */
5372 			if (shown == 0)
5373 				bic_enabled = bic_lookup(optarg, SHOW_LIST);
5374 			else
5375 				bic_enabled |= bic_lookup(optarg, SHOW_LIST);
5376 			shown = 1;
5377 			break;
5378 		case 'S':
5379 			summary_only++;
5380 			break;
5381 		case 'T':
5382 			tcc_activation_temp_override = atoi(optarg);
5383 			break;
5384 		case 'v':
5385 			print_version();
5386 			exit(0);
5387 			break;
5388 		}
5389 	}
5390 }
5391 
5392 int main(int argc, char **argv)
5393 {
5394 	outf = stderr;
5395 
5396 	cmdline(argc, argv);
5397 
5398 	if (!quiet)
5399 		print_version();
5400 
5401 	probe_sysfs();
5402 
5403 	turbostat_init();
5404 
5405 	/* dump counters and exit */
5406 	if (dump_only)
5407 		return get_and_dump_counters();
5408 
5409 	/* list header and exit */
5410 	if (list_header_only) {
5411 		print_header(",");
5412 		flush_output_stdout();
5413 		return 0;
5414 	}
5415 
5416 	/*
5417 	 * if any params left, it must be a command to fork
5418 	 */
5419 	if (argc - optind)
5420 		return fork_it(argv + optind);
5421 	else
5422 		turbostat_loop();
5423 
5424 	return 0;
5425 }
5426