1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * turbostat -- show CPU frequency and C-state residency
4  * on modern Intel and AMD processors.
5  *
6  * Copyright (c) 2021 Intel Corporation.
7  * Len Brown <len.brown@intel.com>
8  */
9 
10 #define _GNU_SOURCE
11 #include MSRHEADER
12 #include INTEL_FAMILY_HEADER
13 #include <stdarg.h>
14 #include <stdio.h>
15 #include <err.h>
16 #include <unistd.h>
17 #include <sys/types.h>
18 #include <sys/wait.h>
19 #include <sys/stat.h>
20 #include <sys/select.h>
21 #include <sys/resource.h>
22 #include <fcntl.h>
23 #include <signal.h>
24 #include <sys/time.h>
25 #include <stdlib.h>
26 #include <getopt.h>
27 #include <dirent.h>
28 #include <string.h>
29 #include <ctype.h>
30 #include <sched.h>
31 #include <time.h>
32 #include <cpuid.h>
33 #include <sys/capability.h>
34 #include <errno.h>
35 #include <math.h>
36 #include <linux/perf_event.h>
37 #include <asm/unistd.h>
38 #include <stdbool.h>
39 
40 char *proc_stat = "/proc/stat";
41 FILE *outf;
42 int *fd_percpu;
43 int *fd_instr_count_percpu;
44 struct timeval interval_tv = { 5, 0 };
45 struct timespec interval_ts = { 5, 0 };
46 
47 /* Save original CPU model */
48 unsigned int model_orig;
49 
50 unsigned int num_iterations;
51 unsigned int header_iterations;
52 unsigned int debug;
53 unsigned int quiet;
54 unsigned int shown;
55 unsigned int sums_need_wide_columns;
56 unsigned int rapl_joules;
57 unsigned int summary_only;
58 unsigned int list_header_only;
59 unsigned int dump_only;
60 unsigned int do_snb_cstates;
61 unsigned int do_knl_cstates;
62 unsigned int do_slm_cstates;
63 unsigned int use_c1_residency_msr;
64 unsigned int has_aperf;
65 unsigned int has_epb;
66 unsigned int do_irtl_snb;
67 unsigned int do_irtl_hsw;
68 unsigned int units = 1000000;	/* MHz etc */
69 unsigned int genuine_intel;
70 unsigned int authentic_amd;
71 unsigned int hygon_genuine;
72 unsigned int max_level, max_extended_level;
73 unsigned int has_invariant_tsc;
74 unsigned int do_nhm_platform_info;
75 unsigned int no_MSR_MISC_PWR_MGMT;
76 unsigned int aperf_mperf_multiplier = 1;
77 double bclk;
78 double base_hz;
79 unsigned int has_base_hz;
80 double tsc_tweak = 1.0;
81 unsigned int show_pkg_only;
82 unsigned int show_core_only;
83 char *output_buffer, *outp;
84 unsigned int do_rapl;
85 unsigned int do_dts;
86 unsigned int do_ptm;
87 unsigned int do_ipc;
88 unsigned long long gfx_cur_rc6_ms;
89 unsigned long long cpuidle_cur_cpu_lpi_us;
90 unsigned long long cpuidle_cur_sys_lpi_us;
91 unsigned int gfx_cur_mhz;
92 unsigned int gfx_act_mhz;
93 unsigned int tj_max;
94 unsigned int tj_max_override;
95 int tcc_offset_bits;
96 double rapl_power_units, rapl_time_units;
97 double rapl_dram_energy_units, rapl_energy_units;
98 double rapl_joule_counter_range;
99 unsigned int do_core_perf_limit_reasons;
100 unsigned int has_automatic_cstate_conversion;
101 unsigned int dis_cstate_prewake;
102 unsigned int do_gfx_perf_limit_reasons;
103 unsigned int do_ring_perf_limit_reasons;
104 unsigned int crystal_hz;
105 unsigned long long tsc_hz;
106 int base_cpu;
107 double discover_bclk(unsigned int family, unsigned int model);
108 unsigned int has_hwp;		/* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
109 			/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
110 unsigned int has_hwp_notify;	/* IA32_HWP_INTERRUPT */
111 unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
112 unsigned int has_hwp_epp;	/* IA32_HWP_REQUEST[bits 31:24] */
113 unsigned int has_hwp_pkg;	/* IA32_HWP_REQUEST_PKG */
114 unsigned int has_misc_feature_control;
115 unsigned int first_counter_read = 1;
116 int ignore_stdin;
117 
118 #define RAPL_PKG		(1 << 0)
119 					/* 0x610 MSR_PKG_POWER_LIMIT */
120 					/* 0x611 MSR_PKG_ENERGY_STATUS */
121 #define RAPL_PKG_PERF_STATUS	(1 << 1)
122 					/* 0x613 MSR_PKG_PERF_STATUS */
123 #define RAPL_PKG_POWER_INFO	(1 << 2)
124 					/* 0x614 MSR_PKG_POWER_INFO */
125 
126 #define RAPL_DRAM		(1 << 3)
127 					/* 0x618 MSR_DRAM_POWER_LIMIT */
128 					/* 0x619 MSR_DRAM_ENERGY_STATUS */
129 #define RAPL_DRAM_PERF_STATUS	(1 << 4)
130 					/* 0x61b MSR_DRAM_PERF_STATUS */
131 #define RAPL_DRAM_POWER_INFO	(1 << 5)
132 					/* 0x61c MSR_DRAM_POWER_INFO */
133 
134 #define RAPL_CORES_POWER_LIMIT	(1 << 6)
135 					/* 0x638 MSR_PP0_POWER_LIMIT */
136 #define RAPL_CORE_POLICY	(1 << 7)
137 					/* 0x63a MSR_PP0_POLICY */
138 
139 #define RAPL_GFX		(1 << 8)
140 					/* 0x640 MSR_PP1_POWER_LIMIT */
141 					/* 0x641 MSR_PP1_ENERGY_STATUS */
142 					/* 0x642 MSR_PP1_POLICY */
143 
144 #define RAPL_CORES_ENERGY_STATUS	(1 << 9)
145 					/* 0x639 MSR_PP0_ENERGY_STATUS */
146 #define RAPL_PER_CORE_ENERGY	(1 << 10)
147 					/* Indicates that core energy is reported
148 					 * per-core rather than per-package. */
149 #define RAPL_AMD_F17H		(1 << 11)
150 					/* 0xc0010299 MSR_RAPL_PWR_UNIT */
151 					/* 0xc001029a MSR_CORE_ENERGY_STAT */
152 					/* 0xc001029b MSR_PKG_ENERGY_STAT */
153 #define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
154 #define	TJMAX_DEFAULT	100
155 
156 /* MSRs that are not yet in the kernel-provided header. */
157 #define MSR_RAPL_PWR_UNIT	0xc0010299
158 #define MSR_CORE_ENERGY_STAT	0xc001029a
159 #define MSR_PKG_ENERGY_STAT	0xc001029b
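
/*
 * Illustrative sketch (not turbostat's own conversion code): the AMD energy
 * MSRs above report raw counts whose LSB is derived from MSR_RAPL_PWR_UNIT,
 * assuming the usual RAPL layout where bits 12:8 hold the Energy Status Unit
 * (energy LSB = 1/2^ESU Joules).
 */
static inline double example_rapl_energy_joules(unsigned long long rapl_pwr_unit_msr,
						unsigned long long raw_energy_count)
{
	unsigned int esu = (rapl_pwr_unit_msr >> 8) & 0x1f;	/* Energy Status Unit */

	return (double)raw_energy_count / (double)(1ULL << esu);
}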
160 
161 #define MAX(a, b) ((a) > (b) ? (a) : (b))
162 
163 /*
164  * Buffer size used by sscanf() for added column names.
165  * Names are usually truncated to 7 characters, but raw 64-bit counters need 18-character columns.
166  */
167 #define	NAME_BYTES 20
168 #define PATH_BYTES 128
169 
170 int backwards_count;
171 char *progname;
172 
173 #define CPU_SUBSET_MAXCPUS	1024	/* needed before topology is probed... */
174 cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
175 size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
176 #define MAX_ADDED_COUNTERS 8
177 #define MAX_ADDED_THREAD_COUNTERS 24
178 #define BITMASK_SIZE 32
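
/*
 * Minimal sketch (illustrative only, not turbostat's allocation code): the
 * dynamically sized cpu_set_t pointers above are created with the glibc
 * CPU_ALLOC()/CPU_ALLOC_SIZE()/CPU_ZERO_S() macros.
 */
static inline cpu_set_t *example_alloc_cpu_set(int num_cpus, size_t *setsize)
{
	cpu_set_t *set = CPU_ALLOC(num_cpus);

	if (set == NULL)
		err(3, "CPU_ALLOC");
	*setsize = CPU_ALLOC_SIZE(num_cpus);
	CPU_ZERO_S(*setsize, set);

	return set;
}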
179 
180 struct thread_data {
181 	struct timeval tv_begin;
182 	struct timeval tv_end;
183 	struct timeval tv_delta;
184 	unsigned long long tsc;
185 	unsigned long long aperf;
186 	unsigned long long mperf;
187 	unsigned long long c1;
188 	unsigned long long instr_count;
189 	unsigned long long irq_count;
190 	unsigned int smi_count;
191 	unsigned int cpu_id;
192 	unsigned int apic_id;
193 	unsigned int x2apic_id;
194 	unsigned int flags;
195 	bool is_atom;
196 #define CPU_IS_FIRST_THREAD_IN_CORE	0x2
197 #define CPU_IS_FIRST_CORE_IN_PACKAGE	0x4
198 	unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
199 } *thread_even, *thread_odd;
200 
201 struct core_data {
202 	unsigned long long c3;
203 	unsigned long long c6;
204 	unsigned long long c7;
205 	unsigned long long mc6_us;	/* duplicated per-core for now, even though it is per-module */
206 	unsigned int core_temp_c;
207 	unsigned int core_energy;	/* MSR_CORE_ENERGY_STAT */
208 	unsigned int core_id;
209 	unsigned long long core_throt_cnt;
210 	unsigned long long counter[MAX_ADDED_COUNTERS];
211 } *core_even, *core_odd;
212 
213 struct pkg_data {
214 	unsigned long long pc2;
215 	unsigned long long pc3;
216 	unsigned long long pc6;
217 	unsigned long long pc7;
218 	unsigned long long pc8;
219 	unsigned long long pc9;
220 	unsigned long long pc10;
221 	unsigned long long cpu_lpi;
222 	unsigned long long sys_lpi;
223 	unsigned long long pkg_wtd_core_c0;
224 	unsigned long long pkg_any_core_c0;
225 	unsigned long long pkg_any_gfxe_c0;
226 	unsigned long long pkg_both_core_gfxe_c0;
227 	long long gfx_rc6_ms;
228 	unsigned int gfx_mhz;
229 	unsigned int gfx_act_mhz;
230 	unsigned int package_id;
231 	unsigned long long energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
232 	unsigned long long energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
233 	unsigned long long energy_cores;	/* MSR_PP0_ENERGY_STATUS */
234 	unsigned long long energy_gfx;	/* MSR_PP1_ENERGY_STATUS */
235 	unsigned long long rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */
236 	unsigned long long rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */
237 	unsigned int pkg_temp_c;
238 	unsigned long long counter[MAX_ADDED_COUNTERS];
239 } *package_even, *package_odd;
240 
241 #define ODD_COUNTERS thread_odd, core_odd, package_odd
242 #define EVEN_COUNTERS thread_even, core_even, package_even
243 
244 #define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)	      \
245 	((thread_base) +						      \
246 	 ((pkg_no) *							      \
247 	  topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
248 	 ((node_no) * topo.cores_per_node * topo.threads_per_core) +	      \
249 	 ((core_no) * topo.threads_per_core) +				      \
250 	 (thread_no))
251 
252 #define GET_CORE(core_base, core_no, node_no, pkg_no)			\
253 	((core_base) +							\
254 	 ((pkg_no) *  topo.nodes_per_pkg * topo.cores_per_node) +	\
255 	 ((node_no) * topo.cores_per_node) +				\
256 	 (core_no))
257 
258 #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
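
/*
 * Worked example (illustrative topology numbers): with
 * topo.nodes_per_pkg = 1, topo.cores_per_node = 4 and
 * topo.threads_per_core = 2, GET_THREAD(base, 1, 3, 0, 1) resolves to
 * base + (1 * 1 * 4 * 2) + (0 * 4 * 2) + (3 * 2) + 1 = base + 15,
 * i.e. the 2nd thread of the 4th core in the 2nd package.
 */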
259 
260 enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
261 enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };
262 enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };
263 
264 struct msr_counter {
265 	unsigned int msr_num;
266 	char name[NAME_BYTES];
267 	char path[PATH_BYTES];
268 	unsigned int width;
269 	enum counter_type type;
270 	enum counter_format format;
271 	struct msr_counter *next;
272 	unsigned int flags;
273 #define	FLAGS_HIDE	(1 << 0)
274 #define	FLAGS_SHOW	(1 << 1)
275 #define	SYSFS_PERCPU	(1 << 1)
276 };
277 
278 /*
279  * Each MSR below is accumulated periodically into a monotonically
280  * increasing 64-bit sum, regardless of the bit width of the
281  * underlying register, so that counter wrap-around is not lost.
282  */
283 enum {
284 	IDX_PKG_ENERGY,
285 	IDX_DRAM_ENERGY,
286 	IDX_PP0_ENERGY,
287 	IDX_PP1_ENERGY,
288 	IDX_PKG_PERF,
289 	IDX_DRAM_PERF,
290 	IDX_COUNT,
291 };
292 
293 int get_msr_sum(int cpu, off_t offset, unsigned long long *msr);
294 
295 struct msr_sum_array {
296 	/* get_msr_sum() = sum + (get_msr() - last) */
297 	struct {
298 		/* The accumulated MSR value, updated by the periodic timer */
299 		unsigned long long sum;
300 		/* The raw MSR value recorded at the last timer tick */
301 		unsigned long long last;
302 	} entries[IDX_COUNT];
303 };
304 
305 /* The per-CPU MSR sum array. */
306 struct msr_sum_array *per_cpu_msr_sum;
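
/*
 * Minimal sketch (illustrative only, not the accumulation code turbostat
 * actually runs): the periodic timer folds each 32-bit raw counter into
 * the 64-bit .sum, so a wrap between two samples is not lost.
 */
static inline void example_accumulate_msr(struct msr_sum_array *s, int idx,
					  unsigned long long raw)
{
	/* 32-bit modular subtraction handles a single wrap correctly */
	s->entries[idx].sum += (raw - s->entries[idx].last) & 0xffffffff;
	s->entries[idx].last = raw;
}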
307 
308 off_t idx_to_offset(int idx)
309 {
310 	off_t offset;
311 
312 	switch (idx) {
313 	case IDX_PKG_ENERGY:
314 		if (do_rapl & RAPL_AMD_F17H)
315 			offset = MSR_PKG_ENERGY_STAT;
316 		else
317 			offset = MSR_PKG_ENERGY_STATUS;
318 		break;
319 	case IDX_DRAM_ENERGY:
320 		offset = MSR_DRAM_ENERGY_STATUS;
321 		break;
322 	case IDX_PP0_ENERGY:
323 		offset = MSR_PP0_ENERGY_STATUS;
324 		break;
325 	case IDX_PP1_ENERGY:
326 		offset = MSR_PP1_ENERGY_STATUS;
327 		break;
328 	case IDX_PKG_PERF:
329 		offset = MSR_PKG_PERF_STATUS;
330 		break;
331 	case IDX_DRAM_PERF:
332 		offset = MSR_DRAM_PERF_STATUS;
333 		break;
334 	default:
335 		offset = -1;
336 	}
337 	return offset;
338 }
339 
340 int offset_to_idx(off_t offset)
341 {
342 	int idx;
343 
344 	switch (offset) {
345 	case MSR_PKG_ENERGY_STATUS:
346 	case MSR_PKG_ENERGY_STAT:
347 		idx = IDX_PKG_ENERGY;
348 		break;
349 	case MSR_DRAM_ENERGY_STATUS:
350 		idx = IDX_DRAM_ENERGY;
351 		break;
352 	case MSR_PP0_ENERGY_STATUS:
353 		idx = IDX_PP0_ENERGY;
354 		break;
355 	case MSR_PP1_ENERGY_STATUS:
356 		idx = IDX_PP1_ENERGY;
357 		break;
358 	case MSR_PKG_PERF_STATUS:
359 		idx = IDX_PKG_PERF;
360 		break;
361 	case MSR_DRAM_PERF_STATUS:
362 		idx = IDX_DRAM_PERF;
363 		break;
364 	default:
365 		idx = -1;
366 	}
367 	return idx;
368 }
369 
370 int idx_valid(int idx)
371 {
372 	switch (idx) {
373 	case IDX_PKG_ENERGY:
374 		return do_rapl & (RAPL_PKG | RAPL_AMD_F17H);
375 	case IDX_DRAM_ENERGY:
376 		return do_rapl & RAPL_DRAM;
377 	case IDX_PP0_ENERGY:
378 		return do_rapl & RAPL_CORES_ENERGY_STATUS;
379 	case IDX_PP1_ENERGY:
380 		return do_rapl & RAPL_GFX;
381 	case IDX_PKG_PERF:
382 		return do_rapl & RAPL_PKG_PERF_STATUS;
383 	case IDX_DRAM_PERF:
384 		return do_rapl & RAPL_DRAM_PERF_STATUS;
385 	default:
386 		return 0;
387 	}
388 }
389 
390 struct sys_counters {
391 	unsigned int added_thread_counters;
392 	unsigned int added_core_counters;
393 	unsigned int added_package_counters;
394 	struct msr_counter *tp;
395 	struct msr_counter *cp;
396 	struct msr_counter *pp;
397 } sys;
398 
399 struct system_summary {
400 	struct thread_data threads;
401 	struct core_data cores;
402 	struct pkg_data packages;
403 } average;
404 
405 struct cpu_topology {
406 	int physical_package_id;
407 	int die_id;
408 	int logical_cpu_id;
409 	int physical_node_id;
410 	int logical_node_id;	/* 0-based count within the package */
411 	int physical_core_id;
412 	int thread_id;
413 	cpu_set_t *put_ids;	/* Processing Unit/Thread IDs */
414 } *cpus;
415 
416 struct topo_params {
417 	int num_packages;
418 	int num_die;
419 	int num_cpus;
420 	int num_cores;
421 	int max_cpu_num;
422 	int max_node_num;
423 	int nodes_per_pkg;
424 	int cores_per_node;
425 	int threads_per_core;
426 } topo;
427 
428 struct timeval tv_even, tv_odd, tv_delta;
429 
430 int *irq_column_2_cpu;		/* /proc/interrupts column numbers */
431 int *irqs_per_cpu;		/* indexed by cpu_num */
432 
433 void setup_all_buffers(void);
434 
435 char *sys_lpi_file;
436 char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
437 char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
438 
439 int cpu_is_not_present(int cpu)
440 {
441 	return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
442 }
443 
444 /*
445  * Run func(thread, core, package) in topology order,
446  * skipping CPUs that are not present.
447  */
448 
449 int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *),
450 		 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
451 {
452 	int retval, pkg_no, core_no, thread_no, node_no;
453 
454 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
455 		for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) {
456 			for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
457 				for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
458 					struct thread_data *t;
459 					struct core_data *c;
460 					struct pkg_data *p;
461 
462 					t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
463 
464 					if (cpu_is_not_present(t->cpu_id))
465 						continue;
466 
467 					c = GET_CORE(core_base, core_no, node_no, pkg_no);
468 					p = GET_PKG(pkg_base, pkg_no);
469 
470 					retval = func(t, c, p);
471 					if (retval)
472 						return retval;
473 				}
474 			}
475 		}
476 	}
477 	return 0;
478 }
479 
480 int cpu_migrate(int cpu)
481 {
482 	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
483 	CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
484 	if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
485 		return -1;
486 	else
487 		return 0;
488 }
489 
490 int get_msr_fd(int cpu)
491 {
492 	char pathname[32];
493 	int fd;
494 
495 	fd = fd_percpu[cpu];
496 
497 	if (fd)
498 		return fd;
499 
500 	sprintf(pathname, "/dev/cpu/%d/msr", cpu);
501 	fd = open(pathname, O_RDONLY);
502 	if (fd < 0)
503 		err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
504 
505 	fd_percpu[cpu] = fd;
506 
507 	return fd;
508 }
509 
510 static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
511 {
512 	return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
513 }
514 
515 static int perf_instr_count_open(int cpu_num)
516 {
517 	struct perf_event_attr pea;
518 	int fd;
519 
520 	memset(&pea, 0, sizeof(struct perf_event_attr));
521 	pea.type = PERF_TYPE_HARDWARE;
522 	pea.size = sizeof(struct perf_event_attr);
523 	pea.config = PERF_COUNT_HW_INSTRUCTIONS;
524 
525 	/* counter for cpu_num, including user + kernel and all processes */
526 	fd = perf_event_open(&pea, -1, cpu_num, -1, 0);
527 	if (fd == -1)
528 		err(-1, "cpu%d: perf instruction counter\n", cpu_num);
529 
530 	return fd;
531 }
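
/*
 * Usage sketch (illustrative only): with the default read_format used
 * above, the accumulated instruction count is retrieved from that fd
 * with a plain 8-byte read().
 */
static inline unsigned long long example_read_instr_count(int fd)
{
	unsigned long long count = 0;

	if (read(fd, &count, sizeof(count)) != sizeof(count))
		err(-1, "perf instruction counter read");

	return count;
}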
532 
533 int get_instr_count_fd(int cpu)
534 {
535 	if (fd_instr_count_percpu[cpu])
536 		return fd_instr_count_percpu[cpu];
537 
538 	fd_instr_count_percpu[cpu] = perf_instr_count_open(cpu);
539 
540 	return fd_instr_count_percpu[cpu];
541 }
542 
543 int get_msr(int cpu, off_t offset, unsigned long long *msr)
544 {
545 	ssize_t retval;
546 
547 	retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
548 
549 	if (retval != sizeof *msr)
550 		err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);
551 
552 	return 0;
553 }
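
/*
 * Usage sketch (illustrative only): the pread() offset is simply the MSR
 * number, so reading APERF on cpu0, for example, is a single call:
 *
 *	unsigned long long aperf;
 *
 *	get_msr(0, MSR_IA32_APERF, &aperf);
 */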
554 
555 /*
556  * This list matches the column headers, except that:
557  * 1. it is built-in only; the sysfs counters are not here -- we learn of those at run-time
558  * 2. Core and CPU are moved to the end, so that column names containing them
559  *    are not accidentally matched by --show and --hide.
560  */
561 struct msr_counter bic[] = {
562 	{ 0x0, "usec" },
563 	{ 0x0, "Time_Of_Day_Seconds" },
564 	{ 0x0, "Package" },
565 	{ 0x0, "Node" },
566 	{ 0x0, "Avg_MHz" },
567 	{ 0x0, "Busy%" },
568 	{ 0x0, "Bzy_MHz" },
569 	{ 0x0, "TSC_MHz" },
570 	{ 0x0, "IRQ" },
571 	{ 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL },
572 	{ 0x0, "sysfs" },
573 	{ 0x0, "CPU%c1" },
574 	{ 0x0, "CPU%c3" },
575 	{ 0x0, "CPU%c6" },
576 	{ 0x0, "CPU%c7" },
577 	{ 0x0, "ThreadC" },
578 	{ 0x0, "CoreTmp" },
579 	{ 0x0, "CoreCnt" },
580 	{ 0x0, "PkgTmp" },
581 	{ 0x0, "GFX%rc6" },
582 	{ 0x0, "GFXMHz" },
583 	{ 0x0, "Pkg%pc2" },
584 	{ 0x0, "Pkg%pc3" },
585 	{ 0x0, "Pkg%pc6" },
586 	{ 0x0, "Pkg%pc7" },
587 	{ 0x0, "Pkg%pc8" },
588 	{ 0x0, "Pkg%pc9" },
589 	{ 0x0, "Pk%pc10" },
590 	{ 0x0, "CPU%LPI" },
591 	{ 0x0, "SYS%LPI" },
592 	{ 0x0, "PkgWatt" },
593 	{ 0x0, "CorWatt" },
594 	{ 0x0, "GFXWatt" },
595 	{ 0x0, "PkgCnt" },
596 	{ 0x0, "RAMWatt" },
597 	{ 0x0, "PKG_%" },
598 	{ 0x0, "RAM_%" },
599 	{ 0x0, "Pkg_J" },
600 	{ 0x0, "Cor_J" },
601 	{ 0x0, "GFX_J" },
602 	{ 0x0, "RAM_J" },
603 	{ 0x0, "Mod%c6" },
604 	{ 0x0, "Totl%C0" },
605 	{ 0x0, "Any%C0" },
606 	{ 0x0, "GFX%C0" },
607 	{ 0x0, "CPUGFX%" },
608 	{ 0x0, "Core" },
609 	{ 0x0, "CPU" },
610 	{ 0x0, "APIC" },
611 	{ 0x0, "X2APIC" },
612 	{ 0x0, "Die" },
613 	{ 0x0, "GFXAMHz" },
614 	{ 0x0, "IPC" },
615 	{ 0x0, "CoreThr" },
616 };
617 
618 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
619 #define	BIC_USEC	(1ULL << 0)
620 #define	BIC_TOD		(1ULL << 1)
621 #define	BIC_Package	(1ULL << 2)
622 #define	BIC_Node	(1ULL << 3)
623 #define	BIC_Avg_MHz	(1ULL << 4)
624 #define	BIC_Busy	(1ULL << 5)
625 #define	BIC_Bzy_MHz	(1ULL << 6)
626 #define	BIC_TSC_MHz	(1ULL << 7)
627 #define	BIC_IRQ		(1ULL << 8)
628 #define	BIC_SMI		(1ULL << 9)
629 #define	BIC_sysfs	(1ULL << 10)
630 #define	BIC_CPU_c1	(1ULL << 11)
631 #define	BIC_CPU_c3	(1ULL << 12)
632 #define	BIC_CPU_c6	(1ULL << 13)
633 #define	BIC_CPU_c7	(1ULL << 14)
634 #define	BIC_ThreadC	(1ULL << 15)
635 #define	BIC_CoreTmp	(1ULL << 16)
636 #define	BIC_CoreCnt	(1ULL << 17)
637 #define	BIC_PkgTmp	(1ULL << 18)
638 #define	BIC_GFX_rc6	(1ULL << 19)
639 #define	BIC_GFXMHz	(1ULL << 20)
640 #define	BIC_Pkgpc2	(1ULL << 21)
641 #define	BIC_Pkgpc3	(1ULL << 22)
642 #define	BIC_Pkgpc6	(1ULL << 23)
643 #define	BIC_Pkgpc7	(1ULL << 24)
644 #define	BIC_Pkgpc8	(1ULL << 25)
645 #define	BIC_Pkgpc9	(1ULL << 26)
646 #define	BIC_Pkgpc10	(1ULL << 27)
647 #define BIC_CPU_LPI	(1ULL << 28)
648 #define BIC_SYS_LPI	(1ULL << 29)
649 #define	BIC_PkgWatt	(1ULL << 30)
650 #define	BIC_CorWatt	(1ULL << 31)
651 #define	BIC_GFXWatt	(1ULL << 32)
652 #define	BIC_PkgCnt	(1ULL << 33)
653 #define	BIC_RAMWatt	(1ULL << 34)
654 #define	BIC_PKG__	(1ULL << 35)
655 #define	BIC_RAM__	(1ULL << 36)
656 #define	BIC_Pkg_J	(1ULL << 37)
657 #define	BIC_Cor_J	(1ULL << 38)
658 #define	BIC_GFX_J	(1ULL << 39)
659 #define	BIC_RAM_J	(1ULL << 40)
660 #define	BIC_Mod_c6	(1ULL << 41)
661 #define	BIC_Totl_c0	(1ULL << 42)
662 #define	BIC_Any_c0	(1ULL << 43)
663 #define	BIC_GFX_c0	(1ULL << 44)
664 #define	BIC_CPUGFX	(1ULL << 45)
665 #define	BIC_Core	(1ULL << 46)
666 #define	BIC_CPU		(1ULL << 47)
667 #define	BIC_APIC	(1ULL << 48)
668 #define	BIC_X2APIC	(1ULL << 49)
669 #define	BIC_Die		(1ULL << 50)
670 #define	BIC_GFXACTMHz	(1ULL << 51)
671 #define	BIC_IPC		(1ULL << 52)
672 #define	BIC_CORE_THROT_CNT	(1ULL << 53)
673 
674 #define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
675 #define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
676 #define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz )
677 #define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)
678 #define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
679 
680 #define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
681 
682 unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
683 unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
684 
685 #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
686 #define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME)
687 #define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
688 #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
689 #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
690 #define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
691 
692 #define MAX_DEFERRED 16
693 char *deferred_add_names[MAX_DEFERRED];
694 char *deferred_skip_names[MAX_DEFERRED];
695 int deferred_add_index;
696 int deferred_skip_index;
697 
698 /*
699  * HIDE_LIST - hide this list of counters, show the rest [default]
700  * SHOW_LIST - show this list of counters, hide the rest
701  */
702 enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;
703 
704 void help(void)
705 {
706 	fprintf(outf,
707 		"Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
708 		"\n"
709 		"Turbostat forks the specified COMMAND and prints statistics\n"
710 		"when COMMAND completes.\n"
711 		"If no COMMAND is specified, turbostat wakes every 5 seconds\n"
712 		"to print statistics, until interrupted.\n"
713 		"  -a, --add	add a counter\n"
714 		"		  e.g. --add msr0x10,u64,cpu,delta,MY_TSC\n"
715 		"  -c, --cpu	cpu-set	limit output to summary plus cpu-set:\n"
716 		"		  {core | package | j,k,l..m,n-p }\n"
717 		"  -d, --debug	displays usec, Time_Of_Day_Seconds and more debugging\n"
718 		"  -D, --Dump	displays the raw counter values\n"
719 		"  -e, --enable	[all | column]\n"
720 		"		shows all or the specified disabled column\n"
721 		"  -H, --hide [column|column,column,...]\n"
722 		"		hide the specified column(s)\n"
723 		"  -i, --interval sec.subsec\n"
724 		"		Override default 5-second measurement interval\n"
725 		"  -J, --Joules	displays energy in Joules instead of Watts\n"
726 		"  -l, --list	list column headers only\n"
727 		"  -n, --num_iterations num\n"
728 		"		number of measurement iterations\n"
729 		"  -N, --header_iterations num\n"
730 		"		print header every num iterations\n"
731 		"  -o, --out file\n"
732 		"		create or truncate \"file\" for all output\n"
733 		"  -q, --quiet	skip decoding system configuration header\n"
734 		"  -s, --show [column|column,column,...]\n"
735 		"		show only the specified column(s)\n"
736 		"  -S, --Summary\n"
737 		"		limits output to 1-line system summary per interval\n"
738 		"  -T, --TCC temperature\n"
739 		"		sets the Thermal Control Circuit temperature in\n"
740 		"		  degrees Celsius\n"
741 		"  -h, --help	print this help message\n"
742 		"  -v, --version	print version information\n" "\n" "For more help, run \"man turbostat\"\n");
743 }
744 
745 /*
746  * bic_lookup
747  * for each name in the comma-separated name_list,
748  * set the appropriate bit in the return value.
749  */
750 unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
751 {
752 	int i;
753 	unsigned long long retval = 0;
754 
755 	while (name_list) {
756 		char *comma;
757 
758 		comma = strchr(name_list, ',');
759 
760 		if (comma)
761 			*comma = '\0';
762 
763 		for (i = 0; i < MAX_BIC; ++i) {
764 			if (!strcmp(name_list, bic[i].name)) {
765 				retval |= (1ULL << i);
766 				break;
767 			}
768 			if (!strcmp(name_list, "all")) {
769 				retval |= ~0;
770 				break;
771 			} else if (!strcmp(name_list, "topology")) {
772 				retval |= BIC_TOPOLOGY;
773 				break;
774 			} else if (!strcmp(name_list, "power")) {
775 				retval |= BIC_THERMAL_PWR;
776 				break;
777 			} else if (!strcmp(name_list, "idle")) {
778 				retval |= BIC_IDLE;
779 				break;
780 			} else if (!strcmp(name_list, "frequency")) {
781 				retval |= BIC_FREQUENCY;
782 				break;
783 			} else if (!strcmp(name_list, "other")) {
784 				retval |= BIC_OTHER;
785 				break;
786 			}
787 
788 		}
789 		if (i == MAX_BIC) {
790 			if (mode == SHOW_LIST) {
791 				deferred_add_names[deferred_add_index++] = name_list;
792 				if (deferred_add_index >= MAX_DEFERRED) {
793 					fprintf(stderr, "More than max %d un-recognized --add options '%s'\n",
794 							MAX_DEFERRED, name_list);
795 					help();
796 					exit(1);
797 				}
798 			} else {
799 				deferred_skip_names[deferred_skip_index++] = name_list;
800 				if (debug)
801 					fprintf(stderr, "deferred \"%s\"\n", name_list);
802 				if (deferred_skip_index >= MAX_DEFERRED) {
803 					fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
804 							MAX_DEFERRED, name_list);
805 					help();
806 					exit(1);
807 				}
808 			}
809 		}
810 
811 		name_list = comma;
812 		if (name_list)
813 			name_list++;
814 
815 	}
816 	return retval;
817 }
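
/*
 * Usage sketch (an assumption about typical use, not copied from the
 * option handling later in this file): the mask returned by bic_lookup()
 * is applied to bic_enabled, e.g.
 *
 *	bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);	// --hide
 *	bic_enabled |= bic_lookup(optarg, SHOW_LIST);	// --show
 */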
818 
819 void print_header(char *delim)
820 {
821 	struct msr_counter *mp;
822 	int printed = 0;
823 
824 	if (DO_BIC(BIC_USEC))
825 		outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
826 	if (DO_BIC(BIC_TOD))
827 		outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
828 	if (DO_BIC(BIC_Package))
829 		outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
830 	if (DO_BIC(BIC_Die))
831 		outp += sprintf(outp, "%sDie", (printed++ ? delim : ""));
832 	if (DO_BIC(BIC_Node))
833 		outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
834 	if (DO_BIC(BIC_Core))
835 		outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
836 	if (DO_BIC(BIC_CPU))
837 		outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
838 	if (DO_BIC(BIC_APIC))
839 		outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
840 	if (DO_BIC(BIC_X2APIC))
841 		outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
842 	if (DO_BIC(BIC_Avg_MHz))
843 		outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
844 	if (DO_BIC(BIC_Busy))
845 		outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
846 	if (DO_BIC(BIC_Bzy_MHz))
847 		outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
848 	if (DO_BIC(BIC_TSC_MHz))
849 		outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
850 
851 	if (DO_BIC(BIC_IPC))
852 		outp += sprintf(outp, "%sIPC", (printed++ ? delim : ""));
853 
854 	if (DO_BIC(BIC_IRQ)) {
855 		if (sums_need_wide_columns)
856 			outp += sprintf(outp, "%s     IRQ", (printed++ ? delim : ""));
857 		else
858 			outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
859 	}
860 
861 	if (DO_BIC(BIC_SMI))
862 		outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
863 
864 	for (mp = sys.tp; mp; mp = mp->next) {
865 
866 		if (mp->format == FORMAT_RAW) {
867 			if (mp->width == 64)
868 				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
869 			else
870 				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
871 		} else {
872 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
873 				outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
874 			else
875 				outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
876 		}
877 	}
878 
879 	if (DO_BIC(BIC_CPU_c1))
880 		outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
881 	if (DO_BIC(BIC_CPU_c3))
882 		outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
883 	if (DO_BIC(BIC_CPU_c6))
884 		outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
885 	if (DO_BIC(BIC_CPU_c7))
886 		outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
887 
888 	if (DO_BIC(BIC_Mod_c6))
889 		outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
890 
891 	if (DO_BIC(BIC_CoreTmp))
892 		outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
893 
894 	if (DO_BIC(BIC_CORE_THROT_CNT))
895 		outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : ""));
896 
897 	if (do_rapl && !rapl_joules) {
898 		if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
899 			outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
900 	} else if (do_rapl && rapl_joules) {
901 		if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
902 			outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
903 	}
904 
905 	for (mp = sys.cp; mp; mp = mp->next) {
906 		if (mp->format == FORMAT_RAW) {
907 			if (mp->width == 64)
908 				outp += sprintf(outp, "%s%18.18s", delim, mp->name);
909 			else
910 				outp += sprintf(outp, "%s%10.10s", delim, mp->name);
911 		} else {
912 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
913 				outp += sprintf(outp, "%s%8s", delim, mp->name);
914 			else
915 				outp += sprintf(outp, "%s%s", delim, mp->name);
916 		}
917 	}
918 
919 	if (DO_BIC(BIC_PkgTmp))
920 		outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
921 
922 	if (DO_BIC(BIC_GFX_rc6))
923 		outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
924 
925 	if (DO_BIC(BIC_GFXMHz))
926 		outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
927 
928 	if (DO_BIC(BIC_GFXACTMHz))
929 		outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : ""));
930 
931 	if (DO_BIC(BIC_Totl_c0))
932 		outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
933 	if (DO_BIC(BIC_Any_c0))
934 		outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
935 	if (DO_BIC(BIC_GFX_c0))
936 		outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
937 	if (DO_BIC(BIC_CPUGFX))
938 		outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
939 
940 	if (DO_BIC(BIC_Pkgpc2))
941 		outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
942 	if (DO_BIC(BIC_Pkgpc3))
943 		outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
944 	if (DO_BIC(BIC_Pkgpc6))
945 		outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
946 	if (DO_BIC(BIC_Pkgpc7))
947 		outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
948 	if (DO_BIC(BIC_Pkgpc8))
949 		outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
950 	if (DO_BIC(BIC_Pkgpc9))
951 		outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
952 	if (DO_BIC(BIC_Pkgpc10))
953 		outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
954 	if (DO_BIC(BIC_CPU_LPI))
955 		outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
956 	if (DO_BIC(BIC_SYS_LPI))
957 		outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
958 
959 	if (do_rapl && !rapl_joules) {
960 		if (DO_BIC(BIC_PkgWatt))
961 			outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
962 		if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
963 			outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
964 		if (DO_BIC(BIC_GFXWatt))
965 			outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
966 		if (DO_BIC(BIC_RAMWatt))
967 			outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
968 		if (DO_BIC(BIC_PKG__))
969 			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
970 		if (DO_BIC(BIC_RAM__))
971 			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
972 	} else if (do_rapl && rapl_joules) {
973 		if (DO_BIC(BIC_Pkg_J))
974 			outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
975 		if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
976 			outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
977 		if (DO_BIC(BIC_GFX_J))
978 			outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
979 		if (DO_BIC(BIC_RAM_J))
980 			outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
981 		if (DO_BIC(BIC_PKG__))
982 			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
983 		if (DO_BIC(BIC_RAM__))
984 			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
985 	}
986 	for (mp = sys.pp; mp; mp = mp->next) {
987 		if (mp->format == FORMAT_RAW) {
988 			if (mp->width == 64)
989 				outp += sprintf(outp, "%s%18.18s", delim, mp->name);
990 			else
991 				outp += sprintf(outp, "%s%10.10s", delim, mp->name);
992 		} else {
993 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
994 				outp += sprintf(outp, "%s%8s", delim, mp->name);
995 			else
996 				outp += sprintf(outp, "%s%s", delim, mp->name);
997 		}
998 	}
999 
1000 	outp += sprintf(outp, "\n");
1001 }
1002 
1003 int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1004 {
1005 	int i;
1006 	struct msr_counter *mp;
1007 
1008 	outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
1009 
1010 	if (t) {
1011 		outp += sprintf(outp, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags);
1012 		outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
1013 		outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
1014 		outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
1015 		outp += sprintf(outp, "c1: %016llX\n", t->c1);
1016 
1017 		if (DO_BIC(BIC_IPC))
1018 			outp += sprintf(outp, "IPC: %lld\n", t->instr_count);
1019 
1020 		if (DO_BIC(BIC_IRQ))
1021 			outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
1022 		if (DO_BIC(BIC_SMI))
1023 			outp += sprintf(outp, "SMI: %d\n", t->smi_count);
1024 
1025 		for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1026 			outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, t->counter[i]);
1027 		}
1028 	}
1029 
1030 	if (c) {
1031 		outp += sprintf(outp, "core: %d\n", c->core_id);
1032 		outp += sprintf(outp, "c3: %016llX\n", c->c3);
1033 		outp += sprintf(outp, "c6: %016llX\n", c->c6);
1034 		outp += sprintf(outp, "c7: %016llX\n", c->c7);
1035 		outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
1036 		outp += sprintf(outp, "core_throt_cnt: %016llX\n", c->core_throt_cnt);
1037 		outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
1038 
1039 		for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1040 			outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]);
1041 		}
1042 		outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
1043 	}
1044 
1045 	if (p) {
1046 		outp += sprintf(outp, "package: %d\n", p->package_id);
1047 
1048 		outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
1049 		outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
1050 		outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
1051 		outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
1052 
1053 		outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
1054 		if (DO_BIC(BIC_Pkgpc3))
1055 			outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
1056 		if (DO_BIC(BIC_Pkgpc6))
1057 			outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
1058 		if (DO_BIC(BIC_Pkgpc7))
1059 			outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
1060 		outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
1061 		outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
1062 		outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
1063 		outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
1064 		outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
1065 		outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg);
1066 		outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores);
1067 		outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx);
1068 		outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram);
1069 		outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status);
1070 		outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status);
1071 		outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
1072 
1073 		for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1074 			outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, p->counter[i]);
1075 		}
1076 	}
1077 
1078 	outp += sprintf(outp, "\n");
1079 
1080 	return 0;
1081 }
1082 
1083 /*
1084  * column formatting convention & formats
1085  */
1086 int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1087 {
1088 	double interval_float, tsc;
1089 	char *fmt8;
1090 	int i;
1091 	struct msr_counter *mp;
1092 	char *delim = "\t";
1093 	int printed = 0;
1094 
1095 	/* if showing only 1st thread in core and this isn't one, bail out */
1096 	if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1097 		return 0;
1098 
1099 	/* if showing only 1st thread in pkg and this isn't one, bail out */
1100 	if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1101 		return 0;
1102 
1103 	/* if not the summary line, and --cpu limits output, skip CPUs outside the subset */
1104 	if ((t != &average.threads) && (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
1105 		return 0;
1106 
1107 	if (DO_BIC(BIC_USEC)) {
1108 		/* on each row, print how many usec this row's counters took to gather */
1109 		struct timeval tv;
1110 
1111 		timersub(&t->tv_end, &t->tv_begin, &tv);
1112 		outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
1113 	}
1114 
1115 	/* Time_Of_Day_Seconds: on each row, print the sec.usec of the last timestamp taken */
1116 	if (DO_BIC(BIC_TOD))
1117 		outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
1118 
1119 	interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec / 1000000.0;
1120 
1121 	tsc = t->tsc * tsc_tweak;
1122 
1123 	/* topo columns, print blanks on 1st (average) line */
1124 	if (t == &average.threads) {
1125 		if (DO_BIC(BIC_Package))
1126 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1127 		if (DO_BIC(BIC_Die))
1128 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1129 		if (DO_BIC(BIC_Node))
1130 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1131 		if (DO_BIC(BIC_Core))
1132 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1133 		if (DO_BIC(BIC_CPU))
1134 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1135 		if (DO_BIC(BIC_APIC))
1136 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1137 		if (DO_BIC(BIC_X2APIC))
1138 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1139 	} else {
1140 		if (DO_BIC(BIC_Package)) {
1141 			if (p)
1142 				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
1143 			else
1144 				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1145 		}
1146 		if (DO_BIC(BIC_Die)) {
1147 			if (c)
1148 				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id);
1149 			else
1150 				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1151 		}
1152 		if (DO_BIC(BIC_Node)) {
1153 			if (t)
1154 				outp += sprintf(outp, "%s%d",
1155 						(printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id);
1156 			else
1157 				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1158 		}
1159 		if (DO_BIC(BIC_Core)) {
1160 			if (c)
1161 				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
1162 			else
1163 				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1164 		}
1165 		if (DO_BIC(BIC_CPU))
1166 			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
1167 		if (DO_BIC(BIC_APIC))
1168 			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
1169 		if (DO_BIC(BIC_X2APIC))
1170 			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
1171 	}
1172 
1173 	if (DO_BIC(BIC_Avg_MHz))
1174 		outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float);
1175 
1176 	if (DO_BIC(BIC_Busy))
1177 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc);
1178 
1179 	if (DO_BIC(BIC_Bzy_MHz)) {
1180 		if (has_base_hz)
1181 			outp +=
1182 			    sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
1183 		else
1184 			outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
1185 					tsc / units * t->aperf / t->mperf / interval_float);
1186 	}
1187 
1188 	if (DO_BIC(BIC_TSC_MHz))
1189 		outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc / units / interval_float);
1190 
1191 	if (DO_BIC(BIC_IPC))
1192 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf);
1193 
1194 	/* IRQ */
1195 	if (DO_BIC(BIC_IRQ)) {
1196 		if (sums_need_wide_columns)
1197 			outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
1198 		else
1199 			outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
1200 	}
1201 
1202 	/* SMI */
1203 	if (DO_BIC(BIC_SMI))
1204 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
1205 
1206 	/* Added counters */
1207 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1208 		if (mp->format == FORMAT_RAW) {
1209 			if (mp->width == 32)
1210 				outp +=
1211 				    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]);
1212 			else
1213 				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
1214 		} else if (mp->format == FORMAT_DELTA) {
1215 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1216 				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
1217 			else
1218 				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
1219 		} else if (mp->format == FORMAT_PERCENT) {
1220 			if (mp->type == COUNTER_USEC)
1221 				outp +=
1222 				    sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
1223 					    t->counter[i] / interval_float / 10000);
1224 			else
1225 				outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc);
1226 		}
1227 	}
1228 
1229 	/* C1 */
1230 	if (DO_BIC(BIC_CPU_c1))
1231 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc);
1232 
1233 	/* print per-core data only for 1st thread in core */
1234 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1235 		goto done;
1236 
1237 	if (DO_BIC(BIC_CPU_c3))
1238 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc);
1239 	if (DO_BIC(BIC_CPU_c6))
1240 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc);
1241 	if (DO_BIC(BIC_CPU_c7))
1242 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc);
1243 
1244 	/* Mod%c6 */
1245 	if (DO_BIC(BIC_Mod_c6))
1246 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
1247 
1248 	if (DO_BIC(BIC_CoreTmp))
1249 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
1250 
1251 	/* Core throttle count */
1252 	if (DO_BIC(BIC_CORE_THROT_CNT))
1253 		outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt);
1254 
1255 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1256 		if (mp->format == FORMAT_RAW) {
1257 			if (mp->width == 32)
1258 				outp +=
1259 				    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]);
1260 			else
1261 				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
1262 		} else if (mp->format == FORMAT_DELTA) {
1263 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1264 				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
1265 			else
1266 				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
1267 		} else if (mp->format == FORMAT_PERCENT) {
1268 			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc);
1269 		}
1270 	}
1271 
1272 	fmt8 = "%s%.2f";
1273 
1274 	if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
1275 		outp +=
1276 		    sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
1277 	if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
1278 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
1279 
1280 	/* print per-package data only for 1st core in package */
1281 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1282 		goto done;
1283 
1284 	/* PkgTmp */
1285 	if (DO_BIC(BIC_PkgTmp))
1286 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
1287 
1288 	/* GFXrc6 */
1289 	if (DO_BIC(BIC_GFX_rc6)) {
1290 		if (p->gfx_rc6_ms == -1) {	/* detect GFX counter reset */
1291 			outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
1292 		} else {
1293 			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
1294 					p->gfx_rc6_ms / 10.0 / interval_float);
1295 		}
1296 	}
1297 
1298 	/* GFXMHz */
1299 	if (DO_BIC(BIC_GFXMHz))
1300 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
1301 
1302 	/* GFXACTMHz */
1303 	if (DO_BIC(BIC_GFXACTMHz))
1304 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz);
1305 
1306 	/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
1307 	if (DO_BIC(BIC_Totl_c0))
1308 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc);
1309 	if (DO_BIC(BIC_Any_c0))
1310 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc);
1311 	if (DO_BIC(BIC_GFX_c0))
1312 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc);
1313 	if (DO_BIC(BIC_CPUGFX))
1314 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc);
1315 
1316 	if (DO_BIC(BIC_Pkgpc2))
1317 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc);
1318 	if (DO_BIC(BIC_Pkgpc3))
1319 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc);
1320 	if (DO_BIC(BIC_Pkgpc6))
1321 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc);
1322 	if (DO_BIC(BIC_Pkgpc7))
1323 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc);
1324 	if (DO_BIC(BIC_Pkgpc8))
1325 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc);
1326 	if (DO_BIC(BIC_Pkgpc9))
1327 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc);
1328 	if (DO_BIC(BIC_Pkgpc10))
1329 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc);
1330 
1331 	if (DO_BIC(BIC_CPU_LPI))
1332 		outp +=
1333 		    sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
1334 	if (DO_BIC(BIC_SYS_LPI))
1335 		outp +=
1336 		    sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
1337 
1338 	if (DO_BIC(BIC_PkgWatt))
1339 		outp +=
1340 		    sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
1341 
1342 	if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
1343 		outp +=
1344 		    sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
1345 	if (DO_BIC(BIC_GFXWatt))
1346 		outp +=
1347 		    sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
1348 	if (DO_BIC(BIC_RAMWatt))
1349 		outp +=
1350 		    sprintf(outp, fmt8, (printed++ ? delim : ""),
1351 			    p->energy_dram * rapl_dram_energy_units / interval_float);
1352 	if (DO_BIC(BIC_Pkg_J))
1353 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
1354 	if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
1355 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
1356 	if (DO_BIC(BIC_GFX_J))
1357 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
1358 	if (DO_BIC(BIC_RAM_J))
1359 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
1360 	if (DO_BIC(BIC_PKG__))
1361 		outp +=
1362 		    sprintf(outp, fmt8, (printed++ ? delim : ""),
1363 			    100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
1364 	if (DO_BIC(BIC_RAM__))
1365 		outp +=
1366 		    sprintf(outp, fmt8, (printed++ ? delim : ""),
1367 			    100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
1368 
1369 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1370 		if (mp->format == FORMAT_RAW) {
1371 			if (mp->width == 32)
1372 				outp +=
1373 				    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]);
1374 			else
1375 				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
1376 		} else if (mp->format == FORMAT_DELTA) {
1377 			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1378 				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
1379 			else
1380 				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
1381 		} else if (mp->format == FORMAT_PERCENT) {
1382 			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc);
1383 		}
1384 	}
1385 
1386 done:
1387 	if (*(outp - 1) != '\n')
1388 		outp += sprintf(outp, "\n");
1389 
1390 	return 0;
1391 }
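
/*
 * For reference, the frequency columns above reduce to (per interval,
 * derived from the expressions in format_counters(), ignoring tsc_tweak):
 *
 *	Avg_MHz = delta_APERF / 1e6 / interval_sec
 *	Busy%   = 100 * delta_MPERF / delta_TSC
 *	Bzy_MHz = delta_TSC / 1e6 * delta_APERF / delta_MPERF / interval_sec
 *
 * so Avg_MHz ~= Busy% * Bzy_MHz / 100.
 */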
1392 
1393 void flush_output_stdout(void)
1394 {
1395 	FILE *filep;
1396 
1397 	if (outf == stderr)
1398 		filep = stdout;
1399 	else
1400 		filep = outf;
1401 
1402 	fputs(output_buffer, filep);
1403 	fflush(filep);
1404 
1405 	outp = output_buffer;
1406 }
1407 
1408 void flush_output_stderr(void)
1409 {
1410 	fputs(output_buffer, outf);
1411 	fflush(outf);
1412 	outp = output_buffer;
1413 }
1414 
1415 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1416 {
1417 	static int count;
1418 
1419 	if ((!count || (header_iterations && !(count % header_iterations))) || !summary_only)
1420 		print_header("\t");
1421 
1422 	format_counters(&average.threads, &average.cores, &average.packages);
1423 
1424 	count++;
1425 
1426 	if (summary_only)
1427 		return;
1428 
1429 	for_all_cpus(format_counters, t, c, p);
1430 }
1431 
1432 #define DELTA_WRAP32(new, old)			\
1433 	old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32);
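
/*
 * Worked example (illustrative values): DELTA_WRAP32(0x10, old) with
 * old == 0xfffffff0 shifts both operands into the high 32 bits,
 * subtracts, and shifts back down, leaving old == 0x20 -- the correct
 * 32-bit delta across the wrap.
 */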
1434 
1435 int delta_package(struct pkg_data *new, struct pkg_data *old)
1436 {
1437 	int i;
1438 	struct msr_counter *mp;
1439 
1440 	if (DO_BIC(BIC_Totl_c0))
1441 		old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
1442 	if (DO_BIC(BIC_Any_c0))
1443 		old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
1444 	if (DO_BIC(BIC_GFX_c0))
1445 		old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
1446 	if (DO_BIC(BIC_CPUGFX))
1447 		old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
1448 
1449 	old->pc2 = new->pc2 - old->pc2;
1450 	if (DO_BIC(BIC_Pkgpc3))
1451 		old->pc3 = new->pc3 - old->pc3;
1452 	if (DO_BIC(BIC_Pkgpc6))
1453 		old->pc6 = new->pc6 - old->pc6;
1454 	if (DO_BIC(BIC_Pkgpc7))
1455 		old->pc7 = new->pc7 - old->pc7;
1456 	old->pc8 = new->pc8 - old->pc8;
1457 	old->pc9 = new->pc9 - old->pc9;
1458 	old->pc10 = new->pc10 - old->pc10;
1459 	old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
1460 	old->sys_lpi = new->sys_lpi - old->sys_lpi;
1461 	old->pkg_temp_c = new->pkg_temp_c;
1462 
1463 	/* flag an error when rc6 counter resets/wraps */
1464 	if (old->gfx_rc6_ms > new->gfx_rc6_ms)
1465 		old->gfx_rc6_ms = -1;
1466 	else
1467 		old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
1468 
1469 	old->gfx_mhz = new->gfx_mhz;
1470 	old->gfx_act_mhz = new->gfx_act_mhz;
1471 
1472 	old->energy_pkg = new->energy_pkg - old->energy_pkg;
1473 	old->energy_cores = new->energy_cores - old->energy_cores;
1474 	old->energy_gfx = new->energy_gfx - old->energy_gfx;
1475 	old->energy_dram = new->energy_dram - old->energy_dram;
1476 	old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status;
1477 	old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status;
1478 
1479 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1480 		if (mp->format == FORMAT_RAW)
1481 			old->counter[i] = new->counter[i];
1482 		else
1483 			old->counter[i] = new->counter[i] - old->counter[i];
1484 	}
1485 
1486 	return 0;
1487 }
1488 
1489 void delta_core(struct core_data *new, struct core_data *old)
1490 {
1491 	int i;
1492 	struct msr_counter *mp;
1493 
1494 	old->c3 = new->c3 - old->c3;
1495 	old->c6 = new->c6 - old->c6;
1496 	old->c7 = new->c7 - old->c7;
1497 	old->core_temp_c = new->core_temp_c;
1498 	old->core_throt_cnt = new->core_throt_cnt;
1499 	old->mc6_us = new->mc6_us - old->mc6_us;
1500 
1501 	DELTA_WRAP32(new->core_energy, old->core_energy);
1502 
1503 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1504 		if (mp->format == FORMAT_RAW)
1505 			old->counter[i] = new->counter[i];
1506 		else
1507 			old->counter[i] = new->counter[i] - old->counter[i];
1508 	}
1509 }
1510 
1511 int soft_c1_residency_display(int bic)
1512 {
1513 	if (!DO_BIC(BIC_CPU_c1) || use_c1_residency_msr)
1514 		return 0;
1515 
1516 	return DO_BIC_READ(bic);
1517 }
1518 
1519 /*
1520  * old = new - old
1521  */
1522 int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta)
1523 {
1524 	int i;
1525 	struct msr_counter *mp;
1526 
1527 	/* cpuid is run only the 1st time; carry those results forward */
1528 	if (DO_BIC(BIC_APIC))
1529 		new->apic_id = old->apic_id;
1530 	if (DO_BIC(BIC_X2APIC))
1531 		new->x2apic_id = old->x2apic_id;
1532 
1533 	/*
1534 	 * the timestamps from the start of the measurement interval are in "old",
1535 	 * the timestamps from the end of the measurement interval are in "new";
1536 	 * overwrite old with new so we can print end-of-interval values
1537 	 */
1538 
1539 	timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta);
1540 	old->tv_begin = new->tv_begin;
1541 	old->tv_end = new->tv_end;
1542 
1543 	old->tsc = new->tsc - old->tsc;
1544 
1545 	/* check for TSC < 1 Mcycles over interval */
1546 	if (old->tsc < (1000 * 1000))
1547 		errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
1548 		     "You can disable all c-states by booting with \"idle=poll\"\n"
1549 		     "or just the deep ones with \"processor.max_cstate=1\"");
1550 
1551 	old->c1 = new->c1 - old->c1;
1552 
1553 	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
1554 		if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
1555 			old->aperf = new->aperf - old->aperf;
1556 			old->mperf = new->mperf - old->mperf;
1557 		} else {
1558 			return -1;
1559 		}
1560 	}
1561 
1562 	if (use_c1_residency_msr) {
1563 		/*
1564 		 * Some models have a dedicated C1 residency MSR,
1565 		 * which should be more accurate than the derivation below.
1566 		 */
1567 	} else {
1568 		/*
1569 		 * As counter collection is not atomic,
1570 		 * it is possible for mperf's non-halted cycles + idle states
1571 		 * to exceed TSC's all cycles: show c1 = 0% in that case.
1572 		 */
1573 		if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
1574 			old->c1 = 0;
1575 		else {
1576 			/* normal case, derive c1 */
1577 			old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
1578 			    - core_delta->c6 - core_delta->c7;
1579 		}
1580 	}
1581 
1582 	if (old->mperf == 0) {
1583 		if (debug > 1)
1584 			fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
1585 		old->mperf = 1;	/* divide by 0 protection */
1586 	}
1587 
1588 	if (DO_BIC(BIC_IPC))
1589 		old->instr_count = new->instr_count - old->instr_count;
1590 
1591 	if (DO_BIC(BIC_IRQ))
1592 		old->irq_count = new->irq_count - old->irq_count;
1593 
1594 	if (DO_BIC(BIC_SMI))
1595 		old->smi_count = new->smi_count - old->smi_count;
1596 
1597 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1598 		if (mp->format == FORMAT_RAW)
1599 			old->counter[i] = new->counter[i];
1600 		else
1601 			old->counter[i] = new->counter[i] - old->counter[i];
1602 	}
1603 	return 0;
1604 }
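
/*
 * Worked example for the derived C1 residency above (illustrative
 * numbers, tsc_tweak == 1.0): with delta_TSC = 1000M, delta_MPERF = 300M
 * and delta(c3 + c6 + c7) = 500M cycles, c1 = 1000M - 300M - 500M = 200M,
 * so CPU%c1 prints as 20.00.
 */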
1605 
1606 int delta_cpu(struct thread_data *t, struct core_data *c,
1607 	      struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2)
1608 {
1609 	int retval = 0;
1610 
1611 	/* calculate core delta only for 1st thread in core */
1612 	if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
1613 		delta_core(c, c2);
1614 
1615 	/* always calculate thread delta */
1616 	retval = delta_thread(t, t2, c2);	/* c2 is core delta */
1617 	if (retval)
1618 		return retval;
1619 
1620 	/* calculate package delta only for 1st core in package */
1621 	if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
1622 		retval = delta_package(p, p2);
1623 
1624 	return retval;
1625 }
1626 
1627 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1628 {
1629 	int i;
1630 	struct msr_counter *mp;
1631 
1632 	t->tv_begin.tv_sec = 0;
1633 	t->tv_begin.tv_usec = 0;
1634 	t->tv_end.tv_sec = 0;
1635 	t->tv_end.tv_usec = 0;
1636 	t->tv_delta.tv_sec = 0;
1637 	t->tv_delta.tv_usec = 0;
1638 
1639 	t->tsc = 0;
1640 	t->aperf = 0;
1641 	t->mperf = 0;
1642 	t->c1 = 0;
1643 
1644 	t->instr_count = 0;
1645 
1646 	t->irq_count = 0;
1647 	t->smi_count = 0;
1648 
1649 	/* tells format_counters to dump all fields from this set */
1650 	t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
1651 
1652 	c->c3 = 0;
1653 	c->c6 = 0;
1654 	c->c7 = 0;
1655 	c->mc6_us = 0;
1656 	c->core_temp_c = 0;
1657 	c->core_energy = 0;
1658 	c->core_throt_cnt = 0;
1659 
1660 	p->pkg_wtd_core_c0 = 0;
1661 	p->pkg_any_core_c0 = 0;
1662 	p->pkg_any_gfxe_c0 = 0;
1663 	p->pkg_both_core_gfxe_c0 = 0;
1664 
1665 	p->pc2 = 0;
1666 	if (DO_BIC(BIC_Pkgpc3))
1667 		p->pc3 = 0;
1668 	if (DO_BIC(BIC_Pkgpc6))
1669 		p->pc6 = 0;
1670 	if (DO_BIC(BIC_Pkgpc7))
1671 		p->pc7 = 0;
1672 	p->pc8 = 0;
1673 	p->pc9 = 0;
1674 	p->pc10 = 0;
1675 	p->cpu_lpi = 0;
1676 	p->sys_lpi = 0;
1677 
1678 	p->energy_pkg = 0;
1679 	p->energy_dram = 0;
1680 	p->energy_cores = 0;
1681 	p->energy_gfx = 0;
1682 	p->rapl_pkg_perf_status = 0;
1683 	p->rapl_dram_perf_status = 0;
1684 	p->pkg_temp_c = 0;
1685 
1686 	p->gfx_rc6_ms = 0;
1687 	p->gfx_mhz = 0;
1688 	p->gfx_act_mhz = 0;
1689 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
1690 		t->counter[i] = 0;
1691 
1692 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
1693 		c->counter[i] = 0;
1694 
1695 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
1696 		p->counter[i] = 0;
1697 }
1698 
1699 int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1700 {
1701 	int i;
1702 	struct msr_counter *mp;
1703 
1704 	/* copy un-changing apic_id's */
1705 	if (DO_BIC(BIC_APIC))
1706 		average.threads.apic_id = t->apic_id;
1707 	if (DO_BIC(BIC_X2APIC))
1708 		average.threads.x2apic_id = t->x2apic_id;
1709 
1710 	/* remember first tv_begin */
1711 	if (average.threads.tv_begin.tv_sec == 0)
1712 		average.threads.tv_begin = t->tv_begin;
1713 
1714 	/* remember last tv_end */
1715 	average.threads.tv_end = t->tv_end;
1716 
1717 	average.threads.tsc += t->tsc;
1718 	average.threads.aperf += t->aperf;
1719 	average.threads.mperf += t->mperf;
1720 	average.threads.c1 += t->c1;
1721 
1722 	average.threads.instr_count += t->instr_count;
1723 
1724 	average.threads.irq_count += t->irq_count;
1725 	average.threads.smi_count += t->smi_count;
1726 
1727 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1728 		if (mp->format == FORMAT_RAW)
1729 			continue;
1730 		average.threads.counter[i] += t->counter[i];
1731 	}
1732 
1733 	/* sum per-core values only for 1st thread in core */
1734 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1735 		return 0;
1736 
1737 	average.cores.c3 += c->c3;
1738 	average.cores.c6 += c->c6;
1739 	average.cores.c7 += c->c7;
1740 	average.cores.mc6_us += c->mc6_us;
1741 
1742 	average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
1743 	average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt);
1744 
1745 	average.cores.core_energy += c->core_energy;
1746 
1747 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1748 		if (mp->format == FORMAT_RAW)
1749 			continue;
1750 		average.cores.counter[i] += c->counter[i];
1751 	}
1752 
1753 	/* sum per-pkg values only for 1st core in pkg */
1754 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1755 		return 0;
1756 
1757 	if (DO_BIC(BIC_Totl_c0))
1758 		average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
1759 	if (DO_BIC(BIC_Any_c0))
1760 		average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
1761 	if (DO_BIC(BIC_GFX_c0))
1762 		average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
1763 	if (DO_BIC(BIC_CPUGFX))
1764 		average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
1765 
1766 	average.packages.pc2 += p->pc2;
1767 	if (DO_BIC(BIC_Pkgpc3))
1768 		average.packages.pc3 += p->pc3;
1769 	if (DO_BIC(BIC_Pkgpc6))
1770 		average.packages.pc6 += p->pc6;
1771 	if (DO_BIC(BIC_Pkgpc7))
1772 		average.packages.pc7 += p->pc7;
1773 	average.packages.pc8 += p->pc8;
1774 	average.packages.pc9 += p->pc9;
1775 	average.packages.pc10 += p->pc10;
1776 
1777 	average.packages.cpu_lpi = p->cpu_lpi;
1778 	average.packages.sys_lpi = p->sys_lpi;
1779 
1780 	average.packages.energy_pkg += p->energy_pkg;
1781 	average.packages.energy_dram += p->energy_dram;
1782 	average.packages.energy_cores += p->energy_cores;
1783 	average.packages.energy_gfx += p->energy_gfx;
1784 
1785 	average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
1786 	average.packages.gfx_mhz = p->gfx_mhz;
1787 	average.packages.gfx_act_mhz = p->gfx_act_mhz;
1788 
1789 	average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
1790 
1791 	average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
1792 	average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
1793 
1794 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1795 		if (mp->format == FORMAT_RAW)
1796 			continue;
1797 		average.packages.counter[i] += p->counter[i];
1798 	}
1799 	return 0;
1800 }
1801 
1802 /*
1803  * sum the counters for all cpus in the system
1804  * compute the weighted average
1805  */
1806 void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1807 {
1808 	int i;
1809 	struct msr_counter *mp;
1810 
1811 	clear_counters(&average.threads, &average.cores, &average.packages);
1812 
1813 	for_all_cpus(sum_counters, t, c, p);
1814 
1815 	/* Use the global time delta for the average. */
1816 	average.threads.tv_delta = tv_delta;
1817 
1818 	average.threads.tsc /= topo.num_cpus;
1819 	average.threads.aperf /= topo.num_cpus;
1820 	average.threads.mperf /= topo.num_cpus;
1821 	average.threads.instr_count /= topo.num_cpus;
1822 	average.threads.c1 /= topo.num_cpus;
1823 
1824 	if (average.threads.irq_count > 9999999)
1825 		sums_need_wide_columns = 1;
1826 
1827 	average.cores.c3 /= topo.num_cores;
1828 	average.cores.c6 /= topo.num_cores;
1829 	average.cores.c7 /= topo.num_cores;
1830 	average.cores.mc6_us /= topo.num_cores;
1831 
1832 	if (DO_BIC(BIC_Totl_c0))
1833 		average.packages.pkg_wtd_core_c0 /= topo.num_packages;
1834 	if (DO_BIC(BIC_Any_c0))
1835 		average.packages.pkg_any_core_c0 /= topo.num_packages;
1836 	if (DO_BIC(BIC_GFX_c0))
1837 		average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
1838 	if (DO_BIC(BIC_CPUGFX))
1839 		average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
1840 
1841 	average.packages.pc2 /= topo.num_packages;
1842 	if (DO_BIC(BIC_Pkgpc3))
1843 		average.packages.pc3 /= topo.num_packages;
1844 	if (DO_BIC(BIC_Pkgpc6))
1845 		average.packages.pc6 /= topo.num_packages;
1846 	if (DO_BIC(BIC_Pkgpc7))
1847 		average.packages.pc7 /= topo.num_packages;
1848 
1849 	average.packages.pc8 /= topo.num_packages;
1850 	average.packages.pc9 /= topo.num_packages;
1851 	average.packages.pc10 /= topo.num_packages;
1852 
1853 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1854 		if (mp->format == FORMAT_RAW)
1855 			continue;
1856 		if (mp->type == COUNTER_ITEMS) {
1857 			if (average.threads.counter[i] > 9999999)
1858 				sums_need_wide_columns = 1;
1859 			continue;
1860 		}
1861 		average.threads.counter[i] /= topo.num_cpus;
1862 	}
1863 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1864 		if (mp->format == FORMAT_RAW)
1865 			continue;
1866 		if (mp->type == COUNTER_ITEMS) {
1867 			if (average.cores.counter[i] > 9999999)
1868 				sums_need_wide_columns = 1;
1869 		}
1870 		average.cores.counter[i] /= topo.num_cores;
1871 	}
1872 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1873 		if (mp->format == FORMAT_RAW)
1874 			continue;
1875 		if (mp->type == COUNTER_ITEMS) {
1876 			if (average.packages.counter[i] > 9999999)
1877 				sums_need_wide_columns = 1;
1878 		}
1879 		average.packages.counter[i] /= topo.num_packages;
1880 	}
1881 }
1882 
1883 static unsigned long long rdtsc(void)
1884 {
1885 	unsigned int low, high;
1886 
1887 	asm volatile ("rdtsc":"=a" (low), "=d"(high));
1888 
1889 	return low | ((unsigned long long)high) << 32;
1890 }
1891 
1892 /*
1893  * Open a file, and exit on failure
1894  */
1895 FILE *fopen_or_die(const char *path, const char *mode)
1896 {
1897 	FILE *filep = fopen(path, mode);
1898 
1899 	if (!filep)
1900 		err(1, "%s: open failed", path);
1901 	return filep;
1902 }
1903 
1904 /*
1905  * snapshot_sysfs_counter()
1906  *
1907  * return snapshot of given counter
1908  */
1909 unsigned long long snapshot_sysfs_counter(char *path)
1910 {
1911 	FILE *fp;
1912 	int retval;
1913 	unsigned long long counter;
1914 
1915 	fp = fopen_or_die(path, "r");
1916 
1917 	retval = fscanf(fp, "%lld", &counter);
1918 	if (retval != 1)
1919 		err(1, "snapshot_sysfs_counter(%s)", path);
1920 
1921 	fclose(fp);
1922 
1923 	return counter;
1924 }
1925 
1926 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
1927 {
1928 	if (mp->msr_num != 0) {
1929 		if (get_msr(cpu, mp->msr_num, counterp))
1930 			return -1;
1931 	} else {
1932 		char path[128 + PATH_BYTES];
1933 
1934 		if (mp->flags & SYSFS_PERCPU) {
1935 			sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", cpu, mp->path);
1936 
1937 			*counterp = snapshot_sysfs_counter(path);
1938 		} else {
1939 			*counterp = snapshot_sysfs_counter(mp->path);
1940 		}
1941 	}
1942 
1943 	return 0;
1944 }
1945 
1946 int get_epb(int cpu)
1947 {
1948 	char path[128 + PATH_BYTES];
1949 	unsigned long long msr;
1950 	int ret, epb = -1;
1951 	FILE *fp;
1952 
1953 	sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu);
1954 
1955 	fp = fopen(path, "r");
1956 	if (!fp)
1957 		goto msr_fallback;
1958 
1959 	ret = fscanf(fp, "%d", &epb);
1960 	if (ret != 1)
1961 		err(1, "%s(%s)", __func__, path);
1962 
1963 	fclose(fp);
1964 
1965 	return epb;
1966 
1967 msr_fallback:
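	/*
	 * Fall back to reading the bias directly from
	 * MSR_IA32_ENERGY_PERF_BIAS bits 3:0
	 * (0 = maximum performance, 15 = maximum energy saving).
	 */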
1968 	get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
1969 
1970 	return msr & 0xf;
1971 }
1972 
1973 void get_apic_id(struct thread_data *t)
1974 {
1975 	unsigned int eax, ebx, ecx, edx;
1976 
1977 	if (DO_BIC(BIC_APIC)) {
1978 		eax = ebx = ecx = edx = 0;
1979 		__cpuid(1, eax, ebx, ecx, edx);
1980 
1981 		t->apic_id = (ebx >> 24) & 0xff;
1982 	}
1983 
1984 	if (!DO_BIC(BIC_X2APIC))
1985 		return;
1986 
1987 	if (authentic_amd || hygon_genuine) {
1988 		unsigned int topology_extensions;
1989 
1990 		if (max_extended_level < 0x8000001e)
1991 			return;
1992 
1993 		eax = ebx = ecx = edx = 0;
1994 		__cpuid(0x80000001, eax, ebx, ecx, edx);
1995 		topology_extensions = ecx & (1 << 22);
1996 
1997 		if (topology_extensions == 0)
1998 			return;
1999 
2000 		eax = ebx = ecx = edx = 0;
2001 		__cpuid(0x8000001e, eax, ebx, ecx, edx);
2002 
2003 		t->x2apic_id = eax;
2004 		return;
2005 	}
2006 
2007 	if (!genuine_intel)
2008 		return;
2009 
2010 	if (max_level < 0xb)
2011 		return;
2012 
2013 	ecx = 0;
2014 	__cpuid(0xb, eax, ebx, ecx, edx);
2015 	t->x2apic_id = edx;
2016 
2017 	if (debug && (t->apic_id != (t->x2apic_id & 0xff)))
2018 		fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
2019 }
2020 
2021 int get_core_throt_cnt(int cpu, unsigned long long *cnt)
2022 {
2023 	char path[128 + PATH_BYTES];
2024 	unsigned long long tmp;
2025 	FILE *fp;
2026 	int ret;
2027 
2028 	sprintf(path, "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu);
2029 	fp = fopen(path, "r");
2030 	if (!fp)
2031 		return -1;
	ret = fscanf(fp, "%lld", &tmp);
	fclose(fp);
	if (ret != 1)
		return -1;
2036 	*cnt = tmp;
2037 
2038 	return 0;
2039 }
2040 
2041 /*
2042  * get_counters(...)
2043  * migrate to cpu
2044  * acquire and record local counters for that cpu
2045  */
2046 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2047 {
2048 	int cpu = t->cpu_id;
2049 	unsigned long long msr;
2050 	int aperf_mperf_retry_count = 0;
2051 	struct msr_counter *mp;
2052 	int i;
2053 
2054 	if (cpu_migrate(cpu)) {
2055 		fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu);
2056 		return -1;
2057 	}
2058 
2059 	gettimeofday(&t->tv_begin, (struct timezone *)NULL);
2060 
2061 	if (first_counter_read)
2062 		get_apic_id(t);
2063 retry:
2064 	t->tsc = rdtsc();	/* we are running on local CPU of interest */
2065 
2066 	if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
2067 		unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
2068 
2069 		/*
2070 		 * The TSC, APERF and MPERF must be read together for
2071 		 * APERF/MPERF and MPERF/TSC to give accurate results.
2072 		 *
		 * Unfortunately, APERF and MPERF are each read by a
		 * separate system call, so delays may occur between
		 * them.  If the time to read them varies by a large
		 * amount, we re-read them.
2077 		 */
2078 
2079 		/*
2080 		 * This initial dummy APERF read has been seen to
2081 		 * reduce jitter in the subsequent reads.
2082 		 */
2083 
2084 		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
2085 			return -3;
2086 
2087 		t->tsc = rdtsc();	/* re-read close to APERF */
2088 
2089 		tsc_before = t->tsc;
2090 
2091 		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
2092 			return -3;
2093 
2094 		tsc_between = rdtsc();
2095 
2096 		if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
2097 			return -4;
2098 
2099 		tsc_after = rdtsc();
2100 
2101 		aperf_time = tsc_between - tsc_before;
2102 		mperf_time = tsc_after - tsc_between;
2103 
2104 		/*
		 * If the system call latencies to read APERF and MPERF
		 * differ by more than 2x, then try again.
2107 		 */
2108 		if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
2109 			aperf_mperf_retry_count++;
2110 			if (aperf_mperf_retry_count < 5)
2111 				goto retry;
2112 			else
2113 				warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time);
2114 		}
2115 		aperf_mperf_retry_count = 0;
2116 
2117 		t->aperf = t->aperf * aperf_mperf_multiplier;
2118 		t->mperf = t->mperf * aperf_mperf_multiplier;
2119 	}
2120 
2121 	if (DO_BIC(BIC_IPC))
2122 		if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long))
2123 			return -4;
2124 
2125 	if (DO_BIC(BIC_IRQ))
2126 		t->irq_count = irqs_per_cpu[cpu];
2127 	if (DO_BIC(BIC_SMI)) {
2128 		if (get_msr(cpu, MSR_SMI_COUNT, &msr))
2129 			return -5;
2130 		t->smi_count = msr & 0xFFFFFFFF;
2131 	}
2132 	if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
2133 		if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
2134 			return -6;
2135 	}
2136 
2137 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
2138 		if (get_mp(cpu, mp, &t->counter[i]))
2139 			return -10;
2140 	}
2141 
2142 	/* collect core counters only for 1st thread in core */
2143 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
2144 		goto done;
2145 
2146 	if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
2147 		if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
2148 			return -6;
2149 	}
2150 
2151 	if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !do_knl_cstates) {
2152 		if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
2153 			return -7;
2154 	} else if (do_knl_cstates || soft_c1_residency_display(BIC_CPU_c6)) {
2155 		if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
2156 			return -7;
2157 	}
2158 
2159 	if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) {
2160 		if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
2161 			return -8;
2162 		else if (t->is_atom) {
2163 			/*
			 * For Atom CPUs that have core C-states deeper than C6,
			 * MSR_CORE_C6_RESIDENCY returns the residency of CC6 and deeper.
			 * Subtract the CC7 (and deeper C-state) residency to get
			 * accurate CC6 residency.
2168 			 */
2169 			c->c6 -= c->c7;
2170 		}
2171 	}
2172 
2173 	if (DO_BIC(BIC_Mod_c6))
2174 		if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
2175 			return -8;
2176 
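	/*
	 * IA32_THERM_STATUS bits 22:16 hold the Digital Readout: the
	 * temperature in degrees C below TjMax, so the absolute core
	 * temperature is tj_max minus that field.
	 */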
2177 	if (DO_BIC(BIC_CoreTmp)) {
2178 		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
2179 			return -9;
2180 		c->core_temp_c = tj_max - ((msr >> 16) & 0x7F);
2181 	}
2182 
2183 	if (DO_BIC(BIC_CORE_THROT_CNT))
2184 		get_core_throt_cnt(cpu, &c->core_throt_cnt);
2185 
2186 	if (do_rapl & RAPL_AMD_F17H) {
2187 		if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
2188 			return -14;
2189 		c->core_energy = msr & 0xFFFFFFFF;
2190 	}
2191 
2192 	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
2193 		if (get_mp(cpu, mp, &c->counter[i]))
2194 			return -10;
2195 	}
2196 
2197 	/* collect package counters only for 1st core in package */
2198 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2199 		goto done;
2200 
2201 	if (DO_BIC(BIC_Totl_c0)) {
2202 		if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
2203 			return -10;
2204 	}
2205 	if (DO_BIC(BIC_Any_c0)) {
2206 		if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
2207 			return -11;
2208 	}
2209 	if (DO_BIC(BIC_GFX_c0)) {
2210 		if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
2211 			return -12;
2212 	}
2213 	if (DO_BIC(BIC_CPUGFX)) {
2214 		if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
2215 			return -13;
2216 	}
2217 	if (DO_BIC(BIC_Pkgpc3))
2218 		if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
2219 			return -9;
2220 	if (DO_BIC(BIC_Pkgpc6)) {
2221 		if (do_slm_cstates) {
2222 			if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
2223 				return -10;
2224 		} else {
2225 			if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
2226 				return -10;
2227 		}
2228 	}
2229 
2230 	if (DO_BIC(BIC_Pkgpc2))
2231 		if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
2232 			return -11;
2233 	if (DO_BIC(BIC_Pkgpc7))
2234 		if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
2235 			return -12;
2236 	if (DO_BIC(BIC_Pkgpc8))
2237 		if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
2238 			return -13;
2239 	if (DO_BIC(BIC_Pkgpc9))
2240 		if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
2241 			return -13;
2242 	if (DO_BIC(BIC_Pkgpc10))
2243 		if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
2244 			return -13;
2245 
2246 	if (DO_BIC(BIC_CPU_LPI))
2247 		p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
2248 	if (DO_BIC(BIC_SYS_LPI))
2249 		p->sys_lpi = cpuidle_cur_sys_lpi_us;
2250 
2251 	if (do_rapl & RAPL_PKG) {
2252 		if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
2253 			return -13;
2254 		p->energy_pkg = msr;
2255 	}
2256 	if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
2257 		if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
2258 			return -14;
2259 		p->energy_cores = msr;
2260 	}
2261 	if (do_rapl & RAPL_DRAM) {
2262 		if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
2263 			return -15;
2264 		p->energy_dram = msr;
2265 	}
2266 	if (do_rapl & RAPL_GFX) {
2267 		if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
2268 			return -16;
2269 		p->energy_gfx = msr;
2270 	}
2271 	if (do_rapl & RAPL_PKG_PERF_STATUS) {
2272 		if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
2273 			return -16;
2274 		p->rapl_pkg_perf_status = msr;
2275 	}
2276 	if (do_rapl & RAPL_DRAM_PERF_STATUS) {
2277 		if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
2278 			return -16;
2279 		p->rapl_dram_perf_status = msr;
2280 	}
2281 	if (do_rapl & RAPL_AMD_F17H) {
2282 		if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
2283 			return -13;
2284 		p->energy_pkg = msr;
2285 	}
2286 	if (DO_BIC(BIC_PkgTmp)) {
2287 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
2288 			return -17;
2289 		p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F);
2290 	}
2291 
2292 	if (DO_BIC(BIC_GFX_rc6))
2293 		p->gfx_rc6_ms = gfx_cur_rc6_ms;
2294 
2295 	if (DO_BIC(BIC_GFXMHz))
2296 		p->gfx_mhz = gfx_cur_mhz;
2297 
2298 	if (DO_BIC(BIC_GFXACTMHz))
2299 		p->gfx_act_mhz = gfx_act_mhz;
2300 
2301 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
2302 		if (get_mp(cpu, mp, &p->counter[i]))
2303 			return -10;
2304 	}
2305 done:
2306 	gettimeofday(&t->tv_end, (struct timezone *)NULL);
2307 
2308 	return 0;
2309 }
2310 
2311 /*
2312  * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
2313  * If you change the values, note they are used both in comparisons
2314  * (>= PCL__7) and to index pkg_cstate_limit_strings[].
2315  */
2316 
2317 #define PCLUKN 0		/* Unknown */
2318 #define PCLRSV 1		/* Reserved */
2319 #define PCL__0 2		/* PC0 */
2320 #define PCL__1 3		/* PC1 */
2321 #define PCL__2 4		/* PC2 */
2322 #define PCL__3 5		/* PC3 */
2323 #define PCL__4 6		/* PC4 */
2324 #define PCL__6 7		/* PC6 */
2325 #define PCL_6N 8		/* PC6 No Retention */
2326 #define PCL_6R 9		/* PC6 Retention */
2327 #define PCL__7 10		/* PC7 */
2328 #define PCL_7S 11		/* PC7 Shrink */
2329 #define PCL__8 12		/* PC8 */
2330 #define PCL__9 13		/* PC9 */
2331 #define PCL_10 14		/* PC10 */
2332 #define PCLUNL 15		/* Unlimited */
2333 
2334 int pkg_cstate_limit = PCLUKN;
2335 char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2",
2336 	"pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited"
2337 };
2338 
2339 int nhm_pkg_cstate_limits[16] =
2340     { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2341 	PCLRSV, PCLRSV
2342 };
2343 
2344 int snb_pkg_cstate_limits[16] =
2345     { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2346 	PCLRSV, PCLRSV
2347 };
2348 
2349 int hsw_pkg_cstate_limits[16] =
2350     { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2351 	PCLRSV, PCLRSV
2352 };
2353 
2354 int slv_pkg_cstate_limits[16] =
2355     { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2356 	PCL__6, PCL__7
2357 };
2358 
2359 int amt_pkg_cstate_limits[16] =
2360     { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2361 	PCLRSV, PCLRSV
2362 };
2363 
2364 int phi_pkg_cstate_limits[16] =
2365     { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2366 	PCLRSV, PCLRSV
2367 };
2368 
2369 int glm_pkg_cstate_limits[16] =
2370     { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2371 	PCLRSV, PCLRSV
2372 };
2373 
2374 int skx_pkg_cstate_limits[16] =
2375     { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2376 	PCLRSV, PCLRSV
2377 };
2378 
2379 int icx_pkg_cstate_limits[16] =
2380     { PCL__0, PCL__2, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
2381 	PCLRSV, PCLRSV
2382 };
2383 
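/*
 * On parts where base_hz and tsc_hz differ, scale TSC deltas by
 * tsc_tweak so they can be compared on the same time base as MPERF
 * and the C-state residency counters in the derived-C1 math above.
 */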
2384 static void calculate_tsc_tweak()
2385 {
2386 	tsc_tweak = base_hz / tsc_hz;
2387 }
2388 
2389 void prewake_cstate_probe(unsigned int family, unsigned int model);
2390 
2391 static void dump_nhm_platform_info(void)
2392 {
2393 	unsigned long long msr;
2394 	unsigned int ratio;
2395 
2396 	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
2397 
2398 	fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
2399 
2400 	ratio = (msr >> 40) & 0xFF;
2401 	fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk);
2402 
2403 	ratio = (msr >> 8) & 0xFF;
2404 	fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
2405 
2406 	get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
2407 	fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
2408 		base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
2409 
2410 	/* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */
2411 	if (dis_cstate_prewake)
2412 		fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN");
2413 
2414 	return;
2415 }
2416 
2417 static void dump_hsw_turbo_ratio_limits(void)
2418 {
2419 	unsigned long long msr;
2420 	unsigned int ratio;
2421 
2422 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
2423 
2424 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
2425 
2426 	ratio = (msr >> 8) & 0xFF;
2427 	if (ratio)
2428 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk);
2429 
2430 	ratio = (msr >> 0) & 0xFF;
2431 	if (ratio)
2432 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk);
2433 	return;
2434 }
2435 
2436 static void dump_ivt_turbo_ratio_limits(void)
2437 {
2438 	unsigned long long msr;
2439 	unsigned int ratio;
2440 
2441 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
2442 
2443 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
2444 
2445 	ratio = (msr >> 56) & 0xFF;
2446 	if (ratio)
2447 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk);
2448 
2449 	ratio = (msr >> 48) & 0xFF;
2450 	if (ratio)
2451 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk);
2452 
2453 	ratio = (msr >> 40) & 0xFF;
2454 	if (ratio)
2455 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk);
2456 
2457 	ratio = (msr >> 32) & 0xFF;
2458 	if (ratio)
2459 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk);
2460 
2461 	ratio = (msr >> 24) & 0xFF;
2462 	if (ratio)
2463 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk);
2464 
2465 	ratio = (msr >> 16) & 0xFF;
2466 	if (ratio)
2467 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk);
2468 
2469 	ratio = (msr >> 8) & 0xFF;
2470 	if (ratio)
2471 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk);
2472 
2473 	ratio = (msr >> 0) & 0xFF;
2474 	if (ratio)
2475 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk);
2476 	return;
2477 }
2478 
2479 int has_turbo_ratio_group_limits(int family, int model)
2480 {
2481 
2482 	if (!genuine_intel)
2483 		return 0;
2484 
2485 	switch (model) {
2486 	case INTEL_FAM6_ATOM_GOLDMONT:
2487 	case INTEL_FAM6_SKYLAKE_X:
2488 	case INTEL_FAM6_ICELAKE_X:
2489 	case INTEL_FAM6_ATOM_GOLDMONT_D:
2490 	case INTEL_FAM6_ATOM_TREMONT_D:
2491 		return 1;
2492 	}
2493 	return 0;
2494 }
2495 
2496 static void dump_turbo_ratio_limits(int family, int model)
2497 {
2498 	unsigned long long msr, core_counts;
2499 	unsigned int ratio, group_size;
2500 
2501 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2502 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
2503 
2504 	if (has_turbo_ratio_group_limits(family, model)) {
2505 		get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
2506 		fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
2507 	} else {
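		/*
		 * Without a group-size MSR, assume the conventional layout:
		 * byte N of MSR_TURBO_RATIO_LIMIT is the limit for N+1
		 * active cores, i.e. group sizes 1..8, one per byte.
		 */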
2508 		core_counts = 0x0807060504030201;
2509 	}
2510 
2511 	ratio = (msr >> 56) & 0xFF;
2512 	group_size = (core_counts >> 56) & 0xFF;
2513 	if (ratio)
2514 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2515 			ratio, bclk, ratio * bclk, group_size);
2516 
2517 	ratio = (msr >> 48) & 0xFF;
2518 	group_size = (core_counts >> 48) & 0xFF;
2519 	if (ratio)
2520 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2521 			ratio, bclk, ratio * bclk, group_size);
2522 
2523 	ratio = (msr >> 40) & 0xFF;
2524 	group_size = (core_counts >> 40) & 0xFF;
2525 	if (ratio)
2526 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2527 			ratio, bclk, ratio * bclk, group_size);
2528 
2529 	ratio = (msr >> 32) & 0xFF;
2530 	group_size = (core_counts >> 32) & 0xFF;
2531 	if (ratio)
2532 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2533 			ratio, bclk, ratio * bclk, group_size);
2534 
2535 	ratio = (msr >> 24) & 0xFF;
2536 	group_size = (core_counts >> 24) & 0xFF;
2537 	if (ratio)
2538 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2539 			ratio, bclk, ratio * bclk, group_size);
2540 
2541 	ratio = (msr >> 16) & 0xFF;
2542 	group_size = (core_counts >> 16) & 0xFF;
2543 	if (ratio)
2544 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2545 			ratio, bclk, ratio * bclk, group_size);
2546 
2547 	ratio = (msr >> 8) & 0xFF;
2548 	group_size = (core_counts >> 8) & 0xFF;
2549 	if (ratio)
2550 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2551 			ratio, bclk, ratio * bclk, group_size);
2552 
2553 	ratio = (msr >> 0) & 0xFF;
2554 	group_size = (core_counts >> 0) & 0xFF;
2555 	if (ratio)
2556 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2557 			ratio, bclk, ratio * bclk, group_size);
2558 	return;
2559 }
2560 
2561 static void dump_atom_turbo_ratio_limits(void)
2562 {
2563 	unsigned long long msr;
2564 	unsigned int ratio;
2565 
2566 	get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
2567 	fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2568 
2569 	ratio = (msr >> 0) & 0x3F;
2570 	if (ratio)
2571 		fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", ratio, bclk, ratio * bclk);
2572 
2573 	ratio = (msr >> 8) & 0x3F;
2574 	if (ratio)
2575 		fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", ratio, bclk, ratio * bclk);
2576 
2577 	ratio = (msr >> 16) & 0x3F;
2578 	if (ratio)
2579 		fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
2580 
2581 	get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
2582 	fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2583 
2584 	ratio = (msr >> 24) & 0x3F;
2585 	if (ratio)
2586 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk);
2587 
2588 	ratio = (msr >> 16) & 0x3F;
2589 	if (ratio)
2590 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk);
2591 
2592 	ratio = (msr >> 8) & 0x3F;
2593 	if (ratio)
2594 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk);
2595 
2596 	ratio = (msr >> 0) & 0x3F;
2597 	if (ratio)
2598 		fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", ratio, bclk, ratio * bclk);
2599 }
2600 
2601 static void dump_knl_turbo_ratio_limits(void)
2602 {
2603 	const unsigned int buckets_no = 7;
2604 
2605 	unsigned long long msr;
2606 	int delta_cores, delta_ratio;
2607 	int i, b_nr;
2608 	unsigned int cores[buckets_no];
2609 	unsigned int ratio[buckets_no];
2610 
2611 	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2612 
2613 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
2614 
2615 	/*
2616 	 * Turbo encoding in KNL is as follows:
2617 	 * [0] -- Reserved
2618 	 * [7:1] -- Base value of number of active cores of bucket 1.
2619 	 * [15:8] -- Base value of freq ratio of bucket 1.
2620 	 * [20:16] -- +ve delta of number of active cores of bucket 2.
2621 	 * i.e. active cores of bucket 2 =
2622 	 * active cores of bucket 1 + delta
2623 	 * [23:21] -- Negative delta of freq ratio of bucket 2.
2624 	 * i.e. freq ratio of bucket 2 =
2625 	 * freq ratio of bucket 1 - delta
2626 	 * [28:24]-- +ve delta of number of active cores of bucket 3.
2627 	 * [31:29]-- -ve delta of freq ratio of bucket 3.
2628 	 * [36:32]-- +ve delta of number of active cores of bucket 4.
2629 	 * [39:37]-- -ve delta of freq ratio of bucket 4.
2630 	 * [44:40]-- +ve delta of number of active cores of bucket 5.
2631 	 * [47:45]-- -ve delta of freq ratio of bucket 5.
2632 	 * [52:48]-- +ve delta of number of active cores of bucket 6.
2633 	 * [55:53]-- -ve delta of freq ratio of bucket 6.
2634 	 * [60:56]-- +ve delta of number of active cores of bucket 7.
2635 	 * [63:61]-- -ve delta of freq ratio of bucket 7.
2636 	 */
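	/*
	 * Worked example (hypothetical MSR value): if the low three bytes
	 * are 0x221E04, bucket 1 has (0x04 >> 1) = 2 cores at ratio
	 * 0x1E = 30, and bucket 2 adds (0x22 & 0x1F) = 2 cores while
	 * dropping the ratio by (0x22 >> 5) = 1, i.e. 4 cores at ratio 29.
	 */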
2637 
2638 	b_nr = 0;
2639 	cores[b_nr] = (msr & 0xFF) >> 1;
2640 	ratio[b_nr] = (msr >> 8) & 0xFF;
2641 
2642 	for (i = 16; i < 64; i += 8) {
2643 		delta_cores = (msr >> i) & 0x1F;
2644 		delta_ratio = (msr >> (i + 5)) & 0x7;
2645 
2646 		cores[b_nr + 1] = cores[b_nr] + delta_cores;
2647 		ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
2648 		b_nr++;
2649 	}
2650 
2651 	for (i = buckets_no - 1; i >= 0; i--)
2652 		if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
2653 			fprintf(outf,
2654 				"%d * %.1f = %.1f MHz max turbo %d active cores\n",
2655 				ratio[i], bclk, ratio[i] * bclk, cores[i]);
2656 }
2657 
2658 static void dump_nhm_cst_cfg(void)
2659 {
2660 	unsigned long long msr;
2661 
2662 	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2663 
2664 	fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
2665 
2666 	fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
2667 		(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
2668 		(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
2669 		(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
2670 		(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
2671 		(msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]);
2672 
2673 #define AUTOMATIC_CSTATE_CONVERSION		(1UL << 16)
2674 	if (has_automatic_cstate_conversion) {
2675 		fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
2676 	}
2677 
2678 	fprintf(outf, ")\n");
2679 
2680 	return;
2681 }
2682 
2683 static void dump_config_tdp(void)
2684 {
2685 	unsigned long long msr;
2686 
2687 	get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
2688 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
2689 	fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
2690 
2691 	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
2692 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
2693 	if (msr) {
2694 		fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2695 		fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2696 		fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2697 		fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
2698 	}
2699 	fprintf(outf, ")\n");
2700 
2701 	get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
2702 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
2703 	if (msr) {
2704 		fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2705 		fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2706 		fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2707 		fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
2708 	}
2709 	fprintf(outf, ")\n");
2710 
2711 	get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
2712 	fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
2713 	if ((msr) & 0x3)
2714 		fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
2715 	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2716 	fprintf(outf, ")\n");
2717 
2718 	get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
2719 	fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
2720 	fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
2721 	fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2722 	fprintf(outf, ")\n");
2723 }
2724 
2725 unsigned int irtl_time_units[] = { 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
2726 
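/*
 * Each MSR_PKGCn_IRTL value is decoded below as: bit 15 = valid,
 * bits 9:0 = interrupt response time limit, scaled by the multiplier
 * selected from irtl_time_units[] by the time-unit field above bit 10.
 */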
2727 void print_irtl(void)
2728 {
2729 	unsigned long long msr;
2730 
2731 	get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
2732 	fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
2733 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2734 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2735 
2736 	get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
2737 	fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
2738 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2739 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2740 
2741 	get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
2742 	fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
2743 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2744 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2745 
2746 	if (!do_irtl_hsw)
2747 		return;
2748 
2749 	get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
2750 	fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
2751 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2752 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2753 
2754 	get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
2755 	fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
2756 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2757 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2758 
2759 	get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
2760 	fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
2761 	fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2762 		(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2763 
2764 }
2765 
2766 void free_fd_percpu(void)
2767 {
2768 	int i;
2769 
2770 	for (i = 0; i < topo.max_cpu_num + 1; ++i) {
2771 		if (fd_percpu[i] != 0)
2772 			close(fd_percpu[i]);
2773 	}
2774 
2775 	free(fd_percpu);
2776 }
2777 
2778 void free_all_buffers(void)
2779 {
2780 	int i;
2781 
2782 	CPU_FREE(cpu_present_set);
2783 	cpu_present_set = NULL;
2784 	cpu_present_setsize = 0;
2785 
2786 	CPU_FREE(cpu_affinity_set);
2787 	cpu_affinity_set = NULL;
2788 	cpu_affinity_setsize = 0;
2789 
2790 	free(thread_even);
2791 	free(core_even);
2792 	free(package_even);
2793 
2794 	thread_even = NULL;
2795 	core_even = NULL;
2796 	package_even = NULL;
2797 
2798 	free(thread_odd);
2799 	free(core_odd);
2800 	free(package_odd);
2801 
2802 	thread_odd = NULL;
2803 	core_odd = NULL;
2804 	package_odd = NULL;
2805 
2806 	free(output_buffer);
2807 	output_buffer = NULL;
2808 	outp = NULL;
2809 
2810 	free_fd_percpu();
2811 
2812 	free(irq_column_2_cpu);
2813 	free(irqs_per_cpu);
2814 
2815 	for (i = 0; i <= topo.max_cpu_num; ++i) {
2816 		if (cpus[i].put_ids)
2817 			CPU_FREE(cpus[i].put_ids);
2818 	}
2819 	free(cpus);
2820 }
2821 
2822 /*
2823  * Parse a file containing a single int.
 * Return 0 if the file cannot be opened.
 * Exit if the file can be opened but cannot be parsed.
2826  */
2827 int parse_int_file(const char *fmt, ...)
2828 {
2829 	va_list args;
2830 	char path[PATH_MAX];
2831 	FILE *filep;
2832 	int value;
2833 
2834 	va_start(args, fmt);
2835 	vsnprintf(path, sizeof(path), fmt, args);
2836 	va_end(args);
2837 	filep = fopen(path, "r");
2838 	if (!filep)
2839 		return 0;
2840 	if (fscanf(filep, "%d", &value) != 1)
2841 		err(1, "%s: failed to parse number from file", path);
2842 	fclose(filep);
2843 	return value;
2844 }
2845 
2846 /*
2847  * cpu_is_first_core_in_package(cpu)
2848  * return 1 if given CPU is 1st core in package
2849  */
2850 int cpu_is_first_core_in_package(int cpu)
2851 {
2852 	return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);
2853 }
2854 
2855 int get_physical_package_id(int cpu)
2856 {
2857 	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
2858 }
2859 
2860 int get_die_id(int cpu)
2861 {
2862 	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu);
2863 }
2864 
2865 int get_core_id(int cpu)
2866 {
2867 	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
2868 }
2869 
2870 void set_node_data(void)
2871 {
2872 	int pkg, node, lnode, cpu, cpux;
2873 	int cpu_count;
2874 
2875 	/* initialize logical_node_id */
2876 	for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
2877 		cpus[cpu].logical_node_id = -1;
2878 
2879 	cpu_count = 0;
2880 	for (pkg = 0; pkg < topo.num_packages; pkg++) {
2881 		lnode = 0;
2882 		for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
2883 			if (cpus[cpu].physical_package_id != pkg)
2884 				continue;
2885 			/* find a cpu with an unset logical_node_id */
2886 			if (cpus[cpu].logical_node_id != -1)
2887 				continue;
2888 			cpus[cpu].logical_node_id = lnode;
2889 			node = cpus[cpu].physical_node_id;
2890 			cpu_count++;
2891 			/*
2892 			 * find all matching cpus on this pkg and set
2893 			 * the logical_node_id
2894 			 */
2895 			for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
2896 				if ((cpus[cpux].physical_package_id == pkg) && (cpus[cpux].physical_node_id == node)) {
2897 					cpus[cpux].logical_node_id = lnode;
2898 					cpu_count++;
2899 				}
2900 			}
2901 			lnode++;
2902 			if (lnode > topo.nodes_per_pkg)
2903 				topo.nodes_per_pkg = lnode;
2904 		}
2905 		if (cpu_count >= topo.max_cpu_num)
2906 			break;
2907 	}
2908 }
2909 
2910 int get_physical_node_id(struct cpu_topology *thiscpu)
2911 {
2912 	char path[80];
2913 	FILE *filep;
2914 	int i;
2915 	int cpu = thiscpu->logical_cpu_id;
2916 
2917 	for (i = 0; i <= topo.max_cpu_num; i++) {
2918 		sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", cpu, i);
2919 		filep = fopen(path, "r");
2920 		if (!filep)
2921 			continue;
2922 		fclose(filep);
2923 		return i;
2924 	}
2925 	return -1;
2926 }
2927 
2928 int get_thread_siblings(struct cpu_topology *thiscpu)
2929 {
2930 	char path[80], character;
2931 	FILE *filep;
2932 	unsigned long map;
2933 	int so, shift, sib_core;
2934 	int cpu = thiscpu->logical_cpu_id;
2935 	int offset = topo.max_cpu_num + 1;
2936 	size_t size;
2937 	int thread_id = 0;
2938 
2939 	thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
2940 	if (thiscpu->thread_id < 0)
2941 		thiscpu->thread_id = thread_id++;
2942 	if (!thiscpu->put_ids)
2943 		return -1;
2944 
2945 	size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
2946 	CPU_ZERO_S(size, thiscpu->put_ids);
2947 
2948 	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
2949 	filep = fopen(path, "r");
2950 
2951 	if (!filep) {
2952 		warnx("%s: open failed", path);
2953 		return -1;
2954 	}
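	/*
	 * thread_siblings is a comma-separated list of hex words
	 * (BITMASK_SIZE bits each), most-significant word first
	 * (e.g. "00000000,00000003" marks CPUs 0 and 1 as siblings),
	 * so walk the words from the top of the CPU range downward.
	 */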
2955 	do {
2956 		offset -= BITMASK_SIZE;
2957 		if (fscanf(filep, "%lx%c", &map, &character) != 2)
2958 			err(1, "%s: failed to parse file", path);
2959 		for (shift = 0; shift < BITMASK_SIZE; shift++) {
2960 			if ((map >> shift) & 0x1) {
2961 				so = shift + offset;
2962 				sib_core = get_core_id(so);
2963 				if (sib_core == thiscpu->physical_core_id) {
2964 					CPU_SET_S(so, size, thiscpu->put_ids);
2965 					if ((so != cpu) && (cpus[so].thread_id < 0))
2966 						cpus[so].thread_id = thread_id++;
2967 				}
2968 			}
2969 		}
2970 	} while (!strncmp(&character, ",", 1));
2971 	fclose(filep);
2972 
2973 	return CPU_COUNT_S(size, thiscpu->put_ids);
2974 }
2975 
2976 /*
2977  * run func(thread, core, package) in topology order
2978  * skip non-present cpus
2979  */
2980 
2981 int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
2982 			       struct pkg_data *, struct thread_data *, struct core_data *,
2983 			       struct pkg_data *), struct thread_data *thread_base,
2984 		   struct core_data *core_base, struct pkg_data *pkg_base,
2985 		   struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2)
2986 {
2987 	int retval, pkg_no, node_no, core_no, thread_no;
2988 
2989 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
2990 		for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
2991 			for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
2992 				for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
2993 					struct thread_data *t, *t2;
2994 					struct core_data *c, *c2;
2995 					struct pkg_data *p, *p2;
2996 
2997 					t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
2998 
2999 					if (cpu_is_not_present(t->cpu_id))
3000 						continue;
3001 
3002 					t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no);
3003 
3004 					c = GET_CORE(core_base, core_no, node_no, pkg_no);
3005 					c2 = GET_CORE(core_base2, core_no, node_no, pkg_no);
3006 
3007 					p = GET_PKG(pkg_base, pkg_no);
3008 					p2 = GET_PKG(pkg_base2, pkg_no);
3009 
3010 					retval = func(t, c, p, t2, c2, p2);
3011 					if (retval)
3012 						return retval;
3013 				}
3014 			}
3015 		}
3016 	}
3017 	return 0;
3018 }
3019 
3020 /*
3021  * run func(cpu) on every cpu in /proc/stat
 * return 0, or the first non-zero value returned by func()
3023  */
3024 int for_all_proc_cpus(int (func) (int))
3025 {
3026 	FILE *fp;
3027 	int cpu_num;
3028 	int retval;
3029 
3030 	fp = fopen_or_die(proc_stat, "r");
3031 
3032 	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
3033 	if (retval != 0)
3034 		err(1, "%s: failed to parse format", proc_stat);
3035 
3036 	while (1) {
3037 		retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
3038 		if (retval != 1)
3039 			break;
3040 
3041 		retval = func(cpu_num);
3042 		if (retval) {
3043 			fclose(fp);
3044 			return (retval);
3045 		}
3046 	}
3047 	fclose(fp);
3048 	return 0;
3049 }
3050 
3051 void re_initialize(void)
3052 {
3053 	free_all_buffers();
3054 	setup_all_buffers();
3055 	fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
3056 }
3057 
3058 void set_max_cpu_num(void)
3059 {
3060 	FILE *filep;
3061 	int base_cpu;
3062 	unsigned long dummy;
3063 	char pathname[64];
3064 
3065 	base_cpu = sched_getcpu();
3066 	if (base_cpu < 0)
3067 		err(1, "cannot find calling cpu ID");
3068 	sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", base_cpu);
3069 
3070 	filep = fopen_or_die(pathname, "r");
3071 	topo.max_cpu_num = 0;
3072 	while (fscanf(filep, "%lx,", &dummy) == 1)
3073 		topo.max_cpu_num += BITMASK_SIZE;
3074 	fclose(filep);
3075 	topo.max_cpu_num--;	/* 0 based */
3076 }
3077 
3078 /*
3079  * count_cpus()
 * tally one cpu for each entry seen in /proc/stat
3081  */
3082 int count_cpus(int cpu)
3083 {
3084 	topo.num_cpus++;
3085 	return 0;
3086 }
3087 
3088 int mark_cpu_present(int cpu)
3089 {
3090 	CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
3091 	return 0;
3092 }
3093 
3094 int init_thread_id(int cpu)
3095 {
3096 	cpus[cpu].thread_id = -1;
3097 	return 0;
3098 }
3099 
3100 /*
3101  * snapshot_proc_interrupts()
3102  *
3103  * read and record summary of /proc/interrupts
3104  *
3105  * return 1 if config change requires a restart, else return 0
3106  */
3107 int snapshot_proc_interrupts(void)
3108 {
3109 	static FILE *fp;
3110 	int column, retval;
3111 
3112 	if (fp == NULL)
3113 		fp = fopen_or_die("/proc/interrupts", "r");
3114 	else
3115 		rewind(fp);
3116 
3117 	/* read 1st line of /proc/interrupts to get cpu* name for each column */
3118 	for (column = 0; column < topo.num_cpus; ++column) {
3119 		int cpu_number;
3120 
3121 		retval = fscanf(fp, " CPU%d", &cpu_number);
3122 		if (retval != 1)
3123 			break;
3124 
3125 		if (cpu_number > topo.max_cpu_num) {
3126 			warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
3127 			return 1;
3128 		}
3129 
3130 		irq_column_2_cpu[column] = cpu_number;
3131 		irqs_per_cpu[cpu_number] = 0;
3132 	}
3133 
3134 	/* read /proc/interrupt count lines and sum up irqs per cpu */
3135 	while (1) {
3136 		int column;
3137 		char buf[64];
3138 
		retval = fscanf(fp, " %63s:", buf);	/* flush irq# "N:" */
3140 		if (retval != 1)
3141 			break;
3142 
3143 		/* read the count per cpu */
3144 		for (column = 0; column < topo.num_cpus; ++column) {
3145 
3146 			int cpu_number, irq_count;
3147 
3148 			retval = fscanf(fp, " %d", &irq_count);
3149 			if (retval != 1)
3150 				break;
3151 
3152 			cpu_number = irq_column_2_cpu[column];
3153 			irqs_per_cpu[cpu_number] += irq_count;
3154 
3155 		}
3156 
3157 		while (getc(fp) != '\n') ;	/* flush interrupt description */
3158 
3159 	}
3160 	return 0;
3161 }
3162 
3163 /*
3164  * snapshot_gfx_rc6_ms()
3165  *
3166  * record snapshot of
3167  * /sys/class/drm/card0/power/rc6_residency_ms
3168  *
3169  * return 1 if config change requires a restart, else return 0
3170  */
3171 int snapshot_gfx_rc6_ms(void)
3172 {
3173 	FILE *fp;
3174 	int retval;
3175 
3176 	fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
3177 
3178 	retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
3179 	if (retval != 1)
3180 		err(1, "GFX rc6");
3181 
3182 	fclose(fp);
3183 
3184 	return 0;
3185 }
3186 
3187 /*
3188  * snapshot_gfx_mhz()
3189  *
3190  * record snapshot of
3191  * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
3192  *
3193  * return 1 if config change requires a restart, else return 0
3194  */
3195 int snapshot_gfx_mhz(void)
3196 {
3197 	static FILE *fp;
3198 	int retval;
3199 
3200 	if (fp == NULL)
3201 		fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
3202 	else {
3203 		rewind(fp);
3204 		fflush(fp);
3205 	}
3206 
3207 	retval = fscanf(fp, "%d", &gfx_cur_mhz);
3208 	if (retval != 1)
3209 		err(1, "GFX MHz");
3210 
3211 	return 0;
3212 }
3213 
3214 /*
 * snapshot_gfx_act_mhz()
3216  *
3217  * record snapshot of
3218  * /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
3219  *
3220  * return 1 if config change requires a restart, else return 0
3221  */
3222 int snapshot_gfx_act_mhz(void)
3223 {
3224 	static FILE *fp;
3225 	int retval;
3226 
3227 	if (fp == NULL)
3228 		fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
3229 	else {
3230 		rewind(fp);
3231 		fflush(fp);
3232 	}
3233 
3234 	retval = fscanf(fp, "%d", &gfx_act_mhz);
3235 	if (retval != 1)
3236 		err(1, "GFX ACT MHz");
3237 
3238 	return 0;
3239 }
3240 
3241 /*
 * snapshot_cpu_lpi_us()
3243  *
3244  * record snapshot of
3245  * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
3246  */
3247 int snapshot_cpu_lpi_us(void)
3248 {
3249 	FILE *fp;
3250 	int retval;
3251 
3252 	fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
3253 
3254 	retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
3255 	if (retval != 1) {
3256 		fprintf(stderr, "Disabling Low Power Idle CPU output\n");
3257 		BIC_NOT_PRESENT(BIC_CPU_LPI);
3258 		fclose(fp);
3259 		return -1;
3260 	}
3261 
3262 	fclose(fp);
3263 
3264 	return 0;
3265 }
3266 
3267 /*
 * snapshot_sys_lpi_us()
3269  *
3270  * record snapshot of sys_lpi_file
3271  */
3272 int snapshot_sys_lpi_us(void)
3273 {
3274 	FILE *fp;
3275 	int retval;
3276 
3277 	fp = fopen_or_die(sys_lpi_file, "r");
3278 
3279 	retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
3280 	if (retval != 1) {
3281 		fprintf(stderr, "Disabling Low Power Idle System output\n");
3282 		BIC_NOT_PRESENT(BIC_SYS_LPI);
3283 		fclose(fp);
3284 		return -1;
3285 	}
3286 	fclose(fp);
3287 
3288 	return 0;
3289 }
3290 
3291 /*
3292  * snapshot /proc and /sys files
3293  *
3294  * return 1 if configuration restart needed, else return 0
3295  */
3296 int snapshot_proc_sysfs_files(void)
3297 {
3298 	if (DO_BIC(BIC_IRQ))
3299 		if (snapshot_proc_interrupts())
3300 			return 1;
3301 
3302 	if (DO_BIC(BIC_GFX_rc6))
3303 		snapshot_gfx_rc6_ms();
3304 
3305 	if (DO_BIC(BIC_GFXMHz))
3306 		snapshot_gfx_mhz();
3307 
3308 	if (DO_BIC(BIC_GFXACTMHz))
3309 		snapshot_gfx_act_mhz();
3310 
3311 	if (DO_BIC(BIC_CPU_LPI))
3312 		snapshot_cpu_lpi_us();
3313 
3314 	if (DO_BIC(BIC_SYS_LPI))
3315 		snapshot_sys_lpi_us();
3316 
3317 	return 0;
3318 }
3319 
3320 int exit_requested;
3321 
3322 static void signal_handler(int signal)
3323 {
3324 	switch (signal) {
3325 	case SIGINT:
3326 		exit_requested = 1;
3327 		if (debug)
3328 			fprintf(stderr, " SIGINT\n");
3329 		break;
3330 	case SIGUSR1:
3331 		if (debug > 1)
3332 			fprintf(stderr, "SIGUSR1\n");
3333 		break;
3334 	}
3335 }
3336 
3337 void setup_signal_handler(void)
3338 {
3339 	struct sigaction sa;
3340 
3341 	memset(&sa, 0, sizeof(sa));
3342 
3343 	sa.sa_handler = &signal_handler;
3344 
3345 	if (sigaction(SIGINT, &sa, NULL) < 0)
3346 		err(1, "sigaction SIGINT");
3347 	if (sigaction(SIGUSR1, &sa, NULL) < 0)
3348 		err(1, "sigaction SIGUSR1");
3349 }
3350 
3351 void do_sleep(void)
3352 {
3353 	struct timeval tout;
3354 	struct timespec rest;
3355 	fd_set readfds;
3356 	int retval;
3357 
3358 	FD_ZERO(&readfds);
3359 	FD_SET(0, &readfds);
3360 
3361 	if (ignore_stdin) {
3362 		nanosleep(&interval_ts, NULL);
3363 		return;
3364 	}
3365 
3366 	tout = interval_tv;
3367 	retval = select(1, &readfds, NULL, NULL, &tout);
3368 
3369 	if (retval == 1) {
3370 		switch (getc(stdin)) {
3371 		case 'q':
3372 			exit_requested = 1;
3373 			break;
3374 		case EOF:
3375 			/*
3376 			 * 'stdin' is a pipe closed on the other end. There
3377 			 * won't be any further input.
3378 			 */
3379 			ignore_stdin = 1;
3380 			/* Sleep the rest of the time */
3381 			rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000);
3382 			rest.tv_nsec = (tout.tv_usec % 1000000) * 1000;
3383 			nanosleep(&rest, NULL);
3384 		}
3385 	}
3386 }
3387 
3388 int get_msr_sum(int cpu, off_t offset, unsigned long long *msr)
3389 {
3390 	int ret, idx;
3391 	unsigned long long msr_cur, msr_last;
3392 
3393 	if (!per_cpu_msr_sum)
3394 		return 1;
3395 
3396 	idx = offset_to_idx(offset);
3397 	if (idx < 0)
3398 		return idx;
3399 	/* get_msr_sum() = sum + (get_msr() - last) */
3400 	ret = get_msr(cpu, offset, &msr_cur);
3401 	if (ret)
3402 		return ret;
3403 	msr_last = per_cpu_msr_sum[cpu].entries[idx].last;
3404 	DELTA_WRAP32(msr_cur, msr_last);
3405 	*msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum;
3406 
3407 	return 0;
3408 }
3409 
3410 timer_t timerid;
3411 
3412 /* Timer callback, update the sum of MSRs periodically. */
3413 static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3414 {
3415 	int i, ret;
3416 	int cpu = t->cpu_id;
3417 
3418 	for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
3419 		unsigned long long msr_cur, msr_last;
3420 		off_t offset;
3421 
3422 		if (!idx_valid(i))
3423 			continue;
3424 		offset = idx_to_offset(i);
3425 		if (offset < 0)
3426 			continue;
3427 		ret = get_msr(cpu, offset, &msr_cur);
3428 		if (ret) {
3429 			fprintf(outf, "Can not update msr(0x%llx)\n", (unsigned long long)offset);
3430 			continue;
3431 		}
3432 
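		/*
		 * Keep only the low 32 bits as the new 'last' sample: the
		 * counters tracked here are 32 bits wide, and DELTA_WRAP32()
		 * below accumulates the wrap-corrected delta into 'sum'.
		 */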
3433 		msr_last = per_cpu_msr_sum[cpu].entries[i].last;
3434 		per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff;
3435 
3436 		DELTA_WRAP32(msr_cur, msr_last);
3437 		per_cpu_msr_sum[cpu].entries[i].sum += msr_last;
3438 	}
3439 	return 0;
3440 }
3441 
3442 static void msr_record_handler(union sigval v)
3443 {
3444 	for_all_cpus(update_msr_sum, EVEN_COUNTERS);
3445 }
3446 
3447 void msr_sum_record(void)
3448 {
3449 	struct itimerspec its;
3450 	struct sigevent sev;
3451 
3452 	per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array));
3453 	if (!per_cpu_msr_sum) {
		fprintf(outf, "Cannot allocate memory for long-running MSR sums.\n");
3455 		return;
3456 	}
3457 	/*
3458 	 * Signal handler might be restricted, so use thread notifier instead.
3459 	 */
3460 	memset(&sev, 0, sizeof(struct sigevent));
3461 	sev.sigev_notify = SIGEV_THREAD;
3462 	sev.sigev_notify_function = msr_record_handler;
3463 
3464 	sev.sigev_value.sival_ptr = &timerid;
3465 	if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) {
3466 		fprintf(outf, "Can not create timer.\n");
3467 		goto release_msr;
3468 	}
3469 
3470 	its.it_value.tv_sec = 0;
3471 	its.it_value.tv_nsec = 1;
3472 	/*
	 * A wraparound time has been calculated earlier.
	 * Some sources state that the peak power for a
	 * microprocessor is usually 1.5 times the TDP rating,
	 * so use 2 * TDP for safety.
3477 	 */
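	/*
	 * For example (hypothetical numbers): if rapl_joule_counter_range
	 * works out to ~1000 seconds, the timer fires every ~500 seconds,
	 * so the energy counters are sampled well before they can wrap.
	 */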
3478 	its.it_interval.tv_sec = rapl_joule_counter_range / 2;
3479 	its.it_interval.tv_nsec = 0;
3480 
3481 	if (timer_settime(timerid, 0, &its, NULL) == -1) {
3482 		fprintf(outf, "Can not set timer.\n");
3483 		goto release_timer;
3484 	}
3485 	return;
3486 
3487 release_timer:
3488 	timer_delete(timerid);
3489 release_msr:
3490 	free(per_cpu_msr_sum);
3491 }
3492 
3493 /*
3494  * set_my_sched_priority(pri)
3495  * return previous
3496  */
3497 int set_my_sched_priority(int priority)
3498 {
3499 	int retval;
3500 	int original_priority;
3501 
3502 	errno = 0;
3503 	original_priority = getpriority(PRIO_PROCESS, 0);
3504 	if (errno && (original_priority == -1))
3505 		err(errno, "getpriority");
3506 
3507 	retval = setpriority(PRIO_PROCESS, 0, priority);
3508 	if (retval)
3509 		err(retval, "setpriority(%d)", priority);
3510 
3511 	errno = 0;
3512 	retval = getpriority(PRIO_PROCESS, 0);
3513 	if (retval != priority)
3514 		err(-1, "getpriority(%d) != setpriority(%d)", retval, priority);
3515 
3516 	return original_priority;
3517 }
3518 
3519 void turbostat_loop()
3520 {
3521 	int retval;
3522 	int restarted = 0;
3523 	int done_iters = 0;
3524 
3525 	setup_signal_handler();
3526 
3527 	/*
3528 	 * elevate own priority for interval mode
3529 	 */
3530 	set_my_sched_priority(-20);
3531 
3532 restart:
3533 	restarted++;
3534 
3535 	snapshot_proc_sysfs_files();
3536 	retval = for_all_cpus(get_counters, EVEN_COUNTERS);
3537 	first_counter_read = 0;
3538 	if (retval < -1) {
3539 		exit(retval);
3540 	} else if (retval == -1) {
3541 		if (restarted > 10) {
3542 			exit(retval);
3543 		}
3544 		re_initialize();
3545 		goto restart;
3546 	}
3547 	restarted = 0;
3548 	done_iters = 0;
3549 	gettimeofday(&tv_even, (struct timezone *)NULL);
3550 
3551 	while (1) {
3552 		if (for_all_proc_cpus(cpu_is_not_present)) {
3553 			re_initialize();
3554 			goto restart;
3555 		}
3556 		do_sleep();
3557 		if (snapshot_proc_sysfs_files())
3558 			goto restart;
3559 		retval = for_all_cpus(get_counters, ODD_COUNTERS);
3560 		if (retval < -1) {
3561 			exit(retval);
3562 		} else if (retval == -1) {
3563 			re_initialize();
3564 			goto restart;
3565 		}
3566 		gettimeofday(&tv_odd, (struct timezone *)NULL);
3567 		timersub(&tv_odd, &tv_even, &tv_delta);
3568 		if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
3569 			re_initialize();
3570 			goto restart;
3571 		}
3572 		compute_average(EVEN_COUNTERS);
3573 		format_all_counters(EVEN_COUNTERS);
3574 		flush_output_stdout();
3575 		if (exit_requested)
3576 			break;
3577 		if (num_iterations && ++done_iters >= num_iterations)
3578 			break;
3579 		do_sleep();
3580 		if (snapshot_proc_sysfs_files())
3581 			goto restart;
3582 		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
3583 		if (retval < -1) {
3584 			exit(retval);
3585 		} else if (retval == -1) {
3586 			re_initialize();
3587 			goto restart;
3588 		}
3589 		gettimeofday(&tv_even, (struct timezone *)NULL);
3590 		timersub(&tv_even, &tv_odd, &tv_delta);
3591 		if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
3592 			re_initialize();
3593 			goto restart;
3594 		}
3595 		compute_average(ODD_COUNTERS);
3596 		format_all_counters(ODD_COUNTERS);
3597 		flush_output_stdout();
3598 		if (exit_requested)
3599 			break;
3600 		if (num_iterations && ++done_iters >= num_iterations)
3601 			break;
3602 	}
3603 }
3604 
3605 void check_dev_msr()
3606 {
3607 	struct stat sb;
3608 	char pathname[32];
3609 
3610 	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3611 	if (stat(pathname, &sb))
3612 		if (system("/sbin/modprobe msr > /dev/null 2>&1"))
3613 			err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
3614 }
3615 
3616 /*
3617  * check for CAP_SYS_RAWIO
3618  * return 0 on success
3619  * return 1 on fail
3620  */
3621 int check_for_cap_sys_rawio(void)
3622 {
3623 	cap_t caps;
3624 	cap_flag_value_t cap_flag_value;
3625 
3626 	caps = cap_get_proc();
3627 	if (caps == NULL)
3628 		err(-6, "cap_get_proc\n");
3629 
3630 	if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value))
3631 		err(-6, "cap_get\n");
3632 
3633 	if (cap_flag_value != CAP_SET) {
3634 		warnx("capget(CAP_SYS_RAWIO) failed," " try \"# setcap cap_sys_rawio=ep %s\"", progname);
3635 		return 1;
3636 	}
3637 
3638 	if (cap_free(caps) == -1)
3639 		err(-6, "cap_free\n");
3640 
3641 	return 0;
3642 }
3643 
3644 void check_permissions(void)
3645 {
3646 	int do_exit = 0;
3647 	char pathname[32];
3648 
3649 	/* check for CAP_SYS_RAWIO */
3650 	do_exit += check_for_cap_sys_rawio();
3651 
3652 	/* test file permissions */
3653 	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3654 	if (euidaccess(pathname, R_OK)) {
3655 		do_exit++;
3656 		warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
3657 	}
3658 
	/* if all else fails, tell them to be root */
3660 	if (do_exit)
3661 		if (getuid() != 0)
3662 			warnx("... or simply run as root");
3663 
3664 	if (do_exit)
3665 		exit(-6);
3666 }
3667 
3668 /*
3669  * NHM adds support for additional MSRs:
3670  *
3671  * MSR_SMI_COUNT                   0x00000034
3672  *
3673  * MSR_PLATFORM_INFO               0x000000ce
3674  * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
3675  *
3676  * MSR_MISC_PWR_MGMT               0x000001aa
3677  *
3678  * MSR_PKG_C3_RESIDENCY            0x000003f8
3679  * MSR_PKG_C6_RESIDENCY            0x000003f9
3680  * MSR_CORE_C3_RESIDENCY           0x000003fc
3681  * MSR_CORE_C6_RESIDENCY           0x000003fd
3682  *
3683  * Side effect:
3684  * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
3685  * sets has_misc_feature_control
3686  */
3687 int probe_nhm_msrs(unsigned int family, unsigned int model)
3688 {
3689 	unsigned long long msr;
3690 	unsigned int base_ratio;
3691 	int *pkg_cstate_limits;
3692 
3693 	if (!genuine_intel)
3694 		return 0;
3695 
3696 	if (family != 6)
3697 		return 0;
3698 
3699 	bclk = discover_bclk(family, model);
3700 
3701 	switch (model) {
3702 	case INTEL_FAM6_NEHALEM:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
3703 	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
3704 		pkg_cstate_limits = nhm_pkg_cstate_limits;
3705 		break;
3706 	case INTEL_FAM6_SANDYBRIDGE:	/* SNB */
3707 	case INTEL_FAM6_SANDYBRIDGE_X:	/* SNB Xeon */
3708 	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
3709 	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
3710 		pkg_cstate_limits = snb_pkg_cstate_limits;
3711 		has_misc_feature_control = 1;
3712 		break;
3713 	case INTEL_FAM6_HASWELL:	/* HSW */
3714 	case INTEL_FAM6_HASWELL_G:	/* HSW */
3715 	case INTEL_FAM6_HASWELL_X:	/* HSX */
3716 	case INTEL_FAM6_HASWELL_L:	/* HSW */
3717 	case INTEL_FAM6_BROADWELL:	/* BDW */
3718 	case INTEL_FAM6_BROADWELL_G:	/* BDW */
3719 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
3720 	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
3721 	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
3722 		pkg_cstate_limits = hsw_pkg_cstate_limits;
3723 		has_misc_feature_control = 1;
3724 		break;
3725 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
3726 		pkg_cstate_limits = skx_pkg_cstate_limits;
3727 		has_misc_feature_control = 1;
3728 		break;
3729 	case INTEL_FAM6_ICELAKE_X:	/* ICX */
3730 		pkg_cstate_limits = icx_pkg_cstate_limits;
3731 		has_misc_feature_control = 1;
3732 		break;
3733 	case INTEL_FAM6_ATOM_SILVERMONT:	/* BYT */
3734 		no_MSR_MISC_PWR_MGMT = 1;
3735 	case INTEL_FAM6_ATOM_SILVERMONT_D:	/* AVN */
3736 		pkg_cstate_limits = slv_pkg_cstate_limits;
3737 		break;
3738 	case INTEL_FAM6_ATOM_AIRMONT:	/* AMT */
3739 		pkg_cstate_limits = amt_pkg_cstate_limits;
3740 		no_MSR_MISC_PWR_MGMT = 1;
3741 		break;
3742 	case INTEL_FAM6_XEON_PHI_KNL:	/* PHI */
3743 		pkg_cstate_limits = phi_pkg_cstate_limits;
3744 		break;
3745 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
3746 	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3747 	case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
3748 	case INTEL_FAM6_ATOM_TREMONT:	/* EHL */
3749 	case INTEL_FAM6_ATOM_TREMONT_D:	/* JVL */
3750 		pkg_cstate_limits = glm_pkg_cstate_limits;
3751 		break;
3752 	default:
3753 		return 0;
3754 	}
3755 	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
3756 	pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
3757 
3758 	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
3759 	base_ratio = (msr >> 8) & 0xFF;
3760 
3761 	base_hz = base_ratio * bclk * 1000000;
3762 	has_base_hz = 1;
3763 	return 1;
3764 }
3765 
3766 /*
3767  * SLV client has support for unique MSRs:
3768  *
3769  * MSR_CC6_DEMOTION_POLICY_CONFIG
3770  * MSR_MC6_DEMOTION_POLICY_CONFIG
3771  */
3772 
3773 int has_slv_msrs(unsigned int family, unsigned int model)
3774 {
3775 	if (!genuine_intel)
3776 		return 0;
3777 
3778 	switch (model) {
3779 	case INTEL_FAM6_ATOM_SILVERMONT:
3780 	case INTEL_FAM6_ATOM_SILVERMONT_MID:
3781 	case INTEL_FAM6_ATOM_AIRMONT_MID:
3782 		return 1;
3783 	}
3784 	return 0;
3785 }
3786 
3787 int is_dnv(unsigned int family, unsigned int model)
3788 {
3789 
3790 	if (!genuine_intel)
3791 		return 0;
3792 
3793 	switch (model) {
3794 	case INTEL_FAM6_ATOM_GOLDMONT_D:
3795 		return 1;
3796 	}
3797 	return 0;
3798 }
3799 
3800 int is_bdx(unsigned int family, unsigned int model)
3801 {
3802 
3803 	if (!genuine_intel)
3804 		return 0;
3805 
3806 	switch (model) {
3807 	case INTEL_FAM6_BROADWELL_X:
3808 		return 1;
3809 	}
3810 	return 0;
3811 }
3812 
3813 int is_skx(unsigned int family, unsigned int model)
3814 {
3815 
3816 	if (!genuine_intel)
3817 		return 0;
3818 
3819 	switch (model) {
3820 	case INTEL_FAM6_SKYLAKE_X:
3821 		return 1;
3822 	}
3823 	return 0;
3824 }
3825 
3826 int is_icx(unsigned int family, unsigned int model)
3827 {
3828 
3829 	if (!genuine_intel)
3830 		return 0;
3831 
3832 	switch (model) {
3833 	case INTEL_FAM6_ICELAKE_X:
3834 		return 1;
3835 	}
3836 	return 0;
3837 }
3838 
3839 int is_ehl(unsigned int family, unsigned int model)
3840 {
3841 	if (!genuine_intel)
3842 		return 0;
3843 
3844 	switch (model) {
3845 	case INTEL_FAM6_ATOM_TREMONT:
3846 		return 1;
3847 	}
3848 	return 0;
3849 }
3850 
3851 int is_jvl(unsigned int family, unsigned int model)
3852 {
3853 	if (!genuine_intel)
3854 		return 0;
3855 
3856 	switch (model) {
3857 	case INTEL_FAM6_ATOM_TREMONT_D:
3858 		return 1;
3859 	}
3860 	return 0;
3861 }
3862 
3863 int has_turbo_ratio_limit(unsigned int family, unsigned int model)
3864 {
3865 	if (has_slv_msrs(family, model))
3866 		return 0;
3867 
3868 	switch (model) {
3869 		/* Nehalem compatible, but do not include turbo-ratio limit support */
3870 	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
3871 	case INTEL_FAM6_XEON_PHI_KNL:	/* PHI - Knights Landing (different MSR definition) */
3872 		return 0;
3873 	default:
3874 		return 1;
3875 	}
3876 }
3877 
3878 int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
3879 {
3880 	if (has_slv_msrs(family, model))
3881 		return 1;
3882 
3883 	return 0;
3884 }
3885 
3886 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
3887 {
3888 	if (!genuine_intel)
3889 		return 0;
3890 
3891 	if (family != 6)
3892 		return 0;
3893 
3894 	switch (model) {
3895 	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
3896 	case INTEL_FAM6_HASWELL_X:	/* HSW Xeon */
3897 		return 1;
3898 	default:
3899 		return 0;
3900 	}
3901 }
3902 
3903 int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
3904 {
3905 	if (!genuine_intel)
3906 		return 0;
3907 
3908 	if (family != 6)
3909 		return 0;
3910 
3911 	switch (model) {
3912 	case INTEL_FAM6_HASWELL_X:	/* HSW Xeon */
3913 		return 1;
3914 	default:
3915 		return 0;
3916 	}
3917 }
3918 
3919 int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
3920 {
3921 	if (!genuine_intel)
3922 		return 0;
3923 
3924 	if (family != 6)
3925 		return 0;
3926 
3927 	switch (model) {
3928 	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
3929 		return 1;
3930 	default:
3931 		return 0;
3932 	}
3933 }
3934 
3935 int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
3936 {
3937 	if (!genuine_intel)
3938 		return 0;
3939 
3940 	if (family != 6)
3941 		return 0;
3942 
3943 	switch (model) {
3944 	case INTEL_FAM6_ATOM_GOLDMONT:
3945 	case INTEL_FAM6_SKYLAKE_X:
3946 	case INTEL_FAM6_ICELAKE_X:
3947 		return 1;
3948 	default:
3949 		return 0;
3950 	}
3951 }
3952 
3953 int has_config_tdp(unsigned int family, unsigned int model)
3954 {
3955 	if (!genuine_intel)
3956 		return 0;
3957 
3958 	if (family != 6)
3959 		return 0;
3960 
3961 	switch (model) {
3962 	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
3963 	case INTEL_FAM6_HASWELL:	/* HSW */
3964 	case INTEL_FAM6_HASWELL_X:	/* HSX */
3965 	case INTEL_FAM6_HASWELL_L:	/* HSW */
3966 	case INTEL_FAM6_HASWELL_G:	/* HSW */
3967 	case INTEL_FAM6_BROADWELL:	/* BDW */
3968 	case INTEL_FAM6_BROADWELL_G:	/* BDW */
3969 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
3970 	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
3971 	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
3972 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
3973 	case INTEL_FAM6_ICELAKE_X:	/* ICX */
3974 
3975 	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
3976 		return 1;
3977 	default:
3978 		return 0;
3979 	}
3980 }
3981 
3982 /*
3983  * tcc_offset_bits:
3984  * 0: Tcc Offset not supported (Default)
3985  * 6: Bit 29:24 of MSR_PLATFORM_INFO
3986  * 4: Bit 27:24 of MSR_PLATFORM_INFO
3987  */
3988 void check_tcc_offset(int model)
3989 {
3990 	unsigned long long msr;
3991 
3992 	if (!genuine_intel)
3993 		return;
3994 
3995 	switch (model) {
3996 	case INTEL_FAM6_SKYLAKE_L:
3997 	case INTEL_FAM6_SKYLAKE:
3998 	case INTEL_FAM6_KABYLAKE_L:
3999 	case INTEL_FAM6_KABYLAKE:
4000 	case INTEL_FAM6_ICELAKE_L:
4001 	case INTEL_FAM6_ICELAKE:
4002 	case INTEL_FAM6_TIGERLAKE_L:
4003 	case INTEL_FAM6_TIGERLAKE:
4004 	case INTEL_FAM6_COMETLAKE:
4005 		if (!get_msr(base_cpu, MSR_PLATFORM_INFO, &msr)) {
4006 			msr = (msr >> 30) & 1;
4007 			if (msr)
4008 				tcc_offset_bits = 6;
4009 		}
4010 		return;
4011 	default:
4012 		return;
4013 	}
4014 }
4015 
4016 static void remove_underbar(char *s)
4017 {
4018 	char *to = s;
4019 
4020 	while (*s) {
4021 		if (*s != '_')
4022 			*to++ = *s;
4023 		s++;
4024 	}
4025 
4026 	*to = 0;
4027 }
4028 
4029 static void dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
4030 {
4031 	if (!do_nhm_platform_info)
4032 		return;
4033 
4034 	dump_nhm_platform_info();
4035 
4036 	if (has_hsw_turbo_ratio_limit(family, model))
4037 		dump_hsw_turbo_ratio_limits();
4038 
4039 	if (has_ivt_turbo_ratio_limit(family, model))
4040 		dump_ivt_turbo_ratio_limits();
4041 
4042 	if (has_turbo_ratio_limit(family, model))
4043 		dump_turbo_ratio_limits(family, model);
4044 
4045 	if (has_atom_turbo_ratio_limit(family, model))
4046 		dump_atom_turbo_ratio_limits();
4047 
4048 	if (has_knl_turbo_ratio_limit(family, model))
4049 		dump_knl_turbo_ratio_limits();
4050 
4051 	if (has_config_tdp(family, model))
4052 		dump_config_tdp();
4053 
4054 	dump_nhm_cst_cfg();
4055 }
4056 
4057 static void dump_sysfs_file(char *path)
4058 {
4059 	FILE *input;
4060 	char cpuidle_buf[64];
4061 
4062 	input = fopen(path, "r");
4063 	if (input == NULL) {
4064 		if (debug)
4065 			fprintf(outf, "NSFOD %s\n", path);
4066 		return;
4067 	}
4068 	if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
4069 		err(1, "%s: failed to read file", path);
4070 	fclose(input);
4071 
4072 	fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
4073 }
4074 
4075 static void dump_sysfs_cstate_config(void)
4076 {
4077 	char path[64];
4078 	char name_buf[16];
4079 	char desc[64];
4080 	FILE *input;
4081 	int state;
4082 	char *sp;
4083 
4084 	if (access("/sys/devices/system/cpu/cpuidle", R_OK)) {
4085 		fprintf(outf, "cpuidle not loaded\n");
4086 		return;
4087 	}
4088 
4089 	dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver");
4090 	dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor");
4091 	dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro");
4092 
4093 	for (state = 0; state < 10; ++state) {
4094 
4095 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
4096 		input = fopen(path, "r");
4097 		if (input == NULL)
4098 			continue;
4099 		if (!fgets(name_buf, sizeof(name_buf), input))
4100 			err(1, "%s: failed to read file", path);
4101 
4102 		/* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
4103 		sp = strchr(name_buf, '-');
4104 		if (!sp)
4105 			sp = strchrnul(name_buf, '\n');
4106 		*sp = '\0';
4107 		fclose(input);
4108 
4109 		remove_underbar(name_buf);
4110 
4111 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state);
4112 		input = fopen(path, "r");
4113 		if (input == NULL)
4114 			continue;
4115 		if (!fgets(desc, sizeof(desc), input))
4116 			err(1, "%s: failed to read file", path);
4117 
4118 		fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
4119 		fclose(input);
4120 	}
4121 }
4122 
4123 static void dump_sysfs_pstate_config(void)
4124 {
4125 	char path[64];
4126 	char driver_buf[64];
4127 	char governor_buf[64];
4128 	FILE *input;
4129 	int turbo;
4130 
4131 	sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", base_cpu);
4132 	input = fopen(path, "r");
4133 	if (input == NULL) {
4134 		fprintf(outf, "NSFOD %s\n", path);
4135 		return;
4136 	}
4137 	if (!fgets(driver_buf, sizeof(driver_buf), input))
4138 		err(1, "%s: failed to read file", path);
4139 	fclose(input);
4140 
4141 	sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", base_cpu);
4142 	input = fopen(path, "r");
4143 	if (input == NULL) {
4144 		fprintf(outf, "NSFOD %s\n", path);
4145 		return;
4146 	}
4147 	if (!fgets(governor_buf, sizeof(governor_buf), input))
4148 		err(1, "%s: failed to read file", path);
4149 	fclose(input);
4150 
4151 	fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
4152 	fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);
4153 
4154 	sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
4155 	input = fopen(path, "r");
4156 	if (input != NULL) {
4157 		if (fscanf(input, "%d", &turbo) != 1)
4158 			err(1, "%s: failed to parse number from file", path);
4159 		fprintf(outf, "cpufreq boost: %d\n", turbo);
4160 		fclose(input);
4161 	}
4162 
4163 	sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
4164 	input = fopen(path, "r");
4165 	if (input != NULL) {
4166 		if (fscanf(input, "%d", &turbo) != 1)
4167 			err(1, "%s: failed to parse number from file", path);
4168 		fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
4169 		fclose(input);
4170 	}
4171 }
4172 
4173 /*
4174  * print_epb()
4175  * Decode the ENERGY_PERF_BIAS MSR
4176  */
4177 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4178 {
4179 	char *epb_string;
4180 	int cpu, epb;
4181 
4182 	if (!has_epb)
4183 		return 0;
4184 
4185 	cpu = t->cpu_id;
4186 
4187 	/* EPB is per-package */
4188 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4189 		return 0;
4190 
4191 	if (cpu_migrate(cpu)) {
4192 		fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu);
4193 		return -1;
4194 	}
4195 
4196 	epb = get_epb(cpu);
4197 	if (epb < 0)
4198 		return 0;
4199 
4200 	switch (epb) {
4201 	case ENERGY_PERF_BIAS_PERFORMANCE:
4202 		epb_string = "performance";
4203 		break;
4204 	case ENERGY_PERF_BIAS_NORMAL:
4205 		epb_string = "balanced";
4206 		break;
4207 	case ENERGY_PERF_BIAS_POWERSAVE:
4208 		epb_string = "powersave";
4209 		break;
4210 	default:
4211 		epb_string = "custom";
4212 		break;
4213 	}
4214 	fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string);
4215 
4216 	return 0;
4217 }
4218 
4219 /*
4220  * print_hwp()
4221  * Decode the MSR_HWP_CAPABILITIES
4222  */
4223 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4224 {
4225 	unsigned long long msr;
4226 	int cpu;
4227 
4228 	if (!has_hwp)
4229 		return 0;
4230 
4231 	cpu = t->cpu_id;
4232 
4233 	/* MSR_HWP_CAPABILITIES is per-package */
4234 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4235 		return 0;
4236 
4237 	if (cpu_migrate(cpu)) {
4238 		fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu);
4239 		return -1;
4240 	}
4241 
4242 	if (get_msr(cpu, MSR_PM_ENABLE, &msr))
4243 		return 0;
4244 
4245 	fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu, msr, (msr & (1 << 0)) ? "" : "No-");
4246 
	/* MSR_PM_ENABLE[0] == 1 if HWP is enabled and HWP MSRs are visible */
4248 	if ((msr & (1 << 0)) == 0)
4249 		return 0;
4250 
4251 	if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
4252 		return 0;
4253 
4254 	fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
4255 		"(high %d guar %d eff %d low %d)\n",
4256 		cpu, msr,
4257 		(unsigned int)HWP_HIGHEST_PERF(msr),
4258 		(unsigned int)HWP_GUARANTEED_PERF(msr),
4259 		(unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr));
4260 
4261 	if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
4262 		return 0;
4263 
4264 	fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
4265 		"(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
4266 		cpu, msr,
4267 		(unsigned int)(((msr) >> 0) & 0xff),
4268 		(unsigned int)(((msr) >> 8) & 0xff),
4269 		(unsigned int)(((msr) >> 16) & 0xff),
4270 		(unsigned int)(((msr) >> 24) & 0xff),
		(unsigned int)(((msr) >> 32) & 0x3ff), (unsigned int)(((msr) >> 42) & 0x1));
4272 
4273 	if (has_hwp_pkg) {
4274 		if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
4275 			return 0;
4276 
4277 		fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
4278 			"(min %d max %d des %d epp 0x%x window 0x%x)\n",
4279 			cpu, msr,
4280 			(unsigned int)(((msr) >> 0) & 0xff),
4281 			(unsigned int)(((msr) >> 8) & 0xff),
4282 			(unsigned int)(((msr) >> 16) & 0xff),
			(unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0x3ff));
4284 	}
4285 	if (has_hwp_notify) {
4286 		if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
4287 			return 0;
4288 
4289 		fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
4290 			"(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
4291 			cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis");
4292 	}
4293 	if (get_msr(cpu, MSR_HWP_STATUS, &msr))
4294 		return 0;
4295 
4296 	fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
4297 		"(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
4298 		cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x2) ? "" : "No-");
4299 
4300 	return 0;
4301 }
4302 
4303 /*
4304  * print_perf_limit()
4305  */
4306 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4307 {
4308 	unsigned long long msr;
4309 	int cpu;
4310 
4311 	cpu = t->cpu_id;
4312 
4313 	/* per-package */
4314 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4315 		return 0;
4316 
4317 	if (cpu_migrate(cpu)) {
4318 		fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu);
4319 		return -1;
4320 	}
4321 
4322 	if (do_core_perf_limit_reasons) {
4323 		get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
4324 		fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
4325 		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
4326 			(msr & 1 << 15) ? "bit15, " : "",
4327 			(msr & 1 << 14) ? "bit14, " : "",
4328 			(msr & 1 << 13) ? "Transitions, " : "",
4329 			(msr & 1 << 12) ? "MultiCoreTurbo, " : "",
4330 			(msr & 1 << 11) ? "PkgPwrL2, " : "",
4331 			(msr & 1 << 10) ? "PkgPwrL1, " : "",
4332 			(msr & 1 << 9) ? "CorePwr, " : "",
4333 			(msr & 1 << 8) ? "Amps, " : "",
4334 			(msr & 1 << 6) ? "VR-Therm, " : "",
4335 			(msr & 1 << 5) ? "Auto-HWP, " : "",
4336 			(msr & 1 << 4) ? "Graphics, " : "",
4337 			(msr & 1 << 2) ? "bit2, " : "",
4338 			(msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : "");
4339 		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
4340 			(msr & 1 << 31) ? "bit31, " : "",
4341 			(msr & 1 << 30) ? "bit30, " : "",
4342 			(msr & 1 << 29) ? "Transitions, " : "",
4343 			(msr & 1 << 28) ? "MultiCoreTurbo, " : "",
4344 			(msr & 1 << 27) ? "PkgPwrL2, " : "",
4345 			(msr & 1 << 26) ? "PkgPwrL1, " : "",
4346 			(msr & 1 << 25) ? "CorePwr, " : "",
4347 			(msr & 1 << 24) ? "Amps, " : "",
4348 			(msr & 1 << 22) ? "VR-Therm, " : "",
4349 			(msr & 1 << 21) ? "Auto-HWP, " : "",
4350 			(msr & 1 << 20) ? "Graphics, " : "",
4351 			(msr & 1 << 18) ? "bit18, " : "",
4352 			(msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : "");
4353 
4354 	}
4355 	if (do_gfx_perf_limit_reasons) {
4356 		get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
4357 		fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
4358 		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
4359 			(msr & 1 << 0) ? "PROCHOT, " : "",
4360 			(msr & 1 << 1) ? "ThermStatus, " : "",
4361 			(msr & 1 << 4) ? "Graphics, " : "",
4362 			(msr & 1 << 6) ? "VR-Therm, " : "",
4363 			(msr & 1 << 8) ? "Amps, " : "",
4364 			(msr & 1 << 9) ? "GFXPwr, " : "",
4365 			(msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
4366 		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
4367 			(msr & 1 << 16) ? "PROCHOT, " : "",
4368 			(msr & 1 << 17) ? "ThermStatus, " : "",
4369 			(msr & 1 << 20) ? "Graphics, " : "",
4370 			(msr & 1 << 22) ? "VR-Therm, " : "",
4371 			(msr & 1 << 24) ? "Amps, " : "",
4372 			(msr & 1 << 25) ? "GFXPwr, " : "",
4373 			(msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
4374 	}
4375 	if (do_ring_perf_limit_reasons) {
4376 		get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
4377 		fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
4378 		fprintf(outf, " (Active: %s%s%s%s%s%s)",
4379 			(msr & 1 << 0) ? "PROCHOT, " : "",
4380 			(msr & 1 << 1) ? "ThermStatus, " : "",
4381 			(msr & 1 << 6) ? "VR-Therm, " : "",
4382 			(msr & 1 << 8) ? "Amps, " : "",
4383 			(msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
4384 		fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
4385 			(msr & 1 << 16) ? "PROCHOT, " : "",
4386 			(msr & 1 << 17) ? "ThermStatus, " : "",
4387 			(msr & 1 << 22) ? "VR-Therm, " : "",
4388 			(msr & 1 << 24) ? "Amps, " : "",
4389 			(msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
4390 	}
4391 	return 0;
4392 }
4393 
4394 #define	RAPL_POWER_GRANULARITY	0x7FFF	/* 15 bit power granularity */
4395 #define	RAPL_TIME_GRANULARITY	0x3F	/* 6 bit time granularity */
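
/*
 * Example: the package TDP field of MSR_PKG_POWER_INFO is decoded as
 *   ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units
 * so a raw value of 0x118 (280) with 1/8 W power units reads as 35 W.
 */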
4396 
4397 double get_tdp_intel(unsigned int model)
4398 {
4399 	unsigned long long msr;
4400 
4401 	if (do_rapl & RAPL_PKG_POWER_INFO)
4402 		if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
4403 			return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
4404 
4405 	switch (model) {
4406 	case INTEL_FAM6_ATOM_SILVERMONT:
4407 	case INTEL_FAM6_ATOM_SILVERMONT_D:
4408 		return 30.0;
4409 	default:
4410 		return 135.0;
4411 	}
4412 }
4413 
4414 double get_tdp_amd(unsigned int family)
4415 {
4416 	/* This is the max stock TDP of HEDT/Server Fam17h+ chips */
4417 	return 280.0;
4418 }
4419 
4420 /*
4421  * rapl_dram_energy_units_probe()
 * Energy units are either hard-coded, or come from the RAPL Energy Unit MSR.
4423  */
4424 static double rapl_dram_energy_units_probe(int model, double rapl_energy_units)
4425 {
4426 	/* only called for genuine_intel, family 6 */
4427 
4428 	switch (model) {
4429 	case INTEL_FAM6_HASWELL_X:	/* HSX */
4430 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
4431 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
4432 	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
4433 	case INTEL_FAM6_ICELAKE_X:	/* ICX */
4434 		return (rapl_dram_energy_units = 15.3 / 1000000);
4435 	default:
4436 		return (rapl_energy_units);
4437 	}
4438 }
4439 
4440 void rapl_probe_intel(unsigned int family, unsigned int model)
4441 {
4442 	unsigned long long msr;
4443 	unsigned int time_unit;
4444 	double tdp;
4445 
4446 	if (family != 6)
4447 		return;
4448 
4449 	switch (model) {
4450 	case INTEL_FAM6_SANDYBRIDGE:
4451 	case INTEL_FAM6_IVYBRIDGE:
4452 	case INTEL_FAM6_HASWELL:	/* HSW */
4453 	case INTEL_FAM6_HASWELL_L:	/* HSW */
4454 	case INTEL_FAM6_HASWELL_G:	/* HSW */
4455 	case INTEL_FAM6_BROADWELL:	/* BDW */
4456 	case INTEL_FAM6_BROADWELL_G:	/* BDW */
4457 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
4458 		if (rapl_joules) {
4459 			BIC_PRESENT(BIC_Pkg_J);
4460 			BIC_PRESENT(BIC_Cor_J);
4461 			BIC_PRESENT(BIC_GFX_J);
4462 		} else {
4463 			BIC_PRESENT(BIC_PkgWatt);
4464 			BIC_PRESENT(BIC_CorWatt);
4465 			BIC_PRESENT(BIC_GFXWatt);
4466 		}
4467 		break;
4468 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
4469 	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4470 		do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
4471 		if (rapl_joules)
4472 			BIC_PRESENT(BIC_Pkg_J);
4473 		else
4474 			BIC_PRESENT(BIC_PkgWatt);
4475 		break;
4476 	case INTEL_FAM6_ATOM_TREMONT:	/* EHL */
4477 		do_rapl =
4478 		    RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
4479 		    | RAPL_GFX | RAPL_PKG_POWER_INFO;
4480 		if (rapl_joules) {
4481 			BIC_PRESENT(BIC_Pkg_J);
4482 			BIC_PRESENT(BIC_Cor_J);
4483 			BIC_PRESENT(BIC_RAM_J);
4484 			BIC_PRESENT(BIC_GFX_J);
4485 		} else {
4486 			BIC_PRESENT(BIC_PkgWatt);
4487 			BIC_PRESENT(BIC_CorWatt);
4488 			BIC_PRESENT(BIC_RAMWatt);
4489 			BIC_PRESENT(BIC_GFXWatt);
4490 		}
4491 		break;
4492 	case INTEL_FAM6_ATOM_TREMONT_D:	/* JVL */
4493 		do_rapl = RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
4494 		BIC_PRESENT(BIC_PKG__);
4495 		if (rapl_joules)
4496 			BIC_PRESENT(BIC_Pkg_J);
4497 		else
4498 			BIC_PRESENT(BIC_PkgWatt);
4499 		break;
4500 	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
4501 	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
4502 		do_rapl =
4503 		    RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
4504 		    | RAPL_GFX | RAPL_PKG_POWER_INFO;
4505 		BIC_PRESENT(BIC_PKG__);
4506 		BIC_PRESENT(BIC_RAM__);
4507 		if (rapl_joules) {
4508 			BIC_PRESENT(BIC_Pkg_J);
4509 			BIC_PRESENT(BIC_Cor_J);
4510 			BIC_PRESENT(BIC_RAM_J);
4511 			BIC_PRESENT(BIC_GFX_J);
4512 		} else {
4513 			BIC_PRESENT(BIC_PkgWatt);
4514 			BIC_PRESENT(BIC_CorWatt);
4515 			BIC_PRESENT(BIC_RAMWatt);
4516 			BIC_PRESENT(BIC_GFXWatt);
4517 		}
4518 		break;
4519 	case INTEL_FAM6_HASWELL_X:	/* HSX */
4520 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
4521 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
4522 	case INTEL_FAM6_ICELAKE_X:	/* ICX */
4523 	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
4524 		do_rapl =
4525 		    RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
4526 		    RAPL_PKG_POWER_INFO;
4527 		BIC_PRESENT(BIC_PKG__);
4528 		BIC_PRESENT(BIC_RAM__);
4529 		if (rapl_joules) {
4530 			BIC_PRESENT(BIC_Pkg_J);
4531 			BIC_PRESENT(BIC_RAM_J);
4532 		} else {
4533 			BIC_PRESENT(BIC_PkgWatt);
4534 			BIC_PRESENT(BIC_RAMWatt);
4535 		}
4536 		break;
4537 	case INTEL_FAM6_SANDYBRIDGE_X:
4538 	case INTEL_FAM6_IVYBRIDGE_X:
4539 		do_rapl =
4540 		    RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS |
4541 		    RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
4542 		BIC_PRESENT(BIC_PKG__);
4543 		BIC_PRESENT(BIC_RAM__);
4544 		if (rapl_joules) {
4545 			BIC_PRESENT(BIC_Pkg_J);
4546 			BIC_PRESENT(BIC_Cor_J);
4547 			BIC_PRESENT(BIC_RAM_J);
4548 		} else {
4549 			BIC_PRESENT(BIC_PkgWatt);
4550 			BIC_PRESENT(BIC_CorWatt);
4551 			BIC_PRESENT(BIC_RAMWatt);
4552 		}
4553 		break;
4554 	case INTEL_FAM6_ATOM_SILVERMONT:	/* BYT */
4555 	case INTEL_FAM6_ATOM_SILVERMONT_D:	/* AVN */
4556 		do_rapl = RAPL_PKG | RAPL_CORES;
4557 		if (rapl_joules) {
4558 			BIC_PRESENT(BIC_Pkg_J);
4559 			BIC_PRESENT(BIC_Cor_J);
4560 		} else {
4561 			BIC_PRESENT(BIC_PkgWatt);
4562 			BIC_PRESENT(BIC_CorWatt);
4563 		}
4564 		break;
4565 	case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
4566 		do_rapl =
4567 		    RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
4568 		    RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
4569 		BIC_PRESENT(BIC_PKG__);
4570 		BIC_PRESENT(BIC_RAM__);
4571 		if (rapl_joules) {
4572 			BIC_PRESENT(BIC_Pkg_J);
4573 			BIC_PRESENT(BIC_Cor_J);
4574 			BIC_PRESENT(BIC_RAM_J);
4575 		} else {
4576 			BIC_PRESENT(BIC_PkgWatt);
4577 			BIC_PRESENT(BIC_CorWatt);
4578 			BIC_PRESENT(BIC_RAMWatt);
4579 		}
4580 		break;
4581 	default:
4582 		return;
4583 	}
4584 
4585 	/* units on package 0, verify later other packages match */
4586 	if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
4587 		return;
4588 
4589 	rapl_power_units = 1.0 / (1 << (msr & 0xF));
4590 	if (model == INTEL_FAM6_ATOM_SILVERMONT)
4591 		rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
4592 	else
4593 		rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
4594 
4595 	rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
4596 
4597 	time_unit = msr >> 16 & 0xF;
4598 	if (time_unit == 0)
4599 		time_unit = 0xA;
4600 
4601 	rapl_time_units = 1.0 / (1 << (time_unit));
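
	/*
	 * Worked example (a commonly seen value, not read here): an
	 * MSR_RAPL_POWER_UNIT of 0x000A0E03 decodes to 1/8 W power units,
	 * 1/16384 J (~61 uJ) energy units and 1/1024 sec time units.
	 */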
4602 
4603 	tdp = get_tdp_intel(model);
4604 
4605 	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
4606 	if (!quiet)
4607 		fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
4608 }
4609 
4610 void rapl_probe_amd(unsigned int family, unsigned int model)
4611 {
4612 	unsigned long long msr;
4613 	unsigned int eax, ebx, ecx, edx;
4614 	unsigned int has_rapl = 0;
4615 	double tdp;
4616 
4617 	if (max_extended_level >= 0x80000007) {
4618 		__cpuid(0x80000007, eax, ebx, ecx, edx);
4619 		/* RAPL (Fam 17h+) */
4620 		has_rapl = edx & (1 << 14);
4621 	}
4622 
4623 	if (!has_rapl || family < 0x17)
4624 		return;
4625 
4626 	do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
4627 	if (rapl_joules) {
4628 		BIC_PRESENT(BIC_Pkg_J);
4629 		BIC_PRESENT(BIC_Cor_J);
4630 	} else {
4631 		BIC_PRESENT(BIC_PkgWatt);
4632 		BIC_PRESENT(BIC_CorWatt);
4633 	}
4634 
4635 	if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
4636 		return;
4637 
4638 	rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf));
4639 	rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
4640 	rapl_power_units = ldexp(1.0, -(msr & 0xf));
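
	/*
	 * ldexp(1.0, -e) is 1.0 / (1 << e); e.g. an energy-unit field of
	 * 16 (typical for Fam17h) gives 2^-16 J, about 15.3 uJ per LSB.
	 */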
4641 
4642 	tdp = get_tdp_amd(family);
4643 
4644 	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
4645 	if (!quiet)
4646 		fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
4647 }
4648 
4649 /*
4650  * rapl_probe()
4651  *
4652  * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
4653  */
4654 void rapl_probe(unsigned int family, unsigned int model)
4655 {
4656 	if (genuine_intel)
4657 		rapl_probe_intel(family, model);
4658 	if (authentic_amd || hygon_genuine)
4659 		rapl_probe_amd(family, model);
4660 }
4661 
4662 void perf_limit_reasons_probe(unsigned int family, unsigned int model)
4663 {
4664 	if (!genuine_intel)
4665 		return;
4666 
4667 	if (family != 6)
4668 		return;
4669 
4670 	switch (model) {
4671 	case INTEL_FAM6_HASWELL:	/* HSW */
4672 	case INTEL_FAM6_HASWELL_L:	/* HSW */
4673 	case INTEL_FAM6_HASWELL_G:	/* HSW */
4674 		do_gfx_perf_limit_reasons = 1;
4675 	case INTEL_FAM6_HASWELL_X:	/* HSX */
4676 		do_core_perf_limit_reasons = 1;
4677 		do_ring_perf_limit_reasons = 1;
4678 	default:
4679 		return;
4680 	}
4681 }
4682 
4683 void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
4684 {
4685 	if (is_skx(family, model) || is_bdx(family, model) || is_icx(family, model))
4686 		has_automatic_cstate_conversion = 1;
4687 }
4688 
4689 void prewake_cstate_probe(unsigned int family, unsigned int model)
4690 {
4691 	if (is_icx(family, model))
4692 		dis_cstate_prewake = 1;
4693 }
4694 
4695 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4696 {
4697 	unsigned long long msr;
4698 	unsigned int dts, dts2;
4699 	int cpu;
4700 
4701 	if (!(do_dts || do_ptm))
4702 		return 0;
4703 
4704 	cpu = t->cpu_id;
4705 
4706 	/* DTS is per-core, no need to print for each thread */
4707 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
4708 		return 0;
4709 
4710 	if (cpu_migrate(cpu)) {
4711 		fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
4712 		return -1;
4713 	}
4714 
4715 	if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
4716 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
4717 			return 0;
4718 
4719 		dts = (msr >> 16) & 0x7F;
4720 		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts);
4721 
4722 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
4723 			return 0;
4724 
4725 		dts = (msr >> 16) & 0x7F;
4726 		dts2 = (msr >> 8) & 0x7F;
4727 		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
4728 			cpu, msr, tj_max - dts, tj_max - dts2);
4729 	}
4730 
4731 	if (do_dts && debug) {
4732 		unsigned int resolution;
4733 
4734 		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
4735 			return 0;
4736 
4737 		dts = (msr >> 16) & 0x7F;
4738 		resolution = (msr >> 27) & 0xF;
4739 		fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
4740 			cpu, msr, tj_max - dts, resolution);
4741 
4742 		if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
4743 			return 0;
4744 
4745 		dts = (msr >> 16) & 0x7F;
4746 		dts2 = (msr >> 8) & 0x7F;
4747 		fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
4748 			cpu, msr, tj_max - dts, tj_max - dts2);
4749 	}
4750 
4751 	return 0;
4752 }
4753 
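/*
 * The RAPL time-window fields decoded below use the encoding
 *   window = (1 + Z/4) * 2^Y * rapl_time_units
 * where Y is bits 21:17 and Z is bits 23:22 of the power-limit MSR.
 * Example: Y = 10, Z = 2 with 1/1024 sec time units gives 1.5 seconds.
 */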
4754 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
4755 {
4756 	fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n",
4757 		cpu, label,
4758 		((msr >> 15) & 1) ? "EN" : "DIS",
4759 		((msr >> 0) & 0x7FFF) * rapl_power_units,
4760 		(1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
4761 		(((msr >> 16) & 1) ? "EN" : "DIS"));
4762 
4763 	return;
4764 }
4765 
4766 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4767 {
4768 	unsigned long long msr;
4769 	const char *msr_name;
4770 	int cpu;
4771 
4772 	if (!do_rapl)
4773 		return 0;
4774 
4775 	/* RAPL counters are per package, so print only for 1st thread/package */
4776 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4777 		return 0;
4778 
4779 	cpu = t->cpu_id;
4780 	if (cpu_migrate(cpu)) {
4781 		fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu);
4782 		return -1;
4783 	}
4784 
4785 	if (do_rapl & RAPL_AMD_F17H) {
4786 		msr_name = "MSR_RAPL_PWR_UNIT";
4787 		if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
4788 			return -1;
4789 	} else {
4790 		msr_name = "MSR_RAPL_POWER_UNIT";
4791 		if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
4792 			return -1;
4793 	}
4794 
4795 	fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr,
4796 		rapl_power_units, rapl_energy_units, rapl_time_units);
4797 
4798 	if (do_rapl & RAPL_PKG_POWER_INFO) {
4799 
4800 		if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
4801 			return -5;
4802 
4803 		fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4804 			cpu, msr,
4805 			((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4806 			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4807 			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4808 			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4809 
4810 	}
4811 	if (do_rapl & RAPL_PKG) {
4812 
4813 		if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
4814 			return -9;
4815 
4816 		fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
4817 			cpu, msr, (msr >> 63) & 1 ? "" : "UN");
4818 
4819 		print_power_limit_msr(cpu, msr, "PKG Limit #1");
4820 		fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n",
4821 			cpu,
4822 			((msr >> 47) & 1) ? "EN" : "DIS",
4823 			((msr >> 32) & 0x7FFF) * rapl_power_units,
4824 			(1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
4825 			((msr >> 48) & 1) ? "EN" : "DIS");
4826 
4827 		if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr))
4828 			return -9;
4829 
4830 		fprintf(outf, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu, msr);
4831 		fprintf(outf, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n",
4832 			cpu,
4833 			((msr >> 0) & 0x1FFF) * rapl_power_units,
4834 			(msr >> 31) & 1 ? "" : "UN");
4835 	}
4836 
4837 	if (do_rapl & RAPL_DRAM_POWER_INFO) {
4838 		if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
4839 			return -6;
4840 
4841 		fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
4842 			cpu, msr,
4843 			((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4844 			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4845 			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
4846 			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
4847 	}
4848 	if (do_rapl & RAPL_DRAM) {
4849 		if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
4850 			return -9;
4851 		fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
4852 			cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4853 
4854 		print_power_limit_msr(cpu, msr, "DRAM Limit");
4855 	}
4856 	if (do_rapl & RAPL_CORE_POLICY) {
4857 		if (get_msr(cpu, MSR_PP0_POLICY, &msr))
4858 			return -7;
4859 
4860 		fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
4861 	}
4862 	if (do_rapl & RAPL_CORES_POWER_LIMIT) {
4863 		if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
4864 			return -9;
4865 		fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
4866 			cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4867 		print_power_limit_msr(cpu, msr, "Cores Limit");
4868 	}
4869 	if (do_rapl & RAPL_GFX) {
4870 		if (get_msr(cpu, MSR_PP1_POLICY, &msr))
4871 			return -8;
4872 
4873 		fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
4874 
4875 		if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
4876 			return -9;
4877 		fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
4878 			cpu, msr, (msr >> 31) & 1 ? "" : "UN");
4879 		print_power_limit_msr(cpu, msr, "GFX Limit");
4880 	}
4881 	return 0;
4882 }
4883 
4884 /*
4885  * SNB adds support for additional MSRs:
4886  *
4887  * MSR_PKG_C7_RESIDENCY            0x000003fa
4888  * MSR_CORE_C7_RESIDENCY           0x000003fe
4889  * MSR_PKG_C2_RESIDENCY            0x0000060d
4890  */
4891 
4892 int has_snb_msrs(unsigned int family, unsigned int model)
4893 {
4894 	if (!genuine_intel)
4895 		return 0;
4896 
4897 	switch (model) {
4898 	case INTEL_FAM6_SANDYBRIDGE:
4899 	case INTEL_FAM6_SANDYBRIDGE_X:
4900 	case INTEL_FAM6_IVYBRIDGE:	/* IVB */
4901 	case INTEL_FAM6_IVYBRIDGE_X:	/* IVB Xeon */
4902 	case INTEL_FAM6_HASWELL:	/* HSW */
4903 	case INTEL_FAM6_HASWELL_X:	/* HSW */
4904 	case INTEL_FAM6_HASWELL_L:	/* HSW */
4905 	case INTEL_FAM6_HASWELL_G:	/* HSW */
4906 	case INTEL_FAM6_BROADWELL:	/* BDW */
4907 	case INTEL_FAM6_BROADWELL_G:	/* BDW */
4908 	case INTEL_FAM6_BROADWELL_X:	/* BDX */
4909 	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
4910 	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
4911 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
4912 	case INTEL_FAM6_ICELAKE_X:	/* ICX */
4913 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
4914 	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4915 	case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
4916 	case INTEL_FAM6_ATOM_TREMONT:	/* EHL */
4917 	case INTEL_FAM6_ATOM_TREMONT_D:	/* JVL */
4918 		return 1;
4919 	}
4920 	return 0;
4921 }
4922 
4923 /*
4924  * HSW ULT added support for C8/C9/C10 MSRs:
4925  *
4926  * MSR_PKG_C8_RESIDENCY		0x00000630
4927  * MSR_PKG_C9_RESIDENCY		0x00000631
4928  * MSR_PKG_C10_RESIDENCY	0x00000632
4929  *
4930  * MSR_PKGC8_IRTL		0x00000633
4931  * MSR_PKGC9_IRTL		0x00000634
4932  * MSR_PKGC10_IRTL		0x00000635
4933  *
4934  */
4935 int has_c8910_msrs(unsigned int family, unsigned int model)
4936 {
4937 	if (!genuine_intel)
4938 		return 0;
4939 
4940 	switch (model) {
4941 	case INTEL_FAM6_HASWELL_L:	/* HSW */
4942 	case INTEL_FAM6_BROADWELL:	/* BDW */
4943 	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
4944 	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
4945 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
4946 	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4947 	case INTEL_FAM6_ATOM_TREMONT:	/* EHL */
4948 		return 1;
4949 	}
4950 	return 0;
4951 }
4952 
4953 /*
 * SKL adds support for additional MSRs:
4955  *
4956  * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
4957  * MSR_PKG_ANY_CORE_C0_RES         0x00000659
4958  * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
4959  * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
4960  */
4961 int has_skl_msrs(unsigned int family, unsigned int model)
4962 {
4963 	if (!genuine_intel)
4964 		return 0;
4965 
4966 	switch (model) {
4967 	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
4968 	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
4969 		return 1;
4970 	}
4971 	return 0;
4972 }
4973 
4974 int is_slm(unsigned int family, unsigned int model)
4975 {
4976 	if (!genuine_intel)
4977 		return 0;
4978 	switch (model) {
4979 	case INTEL_FAM6_ATOM_SILVERMONT:	/* BYT */
4980 	case INTEL_FAM6_ATOM_SILVERMONT_D:	/* AVN */
4981 		return 1;
4982 	}
4983 	return 0;
4984 }
4985 
4986 int is_knl(unsigned int family, unsigned int model)
4987 {
4988 	if (!genuine_intel)
4989 		return 0;
4990 	switch (model) {
4991 	case INTEL_FAM6_XEON_PHI_KNL:	/* KNL */
4992 		return 1;
4993 	}
4994 	return 0;
4995 }
4996 
4997 int is_cnl(unsigned int family, unsigned int model)
4998 {
4999 	if (!genuine_intel)
5000 		return 0;
5001 
5002 	switch (model) {
5003 	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
5004 		return 1;
5005 	}
5006 
5007 	return 0;
5008 }
5009 
5010 unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
5011 {
5012 	if (is_knl(family, model))
5013 		return 1024;
5014 	return 1;
5015 }
5016 
5017 #define SLM_BCLK_FREQS 5
5018 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };
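/*
 * MSR_FSB_FREQ[3:0] indexes this table; e.g. an encoding of 1 selects
 * 100.0 MHz, and out-of-range encodings fall back to index 3 below.
 */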
5019 
5020 double slm_bclk(void)
5021 {
5022 	unsigned long long msr = 3;
5023 	unsigned int i;
5024 	double freq;
5025 
5026 	if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
5027 		fprintf(outf, "SLM BCLK: unknown\n");
5028 
5029 	i = msr & 0xf;
5030 	if (i >= SLM_BCLK_FREQS) {
5031 		fprintf(outf, "SLM BCLK[%d] invalid\n", i);
5032 		i = 3;
5033 	}
5034 	freq = slm_freq_table[i];
5035 
5036 	if (!quiet)
5037 		fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
5038 
5039 	return freq;
5040 }
5041 
5042 double discover_bclk(unsigned int family, unsigned int model)
5043 {
5044 	if (has_snb_msrs(family, model) || is_knl(family, model))
5045 		return 100.00;
5046 	else if (is_slm(family, model))
5047 		return slm_bclk();
5048 	else
5049 		return 133.33;
5050 }
5051 
5052 int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
5053 {
5054 	unsigned int eax, ebx, ecx, edx;
5055 
5056 	if (!genuine_intel)
5057 		return 0;
5058 
5059 	if (cpu_migrate(t->cpu_id)) {
5060 		fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id);
5061 		return -1;
5062 	}
5063 
5064 	if (max_level < 0x1a)
5065 		return 0;
5066 
5067 	__cpuid(0x1a, eax, ebx, ecx, edx);
5068 	eax = (eax >> 24) & 0xFF;
5069 	if (eax == 0x20)
5070 		t->is_atom = true;
5071 	return 0;
5072 }
5073 
5074 /*
5075  * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
5076  * the Thermal Control Circuit (TCC) activates.
5077  * This is usually equal to tjMax.
5078  *
5079  * Older processors do not have this MSR, so there we guess,
5080  * but also allow cmdline over-ride with -T.
5081  *
5082  * Several MSR temperature values are in units of degrees-C
5083  * below this value, including the Digital Thermal Sensor (DTS),
5084  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
5085  */
5086 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
5087 {
5088 	unsigned long long msr;
5089 	unsigned int tcc_default, tcc_offset;
5090 	int cpu;
5091 
5092 	/* tj_max is used only for dts or ptm */
5093 	if (!(do_dts || do_ptm))
5094 		return 0;
5095 
5096 	/* this is a per-package concept */
5097 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
5098 		return 0;
5099 
5100 	cpu = t->cpu_id;
5101 	if (cpu_migrate(cpu)) {
5102 		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
5103 		return -1;
5104 	}
5105 
5106 	if (tj_max_override != 0) {
5107 		tj_max = tj_max_override;
5108 		fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu, tj_max);
5109 		return 0;
5110 	}
5111 
5112 	/* Temperature Target MSR is Nehalem and newer only */
5113 	if (!do_nhm_platform_info)
5114 		goto guess;
5115 
5116 	if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
5117 		goto guess;
5118 
5119 	tcc_default = (msr >> 16) & 0xFF;
5120 
5121 	if (!quiet) {
5122 		switch (tcc_offset_bits) {
5123 		case 4:
5124 			tcc_offset = (msr >> 24) & 0xF;
5125 			fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
5126 				cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
5127 			break;
5128 		case 6:
5129 			tcc_offset = (msr >> 24) & 0x3F;
5130 			fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
5131 				cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
5132 			break;
5133 		default:
5134 			fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default);
5135 			break;
5136 		}
5137 	}
5138 
5139 	if (!tcc_default)
5140 		goto guess;
5141 
5142 	tj_max = tcc_default;
5143 
5144 	return 0;
5145 
5146 guess:
5147 	tj_max = TJMAX_DEFAULT;
5148 	fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max);
5149 
5150 	return 0;
5151 }
5152 
5153 void decode_feature_control_msr(void)
5154 {
5155 	unsigned long long msr;
5156 
5157 	if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr))
5158 		fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
5159 			base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : "");
5160 }
5161 
5162 void decode_misc_enable_msr(void)
5163 {
5164 	unsigned long long msr;
5165 
5166 	if (!genuine_intel)
5167 		return;
5168 
5169 	if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
5170 		fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
5171 			base_cpu, msr,
5172 			msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
5173 			msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
5174 			msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
5175 			msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
5176 			msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
5177 }
5178 
5179 void decode_misc_feature_control(void)
5180 {
5181 	unsigned long long msr;
5182 
5183 	if (!has_misc_feature_control)
5184 		return;
5185 
5186 	if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
5187 		fprintf(outf,
5188 			"cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
			base_cpu, msr, msr & (1 << 0) ? "No-" : "", msr & (1 << 1) ? "No-" : "",
			msr & (1 << 2) ? "No-" : "", msr & (1 << 3) ? "No-" : "");
5191 }
5192 
5193 /*
5194  * Decode MSR_MISC_PWR_MGMT
5195  *
5196  * Decode the bits according to the Nehalem documentation
 * bit[0] seems to continue to have the same meaning going forward
5198  * bit[1] less so...
5199  */
5200 void decode_misc_pwr_mgmt_msr(void)
5201 {
5202 	unsigned long long msr;
5203 
5204 	if (!do_nhm_platform_info)
5205 		return;
5206 
5207 	if (no_MSR_MISC_PWR_MGMT)
5208 		return;
5209 
5210 	if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
5211 		fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
5212 			base_cpu, msr,
5213 			msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS");
5214 }
5215 
5216 /*
5217  * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
5218  *
 * These MSRs are present on Silvermont processors,
5220  * Intel Atom processor E3000 series (Baytrail), and friends.
5221  */
5222 void decode_c6_demotion_policy_msr(void)
5223 {
5224 	unsigned long long msr;
5225 
5226 	if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
5227 		fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
5228 			base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
5229 
5230 	if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
5231 		fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
5232 			base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
5233 }
5234 
5235 /*
5236  * When models are the same, for the purpose of turbostat, reuse
5237  */
5238 unsigned int intel_model_duplicates(unsigned int model)
5239 {
5240 
5241 	switch (model) {
5242 	case INTEL_FAM6_NEHALEM_EP:	/* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
5243 	case INTEL_FAM6_NEHALEM:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
5244 	case 0x1F:		/* Core i7 and i5 Processor - Nehalem */
5245 	case INTEL_FAM6_WESTMERE:	/* Westmere Client - Clarkdale, Arrandale */
5246 	case INTEL_FAM6_WESTMERE_EP:	/* Westmere EP - Gulftown */
5247 		return INTEL_FAM6_NEHALEM;
5248 
5249 	case INTEL_FAM6_NEHALEM_EX:	/* Nehalem-EX Xeon - Beckton */
5250 	case INTEL_FAM6_WESTMERE_EX:	/* Westmere-EX Xeon - Eagleton */
5251 		return INTEL_FAM6_NEHALEM_EX;
5252 
5253 	case INTEL_FAM6_XEON_PHI_KNM:
5254 		return INTEL_FAM6_XEON_PHI_KNL;
5255 
5256 	case INTEL_FAM6_BROADWELL_X:
5257 	case INTEL_FAM6_BROADWELL_D:	/* BDX-DE */
5258 		return INTEL_FAM6_BROADWELL_X;
5259 
5260 	case INTEL_FAM6_SKYLAKE_L:
5261 	case INTEL_FAM6_SKYLAKE:
5262 	case INTEL_FAM6_KABYLAKE_L:
5263 	case INTEL_FAM6_KABYLAKE:
5264 	case INTEL_FAM6_COMETLAKE_L:
5265 	case INTEL_FAM6_COMETLAKE:
5266 		return INTEL_FAM6_SKYLAKE_L;
5267 
5268 	case INTEL_FAM6_ICELAKE_L:
5269 	case INTEL_FAM6_ICELAKE_NNPI:
5270 	case INTEL_FAM6_TIGERLAKE_L:
5271 	case INTEL_FAM6_TIGERLAKE:
5272 	case INTEL_FAM6_ROCKETLAKE:
5273 	case INTEL_FAM6_LAKEFIELD:
5274 	case INTEL_FAM6_ALDERLAKE:
5275 	case INTEL_FAM6_ALDERLAKE_L:
5276 		return INTEL_FAM6_CANNONLAKE_L;
5277 
5278 	case INTEL_FAM6_ATOM_TREMONT_L:
5279 		return INTEL_FAM6_ATOM_TREMONT;
5280 
5281 	case INTEL_FAM6_ICELAKE_D:
5282 	case INTEL_FAM6_SAPPHIRERAPIDS_X:
5283 		return INTEL_FAM6_ICELAKE_X;
5284 	}
5285 	return model;
5286 }
5287 
5288 void print_dev_latency(void)
5289 {
5290 	char *path = "/dev/cpu_dma_latency";
5291 	int fd;
5292 	int value;
5293 	int retval;
5294 
5295 	fd = open(path, O_RDONLY);
5296 	if (fd < 0) {
5297 		warn("fopen %s\n", path);
5298 		return;
5299 	}
5300 
5301 	retval = read(fd, (void *)&value, sizeof(int));
5302 	if (retval != sizeof(int)) {
5303 		warn("read %s\n", path);
5304 		close(fd);
5305 		return;
5306 	}
5307 	fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", value, value == 2000000000 ? "default" : "constrained");
5308 
5309 	close(fd);
5310 }
5311 
5312 /*
 * Linux-perf manages the HW instructions-retired counter
5314  * by enabling when requested, and hiding rollover
5315  */
5316 void linux_perf_init(void)
5317 {
5318 	if (!BIC_IS_ENABLED(BIC_IPC))
5319 		return;
5320 
5321 	if (access("/proc/sys/kernel/perf_event_paranoid", F_OK))
5322 		return;
5323 
5324 	fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
5325 	if (fd_instr_count_percpu == NULL)
5326 		err(-1, "calloc fd_instr_count_percpu");
5327 
5328 	BIC_PRESENT(BIC_IPC);
5329 }
5330 
5331 void process_cpuid()
5332 {
5333 	unsigned int eax, ebx, ecx, edx;
5334 	unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
5335 	unsigned int has_turbo;
5336 	unsigned long long ucode_patch = 0;
5337 
5338 	eax = ebx = ecx = edx = 0;
5339 
5340 	__cpuid(0, max_level, ebx, ecx, edx);
5341 
5342 	if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
5343 		genuine_intel = 1;
5344 	else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
5345 		authentic_amd = 1;
5346 	else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e)
5347 		hygon_genuine = 1;
5348 
5349 	if (!quiet)
5350 		fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n",
5351 			(char *)&ebx, (char *)&edx, (char *)&ecx, max_level);
5352 
5353 	__cpuid(1, fms, ebx, ecx, edx);
5354 	family = (fms >> 8) & 0xf;
5355 	model = (fms >> 4) & 0xf;
5356 	stepping = fms & 0xf;
5357 	if (family == 0xf)
5358 		family += (fms >> 20) & 0xff;
5359 	if (family >= 6)
5360 		model += ((fms >> 16) & 0xf) << 4;
5361 	ecx_flags = ecx;
5362 	edx_flags = edx;
5363 
5364 	if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
5365 		warnx("get_msr(UCODE)\n");
5366 
5367 	/*
5368 	 * check max extended function levels of CPUID.
5369 	 * This is needed to check for invariant TSC.
5370 	 * This check is valid for both Intel and AMD.
5371 	 */
5372 	ebx = ecx = edx = 0;
5373 	__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
5374 
5375 	if (!quiet) {
5376 		fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d) microcode 0x%x\n",
5377 			family, model, stepping, family, model, stepping,
5378 			(unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
5379 		fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level);
5380 		fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
5381 			ecx_flags & (1 << 0) ? "SSE3" : "-",
5382 			ecx_flags & (1 << 3) ? "MONITOR" : "-",
5383 			ecx_flags & (1 << 6) ? "SMX" : "-",
5384 			ecx_flags & (1 << 7) ? "EIST" : "-",
5385 			ecx_flags & (1 << 8) ? "TM2" : "-",
5386 			edx_flags & (1 << 4) ? "TSC" : "-",
5387 			edx_flags & (1 << 5) ? "MSR" : "-",
5388 			edx_flags & (1 << 22) ? "ACPI-TM" : "-",
5389 			edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-");
5390 	}
5391 	if (genuine_intel) {
5392 		model_orig = model;
5393 		model = intel_model_duplicates(model);
5394 	}
5395 
5396 	if (!(edx_flags & (1 << 5)))
5397 		errx(1, "CPUID: no MSR");
5398 
5399 	if (max_extended_level >= 0x80000007) {
5400 
5401 		/*
5402 		 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
5403 		 * this check is valid for both Intel and AMD
5404 		 */
5405 		__cpuid(0x80000007, eax, ebx, ecx, edx);
5406 		has_invariant_tsc = edx & (1 << 8);
5407 	}
5408 
5409 	/*
5410 	 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
5411 	 * this check is valid for both Intel and AMD
5412 	 */
5413 
5414 	__cpuid(0x6, eax, ebx, ecx, edx);
5415 	has_aperf = ecx & (1 << 0);
5416 	if (has_aperf) {
5417 		BIC_PRESENT(BIC_Avg_MHz);
5418 		BIC_PRESENT(BIC_Busy);
5419 		BIC_PRESENT(BIC_Bzy_MHz);
5420 	}
5421 	do_dts = eax & (1 << 0);
5422 	if (do_dts)
5423 		BIC_PRESENT(BIC_CoreTmp);
5424 	has_turbo = eax & (1 << 1);
5425 	do_ptm = eax & (1 << 6);
5426 	if (do_ptm)
5427 		BIC_PRESENT(BIC_PkgTmp);
5428 	has_hwp = eax & (1 << 7);
5429 	has_hwp_notify = eax & (1 << 8);
5430 	has_hwp_activity_window = eax & (1 << 9);
5431 	has_hwp_epp = eax & (1 << 10);
5432 	has_hwp_pkg = eax & (1 << 11);
5433 	has_epb = ecx & (1 << 3);
5434 
5435 	if (!quiet)
5436 		fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
5437 			"%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
5438 			has_aperf ? "" : "No-",
5439 			has_turbo ? "" : "No-",
5440 			do_dts ? "" : "No-",
5441 			do_ptm ? "" : "No-",
5442 			has_hwp ? "" : "No-",
5443 			has_hwp_notify ? "" : "No-",
5444 			has_hwp_activity_window ? "" : "No-",
5445 			has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-");
5446 
5447 	if (!quiet)
5448 		decode_misc_enable_msr();
5449 
5450 	if (max_level >= 0x7 && !quiet) {
5451 		int has_sgx;
5452 
5453 		ecx = 0;
5454 
5455 		__cpuid_count(0x7, 0, eax, ebx, ecx, edx);
5456 
5457 		has_sgx = ebx & (1 << 2);
5458 		fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
5459 
5460 		if (has_sgx)
5461 			decode_feature_control_msr();
5462 	}
5463 
5464 	if (max_level >= 0x15) {
5465 		unsigned int eax_crystal;
5466 		unsigned int ebx_tsc;
5467 
5468 		/*
5469 		 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
5470 		 */
5471 		eax_crystal = ebx_tsc = crystal_hz = edx = 0;
5472 		__cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
5473 
5474 		if (ebx_tsc != 0) {
5475 
5476 			if (!quiet)
5477 				fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
5478 					eax_crystal, ebx_tsc, crystal_hz);
5479 
5480 			if (crystal_hz == 0)
5481 				switch (model) {
5482 				case INTEL_FAM6_SKYLAKE_L:	/* SKL */
5483 					crystal_hz = 24000000;	/* 24.0 MHz */
5484 					break;
5485 				case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
5486 					crystal_hz = 25000000;	/* 25.0 MHz */
5487 					break;
5488 				case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
5489 				case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
5490 					crystal_hz = 19200000;	/* 19.2 MHz */
5491 					break;
5492 				default:
5493 					crystal_hz = 0;
5494 				}
5495 
5496 			if (crystal_hz) {
5497 				tsc_hz = (unsigned long long)crystal_hz * ebx_tsc / eax_crystal;
5498 				if (!quiet)
5499 					fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
5500 						tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal);
5501 			}
5502 		}
5503 	}
5504 	if (max_level >= 0x16) {
5505 		unsigned int base_mhz, max_mhz, bus_mhz, edx;
5506 
5507 		/*
5508 		 * CPUID 16H Base MHz, Max MHz, Bus MHz
5509 		 */
5510 		base_mhz = max_mhz = bus_mhz = edx = 0;
5511 
5512 		__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
5513 		if (!quiet)
5514 			fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
5515 				base_mhz, max_mhz, bus_mhz);
5516 	}
5517 
5518 	if (has_aperf)
5519 		aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
5520 
5521 	BIC_PRESENT(BIC_IRQ);
5522 	BIC_PRESENT(BIC_TSC_MHz);
5523 
5524 	if (probe_nhm_msrs(family, model)) {
5525 		do_nhm_platform_info = 1;
5526 		BIC_PRESENT(BIC_CPU_c1);
5527 		BIC_PRESENT(BIC_CPU_c3);
5528 		BIC_PRESENT(BIC_CPU_c6);
5529 		BIC_PRESENT(BIC_SMI);
5530 	}
5531 	do_snb_cstates = has_snb_msrs(family, model);
5532 
5533 	if (do_snb_cstates)
5534 		BIC_PRESENT(BIC_CPU_c7);
5535 
5536 	do_irtl_snb = has_snb_msrs(family, model);
5537 	if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
5538 		BIC_PRESENT(BIC_Pkgpc2);
5539 	if (pkg_cstate_limit >= PCL__3)
5540 		BIC_PRESENT(BIC_Pkgpc3);
5541 	if (pkg_cstate_limit >= PCL__6)
5542 		BIC_PRESENT(BIC_Pkgpc6);
5543 	if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
5544 		BIC_PRESENT(BIC_Pkgpc7);
5545 	if (has_slv_msrs(family, model)) {
5546 		BIC_NOT_PRESENT(BIC_Pkgpc2);
5547 		BIC_NOT_PRESENT(BIC_Pkgpc3);
5548 		BIC_PRESENT(BIC_Pkgpc6);
5549 		BIC_NOT_PRESENT(BIC_Pkgpc7);
5550 		BIC_PRESENT(BIC_Mod_c6);
5551 		use_c1_residency_msr = 1;
5552 	}
5553 	if (is_jvl(family, model)) {
5554 		BIC_NOT_PRESENT(BIC_CPU_c3);
5555 		BIC_NOT_PRESENT(BIC_CPU_c7);
5556 		BIC_NOT_PRESENT(BIC_Pkgpc2);
5557 		BIC_NOT_PRESENT(BIC_Pkgpc3);
5558 		BIC_NOT_PRESENT(BIC_Pkgpc6);
5559 		BIC_NOT_PRESENT(BIC_Pkgpc7);
5560 	}
5561 	if (is_dnv(family, model)) {
5562 		BIC_PRESENT(BIC_CPU_c1);
5563 		BIC_NOT_PRESENT(BIC_CPU_c3);
5564 		BIC_NOT_PRESENT(BIC_Pkgpc3);
5565 		BIC_NOT_PRESENT(BIC_CPU_c7);
5566 		BIC_NOT_PRESENT(BIC_Pkgpc7);
5567 		use_c1_residency_msr = 1;
5568 	}
5569 	if (is_skx(family, model) || is_icx(family, model)) {
5570 		BIC_NOT_PRESENT(BIC_CPU_c3);
5571 		BIC_NOT_PRESENT(BIC_Pkgpc3);
5572 		BIC_NOT_PRESENT(BIC_CPU_c7);
5573 		BIC_NOT_PRESENT(BIC_Pkgpc7);
5574 	}
5575 	if (is_bdx(family, model)) {
5576 		BIC_NOT_PRESENT(BIC_CPU_c7);
5577 		BIC_NOT_PRESENT(BIC_Pkgpc7);
5578 	}
5579 	if (has_c8910_msrs(family, model)) {
5580 		if (pkg_cstate_limit >= PCL__8)
5581 			BIC_PRESENT(BIC_Pkgpc8);
5582 		if (pkg_cstate_limit >= PCL__9)
5583 			BIC_PRESENT(BIC_Pkgpc9);
5584 		if (pkg_cstate_limit >= PCL_10)
5585 			BIC_PRESENT(BIC_Pkgpc10);
5586 	}
5587 	do_irtl_hsw = has_c8910_msrs(family, model);
5588 	if (has_skl_msrs(family, model)) {
5589 		BIC_PRESENT(BIC_Totl_c0);
5590 		BIC_PRESENT(BIC_Any_c0);
5591 		BIC_PRESENT(BIC_GFX_c0);
5592 		BIC_PRESENT(BIC_CPUGFX);
5593 	}
5594 	do_slm_cstates = is_slm(family, model);
5595 	do_knl_cstates = is_knl(family, model);
5596 
5597 	if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) || is_ehl(family, model))
5598 		BIC_NOT_PRESENT(BIC_CPU_c3);
5599 
5600 	if (!quiet)
5601 		decode_misc_pwr_mgmt_msr();
5602 
5603 	if (!quiet && has_slv_msrs(family, model))
5604 		decode_c6_demotion_policy_msr();
5605 
5606 	rapl_probe(family, model);
5607 	perf_limit_reasons_probe(family, model);
5608 	automatic_cstate_conversion_probe(family, model);
5609 
5610 	check_tcc_offset(model_orig);
5611 
5612 	if (!quiet)
5613 		dump_cstate_pstate_config_info(family, model);
5614 
5615 	if (!quiet)
5616 		print_dev_latency();
5617 	if (!quiet)
5618 		dump_sysfs_cstate_config();
5619 	if (!quiet)
5620 		dump_sysfs_pstate_config();
5621 
5622 	if (has_skl_msrs(family, model) || is_ehl(family, model))
5623 		calculate_tsc_tweak();
5624 
5625 	if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
5626 		BIC_PRESENT(BIC_GFX_rc6);
5627 
5628 	if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
5629 		BIC_PRESENT(BIC_GFXMHz);
5630 
5631 	if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
5632 		BIC_PRESENT(BIC_GFXACTMHz);
5633 
5634 	if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
5635 		BIC_PRESENT(BIC_CPU_LPI);
5636 	else
5637 		BIC_NOT_PRESENT(BIC_CPU_LPI);
5638 
5639 	if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
5640 		BIC_PRESENT(BIC_CORE_THROT_CNT);
5641 	else
5642 		BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);
5643 
5644 	if (!access(sys_lpi_file_sysfs, R_OK)) {
5645 		sys_lpi_file = sys_lpi_file_sysfs;
5646 		BIC_PRESENT(BIC_SYS_LPI);
5647 	} else if (!access(sys_lpi_file_debugfs, R_OK)) {
5648 		sys_lpi_file = sys_lpi_file_debugfs;
5649 		BIC_PRESENT(BIC_SYS_LPI);
5650 	} else {
5651 		sys_lpi_file_sysfs = NULL;
5652 		BIC_NOT_PRESENT(BIC_SYS_LPI);
5653 	}
5654 
5655 	if (!quiet)
5656 		decode_misc_feature_control();
5657 
5658 	return;
5659 }
5660 
5661 /*
5662  * In /dev/cpu/, return success for names that are numbers,
5663  * i.e. filter out ".", "..", and "microcode".
5664  */
5665 int dir_filter(const struct dirent *dirp)
5666 {
5667 	if (isdigit(dirp->d_name[0]))
5668 		return 1;
5669 	else
5670 		return 0;
5671 }
5672 
5673 int open_dev_cpu_msr(int dummy1)
5674 {
5675 	return 0;
5676 }
5677 
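/*
 * topology_probe()
 * Enumerate present CPUs and record the package, die, node, core and thread
 * ID of each, sizing the global topo.* fields accordingly.
 */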
5678 void topology_probe()
5679 {
5680 	int i;
5681 	int max_core_id = 0;
5682 	int max_package_id = 0;
5683 	int max_die_id = 0;
5684 	int max_siblings = 0;
5685 
5686 	/* Initialize num_cpus, max_cpu_num */
5687 	set_max_cpu_num();
5688 	topo.num_cpus = 0;
5689 	for_all_proc_cpus(count_cpus);
5690 	if (!summary_only && topo.num_cpus > 1)
5691 		BIC_PRESENT(BIC_CPU);
5692 
5693 	if (debug > 1)
5694 		fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
5695 
5696 	cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology));
5697 	if (cpus == NULL)
5698 		err(1, "calloc cpus");
5699 
5700 	/*
5701 	 * Allocate and initialize cpu_present_set
5702 	 */
5703 	cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
5704 	if (cpu_present_set == NULL)
5705 		err(3, "CPU_ALLOC");
5706 	cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
5707 	CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
5708 	for_all_proc_cpus(mark_cpu_present);
5709 
5710 	/*
5711 	 * Validate that all cpus in cpu_subset are also in cpu_present_set
5712 	 */
5713 	for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
5714 		if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
5715 			if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
5716 				err(1, "cpu%d not present", i);
5717 	}
5718 
5719 	/*
5720 	 * Allocate and initialize cpu_affinity_set
5721 	 */
5722 	cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
5723 	if (cpu_affinity_set == NULL)
5724 		err(3, "CPU_ALLOC");
5725 	cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
5726 	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
5727 
5728 	for_all_proc_cpus(init_thread_id);
5729 
5730 	/*
5731 	 * For online cpus
5732 	 * find max_core_id, max_package_id
5733 	 */
5734 	for (i = 0; i <= topo.max_cpu_num; ++i) {
5735 		int siblings;
5736 
5737 		if (cpu_is_not_present(i)) {
5738 			if (debug > 1)
5739 				fprintf(outf, "cpu%d NOT PRESENT\n", i);
5740 			continue;
5741 		}
5742 
5743 		cpus[i].logical_cpu_id = i;
5744 
5745 		/* get package information */
5746 		cpus[i].physical_package_id = get_physical_package_id(i);
5747 		if (cpus[i].physical_package_id > max_package_id)
5748 			max_package_id = cpus[i].physical_package_id;
5749 
5750 		/* get die information */
5751 		cpus[i].die_id = get_die_id(i);
5752 		if (cpus[i].die_id > max_die_id)
5753 			max_die_id = cpus[i].die_id;
5754 
5755 		/* get numa node information */
5756 		cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
5757 		if (cpus[i].physical_node_id > topo.max_node_num)
5758 			topo.max_node_num = cpus[i].physical_node_id;
5759 
5760 		/* get core information */
5761 		cpus[i].physical_core_id = get_core_id(i);
5762 		if (cpus[i].physical_core_id > max_core_id)
5763 			max_core_id = cpus[i].physical_core_id;
5764 
5765 		/* get thread information */
5766 		siblings = get_thread_siblings(&cpus[i]);
5767 		if (siblings > max_siblings)
5768 			max_siblings = siblings;
5769 		if (cpus[i].thread_id == 0)
5770 			topo.num_cores++;
5771 	}
5772 
5773 	topo.cores_per_node = max_core_id + 1;
5774 	if (debug > 1)
5775 		fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node);
5776 	if (!summary_only && topo.cores_per_node > 1)
5777 		BIC_PRESENT(BIC_Core);
5778 
5779 	topo.num_die = max_die_id + 1;
5780 	if (debug > 1)
5781 		fprintf(outf, "max_die_id %d, sizing for %d die\n", max_die_id, topo.num_die);
5782 	if (!summary_only && topo.num_die > 1)
5783 		BIC_PRESENT(BIC_Die);
5784 
5785 	topo.num_packages = max_package_id + 1;
5786 	if (debug > 1)
5787 		fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages);
5788 	if (!summary_only && topo.num_packages > 1)
5789 		BIC_PRESENT(BIC_Package);
5790 
5791 	set_node_data();
5792 	if (debug > 1)
5793 		fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
5794 	if (!summary_only && topo.nodes_per_pkg > 1)
5795 		BIC_PRESENT(BIC_Node);
5796 
5797 	topo.threads_per_core = max_siblings;
5798 	if (debug > 1)
5799 		fprintf(outf, "max_siblings %d\n", max_siblings);
5800 
5801 	if (debug < 1)
5802 		return;
5803 
5804 	for (i = 0; i <= topo.max_cpu_num; ++i) {
5805 		if (cpu_is_not_present(i))
5806 			continue;
5807 		fprintf(outf,
5808 			"cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n",
5809 			i, cpus[i].physical_package_id, cpus[i].die_id,
5810 			cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, cpus[i].thread_id);
5811 	}
5812 
5813 }
5814 
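/*
 * allocate_counters()
 * Allocate one thread_data per possible thread, one core_data per possible
 * core, and one pkg_data per package for a counter snapshot set.
 */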
5815 void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
5816 {
5817 	int i;
5818 	int num_cores = topo.cores_per_node * topo.nodes_per_pkg * topo.num_packages;
5819 	int num_threads = topo.threads_per_core * num_cores;
5820 
5821 	*t = calloc(num_threads, sizeof(struct thread_data));
5822 	if (*t == NULL)
5823 		goto error;
5824 
5825 	for (i = 0; i < num_threads; i++)
5826 		(*t)[i].cpu_id = -1;
5827 
5828 	*c = calloc(num_cores, sizeof(struct core_data));
5829 	if (*c == NULL)
5830 		goto error;
5831 
5832 	for (i = 0; i < num_cores; i++)
5833 		(*c)[i].core_id = -1;
5834 
5835 	*p = calloc(topo.num_packages, sizeof(struct pkg_data));
5836 	if (*p == NULL)
5837 		goto error;
5838 
5839 	for (i = 0; i < topo.num_packages; i++)
5840 		(*p)[i].package_id = i;
5841 
5842 	return;
5843 error:
5844 	err(1, "calloc counters");
5845 }
5846 
5847 /*
5848  * init_counter()
5849  *
5850  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
5851  */
5852 void init_counter(struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base, int cpu_id)
5853 {
5854 	int pkg_id = cpus[cpu_id].physical_package_id;
5855 	int node_id = cpus[cpu_id].logical_node_id;
5856 	int core_id = cpus[cpu_id].physical_core_id;
5857 	int thread_id = cpus[cpu_id].thread_id;
5858 	struct thread_data *t;
5859 	struct core_data *c;
5860 	struct pkg_data *p;
5861 
5862 	/* Workaround for systems where physical_node_id==-1
5863 	 * and logical_node_id==(-1 - topo.num_cpus)
5864 	 */
5865 	if (node_id < 0)
5866 		node_id = 0;
5867 
5868 	t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
5869 	c = GET_CORE(core_base, core_id, node_id, pkg_id);
5870 	p = GET_PKG(pkg_base, pkg_id);
5871 
5872 	t->cpu_id = cpu_id;
5873 	if (thread_id == 0) {
5874 		t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
5875 		if (cpu_is_first_core_in_package(cpu_id))
5876 			t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
5877 	}
5878 
5879 	c->core_id = core_id;
5880 	p->package_id = pkg_id;
5881 }
5882 
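/*
 * turbostat keeps two counter snapshot sets (EVEN and ODD) and alternates
 * between them; initialize this cpu's entry in both.
 */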
5883 int initialize_counters(int cpu_id)
5884 {
5885 	init_counter(EVEN_COUNTERS, cpu_id);
5886 	init_counter(ODD_COUNTERS, cpu_id);
5887 	return 0;
5888 }
5889 
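/*
 * Size the text output buffer at 2KB per CPU, plus one spare 2KB block.
 */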
5890 void allocate_output_buffer()
5891 {
5892 	output_buffer = calloc(1, (1 + topo.num_cpus) * 2048);
5893 	if (output_buffer == NULL)
5894 		err(-1, "calloc output buffer");
5895 	outp = output_buffer;
5896 }
5897 
5898 void allocate_fd_percpu(void)
5899 {
5900 	fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
5901 	if (fd_percpu == NULL)
5902 		err(-1, "calloc fd_percpu");
5903 }
5904 
5905 void allocate_irq_buffers(void)
5906 {
5907 	irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
5908 	if (irq_column_2_cpu == NULL)
5909 		err(-1, "calloc %d", topo.num_cpus);
5910 
5911 	irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
5912 	if (irqs_per_cpu == NULL)
5913 		err(-1, "calloc %d", topo.max_cpu_num + 1);
5914 }
5915 
5916 void setup_all_buffers(void)
5917 {
5918 	topology_probe();
5919 	allocate_irq_buffers();
5920 	allocate_fd_percpu();
5921 	allocate_counters(&thread_even, &core_even, &package_even);
5922 	allocate_counters(&thread_odd, &core_odd, &package_odd);
5923 	allocate_output_buffer();
5924 	for_all_proc_cpus(initialize_counters);
5925 }
5926 
5927 void set_base_cpu(void)
5928 {
5929 	base_cpu = sched_getcpu();
5930 	if (base_cpu < 0)
5931 		err(-ENODEV, "No valid cpus found");
5932 
5933 	if (debug > 1)
5934 		fprintf(outf, "base_cpu = %d\n", base_cpu);
5935 }
5936 
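/*
 * turbostat_init()
 * One-time setup: probe topology, allocate buffers, verify MSR access,
 * identify the CPU, and dump the initial configuration unless --quiet.
 */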
5937 void turbostat_init()
5938 {
5939 	setup_all_buffers();
5940 	set_base_cpu();
5941 	check_dev_msr();
5942 	check_permissions();
5943 	process_cpuid();
5944 	linux_perf_init();
5945 
5946 	if (!quiet)
5947 		for_all_cpus(print_hwp, ODD_COUNTERS);
5948 
5949 	if (!quiet)
5950 		for_all_cpus(print_epb, ODD_COUNTERS);
5951 
5952 	if (!quiet)
5953 		for_all_cpus(print_perf_limit, ODD_COUNTERS);
5954 
5955 	if (!quiet)
5956 		for_all_cpus(print_rapl, ODD_COUNTERS);
5957 
5958 	for_all_cpus(set_temperature_target, ODD_COUNTERS);
5959 
5960 	for_all_cpus(get_cpu_type, ODD_COUNTERS);
5961 	for_all_cpus(get_cpu_type, EVEN_COUNTERS);
5962 
5963 	if (!quiet)
5964 		for_all_cpus(print_thermal, ODD_COUNTERS);
5965 
5966 	if (!quiet && do_irtl_snb)
5967 		print_irtl();
5968 }
5969 
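/*
 * fork_it()
 * Snapshot counters, fork/exec the given command, wait for it to finish,
 * snapshot again, and print one line of deltas covering the child's runtime.
 */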
5970 int fork_it(char **argv)
5971 {
5972 	pid_t child_pid;
5973 	int status;
5974 
5975 	snapshot_proc_sysfs_files();
5976 	status = for_all_cpus(get_counters, EVEN_COUNTERS);
5977 	first_counter_read = 0;
5978 	if (status)
5979 		exit(status);
5980 	/* clear affinity side-effect of get_counters() */
5981 	sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
5982 	gettimeofday(&tv_even, (struct timezone *)NULL);
5983 
5984 	child_pid = fork();
5985 	if (!child_pid) {
5986 		/* child */
5987 		execvp(argv[0], argv);
5988 		err(errno, "exec %s", argv[0]);
5989 	} else {
5990 
5991 		/* parent */
5992 		if (child_pid == -1)
5993 			err(1, "fork");
5994 
5995 		signal(SIGINT, SIG_IGN);
5996 		signal(SIGQUIT, SIG_IGN);
5997 		if (waitpid(child_pid, &status, 0) == -1)
5998 			err(status, "waitpid");
5999 
6000 		if (WIFEXITED(status))
6001 			status = WEXITSTATUS(status);
6002 	}
6003 	/*
6004 	 * n.b. fork_it() does not check for errors from for_all_cpus()
6005 	 * because re-starting is problematic when forking
6006 	 */
6007 	snapshot_proc_sysfs_files();
6008 	for_all_cpus(get_counters, ODD_COUNTERS);
6009 	gettimeofday(&tv_odd, (struct timezone *)NULL);
6010 	timersub(&tv_odd, &tv_even, &tv_delta);
6011 	if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
6012 		fprintf(outf, "%s: Counter reset detected\n", progname);
6013 	else {
6014 		compute_average(EVEN_COUNTERS);
6015 		format_all_counters(EVEN_COUNTERS);
6016 	}
6017 
6018 	fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0);
6019 
6020 	flush_output_stderr();
6021 
6022 	return status;
6023 }
6024 
6025 int get_and_dump_counters(void)
6026 {
6027 	int status;
6028 
6029 	snapshot_proc_sysfs_files();
6030 	status = for_all_cpus(get_counters, ODD_COUNTERS);
6031 	if (status)
6032 		return status;
6033 
6034 	status = for_all_cpus(dump_counters, ODD_COUNTERS);
6035 	if (status)
6036 		return status;
6037 
6038 	flush_output_stdout();
6039 
6040 	return status;
6041 }
6042 
6043 void print_version()
6044 {
6045 	fprintf(outf, "turbostat version 21.05.04" " - Len Brown <lenb@kernel.org>\n");
6046 }
6047 
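/*
 * add_counter()
 * Register an additional MSR- or sysfs-based counter on the thread, core,
 * or package list, enforcing the MAX_ADDED_*COUNTERS limits.
 */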
6048 int add_counter(unsigned int msr_num, char *path, char *name,
6049 		unsigned int width, enum counter_scope scope,
6050 		enum counter_type type, enum counter_format format, int flags)
6051 {
6052 	struct msr_counter *msrp;
6053 
6054 	msrp = calloc(1, sizeof(struct msr_counter));
6055 	if (msrp == NULL) {
6056 		perror("calloc");
6057 		exit(1);
6058 	}
6059 
6060 	msrp->msr_num = msr_num;
6061 	strncpy(msrp->name, name, NAME_BYTES - 1);
6062 	if (path)
6063 		strncpy(msrp->path, path, PATH_BYTES - 1);
6064 	msrp->width = width;
6065 	msrp->type = type;
6066 	msrp->format = format;
6067 	msrp->flags = flags;
6068 
6069 	switch (scope) {
6070 
6071 	case SCOPE_CPU:
6072 		msrp->next = sys.tp;
6073 		sys.tp = msrp;
6074 		sys.added_thread_counters++;
6075 		if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) {
6076 			fprintf(stderr, "exceeded max %d added thread counters\n", MAX_ADDED_THREAD_COUNTERS);
6077 			exit(-1);
6078 		}
6079 		break;
6080 
6081 	case SCOPE_CORE:
6082 		msrp->next = sys.cp;
6083 		sys.cp = msrp;
6084 		sys.added_core_counters++;
6085 		if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
6086 			fprintf(stderr, "exceeded max %d added core counters\n", MAX_ADDED_COUNTERS);
6087 			exit(-1);
6088 		}
6089 		break;
6090 
6091 	case SCOPE_PACKAGE:
6092 		msrp->next = sys.pp;
6093 		sys.pp = msrp;
6094 		sys.added_package_counters++;
6095 		if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
6096 			fprintf(stderr, "exceeded max %d added package counters\n", MAX_ADDED_COUNTERS);
6097 			exit(-1);
6098 		}
6099 		break;
6100 	}
6101 
6102 	return 0;
6103 }
6104 
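/*
 * parse_add_command()
 * Parse the comma-separated --add argument.  Recognized tokens:
 *   msrDDD | msr0xXXX | /sysfs_path    counter source (required)
 *   u32 | u64                          column width
 *   cpu | core | package               scope
 *   cycles | seconds | usec            type
 *   raw | delta | percent              format
 *   anything else                      column name
 */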
6105 void parse_add_command(char *add_command)
6106 {
6107 	int msr_num = 0;
6108 	char *path = NULL;
6109 	char name_buffer[NAME_BYTES] = "";
6110 	int width = 64;
6111 	int fail = 0;
6112 	enum counter_scope scope = SCOPE_CPU;
6113 	enum counter_type type = COUNTER_CYCLES;
6114 	enum counter_format format = FORMAT_DELTA;
6115 
6116 	while (add_command) {
6117 
6118 		if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
6119 			goto next;
6120 
6121 		if (sscanf(add_command, "msr%d", &msr_num) == 1)
6122 			goto next;
6123 
6124 		if (*add_command == '/') {
6125 			path = add_command;
6126 			goto next;
6127 		}
6128 
6129 		if (sscanf(add_command, "u%d", &width) == 1) {
6130 			if ((width == 32) || (width == 64))
6131 				goto next;
6132 			width = 64;
6133 		}
6134 		if (!strncmp(add_command, "cpu", strlen("cpu"))) {
6135 			scope = SCOPE_CPU;
6136 			goto next;
6137 		}
6138 		if (!strncmp(add_command, "core", strlen("core"))) {
6139 			scope = SCOPE_CORE;
6140 			goto next;
6141 		}
6142 		if (!strncmp(add_command, "package", strlen("package"))) {
6143 			scope = SCOPE_PACKAGE;
6144 			goto next;
6145 		}
6146 		if (!strncmp(add_command, "cycles", strlen("cycles"))) {
6147 			type = COUNTER_CYCLES;
6148 			goto next;
6149 		}
6150 		if (!strncmp(add_command, "seconds", strlen("seconds"))) {
6151 			type = COUNTER_SECONDS;
6152 			goto next;
6153 		}
6154 		if (!strncmp(add_command, "usec", strlen("usec"))) {
6155 			type = COUNTER_USEC;
6156 			goto next;
6157 		}
6158 		if (!strncmp(add_command, "raw", strlen("raw"))) {
6159 			format = FORMAT_RAW;
6160 			goto next;
6161 		}
6162 		if (!strncmp(add_command, "delta", strlen("delta"))) {
6163 			format = FORMAT_DELTA;
6164 			goto next;
6165 		}
6166 		if (!strncmp(add_command, "percent", strlen("percent"))) {
6167 			format = FORMAT_PERCENT;
6168 			goto next;
6169 		}
6170 
6171 		if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {	/* 18 < NAME_BYTES */
6172 			char *eos;
6173 
6174 			eos = strchr(name_buffer, ',');
6175 			if (eos)
6176 				*eos = '\0';
6177 			goto next;
6178 		}
6179 
6180 next:
6181 		add_command = strchr(add_command, ',');
6182 		if (add_command) {
6183 			*add_command = '\0';
6184 			add_command++;
6185 		}
6186 
6187 	}
6188 	if ((msr_num == 0) && (path == NULL)) {
6189 		fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n");
6190 		fail++;
6191 	}
6192 
6193 	/* generate default column header: "M0x..." for 32-bit, "M0X..." for 64-bit */
6194 	if (*name_buffer == '\0') {
6195 		if (width == 32)
6196 			sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
6197 		else
6198 			sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
6199 	}
6200 
6201 	if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0))
6202 		fail++;
6203 
6204 	if (fail) {
6205 		help();
6206 		exit(1);
6207 	}
6208 }
6209 
6210 int is_deferred_add(char *name)
6211 {
6212 	int i;
6213 
6214 	for (i = 0; i < deferred_add_index; ++i)
6215 		if (!strcmp(name, deferred_add_names[i]))
6216 			return 1;
6217 	return 0;
6218 }
6219 
6220 int is_deferred_skip(char *name)
6221 {
6222 	int i;
6223 
6224 	for (i = 0; i < deferred_skip_index; ++i)
6225 		if (!strcmp(name, deferred_skip_names[i]))
6226 			return 1;
6227 	return 0;
6228 }
6229 
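/*
 * probe_sysfs()
 * Discover cpuidle states under /sys/devices/system/cpu/cpuN/cpuidle/ and
 * add a residency-percentage ("time") and a usage-count ("usage") counter
 * for each state name.
 */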
6230 void probe_sysfs(void)
6231 {
6232 	char path[64];
6233 	char name_buf[16];
6234 	FILE *input;
6235 	int state;
6236 	char *sp;
6237 
6238 	for (state = 10; state >= 0; --state) {
6239 
6240 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
6241 		input = fopen(path, "r");
6242 		if (input == NULL)
6243 			continue;
6244 		if (!fgets(name_buf, sizeof(name_buf), input))
6245 			err(1, "%s: failed to read file", path);
6246 
6247 		/* turn "C1-HSW\n" or "C1\n" into "C1%" (residency is shown in percent) */
6248 		sp = strchr(name_buf, '-');
6249 		if (!sp)
6250 			sp = strchrnul(name_buf, '\n');
6251 		*sp = '%';
6252 		*(sp + 1) = '\0';
6253 
6254 		remove_underbar(name_buf);
6255 
6256 		fclose(input);
6257 
6258 		sprintf(path, "cpuidle/state%d/time", state);
6259 
6260 		if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
6261 			continue;
6262 
6263 		if (is_deferred_skip(name_buf))
6264 			continue;
6265 
6266 		add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU);
6267 	}
6268 
6269 	for (state = 10; state >= 0; --state) {
6270 
6271 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
6272 		input = fopen(path, "r");
6273 		if (input == NULL)
6274 			continue;
6275 		if (!fgets(name_buf, sizeof(name_buf), input))
6276 			err(1, "%s: failed to read file", path);
6277 		/* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
6278 		sp = strchr(name_buf, '-');
6279 		if (!sp)
6280 			sp = strchrnul(name_buf, '\n');
6281 		*sp = '\0';
6282 		fclose(input);
6283 
6284 		remove_underbar(name_buf);
6285 
6286 		sprintf(path, "cpuidle/state%d/usage", state);
6287 
6288 		if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
6289 			continue;
6290 
6291 		if (is_deferred_skip(name_buf))
6292 			continue;
6293 
6294 		add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU);
6295 	}
6296 
6297 }
6298 
6299 /*
6300  * parse the cpu set given with the syntax
6301  * 1,2,4..6,8-10 and set the corresponding bits in cpu_subset
6302  */
6303 void parse_cpu_command(char *optarg)
6304 {
6305 	unsigned int start, end;
6306 	char *next;
6307 
6308 	if (!strcmp(optarg, "core")) {
6309 		if (cpu_subset)
6310 			goto error;
6311 		show_core_only++;
6312 		return;
6313 	}
6314 	if (!strcmp(optarg, "package")) {
6315 		if (cpu_subset)
6316 			goto error;
6317 		show_pkg_only++;
6318 		return;
6319 	}
6320 	if (show_core_only || show_pkg_only)
6321 		goto error;
6322 
6323 	cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
6324 	if (cpu_subset == NULL)
6325 		err(3, "CPU_ALLOC");
6326 	cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
6327 
6328 	CPU_ZERO_S(cpu_subset_size, cpu_subset);
6329 
6330 	next = optarg;
6331 
6332 	while (next && *next) {
6333 
6334 		if (*next == '-')	/* no negative cpu numbers */
6335 			goto error;
6336 
6337 		start = strtoul(next, &next, 10);
6338 
6339 		if (start >= CPU_SUBSET_MAXCPUS)
6340 			goto error;
6341 		CPU_SET_S(start, cpu_subset_size, cpu_subset);
6342 
6343 		if (*next == '\0')
6344 			break;
6345 
6346 		if (*next == ',') {
6347 			next += 1;
6348 			continue;
6349 		}
6350 
6351 		if (*next == '-') {
6352 			next += 1;	/* start range */
6353 		} else if (*next == '.') {
6354 			next += 1;
6355 			if (*next == '.')
6356 				next += 1;	/* start range */
6357 			else
6358 				goto error;
6359 		}
6360 
6361 		end = strtoul(next, &next, 10);
6362 		if (end <= start)
6363 			goto error;
6364 
6365 		while (++start <= end) {
6366 			if (start >= CPU_SUBSET_MAXCPUS)
6367 				goto error;
6368 			CPU_SET_S(start, cpu_subset_size, cpu_subset);
6369 		}
6370 
6371 		if (*next == ',')
6372 			next += 1;
6373 		else if (*next != '\0')
6374 			goto error;
6375 	}
6376 
6377 	return;
6378 
6379 error:
6380 	fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
6381 	help();
6382 	exit(-1);
6383 }
6384 
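/*
 * cmdline()
 * Parse command-line options into the global configuration.
 */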
6385 void cmdline(int argc, char **argv)
6386 {
6387 	int opt;
6388 	int option_index = 0;
6389 	static struct option long_options[] = {
6390 		{ "add", required_argument, 0, 'a' },
6391 		{ "cpu", required_argument, 0, 'c' },
6392 		{ "Dump", no_argument, 0, 'D' },
6393 		{ "debug", no_argument, 0, 'd' },	/* internal, not documented */
6394 		{ "enable", required_argument, 0, 'e' },
6395 		{ "interval", required_argument, 0, 'i' },
6396 		{ "IPC", no_argument, 0, 'I' },
6397 		{ "num_iterations", required_argument, 0, 'n' },
6398 		{ "header_iterations", required_argument, 0, 'N' },
6399 		{ "help", no_argument, 0, 'h' },
6400 		{ "hide", required_argument, 0, 'H' },	// meh, -h taken by --help
6401 		{ "Joules", no_argument, 0, 'J' },
6402 		{ "list", no_argument, 0, 'l' },
6403 		{ "out", required_argument, 0, 'o' },
6404 		{ "quiet", no_argument, 0, 'q' },
6405 		{ "show", required_argument, 0, 's' },
6406 		{ "Summary", no_argument, 0, 'S' },
6407 		{ "TCC", required_argument, 0, 'T' },
6408 		{ "version", no_argument, 0, 'v' },
6409 		{ 0, 0, 0, 0 }
6410 	};
6411 
6412 	progname = argv[0];
6413 
6414 	while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", long_options, &option_index)) != -1) {
6415 		switch (opt) {
6416 		case 'a':
6417 			parse_add_command(optarg);
6418 			break;
6419 		case 'c':
6420 			parse_cpu_command(optarg);
6421 			break;
6422 		case 'D':
6423 			dump_only++;
6424 			break;
6425 		case 'e':
6426 			/* --enable specified counter */
6427 			bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
6428 			break;
6429 		case 'd':
6430 			debug++;
6431 			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
6432 			break;
6433 		case 'H':
6434 			/*
6435 			 * --hide: do not show those specified
6436 			 *  multiple invocations simply clear more bits in the enabled mask
6437 			 */
6438 			bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
6439 			break;
6440 		case 'h':
6441 		default:
6442 			help();
6443 			exit(1);
6444 		case 'i':
6445 			{
6446 				double interval = strtod(optarg, NULL);
6447 
6448 				if (interval < 0.001) {
6449 					fprintf(outf, "interval %f seconds is too small\n", interval);
6450 					exit(2);
6451 				}
6452 
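				/* split the fractional interval into timeval (usec) and timespec (nsec) forms */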
6453 				interval_tv.tv_sec = interval_ts.tv_sec = interval;
6454 				interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
6455 				interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
6456 			}
6457 			break;
6458 		case 'J':
6459 			rapl_joules++;
6460 			break;
6461 		case 'l':
6462 			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
6463 			list_header_only++;
6464 			quiet++;
6465 			break;
6466 		case 'o':
6467 			outf = fopen_or_die(optarg, "w");
6468 			break;
6469 		case 'q':
6470 			quiet = 1;
6471 			break;
6472 		case 'n':
6473 			num_iterations = strtod(optarg, NULL);
6474 
6475 			if (num_iterations <= 0) {
6476 				fprintf(outf, "iterations %d must be a positive number\n", num_iterations);
6477 				exit(2);
6478 			}
6479 			break;
6480 		case 'N':
6481 			header_iterations = strtod(optarg, NULL);
6482 
6483 			if (header_iterations <= 0) {
6484 				fprintf(outf, "header iterations %d must be a positive number\n",
6485 					header_iterations);
6486 				exit(2);
6487 			}
6488 			break;
6489 		case 's':
6490 			/*
6491 			 * --show: show only those specified
6492 			 *  The first invocation clears and replaces the enabled mask;
6493 			 *  subsequent invocations add to it.
6494 			 */
6495 			if (shown == 0)
6496 				bic_enabled = bic_lookup(optarg, SHOW_LIST);
6497 			else
6498 				bic_enabled |= bic_lookup(optarg, SHOW_LIST);
6499 			shown = 1;
6500 			break;
6501 		case 'S':
6502 			summary_only++;
6503 			break;
6504 		case 'T':
6505 			tj_max_override = atoi(optarg);
6506 			break;
6507 		case 'v':
6508 			print_version();
6509 			exit(0);
6510 			break;
6511 		}
6512 	}
6513 }
6514 
6515 int main(int argc, char **argv)
6516 {
6517 	outf = stderr;
6518 	cmdline(argc, argv);
6519 
6520 	if (!quiet)
6521 		print_version();
6522 
6523 	probe_sysfs();
6524 
6525 	turbostat_init();
6526 
6527 	msr_sum_record();
6528 
6529 	/* dump counters and exit */
6530 	if (dump_only)
6531 		return get_and_dump_counters();
6532 
6533 	/* list header and exit */
6534 	if (list_header_only) {
6535 		print_header(",");
6536 		flush_output_stdout();
6537 		return 0;
6538 	}
6539 
6540 	/*
6541 	 * if any params left, it must be a command to fork
6542 	 */
6543 	if (argc - optind)
6544 		return fork_it(argv + optind);
6545 	else
6546 		turbostat_loop();
6547 
6548 	return 0;
6549 }
6550