1 /*
2  * turbostat -- show CPU frequency and C-state residency
3  * on modern Intel turbo-capable processors.
4  *
5  * Copyright (c) 2013 Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21 
22 #define _GNU_SOURCE
23 #include <asm/msr.h>
24 #include <stdio.h>
25 #include <unistd.h>
26 #include <sys/types.h>
27 #include <sys/wait.h>
28 #include <sys/stat.h>
29 #include <sys/resource.h>
30 #include <fcntl.h>
31 #include <signal.h>
32 #include <sys/time.h>
33 #include <stdlib.h>
34 #include <dirent.h>
35 #include <string.h>
36 #include <ctype.h>
37 #include <sched.h>
38 
/* Path of the kernel statistics pseudo-file. */
char *proc_stat = "/proc/stat";

/* Command-line controlled settings. */
unsigned int interval_sec = 5;		/* set with -i interval_sec */
unsigned int verbose;			/* set with -v */
unsigned int rapl_verbose;		/* set with -R */
unsigned int thermal_verbose;		/* set with -T */
unsigned int summary_only;		/* set with -s */

/*
 * Set by delta_thread() when APERF/MPERF went backwards; the %c0 and %c1
 * columns are then printed as "****".
 */
unsigned int skip_c0;
unsigned int skip_c1;

/* Which groups of C-state residency counters are read and printed. */
unsigned int do_nhm_cstates;
unsigned int do_snb_cstates;
unsigned int do_c8_c9_c10;
unsigned int do_slm_cstates;
/* Non-zero: read c1 from MSR_CORE_C1_RES instead of deriving it. */
unsigned int use_c1_residency_msr;

unsigned int has_aperf;			/* APERF/MPERF are read and GHz is shown */
unsigned int has_epb;
unsigned int units = 1000000000;	/* GHz etc */
unsigned int genuine_intel;
unsigned int has_invariant_tsc;
unsigned int do_nehalem_platform_info;
unsigned int do_nehalem_turbo_ratio_limit;
unsigned int do_ivt_turbo_ratio_limit;

/* Offsets of additional MSRs to sample; 0 means "not requested". */
unsigned int extra_msr_offset32;
unsigned int extra_msr_offset64;
unsigned int extra_delta_offset32;
unsigned int extra_delta_offset64;

int do_smi;				/* non-zero: sample MSR_SMI_COUNT */
double bclk;				/* multiplied by a ratio to report MHz */

/* Which topology columns are printed, and row filtering. */
unsigned int show_pkg;
unsigned int show_core;
unsigned int show_cpu;
unsigned int show_pkg_only;
unsigned int show_core_only;

/* Formatted text is accumulated at outp, which points into output_buffer. */
char *output_buffer;
char *outp;

unsigned int do_rapl;			/* bitmask of RAPL_* flags */
unsigned int do_dts;			/* per-core temperature column */
unsigned int do_ptm;			/* per-package temperature column */

/* Temperature readouts are subtracted from this value. */
unsigned int tcc_activation_temp;
unsigned int tcc_activation_temp_override;

/* Scale factors applied to raw RAPL counter values. */
double rapl_power_units;
double rapl_energy_units;
double rapl_time_units;
/* Intervals at or above this length get their RAPL columns marked "**". */
double rapl_joule_counter_range;
79 
/* Bit flags combined in do_rapl to select RAPL columns / MSR reads. */
#define RAPL_PKG		0x01
#define RAPL_CORES		0x02
#define RAPL_GFX		0x04
#define RAPL_DRAM		0x08
#define RAPL_PKG_PERF_STATUS	0x10
#define RAPL_DRAM_PERF_STATUS	0x20
#define RAPL_PKG_POWER_INFO	0x40
#define RAPL_CORE_POLICY	0x80

#define TJMAX_DEFAULT		100

/* Larger of two values; note that each argument may be evaluated twice. */
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
91 
/* Latched by delta_thread() the first time APERF or MPERF goes backwards. */
int aperf_mperf_unstable;
int backwards_count;
/* Program name used as a prefix in diagnostics. */
char *progname;

/* Dynamically sized CPU sets and their sizes in bytes (used with the *_S macros). */
cpu_set_t *cpu_present_set;
cpu_set_t *cpu_affinity_set;
size_t cpu_present_setsize;
size_t cpu_affinity_setsize;
98 
/* Bits stored in thread_data.flags. */
#define CPU_IS_FIRST_THREAD_IN_CORE	0x2
#define CPU_IS_FIRST_CORE_IN_PACKAGE	0x4

/*
 * Counters collected for one logical CPU.
 * After the delta_*() pass the counter fields hold interval deltas.
 */
struct thread_data {
	unsigned long long tsc;			/* time stamp counter */
	unsigned long long aperf;		/* MSR_IA32_APERF */
	unsigned long long mperf;		/* MSR_IA32_MPERF */
	unsigned long long c1;			/* MSR_CORE_C1_RES, or derived */
	unsigned long long extra_msr64;		/* snapshot of extra_msr_offset64 */
	unsigned long long extra_delta64;	/* counter at extra_delta_offset64 */
	unsigned long long extra_msr32;		/* low 32 bits at extra_msr_offset32 */
	unsigned long long extra_delta32;	/* low 32 bits at extra_delta_offset32 */
	unsigned int smi_count;			/* low 32 bits of MSR_SMI_COUNT */
	unsigned int cpu_id;
	unsigned int flags;			/* CPU_IS_FIRST_* bits */
};

/* The two per-thread counter sets. */
struct thread_data *thread_even;
struct thread_data *thread_odd;
114 
/* Counters collected once per core (by the first thread in that core). */
struct core_data {
	unsigned long long c3;		/* MSR_CORE_C3_RESIDENCY */
	unsigned long long c6;		/* MSR_CORE_C6_RESIDENCY */
	unsigned long long c7;		/* MSR_CORE_C7_RESIDENCY */
	unsigned int core_temp_c;	/* from MSR_IA32_THERM_STATUS */
	unsigned int core_id;
};

/* The two per-core counter sets. */
struct core_data *core_even;
struct core_data *core_odd;
122 
/* Counters collected once per package (by the first core in that package). */
struct pkg_data {
	/* package C-state residency counters */
	unsigned long long pc2;
	unsigned long long pc3;
	unsigned long long pc6;
	unsigned long long pc7;
	unsigned long long pc8;
	unsigned long long pc9;
	unsigned long long pc10;
	unsigned int package_id;
	/* 32-bit RAPL counters */
	unsigned int energy_pkg;		/* MSR_PKG_ENERGY_STATUS */
	unsigned int energy_dram;		/* MSR_DRAM_ENERGY_STATUS */
	unsigned int energy_cores;		/* MSR_PP0_ENERGY_STATUS */
	unsigned int energy_gfx;		/* MSR_PP1_ENERGY_STATUS */
	unsigned int rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */
	unsigned int rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */
	unsigned int pkg_temp_c;		/* from MSR_IA32_PACKAGE_THERM_STATUS */
};

/* The two per-package counter sets. */
struct pkg_data *package_even;
struct pkg_data *package_odd;
141 
/* Argument triples naming the "odd" and "even" counter sets. */
#define ODD_COUNTERS thread_odd, core_odd, package_odd
#define EVEN_COUNTERS thread_even, core_even, package_even

/*
 * Index into the flat thread/core/package arrays by topology position.
 * Threads are laid out package-major, then core, then thread; cores are
 * laid out package-major, then core.  All macro arguments are parenthesized
 * so that expression arguments expand correctly.
 */
#define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \
	((thread_base) + (pkg_no) * topo.num_cores_per_pkg * \
		topo.num_threads_per_core + \
		(core_no) * topo.num_threads_per_core + (thread_no))
#define GET_CORE(core_base, core_no, pkg_no) \
	((core_base) + (pkg_no) * topo.num_cores_per_pkg + (core_no))
#define GET_PKG(pkg_base, pkg_no) ((pkg_base) + (pkg_no))
152 
153 struct system_summary {
154 	struct thread_data threads;
155 	struct core_data cores;
156 	struct pkg_data packages;
157 } sum, average;
158 
159 
/* System topology counts used to size and index the counter arrays. */
struct topo_params {
	int num_packages;
	int num_cpus;
	int num_cores;
	int max_cpu_num;
	int num_cores_per_pkg;
	int num_threads_per_core;
};

struct topo_params topo;
168 
/* Sample timestamps; tv_delta is the interval length used by format_counters(). */
struct timeval tv_even;
struct timeval tv_odd;
struct timeval tv_delta;

void setup_all_buffers(void);
172 
173 int cpu_is_not_present(int cpu)
174 {
175 	return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
176 }
177 /*
178  * run func(thread, core, package) in topology order
179  * skip non-present cpus
180  */
181 
182 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
183 	struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
184 {
185 	int retval, pkg_no, core_no, thread_no;
186 
187 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
188 		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
189 			for (thread_no = 0; thread_no <
190 				topo.num_threads_per_core; ++thread_no) {
191 				struct thread_data *t;
192 				struct core_data *c;
193 				struct pkg_data *p;
194 
195 				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
196 
197 				if (cpu_is_not_present(t->cpu_id))
198 					continue;
199 
200 				c = GET_CORE(core_base, core_no, pkg_no);
201 				p = GET_PKG(pkg_base, pkg_no);
202 
203 				retval = func(t, c, p);
204 				if (retval)
205 					return retval;
206 			}
207 		}
208 	}
209 	return 0;
210 }
211 
212 int cpu_migrate(int cpu)
213 {
214 	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
215 	CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
216 	if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
217 		return -1;
218 	else
219 		return 0;
220 }
221 
/*
 * Read one 64-bit MSR of @cpu through /dev/cpu/<cpu>/msr.
 *
 * @cpu:    CPU number used to build the device path
 * @offset: MSR address, used as the file offset for pread()
 * @msr:    receives the 8-byte value on success
 *
 * Returns 0 on success.  Returns -1 if the device cannot be opened (no
 * message is printed) or if the read is short or fails (a message is
 * printed to stderr).
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
	ssize_t retval;
	char pathname[32];
	int fd;

	snprintf(pathname, sizeof(pathname), "/dev/cpu/%d/msr", cpu);
	fd = open(pathname, O_RDONLY);
	if (fd < 0)
		return -1;

	retval = pread(fd, msr, sizeof *msr, offset);
	close(fd);

	if (retval != (ssize_t)sizeof *msr) {
		/* off_t has no printf length modifier of its own: widen explicitly */
		fprintf(stderr, "%s offset 0x%llx read failed\n",
			pathname, (unsigned long long)offset);
		return -1;
	}

	return 0;
}
243 
244 void print_header(void)
245 {
246 	if (show_pkg)
247 		outp += sprintf(outp, "pk");
248 	if (show_pkg)
249 		outp += sprintf(outp, " ");
250 	if (show_core)
251 		outp += sprintf(outp, "cor");
252 	if (show_cpu)
253 		outp += sprintf(outp, " CPU");
254 	if (show_pkg || show_core || show_cpu)
255 		outp += sprintf(outp, " ");
256 	if (do_nhm_cstates)
257 		outp += sprintf(outp, "   %%c0");
258 	if (has_aperf)
259 		outp += sprintf(outp, "  GHz");
260 	outp += sprintf(outp, "  TSC");
261 	if (do_smi)
262 		outp += sprintf(outp, " SMI");
263 	if (extra_delta_offset32)
264 		outp += sprintf(outp, "  count 0x%03X", extra_delta_offset32);
265 	if (extra_delta_offset64)
266 		outp += sprintf(outp, "  COUNT 0x%03X", extra_delta_offset64);
267 	if (extra_msr_offset32)
268 		outp += sprintf(outp, "   MSR 0x%03X", extra_msr_offset32);
269 	if (extra_msr_offset64)
270 		outp += sprintf(outp, "           MSR 0x%03X", extra_msr_offset64);
271 	if (do_nhm_cstates)
272 		outp += sprintf(outp, "    %%c1");
273 	if (do_nhm_cstates && !do_slm_cstates)
274 		outp += sprintf(outp, "    %%c3");
275 	if (do_nhm_cstates)
276 		outp += sprintf(outp, "    %%c6");
277 	if (do_snb_cstates)
278 		outp += sprintf(outp, "    %%c7");
279 
280 	if (do_dts)
281 		outp += sprintf(outp, " CTMP");
282 	if (do_ptm)
283 		outp += sprintf(outp, " PTMP");
284 
285 	if (do_snb_cstates)
286 		outp += sprintf(outp, "   %%pc2");
287 	if (do_nhm_cstates && !do_slm_cstates)
288 		outp += sprintf(outp, "   %%pc3");
289 	if (do_nhm_cstates && !do_slm_cstates)
290 		outp += sprintf(outp, "   %%pc6");
291 	if (do_snb_cstates)
292 		outp += sprintf(outp, "   %%pc7");
293 	if (do_c8_c9_c10) {
294 		outp += sprintf(outp, "   %%pc8");
295 		outp += sprintf(outp, "   %%pc9");
296 		outp += sprintf(outp, "  %%pc10");
297 	}
298 
299 	if (do_rapl & RAPL_PKG)
300 		outp += sprintf(outp, "  Pkg_W");
301 	if (do_rapl & RAPL_CORES)
302 		outp += sprintf(outp, "  Cor_W");
303 	if (do_rapl & RAPL_GFX)
304 		outp += sprintf(outp, " GFX_W");
305 	if (do_rapl & RAPL_DRAM)
306 		outp += sprintf(outp, " RAM_W");
307 	if (do_rapl & RAPL_PKG_PERF_STATUS)
308 		outp += sprintf(outp, " PKG_%%");
309 	if (do_rapl & RAPL_DRAM_PERF_STATUS)
310 		outp += sprintf(outp, " RAM_%%");
311 
312 	outp += sprintf(outp, "\n");
313 }
314 
315 int dump_counters(struct thread_data *t, struct core_data *c,
316 	struct pkg_data *p)
317 {
318 	fprintf(stderr, "t %p, c %p, p %p\n", t, c, p);
319 
320 	if (t) {
321 		fprintf(stderr, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags);
322 		fprintf(stderr, "TSC: %016llX\n", t->tsc);
323 		fprintf(stderr, "aperf: %016llX\n", t->aperf);
324 		fprintf(stderr, "mperf: %016llX\n", t->mperf);
325 		fprintf(stderr, "c1: %016llX\n", t->c1);
326 		fprintf(stderr, "msr0x%x: %08llX\n",
327 			extra_delta_offset32, t->extra_delta32);
328 		fprintf(stderr, "msr0x%x: %016llX\n",
329 			extra_delta_offset64, t->extra_delta64);
330 		fprintf(stderr, "msr0x%x: %08llX\n",
331 			extra_msr_offset32, t->extra_msr32);
332 		fprintf(stderr, "msr0x%x: %016llX\n",
333 			extra_msr_offset64, t->extra_msr64);
334 		if (do_smi)
335 			fprintf(stderr, "SMI: %08X\n", t->smi_count);
336 	}
337 
338 	if (c) {
339 		fprintf(stderr, "core: %d\n", c->core_id);
340 		fprintf(stderr, "c3: %016llX\n", c->c3);
341 		fprintf(stderr, "c6: %016llX\n", c->c6);
342 		fprintf(stderr, "c7: %016llX\n", c->c7);
343 		fprintf(stderr, "DTS: %dC\n", c->core_temp_c);
344 	}
345 
346 	if (p) {
347 		fprintf(stderr, "package: %d\n", p->package_id);
348 		fprintf(stderr, "pc2: %016llX\n", p->pc2);
349 		fprintf(stderr, "pc3: %016llX\n", p->pc3);
350 		fprintf(stderr, "pc6: %016llX\n", p->pc6);
351 		fprintf(stderr, "pc7: %016llX\n", p->pc7);
352 		fprintf(stderr, "pc8: %016llX\n", p->pc8);
353 		fprintf(stderr, "pc9: %016llX\n", p->pc9);
354 		fprintf(stderr, "pc10: %016llX\n", p->pc10);
355 		fprintf(stderr, "Joules PKG: %0X\n", p->energy_pkg);
356 		fprintf(stderr, "Joules COR: %0X\n", p->energy_cores);
357 		fprintf(stderr, "Joules GFX: %0X\n", p->energy_gfx);
358 		fprintf(stderr, "Joules RAM: %0X\n", p->energy_dram);
359 		fprintf(stderr, "Throttle PKG: %0X\n", p->rapl_pkg_perf_status);
360 		fprintf(stderr, "Throttle RAM: %0X\n", p->rapl_dram_perf_status);
361 		fprintf(stderr, "PTM: %dC\n", p->pkg_temp_c);
362 	}
363 	return 0;
364 }
365 
366 /*
367  * column formatting convention & formats
368  * package: "pk" 2 columns %2d
369  * core: "cor" 3 columns %3d
370  * CPU: "CPU" 3 columns %3d
371  * Pkg_W: %6.2
372  * Cor_W: %6.2
373  * GFX_W: %5.2
374  * RAM_W: %5.2
375  * GHz: "GHz" 3 columns %3.2
376  * TSC: "TSC" 3 columns %3.2
377  * SMI: "SMI" 4 columns %4d
378  * percentage " %pc3" %6.2
379  * Perf Status percentage: %5.2
380  * "CTMP" 4 columns %4d
381  */
382 int format_counters(struct thread_data *t, struct core_data *c,
383 	struct pkg_data *p)
384 {
385 	double interval_float;
386 	char *fmt5, *fmt6;
387 
388 	 /* if showing only 1st thread in core and this isn't one, bail out */
389 	if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
390 		return 0;
391 
392 	 /* if showing only 1st thread in pkg and this isn't one, bail out */
393 	if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
394 		return 0;
395 
396 	interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
397 
398 	/* topo columns, print blanks on 1st (average) line */
399 	if (t == &average.threads) {
400 		if (show_pkg)
401 			outp += sprintf(outp, "  ");
402 		if (show_pkg && show_core)
403 			outp += sprintf(outp, " ");
404 		if (show_core)
405 			outp += sprintf(outp, "   ");
406 		if (show_cpu)
407 			outp += sprintf(outp, " " "   ");
408 	} else {
409 		if (show_pkg) {
410 			if (p)
411 				outp += sprintf(outp, "%2d", p->package_id);
412 			else
413 				outp += sprintf(outp, "  ");
414 		}
415 		if (show_pkg && show_core)
416 			outp += sprintf(outp, " ");
417 		if (show_core) {
418 			if (c)
419 				outp += sprintf(outp, "%3d", c->core_id);
420 			else
421 				outp += sprintf(outp, "   ");
422 		}
423 		if (show_cpu)
424 			outp += sprintf(outp, " %3d", t->cpu_id);
425 	}
426 	/* %c0 */
427 	if (do_nhm_cstates) {
428 		if (show_pkg || show_core || show_cpu)
429 			outp += sprintf(outp, " ");
430 		if (!skip_c0)
431 			outp += sprintf(outp, "%6.2f", 100.0 * t->mperf/t->tsc);
432 		else
433 			outp += sprintf(outp, "  ****");
434 	}
435 
436 	/* GHz */
437 	if (has_aperf) {
438 		if (!aperf_mperf_unstable) {
439 			outp += sprintf(outp, " %3.2f",
440 				1.0 * t->tsc / units * t->aperf /
441 				t->mperf / interval_float);
442 		} else {
443 			if (t->aperf > t->tsc || t->mperf > t->tsc) {
444 				outp += sprintf(outp, " ***");
445 			} else {
446 				outp += sprintf(outp, "%3.1f*",
447 					1.0 * t->tsc /
448 					units * t->aperf /
449 					t->mperf / interval_float);
450 			}
451 		}
452 	}
453 
454 	/* TSC */
455 	outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float);
456 
457 	/* SMI */
458 	if (do_smi)
459 		outp += sprintf(outp, "%4d", t->smi_count);
460 
461 	/* delta */
462 	if (extra_delta_offset32)
463 		outp += sprintf(outp, "  %11llu", t->extra_delta32);
464 
465 	/* DELTA */
466 	if (extra_delta_offset64)
467 		outp += sprintf(outp, "  %11llu", t->extra_delta64);
468 	/* msr */
469 	if (extra_msr_offset32)
470 		outp += sprintf(outp, "  0x%08llx", t->extra_msr32);
471 
472 	/* MSR */
473 	if (extra_msr_offset64)
474 		outp += sprintf(outp, "  0x%016llx", t->extra_msr64);
475 
476 	if (do_nhm_cstates) {
477 		if (!skip_c1)
478 			outp += sprintf(outp, " %6.2f", 100.0 * t->c1/t->tsc);
479 		else
480 			outp += sprintf(outp, "  ****");
481 	}
482 
483 	/* print per-core data only for 1st thread in core */
484 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
485 		goto done;
486 
487 	if (do_nhm_cstates && !do_slm_cstates)
488 		outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc);
489 	if (do_nhm_cstates)
490 		outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc);
491 	if (do_snb_cstates)
492 		outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc);
493 
494 	if (do_dts)
495 		outp += sprintf(outp, " %4d", c->core_temp_c);
496 
497 	/* print per-package data only for 1st core in package */
498 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
499 		goto done;
500 
501 	if (do_ptm)
502 		outp += sprintf(outp, " %4d", p->pkg_temp_c);
503 
504 	if (do_snb_cstates)
505 		outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc);
506 	if (do_nhm_cstates && !do_slm_cstates)
507 		outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc);
508 	if (do_nhm_cstates && !do_slm_cstates)
509 		outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc);
510 	if (do_snb_cstates)
511 		outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc);
512 	if (do_c8_c9_c10) {
513 		outp += sprintf(outp, " %6.2f", 100.0 * p->pc8/t->tsc);
514 		outp += sprintf(outp, " %6.2f", 100.0 * p->pc9/t->tsc);
515 		outp += sprintf(outp, " %6.2f", 100.0 * p->pc10/t->tsc);
516 	}
517 
518 	/*
519  	 * If measurement interval exceeds minimum RAPL Joule Counter range,
520  	 * indicate that results are suspect by printing "**" in fraction place.
521  	 */
522 	if (interval_float < rapl_joule_counter_range) {
523 		fmt5 = " %5.2f";
524 		fmt6 = " %6.2f";
525 	} else {
526 		fmt5 = " %3.0f**";
527 		fmt6 = " %4.0f**";
528 	}
529 
530 	if (do_rapl & RAPL_PKG)
531 		outp += sprintf(outp, fmt6, p->energy_pkg * rapl_energy_units / interval_float);
532 	if (do_rapl & RAPL_CORES)
533 		outp += sprintf(outp, fmt6, p->energy_cores * rapl_energy_units / interval_float);
534 	if (do_rapl & RAPL_GFX)
535 		outp += sprintf(outp, fmt5, p->energy_gfx * rapl_energy_units / interval_float);
536 	if (do_rapl & RAPL_DRAM)
537 		outp += sprintf(outp, fmt5, p->energy_dram * rapl_energy_units / interval_float);
538 	if (do_rapl & RAPL_PKG_PERF_STATUS )
539 		outp += sprintf(outp, fmt5, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
540 	if (do_rapl & RAPL_DRAM_PERF_STATUS )
541 		outp += sprintf(outp, fmt5, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
542 
543 done:
544 	outp += sprintf(outp, "\n");
545 
546 	return 0;
547 }
548 
549 void flush_stdout()
550 {
551 	fputs(output_buffer, stdout);
552 	fflush(stdout);
553 	outp = output_buffer;
554 }
555 void flush_stderr()
556 {
557 	fputs(output_buffer, stderr);
558 	outp = output_buffer;
559 }
560 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
561 {
562 	static int printed;
563 
564 	if (!printed || !summary_only)
565 		print_header();
566 
567 	if (topo.num_cpus > 1)
568 		format_counters(&average.threads, &average.cores,
569 			&average.packages);
570 
571 	printed = 1;
572 
573 	if (summary_only)
574 		return;
575 
576 	for_all_cpus(format_counters, t, c, p);
577 }
578 
/*
 * old = new - old for a 32-bit counter that may have wrapped once:
 * when new is not greater than old, 2^32 is added before subtracting.
 * Wrapped in do { } while (0) so it behaves as a single statement
 * (safe inside an unbraced if/else); arguments are parenthesized.
 * Note that both arguments are evaluated more than once.
 */
#define DELTA_WRAP32(new, old)					\
	do {							\
		if ((new) > (old))				\
			(old) = (new) - (old);			\
		else						\
			(old) = 0x100000000 + (new) - (old);	\
	} while (0)
585 
586 void
587 delta_package(struct pkg_data *new, struct pkg_data *old)
588 {
589 	old->pc2 = new->pc2 - old->pc2;
590 	old->pc3 = new->pc3 - old->pc3;
591 	old->pc6 = new->pc6 - old->pc6;
592 	old->pc7 = new->pc7 - old->pc7;
593 	old->pc8 = new->pc8 - old->pc8;
594 	old->pc9 = new->pc9 - old->pc9;
595 	old->pc10 = new->pc10 - old->pc10;
596 	old->pkg_temp_c = new->pkg_temp_c;
597 
598 	DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
599 	DELTA_WRAP32(new->energy_cores, old->energy_cores);
600 	DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
601 	DELTA_WRAP32(new->energy_dram, old->energy_dram);
602 	DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
603 	DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
604 }
605 
606 void
607 delta_core(struct core_data *new, struct core_data *old)
608 {
609 	old->c3 = new->c3 - old->c3;
610 	old->c6 = new->c6 - old->c6;
611 	old->c7 = new->c7 - old->c7;
612 	old->core_temp_c = new->core_temp_c;
613 }
614 
615 /*
616  * old = new - old
617  */
618 void
619 delta_thread(struct thread_data *new, struct thread_data *old,
620 	struct core_data *core_delta)
621 {
622 	old->tsc = new->tsc - old->tsc;
623 
624 	/* check for TSC < 1 Mcycles over interval */
625 	if (old->tsc < (1000 * 1000)) {
626 		fprintf(stderr, "Insanely slow TSC rate, TSC stops in idle?\n");
627 		fprintf(stderr, "You can disable all c-states by booting with \"idle=poll\"\n");
628 		fprintf(stderr, "or just the deep ones with \"processor.max_cstate=1\"\n");
629 		exit(-3);
630 	}
631 
632 	old->c1 = new->c1 - old->c1;
633 
634 	if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
635 		old->aperf = new->aperf - old->aperf;
636 		old->mperf = new->mperf - old->mperf;
637 	} else {
638 
639 		if (!aperf_mperf_unstable) {
640 			fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname);
641 			fprintf(stderr, "* Frequency results do not cover entire interval *\n");
642 			fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n");
643 
644 			aperf_mperf_unstable = 1;
645 		}
646 		/*
647 		 * mperf delta is likely a huge "positive" number
648 		 * can not use it for calculating c0 time
649 		 */
650 		skip_c0 = 1;
651 		skip_c1 = 1;
652 	}
653 
654 
655 	if (use_c1_residency_msr) {
656 		/*
657 		 * Some models have a dedicated C1 residency MSR,
658 		 * which should be more accurate than the derivation below.
659 		 */
660 	} else {
661 		/*
662 		 * As counter collection is not atomic,
663 		 * it is possible for mperf's non-halted cycles + idle states
664 		 * to exceed TSC's all cycles: show c1 = 0% in that case.
665 		 */
666 		if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
667 			old->c1 = 0;
668 		else {
669 			/* normal case, derive c1 */
670 			old->c1 = old->tsc - old->mperf - core_delta->c3
671 				- core_delta->c6 - core_delta->c7;
672 		}
673 	}
674 
675 	if (old->mperf == 0) {
676 		if (verbose > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id);
677 		old->mperf = 1;	/* divide by 0 protection */
678 	}
679 
680 	old->extra_delta32 = new->extra_delta32 - old->extra_delta32;
681 	old->extra_delta32 &= 0xFFFFFFFF;
682 
683 	old->extra_delta64 = new->extra_delta64 - old->extra_delta64;
684 
685 	/*
686 	 * Extra MSR is just a snapshot, simply copy latest w/o subtracting
687 	 */
688 	old->extra_msr32 = new->extra_msr32;
689 	old->extra_msr64 = new->extra_msr64;
690 
691 	if (do_smi)
692 		old->smi_count = new->smi_count - old->smi_count;
693 }
694 
695 int delta_cpu(struct thread_data *t, struct core_data *c,
696 	struct pkg_data *p, struct thread_data *t2,
697 	struct core_data *c2, struct pkg_data *p2)
698 {
699 	/* calculate core delta only for 1st thread in core */
700 	if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
701 		delta_core(c, c2);
702 
703 	/* always calculate thread delta */
704 	delta_thread(t, t2, c2);	/* c2 is core delta */
705 
706 	/* calculate package delta only for 1st core in package */
707 	if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
708 		delta_package(p, p2);
709 
710 	return 0;
711 }
712 
713 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
714 {
715 	t->tsc = 0;
716 	t->aperf = 0;
717 	t->mperf = 0;
718 	t->c1 = 0;
719 
720 	t->smi_count = 0;
721 	t->extra_delta32 = 0;
722 	t->extra_delta64 = 0;
723 
724 	/* tells format_counters to dump all fields from this set */
725 	t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
726 
727 	c->c3 = 0;
728 	c->c6 = 0;
729 	c->c7 = 0;
730 	c->core_temp_c = 0;
731 
732 	p->pc2 = 0;
733 	p->pc3 = 0;
734 	p->pc6 = 0;
735 	p->pc7 = 0;
736 	p->pc8 = 0;
737 	p->pc9 = 0;
738 	p->pc10 = 0;
739 
740 	p->energy_pkg = 0;
741 	p->energy_dram = 0;
742 	p->energy_cores = 0;
743 	p->energy_gfx = 0;
744 	p->rapl_pkg_perf_status = 0;
745 	p->rapl_dram_perf_status = 0;
746 	p->pkg_temp_c = 0;
747 }
748 int sum_counters(struct thread_data *t, struct core_data *c,
749 	struct pkg_data *p)
750 {
751 	average.threads.tsc += t->tsc;
752 	average.threads.aperf += t->aperf;
753 	average.threads.mperf += t->mperf;
754 	average.threads.c1 += t->c1;
755 
756 	average.threads.extra_delta32 += t->extra_delta32;
757 	average.threads.extra_delta64 += t->extra_delta64;
758 
759 	/* sum per-core values only for 1st thread in core */
760 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
761 		return 0;
762 
763 	average.cores.c3 += c->c3;
764 	average.cores.c6 += c->c6;
765 	average.cores.c7 += c->c7;
766 
767 	average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
768 
769 	/* sum per-pkg values only for 1st core in pkg */
770 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
771 		return 0;
772 
773 	average.packages.pc2 += p->pc2;
774 	average.packages.pc3 += p->pc3;
775 	average.packages.pc6 += p->pc6;
776 	average.packages.pc7 += p->pc7;
777 	average.packages.pc8 += p->pc8;
778 	average.packages.pc9 += p->pc9;
779 	average.packages.pc10 += p->pc10;
780 
781 	average.packages.energy_pkg += p->energy_pkg;
782 	average.packages.energy_dram += p->energy_dram;
783 	average.packages.energy_cores += p->energy_cores;
784 	average.packages.energy_gfx += p->energy_gfx;
785 
786 	average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
787 
788 	average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
789 	average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
790 	return 0;
791 }
792 /*
793  * sum the counters for all cpus in the system
794  * compute the weighted average
795  */
796 void compute_average(struct thread_data *t, struct core_data *c,
797 	struct pkg_data *p)
798 {
799 	clear_counters(&average.threads, &average.cores, &average.packages);
800 
801 	for_all_cpus(sum_counters, t, c, p);
802 
803 	average.threads.tsc /= topo.num_cpus;
804 	average.threads.aperf /= topo.num_cpus;
805 	average.threads.mperf /= topo.num_cpus;
806 	average.threads.c1 /= topo.num_cpus;
807 
808 	average.threads.extra_delta32 /= topo.num_cpus;
809 	average.threads.extra_delta32 &= 0xFFFFFFFF;
810 
811 	average.threads.extra_delta64 /= topo.num_cpus;
812 
813 	average.cores.c3 /= topo.num_cores;
814 	average.cores.c6 /= topo.num_cores;
815 	average.cores.c7 /= topo.num_cores;
816 
817 	average.packages.pc2 /= topo.num_packages;
818 	average.packages.pc3 /= topo.num_packages;
819 	average.packages.pc6 /= topo.num_packages;
820 	average.packages.pc7 /= topo.num_packages;
821 
822 	average.packages.pc8 /= topo.num_packages;
823 	average.packages.pc9 /= topo.num_packages;
824 	average.packages.pc10 /= topo.num_packages;
825 }
826 
/*
 * Read the time stamp counter of the CPU this code is running on.
 * The RDTSC instruction returns the low half in EAX and the high half in
 * EDX; they are combined into one 64-bit value.
 *
 * Uses the __asm__/__volatile__ spellings so the function also compiles
 * in strict ISO modes (-std=c99, -std=c11), where plain "asm" is not a
 * keyword.
 */
static unsigned long long rdtsc(void)
{
	unsigned int low, high;

	__asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));

	return ((unsigned long long)high << 32) | low;
}
835 
836 
837 /*
838  * get_counters(...)
839  * migrate to cpu
840  * acquire and record local counters for that cpu
841  */
842 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
843 {
844 	int cpu = t->cpu_id;
845 	unsigned long long msr;
846 
847 	if (cpu_migrate(cpu)) {
848 		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
849 		return -1;
850 	}
851 
852 	t->tsc = rdtsc();	/* we are running on local CPU of interest */
853 
854 	if (has_aperf) {
855 		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
856 			return -3;
857 		if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
858 			return -4;
859 	}
860 
861 	if (do_smi) {
862 		if (get_msr(cpu, MSR_SMI_COUNT, &msr))
863 			return -5;
864 		t->smi_count = msr & 0xFFFFFFFF;
865 	}
866 	if (extra_delta_offset32) {
867 		if (get_msr(cpu, extra_delta_offset32, &msr))
868 			return -5;
869 		t->extra_delta32 = msr & 0xFFFFFFFF;
870 	}
871 
872 	if (extra_delta_offset64)
873 		if (get_msr(cpu, extra_delta_offset64, &t->extra_delta64))
874 			return -5;
875 
876 	if (extra_msr_offset32) {
877 		if (get_msr(cpu, extra_msr_offset32, &msr))
878 			return -5;
879 		t->extra_msr32 = msr & 0xFFFFFFFF;
880 	}
881 
882 	if (extra_msr_offset64)
883 		if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64))
884 			return -5;
885 
886 	if (use_c1_residency_msr) {
887 		if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
888 			return -6;
889 	}
890 
891 	/* collect core counters only for 1st thread in core */
892 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
893 		return 0;
894 
895 	if (do_nhm_cstates && !do_slm_cstates) {
896 		if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
897 			return -6;
898 	}
899 
900 	if (do_nhm_cstates) {
901 		if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
902 			return -7;
903 	}
904 
905 	if (do_snb_cstates)
906 		if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
907 			return -8;
908 
909 	if (do_dts) {
910 		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
911 			return -9;
912 		c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
913 	}
914 
915 
916 	/* collect package counters only for 1st core in package */
917 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
918 		return 0;
919 
920 	if (do_nhm_cstates && !do_slm_cstates) {
921 		if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
922 			return -9;
923 		if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
924 			return -10;
925 	}
926 	if (do_snb_cstates) {
927 		if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
928 			return -11;
929 		if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
930 			return -12;
931 	}
932 	if (do_c8_c9_c10) {
933 		if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
934 			return -13;
935 		if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
936 			return -13;
937 		if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
938 			return -13;
939 	}
940 	if (do_rapl & RAPL_PKG) {
941 		if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
942 			return -13;
943 		p->energy_pkg = msr & 0xFFFFFFFF;
944 	}
945 	if (do_rapl & RAPL_CORES) {
946 		if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
947 			return -14;
948 		p->energy_cores = msr & 0xFFFFFFFF;
949 	}
950 	if (do_rapl & RAPL_DRAM) {
951 		if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
952 			return -15;
953 		p->energy_dram = msr & 0xFFFFFFFF;
954 	}
955 	if (do_rapl & RAPL_GFX) {
956 		if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
957 			return -16;
958 		p->energy_gfx = msr & 0xFFFFFFFF;
959 	}
960 	if (do_rapl & RAPL_PKG_PERF_STATUS) {
961 		if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
962 			return -16;
963 		p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
964 	}
965 	if (do_rapl & RAPL_DRAM_PERF_STATUS) {
966 		if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
967 			return -16;
968 		p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
969 	}
970 	if (do_ptm) {
971 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
972 			return -17;
973 		p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
974 	}
975 	return 0;
976 }
977 
978 void print_verbose_header(void)
979 {
980 	unsigned long long msr;
981 	unsigned int ratio;
982 
983 	if (!do_nehalem_platform_info)
984 		return;
985 
986 	get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);
987 
988 	fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
989 
990 	ratio = (msr >> 40) & 0xFF;
991 	fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n",
992 		ratio, bclk, ratio * bclk);
993 
994 	ratio = (msr >> 8) & 0xFF;
995 	fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n",
996 		ratio, bclk, ratio * bclk);
997 
998 	get_msr(0, MSR_IA32_POWER_CTL, &msr);
999 	fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
1000 		msr, msr & 0x2 ? "EN" : "DIS");
1001 
1002 	if (!do_ivt_turbo_ratio_limit)
1003 		goto print_nhm_turbo_ratio_limits;
1004 
1005 	get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr);
1006 
1007 	fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
1008 
1009 	ratio = (msr >> 56) & 0xFF;
1010 	if (ratio)
1011 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n",
1012 			ratio, bclk, ratio * bclk);
1013 
1014 	ratio = (msr >> 48) & 0xFF;
1015 	if (ratio)
1016 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n",
1017 			ratio, bclk, ratio * bclk);
1018 
1019 	ratio = (msr >> 40) & 0xFF;
1020 	if (ratio)
1021 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n",
1022 			ratio, bclk, ratio * bclk);
1023 
1024 	ratio = (msr >> 32) & 0xFF;
1025 	if (ratio)
1026 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n",
1027 			ratio, bclk, ratio * bclk);
1028 
1029 	ratio = (msr >> 24) & 0xFF;
1030 	if (ratio)
1031 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n",
1032 			ratio, bclk, ratio * bclk);
1033 
1034 	ratio = (msr >> 16) & 0xFF;
1035 	if (ratio)
1036 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n",
1037 			ratio, bclk, ratio * bclk);
1038 
1039 	ratio = (msr >> 8) & 0xFF;
1040 	if (ratio)
1041 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n",
1042 			ratio, bclk, ratio * bclk);
1043 
1044 	ratio = (msr >> 0) & 0xFF;
1045 	if (ratio)
1046 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
1047 			ratio, bclk, ratio * bclk);
1048 
1049 print_nhm_turbo_ratio_limits:
1050 	get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
1051 
1052 #define SNB_C1_AUTO_UNDEMOTE              (1UL << 27)
1053 #define SNB_C3_AUTO_UNDEMOTE              (1UL << 28)
1054 
1055 	fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr);
1056 
1057 	fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: ",
1058 		(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
1059 		(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
1060 		(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
1061 		(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
1062 		(msr & (1 << 15)) ? "" : "UN",
1063 		(unsigned int)msr & 7);
1064 
1065 
1066 	switch(msr & 0x7) {
1067 	case 0:
1068 		fprintf(stderr, do_slm_cstates ? "no pkg states" : "pc0");
1069 		break;
1070 	case 1:
1071 		fprintf(stderr, do_slm_cstates ? "no pkg states" : do_snb_cstates ? "pc2" : "pc0");
1072 		break;
1073 	case 2:
1074 		fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc6-noret" : "pc3");
1075 		break;
1076 	case 3:
1077 		fprintf(stderr, do_slm_cstates ? "invalid" : "pc6");
1078 		break;
1079 	case 4:
1080 		fprintf(stderr, do_slm_cstates ? "pc4" : "pc7");
1081 		break;
1082 	case 5:
1083 		fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc7s" : "invalid");
1084 		break;
1085 	case 6:
1086 		fprintf(stderr, do_slm_cstates ? "pc6" : "invalid");
1087 		break;
1088 	case 7:
1089 		fprintf(stderr, do_slm_cstates ? "pc7" : "unlimited");
1090 		break;
1091 	default:
1092 		fprintf(stderr, "invalid");
1093 	}
1094 	fprintf(stderr, ")\n");
1095 
1096 	if (!do_nehalem_turbo_ratio_limit)
1097 		return;
1098 
1099 	get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
1100 
1101 	fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
1102 
1103 	ratio = (msr >> 56) & 0xFF;
1104 	if (ratio)
1105 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n",
1106 			ratio, bclk, ratio * bclk);
1107 
1108 	ratio = (msr >> 48) & 0xFF;
1109 	if (ratio)
1110 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n",
1111 			ratio, bclk, ratio * bclk);
1112 
1113 	ratio = (msr >> 40) & 0xFF;
1114 	if (ratio)
1115 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 6 active cores\n",
1116 			ratio, bclk, ratio * bclk);
1117 
1118 	ratio = (msr >> 32) & 0xFF;
1119 	if (ratio)
1120 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n",
1121 			ratio, bclk, ratio * bclk);
1122 
1123 	ratio = (msr >> 24) & 0xFF;
1124 	if (ratio)
1125 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
1126 			ratio, bclk, ratio * bclk);
1127 
1128 	ratio = (msr >> 16) & 0xFF;
1129 	if (ratio)
1130 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
1131 			ratio, bclk, ratio * bclk);
1132 
1133 	ratio = (msr >> 8) & 0xFF;
1134 	if (ratio)
1135 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
1136 			ratio, bclk, ratio * bclk);
1137 
1138 	ratio = (msr >> 0) & 0xFF;
1139 	if (ratio)
1140 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
1141 			ratio, bclk, ratio * bclk);
1142 }
1143 
1144 void free_all_buffers(void)
1145 {
1146 	CPU_FREE(cpu_present_set);
1147 	cpu_present_set = NULL;
1148 	cpu_present_set = 0;
1149 
1150 	CPU_FREE(cpu_affinity_set);
1151 	cpu_affinity_set = NULL;
1152 	cpu_affinity_setsize = 0;
1153 
1154 	free(thread_even);
1155 	free(core_even);
1156 	free(package_even);
1157 
1158 	thread_even = NULL;
1159 	core_even = NULL;
1160 	package_even = NULL;
1161 
1162 	free(thread_odd);
1163 	free(core_odd);
1164 	free(package_odd);
1165 
1166 	thread_odd = NULL;
1167 	core_odd = NULL;
1168 	package_odd = NULL;
1169 
1170 	free(output_buffer);
1171 	output_buffer = NULL;
1172 	outp = NULL;
1173 }
1174 
/*
 * cpu_is_first_sibling_in_core(cpu)
 * return 1 if given CPU is 1st HT sibling in the core
 *
 * Reads the first number in the cpu's thread_siblings_list sysfs file
 * and compares it with 'cpu'.  Exits if the file cannot be opened or
 * does not start with a number.
 */
int cpu_is_first_sibling_in_core(int cpu)
{
	char path[80];
	FILE *filep;
	int first_cpu;

	/* bounded formatting: the path can never overrun the buffer */
	snprintf(path, sizeof(path),
		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
	filep = fopen(path, "r");
	if (filep == NULL) {
		perror(path);
		exit(1);
	}
	/* without this check first_cpu would be used uninitialized */
	if (fscanf(filep, "%d", &first_cpu) != 1) {
		fprintf(stderr, "%s: failed to parse file\n", path);
		exit(1);
	}
	fclose(filep);
	return (cpu == first_cpu);
}
1195 
/*
 * cpu_is_first_core_in_package(cpu)
 * return 1 if given CPU is 1st core in package
 *
 * Reads the first number in the cpu's core_siblings_list sysfs file
 * and compares it with 'cpu'.  Exits if the file cannot be opened or
 * does not start with a number.
 */
int cpu_is_first_core_in_package(int cpu)
{
	char path[80];
	FILE *filep;
	int first_cpu;

	/* bounded formatting: the path can never overrun the buffer */
	snprintf(path, sizeof(path),
		"/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);
	filep = fopen(path, "r");
	if (filep == NULL) {
		perror(path);
		exit(1);
	}
	/* without this check first_cpu would be used uninitialized */
	if (fscanf(filep, "%d", &first_cpu) != 1) {
		fprintf(stderr, "%s: failed to parse file\n", path);
		exit(1);
	}
	fclose(filep);
	return (cpu == first_cpu);
}
1216 
/*
 * get_physical_package_id(cpu)
 * return the number stored in the cpu's physical_package_id sysfs file.
 * Exits if the file cannot be opened or does not start with a number.
 */
int get_physical_package_id(int cpu)
{
	char path[80];
	FILE *filep;
	int pkg;

	snprintf(path, sizeof(path),
		"/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
	filep = fopen(path, "r");
	if (filep == NULL) {
		perror(path);
		exit(1);
	}
	/* without this check pkg would be returned uninitialized */
	if (fscanf(filep, "%d", &pkg) != 1) {
		fprintf(stderr, "%s: failed to parse file\n", path);
		exit(1);
	}
	fclose(filep);
	return pkg;
}
1233 
/*
 * get_core_id(cpu)
 * return the number stored in the cpu's core_id sysfs file.
 * Exits if the file cannot be opened or does not start with a number.
 */
int get_core_id(int cpu)
{
	char path[80];
	FILE *filep;
	int core;

	snprintf(path, sizeof(path),
		"/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
	filep = fopen(path, "r");
	if (filep == NULL) {
		perror(path);
		exit(1);
	}
	/* without this check core would be returned uninitialized */
	if (fscanf(filep, "%d", &core) != 1) {
		fprintf(stderr, "%s: failed to parse file\n", path);
		exit(1);
	}
	fclose(filep);
	return core;
}
1250 
/*
 * get_num_ht_siblings(cpu)
 * return 2 when the cpu's thread_siblings_list starts with two numbers
 * separated by a single character, otherwise return 1.
 * Exits if the file cannot be opened.
 */
int get_num_ht_siblings(int cpu)
{
	char path[80];
	FILE *filep;
	int first, second;
	int fields;
	char separator;

	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);

	filep = fopen(path, "r");
	if (filep == NULL) {
		perror(path);
		exit(1);
	}

	/*
	 * file format:
	 * a pair of numbers with a character between them means
	 * 2 siblings (eg. 1-2, or 1,4), otherwise 1 sibling (self).
	 */
	fields = fscanf(filep, "%d%c%d\n", &first, &separator, &second);
	fclose(filep);

	return (fields == 3) ? 2 : 1;
}
1279 
1280 /*
1281  * run func(thread, core, package) in topology order
1282  * skip non-present cpus
1283  */
1284 
1285 int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
1286 	struct pkg_data *, struct thread_data *, struct core_data *,
1287 	struct pkg_data *), struct thread_data *thread_base,
1288 	struct core_data *core_base, struct pkg_data *pkg_base,
1289 	struct thread_data *thread_base2, struct core_data *core_base2,
1290 	struct pkg_data *pkg_base2)
1291 {
1292 	int retval, pkg_no, core_no, thread_no;
1293 
1294 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
1295 		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
1296 			for (thread_no = 0; thread_no <
1297 				topo.num_threads_per_core; ++thread_no) {
1298 				struct thread_data *t, *t2;
1299 				struct core_data *c, *c2;
1300 				struct pkg_data *p, *p2;
1301 
1302 				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
1303 
1304 				if (cpu_is_not_present(t->cpu_id))
1305 					continue;
1306 
1307 				t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no);
1308 
1309 				c = GET_CORE(core_base, core_no, pkg_no);
1310 				c2 = GET_CORE(core_base2, core_no, pkg_no);
1311 
1312 				p = GET_PKG(pkg_base, pkg_no);
1313 				p2 = GET_PKG(pkg_base2, pkg_no);
1314 
1315 				retval = func(t, c, p, t2, c2, p2);
1316 				if (retval)
1317 					return retval;
1318 			}
1319 		}
1320 	}
1321 	return 0;
1322 }
1323 
1324 /*
1325  * run func(cpu) on every cpu in /proc/stat
1326  * return max_cpu number
1327  */
1328 int for_all_proc_cpus(int (func)(int))
1329 {
1330 	FILE *fp;
1331 	int cpu_num;
1332 	int retval;
1333 
1334 	fp = fopen(proc_stat, "r");
1335 	if (fp == NULL) {
1336 		perror(proc_stat);
1337 		exit(1);
1338 	}
1339 
1340 	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
1341 	if (retval != 0) {
1342 		perror("/proc/stat format");
1343 		exit(1);
1344 	}
1345 
1346 	while (1) {
1347 		retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
1348 		if (retval != 1)
1349 			break;
1350 
1351 		retval = func(cpu_num);
1352 		if (retval) {
1353 			fclose(fp);
1354 			return(retval);
1355 		}
1356 	}
1357 	fclose(fp);
1358 	return 0;
1359 }
1360 
1361 void re_initialize(void)
1362 {
1363 	free_all_buffers();
1364 	setup_all_buffers();
1365 	printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
1366 }
1367 
1368 
1369 /*
1370  * count_cpus()
1371  * remember the last one seen, it will be the max
1372  */
1373 int count_cpus(int cpu)
1374 {
1375 	if (topo.max_cpu_num < cpu)
1376 		topo.max_cpu_num = cpu;
1377 
1378 	topo.num_cpus += 1;
1379 	return 0;
1380 }
1381 int mark_cpu_present(int cpu)
1382 {
1383 	CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
1384 	return 0;
1385 }
1386 
1387 void turbostat_loop()
1388 {
1389 	int retval;
1390 	int restarted = 0;
1391 
1392 restart:
1393 	restarted++;
1394 
1395 	retval = for_all_cpus(get_counters, EVEN_COUNTERS);
1396 	if (retval < -1) {
1397 		exit(retval);
1398 	} else if (retval == -1) {
1399 		if (restarted > 1) {
1400 			exit(retval);
1401 		}
1402 		re_initialize();
1403 		goto restart;
1404 	}
1405 	restarted = 0;
1406 	gettimeofday(&tv_even, (struct timezone *)NULL);
1407 
1408 	while (1) {
1409 		if (for_all_proc_cpus(cpu_is_not_present)) {
1410 			re_initialize();
1411 			goto restart;
1412 		}
1413 		sleep(interval_sec);
1414 		retval = for_all_cpus(get_counters, ODD_COUNTERS);
1415 		if (retval < -1) {
1416 			exit(retval);
1417 		} else if (retval == -1) {
1418 			re_initialize();
1419 			goto restart;
1420 		}
1421 		gettimeofday(&tv_odd, (struct timezone *)NULL);
1422 		timersub(&tv_odd, &tv_even, &tv_delta);
1423 		for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
1424 		compute_average(EVEN_COUNTERS);
1425 		format_all_counters(EVEN_COUNTERS);
1426 		flush_stdout();
1427 		sleep(interval_sec);
1428 		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
1429 		if (retval < -1) {
1430 			exit(retval);
1431 		} else if (retval == -1) {
1432 			re_initialize();
1433 			goto restart;
1434 		}
1435 		gettimeofday(&tv_even, (struct timezone *)NULL);
1436 		timersub(&tv_even, &tv_odd, &tv_delta);
1437 		for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS);
1438 		compute_average(ODD_COUNTERS);
1439 		format_all_counters(ODD_COUNTERS);
1440 		flush_stdout();
1441 	}
1442 }
1443 
/*
 * check_dev_msr()
 * Exit with a hint when /dev/cpu/0/msr cannot be stat'ed.
 */
void check_dev_msr()
{
	struct stat sb;

	if (stat("/dev/cpu/0/msr", &sb) == 0)
		return;

	fprintf(stderr, "no /dev/cpu/0/msr\n");
	fprintf(stderr, "Try \"# modprobe msr\"\n");
	exit(-5);
}
1454 
/*
 * check_super_user()
 * Exit unless the real user id of the process is 0.
 */
void check_super_user()
{
	if (getuid() == 0)
		return;

	fprintf(stderr, "must be root\n");
	exit(-6);
}
1462 
1463 int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
1464 {
1465 	if (!genuine_intel)
1466 		return 0;
1467 
1468 	if (family != 6)
1469 		return 0;
1470 
1471 	switch (model) {
1472 	case 0x1A:	/* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
1473 	case 0x1E:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
1474 	case 0x1F:	/* Core i7 and i5 Processor - Nehalem */
1475 	case 0x25:	/* Westmere Client - Clarkdale, Arrandale */
1476 	case 0x2C:	/* Westmere EP - Gulftown */
1477 	case 0x2A:	/* SNB */
1478 	case 0x2D:	/* SNB Xeon */
1479 	case 0x3A:	/* IVB */
1480 	case 0x3E:	/* IVB Xeon */
1481 	case 0x3C:	/* HSW */
1482 	case 0x3F:	/* HSW */
1483 	case 0x45:	/* HSW */
1484 	case 0x46:	/* HSW */
1485 	case 0x37:	/* BYT */
1486 	case 0x4D:	/* AVN */
1487 		return 1;
1488 	case 0x2E:	/* Nehalem-EX Xeon - Beckton */
1489 	case 0x2F:	/* Westmere-EX Xeon - Eagleton */
1490 	default:
1491 		return 0;
1492 	}
1493 }
1494 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
1495 {
1496 	if (!genuine_intel)
1497 		return 0;
1498 
1499 	if (family != 6)
1500 		return 0;
1501 
1502 	switch (model) {
1503 	case 0x3E:	/* IVB Xeon */
1504 		return 1;
1505 	default:
1506 		return 0;
1507 	}
1508 }
1509 
1510 /*
1511  * print_epb()
1512  * Decode the ENERGY_PERF_BIAS MSR
1513  */
1514 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1515 {
1516 	unsigned long long msr;
1517 	char *epb_string;
1518 	int cpu;
1519 
1520 	if (!has_epb)
1521 		return 0;
1522 
1523 	cpu = t->cpu_id;
1524 
1525 	/* EPB is per-package */
1526 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1527 		return 0;
1528 
1529 	if (cpu_migrate(cpu)) {
1530 		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
1531 		return -1;
1532 	}
1533 
1534 	if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
1535 		return 0;
1536 
1537 	switch (msr & 0x7) {
1538 	case ENERGY_PERF_BIAS_PERFORMANCE:
1539 		epb_string = "performance";
1540 		break;
1541 	case ENERGY_PERF_BIAS_NORMAL:
1542 		epb_string = "balanced";
1543 		break;
1544 	case ENERGY_PERF_BIAS_POWERSAVE:
1545 		epb_string = "powersave";
1546 		break;
1547 	default:
1548 		epb_string = "custom";
1549 		break;
1550 	}
1551 	fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
1552 
1553 	return 0;
1554 }
1555 
1556 #define	RAPL_POWER_GRANULARITY	0x7FFF	/* 15 bit power granularity */
1557 #define	RAPL_TIME_GRANULARITY	0x3F /* 6 bit time granularity */
1558 
1559 double get_tdp(model)
1560 {
1561 	unsigned long long msr;
1562 
1563 	if (do_rapl & RAPL_PKG_POWER_INFO)
1564 		if (!get_msr(0, MSR_PKG_POWER_INFO, &msr))
1565 			return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
1566 
1567 	switch (model) {
1568 	case 0x37:
1569 	case 0x4D:
1570 		return 30.0;
1571 	default:
1572 		return 135.0;
1573 	}
1574 }
1575 
1576 
1577 /*
1578  * rapl_probe()
1579  *
1580  * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
1581  */
1582 void rapl_probe(unsigned int family, unsigned int model)
1583 {
1584 	unsigned long long msr;
1585 	unsigned int time_unit;
1586 	double tdp;
1587 
1588 	if (!genuine_intel)
1589 		return;
1590 
1591 	if (family != 6)
1592 		return;
1593 
1594 	switch (model) {
1595 	case 0x2A:
1596 	case 0x3A:
1597 	case 0x3C:	/* HSW */
1598 	case 0x3F:	/* HSW */
1599 	case 0x45:	/* HSW */
1600 	case 0x46:	/* HSW */
1601 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
1602 		break;
1603 	case 0x2D:
1604 	case 0x3E:
1605 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
1606 		break;
1607 	case 0x37:	/* BYT */
1608 	case 0x4D:	/* AVN */
1609 		do_rapl = RAPL_PKG | RAPL_CORES ;
1610 		break;
1611 	default:
1612 		return;
1613 	}
1614 
1615 	/* units on package 0, verify later other packages match */
1616 	if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr))
1617 		return;
1618 
1619 	rapl_power_units = 1.0 / (1 << (msr & 0xF));
1620 	if (model == 0x37)
1621 		rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
1622 	else
1623 		rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
1624 
1625 	time_unit = msr >> 16 & 0xF;
1626 	if (time_unit == 0)
1627 		time_unit = 0xA;
1628 
1629 	rapl_time_units = 1.0 / (1 << (time_unit));
1630 
1631 	tdp = get_tdp(model);
1632 
1633 	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
1634 	if (verbose)
1635 		fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
1636 
1637 	return;
1638 }
1639 
1640 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1641 {
1642 	unsigned long long msr;
1643 	unsigned int dts;
1644 	int cpu;
1645 
1646 	if (!(do_dts || do_ptm))
1647 		return 0;
1648 
1649 	cpu = t->cpu_id;
1650 
1651 	/* DTS is per-core, no need to print for each thread */
1652 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1653 		return 0;
1654 
1655 	if (cpu_migrate(cpu)) {
1656 		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
1657 		return -1;
1658 	}
1659 
1660 	if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
1661 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
1662 			return 0;
1663 
1664 		dts = (msr >> 16) & 0x7F;
1665 		fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
1666 			cpu, msr, tcc_activation_temp - dts);
1667 
1668 #ifdef	THERM_DEBUG
1669 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
1670 			return 0;
1671 
1672 		dts = (msr >> 16) & 0x7F;
1673 		dts2 = (msr >> 8) & 0x7F;
1674 		fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
1675 			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
1676 #endif
1677 	}
1678 
1679 
1680 	if (do_dts) {
1681 		unsigned int resolution;
1682 
1683 		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
1684 			return 0;
1685 
1686 		dts = (msr >> 16) & 0x7F;
1687 		resolution = (msr >> 27) & 0xF;
1688 		fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
1689 			cpu, msr, tcc_activation_temp - dts, resolution);
1690 
1691 #ifdef THERM_DEBUG
1692 		if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
1693 			return 0;
1694 
1695 		dts = (msr >> 16) & 0x7F;
1696 		dts2 = (msr >> 8) & 0x7F;
1697 		fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
1698 			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
1699 #endif
1700 	}
1701 
1702 	return 0;
1703 }
1704 
1705 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
1706 {
1707 	fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
1708 		cpu, label,
1709 		((msr >> 15) & 1) ? "EN" : "DIS",
1710 		((msr >> 0) & 0x7FFF) * rapl_power_units,
1711 		(1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
1712 		(((msr >> 16) & 1) ? "EN" : "DIS"));
1713 
1714 	return;
1715 }
1716 
1717 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1718 {
1719 	unsigned long long msr;
1720 	int cpu;
1721 
1722 	if (!do_rapl)
1723 		return 0;
1724 
1725 	/* RAPL counters are per package, so print only for 1st thread/package */
1726 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1727 		return 0;
1728 
1729 	cpu = t->cpu_id;
1730 	if (cpu_migrate(cpu)) {
1731 		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
1732 		return -1;
1733 	}
1734 
1735 	if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
1736 		return -1;
1737 
1738 	if (verbose) {
1739 		fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
1740 			"(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
1741 			rapl_power_units, rapl_energy_units, rapl_time_units);
1742 	}
1743 	if (do_rapl & RAPL_PKG_POWER_INFO) {
1744 
1745 		if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
1746                 	return -5;
1747 
1748 
1749 		fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
1750 			cpu, msr,
1751 			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1752 			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1753 			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1754 			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
1755 
1756 	}
1757 	if (do_rapl & RAPL_PKG) {
1758 
1759 		if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
1760 			return -9;
1761 
1762 		fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
1763 			cpu, msr, (msr >> 63) & 1 ? "": "UN");
1764 
1765 		print_power_limit_msr(cpu, msr, "PKG Limit #1");
1766 		fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
1767 			cpu,
1768 			((msr >> 47) & 1) ? "EN" : "DIS",
1769 			((msr >> 32) & 0x7FFF) * rapl_power_units,
1770 			(1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
1771 			((msr >> 48) & 1) ? "EN" : "DIS");
1772 	}
1773 
1774 	if (do_rapl & RAPL_DRAM) {
1775 		if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
1776                 	return -6;
1777 
1778 
1779 		fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
1780 			cpu, msr,
1781 			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1782 			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1783 			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1784 			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
1785 
1786 
1787 		if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
1788 			return -9;
1789 		fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
1790 				cpu, msr, (msr >> 31) & 1 ? "": "UN");
1791 
1792 		print_power_limit_msr(cpu, msr, "DRAM Limit");
1793 	}
1794 	if (do_rapl & RAPL_CORE_POLICY) {
1795 		if (verbose) {
1796 			if (get_msr(cpu, MSR_PP0_POLICY, &msr))
1797 				return -7;
1798 
1799 			fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
1800 		}
1801 	}
1802 	if (do_rapl & RAPL_CORES) {
1803 		if (verbose) {
1804 
1805 			if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
1806 				return -9;
1807 			fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
1808 					cpu, msr, (msr >> 31) & 1 ? "": "UN");
1809 			print_power_limit_msr(cpu, msr, "Cores Limit");
1810 		}
1811 	}
1812 	if (do_rapl & RAPL_GFX) {
1813 		if (verbose) {
1814 			if (get_msr(cpu, MSR_PP1_POLICY, &msr))
1815 				return -8;
1816 
1817 			fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
1818 
1819 			if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
1820 				return -9;
1821 			fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
1822 					cpu, msr, (msr >> 31) & 1 ? "": "UN");
1823 			print_power_limit_msr(cpu, msr, "GFX Limit");
1824 		}
1825 	}
1826 	return 0;
1827 }
1828 
1829 
1830 int is_snb(unsigned int family, unsigned int model)
1831 {
1832 	if (!genuine_intel)
1833 		return 0;
1834 
1835 	switch (model) {
1836 	case 0x2A:
1837 	case 0x2D:
1838 	case 0x3A:	/* IVB */
1839 	case 0x3E:	/* IVB Xeon */
1840 	case 0x3C:	/* HSW */
1841 	case 0x3F:	/* HSW */
1842 	case 0x45:	/* HSW */
1843 	case 0x46:	/* HSW */
1844 		return 1;
1845 	}
1846 	return 0;
1847 }
1848 
1849 int has_c8_c9_c10(unsigned int family, unsigned int model)
1850 {
1851 	if (!genuine_intel)
1852 		return 0;
1853 
1854 	switch (model) {
1855 	case 0x45:
1856 		return 1;
1857 	}
1858 	return 0;
1859 }
1860 
1861 
1862 int is_slm(unsigned int family, unsigned int model)
1863 {
1864 	if (!genuine_intel)
1865 		return 0;
1866 	switch (model) {
1867 	case 0x37:	/* BYT */
1868 	case 0x4D:	/* AVN */
1869 		return 1;
1870 	}
1871 	return 0;
1872 }
1873 
1874 #define SLM_BCLK_FREQS 5
1875 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
1876 
1877 double slm_bclk(void)
1878 {
1879 	unsigned long long msr = 3;
1880 	unsigned int i;
1881 	double freq;
1882 
1883 	if (get_msr(0, MSR_FSB_FREQ, &msr))
1884 		fprintf(stderr, "SLM BCLK: unknown\n");
1885 
1886 	i = msr & 0xf;
1887 	if (i >= SLM_BCLK_FREQS) {
1888 		fprintf(stderr, "SLM BCLK[%d] invalid\n", i);
1889 		msr = 3;
1890 	}
1891 	freq = slm_freq_table[i];
1892 
1893 	fprintf(stderr, "SLM BCLK: %.1f Mhz\n", freq);
1894 
1895 	return freq;
1896 }
1897 
/*
 * discover_bclk()
 * return the bus clock in MHz: 100.00 when is_snb() matches, the value
 * reported by slm_bclk() when is_slm() matches, and 133.33 otherwise.
 */
double discover_bclk(unsigned int family, unsigned int model)
{
	if (is_snb(family, model))
		return 100.00;

	if (is_slm(family, model))
		return slm_bclk();

	return 133.33;
}
1907 
1908 /*
1909  * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
1910  * the Thermal Control Circuit (TCC) activates.
1911  * This is usually equal to tjMax.
1912  *
1913  * Older processors do not have this MSR, so there we guess,
1914  * but also allow cmdline over-ride with -T.
1915  *
1916  * Several MSR temperature values are in units of degrees-C
1917  * below this value, including the Digital Thermal Sensor (DTS),
1918  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
1919  */
1920 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1921 {
1922 	unsigned long long msr;
1923 	unsigned int target_c_local;
1924 	int cpu;
1925 
1926 	/* tcc_activation_temp is used only for dts or ptm */
1927 	if (!(do_dts || do_ptm))
1928 		return 0;
1929 
1930 	/* this is a per-package concept */
1931 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1932 		return 0;
1933 
1934 	cpu = t->cpu_id;
1935 	if (cpu_migrate(cpu)) {
1936 		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
1937 		return -1;
1938 	}
1939 
1940 	if (tcc_activation_temp_override != 0) {
1941 		tcc_activation_temp = tcc_activation_temp_override;
1942 		fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n",
1943 			cpu, tcc_activation_temp);
1944 		return 0;
1945 	}
1946 
1947 	/* Temperature Target MSR is Nehalem and newer only */
1948 	if (!do_nehalem_platform_info)
1949 		goto guess;
1950 
1951 	if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr))
1952 		goto guess;
1953 
1954 	target_c_local = (msr >> 16) & 0x7F;
1955 
1956 	if (verbose)
1957 		fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
1958 			cpu, msr, target_c_local);
1959 
1960 	if (target_c_local < 85 || target_c_local > 127)
1961 		goto guess;
1962 
1963 	tcc_activation_temp = target_c_local;
1964 
1965 	return 0;
1966 
1967 guess:
1968 	tcc_activation_temp = TJMAX_DEFAULT;
1969 	fprintf(stderr, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
1970 		cpu, tcc_activation_temp);
1971 
1972 	return 0;
1973 }
1974 void check_cpuid()
1975 {
1976 	unsigned int eax, ebx, ecx, edx, max_level;
1977 	unsigned int fms, family, model, stepping;
1978 
1979 	eax = ebx = ecx = edx = 0;
1980 
1981 	asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0));
1982 
1983 	if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
1984 		genuine_intel = 1;
1985 
1986 	if (verbose)
1987 		fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ",
1988 			(char *)&ebx, (char *)&edx, (char *)&ecx);
1989 
1990 	asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx");
1991 	family = (fms >> 8) & 0xf;
1992 	model = (fms >> 4) & 0xf;
1993 	stepping = fms & 0xf;
1994 	if (family == 6 || family == 0xf)
1995 		model += ((fms >> 16) & 0xf) << 4;
1996 
1997 	if (verbose)
1998 		fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
1999 			max_level, family, model, stepping, family, model, stepping);
2000 
2001 	if (!(edx & (1 << 5))) {
2002 		fprintf(stderr, "CPUID: no MSR\n");
2003 		exit(1);
2004 	}
2005 
2006 	/*
2007 	 * check max extended function levels of CPUID.
2008 	 * This is needed to check for invariant TSC.
2009 	 * This check is valid for both Intel and AMD.
2010 	 */
2011 	ebx = ecx = edx = 0;
2012 	asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000000));
2013 
2014 	if (max_level < 0x80000007) {
2015 		fprintf(stderr, "CPUID: no invariant TSC (max_level 0x%x)\n", max_level);
2016 		exit(1);
2017 	}
2018 
2019 	/*
2020 	 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
2021 	 * this check is valid for both Intel and AMD
2022 	 */
2023 	asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000007));
2024 	has_invariant_tsc = edx & (1 << 8);
2025 
2026 	if (!has_invariant_tsc) {
2027 		fprintf(stderr, "No invariant TSC\n");
2028 		exit(1);
2029 	}
2030 
2031 	/*
2032 	 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
2033 	 * this check is valid for both Intel and AMD
2034 	 */
2035 
2036 	asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6));
2037 	has_aperf = ecx & (1 << 0);
2038 	do_dts = eax & (1 << 0);
2039 	do_ptm = eax & (1 << 6);
2040 	has_epb = ecx & (1 << 3);
2041 
2042 	if (verbose)
2043 		fprintf(stderr, "CPUID(6): %s%s%s%s\n",
2044 			has_aperf ? "APERF" : "No APERF!",
2045 			do_dts ? ", DTS" : "",
2046 			do_ptm ? ", PTM": "",
2047 			has_epb ? ", EPB": "");
2048 
2049 	if (!has_aperf)
2050 		exit(-1);
2051 
2052 	do_nehalem_platform_info = genuine_intel && has_invariant_tsc;
2053 	do_nhm_cstates = genuine_intel;	/* all Intel w/ non-stop TSC have NHM counters */
2054 	do_smi = do_nhm_cstates;
2055 	do_snb_cstates = is_snb(family, model);
2056 	do_c8_c9_c10 = has_c8_c9_c10(family, model);
2057 	do_slm_cstates = is_slm(family, model);
2058 	bclk = discover_bclk(family, model);
2059 
2060 	do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model);
2061 	do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model);
2062 	rapl_probe(family, model);
2063 
2064 	return;
2065 }
2066 
2067 
2068 void usage()
2069 {
2070 	fprintf(stderr, "%s: [-v][-R][-T][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n",
2071 		progname);
2072 	exit(1);
2073 }
2074 
2075 
/*
 * in /dev/cpu/ return success for names that are numbers
 * ie. filter out ".", "..", "microcode".
 *
 * Returns 1 when the first character of the entry name is a decimal
 * digit, otherwise 0.
 */
int dir_filter(const struct dirent *dirp)
{
	/*
	 * isdigit() requires an argument representable as unsigned char
	 * (or EOF); a plain char may be negative, so convert it first.
	 */
	if (isdigit((unsigned char)dirp->d_name[0]))
		return 1;
	else
		return 0;
}
2087 
/*
 * open_dev_cpu_msr()
 * Placeholder: ignores its argument and always returns 0.
 */
int open_dev_cpu_msr(int dummy1)
{
	(void)dummy1;

	return 0;
}
2092 
2093 void topology_probe()
2094 {
2095 	int i;
2096 	int max_core_id = 0;
2097 	int max_package_id = 0;
2098 	int max_siblings = 0;
2099 	struct cpu_topology {
2100 		int core_id;
2101 		int physical_package_id;
2102 	} *cpus;
2103 
2104 	/* Initialize num_cpus, max_cpu_num */
2105 	topo.num_cpus = 0;
2106 	topo.max_cpu_num = 0;
2107 	for_all_proc_cpus(count_cpus);
2108 	if (!summary_only && topo.num_cpus > 1)
2109 		show_cpu = 1;
2110 
2111 	if (verbose > 1)
2112 		fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
2113 
2114 	cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology));
2115 	if (cpus == NULL) {
2116 		perror("calloc cpus");
2117 		exit(1);
2118 	}
2119 
2120 	/*
2121 	 * Allocate and initialize cpu_present_set
2122 	 */
2123 	cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
2124 	if (cpu_present_set == NULL) {
2125 		perror("CPU_ALLOC");
2126 		exit(3);
2127 	}
2128 	cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
2129 	CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
2130 	for_all_proc_cpus(mark_cpu_present);
2131 
2132 	/*
2133 	 * Allocate and initialize cpu_affinity_set
2134 	 */
2135 	cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
2136 	if (cpu_affinity_set == NULL) {
2137 		perror("CPU_ALLOC");
2138 		exit(3);
2139 	}
2140 	cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
2141 	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
2142 
2143 
2144 	/*
2145 	 * For online cpus
2146 	 * find max_core_id, max_package_id
2147 	 */
2148 	for (i = 0; i <= topo.max_cpu_num; ++i) {
2149 		int siblings;
2150 
2151 		if (cpu_is_not_present(i)) {
2152 			if (verbose > 1)
2153 				fprintf(stderr, "cpu%d NOT PRESENT\n", i);
2154 			continue;
2155 		}
2156 		cpus[i].core_id = get_core_id(i);
2157 		if (cpus[i].core_id > max_core_id)
2158 			max_core_id = cpus[i].core_id;
2159 
2160 		cpus[i].physical_package_id = get_physical_package_id(i);
2161 		if (cpus[i].physical_package_id > max_package_id)
2162 			max_package_id = cpus[i].physical_package_id;
2163 
2164 		siblings = get_num_ht_siblings(i);
2165 		if (siblings > max_siblings)
2166 			max_siblings = siblings;
2167 		if (verbose > 1)
2168 			fprintf(stderr, "cpu %d pkg %d core %d\n",
2169 				i, cpus[i].physical_package_id, cpus[i].core_id);
2170 	}
2171 	topo.num_cores_per_pkg = max_core_id + 1;
2172 	if (verbose > 1)
2173 		fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n",
2174 			max_core_id, topo.num_cores_per_pkg);
2175 	if (!summary_only && topo.num_cores_per_pkg > 1)
2176 		show_core = 1;
2177 
2178 	topo.num_packages = max_package_id + 1;
2179 	if (verbose > 1)
2180 		fprintf(stderr, "max_package_id %d, sizing for %d packages\n",
2181 			max_package_id, topo.num_packages);
2182 	if (!summary_only && topo.num_packages > 1)
2183 		show_pkg = 1;
2184 
2185 	topo.num_threads_per_core = max_siblings;
2186 	if (verbose > 1)
2187 		fprintf(stderr, "max_siblings %d\n", max_siblings);
2188 
2189 	free(cpus);
2190 }
2191 
2192 void
2193 allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
2194 {
2195 	int i;
2196 
2197 	*t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg *
2198 		topo.num_packages, sizeof(struct thread_data));
2199 	if (*t == NULL)
2200 		goto error;
2201 
2202 	for (i = 0; i < topo.num_threads_per_core *
2203 		topo.num_cores_per_pkg * topo.num_packages; i++)
2204 		(*t)[i].cpu_id = -1;
2205 
2206 	*c = calloc(topo.num_cores_per_pkg * topo.num_packages,
2207 		sizeof(struct core_data));
2208 	if (*c == NULL)
2209 		goto error;
2210 
2211 	for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++)
2212 		(*c)[i].core_id = -1;
2213 
2214 	*p = calloc(topo.num_packages, sizeof(struct pkg_data));
2215 	if (*p == NULL)
2216 		goto error;
2217 
2218 	for (i = 0; i < topo.num_packages; i++)
2219 		(*p)[i].package_id = i;
2220 
2221 	return;
2222 error:
2223 	perror("calloc counters");
2224 	exit(1);
2225 }
2226 /*
2227  * init_counter()
2228  *
2229  * set cpu_id, core_num, pkg_num
2230  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
2231  *
2232  * increment topo.num_cores when 1st core in pkg seen
2233  */
2234 void init_counter(struct thread_data *thread_base, struct core_data *core_base,
2235 	struct pkg_data *pkg_base, int thread_num, int core_num,
2236 	int pkg_num, int cpu_id)
2237 {
2238 	struct thread_data *t;
2239 	struct core_data *c;
2240 	struct pkg_data *p;
2241 
2242 	t = GET_THREAD(thread_base, thread_num, core_num, pkg_num);
2243 	c = GET_CORE(core_base, core_num, pkg_num);
2244 	p = GET_PKG(pkg_base, pkg_num);
2245 
2246 	t->cpu_id = cpu_id;
2247 	if (thread_num == 0) {
2248 		t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
2249 		if (cpu_is_first_core_in_package(cpu_id))
2250 			t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
2251 	}
2252 
2253 	c->core_id = core_num;
2254 	p->package_id = pkg_num;
2255 }
2256 
2257 
2258 int initialize_counters(int cpu_id)
2259 {
2260 	int my_thread_id, my_core_id, my_package_id;
2261 
2262 	my_package_id = get_physical_package_id(cpu_id);
2263 	my_core_id = get_core_id(cpu_id);
2264 
2265 	if (cpu_is_first_sibling_in_core(cpu_id)) {
2266 		my_thread_id = 0;
2267 		topo.num_cores++;
2268 	} else {
2269 		my_thread_id = 1;
2270 	}
2271 
2272 	init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
2273 	init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
2274 	return 0;
2275 }
2276 
2277 void allocate_output_buffer()
2278 {
2279 	output_buffer = calloc(1, (1 + topo.num_cpus) * 256);
2280 	outp = output_buffer;
2281 	if (outp == NULL) {
2282 		perror("calloc");
2283 		exit(-1);
2284 	}
2285 }
2286 
/*
 * setup_all_buffers()
 *
 * Probe the topology, then allocate and initialize everything sized
 * from it.  The order matters: topology_probe() fills in the topo
 * fields that the allocators below read.
 */
void setup_all_buffers(void)
{
	/* sets topo.num_cpus, num_packages, num_cores_per_pkg, num_threads_per_core */
	topology_probe();
	/* two counter sets, "even" and "odd", each sized from topo */
	allocate_counters(&thread_even, &core_even, &package_even);
	allocate_counters(&thread_odd, &core_odd, &package_odd);
	/* output buffer is sized from topo.num_cpus */
	allocate_output_buffer();
	/* fill in the per-cpu slots of both counter sets */
	for_all_proc_cpus(initialize_counters);
}
2295 void turbostat_init()
2296 {
2297 	check_cpuid();
2298 
2299 	check_dev_msr();
2300 	check_super_user();
2301 
2302 	setup_all_buffers();
2303 
2304 	if (verbose)
2305 		print_verbose_header();
2306 
2307 	if (verbose)
2308 		for_all_cpus(print_epb, ODD_COUNTERS);
2309 
2310 	if (verbose)
2311 		for_all_cpus(print_rapl, ODD_COUNTERS);
2312 
2313 	for_all_cpus(set_temperature_target, ODD_COUNTERS);
2314 
2315 	if (verbose)
2316 		for_all_cpus(print_thermal, ODD_COUNTERS);
2317 }
2318 
2319 int fork_it(char **argv)
2320 {
2321 	pid_t child_pid;
2322 	int status;
2323 
2324 	status = for_all_cpus(get_counters, EVEN_COUNTERS);
2325 	if (status)
2326 		exit(status);
2327 	/* clear affinity side-effect of get_counters() */
2328 	sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
2329 	gettimeofday(&tv_even, (struct timezone *)NULL);
2330 
2331 	child_pid = fork();
2332 	if (!child_pid) {
2333 		/* child */
2334 		execvp(argv[0], argv);
2335 	} else {
2336 
2337 		/* parent */
2338 		if (child_pid == -1) {
2339 			perror("fork");
2340 			exit(1);
2341 		}
2342 
2343 		signal(SIGINT, SIG_IGN);
2344 		signal(SIGQUIT, SIG_IGN);
2345 		if (waitpid(child_pid, &status, 0) == -1) {
2346 			perror("wait");
2347 			exit(status);
2348 		}
2349 	}
2350 	/*
2351 	 * n.b. fork_it() does not check for errors from for_all_cpus()
2352 	 * because re-starting is problematic when forking
2353 	 */
2354 	for_all_cpus(get_counters, ODD_COUNTERS);
2355 	gettimeofday(&tv_odd, (struct timezone *)NULL);
2356 	timersub(&tv_odd, &tv_even, &tv_delta);
2357 	for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
2358 	compute_average(EVEN_COUNTERS);
2359 	format_all_counters(EVEN_COUNTERS);
2360 	flush_stderr();
2361 
2362 	fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
2363 
2364 	return status;
2365 }
2366 
2367 void cmdline(int argc, char **argv)
2368 {
2369 	int opt;
2370 
2371 	progname = argv[0];
2372 
2373 	while ((opt = getopt(argc, argv, "+pPSvi:sc:sC:m:M:RT:")) != -1) {
2374 		switch (opt) {
2375 		case 'p':
2376 			show_core_only++;
2377 			break;
2378 		case 'P':
2379 			show_pkg_only++;
2380 			break;
2381 		case 'S':
2382 			summary_only++;
2383 			break;
2384 		case 'v':
2385 			verbose++;
2386 			break;
2387 		case 'i':
2388 			interval_sec = atoi(optarg);
2389 			break;
2390 		case 'c':
2391 			sscanf(optarg, "%x", &extra_delta_offset32);
2392 			break;
2393 		case 'C':
2394 			sscanf(optarg, "%x", &extra_delta_offset64);
2395 			break;
2396 		case 'm':
2397 			sscanf(optarg, "%x", &extra_msr_offset32);
2398 			break;
2399 		case 'M':
2400 			sscanf(optarg, "%x", &extra_msr_offset64);
2401 			break;
2402 		case 'R':
2403 			rapl_verbose++;
2404 			break;
2405 		case 'T':
2406 			tcc_activation_temp_override = atoi(optarg);
2407 			break;
2408 		default:
2409 			usage();
2410 		}
2411 	}
2412 }
2413 
2414 int main(int argc, char **argv)
2415 {
2416 	cmdline(argc, argv);
2417 
2418 	if (verbose)
2419 		fprintf(stderr, "turbostat v3.5 April 26, 2013"
2420 			" - Len Brown <lenb@kernel.org>\n");
2421 
2422 	turbostat_init();
2423 
2424 	/*
2425 	 * if any params left, it must be a command to fork
2426 	 */
2427 	if (argc - optind)
2428 		return fork_it(argv + optind);
2429 	else
2430 		turbostat_loop();
2431 
2432 	return 0;
2433 }
2434