1 /*
2  * turbostat -- show CPU frequency and C-state residency
3  * on modern Intel turbo-capable processors.
4  *
5  * Copyright (c) 2012 Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21 
22 #define _GNU_SOURCE
23 #include <asm/msr.h>
24 #include <stdio.h>
25 #include <unistd.h>
26 #include <sys/types.h>
27 #include <sys/wait.h>
28 #include <sys/stat.h>
29 #include <sys/resource.h>
30 #include <fcntl.h>
31 #include <signal.h>
32 #include <sys/time.h>
33 #include <stdlib.h>
34 #include <dirent.h>
35 #include <string.h>
36 #include <ctype.h>
37 #include <sched.h>
38 
39 char *proc_stat = "/proc/stat";
40 unsigned int interval_sec = 5;	/* set with -i interval_sec */
41 unsigned int verbose;		/* set with -v */
42 unsigned int rapl_verbose;	/* set with -R */
43 unsigned int thermal_verbose;	/* set with -T */
44 unsigned int summary_only;	/* set with -s */
unsigned int skip_c0;		/* print **** for %c0 when APERF/MPERF are unusable */
unsigned int skip_c1;		/* print **** for %c1 when APERF/MPERF are unusable */
47 unsigned int do_nhm_cstates;
48 unsigned int do_snb_cstates;
49 unsigned int has_aperf;
50 unsigned int has_epb;
unsigned int units = 1000000000;	/* GHz etc */
52 unsigned int genuine_intel;
53 unsigned int has_invariant_tsc;
54 unsigned int do_nehalem_platform_info;
55 unsigned int do_nehalem_turbo_ratio_limit;
56 unsigned int do_ivt_turbo_ratio_limit;
57 unsigned int extra_msr_offset32;
58 unsigned int extra_msr_offset64;
59 unsigned int extra_delta_offset32;
60 unsigned int extra_delta_offset64;
61 double bclk;
62 unsigned int show_pkg;
63 unsigned int show_core;
64 unsigned int show_cpu;
65 unsigned int show_pkg_only;
66 unsigned int show_core_only;
67 char *output_buffer, *outp;
68 unsigned int do_rapl;
69 unsigned int do_dts;
70 unsigned int do_ptm;
71 unsigned int tcc_activation_temp;
72 unsigned int tcc_activation_temp_override;
73 double rapl_power_units, rapl_energy_units, rapl_time_units;
74 double rapl_joule_counter_range;
75 
76 #define RAPL_PKG	(1 << 0)
77 #define RAPL_CORES	(1 << 1)
78 #define RAPL_GFX	(1 << 2)
79 #define RAPL_DRAM	(1 << 3)
80 #define RAPL_PKG_PERF_STATUS	(1 << 4)
81 #define RAPL_DRAM_PERF_STATUS	(1 << 5)
82 #define	TJMAX_DEFAULT	100
83 
84 #define MAX(a, b) ((a) > (b) ? (a) : (b))
85 
86 int aperf_mperf_unstable;
87 int backwards_count;
88 char *progname;
89 
90 cpu_set_t *cpu_present_set, *cpu_affinity_set;
91 size_t cpu_present_setsize, cpu_affinity_setsize;
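
/*
 * Counters are collected into two sets ("even" and "odd");
 * turbostat_loop() samples them alternately and prints the deltas.
 */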
92 
93 struct thread_data {
94 	unsigned long long tsc;
95 	unsigned long long aperf;
96 	unsigned long long mperf;
97 	unsigned long long c1;	/* derived */
98 	unsigned long long extra_msr64;
99 	unsigned long long extra_delta64;
100 	unsigned long long extra_msr32;
101 	unsigned long long extra_delta32;
102 	unsigned int cpu_id;
103 	unsigned int flags;
104 #define CPU_IS_FIRST_THREAD_IN_CORE	0x2
105 #define CPU_IS_FIRST_CORE_IN_PACKAGE	0x4
106 } *thread_even, *thread_odd;
107 
108 struct core_data {
109 	unsigned long long c3;
110 	unsigned long long c6;
111 	unsigned long long c7;
112 	unsigned int core_temp_c;
113 	unsigned int core_id;
114 } *core_even, *core_odd;
115 
116 struct pkg_data {
117 	unsigned long long pc2;
118 	unsigned long long pc3;
119 	unsigned long long pc6;
120 	unsigned long long pc7;
121 	unsigned int package_id;
122 	unsigned int energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
123 	unsigned int energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
124 	unsigned int energy_cores;	/* MSR_PP0_ENERGY_STATUS */
125 	unsigned int energy_gfx;	/* MSR_PP1_ENERGY_STATUS */
126 	unsigned int rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */
127 	unsigned int rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */
128 	unsigned int pkg_temp_c;
129 
130 } *package_even, *package_odd;
131 
132 #define ODD_COUNTERS thread_odd, core_odd, package_odd
133 #define EVEN_COUNTERS thread_even, core_even, package_even
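
/*
 * Counter arrays are laid out package-major; GET_THREAD() below computes
 * thread index = pkg_no * cores_per_pkg * threads_per_core
 *		+ core_no * threads_per_core + thread_no
 */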
134 
135 #define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \
136 	(thread_base + (pkg_no) * topo.num_cores_per_pkg * \
137 		topo.num_threads_per_core + \
138 		(core_no) * topo.num_threads_per_core + (thread_no))
139 #define GET_CORE(core_base, core_no, pkg_no) \
140 	(core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no))
141 #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
142 
143 struct system_summary {
144 	struct thread_data threads;
145 	struct core_data cores;
146 	struct pkg_data packages;
147 } sum, average;
148 
149 
150 struct topo_params {
151 	int num_packages;
152 	int num_cpus;
153 	int num_cores;
154 	int max_cpu_num;
155 	int num_cores_per_pkg;
156 	int num_threads_per_core;
157 } topo;
158 
159 struct timeval tv_even, tv_odd, tv_delta;
160 
161 void setup_all_buffers(void);
162 
163 int cpu_is_not_present(int cpu)
164 {
165 	return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
166 }
167 /*
168  * run func(thread, core, package) in topology order
169  * skip non-present cpus
170  */
171 
172 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
173 	struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
174 {
175 	int retval, pkg_no, core_no, thread_no;
176 
177 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
178 		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
179 			for (thread_no = 0; thread_no <
180 				topo.num_threads_per_core; ++thread_no) {
181 				struct thread_data *t;
182 				struct core_data *c;
183 				struct pkg_data *p;
184 
185 				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
186 
187 				if (cpu_is_not_present(t->cpu_id))
188 					continue;
189 
190 				c = GET_CORE(core_base, core_no, pkg_no);
191 				p = GET_PKG(pkg_base, pkg_no);
192 
193 				retval = func(t, c, p);
194 				if (retval)
195 					return retval;
196 			}
197 		}
198 	}
199 	return 0;
200 }
201 
202 int cpu_migrate(int cpu)
203 {
204 	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
205 	CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
206 	if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
207 		return -1;
208 	else
209 		return 0;
210 }
211 
212 int get_msr(int cpu, off_t offset, unsigned long long *msr)
213 {
214 	ssize_t retval;
215 	char pathname[32];
216 	int fd;
217 
218 	sprintf(pathname, "/dev/cpu/%d/msr", cpu);
219 	fd = open(pathname, O_RDONLY);
220 	if (fd < 0)
221 		return -1;
222 
223 	retval = pread(fd, msr, sizeof *msr, offset);
224 	close(fd);
225 
226 	if (retval != sizeof *msr) {
		fprintf(stderr, "%s offset 0x%llx read failed\n",
			pathname, (unsigned long long)offset);
228 		return -1;
229 	}
230 
231 	return 0;
232 }
233 
234 void print_header(void)
235 {
236 	if (show_pkg)
237 		outp += sprintf(outp, "pk");
238 	if (show_pkg)
239 		outp += sprintf(outp, " ");
240 	if (show_core)
241 		outp += sprintf(outp, "cor");
242 	if (show_cpu)
243 		outp += sprintf(outp, " CPU");
244 	if (show_pkg || show_core || show_cpu)
245 		outp += sprintf(outp, " ");
246 	if (do_nhm_cstates)
247 		outp += sprintf(outp, "   %%c0");
248 	if (has_aperf)
249 		outp += sprintf(outp, "  GHz");
250 	outp += sprintf(outp, "  TSC");
251 	if (extra_delta_offset32)
252 		outp += sprintf(outp, "  count 0x%03X", extra_delta_offset32);
253 	if (extra_delta_offset64)
254 		outp += sprintf(outp, "  COUNT 0x%03X", extra_delta_offset64);
255 	if (extra_msr_offset32)
256 		outp += sprintf(outp, "   MSR 0x%03X", extra_msr_offset32);
257 	if (extra_msr_offset64)
258 		outp += sprintf(outp, "           MSR 0x%03X", extra_msr_offset64);
259 	if (do_nhm_cstates)
260 		outp += sprintf(outp, "    %%c1");
261 	if (do_nhm_cstates)
262 		outp += sprintf(outp, "    %%c3");
263 	if (do_nhm_cstates)
264 		outp += sprintf(outp, "    %%c6");
265 	if (do_snb_cstates)
266 		outp += sprintf(outp, "    %%c7");
267 
268 	if (do_dts)
269 		outp += sprintf(outp, " CTMP");
270 	if (do_ptm)
271 		outp += sprintf(outp, " PTMP");
272 
273 	if (do_snb_cstates)
274 		outp += sprintf(outp, "   %%pc2");
275 	if (do_nhm_cstates)
276 		outp += sprintf(outp, "   %%pc3");
277 	if (do_nhm_cstates)
278 		outp += sprintf(outp, "   %%pc6");
279 	if (do_snb_cstates)
280 		outp += sprintf(outp, "   %%pc7");
281 
282 	if (do_rapl & RAPL_PKG)
283 		outp += sprintf(outp, "  Pkg_W");
284 	if (do_rapl & RAPL_CORES)
285 		outp += sprintf(outp, "  Cor_W");
286 	if (do_rapl & RAPL_GFX)
287 		outp += sprintf(outp, " GFX_W");
288 	if (do_rapl & RAPL_DRAM)
289 		outp += sprintf(outp, " RAM_W");
290 	if (do_rapl & RAPL_PKG_PERF_STATUS)
291 		outp += sprintf(outp, " PKG_%%");
292 	if (do_rapl & RAPL_DRAM_PERF_STATUS)
293 		outp += sprintf(outp, " RAM_%%");
294 
295 	outp += sprintf(outp, "\n");
296 }
297 
298 int dump_counters(struct thread_data *t, struct core_data *c,
299 	struct pkg_data *p)
300 {
301 	fprintf(stderr, "t %p, c %p, p %p\n", t, c, p);
302 
303 	if (t) {
304 		fprintf(stderr, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags);
305 		fprintf(stderr, "TSC: %016llX\n", t->tsc);
306 		fprintf(stderr, "aperf: %016llX\n", t->aperf);
307 		fprintf(stderr, "mperf: %016llX\n", t->mperf);
308 		fprintf(stderr, "c1: %016llX\n", t->c1);
309 		fprintf(stderr, "msr0x%x: %08llX\n",
310 			extra_delta_offset32, t->extra_delta32);
311 		fprintf(stderr, "msr0x%x: %016llX\n",
312 			extra_delta_offset64, t->extra_delta64);
313 		fprintf(stderr, "msr0x%x: %08llX\n",
314 			extra_msr_offset32, t->extra_msr32);
315 		fprintf(stderr, "msr0x%x: %016llX\n",
316 			extra_msr_offset64, t->extra_msr64);
317 	}
318 
319 	if (c) {
320 		fprintf(stderr, "core: %d\n", c->core_id);
321 		fprintf(stderr, "c3: %016llX\n", c->c3);
322 		fprintf(stderr, "c6: %016llX\n", c->c6);
323 		fprintf(stderr, "c7: %016llX\n", c->c7);
324 		fprintf(stderr, "DTS: %dC\n", c->core_temp_c);
325 	}
326 
327 	if (p) {
328 		fprintf(stderr, "package: %d\n", p->package_id);
329 		fprintf(stderr, "pc2: %016llX\n", p->pc2);
330 		fprintf(stderr, "pc3: %016llX\n", p->pc3);
331 		fprintf(stderr, "pc6: %016llX\n", p->pc6);
332 		fprintf(stderr, "pc7: %016llX\n", p->pc7);
333 		fprintf(stderr, "Joules PKG: %0X\n", p->energy_pkg);
334 		fprintf(stderr, "Joules COR: %0X\n", p->energy_cores);
335 		fprintf(stderr, "Joules GFX: %0X\n", p->energy_gfx);
336 		fprintf(stderr, "Joules RAM: %0X\n", p->energy_dram);
337 		fprintf(stderr, "Throttle PKG: %0X\n", p->rapl_pkg_perf_status);
338 		fprintf(stderr, "Throttle RAM: %0X\n", p->rapl_dram_perf_status);
339 		fprintf(stderr, "PTM: %dC\n", p->pkg_temp_c);
340 	}
341 	return 0;
342 }
343 
344 /*
345  * column formatting convention & formats
346  * package: "pk" 2 columns %2d
347  * core: "cor" 3 columns %3d
348  * CPU: "CPU" 3 columns %3d
 * Pkg_W: %6.2f
 * Cor_W: %6.2f
 * GFX_W: %5.2f
 * RAM_W: %5.2f
 * GHz: "GHz" 3 columns %3.2f
 * TSC: "TSC" 3 columns %3.2f
 * percentage " %pc3" %6.2f
 * Perf Status percentage: %5.2f
357  * "CTMP" 4 columns %4d
358  */
359 int format_counters(struct thread_data *t, struct core_data *c,
360 	struct pkg_data *p)
361 {
362 	double interval_float;
363 	char *fmt5, *fmt6;
364 
365 	 /* if showing only 1st thread in core and this isn't one, bail out */
366 	if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
367 		return 0;
368 
369 	 /* if showing only 1st thread in pkg and this isn't one, bail out */
370 	if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
371 		return 0;
372 
373 	interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
374 
375 	/* topo columns, print blanks on 1st (average) line */
376 	if (t == &average.threads) {
377 		if (show_pkg)
378 			outp += sprintf(outp, "  ");
379 		if (show_pkg && show_core)
380 			outp += sprintf(outp, " ");
381 		if (show_core)
382 			outp += sprintf(outp, "   ");
383 		if (show_cpu)
384 			outp += sprintf(outp, " " "   ");
385 	} else {
386 		if (show_pkg) {
387 			if (p)
388 				outp += sprintf(outp, "%2d", p->package_id);
389 			else
390 				outp += sprintf(outp, "  ");
391 		}
392 		if (show_pkg && show_core)
393 			outp += sprintf(outp, " ");
394 		if (show_core) {
395 			if (c)
396 				outp += sprintf(outp, "%3d", c->core_id);
397 			else
398 				outp += sprintf(outp, "   ");
399 		}
400 		if (show_cpu)
401 			outp += sprintf(outp, " %3d", t->cpu_id);
402 	}
403 	/* %c0 */
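	/* MPERF ticks at the TSC rate only while the CPU is in C0 (not halted) */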
404 	if (do_nhm_cstates) {
405 		if (show_pkg || show_core || show_cpu)
406 			outp += sprintf(outp, " ");
407 		if (!skip_c0)
408 			outp += sprintf(outp, "%6.2f", 100.0 * t->mperf/t->tsc);
409 		else
410 			outp += sprintf(outp, "  ****");
411 	}
412 
413 	/* GHz */
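	/*
	 * average GHz while busy = (TSC rate) * (aperf delta / mperf delta);
	 * TSC rate is the tsc delta over the interval, scaled to GHz by 'units'
	 */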
414 	if (has_aperf) {
415 		if (!aperf_mperf_unstable) {
416 			outp += sprintf(outp, " %3.2f",
417 				1.0 * t->tsc / units * t->aperf /
418 				t->mperf / interval_float);
419 		} else {
420 			if (t->aperf > t->tsc || t->mperf > t->tsc) {
421 				outp += sprintf(outp, " ***");
422 			} else {
423 				outp += sprintf(outp, "%3.1f*",
424 					1.0 * t->tsc /
425 					units * t->aperf /
426 					t->mperf / interval_float);
427 			}
428 		}
429 	}
430 
431 	/* TSC */
432 	outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float);
433 
434 	/* delta */
435 	if (extra_delta_offset32)
436 		outp += sprintf(outp, "  %11llu", t->extra_delta32);
437 
438 	/* DELTA */
439 	if (extra_delta_offset64)
440 		outp += sprintf(outp, "  %11llu", t->extra_delta64);
441 	/* msr */
442 	if (extra_msr_offset32)
443 		outp += sprintf(outp, "  0x%08llx", t->extra_msr32);
444 
445 	/* MSR */
446 	if (extra_msr_offset64)
447 		outp += sprintf(outp, "  0x%016llx", t->extra_msr64);
448 
449 	if (do_nhm_cstates) {
450 		if (!skip_c1)
451 			outp += sprintf(outp, " %6.2f", 100.0 * t->c1/t->tsc);
452 		else
453 			outp += sprintf(outp, "  ****");
454 	}
455 
456 	/* print per-core data only for 1st thread in core */
457 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
458 		goto done;
459 
460 	if (do_nhm_cstates)
461 		outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc);
462 	if (do_nhm_cstates)
463 		outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc);
464 	if (do_snb_cstates)
465 		outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc);
466 
467 	if (do_dts)
468 		outp += sprintf(outp, " %4d", c->core_temp_c);
469 
470 	/* print per-package data only for 1st core in package */
471 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
472 		goto done;
473 
474 	if (do_ptm)
475 		outp += sprintf(outp, " %4d", p->pkg_temp_c);
476 
477 	if (do_snb_cstates)
478 		outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc);
479 	if (do_nhm_cstates)
480 		outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc);
481 	if (do_nhm_cstates)
482 		outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc);
483 	if (do_snb_cstates)
484 		outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc);
485 
	/*
	 * If measurement interval exceeds minimum RAPL Joule Counter range,
	 * indicate that results are suspect by printing "**" in fraction place.
	 */
490 	if (interval_float < rapl_joule_counter_range) {
491 		fmt5 = " %5.2f";
492 		fmt6 = " %6.2f";
493 	} else {
494 		fmt5 = " %3.0f**";
495 		fmt6 = " %4.0f**";
496 	}
497 
498 	if (do_rapl & RAPL_PKG)
499 		outp += sprintf(outp, fmt6, p->energy_pkg * rapl_energy_units / interval_float);
500 	if (do_rapl & RAPL_CORES)
501 		outp += sprintf(outp, fmt6, p->energy_cores * rapl_energy_units / interval_float);
502 	if (do_rapl & RAPL_GFX)
503 		outp += sprintf(outp, fmt5, p->energy_gfx * rapl_energy_units / interval_float);
504 	if (do_rapl & RAPL_DRAM)
505 		outp += sprintf(outp, fmt5, p->energy_dram * rapl_energy_units / interval_float);
	if (do_rapl & RAPL_PKG_PERF_STATUS)
		outp += sprintf(outp, fmt5, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
	if (do_rapl & RAPL_DRAM_PERF_STATUS)
		outp += sprintf(outp, fmt5, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
510 
511 done:
512 	outp += sprintf(outp, "\n");
513 
514 	return 0;
515 }
516 
517 void flush_stdout()
518 {
519 	fputs(output_buffer, stdout);
520 	fflush(stdout);
521 	outp = output_buffer;
}

523 void flush_stderr()
524 {
525 	fputs(output_buffer, stderr);
526 	outp = output_buffer;
}

528 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
529 {
530 	static int printed;
531 
532 	if (!printed || !summary_only)
533 		print_header();
534 
535 	if (topo.num_cpus > 1)
536 		format_counters(&average.threads, &average.cores,
537 			&average.packages);
538 
539 	printed = 1;
540 
541 	if (summary_only)
542 		return;
543 
544 	for_all_cpus(format_counters, t, c, p);
545 }
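
/*
 * DELTA_WRAP32(new, old): store (new - old) into old, allowing for
 * a single wrap of a 32-bit counter (e.g. the RAPL energy status MSRs).
 */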
546 
547 #define DELTA_WRAP32(new, old)			\
548 	if (new > old) {			\
549 		old = new - old;		\
550 	} else {				\
551 		old = 0x100000000 + new - old;	\
552 	}
553 
554 void
555 delta_package(struct pkg_data *new, struct pkg_data *old)
556 {
557 	old->pc2 = new->pc2 - old->pc2;
558 	old->pc3 = new->pc3 - old->pc3;
559 	old->pc6 = new->pc6 - old->pc6;
560 	old->pc7 = new->pc7 - old->pc7;
561 	old->pkg_temp_c = new->pkg_temp_c;
562 
563 	DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
564 	DELTA_WRAP32(new->energy_cores, old->energy_cores);
565 	DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
566 	DELTA_WRAP32(new->energy_dram, old->energy_dram);
567 	DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
568 	DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
569 }
570 
571 void
572 delta_core(struct core_data *new, struct core_data *old)
573 {
574 	old->c3 = new->c3 - old->c3;
575 	old->c6 = new->c6 - old->c6;
576 	old->c7 = new->c7 - old->c7;
577 	old->core_temp_c = new->core_temp_c;
578 }
579 
580 /*
581  * old = new - old
582  */
583 void
584 delta_thread(struct thread_data *new, struct thread_data *old,
585 	struct core_data *core_delta)
586 {
587 	old->tsc = new->tsc - old->tsc;
588 
589 	/* check for TSC < 1 Mcycles over interval */
590 	if (old->tsc < (1000 * 1000)) {
591 		fprintf(stderr, "Insanely slow TSC rate, TSC stops in idle?\n");
592 		fprintf(stderr, "You can disable all c-states by booting with \"idle=poll\"\n");
593 		fprintf(stderr, "or just the deep ones with \"processor.max_cstate=1\"\n");
594 		exit(-3);
595 	}
596 
597 	old->c1 = new->c1 - old->c1;
598 
599 	if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
600 		old->aperf = new->aperf - old->aperf;
601 		old->mperf = new->mperf - old->mperf;
602 	} else {
603 
604 		if (!aperf_mperf_unstable) {
605 			fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname);
606 			fprintf(stderr, "* Frequency results do not cover entire interval *\n");
607 			fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n");
608 
609 			aperf_mperf_unstable = 1;
610 		}
611 		/*
612 		 * mperf delta is likely a huge "positive" number
613 		 * can not use it for calculating c0 time
614 		 */
615 		skip_c0 = 1;
616 		skip_c1 = 1;
617 	}
618 
619 
620 	/*
621 	 * As counter collection is not atomic,
622 	 * it is possible for mperf's non-halted cycles + idle states
623 	 * to exceed TSC's all cycles: show c1 = 0% in that case.
624 	 */
625 	if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
626 		old->c1 = 0;
627 	else {
628 		/* normal case, derive c1 */
629 		old->c1 = old->tsc - old->mperf - core_delta->c3
630 				- core_delta->c6 - core_delta->c7;
631 	}
632 
633 	if (old->mperf == 0) {
634 		if (verbose > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id);
635 		old->mperf = 1;	/* divide by 0 protection */
636 	}
637 
638 	old->extra_delta32 = new->extra_delta32 - old->extra_delta32;
639 	old->extra_delta32 &= 0xFFFFFFFF;
640 
641 	old->extra_delta64 = new->extra_delta64 - old->extra_delta64;
642 
643 	/*
644 	 * Extra MSR is just a snapshot, simply copy latest w/o subtracting
645 	 */
646 	old->extra_msr32 = new->extra_msr32;
647 	old->extra_msr64 = new->extra_msr64;
648 }
649 
650 int delta_cpu(struct thread_data *t, struct core_data *c,
651 	struct pkg_data *p, struct thread_data *t2,
652 	struct core_data *c2, struct pkg_data *p2)
653 {
654 	/* calculate core delta only for 1st thread in core */
655 	if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
656 		delta_core(c, c2);
657 
658 	/* always calculate thread delta */
659 	delta_thread(t, t2, c2);	/* c2 is core delta */
660 
661 	/* calculate package delta only for 1st core in package */
662 	if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
663 		delta_package(p, p2);
664 
665 	return 0;
666 }
667 
668 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
669 {
670 	t->tsc = 0;
671 	t->aperf = 0;
672 	t->mperf = 0;
673 	t->c1 = 0;
674 
675 	t->extra_delta32 = 0;
676 	t->extra_delta64 = 0;
677 
678 	/* tells format_counters to dump all fields from this set */
679 	t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
680 
681 	c->c3 = 0;
682 	c->c6 = 0;
683 	c->c7 = 0;
684 	c->core_temp_c = 0;
685 
686 	p->pc2 = 0;
687 	p->pc3 = 0;
688 	p->pc6 = 0;
689 	p->pc7 = 0;
690 
691 	p->energy_pkg = 0;
692 	p->energy_dram = 0;
693 	p->energy_cores = 0;
694 	p->energy_gfx = 0;
695 	p->rapl_pkg_perf_status = 0;
696 	p->rapl_dram_perf_status = 0;
697 	p->pkg_temp_c = 0;
}

699 int sum_counters(struct thread_data *t, struct core_data *c,
700 	struct pkg_data *p)
701 {
702 	average.threads.tsc += t->tsc;
703 	average.threads.aperf += t->aperf;
704 	average.threads.mperf += t->mperf;
705 	average.threads.c1 += t->c1;
706 
707 	average.threads.extra_delta32 += t->extra_delta32;
708 	average.threads.extra_delta64 += t->extra_delta64;
709 
710 	/* sum per-core values only for 1st thread in core */
711 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
712 		return 0;
713 
714 	average.cores.c3 += c->c3;
715 	average.cores.c6 += c->c6;
716 	average.cores.c7 += c->c7;
717 
718 	average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
719 
720 	/* sum per-pkg values only for 1st core in pkg */
721 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
722 		return 0;
723 
724 	average.packages.pc2 += p->pc2;
725 	average.packages.pc3 += p->pc3;
726 	average.packages.pc6 += p->pc6;
727 	average.packages.pc7 += p->pc7;
728 
729 	average.packages.energy_pkg += p->energy_pkg;
730 	average.packages.energy_dram += p->energy_dram;
731 	average.packages.energy_cores += p->energy_cores;
732 	average.packages.energy_gfx += p->energy_gfx;
733 
734 	average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
735 
736 	average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
737 	average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
738 	return 0;
739 }
740 /*
741  * sum the counters for all cpus in the system
 * compute the average
743  */
744 void compute_average(struct thread_data *t, struct core_data *c,
745 	struct pkg_data *p)
746 {
747 	clear_counters(&average.threads, &average.cores, &average.packages);
748 
749 	for_all_cpus(sum_counters, t, c, p);
750 
751 	average.threads.tsc /= topo.num_cpus;
752 	average.threads.aperf /= topo.num_cpus;
753 	average.threads.mperf /= topo.num_cpus;
754 	average.threads.c1 /= topo.num_cpus;
755 
756 	average.threads.extra_delta32 /= topo.num_cpus;
757 	average.threads.extra_delta32 &= 0xFFFFFFFF;
758 
759 	average.threads.extra_delta64 /= topo.num_cpus;
760 
761 	average.cores.c3 /= topo.num_cores;
762 	average.cores.c6 /= topo.num_cores;
763 	average.cores.c7 /= topo.num_cores;
764 
765 	average.packages.pc2 /= topo.num_packages;
766 	average.packages.pc3 /= topo.num_packages;
767 	average.packages.pc6 /= topo.num_packages;
768 	average.packages.pc7 /= topo.num_packages;
769 }
770 
771 static unsigned long long rdtsc(void)
772 {
773 	unsigned int low, high;
774 
775 	asm volatile("rdtsc" : "=a" (low), "=d" (high));
776 
777 	return low | ((unsigned long long)high) << 32;
778 }
779 
780 
781 /*
782  * get_counters(...)
783  * migrate to cpu
784  * acquire and record local counters for that cpu
785  */
786 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
787 {
788 	int cpu = t->cpu_id;
789 	unsigned long long msr;
790 
791 	if (cpu_migrate(cpu)) {
792 		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
793 		return -1;
794 	}
795 
796 	t->tsc = rdtsc();	/* we are running on local CPU of interest */
797 
798 	if (has_aperf) {
799 		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
800 			return -3;
801 		if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
802 			return -4;
803 	}
804 
805 	if (extra_delta_offset32) {
806 		if (get_msr(cpu, extra_delta_offset32, &msr))
807 			return -5;
808 		t->extra_delta32 = msr & 0xFFFFFFFF;
809 	}
810 
811 	if (extra_delta_offset64)
812 		if (get_msr(cpu, extra_delta_offset64, &t->extra_delta64))
813 			return -5;
814 
815 	if (extra_msr_offset32) {
816 		if (get_msr(cpu, extra_msr_offset32, &msr))
817 			return -5;
818 		t->extra_msr32 = msr & 0xFFFFFFFF;
819 	}
820 
821 	if (extra_msr_offset64)
822 		if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64))
823 			return -5;
824 
825 	/* collect core counters only for 1st thread in core */
826 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
827 		return 0;
828 
829 	if (do_nhm_cstates) {
830 		if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
831 			return -6;
832 		if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
833 			return -7;
834 	}
835 
836 	if (do_snb_cstates)
837 		if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
838 			return -8;
839 
840 	if (do_dts) {
841 		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
842 			return -9;
		/* thermal status readout is degrees C below TCC activation */
		c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
844 	}
845 
846 
847 	/* collect package counters only for 1st core in package */
848 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
849 		return 0;
850 
851 	if (do_nhm_cstates) {
852 		if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
853 			return -9;
854 		if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
855 			return -10;
856 	}
857 	if (do_snb_cstates) {
858 		if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
859 			return -11;
860 		if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
861 			return -12;
862 	}
863 	if (do_rapl & RAPL_PKG) {
864 		if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
865 			return -13;
866 		p->energy_pkg = msr & 0xFFFFFFFF;
867 	}
868 	if (do_rapl & RAPL_CORES) {
869 		if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
870 			return -14;
871 		p->energy_cores = msr & 0xFFFFFFFF;
872 	}
873 	if (do_rapl & RAPL_DRAM) {
874 		if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
875 			return -15;
876 		p->energy_dram = msr & 0xFFFFFFFF;
877 	}
878 	if (do_rapl & RAPL_GFX) {
879 		if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
880 			return -16;
881 		p->energy_gfx = msr & 0xFFFFFFFF;
882 	}
883 	if (do_rapl & RAPL_PKG_PERF_STATUS) {
884 		if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
885 			return -16;
886 		p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
887 	}
888 	if (do_rapl & RAPL_DRAM_PERF_STATUS) {
889 		if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
890 			return -16;
891 		p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
892 	}
893 	if (do_ptm) {
894 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
895 			return -17;
896 		p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
897 	}
898 	return 0;
899 }
900 
901 void print_verbose_header(void)
902 {
903 	unsigned long long msr;
904 	unsigned int ratio;
905 
906 	if (!do_nehalem_platform_info)
907 		return;
908 
909 	get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);
910 
911 	if (verbose)
912 		fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
913 
914 	ratio = (msr >> 40) & 0xFF;
915 	fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n",
916 		ratio, bclk, ratio * bclk);
917 
918 	ratio = (msr >> 8) & 0xFF;
919 	fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n",
920 		ratio, bclk, ratio * bclk);
921 
922 	if (!do_ivt_turbo_ratio_limit)
923 		goto print_nhm_turbo_ratio_limits;
924 
925 	get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr);
926 
927 	if (verbose)
928 		fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
929 
930 	ratio = (msr >> 56) & 0xFF;
931 	if (ratio)
932 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n",
933 			ratio, bclk, ratio * bclk);
934 
935 	ratio = (msr >> 48) & 0xFF;
936 	if (ratio)
937 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n",
938 			ratio, bclk, ratio * bclk);
939 
940 	ratio = (msr >> 40) & 0xFF;
941 	if (ratio)
942 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n",
943 			ratio, bclk, ratio * bclk);
944 
945 	ratio = (msr >> 32) & 0xFF;
946 	if (ratio)
947 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n",
948 			ratio, bclk, ratio * bclk);
949 
950 	ratio = (msr >> 24) & 0xFF;
951 	if (ratio)
952 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n",
953 			ratio, bclk, ratio * bclk);
954 
955 	ratio = (msr >> 16) & 0xFF;
956 	if (ratio)
957 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n",
958 			ratio, bclk, ratio * bclk);
959 
960 	ratio = (msr >> 8) & 0xFF;
961 	if (ratio)
962 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n",
963 			ratio, bclk, ratio * bclk);
964 
965 	ratio = (msr >> 0) & 0xFF;
966 	if (ratio)
967 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
968 			ratio, bclk, ratio * bclk);
969 
970 print_nhm_turbo_ratio_limits:
971 	get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
972 
973 #define SNB_C1_AUTO_UNDEMOTE              (1UL << 27)
974 #define SNB_C3_AUTO_UNDEMOTE              (1UL << 28)
975 
976 	fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr);
977 
978 	fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: ",
979 		(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
980 		(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
981 		(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
982 		(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
983 		(msr & (1 << 15)) ? "" : "UN",
984 		(unsigned int)msr & 7);
985 
986 
	switch (msr & 0x7) {
988 	case 0:
989 		fprintf(stderr, "pc0");
990 		break;
991 	case 1:
992 		fprintf(stderr, do_snb_cstates ? "pc2" : "pc0");
993 		break;
994 	case 2:
995 		fprintf(stderr, do_snb_cstates ? "pc6-noret" : "pc3");
996 		break;
997 	case 3:
998 		fprintf(stderr, "pc6");
999 		break;
1000 	case 4:
1001 		fprintf(stderr, "pc7");
1002 		break;
1003 	case 5:
1004 		fprintf(stderr, do_snb_cstates ? "pc7s" : "invalid");
1005 		break;
1006 	case 7:
1007 		fprintf(stderr, "unlimited");
1008 		break;
1009 	default:
1010 		fprintf(stderr, "invalid");
1011 	}
1012 	fprintf(stderr, ")\n");
1013 
1014 	if (!do_nehalem_turbo_ratio_limit)
1015 		return;
1016 
1017 	get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
1018 
1019 	if (verbose)
1020 		fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
1021 
1022 	ratio = (msr >> 56) & 0xFF;
1023 	if (ratio)
1024 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n",
1025 			ratio, bclk, ratio * bclk);
1026 
1027 	ratio = (msr >> 48) & 0xFF;
1028 	if (ratio)
1029 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n",
1030 			ratio, bclk, ratio * bclk);
1031 
1032 	ratio = (msr >> 40) & 0xFF;
1033 	if (ratio)
1034 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 6 active cores\n",
1035 			ratio, bclk, ratio * bclk);
1036 
1037 	ratio = (msr >> 32) & 0xFF;
1038 	if (ratio)
1039 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n",
1040 			ratio, bclk, ratio * bclk);
1041 
1042 	ratio = (msr >> 24) & 0xFF;
1043 	if (ratio)
1044 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
1045 			ratio, bclk, ratio * bclk);
1046 
1047 	ratio = (msr >> 16) & 0xFF;
1048 	if (ratio)
1049 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
1050 			ratio, bclk, ratio * bclk);
1051 
1052 	ratio = (msr >> 8) & 0xFF;
1053 	if (ratio)
1054 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
1055 			ratio, bclk, ratio * bclk);
1056 
1057 	ratio = (msr >> 0) & 0xFF;
1058 	if (ratio)
1059 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
1060 			ratio, bclk, ratio * bclk);
1061 }
1062 
1063 void free_all_buffers(void)
1064 {
1065 	CPU_FREE(cpu_present_set);
1066 	cpu_present_set = NULL;
	cpu_present_setsize = 0;
1068 
1069 	CPU_FREE(cpu_affinity_set);
1070 	cpu_affinity_set = NULL;
1071 	cpu_affinity_setsize = 0;
1072 
1073 	free(thread_even);
1074 	free(core_even);
1075 	free(package_even);
1076 
1077 	thread_even = NULL;
1078 	core_even = NULL;
1079 	package_even = NULL;
1080 
1081 	free(thread_odd);
1082 	free(core_odd);
1083 	free(package_odd);
1084 
1085 	thread_odd = NULL;
1086 	core_odd = NULL;
1087 	package_odd = NULL;
1088 
1089 	free(output_buffer);
1090 	output_buffer = NULL;
1091 	outp = NULL;
1092 }
1093 
1094 /*
1095  * cpu_is_first_sibling_in_core(cpu)
1096  * return 1 if given CPU is 1st HT sibling in the core
1097  */
1098 int cpu_is_first_sibling_in_core(int cpu)
1099 {
1100 	char path[64];
1101 	FILE *filep;
1102 	int first_cpu;
1103 
1104 	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
1105 	filep = fopen(path, "r");
1106 	if (filep == NULL) {
1107 		perror(path);
1108 		exit(1);
1109 	}
1110 	fscanf(filep, "%d", &first_cpu);
1111 	fclose(filep);
1112 	return (cpu == first_cpu);
1113 }
1114 
1115 /*
1116  * cpu_is_first_core_in_package(cpu)
1117  * return 1 if given CPU is 1st core in package
1118  */
1119 int cpu_is_first_core_in_package(int cpu)
1120 {
1121 	char path[64];
1122 	FILE *filep;
1123 	int first_cpu;
1124 
1125 	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);
1126 	filep = fopen(path, "r");
1127 	if (filep == NULL) {
1128 		perror(path);
1129 		exit(1);
1130 	}
1131 	fscanf(filep, "%d", &first_cpu);
1132 	fclose(filep);
1133 	return (cpu == first_cpu);
1134 }
1135 
1136 int get_physical_package_id(int cpu)
1137 {
1138 	char path[80];
1139 	FILE *filep;
1140 	int pkg;
1141 
1142 	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
1143 	filep = fopen(path, "r");
1144 	if (filep == NULL) {
1145 		perror(path);
1146 		exit(1);
1147 	}
1148 	fscanf(filep, "%d", &pkg);
1149 	fclose(filep);
1150 	return pkg;
1151 }
1152 
1153 int get_core_id(int cpu)
1154 {
1155 	char path[80];
1156 	FILE *filep;
1157 	int core;
1158 
1159 	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
1160 	filep = fopen(path, "r");
1161 	if (filep == NULL) {
1162 		perror(path);
1163 		exit(1);
1164 	}
1165 	fscanf(filep, "%d", &core);
1166 	fclose(filep);
1167 	return core;
1168 }
1169 
1170 int get_num_ht_siblings(int cpu)
1171 {
1172 	char path[80];
1173 	FILE *filep;
1174 	int sib1, sib2;
1175 	int matches;
1176 	char character;
1177 
1178 	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
1179 	filep = fopen(path, "r");
1180 	if (filep == NULL) {
1181 		perror(path);
1182 		exit(1);
1183 	}
1184 	/*
1185 	 * file format:
	 * a pair of numbers separated by a character: 2 siblings (e.g. 1-2, or 1,4)
	 * otherwise: 1 sibling (the cpu itself).
1188 	 */
1189 	matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2);
1190 
1191 	fclose(filep);
1192 
1193 	if (matches == 3)
1194 		return 2;
1195 	else
1196 		return 1;
1197 }
1198 
/*
 * run func(t, c, p, t2, c2, p2) in topology order,
 * passing corresponding entries from both counter sets
 * skip non-present cpus
 */
1203 
1204 int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
1205 	struct pkg_data *, struct thread_data *, struct core_data *,
1206 	struct pkg_data *), struct thread_data *thread_base,
1207 	struct core_data *core_base, struct pkg_data *pkg_base,
1208 	struct thread_data *thread_base2, struct core_data *core_base2,
1209 	struct pkg_data *pkg_base2)
1210 {
1211 	int retval, pkg_no, core_no, thread_no;
1212 
1213 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
1214 		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
1215 			for (thread_no = 0; thread_no <
1216 				topo.num_threads_per_core; ++thread_no) {
1217 				struct thread_data *t, *t2;
1218 				struct core_data *c, *c2;
1219 				struct pkg_data *p, *p2;
1220 
1221 				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
1222 
1223 				if (cpu_is_not_present(t->cpu_id))
1224 					continue;
1225 
1226 				t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no);
1227 
1228 				c = GET_CORE(core_base, core_no, pkg_no);
1229 				c2 = GET_CORE(core_base2, core_no, pkg_no);
1230 
1231 				p = GET_PKG(pkg_base, pkg_no);
1232 				p2 = GET_PKG(pkg_base2, pkg_no);
1233 
1234 				retval = func(t, c, p, t2, c2, p2);
1235 				if (retval)
1236 					return retval;
1237 			}
1238 		}
1239 	}
1240 	return 0;
1241 }
1242 
1243 /*
1244  * run func(cpu) on every cpu in /proc/stat
1245  * return max_cpu number
1246  */
1247 int for_all_proc_cpus(int (func)(int))
1248 {
1249 	FILE *fp;
1250 	int cpu_num;
1251 	int retval;
1252 
1253 	fp = fopen(proc_stat, "r");
1254 	if (fp == NULL) {
1255 		perror(proc_stat);
1256 		exit(1);
1257 	}
1258 
1259 	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
1260 	if (retval != 0) {
1261 		perror("/proc/stat format");
1262 		exit(1);
1263 	}
1264 
1265 	while (1) {
1266 		retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
1267 		if (retval != 1)
1268 			break;
1269 
1270 		retval = func(cpu_num);
1271 		if (retval) {
1272 			fclose(fp);
			return retval;
1274 		}
1275 	}
1276 	fclose(fp);
1277 	return 0;
1278 }
1279 
1280 void re_initialize(void)
1281 {
1282 	free_all_buffers();
1283 	setup_all_buffers();
1284 	printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
1285 }
1286 
1287 
1288 /*
1289  * count_cpus()
1290  * remember the last one seen, it will be the max
1291  */
1292 int count_cpus(int cpu)
1293 {
1294 	if (topo.max_cpu_num < cpu)
1295 		topo.max_cpu_num = cpu;
1296 
1297 	topo.num_cpus += 1;
1298 	return 0;
}

1300 int mark_cpu_present(int cpu)
1301 {
1302 	CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
1303 	return 0;
1304 }
1305 
1306 void turbostat_loop()
1307 {
1308 	int retval;
1309 	int restarted = 0;
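	/*
	 * Sample counters into the EVEN set, sleep, sample into the ODD set,
	 * print the delta; then repeat with the roles of the two sets reversed,
	 * so no interval is lost to processing.
	 */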
1310 
1311 restart:
1312 	restarted++;
1313 
1314 	retval = for_all_cpus(get_counters, EVEN_COUNTERS);
1315 	if (retval < -1) {
1316 		exit(retval);
1317 	} else if (retval == -1) {
1318 		if (restarted > 1) {
1319 			exit(retval);
1320 		}
1321 		re_initialize();
1322 		goto restart;
1323 	}
1324 	restarted = 0;
1325 	gettimeofday(&tv_even, (struct timezone *)NULL);
1326 
1327 	while (1) {
1328 		if (for_all_proc_cpus(cpu_is_not_present)) {
1329 			re_initialize();
1330 			goto restart;
1331 		}
1332 		sleep(interval_sec);
1333 		retval = for_all_cpus(get_counters, ODD_COUNTERS);
1334 		if (retval < -1) {
1335 			exit(retval);
1336 		} else if (retval == -1) {
1337 			re_initialize();
1338 			goto restart;
1339 		}
1340 		gettimeofday(&tv_odd, (struct timezone *)NULL);
1341 		timersub(&tv_odd, &tv_even, &tv_delta);
1342 		for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
1343 		compute_average(EVEN_COUNTERS);
1344 		format_all_counters(EVEN_COUNTERS);
1345 		flush_stdout();
1346 		sleep(interval_sec);
1347 		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
1348 		if (retval < -1) {
1349 			exit(retval);
1350 		} else if (retval == -1) {
1351 			re_initialize();
1352 			goto restart;
1353 		}
1354 		gettimeofday(&tv_even, (struct timezone *)NULL);
1355 		timersub(&tv_even, &tv_odd, &tv_delta);
1356 		for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS);
1357 		compute_average(ODD_COUNTERS);
1358 		format_all_counters(ODD_COUNTERS);
1359 		flush_stdout();
1360 	}
1361 }
1362 
1363 void check_dev_msr()
1364 {
1365 	struct stat sb;
1366 
1367 	if (stat("/dev/cpu/0/msr", &sb)) {
1368 		fprintf(stderr, "no /dev/cpu/0/msr\n");
1369 		fprintf(stderr, "Try \"# modprobe msr\"\n");
1370 		exit(-5);
1371 	}
1372 }
1373 
1374 void check_super_user()
1375 {
1376 	if (getuid() != 0) {
1377 		fprintf(stderr, "must be root\n");
1378 		exit(-6);
1379 	}
1380 }
1381 
1382 int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
1383 {
1384 	if (!genuine_intel)
1385 		return 0;
1386 
1387 	if (family != 6)
1388 		return 0;
1389 
1390 	switch (model) {
	case 0x1A:	/* Core i7, Xeon 5500 series - Bloomfield, Gainestown NHM-EP */
1392 	case 0x1E:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
1393 	case 0x1F:	/* Core i7 and i5 Processor - Nehalem */
1394 	case 0x25:	/* Westmere Client - Clarkdale, Arrandale */
1395 	case 0x2C:	/* Westmere EP - Gulftown */
1396 	case 0x2A:	/* SNB */
1397 	case 0x2D:	/* SNB Xeon */
1398 	case 0x3A:	/* IVB */
1399 	case 0x3E:	/* IVB Xeon */
1400 		return 1;
1401 	case 0x2E:	/* Nehalem-EX Xeon - Beckton */
1402 	case 0x2F:	/* Westmere-EX Xeon - Eagleton */
1403 	default:
1404 		return 0;
1405 	}
}

1407 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
1408 {
1409 	if (!genuine_intel)
1410 		return 0;
1411 
1412 	if (family != 6)
1413 		return 0;
1414 
1415 	switch (model) {
1416 	case 0x3E:	/* IVB Xeon */
1417 		return 1;
1418 	default:
1419 		return 0;
1420 	}
1421 }
1422 
1423 /*
1424  * print_epb()
1425  * Decode the ENERGY_PERF_BIAS MSR
1426  */
1427 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1428 {
1429 	unsigned long long msr;
1430 	char *epb_string;
1431 	int cpu;
1432 
1433 	if (!has_epb)
1434 		return 0;
1435 
1436 	cpu = t->cpu_id;
1437 
1438 	/* EPB is per-package */
1439 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1440 		return 0;
1441 
1442 	if (cpu_migrate(cpu)) {
1443 		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
1444 		return -1;
1445 	}
1446 
1447 	if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
1448 		return 0;
1449 
1450 	switch (msr & 0x7) {
1451 	case ENERGY_PERF_BIAS_PERFORMANCE:
1452 		epb_string = "performance";
1453 		break;
1454 	case ENERGY_PERF_BIAS_NORMAL:
1455 		epb_string = "balanced";
1456 		break;
1457 	case ENERGY_PERF_BIAS_POWERSAVE:
1458 		epb_string = "powersave";
1459 		break;
1460 	default:
1461 		epb_string = "custom";
1462 		break;
1463 	}
1464 	fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
1465 
1466 	return 0;
1467 }
1468 
1469 #define	RAPL_POWER_GRANULARITY	0x7FFF	/* 15 bit power granularity */
1470 #define	RAPL_TIME_GRANULARITY	0x3F /* 6 bit time granularity */
1471 
1472 /*
1473  * rapl_probe()
1474  *
1475  * sets do_rapl
1476  */
1477 void rapl_probe(unsigned int family, unsigned int model)
1478 {
1479 	unsigned long long msr;
1480 	double tdp;
1481 
1482 	if (!genuine_intel)
1483 		return;
1484 
1485 	if (family != 6)
1486 		return;
1487 
1488 	switch (model) {
	case 0x2A:	/* SNB */
	case 0x3A:	/* IVB */
		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX;
		break;
	case 0x2D:	/* SNB Xeon */
	case 0x3E:	/* IVB Xeon */
1495 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS;
1496 		break;
1497 	default:
1498 		return;
1499 	}
1500 
1501 	/* units on package 0, verify later other packages match */
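	/*
	 * MSR_RAPL_POWER_UNIT layout, as decoded below:
	 *  bits  3:0  power units,  1/2^N Watts
	 *  bits 12:8  energy units, 1/2^N Joules
	 *  bits 19:16 time units,   1/2^N seconds
	 */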
1502 	if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr))
1503 		return;
1504 
1505 	rapl_power_units = 1.0 / (1 << (msr & 0xF));
1506 	rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
1507 	rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF));
1508 
1509 	/* get TDP to determine energy counter range */
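	/*
	 * The 32-bit energy status counters wrap after
	 * 2^32 * rapl_energy_units Joules; dividing by TDP Watts
	 * gives the minimum counter range in seconds, computed below.
	 */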
1510 	if (get_msr(0, MSR_PKG_POWER_INFO, &msr))
1511 		return;
1512 
1513 	tdp = ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
1514 
1515 	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
1516 
1517 	if (verbose)
1518 		fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range\n", rapl_joule_counter_range);
1519 
1520 	return;
1521 }
1522 
1523 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1524 {
1525 	unsigned long long msr;
1526 	unsigned int dts;
1527 	int cpu;
1528 
1529 	if (!(do_dts || do_ptm))
1530 		return 0;
1531 
1532 	cpu = t->cpu_id;
1533 
1534 	/* DTS is per-core, no need to print for each thread */
1535 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1536 		return 0;
1537 
1538 	if (cpu_migrate(cpu)) {
1539 		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
1540 		return -1;
1541 	}
1542 
1543 	if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
1544 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
1545 			return 0;
1546 
1547 		dts = (msr >> 16) & 0x7F;
1548 		fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
1549 			cpu, msr, tcc_activation_temp - dts);
1550 
1551 #ifdef	THERM_DEBUG
1552 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
1553 			return 0;
1554 
1555 		dts = (msr >> 16) & 0x7F;
1556 		dts2 = (msr >> 8) & 0x7F;
1557 		fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
1558 			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
1559 #endif
1560 	}
1561 
1562 
1563 	if (do_dts) {
1564 		unsigned int resolution;
1565 
1566 		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
1567 			return 0;
1568 
1569 		dts = (msr >> 16) & 0x7F;
1570 		resolution = (msr >> 27) & 0xF;
1571 		fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
1572 			cpu, msr, tcc_activation_temp - dts, resolution);
1573 
1574 #ifdef THERM_DEBUG
1575 		if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
1576 			return 0;
1577 
1578 		dts = (msr >> 16) & 0x7F;
1579 		dts2 = (msr >> 8) & 0x7F;
1580 		fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
1581 			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
1582 #endif
1583 	}
1584 
1585 	return 0;
1586 }
1587 
1588 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
1589 {
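	/*
	 * RAPL power-limit fields decoded below:
	 *  bits 14:0  power limit, in rapl_power_units
	 *  bit  15    limit enabled
	 *  bit  16    clamping enabled
	 *  bits 21:17 time window exponent E
	 *  bits 23:22 time window fraction F: window = (1 + F/4) * 2^E time units
	 */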
1590 	fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
1591 		cpu, label,
1592 		((msr >> 15) & 1) ? "EN" : "DIS",
1593 		((msr >> 0) & 0x7FFF) * rapl_power_units,
1594 		(1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
1595 		(((msr >> 16) & 1) ? "EN" : "DIS"));
1596 
1597 	return;
1598 }
1599 
1600 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1601 {
1602 	unsigned long long msr;
1603 	int cpu;
1604 	double local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units;
1605 
1606 	if (!do_rapl)
1607 		return 0;
1608 
1609 	/* RAPL counters are per package, so print only for 1st thread/package */
1610 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1611 		return 0;
1612 
1613 	cpu = t->cpu_id;
1614 	if (cpu_migrate(cpu)) {
1615 		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
1616 		return -1;
1617 	}
1618 
1619 	if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
1620 		return -1;
1621 
1622 	local_rapl_power_units = 1.0 / (1 << (msr & 0xF));
1623 	local_rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
1624 	local_rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF));
1625 
1626 	if (local_rapl_power_units != rapl_power_units)
1627 		fprintf(stderr, "cpu%d, ERROR: Power units mis-match\n", cpu);
1628 	if (local_rapl_energy_units != rapl_energy_units)
1629 		fprintf(stderr, "cpu%d, ERROR: Energy units mis-match\n", cpu);
1630 	if (local_rapl_time_units != rapl_time_units)
1631 		fprintf(stderr, "cpu%d, ERROR: Time units mis-match\n", cpu);
1632 
1633 	if (verbose) {
1634 		fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
1635 			"(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
1636 			local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units);
1637 	}
1638 	if (do_rapl & RAPL_PKG) {
1639 		if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
			return -5;
1641 
1642 
1643 		fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
1644 			cpu, msr,
1645 			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1646 			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1647 			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1648 			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
1649 
1650 		if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
1651 			return -9;
1652 
1653 		fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
1654 			cpu, msr, (msr >> 63) & 1 ? "": "UN");
1655 
1656 		print_power_limit_msr(cpu, msr, "PKG Limit #1");
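		/* PKG Limit #2 occupies the upper 32 bits of the same MSR */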
1657 		fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
1658 			cpu,
1659 			((msr >> 47) & 1) ? "EN" : "DIS",
1660 			((msr >> 32) & 0x7FFF) * rapl_power_units,
1661 			(1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
1662 			((msr >> 48) & 1) ? "EN" : "DIS");
1663 	}
1664 
1665 	if (do_rapl & RAPL_DRAM) {
1666 		if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
			return -6;
1668 
1669 
		fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
1671 			cpu, msr,
1672 			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1673 			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1674 			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1675 			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
1676 
1677 
1678 		if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
1679 			return -9;
1680 		fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
1681 				cpu, msr, (msr >> 31) & 1 ? "": "UN");
1682 
1683 		print_power_limit_msr(cpu, msr, "DRAM Limit");
1684 	}
1685 	if (do_rapl & RAPL_CORES) {
1686 		if (verbose) {
1687 			if (get_msr(cpu, MSR_PP0_POLICY, &msr))
1688 				return -7;
1689 
1690 			fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
1691 
1692 			if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
1693 				return -9;
1694 			fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
1695 					cpu, msr, (msr >> 31) & 1 ? "": "UN");
1696 			print_power_limit_msr(cpu, msr, "Cores Limit");
1697 		}
1698 	}
1699 	if (do_rapl & RAPL_GFX) {
1700 		if (verbose) {
1701 			if (get_msr(cpu, MSR_PP1_POLICY, &msr))
1702 				return -8;
1703 
1704 			fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
1705 
1706 			if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
1707 				return -9;
1708 			fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
1709 					cpu, msr, (msr >> 31) & 1 ? "": "UN");
1710 			print_power_limit_msr(cpu, msr, "GFX Limit");
1711 		}
1712 	}
1713 	return 0;
1714 }
1715 
1716 
1717 int is_snb(unsigned int family, unsigned int model)
1718 {
1719 	if (!genuine_intel)
1720 		return 0;
1721 
1722 	switch (model) {
	case 0x2A:	/* SNB */
	case 0x2D:	/* SNB Xeon */
1725 	case 0x3A:	/* IVB */
1726 	case 0x3E:	/* IVB Xeon */
1727 		return 1;
1728 	}
1729 	return 0;
1730 }
1731 
1732 double discover_bclk(unsigned int family, unsigned int model)
1733 {
1734 	if (is_snb(family, model))
1735 		return 100.00;
1736 	else
1737 		return 133.33;
1738 }
1739 
1740 /*
1741  * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
1742  * the Thermal Control Circuit (TCC) activates.
1743  * This is usually equal to tjMax.
1744  *
1745  * Older processors do not have this MSR, so there we guess,
1746  * but also allow cmdline over-ride with -T.
1747  *
1748  * Several MSR temperature values are in units of degrees-C
1749  * below this value, including the Digital Thermal Sensor (DTS),
1750  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
1751  */
1752 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1753 {
1754 	unsigned long long msr;
1755 	unsigned int target_c_local;
1756 	int cpu;
1757 
1758 	/* tcc_activation_temp is used only for dts or ptm */
1759 	if (!(do_dts || do_ptm))
1760 		return 0;
1761 
1762 	/* this is a per-package concept */
1763 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1764 		return 0;
1765 
1766 	cpu = t->cpu_id;
1767 	if (cpu_migrate(cpu)) {
1768 		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
1769 		return -1;
1770 	}
1771 
1772 	if (tcc_activation_temp_override != 0) {
1773 		tcc_activation_temp = tcc_activation_temp_override;
1774 		fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n",
1775 			cpu, tcc_activation_temp);
1776 		return 0;
1777 	}
1778 
1779 	/* Temperature Target MSR is Nehalem and newer only */
1780 	if (!do_nehalem_platform_info)
1781 		goto guess;
1782 
1783 	if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr))
1784 		goto guess;
1785 
1786 	target_c_local = (msr >> 16) & 0x7F;
1787 
1788 	if (verbose)
1789 		fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
1790 			cpu, msr, target_c_local);
1791 
1792 	if (target_c_local < 85 || target_c_local > 120)
1793 		goto guess;
1794 
1795 	tcc_activation_temp = target_c_local;
1796 
1797 	return 0;
1798 
1799 guess:
1800 	tcc_activation_temp = TJMAX_DEFAULT;
1801 	fprintf(stderr, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
1802 		cpu, tcc_activation_temp);
1803 
1804 	return 0;
}

1806 void check_cpuid()
1807 {
1808 	unsigned int eax, ebx, ecx, edx, max_level;
1809 	unsigned int fms, family, model, stepping;
1810 
1811 	eax = ebx = ecx = edx = 0;
1812 
1813 	asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0));
1814 
	if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
		genuine_intel = 1;	/* ebx, edx, ecx spell "GenuineIntel" */
1817 
1818 	if (verbose)
1819 		fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ",
1820 			(char *)&ebx, (char *)&edx, (char *)&ecx);
1821 
1822 	asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx");
1823 	family = (fms >> 8) & 0xf;
1824 	model = (fms >> 4) & 0xf;
1825 	stepping = fms & 0xf;
1826 	if (family == 6 || family == 0xf)
1827 		model += ((fms >> 16) & 0xf) << 4;
1828 
1829 	if (verbose)
1830 		fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
1831 			max_level, family, model, stepping, family, model, stepping);
1832 
1833 	if (!(edx & (1 << 5))) {
1834 		fprintf(stderr, "CPUID: no MSR\n");
1835 		exit(1);
1836 	}
1837 
1838 	/*
1839 	 * check max extended function levels of CPUID.
1840 	 * This is needed to check for invariant TSC.
1841 	 * This check is valid for both Intel and AMD.
1842 	 */
1843 	ebx = ecx = edx = 0;
1844 	asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000000));
1845 
1846 	if (max_level < 0x80000007) {
1847 		fprintf(stderr, "CPUID: no invariant TSC (max_level 0x%x)\n", max_level);
1848 		exit(1);
1849 	}
1850 
1851 	/*
1852 	 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
1853 	 * this check is valid for both Intel and AMD
1854 	 */
1855 	asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000007));
1856 	has_invariant_tsc = edx & (1 << 8);
1857 
1858 	if (!has_invariant_tsc) {
1859 		fprintf(stderr, "No invariant TSC\n");
1860 		exit(1);
1861 	}
1862 
1863 	/*
1864 	 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
1865 	 * this check is valid for both Intel and AMD
1866 	 */
1867 
1868 	asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6));
	has_aperf = ecx & (1 << 0);	/* APERF/MPERF */
	do_dts = eax & (1 << 0);	/* Digital Thermal Sensor */
	do_ptm = eax & (1 << 6);	/* Package Thermal Management */
	has_epb = ecx & (1 << 3);	/* IA32_ENERGY_PERF_BIAS */
1873 
1874 	if (verbose)
1875 		fprintf(stderr, "CPUID(6): %s%s%s%s\n",
1876 			has_aperf ? "APERF" : "No APERF!",
1877 			do_dts ? ", DTS" : "",
1878 			do_ptm ? ", PTM": "",
1879 			has_epb ? ", EPB": "");
1880 
1881 	if (!has_aperf)
1882 		exit(-1);
1883 
1884 	do_nehalem_platform_info = genuine_intel && has_invariant_tsc;
1885 	do_nhm_cstates = genuine_intel;	/* all Intel w/ non-stop TSC have NHM counters */
1886 	do_snb_cstates = is_snb(family, model);
1887 	bclk = discover_bclk(family, model);
1888 
1889 	do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model);
1890 	do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model);
1891 	rapl_probe(family, model);
1892 
1893 	return;
1894 }
1895 
1896 
1897 void usage()
1898 {
1899 	fprintf(stderr, "%s: [-v][-R][-T][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n",
1900 		progname);
1901 	exit(1);
1902 }
1903 
/*
 * In /dev/cpu/, return success for names that are numbers,
 * i.e. filter out ".", "..", and "microcode".
 */
1909 int dir_filter(const struct dirent *dirp)
1910 {
1911 	if (isdigit(dirp->d_name[0]))
1912 		return 1;
1913 	else
1914 		return 0;
1915 }
1916 
1917 int open_dev_cpu_msr(int dummy1)
1918 {
1919 	return 0;
1920 }
1921 
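/*
 * topology_probe()
 *
 * Count the CPUs present, then size the topology:
 * cores per package, packages, and HT siblings per core, using the
 * get_core_id()/get_physical_package_id()/get_num_ht_siblings() helpers.
 * Also allocates and fills cpu_present_set and allocates cpu_affinity_set.
 */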
1922 void topology_probe()
1923 {
1924 	int i;
1925 	int max_core_id = 0;
1926 	int max_package_id = 0;
1927 	int max_siblings = 0;
1928 	struct cpu_topology {
1929 		int core_id;
1930 		int physical_package_id;
1931 	} *cpus;
1932 
1933 	/* Initialize num_cpus, max_cpu_num */
1934 	topo.num_cpus = 0;
1935 	topo.max_cpu_num = 0;
1936 	for_all_proc_cpus(count_cpus);
1937 	if (!summary_only && topo.num_cpus > 1)
1938 		show_cpu = 1;
1939 
1940 	if (verbose > 1)
1941 		fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
1942 
	cpus = calloc(topo.max_cpu_num + 1, sizeof(struct cpu_topology));
1944 	if (cpus == NULL) {
1945 		perror("calloc cpus");
1946 		exit(1);
1947 	}
1948 
1949 	/*
1950 	 * Allocate and initialize cpu_present_set
1951 	 */
1952 	cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
1953 	if (cpu_present_set == NULL) {
1954 		perror("CPU_ALLOC");
1955 		exit(3);
1956 	}
1957 	cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
1958 	CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
1959 	for_all_proc_cpus(mark_cpu_present);
1960 
1961 	/*
1962 	 * Allocate and initialize cpu_affinity_set
1963 	 */
1964 	cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
1965 	if (cpu_affinity_set == NULL) {
1966 		perror("CPU_ALLOC");
1967 		exit(3);
1968 	}
1969 	cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
1970 	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
1971 
1973 	/*
1974 	 * For online cpus
1975 	 * find max_core_id, max_package_id
1976 	 */
1977 	for (i = 0; i <= topo.max_cpu_num; ++i) {
1978 		int siblings;
1979 
1980 		if (cpu_is_not_present(i)) {
1981 			if (verbose > 1)
1982 				fprintf(stderr, "cpu%d NOT PRESENT\n", i);
1983 			continue;
1984 		}
1985 		cpus[i].core_id = get_core_id(i);
1986 		if (cpus[i].core_id > max_core_id)
1987 			max_core_id = cpus[i].core_id;
1988 
1989 		cpus[i].physical_package_id = get_physical_package_id(i);
1990 		if (cpus[i].physical_package_id > max_package_id)
1991 			max_package_id = cpus[i].physical_package_id;
1992 
1993 		siblings = get_num_ht_siblings(i);
1994 		if (siblings > max_siblings)
1995 			max_siblings = siblings;
1996 		if (verbose > 1)
1997 			fprintf(stderr, "cpu %d pkg %d core %d\n",
1998 				i, cpus[i].physical_package_id, cpus[i].core_id);
1999 	}
2000 	topo.num_cores_per_pkg = max_core_id + 1;
2001 	if (verbose > 1)
2002 		fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n",
2003 			max_core_id, topo.num_cores_per_pkg);
2004 	if (!summary_only && topo.num_cores_per_pkg > 1)
2005 		show_core = 1;
2006 
2007 	topo.num_packages = max_package_id + 1;
2008 	if (verbose > 1)
2009 		fprintf(stderr, "max_package_id %d, sizing for %d packages\n",
2010 			max_package_id, topo.num_packages);
2011 	if (!summary_only && topo.num_packages > 1)
2012 		show_pkg = 1;
2013 
2014 	topo.num_threads_per_core = max_siblings;
2015 	if (verbose > 1)
2016 		fprintf(stderr, "max_siblings %d\n", max_siblings);
2017 
2018 	free(cpus);
2019 }
2020 
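/*
 * allocate_counters()
 *
 * Allocate the flattened [pkg][core][thread] counter arrays.
 * Slots for CPUs that are not present keep cpu_id/core_id == -1.
 */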
2021 void
2022 allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
2023 {
2024 	int i;
2025 
2026 	*t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg *
2027 		topo.num_packages, sizeof(struct thread_data));
2028 	if (*t == NULL)
2029 		goto error;
2030 
2031 	for (i = 0; i < topo.num_threads_per_core *
2032 		topo.num_cores_per_pkg * topo.num_packages; i++)
2033 		(*t)[i].cpu_id = -1;
2034 
2035 	*c = calloc(topo.num_cores_per_pkg * topo.num_packages,
2036 		sizeof(struct core_data));
2037 	if (*c == NULL)
2038 		goto error;
2039 
2040 	for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++)
2041 		(*c)[i].core_id = -1;
2042 
2043 	*p = calloc(topo.num_packages, sizeof(struct pkg_data));
2044 	if (*p == NULL)
2045 		goto error;
2046 
2047 	for (i = 0; i < topo.num_packages; i++)
2048 		(*p)[i].package_id = i;
2049 
2050 	return;
2051 error:
2052 	perror("calloc counters");
2053 	exit(1);
2054 }

/*
2056  * init_counter()
2057  *
2058  * set cpu_id, core_num, pkg_num
2059  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
2060  *
2061  * increment topo.num_cores when 1st core in pkg seen
2062  */
2063 void init_counter(struct thread_data *thread_base, struct core_data *core_base,
2064 	struct pkg_data *pkg_base, int thread_num, int core_num,
2065 	int pkg_num, int cpu_id)
2066 {
2067 	struct thread_data *t;
2068 	struct core_data *c;
2069 	struct pkg_data *p;
2070 
2071 	t = GET_THREAD(thread_base, thread_num, core_num, pkg_num);
2072 	c = GET_CORE(core_base, core_num, pkg_num);
2073 	p = GET_PKG(pkg_base, pkg_num);
2074 
2075 	t->cpu_id = cpu_id;
2076 	if (thread_num == 0) {
2077 		t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
2078 		if (cpu_is_first_core_in_package(cpu_id))
2079 			t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
2080 	}
2081 
2082 	c->core_id = core_num;
2083 	p->package_id = pkg_num;
2084 }
2085 
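/*
 * initialize_counters()
 *
 * Note: siblings map to thread slot 0 or 1 only, which effectively
 * assumes no more than two HT siblings per core.
 */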
2087 int initialize_counters(int cpu_id)
2088 {
2089 	int my_thread_id, my_core_id, my_package_id;
2090 
2091 	my_package_id = get_physical_package_id(cpu_id);
2092 	my_core_id = get_core_id(cpu_id);
2093 
2094 	if (cpu_is_first_sibling_in_core(cpu_id)) {
2095 		my_thread_id = 0;
2096 		topo.num_cores++;
2097 	} else {
2098 		my_thread_id = 1;
2099 	}
2100 
2101 	init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
2102 	init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
2103 	return 0;
2104 }
2105 
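/* sized for ~128 bytes per CPU line, plus one extra line (presumably the header) */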
2106 void allocate_output_buffer()
2107 {
	output_buffer = calloc(1, (1 + topo.num_cpus) * 128);
	if (output_buffer == NULL) {
		perror("calloc");
		exit(-1);
	}
	outp = output_buffer;
2114 }
2115 
2116 void setup_all_buffers(void)
2117 {
2118 	topology_probe();
2119 	allocate_counters(&thread_even, &core_even, &package_even);
2120 	allocate_counters(&thread_odd, &core_odd, &package_odd);
2121 	allocate_output_buffer();
2122 	for_all_proc_cpus(initialize_counters);
2123 }
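
/*
 * turbostat_init()
 *
 * One-time setup: probe CPU features, verify msr-driver access and
 * root privilege, allocate all buffers, set the TjMax target, and
 * (if verbose) print the EPB, RAPL and thermal configuration.
 */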
2124 void turbostat_init()
2125 {
2126 	check_cpuid();
2127 
2128 	check_dev_msr();
2129 	check_super_user();
2130 
2131 	setup_all_buffers();
2132 
2133 	if (verbose)
2134 		print_verbose_header();
2135 
2136 	if (verbose)
2137 		for_all_cpus(print_epb, ODD_COUNTERS);
2138 
2139 	if (verbose)
2140 		for_all_cpus(print_rapl, ODD_COUNTERS);
2141 
2142 	for_all_cpus(set_temperature_target, ODD_COUNTERS);
2143 
2144 	if (verbose)
2145 		for_all_cpus(print_thermal, ODD_COUNTERS);
2146 }
2147 
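/*
 * fork_it()
 *
 * Snapshot the counters, fork/exec the given command, wait for it
 * to finish, snapshot again, then print one set of deltas covering
 * the command's run time.
 */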
2148 int fork_it(char **argv)
2149 {
2150 	pid_t child_pid;
2151 	int status;
2152 
2153 	status = for_all_cpus(get_counters, EVEN_COUNTERS);
2154 	if (status)
2155 		exit(status);
2156 	/* clear affinity side-effect of get_counters() */
2157 	sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
2158 	gettimeofday(&tv_even, (struct timezone *)NULL);
2159 
2160 	child_pid = fork();
	if (!child_pid) {
		/* child */
		execvp(argv[0], argv);
		/* reached only if exec() fails */
		perror(argv[0]);
		exit(1);
2164 	} else {
2165 
2166 		/* parent */
2167 		if (child_pid == -1) {
2168 			perror("fork");
2169 			exit(1);
2170 		}
2171 
2172 		signal(SIGINT, SIG_IGN);
2173 		signal(SIGQUIT, SIG_IGN);
2174 		if (waitpid(child_pid, &status, 0) == -1) {
2175 			perror("wait");
			exit(1);
2177 		}
2178 	}
2179 	/*
2180 	 * n.b. fork_it() does not check for errors from for_all_cpus()
2181 	 * because re-starting is problematic when forking
2182 	 */
2183 	for_all_cpus(get_counters, ODD_COUNTERS);
2184 	gettimeofday(&tv_odd, (struct timezone *)NULL);
2185 	timersub(&tv_odd, &tv_even, &tv_delta);
2186 	for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
2187 	compute_average(EVEN_COUNTERS);
2188 	format_all_counters(EVEN_COUNTERS);
2189 	flush_stderr();
2190 
2191 	fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
2192 
	/* propagate the child's exit status to our caller */
	if (WIFEXITED(status))
		status = WEXITSTATUS(status);

	return status;
2194 }
2195 
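/*
 * Command-line options, as parsed by the switch below:
 *	-p	output one line per core (sets show_core_only)
 *	-P	output one line per package (sets show_pkg_only)
 *	-S	one-line system summary only (sets summary_only)
 *	-v	increase verbosity
 *	-i sec	sampling interval in seconds (interval_sec)
 *	-c MSR#	32-bit MSR to show as a per-interval delta (extra_delta_offset32)
 *	-s	shorthand for "-c 0x34", the SMI count MSR
 *	-C MSR#	64-bit MSR to show as a per-interval delta (extra_delta_offset64)
 *	-m MSR#	32-bit MSR to show raw (extra_msr_offset32)
 *	-M MSR#	64-bit MSR to show raw (extra_msr_offset64)
 *	-R	verbose RAPL output (rapl_verbose)
 *	-T C	override TjMax, in degrees Celsius (tcc_activation_temp_override)
 */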
2196 void cmdline(int argc, char **argv)
2197 {
2198 	int opt;
2199 
2200 	progname = argv[0];
2201 
	while ((opt = getopt(argc, argv, "+pPSvi:sc:C:m:M:RT:")) != -1) {
2203 		switch (opt) {
2204 		case 'p':
2205 			show_core_only++;
2206 			break;
2207 		case 'P':
2208 			show_pkg_only++;
2209 			break;
2210 		case 'S':
2211 			summary_only++;
2212 			break;
2213 		case 'v':
2214 			verbose++;
2215 			break;
2216 		case 'i':
2217 			interval_sec = atoi(optarg);
2218 			break;
2219 		case 'c':
2220 			sscanf(optarg, "%x", &extra_delta_offset32);
2221 			break;
2222 		case 's':
2223 			extra_delta_offset32 = 0x34;	/* SMI counter */
2224 			break;
2225 		case 'C':
2226 			sscanf(optarg, "%x", &extra_delta_offset64);
2227 			break;
2228 		case 'm':
2229 			sscanf(optarg, "%x", &extra_msr_offset32);
2230 			break;
2231 		case 'M':
2232 			sscanf(optarg, "%x", &extra_msr_offset64);
2233 			break;
2234 		case 'R':
2235 			rapl_verbose++;
2236 			break;
2237 		case 'T':
2238 			tcc_activation_temp_override = atoi(optarg);
2239 			break;
2240 		default:
2241 			usage();
2242 		}
2243 	}
2244 }
2245 
2246 int main(int argc, char **argv)
2247 {
2248 	cmdline(argc, argv);
2249 
2250 	if (verbose)
2251 		fprintf(stderr, "turbostat v3.0 November 23, 2012"
2252 			" - Len Brown <lenb@kernel.org>\n");
2253 
2254 	turbostat_init();
2255 
2256 	/*
2257 	 * if any params left, it must be a command to fork
2258 	 */
2259 	if (argc - optind)
2260 		return fork_it(argv + optind);
2261 	else
2262 		turbostat_loop();
2263 
2264 	return 0;
2265 }
2266