xref: /openbmc/linux/tools/power/x86/turbostat/turbostat.c (revision e23da0370f80834e971142e50253f5b79be83631)
1 /*
2  * turbostat -- show CPU frequency and C-state residency
3  * on modern Intel turbo-capable processors.
4  *
5  * Copyright (c) 2012 Intel Corporation.
6  * Len Brown <len.brown@intel.com>
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21 
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/resource.h>
#include <fcntl.h>
#include <signal.h>
#include <sys/time.h>
#include <stdlib.h>
#include <dirent.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
35 
/*
 * MSR offsets handed to get_msr().
 */

/* per-thread counters and platform information */
#define MSR_TSC				0x10
#define MSR_MPERF			0xE7
#define MSR_APERF			0xE8
#define MSR_NEHALEM_PLATFORM_INFO	0xCE
#define MSR_NEHALEM_TURBO_RATIO_LIMIT	0x1AD

/* core C-state residency counters */
#define MSR_CORE_C3_RESIDENCY		0x3FC
#define MSR_CORE_C6_RESIDENCY		0x3FD
#define MSR_CORE_C7_RESIDENCY		0x3FE	/* SNB only */

/* package C-state residency counters */
#define MSR_PKG_C2_RESIDENCY		0x60D	/* SNB only */
#define MSR_PKG_C3_RESIDENCY		0x3F8
#define MSR_PKG_C6_RESIDENCY		0x3F9
#define MSR_PKG_C7_RESIDENCY		0x3FA	/* SNB only */
48 
/* file parsed by for_all_cpus() to enumerate CPUs */
char *proc_stat = "/proc/stat";

/* command line settings */
unsigned int interval_sec = 5;		/* set with -i interval_sec */
unsigned int verbose = 0;		/* set with -v */
unsigned int summary_only = 0;		/* set with -s */
unsigned int extra_msr_offset = 0;	/* set with -M */

/* set by compute_delta() when APERF/MPERF went backwards */
unsigned int skip_c0 = 0;
unsigned int skip_c1 = 0;

/* capabilities filled in by check_cpuid() */
unsigned int do_nhm_cstates = 0;
unsigned int do_snb_cstates = 0;
unsigned int has_aperf = 0;
unsigned int genuine_intel = 0;
unsigned int has_invariant_tsc = 0;
unsigned int do_nehalem_platform_info = 0;
unsigned int do_nehalem_turbo_ratio_limit = 0;
double bclk = 0.0;

unsigned int units = 1000000000;	/* Ghz etc */

/* topology columns to print, switched on by insert_counters() */
unsigned int show_pkg = 0;
unsigned int show_core = 0;
unsigned int show_cpu = 0;

/* run-time state */
int aperf_mperf_unstable = 0;
int backwards_count = 0;
char *progname = NULL;
int need_reinitialize = 0;

int num_cpus = 0;
75 
/*
 * One set of counter values for one logical CPU; also a node in a
 * singly linked list kept sorted by insert_counters().
 */
struct counters {
	/* per thread */
	unsigned long long tsc;
	unsigned long long aperf;
	unsigned long long mperf;
	unsigned long long c1;		/* calculated, not read from an MSR */

	/* per core */
	unsigned long long c3;
	unsigned long long c6;
	unsigned long long c7;

	/* per package */
	unsigned long long pc2;
	unsigned long long pc3;
	unsigned long long pc6;
	unsigned long long pc7;

	/* per thread: value of the MSR selected with -M */
	unsigned long long extra_msr;

	/* topology of this CPU */
	int pkg;
	int core;
	int cpu;

	struct counters *next;
};
94 
/*
 * Counter lists: two alternating snapshots, the per-CPU difference
 * between them, and the single node holding the average.
 */
struct counters *cnt_even = NULL;
struct counters *cnt_odd = NULL;
struct counters *cnt_delta = NULL;
struct counters *cnt_average = NULL;

/* time of each snapshot and the elapsed time between them */
struct timeval tv_even, tv_odd, tv_delta;
102 
103 unsigned long long get_msr(int cpu, off_t offset)
104 {
105 	ssize_t retval;
106 	unsigned long long msr;
107 	char pathname[32];
108 	int fd;
109 
110 	sprintf(pathname, "/dev/cpu/%d/msr", cpu);
111 	fd = open(pathname, O_RDONLY);
112 	if (fd < 0) {
113 		perror(pathname);
114 		need_reinitialize = 1;
115 		return 0;
116 	}
117 
118 	retval = pread(fd, &msr, sizeof msr, offset);
119 	if (retval != sizeof msr) {
120 		fprintf(stderr, "cpu%d pread(..., 0x%zx) = %jd\n",
121 			cpu, offset, retval);
122 		exit(-2);
123 	}
124 
125 	close(fd);
126 	return msr;
127 }
128 
129 void print_header(void)
130 {
131 	if (show_pkg)
132 		fprintf(stderr, "pk");
133 	if (show_pkg)
134 		fprintf(stderr, " ");
135 	if (show_core)
136 		fprintf(stderr, "cor");
137 	if (show_cpu)
138 		fprintf(stderr, " CPU");
139 	if (show_pkg || show_core || show_cpu)
140 		fprintf(stderr, " ");
141 	if (do_nhm_cstates)
142 		fprintf(stderr, "   %%c0");
143 	if (has_aperf)
144 		fprintf(stderr, "  GHz");
145 	fprintf(stderr, "  TSC");
146 	if (do_nhm_cstates)
147 		fprintf(stderr, "    %%c1");
148 	if (do_nhm_cstates)
149 		fprintf(stderr, "    %%c3");
150 	if (do_nhm_cstates)
151 		fprintf(stderr, "    %%c6");
152 	if (do_snb_cstates)
153 		fprintf(stderr, "    %%c7");
154 	if (do_snb_cstates)
155 		fprintf(stderr, "   %%pc2");
156 	if (do_nhm_cstates)
157 		fprintf(stderr, "   %%pc3");
158 	if (do_nhm_cstates)
159 		fprintf(stderr, "   %%pc6");
160 	if (do_snb_cstates)
161 		fprintf(stderr, "   %%pc7");
162 	if (extra_msr_offset)
163 		fprintf(stderr, "        MSR 0x%x ", extra_msr_offset);
164 
165 	putc('\n', stderr);
166 }
167 
168 void dump_cnt(struct counters *cnt)
169 {
170 	if (!cnt)
171 		return;
172 	if (cnt->pkg) fprintf(stderr, "package: %d ", cnt->pkg);
173 	if (cnt->core) fprintf(stderr, "core:: %d ", cnt->core);
174 	if (cnt->cpu) fprintf(stderr, "CPU: %d ", cnt->cpu);
175 	if (cnt->tsc) fprintf(stderr, "TSC: %016llX\n", cnt->tsc);
176 	if (cnt->c3) fprintf(stderr, "c3: %016llX\n", cnt->c3);
177 	if (cnt->c6) fprintf(stderr, "c6: %016llX\n", cnt->c6);
178 	if (cnt->c7) fprintf(stderr, "c7: %016llX\n", cnt->c7);
179 	if (cnt->aperf) fprintf(stderr, "aperf: %016llX\n", cnt->aperf);
180 	if (cnt->pc2) fprintf(stderr, "pc2: %016llX\n", cnt->pc2);
181 	if (cnt->pc3) fprintf(stderr, "pc3: %016llX\n", cnt->pc3);
182 	if (cnt->pc6) fprintf(stderr, "pc6: %016llX\n", cnt->pc6);
183 	if (cnt->pc7) fprintf(stderr, "pc7: %016llX\n", cnt->pc7);
184 	if (cnt->extra_msr) fprintf(stderr, "msr0x%x: %016llX\n", extra_msr_offset, cnt->extra_msr);
185 }
186 
187 void dump_list(struct counters *cnt)
188 {
189 	printf("dump_list 0x%p\n", cnt);
190 
191 	for (; cnt; cnt = cnt->next)
192 		dump_cnt(cnt);
193 }
194 
195 /*
196  * column formatting convention & formats
197  * package: "pk" 2 columns %2d
198  * core: "cor" 3 columns %3d
199  * CPU: "CPU" 3 columns %3d
200  * GHz: "GHz" 3 columns %3.2
201  * TSC: "TSC" 3 columns %3.2
202  * percentage " %pc3" %6.2
203  */
204 void print_cnt(struct counters *p)
205 {
206 	double interval_float;
207 
208 	interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
209 
210 	/* topology columns, print blanks on 1st (average) line */
211 	if (p == cnt_average) {
212 		if (show_pkg)
213 			fprintf(stderr, "  ");
214 		if (show_pkg && show_core)
215 			fprintf(stderr, " ");
216 		if (show_core)
217 			fprintf(stderr, "   ");
218 		if (show_cpu)
219 			fprintf(stderr, " " "   ");
220 	} else {
221 		if (show_pkg)
222 			fprintf(stderr, "%2d", p->pkg);
223 		if (show_pkg && show_core)
224 			fprintf(stderr, " ");
225 		if (show_core)
226 			fprintf(stderr, "%3d", p->core);
227 		if (show_cpu)
228 			fprintf(stderr, " %3d", p->cpu);
229 	}
230 
231 	/* %c0 */
232 	if (do_nhm_cstates) {
233 		if (show_pkg || show_core || show_cpu)
234 			fprintf(stderr, " ");
235 		if (!skip_c0)
236 			fprintf(stderr, "%6.2f", 100.0 * p->mperf/p->tsc);
237 		else
238 			fprintf(stderr, "  ****");
239 	}
240 
241 	/* GHz */
242 	if (has_aperf) {
243 		if (!aperf_mperf_unstable) {
244 			fprintf(stderr, " %3.2f",
245 				1.0 * p->tsc / units * p->aperf /
246 				p->mperf / interval_float);
247 		} else {
248 			if (p->aperf > p->tsc || p->mperf > p->tsc) {
249 				fprintf(stderr, " ***");
250 			} else {
251 				fprintf(stderr, "%3.1f*",
252 					1.0 * p->tsc /
253 					units * p->aperf /
254 					p->mperf / interval_float);
255 			}
256 		}
257 	}
258 
259 	/* TSC */
260 	fprintf(stderr, "%5.2f", 1.0 * p->tsc/units/interval_float);
261 
262 	if (do_nhm_cstates) {
263 		if (!skip_c1)
264 			fprintf(stderr, " %6.2f", 100.0 * p->c1/p->tsc);
265 		else
266 			fprintf(stderr, "  ****");
267 	}
268 	if (do_nhm_cstates)
269 		fprintf(stderr, " %6.2f", 100.0 * p->c3/p->tsc);
270 	if (do_nhm_cstates)
271 		fprintf(stderr, " %6.2f", 100.0 * p->c6/p->tsc);
272 	if (do_snb_cstates)
273 		fprintf(stderr, " %6.2f", 100.0 * p->c7/p->tsc);
274 	if (do_snb_cstates)
275 		fprintf(stderr, " %6.2f", 100.0 * p->pc2/p->tsc);
276 	if (do_nhm_cstates)
277 		fprintf(stderr, " %6.2f", 100.0 * p->pc3/p->tsc);
278 	if (do_nhm_cstates)
279 		fprintf(stderr, " %6.2f", 100.0 * p->pc6/p->tsc);
280 	if (do_snb_cstates)
281 		fprintf(stderr, " %6.2f", 100.0 * p->pc7/p->tsc);
282 	if (extra_msr_offset)
283 		fprintf(stderr, "  0x%016llx", p->extra_msr);
284 	putc('\n', stderr);
285 }
286 
287 void print_counters(struct counters *counters)
288 {
289 	struct counters *cnt;
290 	static int printed;
291 
292 
293 	if (!printed || !summary_only)
294 		print_header();
295 
296 	if (num_cpus > 1)
297 		print_cnt(cnt_average);
298 
299 	printed = 1;
300 
301 	if (summary_only)
302 		return;
303 
304 	for (cnt = counters; cnt != NULL; cnt = cnt->next)
305 		print_cnt(cnt);
306 
307 }
308 
309 #define SUBTRACT_COUNTER(after, before, delta) (delta = (after - before), (before > after))
310 
311 int compute_delta(struct counters *after,
312 	struct counters *before, struct counters *delta)
313 {
314 	int errors = 0;
315 	int perf_err = 0;
316 
317 	skip_c0 = skip_c1 = 0;
318 
319 	for ( ; after && before && delta;
320 		after = after->next, before = before->next, delta = delta->next) {
321 		if (before->cpu != after->cpu) {
322 			printf("cpu configuration changed: %d != %d\n",
323 				before->cpu, after->cpu);
324 			return -1;
325 		}
326 
327 		if (SUBTRACT_COUNTER(after->tsc, before->tsc, delta->tsc)) {
328 			fprintf(stderr, "cpu%d TSC went backwards %llX to %llX\n",
329 				before->cpu, before->tsc, after->tsc);
330 			errors++;
331 		}
332 		/* check for TSC < 1 Mcycles over interval */
333 		if (delta->tsc < (1000 * 1000)) {
334 			fprintf(stderr, "Insanely slow TSC rate,"
335 				" TSC stops in idle?\n");
336 			fprintf(stderr, "You can disable all c-states"
337 				" by booting with \"idle=poll\"\n");
338 			fprintf(stderr, "or just the deep ones with"
339 				" \"processor.max_cstate=1\"\n");
340 			exit(-3);
341 		}
342 		if (SUBTRACT_COUNTER(after->c3, before->c3, delta->c3)) {
343 			fprintf(stderr, "cpu%d c3 counter went backwards %llX to %llX\n",
344 				before->cpu, before->c3, after->c3);
345 			errors++;
346 		}
347 		if (SUBTRACT_COUNTER(after->c6, before->c6, delta->c6)) {
348 			fprintf(stderr, "cpu%d c6 counter went backwards %llX to %llX\n",
349 				before->cpu, before->c6, after->c6);
350 			errors++;
351 		}
352 		if (SUBTRACT_COUNTER(after->c7, before->c7, delta->c7)) {
353 			fprintf(stderr, "cpu%d c7 counter went backwards %llX to %llX\n",
354 				before->cpu, before->c7, after->c7);
355 			errors++;
356 		}
357 		if (SUBTRACT_COUNTER(after->pc2, before->pc2, delta->pc2)) {
358 			fprintf(stderr, "cpu%d pc2 counter went backwards %llX to %llX\n",
359 				before->cpu, before->pc2, after->pc2);
360 			errors++;
361 		}
362 		if (SUBTRACT_COUNTER(after->pc3, before->pc3, delta->pc3)) {
363 			fprintf(stderr, "cpu%d pc3 counter went backwards %llX to %llX\n",
364 				before->cpu, before->pc3, after->pc3);
365 			errors++;
366 		}
367 		if (SUBTRACT_COUNTER(after->pc6, before->pc6, delta->pc6)) {
368 			fprintf(stderr, "cpu%d pc6 counter went backwards %llX to %llX\n",
369 				before->cpu, before->pc6, after->pc6);
370 			errors++;
371 		}
372 		if (SUBTRACT_COUNTER(after->pc7, before->pc7, delta->pc7)) {
373 			fprintf(stderr, "cpu%d pc7 counter went backwards %llX to %llX\n",
374 				before->cpu, before->pc7, after->pc7);
375 			errors++;
376 		}
377 
378 		perf_err = SUBTRACT_COUNTER(after->aperf, before->aperf, delta->aperf);
379 		if (perf_err) {
380 			fprintf(stderr, "cpu%d aperf counter went backwards %llX to %llX\n",
381 				before->cpu, before->aperf, after->aperf);
382 		}
383 		perf_err |= SUBTRACT_COUNTER(after->mperf, before->mperf, delta->mperf);
384 		if (perf_err) {
385 			fprintf(stderr, "cpu%d mperf counter went backwards %llX to %llX\n",
386 				before->cpu, before->mperf, after->mperf);
387 		}
388 		if (perf_err) {
389 			if (!aperf_mperf_unstable) {
390 				fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname);
391 				fprintf(stderr, "* Frequency results do not cover entire interval *\n");
392 				fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n");
393 
394 				aperf_mperf_unstable = 1;
395 			}
396 			/*
397 			 * mperf delta is likely a huge "positive" number
398 			 * can not use it for calculating c0 time
399 			 */
400 			skip_c0 = 1;
401 			skip_c1 = 1;
402 		}
403 
404 		/*
405 		 * As mperf and tsc collection are not atomic,
406 		 * it is possible for mperf's non-halted cycles
407 		 * to exceed TSC's all cycles: show c1 = 0% in that case.
408 		 */
409 		if (delta->mperf > delta->tsc)
410 			delta->c1 = 0;
411 		else /* normal case, derive c1 */
412 			delta->c1 = delta->tsc - delta->mperf
413 				- delta->c3 - delta->c6 - delta->c7;
414 
415 		if (delta->mperf == 0)
416 			delta->mperf = 1;	/* divide by 0 protection */
417 
418 		/*
419 		 * for "extra msr", just copy the latest w/o subtracting
420 		 */
421 		delta->extra_msr = after->extra_msr;
422 		if (errors) {
423 			fprintf(stderr, "ERROR cpu%d before:\n", before->cpu);
424 			dump_cnt(before);
425 			fprintf(stderr, "ERROR cpu%d after:\n", before->cpu);
426 			dump_cnt(after);
427 			errors = 0;
428 		}
429 	}
430 	return 0;
431 }
432 
433 void compute_average(struct counters *delta, struct counters *avg)
434 {
435 	struct counters *sum;
436 
437 	sum = calloc(1, sizeof(struct counters));
438 	if (sum == NULL) {
439 		perror("calloc sum");
440 		exit(1);
441 	}
442 
443 	for (; delta; delta = delta->next) {
444 		sum->tsc += delta->tsc;
445 		sum->c1 += delta->c1;
446 		sum->c3 += delta->c3;
447 		sum->c6 += delta->c6;
448 		sum->c7 += delta->c7;
449 		sum->aperf += delta->aperf;
450 		sum->mperf += delta->mperf;
451 		sum->pc2 += delta->pc2;
452 		sum->pc3 += delta->pc3;
453 		sum->pc6 += delta->pc6;
454 		sum->pc7 += delta->pc7;
455 	}
456 	avg->tsc = sum->tsc/num_cpus;
457 	avg->c1 = sum->c1/num_cpus;
458 	avg->c3 = sum->c3/num_cpus;
459 	avg->c6 = sum->c6/num_cpus;
460 	avg->c7 = sum->c7/num_cpus;
461 	avg->aperf = sum->aperf/num_cpus;
462 	avg->mperf = sum->mperf/num_cpus;
463 	avg->pc2 = sum->pc2/num_cpus;
464 	avg->pc3 = sum->pc3/num_cpus;
465 	avg->pc6 = sum->pc6/num_cpus;
466 	avg->pc7 = sum->pc7/num_cpus;
467 
468 	free(sum);
469 }
470 
471 void get_counters(struct counters *cnt)
472 {
473 	for ( ; cnt; cnt = cnt->next) {
474 		cnt->tsc = get_msr(cnt->cpu, MSR_TSC);
475 		if (do_nhm_cstates)
476 			cnt->c3 = get_msr(cnt->cpu, MSR_CORE_C3_RESIDENCY);
477 		if (do_nhm_cstates)
478 			cnt->c6 = get_msr(cnt->cpu, MSR_CORE_C6_RESIDENCY);
479 		if (do_snb_cstates)
480 			cnt->c7 = get_msr(cnt->cpu, MSR_CORE_C7_RESIDENCY);
481 		if (has_aperf)
482 			cnt->aperf = get_msr(cnt->cpu, MSR_APERF);
483 		if (has_aperf)
484 			cnt->mperf = get_msr(cnt->cpu, MSR_MPERF);
485 		if (do_snb_cstates)
486 			cnt->pc2 = get_msr(cnt->cpu, MSR_PKG_C2_RESIDENCY);
487 		if (do_nhm_cstates)
488 			cnt->pc3 = get_msr(cnt->cpu, MSR_PKG_C3_RESIDENCY);
489 		if (do_nhm_cstates)
490 			cnt->pc6 = get_msr(cnt->cpu, MSR_PKG_C6_RESIDENCY);
491 		if (do_snb_cstates)
492 			cnt->pc7 = get_msr(cnt->cpu, MSR_PKG_C7_RESIDENCY);
493 		if (extra_msr_offset)
494 			cnt->extra_msr = get_msr(cnt->cpu, extra_msr_offset);
495 	}
496 }
497 
498 void print_nehalem_info(void)
499 {
500 	unsigned long long msr;
501 	unsigned int ratio;
502 
503 	if (!do_nehalem_platform_info)
504 		return;
505 
506 	msr = get_msr(0, MSR_NEHALEM_PLATFORM_INFO);
507 
508 	ratio = (msr >> 40) & 0xFF;
509 	fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n",
510 		ratio, bclk, ratio * bclk);
511 
512 	ratio = (msr >> 8) & 0xFF;
513 	fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n",
514 		ratio, bclk, ratio * bclk);
515 
516 	if (verbose > 1)
517 		fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr);
518 
519 	if (!do_nehalem_turbo_ratio_limit)
520 		return;
521 
522 	msr = get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT);
523 
524 	ratio = (msr >> 24) & 0xFF;
525 	if (ratio)
526 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
527 			ratio, bclk, ratio * bclk);
528 
529 	ratio = (msr >> 16) & 0xFF;
530 	if (ratio)
531 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
532 			ratio, bclk, ratio * bclk);
533 
534 	ratio = (msr >> 8) & 0xFF;
535 	if (ratio)
536 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
537 			ratio, bclk, ratio * bclk);
538 
539 	ratio = (msr >> 0) & 0xFF;
540 	if (ratio)
541 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
542 			ratio, bclk, ratio * bclk);
543 
544 }
545 
546 void free_counter_list(struct counters *list)
547 {
548 	struct counters *p;
549 
550 	for (p = list; p; ) {
551 		struct counters *free_me;
552 
553 		free_me = p;
554 		p = p->next;
555 		free(free_me);
556 	}
557 }
558 
559 void free_all_counters(void)
560 {
561 	free_counter_list(cnt_even);
562 	cnt_even = NULL;
563 
564 	free_counter_list(cnt_odd);
565 	cnt_odd = NULL;
566 
567 	free_counter_list(cnt_delta);
568 	cnt_delta = NULL;
569 
570 	free_counter_list(cnt_average);
571 	cnt_average = NULL;
572 }
573 
574 void insert_counters(struct counters **list,
575 	struct counters *new)
576 {
577 	struct counters *prev;
578 
579 	/*
580 	 * list was empty
581 	 */
582 	if (*list == NULL) {
583 		new->next = *list;
584 		*list = new;
585 		return;
586 	}
587 
588 	if (!summary_only)
589 		show_cpu = 1;	/* there is more than one CPU */
590 
591 	/*
592 	 * insert on front of list.
593 	 * It is sorted by ascending package#, core#, cpu#
594 	 */
595 	if (((*list)->pkg > new->pkg) ||
596 	    (((*list)->pkg == new->pkg) && ((*list)->core > new->core)) ||
597 	    (((*list)->pkg == new->pkg) && ((*list)->core == new->core) && ((*list)->cpu > new->cpu))) {
598 		new->next = *list;
599 		*list = new;
600 		return;
601 	}
602 
603 	prev = *list;
604 
605 	while (prev->next && (prev->next->pkg < new->pkg)) {
606 		prev = prev->next;
607 		if (!summary_only)
608 			show_pkg = 1;	/* there is more than 1 package */
609 	}
610 
611 	while (prev->next && (prev->next->pkg == new->pkg)
612 		&& (prev->next->core < new->core)) {
613 		prev = prev->next;
614 		if (!summary_only)
615 			show_core = 1;	/* there is more than 1 core */
616 	}
617 
618 	while (prev->next && (prev->next->pkg == new->pkg)
619 		&& (prev->next->core == new->core)
620 		&& (prev->next->cpu < new->cpu)) {
621 		prev = prev->next;
622 	}
623 
624 	/*
625 	 * insert after "prev"
626 	 */
627 	new->next = prev->next;
628 	prev->next = new;
629 }
630 
631 void alloc_new_counters(int pkg, int core, int cpu)
632 {
633 	struct counters *new;
634 
635 	if (verbose > 1)
636 		printf("pkg%d core%d, cpu%d\n", pkg, core, cpu);
637 
638 	new = (struct counters *)calloc(1, sizeof(struct counters));
639 	if (new == NULL) {
640 		perror("calloc");
641 		exit(1);
642 	}
643 	new->pkg = pkg;
644 	new->core = core;
645 	new->cpu = cpu;
646 	insert_counters(&cnt_odd, new);
647 
648 	new = (struct counters *)calloc(1,
649 		sizeof(struct counters));
650 	if (new == NULL) {
651 		perror("calloc");
652 		exit(1);
653 	}
654 	new->pkg = pkg;
655 	new->core = core;
656 	new->cpu = cpu;
657 	insert_counters(&cnt_even, new);
658 
659 	new = (struct counters *)calloc(1, sizeof(struct counters));
660 	if (new == NULL) {
661 		perror("calloc");
662 		exit(1);
663 	}
664 	new->pkg = pkg;
665 	new->core = core;
666 	new->cpu = cpu;
667 	insert_counters(&cnt_delta, new);
668 
669 	new = (struct counters *)calloc(1, sizeof(struct counters));
670 	if (new == NULL) {
671 		perror("calloc");
672 		exit(1);
673 	}
674 	new->pkg = pkg;
675 	new->core = core;
676 	new->cpu = cpu;
677 	cnt_average = new;
678 }
679 
/*
 * get_physical_package_id - read a CPU's package id from sysfs
 * @cpu: logical CPU number
 *
 * Returns the integer stored in
 * /sys/devices/system/cpu/cpu<cpu>/topology/physical_package_id.
 * Exits with status 1 if the file cannot be opened or does not start
 * with an integer.
 */
int get_physical_package_id(int cpu)
{
	char path[128];
	FILE *filep;
	int pkg;

	snprintf(path, sizeof(path),
		"/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
	filep = fopen(path, "r");
	if (filep == NULL) {
		perror(path);
		exit(1);
	}
	/* never hand back an uninitialized value on a parse failure */
	if (fscanf(filep, "%d", &pkg) != 1) {
		fprintf(stderr, "%s: failed to parse file\n", path);
		exit(1);
	}
	fclose(filep);
	return pkg;
}
696 
/*
 * get_core_id - read a CPU's core id from sysfs
 * @cpu: logical CPU number
 *
 * Returns the integer stored in
 * /sys/devices/system/cpu/cpu<cpu>/topology/core_id.
 * Exits with status 1 if the file cannot be opened or does not start
 * with an integer.
 */
int get_core_id(int cpu)
{
	char path[128];
	FILE *filep;
	int core;

	snprintf(path, sizeof(path),
		"/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
	filep = fopen(path, "r");
	if (filep == NULL) {
		perror(path);
		exit(1);
	}
	/* never hand back an uninitialized value on a parse failure */
	if (fscanf(filep, "%d", &core) != 1) {
		fprintf(stderr, "%s: failed to parse file\n", path);
		exit(1);
	}
	fclose(filep);
	return core;
}
713 
714 /*
715  * run func(index, cpu) on every cpu in /proc/stat
716  */
717 
718 int for_all_cpus(void (func)(int, int, int))
719 {
720 	FILE *fp;
721 	int cpu_count;
722 	int retval;
723 
724 	fp = fopen(proc_stat, "r");
725 	if (fp == NULL) {
726 		perror(proc_stat);
727 		exit(1);
728 	}
729 
730 	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
731 	if (retval != 0) {
732 		perror("/proc/stat format");
733 		exit(1);
734 	}
735 
736 	for (cpu_count = 0; ; cpu_count++) {
737 		int cpu;
738 
739 		retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu);
740 		if (retval != 1)
741 			break;
742 
743 		func(get_physical_package_id(cpu), get_core_id(cpu), cpu);
744 	}
745 	fclose(fp);
746 	return cpu_count;
747 }
748 
749 void re_initialize(void)
750 {
751 	printf("turbostat: topology changed, re-initializing.\n");
752 	free_all_counters();
753 	num_cpus = for_all_cpus(alloc_new_counters);
754 	need_reinitialize = 0;
755 	printf("num_cpus is now %d\n", num_cpus);
756 }
757 
/* no-op callback; verify_num_cpus() passes it to for_all_cpus() just to count cpus */
void dummy(int pkg, int core, int cpu)
{
	(void)pkg;
	(void)core;
	(void)cpu;
}
759 /*
760  * check to see if a cpu came on-line
761  */
762 void verify_num_cpus(void)
763 {
764 	int new_num_cpus;
765 
766 	new_num_cpus = for_all_cpus(dummy);
767 
768 	if (new_num_cpus != num_cpus) {
769 		if (verbose)
770 			printf("num_cpus was %d, is now  %d\n",
771 				num_cpus, new_num_cpus);
772 		need_reinitialize = 1;
773 	}
774 }
775 
776 void turbostat_loop()
777 {
778 restart:
779 	get_counters(cnt_even);
780 	gettimeofday(&tv_even, (struct timezone *)NULL);
781 
782 	while (1) {
783 		verify_num_cpus();
784 		if (need_reinitialize) {
785 			re_initialize();
786 			goto restart;
787 		}
788 		sleep(interval_sec);
789 		get_counters(cnt_odd);
790 		gettimeofday(&tv_odd, (struct timezone *)NULL);
791 
792 		compute_delta(cnt_odd, cnt_even, cnt_delta);
793 		timersub(&tv_odd, &tv_even, &tv_delta);
794 		compute_average(cnt_delta, cnt_average);
795 		print_counters(cnt_delta);
796 		if (need_reinitialize) {
797 			re_initialize();
798 			goto restart;
799 		}
800 		sleep(interval_sec);
801 		get_counters(cnt_even);
802 		gettimeofday(&tv_even, (struct timezone *)NULL);
803 		compute_delta(cnt_even, cnt_odd, cnt_delta);
804 		timersub(&tv_even, &tv_odd, &tv_delta);
805 		compute_average(cnt_delta, cnt_average);
806 		print_counters(cnt_delta);
807 	}
808 }
809 
/*
 * Make sure /dev/cpu/0/msr exists; otherwise print a hint about
 * loading the msr module and exit with status -5.
 */
void check_dev_msr()
{
	struct stat sb;

	if (stat("/dev/cpu/0/msr", &sb) == 0)
		return;

	fprintf(stderr, "no /dev/cpu/0/msr\n");
	fprintf(stderr, "Try \"# modprobe msr\"\n");
	exit(-5);
}
820 
/*
 * Exit with status -6 unless the real user id is 0.
 */
void check_super_user()
{
	if (getuid() == 0)
		return;

	fprintf(stderr, "must be root\n");
	exit(-6);
}
828 
829 int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
830 {
831 	if (!genuine_intel)
832 		return 0;
833 
834 	if (family != 6)
835 		return 0;
836 
837 	switch (model) {
838 	case 0x1A:	/* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
839 	case 0x1E:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
840 	case 0x1F:	/* Core i7 and i5 Processor - Nehalem */
841 	case 0x25:	/* Westmere Client - Clarkdale, Arrandale */
842 	case 0x2C:	/* Westmere EP - Gulftown */
843 	case 0x2A:	/* SNB */
844 	case 0x2D:	/* SNB Xeon */
845 	case 0x3A:	/* IVB */
846 	case 0x3D:	/* IVB Xeon */
847 		return 1;
848 	case 0x2E:	/* Nehalem-EX Xeon - Beckton */
849 	case 0x2F:	/* Westmere-EX Xeon - Eagleton */
850 	default:
851 		return 0;
852 	}
853 }
854 
855 int is_snb(unsigned int family, unsigned int model)
856 {
857 	if (!genuine_intel)
858 		return 0;
859 
860 	switch (model) {
861 	case 0x2A:
862 	case 0x2D:
863 		return 1;
864 	}
865 	return 0;
866 }
867 
/*
 * Return the base clock in MHz used to turn ratios into frequencies:
 * 100.00 when is_snb() accepts the processor, 133.33 otherwise.
 */
double discover_bclk(unsigned int family, unsigned int model)
{
	return is_snb(family, model) ? 100.00 : 133.33;
}
875 
876 void check_cpuid()
877 {
878 	unsigned int eax, ebx, ecx, edx, max_level;
879 	unsigned int fms, family, model, stepping;
880 
881 	eax = ebx = ecx = edx = 0;
882 
883 	asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0));
884 
885 	if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
886 		genuine_intel = 1;
887 
888 	if (verbose)
889 		fprintf(stderr, "%.4s%.4s%.4s ",
890 			(char *)&ebx, (char *)&edx, (char *)&ecx);
891 
892 	asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx");
893 	family = (fms >> 8) & 0xf;
894 	model = (fms >> 4) & 0xf;
895 	stepping = fms & 0xf;
896 	if (family == 6 || family == 0xf)
897 		model += ((fms >> 16) & 0xf) << 4;
898 
899 	if (verbose)
900 		fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
901 			max_level, family, model, stepping, family, model, stepping);
902 
903 	if (!(edx & (1 << 5))) {
904 		fprintf(stderr, "CPUID: no MSR\n");
905 		exit(1);
906 	}
907 
908 	/*
909 	 * check max extended function levels of CPUID.
910 	 * This is needed to check for invariant TSC.
911 	 * This check is valid for both Intel and AMD.
912 	 */
913 	ebx = ecx = edx = 0;
914 	asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000000));
915 
916 	if (max_level < 0x80000007) {
917 		fprintf(stderr, "CPUID: no invariant TSC (max_level 0x%x)\n", max_level);
918 		exit(1);
919 	}
920 
921 	/*
922 	 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
923 	 * this check is valid for both Intel and AMD
924 	 */
925 	asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000007));
926 	has_invariant_tsc = edx & (1 << 8);
927 
928 	if (!has_invariant_tsc) {
929 		fprintf(stderr, "No invariant TSC\n");
930 		exit(1);
931 	}
932 
933 	/*
934 	 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
935 	 * this check is valid for both Intel and AMD
936 	 */
937 
938 	asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6));
939 	has_aperf = ecx & (1 << 0);
940 	if (!has_aperf) {
941 		fprintf(stderr, "No APERF MSR\n");
942 		exit(1);
943 	}
944 
945 	do_nehalem_platform_info = genuine_intel && has_invariant_tsc;
946 	do_nhm_cstates = genuine_intel;	/* all Intel w/ non-stop TSC have NHM counters */
947 	do_snb_cstates = is_snb(family, model);
948 	bclk = discover_bclk(family, model);
949 
950 	do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model);
951 }
952 
953 
954 void usage()
955 {
956 	fprintf(stderr, "%s: [-v] [-M MSR#] [-i interval_sec | command ...]\n",
957 		progname);
958 	exit(1);
959 }
960 
961 
/*
 * in /dev/cpu/ return success for names that are numbers
 * ie. filter out ".", "..", "microcode".
 *
 * Only the first character of the name is examined.  Returns 1 when
 * it is a decimal digit and 0 otherwise.
 */
int dir_filter(const struct dirent *dirp)
{
	/* isdigit() is only defined for unsigned char values and EOF */
	return isdigit((unsigned char)dirp->d_name[0]) ? 1 : 0;
}
973 
/*
 * Placeholder: ignores its argument and always returns 0.
 */
int open_dev_cpu_msr(int dummy1)
{
	(void)dummy1;

	return 0;
}
978 
979 void turbostat_init()
980 {
981 	check_cpuid();
982 
983 	check_dev_msr();
984 	check_super_user();
985 
986 	num_cpus = for_all_cpus(alloc_new_counters);
987 
988 	if (verbose)
989 		print_nehalem_info();
990 }
991 
992 int fork_it(char **argv)
993 {
994 	int retval;
995 	pid_t child_pid;
996 	get_counters(cnt_even);
997 	gettimeofday(&tv_even, (struct timezone *)NULL);
998 
999 	child_pid = fork();
1000 	if (!child_pid) {
1001 		/* child */
1002 		execvp(argv[0], argv);
1003 	} else {
1004 		int status;
1005 
1006 		/* parent */
1007 		if (child_pid == -1) {
1008 			perror("fork");
1009 			exit(1);
1010 		}
1011 
1012 		signal(SIGINT, SIG_IGN);
1013 		signal(SIGQUIT, SIG_IGN);
1014 		if (waitpid(child_pid, &status, 0) == -1) {
1015 			perror("wait");
1016 			exit(1);
1017 		}
1018 	}
1019 	get_counters(cnt_odd);
1020 	gettimeofday(&tv_odd, (struct timezone *)NULL);
1021 	retval = compute_delta(cnt_odd, cnt_even, cnt_delta);
1022 
1023 	timersub(&tv_odd, &tv_even, &tv_delta);
1024 	compute_average(cnt_delta, cnt_average);
1025 	if (!retval)
1026 		print_counters(cnt_delta);
1027 
1028 	fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
1029 
1030 	return 0;
1031 }
1032 
1033 void cmdline(int argc, char **argv)
1034 {
1035 	int opt;
1036 
1037 	progname = argv[0];
1038 
1039 	while ((opt = getopt(argc, argv, "+svi:M:")) != -1) {
1040 		switch (opt) {
1041 		case 's':
1042 			summary_only++;
1043 			break;
1044 		case 'v':
1045 			verbose++;
1046 			break;
1047 		case 'i':
1048 			interval_sec = atoi(optarg);
1049 			break;
1050 		case 'M':
1051 			sscanf(optarg, "%x", &extra_msr_offset);
1052 			if (verbose > 1)
1053 				fprintf(stderr, "MSR 0x%X\n", extra_msr_offset);
1054 			break;
1055 		default:
1056 			usage();
1057 		}
1058 	}
1059 }
1060 
1061 int main(int argc, char **argv)
1062 {
1063 	cmdline(argc, argv);
1064 
1065 	if (verbose > 1)
1066 		fprintf(stderr, "turbostat Dec 6, 2010"
1067 			" - Len Brown <lenb@kernel.org>\n");
1068 	if (verbose > 1)
1069 		fprintf(stderr, "http://userweb.kernel.org/~lenb/acpi/utils/pmtools/turbostat/\n");
1070 
1071 	turbostat_init();
1072 
1073 	/*
1074 	 * if any params left, it must be a command to fork
1075 	 */
1076 	if (argc - optind)
1077 		return fork_it(argv + optind);
1078 	else
1079 		turbostat_loop();
1080 
1081 	return 0;
1082 }
1083