1 /* 2 * turbostat -- show CPU frequency and C-state residency 3 * on modern Intel turbo-capable processors. 4 * 5 * Copyright (c) 2012 Intel Corporation. 6 * Len Brown <len.brown@intel.com> 7 * 8 * This program is free software; you can redistribute it and/or modify it 9 * under the terms and conditions of the GNU General Public License, 10 * version 2, as published by the Free Software Foundation. 11 * 12 * This program is distributed in the hope it will be useful, but WITHOUT 13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 * more details. 16 * 17 * You should have received a copy of the GNU General Public License along with 18 * this program; if not, write to the Free Software Foundation, Inc., 19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 20 */ 21 22 #define _GNU_SOURCE 23 #include <stdio.h> 24 #include <unistd.h> 25 #include <sys/types.h> 26 #include <sys/wait.h> 27 #include <sys/stat.h> 28 #include <sys/resource.h> 29 #include <fcntl.h> 30 #include <signal.h> 31 #include <sys/time.h> 32 #include <stdlib.h> 33 #include <dirent.h> 34 #include <string.h> 35 #include <ctype.h> 36 #include <sched.h> 37 38 #define MSR_NEHALEM_PLATFORM_INFO 0xCE 39 #define MSR_NEHALEM_TURBO_RATIO_LIMIT 0x1AD 40 #define MSR_IVT_TURBO_RATIO_LIMIT 0x1AE 41 #define MSR_APERF 0xE8 42 #define MSR_MPERF 0xE7 43 #define MSR_PKG_C2_RESIDENCY 0x60D /* SNB only */ 44 #define MSR_PKG_C3_RESIDENCY 0x3F8 45 #define MSR_PKG_C6_RESIDENCY 0x3F9 46 #define MSR_PKG_C7_RESIDENCY 0x3FA /* SNB only */ 47 #define MSR_CORE_C3_RESIDENCY 0x3FC 48 #define MSR_CORE_C6_RESIDENCY 0x3FD 49 #define MSR_CORE_C7_RESIDENCY 0x3FE /* SNB only */ 50 51 char *proc_stat = "/proc/stat"; 52 unsigned int interval_sec = 5; /* set with -i interval_sec */ 53 unsigned int verbose; /* set with -v */ 54 unsigned int summary_only; /* set with -s */ 55 unsigned int skip_c0; 56 unsigned int skip_c1; 57 unsigned int do_nhm_cstates; 58 unsigned int do_snb_cstates; 59 unsigned int has_aperf; 60 unsigned int units = 1000000000; /* Ghz etc */ 61 unsigned int genuine_intel; 62 unsigned int has_invariant_tsc; 63 unsigned int do_nehalem_platform_info; 64 unsigned int do_nehalem_turbo_ratio_limit; 65 unsigned int do_ivt_turbo_ratio_limit; 66 unsigned int extra_msr_offset32; 67 unsigned int extra_msr_offset64; 68 double bclk; 69 unsigned int show_pkg; 70 unsigned int show_core; 71 unsigned int show_cpu; 72 unsigned int show_pkg_only; 73 unsigned int show_core_only; 74 char *output_buffer, *outp; 75 76 int aperf_mperf_unstable; 77 int backwards_count; 78 char *progname; 79 80 cpu_set_t *cpu_present_set, *cpu_affinity_set; 81 size_t cpu_present_setsize, cpu_affinity_setsize; 82 83 struct thread_data { 84 unsigned long long tsc; 85 unsigned long long aperf; 86 unsigned long long mperf; 87 unsigned long long c1; /* derived */ 88 unsigned long long extra_msr64; 89 unsigned int extra_msr32; 90 unsigned int cpu_id; 91 unsigned int flags; 92 #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 93 #define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 94 } *thread_even, *thread_odd; 95 96 struct core_data { 97 unsigned long long c3; 98 unsigned long long c6; 99 unsigned long long c7; 100 unsigned int core_id; 101 } *core_even, *core_odd; 102 103 struct pkg_data { 104 unsigned long long pc2; 105 unsigned long long pc3; 106 unsigned long long pc6; 107 unsigned long long pc7; 108 unsigned int package_id; 109 } *package_even, *package_odd; 110 111 #define ODD_COUNTERS thread_odd, core_odd, package_odd 112 #define EVEN_COUNTERS thread_even, core_even, package_even 113 114 #define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \ 115 (thread_base + (pkg_no) * topo.num_cores_per_pkg * \ 116 topo.num_threads_per_core + \ 117 (core_no) * topo.num_threads_per_core + (thread_no)) 118 #define GET_CORE(core_base, core_no, pkg_no) \ 119 (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no)) 120 #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) 121 122 struct system_summary { 123 struct thread_data threads; 124 struct core_data cores; 125 struct pkg_data packages; 126 } sum, average; 127 128 129 struct topo_params { 130 int num_packages; 131 int num_cpus; 132 int num_cores; 133 int max_cpu_num; 134 int num_cores_per_pkg; 135 int num_threads_per_core; 136 } topo; 137 138 struct timeval tv_even, tv_odd, tv_delta; 139 140 void setup_all_buffers(void); 141 142 int cpu_is_not_present(int cpu) 143 { 144 return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); 145 } 146 /* 147 * run func(thread, core, package) in topology order 148 * skip non-present cpus 149 */ 150 151 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *), 152 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) 153 { 154 int retval, pkg_no, core_no, thread_no; 155 156 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 157 for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { 158 for (thread_no = 0; thread_no < 159 topo.num_threads_per_core; ++thread_no) { 160 struct thread_data *t; 161 struct core_data *c; 162 struct pkg_data *p; 163 164 t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); 165 166 if (cpu_is_not_present(t->cpu_id)) 167 continue; 168 169 c = GET_CORE(core_base, core_no, pkg_no); 170 p = GET_PKG(pkg_base, pkg_no); 171 172 retval = func(t, c, p); 173 if (retval) 174 return retval; 175 } 176 } 177 } 178 return 0; 179 } 180 181 int cpu_migrate(int cpu) 182 { 183 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 184 CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); 185 if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) 186 return -1; 187 else 188 return 0; 189 } 190 191 int get_msr(int cpu, off_t offset, unsigned long long *msr) 192 { 193 ssize_t retval; 194 char pathname[32]; 195 int fd; 196 197 sprintf(pathname, "/dev/cpu/%d/msr", cpu); 198 fd = open(pathname, O_RDONLY); 199 if (fd < 0) 200 return -1; 201 202 retval = pread(fd, msr, sizeof *msr, offset); 203 close(fd); 204 205 if (retval != sizeof *msr) 206 return -1; 207 208 return 0; 209 } 210 211 /* 212 * Truncate the 8 bytes we read from /dev/cpu/.../msr 213 * to the 4 bytes requested 214 */ 215 216 int get_msr32(int cpu, off_t offset, unsigned int *msr) 217 { 218 int retval; 219 220 unsigned long long msr64; 221 222 retval = get_msr(cpu, offset, &msr64); 223 *msr = (unsigned int) msr64; 224 225 return retval; 226 } 227 228 229 void print_header(void) 230 { 231 if (show_pkg) 232 outp += sprintf(outp, "pk"); 233 if (show_pkg) 234 outp += sprintf(outp, " "); 235 if (show_core) 236 outp += sprintf(outp, "cor"); 237 if (show_cpu) 238 outp += sprintf(outp, " CPU"); 239 if (show_pkg || show_core || show_cpu) 240 outp += sprintf(outp, " "); 241 if (do_nhm_cstates) 242 outp += sprintf(outp, " %%c0"); 243 if (has_aperf) 244 outp += sprintf(outp, " GHz"); 245 outp += sprintf(outp, " TSC"); 246 if (extra_msr_offset32) 247 outp += sprintf(outp, " MSR 0x%04X", extra_msr_offset32); 248 if (extra_msr_offset64) 249 outp += sprintf(outp, " MSR 0x%04X", extra_msr_offset64); 250 if (do_nhm_cstates) 251 outp += sprintf(outp, " %%c1"); 252 if (do_nhm_cstates) 253 outp += sprintf(outp, " %%c3"); 254 if (do_nhm_cstates) 255 outp += sprintf(outp, " %%c6"); 256 if (do_snb_cstates) 257 outp += sprintf(outp, " %%c7"); 258 if (do_snb_cstates) 259 outp += sprintf(outp, " %%pc2"); 260 if (do_nhm_cstates) 261 outp += sprintf(outp, " %%pc3"); 262 if (do_nhm_cstates) 263 outp += sprintf(outp, " %%pc6"); 264 if (do_snb_cstates) 265 outp += sprintf(outp, " %%pc7"); 266 267 outp += sprintf(outp, "\n"); 268 } 269 270 int dump_counters(struct thread_data *t, struct core_data *c, 271 struct pkg_data *p) 272 { 273 fprintf(stderr, "t %p, c %p, p %p\n", t, c, p); 274 275 if (t) { 276 fprintf(stderr, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags); 277 fprintf(stderr, "TSC: %016llX\n", t->tsc); 278 fprintf(stderr, "aperf: %016llX\n", t->aperf); 279 fprintf(stderr, "mperf: %016llX\n", t->mperf); 280 fprintf(stderr, "c1: %016llX\n", t->c1); 281 fprintf(stderr, "msr0x%x: %08X\n", 282 extra_msr_offset32, t->extra_msr32); 283 fprintf(stderr, "msr0x%x: %016llX\n", 284 extra_msr_offset64, t->extra_msr64); 285 } 286 287 if (c) { 288 fprintf(stderr, "core: %d\n", c->core_id); 289 fprintf(stderr, "c3: %016llX\n", c->c3); 290 fprintf(stderr, "c6: %016llX\n", c->c6); 291 fprintf(stderr, "c7: %016llX\n", c->c7); 292 } 293 294 if (p) { 295 fprintf(stderr, "package: %d\n", p->package_id); 296 fprintf(stderr, "pc2: %016llX\n", p->pc2); 297 fprintf(stderr, "pc3: %016llX\n", p->pc3); 298 fprintf(stderr, "pc6: %016llX\n", p->pc6); 299 fprintf(stderr, "pc7: %016llX\n", p->pc7); 300 } 301 return 0; 302 } 303 304 /* 305 * column formatting convention & formats 306 * package: "pk" 2 columns %2d 307 * core: "cor" 3 columns %3d 308 * CPU: "CPU" 3 columns %3d 309 * GHz: "GHz" 3 columns %3.2 310 * TSC: "TSC" 3 columns %3.2 311 * percentage " %pc3" %6.2 312 */ 313 int format_counters(struct thread_data *t, struct core_data *c, 314 struct pkg_data *p) 315 { 316 double interval_float; 317 318 /* if showing only 1st thread in core and this isn't one, bail out */ 319 if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 320 return 0; 321 322 /* if showing only 1st thread in pkg and this isn't one, bail out */ 323 if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 324 return 0; 325 326 interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; 327 328 /* topo columns, print blanks on 1st (average) line */ 329 if (t == &average.threads) { 330 if (show_pkg) 331 outp += sprintf(outp, " "); 332 if (show_pkg && show_core) 333 outp += sprintf(outp, " "); 334 if (show_core) 335 outp += sprintf(outp, " "); 336 if (show_cpu) 337 outp += sprintf(outp, " " " "); 338 } else { 339 if (show_pkg) { 340 if (p) 341 outp += sprintf(outp, "%2d", p->package_id); 342 else 343 outp += sprintf(outp, " "); 344 } 345 if (show_pkg && show_core) 346 outp += sprintf(outp, " "); 347 if (show_core) { 348 if (c) 349 outp += sprintf(outp, "%3d", c->core_id); 350 else 351 outp += sprintf(outp, " "); 352 } 353 if (show_cpu) 354 outp += sprintf(outp, " %3d", t->cpu_id); 355 } 356 357 /* %c0 */ 358 if (do_nhm_cstates) { 359 if (show_pkg || show_core || show_cpu) 360 outp += sprintf(outp, " "); 361 if (!skip_c0) 362 outp += sprintf(outp, "%6.2f", 100.0 * t->mperf/t->tsc); 363 else 364 outp += sprintf(outp, " ****"); 365 } 366 367 /* GHz */ 368 if (has_aperf) { 369 if (!aperf_mperf_unstable) { 370 outp += sprintf(outp, " %3.2f", 371 1.0 * t->tsc / units * t->aperf / 372 t->mperf / interval_float); 373 } else { 374 if (t->aperf > t->tsc || t->mperf > t->tsc) { 375 outp += sprintf(outp, " ***"); 376 } else { 377 outp += sprintf(outp, "%3.1f*", 378 1.0 * t->tsc / 379 units * t->aperf / 380 t->mperf / interval_float); 381 } 382 } 383 } 384 385 /* TSC */ 386 outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float); 387 388 /* msr */ 389 if (extra_msr_offset32) 390 outp += sprintf(outp, " 0x%08x", t->extra_msr32); 391 392 /* MSR */ 393 if (extra_msr_offset64) 394 outp += sprintf(outp, " 0x%016llx", t->extra_msr64); 395 396 if (do_nhm_cstates) { 397 if (!skip_c1) 398 outp += sprintf(outp, " %6.2f", 100.0 * t->c1/t->tsc); 399 else 400 outp += sprintf(outp, " ****"); 401 } 402 403 /* print per-core data only for 1st thread in core */ 404 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 405 goto done; 406 407 if (do_nhm_cstates) 408 outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc); 409 if (do_nhm_cstates) 410 outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc); 411 if (do_snb_cstates) 412 outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc); 413 414 /* print per-package data only for 1st core in package */ 415 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 416 goto done; 417 418 if (do_snb_cstates) 419 outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc); 420 if (do_nhm_cstates) 421 outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc); 422 if (do_nhm_cstates) 423 outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc); 424 if (do_snb_cstates) 425 outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); 426 done: 427 outp += sprintf(outp, "\n"); 428 429 return 0; 430 } 431 432 void flush_stdout() 433 { 434 fputs(output_buffer, stdout); 435 outp = output_buffer; 436 } 437 void flush_stderr() 438 { 439 fputs(output_buffer, stderr); 440 outp = output_buffer; 441 } 442 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 443 { 444 static int printed; 445 446 if (!printed || !summary_only) 447 print_header(); 448 449 if (topo.num_cpus > 1) 450 format_counters(&average.threads, &average.cores, 451 &average.packages); 452 453 printed = 1; 454 455 if (summary_only) 456 return; 457 458 for_all_cpus(format_counters, t, c, p); 459 } 460 461 void 462 delta_package(struct pkg_data *new, struct pkg_data *old) 463 { 464 old->pc2 = new->pc2 - old->pc2; 465 old->pc3 = new->pc3 - old->pc3; 466 old->pc6 = new->pc6 - old->pc6; 467 old->pc7 = new->pc7 - old->pc7; 468 } 469 470 void 471 delta_core(struct core_data *new, struct core_data *old) 472 { 473 old->c3 = new->c3 - old->c3; 474 old->c6 = new->c6 - old->c6; 475 old->c7 = new->c7 - old->c7; 476 } 477 478 /* 479 * old = new - old 480 */ 481 void 482 delta_thread(struct thread_data *new, struct thread_data *old, 483 struct core_data *core_delta) 484 { 485 old->tsc = new->tsc - old->tsc; 486 487 /* check for TSC < 1 Mcycles over interval */ 488 if (old->tsc < (1000 * 1000)) { 489 fprintf(stderr, "Insanely slow TSC rate, TSC stops in idle?\n"); 490 fprintf(stderr, "You can disable all c-states by booting with \"idle=poll\"\n"); 491 fprintf(stderr, "or just the deep ones with \"processor.max_cstate=1\"\n"); 492 exit(-3); 493 } 494 495 old->c1 = new->c1 - old->c1; 496 497 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { 498 old->aperf = new->aperf - old->aperf; 499 old->mperf = new->mperf - old->mperf; 500 } else { 501 502 if (!aperf_mperf_unstable) { 503 fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); 504 fprintf(stderr, "* Frequency results do not cover entire interval *\n"); 505 fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); 506 507 aperf_mperf_unstable = 1; 508 } 509 /* 510 * mperf delta is likely a huge "positive" number 511 * can not use it for calculating c0 time 512 */ 513 skip_c0 = 1; 514 skip_c1 = 1; 515 } 516 517 518 /* 519 * As counter collection is not atomic, 520 * it is possible for mperf's non-halted cycles + idle states 521 * to exceed TSC's all cycles: show c1 = 0% in that case. 522 */ 523 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc) 524 old->c1 = 0; 525 else { 526 /* normal case, derive c1 */ 527 old->c1 = old->tsc - old->mperf - core_delta->c3 528 - core_delta->c6 - core_delta->c7; 529 } 530 531 if (old->mperf == 0) { 532 if (verbose > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); 533 old->mperf = 1; /* divide by 0 protection */ 534 } 535 536 /* 537 * Extra MSR is a snapshot, simply copy latest w/o subtracting 538 */ 539 old->extra_msr32 = new->extra_msr32; 540 old->extra_msr64 = new->extra_msr64; 541 } 542 543 int delta_cpu(struct thread_data *t, struct core_data *c, 544 struct pkg_data *p, struct thread_data *t2, 545 struct core_data *c2, struct pkg_data *p2) 546 { 547 /* calculate core delta only for 1st thread in core */ 548 if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE) 549 delta_core(c, c2); 550 551 /* always calculate thread delta */ 552 delta_thread(t, t2, c2); /* c2 is core delta */ 553 554 /* calculate package delta only for 1st core in package */ 555 if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE) 556 delta_package(p, p2); 557 558 return 0; 559 } 560 561 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 562 { 563 t->tsc = 0; 564 t->aperf = 0; 565 t->mperf = 0; 566 t->c1 = 0; 567 568 /* tells format_counters to dump all fields from this set */ 569 t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; 570 571 c->c3 = 0; 572 c->c6 = 0; 573 c->c7 = 0; 574 575 p->pc2 = 0; 576 p->pc3 = 0; 577 p->pc6 = 0; 578 p->pc7 = 0; 579 } 580 int sum_counters(struct thread_data *t, struct core_data *c, 581 struct pkg_data *p) 582 { 583 average.threads.tsc += t->tsc; 584 average.threads.aperf += t->aperf; 585 average.threads.mperf += t->mperf; 586 average.threads.c1 += t->c1; 587 588 /* sum per-core values only for 1st thread in core */ 589 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 590 return 0; 591 592 average.cores.c3 += c->c3; 593 average.cores.c6 += c->c6; 594 average.cores.c7 += c->c7; 595 596 /* sum per-pkg values only for 1st core in pkg */ 597 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 598 return 0; 599 600 average.packages.pc2 += p->pc2; 601 average.packages.pc3 += p->pc3; 602 average.packages.pc6 += p->pc6; 603 average.packages.pc7 += p->pc7; 604 605 return 0; 606 } 607 /* 608 * sum the counters for all cpus in the system 609 * compute the weighted average 610 */ 611 void compute_average(struct thread_data *t, struct core_data *c, 612 struct pkg_data *p) 613 { 614 clear_counters(&average.threads, &average.cores, &average.packages); 615 616 for_all_cpus(sum_counters, t, c, p); 617 618 average.threads.tsc /= topo.num_cpus; 619 average.threads.aperf /= topo.num_cpus; 620 average.threads.mperf /= topo.num_cpus; 621 average.threads.c1 /= topo.num_cpus; 622 623 average.cores.c3 /= topo.num_cores; 624 average.cores.c6 /= topo.num_cores; 625 average.cores.c7 /= topo.num_cores; 626 627 average.packages.pc2 /= topo.num_packages; 628 average.packages.pc3 /= topo.num_packages; 629 average.packages.pc6 /= topo.num_packages; 630 average.packages.pc7 /= topo.num_packages; 631 } 632 633 static unsigned long long rdtsc(void) 634 { 635 unsigned int low, high; 636 637 asm volatile("rdtsc" : "=a" (low), "=d" (high)); 638 639 return low | ((unsigned long long)high) << 32; 640 } 641 642 643 /* 644 * get_counters(...) 645 * migrate to cpu 646 * acquire and record local counters for that cpu 647 */ 648 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 649 { 650 int cpu = t->cpu_id; 651 652 if (cpu_migrate(cpu)) 653 return -1; 654 655 t->tsc = rdtsc(); /* we are running on local CPU of interest */ 656 657 if (has_aperf) { 658 if (get_msr(cpu, MSR_APERF, &t->aperf)) 659 return -3; 660 if (get_msr(cpu, MSR_MPERF, &t->mperf)) 661 return -4; 662 } 663 664 if (extra_msr_offset32) 665 if (get_msr32(cpu, extra_msr_offset32, &t->extra_msr32)) 666 return -5; 667 668 if (extra_msr_offset64) 669 if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64)) 670 return -5; 671 672 /* collect core counters only for 1st thread in core */ 673 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 674 return 0; 675 676 if (do_nhm_cstates) { 677 if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) 678 return -6; 679 if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) 680 return -7; 681 } 682 683 if (do_snb_cstates) 684 if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) 685 return -8; 686 687 /* collect package counters only for 1st core in package */ 688 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 689 return 0; 690 691 if (do_nhm_cstates) { 692 if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) 693 return -9; 694 if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) 695 return -10; 696 } 697 if (do_snb_cstates) { 698 if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2)) 699 return -11; 700 if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) 701 return -12; 702 } 703 return 0; 704 } 705 706 void print_verbose_header(void) 707 { 708 unsigned long long msr; 709 unsigned int ratio; 710 711 if (!do_nehalem_platform_info) 712 return; 713 714 get_msr(0, MSR_NEHALEM_PLATFORM_INFO, &msr); 715 716 if (verbose > 1) 717 fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr); 718 719 ratio = (msr >> 40) & 0xFF; 720 fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", 721 ratio, bclk, ratio * bclk); 722 723 ratio = (msr >> 8) & 0xFF; 724 fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n", 725 ratio, bclk, ratio * bclk); 726 727 if (!do_ivt_turbo_ratio_limit) 728 goto print_nhm_turbo_ratio_limits; 729 730 get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); 731 732 if (verbose > 1) 733 fprintf(stderr, "MSR_IVT_TURBO_RATIO_LIMIT: 0x%llx\n", msr); 734 735 ratio = (msr >> 56) & 0xFF; 736 if (ratio) 737 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", 738 ratio, bclk, ratio * bclk); 739 740 ratio = (msr >> 48) & 0xFF; 741 if (ratio) 742 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", 743 ratio, bclk, ratio * bclk); 744 745 ratio = (msr >> 40) & 0xFF; 746 if (ratio) 747 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", 748 ratio, bclk, ratio * bclk); 749 750 ratio = (msr >> 32) & 0xFF; 751 if (ratio) 752 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", 753 ratio, bclk, ratio * bclk); 754 755 ratio = (msr >> 24) & 0xFF; 756 if (ratio) 757 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", 758 ratio, bclk, ratio * bclk); 759 760 ratio = (msr >> 16) & 0xFF; 761 if (ratio) 762 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", 763 ratio, bclk, ratio * bclk); 764 765 ratio = (msr >> 8) & 0xFF; 766 if (ratio) 767 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", 768 ratio, bclk, ratio * bclk); 769 770 ratio = (msr >> 0) & 0xFF; 771 if (ratio) 772 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", 773 ratio, bclk, ratio * bclk); 774 775 print_nhm_turbo_ratio_limits: 776 777 if (!do_nehalem_turbo_ratio_limit) 778 return; 779 780 get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT, &msr); 781 782 if (verbose > 1) 783 fprintf(stderr, "MSR_NEHALEM_TURBO_RATIO_LIMIT: 0x%llx\n", msr); 784 785 ratio = (msr >> 56) & 0xFF; 786 if (ratio) 787 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", 788 ratio, bclk, ratio * bclk); 789 790 ratio = (msr >> 48) & 0xFF; 791 if (ratio) 792 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", 793 ratio, bclk, ratio * bclk); 794 795 ratio = (msr >> 40) & 0xFF; 796 if (ratio) 797 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", 798 ratio, bclk, ratio * bclk); 799 800 ratio = (msr >> 32) & 0xFF; 801 if (ratio) 802 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", 803 ratio, bclk, ratio * bclk); 804 805 ratio = (msr >> 24) & 0xFF; 806 if (ratio) 807 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", 808 ratio, bclk, ratio * bclk); 809 810 ratio = (msr >> 16) & 0xFF; 811 if (ratio) 812 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", 813 ratio, bclk, ratio * bclk); 814 815 ratio = (msr >> 8) & 0xFF; 816 if (ratio) 817 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", 818 ratio, bclk, ratio * bclk); 819 820 ratio = (msr >> 0) & 0xFF; 821 if (ratio) 822 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", 823 ratio, bclk, ratio * bclk); 824 } 825 826 void free_all_buffers(void) 827 { 828 CPU_FREE(cpu_present_set); 829 cpu_present_set = NULL; 830 cpu_present_set = 0; 831 832 CPU_FREE(cpu_affinity_set); 833 cpu_affinity_set = NULL; 834 cpu_affinity_setsize = 0; 835 836 free(thread_even); 837 free(core_even); 838 free(package_even); 839 840 thread_even = NULL; 841 core_even = NULL; 842 package_even = NULL; 843 844 free(thread_odd); 845 free(core_odd); 846 free(package_odd); 847 848 thread_odd = NULL; 849 core_odd = NULL; 850 package_odd = NULL; 851 852 free(output_buffer); 853 output_buffer = NULL; 854 outp = NULL; 855 } 856 857 /* 858 * cpu_is_first_sibling_in_core(cpu) 859 * return 1 if given CPU is 1st HT sibling in the core 860 */ 861 int cpu_is_first_sibling_in_core(int cpu) 862 { 863 char path[64]; 864 FILE *filep; 865 int first_cpu; 866 867 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); 868 filep = fopen(path, "r"); 869 if (filep == NULL) { 870 perror(path); 871 exit(1); 872 } 873 fscanf(filep, "%d", &first_cpu); 874 fclose(filep); 875 return (cpu == first_cpu); 876 } 877 878 /* 879 * cpu_is_first_core_in_package(cpu) 880 * return 1 if given CPU is 1st core in package 881 */ 882 int cpu_is_first_core_in_package(int cpu) 883 { 884 char path[64]; 885 FILE *filep; 886 int first_cpu; 887 888 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu); 889 filep = fopen(path, "r"); 890 if (filep == NULL) { 891 perror(path); 892 exit(1); 893 } 894 fscanf(filep, "%d", &first_cpu); 895 fclose(filep); 896 return (cpu == first_cpu); 897 } 898 899 int get_physical_package_id(int cpu) 900 { 901 char path[80]; 902 FILE *filep; 903 int pkg; 904 905 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); 906 filep = fopen(path, "r"); 907 if (filep == NULL) { 908 perror(path); 909 exit(1); 910 } 911 fscanf(filep, "%d", &pkg); 912 fclose(filep); 913 return pkg; 914 } 915 916 int get_core_id(int cpu) 917 { 918 char path[80]; 919 FILE *filep; 920 int core; 921 922 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); 923 filep = fopen(path, "r"); 924 if (filep == NULL) { 925 perror(path); 926 exit(1); 927 } 928 fscanf(filep, "%d", &core); 929 fclose(filep); 930 return core; 931 } 932 933 int get_num_ht_siblings(int cpu) 934 { 935 char path[80]; 936 FILE *filep; 937 int sib1, sib2; 938 int matches; 939 char character; 940 941 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); 942 filep = fopen(path, "r"); 943 if (filep == NULL) { 944 perror(path); 945 exit(1); 946 } 947 /* 948 * file format: 949 * if a pair of number with a character between: 2 siblings (eg. 1-2, or 1,4) 950 * otherwinse 1 sibling (self). 951 */ 952 matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2); 953 954 fclose(filep); 955 956 if (matches == 3) 957 return 2; 958 else 959 return 1; 960 } 961 962 /* 963 * run func(thread, core, package) in topology order 964 * skip non-present cpus 965 */ 966 967 int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *, 968 struct pkg_data *, struct thread_data *, struct core_data *, 969 struct pkg_data *), struct thread_data *thread_base, 970 struct core_data *core_base, struct pkg_data *pkg_base, 971 struct thread_data *thread_base2, struct core_data *core_base2, 972 struct pkg_data *pkg_base2) 973 { 974 int retval, pkg_no, core_no, thread_no; 975 976 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 977 for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { 978 for (thread_no = 0; thread_no < 979 topo.num_threads_per_core; ++thread_no) { 980 struct thread_data *t, *t2; 981 struct core_data *c, *c2; 982 struct pkg_data *p, *p2; 983 984 t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); 985 986 if (cpu_is_not_present(t->cpu_id)) 987 continue; 988 989 t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no); 990 991 c = GET_CORE(core_base, core_no, pkg_no); 992 c2 = GET_CORE(core_base2, core_no, pkg_no); 993 994 p = GET_PKG(pkg_base, pkg_no); 995 p2 = GET_PKG(pkg_base2, pkg_no); 996 997 retval = func(t, c, p, t2, c2, p2); 998 if (retval) 999 return retval; 1000 } 1001 } 1002 } 1003 return 0; 1004 } 1005 1006 /* 1007 * run func(cpu) on every cpu in /proc/stat 1008 * return max_cpu number 1009 */ 1010 int for_all_proc_cpus(int (func)(int)) 1011 { 1012 FILE *fp; 1013 int cpu_num; 1014 int retval; 1015 1016 fp = fopen(proc_stat, "r"); 1017 if (fp == NULL) { 1018 perror(proc_stat); 1019 exit(1); 1020 } 1021 1022 retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n"); 1023 if (retval != 0) { 1024 perror("/proc/stat format"); 1025 exit(1); 1026 } 1027 1028 while (1) { 1029 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num); 1030 if (retval != 1) 1031 break; 1032 1033 retval = func(cpu_num); 1034 if (retval) { 1035 fclose(fp); 1036 return(retval); 1037 } 1038 } 1039 fclose(fp); 1040 return 0; 1041 } 1042 1043 void re_initialize(void) 1044 { 1045 free_all_buffers(); 1046 setup_all_buffers(); 1047 printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); 1048 } 1049 1050 1051 /* 1052 * count_cpus() 1053 * remember the last one seen, it will be the max 1054 */ 1055 int count_cpus(int cpu) 1056 { 1057 if (topo.max_cpu_num < cpu) 1058 topo.max_cpu_num = cpu; 1059 1060 topo.num_cpus += 1; 1061 return 0; 1062 } 1063 int mark_cpu_present(int cpu) 1064 { 1065 CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); 1066 return 0; 1067 } 1068 1069 void turbostat_loop() 1070 { 1071 int retval; 1072 1073 restart: 1074 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 1075 if (retval) { 1076 re_initialize(); 1077 goto restart; 1078 } 1079 gettimeofday(&tv_even, (struct timezone *)NULL); 1080 1081 while (1) { 1082 if (for_all_proc_cpus(cpu_is_not_present)) { 1083 re_initialize(); 1084 goto restart; 1085 } 1086 sleep(interval_sec); 1087 retval = for_all_cpus(get_counters, ODD_COUNTERS); 1088 if (retval) { 1089 re_initialize(); 1090 goto restart; 1091 } 1092 gettimeofday(&tv_odd, (struct timezone *)NULL); 1093 timersub(&tv_odd, &tv_even, &tv_delta); 1094 for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); 1095 compute_average(EVEN_COUNTERS); 1096 format_all_counters(EVEN_COUNTERS); 1097 flush_stdout(); 1098 sleep(interval_sec); 1099 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 1100 if (retval) { 1101 re_initialize(); 1102 goto restart; 1103 } 1104 gettimeofday(&tv_even, (struct timezone *)NULL); 1105 timersub(&tv_even, &tv_odd, &tv_delta); 1106 for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); 1107 compute_average(ODD_COUNTERS); 1108 format_all_counters(ODD_COUNTERS); 1109 flush_stdout(); 1110 } 1111 } 1112 1113 void check_dev_msr() 1114 { 1115 struct stat sb; 1116 1117 if (stat("/dev/cpu/0/msr", &sb)) { 1118 fprintf(stderr, "no /dev/cpu/0/msr\n"); 1119 fprintf(stderr, "Try \"# modprobe msr\"\n"); 1120 exit(-5); 1121 } 1122 } 1123 1124 void check_super_user() 1125 { 1126 if (getuid() != 0) { 1127 fprintf(stderr, "must be root\n"); 1128 exit(-6); 1129 } 1130 } 1131 1132 int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) 1133 { 1134 if (!genuine_intel) 1135 return 0; 1136 1137 if (family != 6) 1138 return 0; 1139 1140 switch (model) { 1141 case 0x1A: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */ 1142 case 0x1E: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ 1143 case 0x1F: /* Core i7 and i5 Processor - Nehalem */ 1144 case 0x25: /* Westmere Client - Clarkdale, Arrandale */ 1145 case 0x2C: /* Westmere EP - Gulftown */ 1146 case 0x2A: /* SNB */ 1147 case 0x2D: /* SNB Xeon */ 1148 case 0x3A: /* IVB */ 1149 case 0x3E: /* IVB Xeon */ 1150 return 1; 1151 case 0x2E: /* Nehalem-EX Xeon - Beckton */ 1152 case 0x2F: /* Westmere-EX Xeon - Eagleton */ 1153 default: 1154 return 0; 1155 } 1156 } 1157 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) 1158 { 1159 if (!genuine_intel) 1160 return 0; 1161 1162 if (family != 6) 1163 return 0; 1164 1165 switch (model) { 1166 case 0x3E: /* IVB Xeon */ 1167 return 1; 1168 default: 1169 return 0; 1170 } 1171 } 1172 1173 1174 int is_snb(unsigned int family, unsigned int model) 1175 { 1176 if (!genuine_intel) 1177 return 0; 1178 1179 switch (model) { 1180 case 0x2A: 1181 case 0x2D: 1182 case 0x3A: /* IVB */ 1183 case 0x3E: /* IVB Xeon */ 1184 return 1; 1185 } 1186 return 0; 1187 } 1188 1189 double discover_bclk(unsigned int family, unsigned int model) 1190 { 1191 if (is_snb(family, model)) 1192 return 100.00; 1193 else 1194 return 133.33; 1195 } 1196 1197 void check_cpuid() 1198 { 1199 unsigned int eax, ebx, ecx, edx, max_level; 1200 unsigned int fms, family, model, stepping; 1201 1202 eax = ebx = ecx = edx = 0; 1203 1204 asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0)); 1205 1206 if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) 1207 genuine_intel = 1; 1208 1209 if (verbose) 1210 fprintf(stderr, "%.4s%.4s%.4s ", 1211 (char *)&ebx, (char *)&edx, (char *)&ecx); 1212 1213 asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); 1214 family = (fms >> 8) & 0xf; 1215 model = (fms >> 4) & 0xf; 1216 stepping = fms & 0xf; 1217 if (family == 6 || family == 0xf) 1218 model += ((fms >> 16) & 0xf) << 4; 1219 1220 if (verbose) 1221 fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", 1222 max_level, family, model, stepping, family, model, stepping); 1223 1224 if (!(edx & (1 << 5))) { 1225 fprintf(stderr, "CPUID: no MSR\n"); 1226 exit(1); 1227 } 1228 1229 /* 1230 * check max extended function levels of CPUID. 1231 * This is needed to check for invariant TSC. 1232 * This check is valid for both Intel and AMD. 1233 */ 1234 ebx = ecx = edx = 0; 1235 asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000000)); 1236 1237 if (max_level < 0x80000007) { 1238 fprintf(stderr, "CPUID: no invariant TSC (max_level 0x%x)\n", max_level); 1239 exit(1); 1240 } 1241 1242 /* 1243 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 1244 * this check is valid for both Intel and AMD 1245 */ 1246 asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000007)); 1247 has_invariant_tsc = edx & (1 << 8); 1248 1249 if (!has_invariant_tsc) { 1250 fprintf(stderr, "No invariant TSC\n"); 1251 exit(1); 1252 } 1253 1254 /* 1255 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 1256 * this check is valid for both Intel and AMD 1257 */ 1258 1259 asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6)); 1260 has_aperf = ecx & (1 << 0); 1261 if (!has_aperf) { 1262 fprintf(stderr, "No APERF MSR\n"); 1263 exit(1); 1264 } 1265 1266 do_nehalem_platform_info = genuine_intel && has_invariant_tsc; 1267 do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */ 1268 do_snb_cstates = is_snb(family, model); 1269 bclk = discover_bclk(family, model); 1270 1271 do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); 1272 do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); 1273 } 1274 1275 1276 void usage() 1277 { 1278 fprintf(stderr, "%s: [-v] [-m msr#] [-M MSR#] [-i interval_sec | command ...]\n", 1279 progname); 1280 exit(1); 1281 } 1282 1283 1284 /* 1285 * in /dev/cpu/ return success for names that are numbers 1286 * ie. filter out ".", "..", "microcode". 1287 */ 1288 int dir_filter(const struct dirent *dirp) 1289 { 1290 if (isdigit(dirp->d_name[0])) 1291 return 1; 1292 else 1293 return 0; 1294 } 1295 1296 int open_dev_cpu_msr(int dummy1) 1297 { 1298 return 0; 1299 } 1300 1301 void topology_probe() 1302 { 1303 int i; 1304 int max_core_id = 0; 1305 int max_package_id = 0; 1306 int max_siblings = 0; 1307 struct cpu_topology { 1308 int core_id; 1309 int physical_package_id; 1310 } *cpus; 1311 1312 /* Initialize num_cpus, max_cpu_num */ 1313 topo.num_cpus = 0; 1314 topo.max_cpu_num = 0; 1315 for_all_proc_cpus(count_cpus); 1316 if (!summary_only && topo.num_cpus > 1) 1317 show_cpu = 1; 1318 1319 if (verbose > 1) 1320 fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); 1321 1322 cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); 1323 if (cpus == NULL) { 1324 perror("calloc cpus"); 1325 exit(1); 1326 } 1327 1328 /* 1329 * Allocate and initialize cpu_present_set 1330 */ 1331 cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1)); 1332 if (cpu_present_set == NULL) { 1333 perror("CPU_ALLOC"); 1334 exit(3); 1335 } 1336 cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 1337 CPU_ZERO_S(cpu_present_setsize, cpu_present_set); 1338 for_all_proc_cpus(mark_cpu_present); 1339 1340 /* 1341 * Allocate and initialize cpu_affinity_set 1342 */ 1343 cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1)); 1344 if (cpu_affinity_set == NULL) { 1345 perror("CPU_ALLOC"); 1346 exit(3); 1347 } 1348 cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 1349 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 1350 1351 1352 /* 1353 * For online cpus 1354 * find max_core_id, max_package_id 1355 */ 1356 for (i = 0; i <= topo.max_cpu_num; ++i) { 1357 int siblings; 1358 1359 if (cpu_is_not_present(i)) { 1360 if (verbose > 1) 1361 fprintf(stderr, "cpu%d NOT PRESENT\n", i); 1362 continue; 1363 } 1364 cpus[i].core_id = get_core_id(i); 1365 if (cpus[i].core_id > max_core_id) 1366 max_core_id = cpus[i].core_id; 1367 1368 cpus[i].physical_package_id = get_physical_package_id(i); 1369 if (cpus[i].physical_package_id > max_package_id) 1370 max_package_id = cpus[i].physical_package_id; 1371 1372 siblings = get_num_ht_siblings(i); 1373 if (siblings > max_siblings) 1374 max_siblings = siblings; 1375 if (verbose > 1) 1376 fprintf(stderr, "cpu %d pkg %d core %d\n", 1377 i, cpus[i].physical_package_id, cpus[i].core_id); 1378 } 1379 topo.num_cores_per_pkg = max_core_id + 1; 1380 if (verbose > 1) 1381 fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", 1382 max_core_id, topo.num_cores_per_pkg); 1383 if (!summary_only && topo.num_cores_per_pkg > 1) 1384 show_core = 1; 1385 1386 topo.num_packages = max_package_id + 1; 1387 if (verbose > 1) 1388 fprintf(stderr, "max_package_id %d, sizing for %d packages\n", 1389 max_package_id, topo.num_packages); 1390 if (!summary_only && topo.num_packages > 1) 1391 show_pkg = 1; 1392 1393 topo.num_threads_per_core = max_siblings; 1394 if (verbose > 1) 1395 fprintf(stderr, "max_siblings %d\n", max_siblings); 1396 1397 free(cpus); 1398 } 1399 1400 void 1401 allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) 1402 { 1403 int i; 1404 1405 *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg * 1406 topo.num_packages, sizeof(struct thread_data)); 1407 if (*t == NULL) 1408 goto error; 1409 1410 for (i = 0; i < topo.num_threads_per_core * 1411 topo.num_cores_per_pkg * topo.num_packages; i++) 1412 (*t)[i].cpu_id = -1; 1413 1414 *c = calloc(topo.num_cores_per_pkg * topo.num_packages, 1415 sizeof(struct core_data)); 1416 if (*c == NULL) 1417 goto error; 1418 1419 for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++) 1420 (*c)[i].core_id = -1; 1421 1422 *p = calloc(topo.num_packages, sizeof(struct pkg_data)); 1423 if (*p == NULL) 1424 goto error; 1425 1426 for (i = 0; i < topo.num_packages; i++) 1427 (*p)[i].package_id = i; 1428 1429 return; 1430 error: 1431 perror("calloc counters"); 1432 exit(1); 1433 } 1434 /* 1435 * init_counter() 1436 * 1437 * set cpu_id, core_num, pkg_num 1438 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE 1439 * 1440 * increment topo.num_cores when 1st core in pkg seen 1441 */ 1442 void init_counter(struct thread_data *thread_base, struct core_data *core_base, 1443 struct pkg_data *pkg_base, int thread_num, int core_num, 1444 int pkg_num, int cpu_id) 1445 { 1446 struct thread_data *t; 1447 struct core_data *c; 1448 struct pkg_data *p; 1449 1450 t = GET_THREAD(thread_base, thread_num, core_num, pkg_num); 1451 c = GET_CORE(core_base, core_num, pkg_num); 1452 p = GET_PKG(pkg_base, pkg_num); 1453 1454 t->cpu_id = cpu_id; 1455 if (thread_num == 0) { 1456 t->flags |= CPU_IS_FIRST_THREAD_IN_CORE; 1457 if (cpu_is_first_core_in_package(cpu_id)) 1458 t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE; 1459 } 1460 1461 c->core_id = core_num; 1462 p->package_id = pkg_num; 1463 } 1464 1465 1466 int initialize_counters(int cpu_id) 1467 { 1468 int my_thread_id, my_core_id, my_package_id; 1469 1470 my_package_id = get_physical_package_id(cpu_id); 1471 my_core_id = get_core_id(cpu_id); 1472 1473 if (cpu_is_first_sibling_in_core(cpu_id)) { 1474 my_thread_id = 0; 1475 topo.num_cores++; 1476 } else { 1477 my_thread_id = 1; 1478 } 1479 1480 init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); 1481 init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); 1482 return 0; 1483 } 1484 1485 void allocate_output_buffer() 1486 { 1487 output_buffer = calloc(1, (1 + topo.num_cpus) * 128); 1488 outp = output_buffer; 1489 if (outp == NULL) { 1490 perror("calloc"); 1491 exit(-1); 1492 } 1493 } 1494 1495 void setup_all_buffers(void) 1496 { 1497 topology_probe(); 1498 allocate_counters(&thread_even, &core_even, &package_even); 1499 allocate_counters(&thread_odd, &core_odd, &package_odd); 1500 allocate_output_buffer(); 1501 for_all_proc_cpus(initialize_counters); 1502 } 1503 void turbostat_init() 1504 { 1505 check_cpuid(); 1506 1507 check_dev_msr(); 1508 check_super_user(); 1509 1510 setup_all_buffers(); 1511 1512 if (verbose) 1513 print_verbose_header(); 1514 } 1515 1516 int fork_it(char **argv) 1517 { 1518 pid_t child_pid; 1519 1520 for_all_cpus(get_counters, EVEN_COUNTERS); 1521 /* clear affinity side-effect of get_counters() */ 1522 sched_setaffinity(0, cpu_present_setsize, cpu_present_set); 1523 gettimeofday(&tv_even, (struct timezone *)NULL); 1524 1525 child_pid = fork(); 1526 if (!child_pid) { 1527 /* child */ 1528 execvp(argv[0], argv); 1529 } else { 1530 int status; 1531 1532 /* parent */ 1533 if (child_pid == -1) { 1534 perror("fork"); 1535 exit(1); 1536 } 1537 1538 signal(SIGINT, SIG_IGN); 1539 signal(SIGQUIT, SIG_IGN); 1540 if (waitpid(child_pid, &status, 0) == -1) { 1541 perror("wait"); 1542 exit(1); 1543 } 1544 } 1545 /* 1546 * n.b. fork_it() does not check for errors from for_all_cpus() 1547 * because re-starting is problematic when forking 1548 */ 1549 for_all_cpus(get_counters, ODD_COUNTERS); 1550 gettimeofday(&tv_odd, (struct timezone *)NULL); 1551 timersub(&tv_odd, &tv_even, &tv_delta); 1552 for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); 1553 compute_average(EVEN_COUNTERS); 1554 format_all_counters(EVEN_COUNTERS); 1555 flush_stderr(); 1556 1557 fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); 1558 1559 return 0; 1560 } 1561 1562 void cmdline(int argc, char **argv) 1563 { 1564 int opt; 1565 1566 progname = argv[0]; 1567 1568 while ((opt = getopt(argc, argv, "+cpsvi:m:M:")) != -1) { 1569 switch (opt) { 1570 case 'c': 1571 show_core_only++; 1572 break; 1573 case 'p': 1574 show_pkg_only++; 1575 break; 1576 case 's': 1577 summary_only++; 1578 break; 1579 case 'v': 1580 verbose++; 1581 break; 1582 case 'i': 1583 interval_sec = atoi(optarg); 1584 break; 1585 case 'm': 1586 sscanf(optarg, "%x", &extra_msr_offset32); 1587 if (verbose > 1) 1588 fprintf(stderr, "msr 0x%X\n", extra_msr_offset32); 1589 break; 1590 case 'M': 1591 sscanf(optarg, "%x", &extra_msr_offset64); 1592 if (verbose > 1) 1593 fprintf(stderr, "MSR 0x%X\n", extra_msr_offset64); 1594 break; 1595 default: 1596 usage(); 1597 } 1598 } 1599 } 1600 1601 int main(int argc, char **argv) 1602 { 1603 cmdline(argc, argv); 1604 1605 if (verbose > 1) 1606 fprintf(stderr, "turbostat v2.0 May 16, 2012" 1607 " - Len Brown <lenb@kernel.org>\n"); 1608 1609 turbostat_init(); 1610 1611 /* 1612 * if any params left, it must be a command to fork 1613 */ 1614 if (argc - optind) 1615 return fork_it(argv + optind); 1616 else 1617 turbostat_loop(); 1618 1619 return 0; 1620 } 1621