1 /* 2 * turbostat -- show CPU frequency and C-state residency 3 * on modern Intel turbo-capable processors. 4 * 5 * Copyright (c) 2012 Intel Corporation. 6 * Len Brown <len.brown@intel.com> 7 * 8 * This program is free software; you can redistribute it and/or modify it 9 * under the terms and conditions of the GNU General Public License, 10 * version 2, as published by the Free Software Foundation. 11 * 12 * This program is distributed in the hope it will be useful, but WITHOUT 13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 * more details. 16 * 17 * You should have received a copy of the GNU General Public License along with 18 * this program; if not, write to the Free Software Foundation, Inc., 19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 20 */ 21 22 #define _GNU_SOURCE 23 #include <asm/msr.h> 24 #include <stdio.h> 25 #include <unistd.h> 26 #include <sys/types.h> 27 #include <sys/wait.h> 28 #include <sys/stat.h> 29 #include <sys/resource.h> 30 #include <fcntl.h> 31 #include <signal.h> 32 #include <sys/time.h> 33 #include <stdlib.h> 34 #include <dirent.h> 35 #include <string.h> 36 #include <ctype.h> 37 #include <sched.h> 38 39 char *proc_stat = "/proc/stat"; 40 unsigned int interval_sec = 5; /* set with -i interval_sec */ 41 unsigned int verbose; /* set with -v */ 42 unsigned int rapl_verbose; /* set with -R */ 43 unsigned int thermal_verbose; /* set with -T */ 44 unsigned int summary_only; /* set with -s */ 45 unsigned int skip_c0; 46 unsigned int skip_c1; 47 unsigned int do_nhm_cstates; 48 unsigned int do_snb_cstates; 49 unsigned int has_aperf; 50 unsigned int has_epb; 51 unsigned int units = 1000000000; /* Ghz etc */ 52 unsigned int genuine_intel; 53 unsigned int has_invariant_tsc; 54 unsigned int do_nehalem_platform_info; 55 unsigned int do_nehalem_turbo_ratio_limit; 56 unsigned int do_ivt_turbo_ratio_limit; 57 unsigned int extra_msr_offset32; 58 unsigned int extra_msr_offset64; 59 unsigned int extra_delta_offset32; 60 unsigned int extra_delta_offset64; 61 int do_smi; 62 double bclk; 63 unsigned int show_pkg; 64 unsigned int show_core; 65 unsigned int show_cpu; 66 unsigned int show_pkg_only; 67 unsigned int show_core_only; 68 char *output_buffer, *outp; 69 unsigned int do_rapl; 70 unsigned int do_dts; 71 unsigned int do_ptm; 72 unsigned int tcc_activation_temp; 73 unsigned int tcc_activation_temp_override; 74 double rapl_power_units, rapl_energy_units, rapl_time_units; 75 double rapl_joule_counter_range; 76 77 #define RAPL_PKG (1 << 0) 78 #define RAPL_CORES (1 << 1) 79 #define RAPL_GFX (1 << 2) 80 #define RAPL_DRAM (1 << 3) 81 #define RAPL_PKG_PERF_STATUS (1 << 4) 82 #define RAPL_DRAM_PERF_STATUS (1 << 5) 83 #define TJMAX_DEFAULT 100 84 85 #define MAX(a, b) ((a) > (b) ? (a) : (b)) 86 87 int aperf_mperf_unstable; 88 int backwards_count; 89 char *progname; 90 91 cpu_set_t *cpu_present_set, *cpu_affinity_set; 92 size_t cpu_present_setsize, cpu_affinity_setsize; 93 94 struct thread_data { 95 unsigned long long tsc; 96 unsigned long long aperf; 97 unsigned long long mperf; 98 unsigned long long c1; /* derived */ 99 unsigned long long extra_msr64; 100 unsigned long long extra_delta64; 101 unsigned long long extra_msr32; 102 unsigned long long extra_delta32; 103 unsigned int smi_count; 104 unsigned int cpu_id; 105 unsigned int flags; 106 #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 107 #define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 108 } *thread_even, *thread_odd; 109 110 struct core_data { 111 unsigned long long c3; 112 unsigned long long c6; 113 unsigned long long c7; 114 unsigned int core_temp_c; 115 unsigned int core_id; 116 } *core_even, *core_odd; 117 118 struct pkg_data { 119 unsigned long long pc2; 120 unsigned long long pc3; 121 unsigned long long pc6; 122 unsigned long long pc7; 123 unsigned int package_id; 124 unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ 125 unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ 126 unsigned int energy_cores; /* MSR_PP0_ENERGY_STATUS */ 127 unsigned int energy_gfx; /* MSR_PP1_ENERGY_STATUS */ 128 unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ 129 unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ 130 unsigned int pkg_temp_c; 131 132 } *package_even, *package_odd; 133 134 #define ODD_COUNTERS thread_odd, core_odd, package_odd 135 #define EVEN_COUNTERS thread_even, core_even, package_even 136 137 #define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \ 138 (thread_base + (pkg_no) * topo.num_cores_per_pkg * \ 139 topo.num_threads_per_core + \ 140 (core_no) * topo.num_threads_per_core + (thread_no)) 141 #define GET_CORE(core_base, core_no, pkg_no) \ 142 (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no)) 143 #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) 144 145 struct system_summary { 146 struct thread_data threads; 147 struct core_data cores; 148 struct pkg_data packages; 149 } sum, average; 150 151 152 struct topo_params { 153 int num_packages; 154 int num_cpus; 155 int num_cores; 156 int max_cpu_num; 157 int num_cores_per_pkg; 158 int num_threads_per_core; 159 } topo; 160 161 struct timeval tv_even, tv_odd, tv_delta; 162 163 void setup_all_buffers(void); 164 165 int cpu_is_not_present(int cpu) 166 { 167 return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); 168 } 169 /* 170 * run func(thread, core, package) in topology order 171 * skip non-present cpus 172 */ 173 174 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *), 175 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) 176 { 177 int retval, pkg_no, core_no, thread_no; 178 179 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 180 for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { 181 for (thread_no = 0; thread_no < 182 topo.num_threads_per_core; ++thread_no) { 183 struct thread_data *t; 184 struct core_data *c; 185 struct pkg_data *p; 186 187 t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); 188 189 if (cpu_is_not_present(t->cpu_id)) 190 continue; 191 192 c = GET_CORE(core_base, core_no, pkg_no); 193 p = GET_PKG(pkg_base, pkg_no); 194 195 retval = func(t, c, p); 196 if (retval) 197 return retval; 198 } 199 } 200 } 201 return 0; 202 } 203 204 int cpu_migrate(int cpu) 205 { 206 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 207 CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); 208 if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) 209 return -1; 210 else 211 return 0; 212 } 213 214 int get_msr(int cpu, off_t offset, unsigned long long *msr) 215 { 216 ssize_t retval; 217 char pathname[32]; 218 int fd; 219 220 sprintf(pathname, "/dev/cpu/%d/msr", cpu); 221 fd = open(pathname, O_RDONLY); 222 if (fd < 0) 223 return -1; 224 225 retval = pread(fd, msr, sizeof *msr, offset); 226 close(fd); 227 228 if (retval != sizeof *msr) { 229 fprintf(stderr, "%s offset 0x%zx read failed\n", pathname, offset); 230 return -1; 231 } 232 233 return 0; 234 } 235 236 void print_header(void) 237 { 238 if (show_pkg) 239 outp += sprintf(outp, "pk"); 240 if (show_pkg) 241 outp += sprintf(outp, " "); 242 if (show_core) 243 outp += sprintf(outp, "cor"); 244 if (show_cpu) 245 outp += sprintf(outp, " CPU"); 246 if (show_pkg || show_core || show_cpu) 247 outp += sprintf(outp, " "); 248 if (do_nhm_cstates) 249 outp += sprintf(outp, " %%c0"); 250 if (has_aperf) 251 outp += sprintf(outp, " GHz"); 252 outp += sprintf(outp, " TSC"); 253 if (do_smi) 254 outp += sprintf(outp, " SMI"); 255 if (extra_delta_offset32) 256 outp += sprintf(outp, " count 0x%03X", extra_delta_offset32); 257 if (extra_delta_offset64) 258 outp += sprintf(outp, " COUNT 0x%03X", extra_delta_offset64); 259 if (extra_msr_offset32) 260 outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32); 261 if (extra_msr_offset64) 262 outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64); 263 if (do_nhm_cstates) 264 outp += sprintf(outp, " %%c1"); 265 if (do_nhm_cstates) 266 outp += sprintf(outp, " %%c3"); 267 if (do_nhm_cstates) 268 outp += sprintf(outp, " %%c6"); 269 if (do_snb_cstates) 270 outp += sprintf(outp, " %%c7"); 271 272 if (do_dts) 273 outp += sprintf(outp, " CTMP"); 274 if (do_ptm) 275 outp += sprintf(outp, " PTMP"); 276 277 if (do_snb_cstates) 278 outp += sprintf(outp, " %%pc2"); 279 if (do_nhm_cstates) 280 outp += sprintf(outp, " %%pc3"); 281 if (do_nhm_cstates) 282 outp += sprintf(outp, " %%pc6"); 283 if (do_snb_cstates) 284 outp += sprintf(outp, " %%pc7"); 285 286 if (do_rapl & RAPL_PKG) 287 outp += sprintf(outp, " Pkg_W"); 288 if (do_rapl & RAPL_CORES) 289 outp += sprintf(outp, " Cor_W"); 290 if (do_rapl & RAPL_GFX) 291 outp += sprintf(outp, " GFX_W"); 292 if (do_rapl & RAPL_DRAM) 293 outp += sprintf(outp, " RAM_W"); 294 if (do_rapl & RAPL_PKG_PERF_STATUS) 295 outp += sprintf(outp, " PKG_%%"); 296 if (do_rapl & RAPL_DRAM_PERF_STATUS) 297 outp += sprintf(outp, " RAM_%%"); 298 299 outp += sprintf(outp, "\n"); 300 } 301 302 int dump_counters(struct thread_data *t, struct core_data *c, 303 struct pkg_data *p) 304 { 305 fprintf(stderr, "t %p, c %p, p %p\n", t, c, p); 306 307 if (t) { 308 fprintf(stderr, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags); 309 fprintf(stderr, "TSC: %016llX\n", t->tsc); 310 fprintf(stderr, "aperf: %016llX\n", t->aperf); 311 fprintf(stderr, "mperf: %016llX\n", t->mperf); 312 fprintf(stderr, "c1: %016llX\n", t->c1); 313 fprintf(stderr, "msr0x%x: %08llX\n", 314 extra_delta_offset32, t->extra_delta32); 315 fprintf(stderr, "msr0x%x: %016llX\n", 316 extra_delta_offset64, t->extra_delta64); 317 fprintf(stderr, "msr0x%x: %08llX\n", 318 extra_msr_offset32, t->extra_msr32); 319 fprintf(stderr, "msr0x%x: %016llX\n", 320 extra_msr_offset64, t->extra_msr64); 321 if (do_smi) 322 fprintf(stderr, "SMI: %08X\n", t->smi_count); 323 } 324 325 if (c) { 326 fprintf(stderr, "core: %d\n", c->core_id); 327 fprintf(stderr, "c3: %016llX\n", c->c3); 328 fprintf(stderr, "c6: %016llX\n", c->c6); 329 fprintf(stderr, "c7: %016llX\n", c->c7); 330 fprintf(stderr, "DTS: %dC\n", c->core_temp_c); 331 } 332 333 if (p) { 334 fprintf(stderr, "package: %d\n", p->package_id); 335 fprintf(stderr, "pc2: %016llX\n", p->pc2); 336 fprintf(stderr, "pc3: %016llX\n", p->pc3); 337 fprintf(stderr, "pc6: %016llX\n", p->pc6); 338 fprintf(stderr, "pc7: %016llX\n", p->pc7); 339 fprintf(stderr, "Joules PKG: %0X\n", p->energy_pkg); 340 fprintf(stderr, "Joules COR: %0X\n", p->energy_cores); 341 fprintf(stderr, "Joules GFX: %0X\n", p->energy_gfx); 342 fprintf(stderr, "Joules RAM: %0X\n", p->energy_dram); 343 fprintf(stderr, "Throttle PKG: %0X\n", p->rapl_pkg_perf_status); 344 fprintf(stderr, "Throttle RAM: %0X\n", p->rapl_dram_perf_status); 345 fprintf(stderr, "PTM: %dC\n", p->pkg_temp_c); 346 } 347 return 0; 348 } 349 350 /* 351 * column formatting convention & formats 352 * package: "pk" 2 columns %2d 353 * core: "cor" 3 columns %3d 354 * CPU: "CPU" 3 columns %3d 355 * Pkg_W: %6.2 356 * Cor_W: %6.2 357 * GFX_W: %5.2 358 * RAM_W: %5.2 359 * GHz: "GHz" 3 columns %3.2 360 * TSC: "TSC" 3 columns %3.2 361 * SMI: "SMI" 4 columns %4d 362 * percentage " %pc3" %6.2 363 * Perf Status percentage: %5.2 364 * "CTMP" 4 columns %4d 365 */ 366 int format_counters(struct thread_data *t, struct core_data *c, 367 struct pkg_data *p) 368 { 369 double interval_float; 370 char *fmt5, *fmt6; 371 372 /* if showing only 1st thread in core and this isn't one, bail out */ 373 if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 374 return 0; 375 376 /* if showing only 1st thread in pkg and this isn't one, bail out */ 377 if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 378 return 0; 379 380 interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; 381 382 /* topo columns, print blanks on 1st (average) line */ 383 if (t == &average.threads) { 384 if (show_pkg) 385 outp += sprintf(outp, " "); 386 if (show_pkg && show_core) 387 outp += sprintf(outp, " "); 388 if (show_core) 389 outp += sprintf(outp, " "); 390 if (show_cpu) 391 outp += sprintf(outp, " " " "); 392 } else { 393 if (show_pkg) { 394 if (p) 395 outp += sprintf(outp, "%2d", p->package_id); 396 else 397 outp += sprintf(outp, " "); 398 } 399 if (show_pkg && show_core) 400 outp += sprintf(outp, " "); 401 if (show_core) { 402 if (c) 403 outp += sprintf(outp, "%3d", c->core_id); 404 else 405 outp += sprintf(outp, " "); 406 } 407 if (show_cpu) 408 outp += sprintf(outp, " %3d", t->cpu_id); 409 } 410 /* %c0 */ 411 if (do_nhm_cstates) { 412 if (show_pkg || show_core || show_cpu) 413 outp += sprintf(outp, " "); 414 if (!skip_c0) 415 outp += sprintf(outp, "%6.2f", 100.0 * t->mperf/t->tsc); 416 else 417 outp += sprintf(outp, " ****"); 418 } 419 420 /* GHz */ 421 if (has_aperf) { 422 if (!aperf_mperf_unstable) { 423 outp += sprintf(outp, " %3.2f", 424 1.0 * t->tsc / units * t->aperf / 425 t->mperf / interval_float); 426 } else { 427 if (t->aperf > t->tsc || t->mperf > t->tsc) { 428 outp += sprintf(outp, " ***"); 429 } else { 430 outp += sprintf(outp, "%3.1f*", 431 1.0 * t->tsc / 432 units * t->aperf / 433 t->mperf / interval_float); 434 } 435 } 436 } 437 438 /* TSC */ 439 outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float); 440 441 /* SMI */ 442 if (do_smi) 443 outp += sprintf(outp, "%4d", t->smi_count); 444 445 /* delta */ 446 if (extra_delta_offset32) 447 outp += sprintf(outp, " %11llu", t->extra_delta32); 448 449 /* DELTA */ 450 if (extra_delta_offset64) 451 outp += sprintf(outp, " %11llu", t->extra_delta64); 452 /* msr */ 453 if (extra_msr_offset32) 454 outp += sprintf(outp, " 0x%08llx", t->extra_msr32); 455 456 /* MSR */ 457 if (extra_msr_offset64) 458 outp += sprintf(outp, " 0x%016llx", t->extra_msr64); 459 460 if (do_nhm_cstates) { 461 if (!skip_c1) 462 outp += sprintf(outp, " %6.2f", 100.0 * t->c1/t->tsc); 463 else 464 outp += sprintf(outp, " ****"); 465 } 466 467 /* print per-core data only for 1st thread in core */ 468 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 469 goto done; 470 471 if (do_nhm_cstates) 472 outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc); 473 if (do_nhm_cstates) 474 outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc); 475 if (do_snb_cstates) 476 outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc); 477 478 if (do_dts) 479 outp += sprintf(outp, " %4d", c->core_temp_c); 480 481 /* print per-package data only for 1st core in package */ 482 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 483 goto done; 484 485 if (do_ptm) 486 outp += sprintf(outp, " %4d", p->pkg_temp_c); 487 488 if (do_snb_cstates) 489 outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc); 490 if (do_nhm_cstates) 491 outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc); 492 if (do_nhm_cstates) 493 outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc); 494 if (do_snb_cstates) 495 outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); 496 497 /* 498 * If measurement interval exceeds minimum RAPL Joule Counter range, 499 * indicate that results are suspect by printing "**" in fraction place. 500 */ 501 if (interval_float < rapl_joule_counter_range) { 502 fmt5 = " %5.2f"; 503 fmt6 = " %6.2f"; 504 } else { 505 fmt5 = " %3.0f**"; 506 fmt6 = " %4.0f**"; 507 } 508 509 if (do_rapl & RAPL_PKG) 510 outp += sprintf(outp, fmt6, p->energy_pkg * rapl_energy_units / interval_float); 511 if (do_rapl & RAPL_CORES) 512 outp += sprintf(outp, fmt6, p->energy_cores * rapl_energy_units / interval_float); 513 if (do_rapl & RAPL_GFX) 514 outp += sprintf(outp, fmt5, p->energy_gfx * rapl_energy_units / interval_float); 515 if (do_rapl & RAPL_DRAM) 516 outp += sprintf(outp, fmt5, p->energy_dram * rapl_energy_units / interval_float); 517 if (do_rapl & RAPL_PKG_PERF_STATUS ) 518 outp += sprintf(outp, fmt5, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); 519 if (do_rapl & RAPL_DRAM_PERF_STATUS ) 520 outp += sprintf(outp, fmt5, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); 521 522 done: 523 outp += sprintf(outp, "\n"); 524 525 return 0; 526 } 527 528 void flush_stdout() 529 { 530 fputs(output_buffer, stdout); 531 fflush(stdout); 532 outp = output_buffer; 533 } 534 void flush_stderr() 535 { 536 fputs(output_buffer, stderr); 537 outp = output_buffer; 538 } 539 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 540 { 541 static int printed; 542 543 if (!printed || !summary_only) 544 print_header(); 545 546 if (topo.num_cpus > 1) 547 format_counters(&average.threads, &average.cores, 548 &average.packages); 549 550 printed = 1; 551 552 if (summary_only) 553 return; 554 555 for_all_cpus(format_counters, t, c, p); 556 } 557 558 #define DELTA_WRAP32(new, old) \ 559 if (new > old) { \ 560 old = new - old; \ 561 } else { \ 562 old = 0x100000000 + new - old; \ 563 } 564 565 void 566 delta_package(struct pkg_data *new, struct pkg_data *old) 567 { 568 old->pc2 = new->pc2 - old->pc2; 569 old->pc3 = new->pc3 - old->pc3; 570 old->pc6 = new->pc6 - old->pc6; 571 old->pc7 = new->pc7 - old->pc7; 572 old->pkg_temp_c = new->pkg_temp_c; 573 574 DELTA_WRAP32(new->energy_pkg, old->energy_pkg); 575 DELTA_WRAP32(new->energy_cores, old->energy_cores); 576 DELTA_WRAP32(new->energy_gfx, old->energy_gfx); 577 DELTA_WRAP32(new->energy_dram, old->energy_dram); 578 DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status); 579 DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status); 580 } 581 582 void 583 delta_core(struct core_data *new, struct core_data *old) 584 { 585 old->c3 = new->c3 - old->c3; 586 old->c6 = new->c6 - old->c6; 587 old->c7 = new->c7 - old->c7; 588 old->core_temp_c = new->core_temp_c; 589 } 590 591 /* 592 * old = new - old 593 */ 594 void 595 delta_thread(struct thread_data *new, struct thread_data *old, 596 struct core_data *core_delta) 597 { 598 old->tsc = new->tsc - old->tsc; 599 600 /* check for TSC < 1 Mcycles over interval */ 601 if (old->tsc < (1000 * 1000)) { 602 fprintf(stderr, "Insanely slow TSC rate, TSC stops in idle?\n"); 603 fprintf(stderr, "You can disable all c-states by booting with \"idle=poll\"\n"); 604 fprintf(stderr, "or just the deep ones with \"processor.max_cstate=1\"\n"); 605 exit(-3); 606 } 607 608 old->c1 = new->c1 - old->c1; 609 610 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { 611 old->aperf = new->aperf - old->aperf; 612 old->mperf = new->mperf - old->mperf; 613 } else { 614 615 if (!aperf_mperf_unstable) { 616 fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); 617 fprintf(stderr, "* Frequency results do not cover entire interval *\n"); 618 fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); 619 620 aperf_mperf_unstable = 1; 621 } 622 /* 623 * mperf delta is likely a huge "positive" number 624 * can not use it for calculating c0 time 625 */ 626 skip_c0 = 1; 627 skip_c1 = 1; 628 } 629 630 631 /* 632 * As counter collection is not atomic, 633 * it is possible for mperf's non-halted cycles + idle states 634 * to exceed TSC's all cycles: show c1 = 0% in that case. 635 */ 636 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc) 637 old->c1 = 0; 638 else { 639 /* normal case, derive c1 */ 640 old->c1 = old->tsc - old->mperf - core_delta->c3 641 - core_delta->c6 - core_delta->c7; 642 } 643 644 if (old->mperf == 0) { 645 if (verbose > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); 646 old->mperf = 1; /* divide by 0 protection */ 647 } 648 649 old->extra_delta32 = new->extra_delta32 - old->extra_delta32; 650 old->extra_delta32 &= 0xFFFFFFFF; 651 652 old->extra_delta64 = new->extra_delta64 - old->extra_delta64; 653 654 /* 655 * Extra MSR is just a snapshot, simply copy latest w/o subtracting 656 */ 657 old->extra_msr32 = new->extra_msr32; 658 old->extra_msr64 = new->extra_msr64; 659 660 if (do_smi) 661 old->smi_count = new->smi_count - old->smi_count; 662 } 663 664 int delta_cpu(struct thread_data *t, struct core_data *c, 665 struct pkg_data *p, struct thread_data *t2, 666 struct core_data *c2, struct pkg_data *p2) 667 { 668 /* calculate core delta only for 1st thread in core */ 669 if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE) 670 delta_core(c, c2); 671 672 /* always calculate thread delta */ 673 delta_thread(t, t2, c2); /* c2 is core delta */ 674 675 /* calculate package delta only for 1st core in package */ 676 if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE) 677 delta_package(p, p2); 678 679 return 0; 680 } 681 682 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 683 { 684 t->tsc = 0; 685 t->aperf = 0; 686 t->mperf = 0; 687 t->c1 = 0; 688 689 t->smi_count = 0; 690 t->extra_delta32 = 0; 691 t->extra_delta64 = 0; 692 693 /* tells format_counters to dump all fields from this set */ 694 t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; 695 696 c->c3 = 0; 697 c->c6 = 0; 698 c->c7 = 0; 699 c->core_temp_c = 0; 700 701 p->pc2 = 0; 702 p->pc3 = 0; 703 p->pc6 = 0; 704 p->pc7 = 0; 705 706 p->energy_pkg = 0; 707 p->energy_dram = 0; 708 p->energy_cores = 0; 709 p->energy_gfx = 0; 710 p->rapl_pkg_perf_status = 0; 711 p->rapl_dram_perf_status = 0; 712 p->pkg_temp_c = 0; 713 } 714 int sum_counters(struct thread_data *t, struct core_data *c, 715 struct pkg_data *p) 716 { 717 average.threads.tsc += t->tsc; 718 average.threads.aperf += t->aperf; 719 average.threads.mperf += t->mperf; 720 average.threads.c1 += t->c1; 721 722 average.threads.extra_delta32 += t->extra_delta32; 723 average.threads.extra_delta64 += t->extra_delta64; 724 725 /* sum per-core values only for 1st thread in core */ 726 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 727 return 0; 728 729 average.cores.c3 += c->c3; 730 average.cores.c6 += c->c6; 731 average.cores.c7 += c->c7; 732 733 average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); 734 735 /* sum per-pkg values only for 1st core in pkg */ 736 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 737 return 0; 738 739 average.packages.pc2 += p->pc2; 740 average.packages.pc3 += p->pc3; 741 average.packages.pc6 += p->pc6; 742 average.packages.pc7 += p->pc7; 743 744 average.packages.energy_pkg += p->energy_pkg; 745 average.packages.energy_dram += p->energy_dram; 746 average.packages.energy_cores += p->energy_cores; 747 average.packages.energy_gfx += p->energy_gfx; 748 749 average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); 750 751 average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status; 752 average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status; 753 return 0; 754 } 755 /* 756 * sum the counters for all cpus in the system 757 * compute the weighted average 758 */ 759 void compute_average(struct thread_data *t, struct core_data *c, 760 struct pkg_data *p) 761 { 762 clear_counters(&average.threads, &average.cores, &average.packages); 763 764 for_all_cpus(sum_counters, t, c, p); 765 766 average.threads.tsc /= topo.num_cpus; 767 average.threads.aperf /= topo.num_cpus; 768 average.threads.mperf /= topo.num_cpus; 769 average.threads.c1 /= topo.num_cpus; 770 771 average.threads.extra_delta32 /= topo.num_cpus; 772 average.threads.extra_delta32 &= 0xFFFFFFFF; 773 774 average.threads.extra_delta64 /= topo.num_cpus; 775 776 average.cores.c3 /= topo.num_cores; 777 average.cores.c6 /= topo.num_cores; 778 average.cores.c7 /= topo.num_cores; 779 780 average.packages.pc2 /= topo.num_packages; 781 average.packages.pc3 /= topo.num_packages; 782 average.packages.pc6 /= topo.num_packages; 783 average.packages.pc7 /= topo.num_packages; 784 } 785 786 static unsigned long long rdtsc(void) 787 { 788 unsigned int low, high; 789 790 asm volatile("rdtsc" : "=a" (low), "=d" (high)); 791 792 return low | ((unsigned long long)high) << 32; 793 } 794 795 796 /* 797 * get_counters(...) 798 * migrate to cpu 799 * acquire and record local counters for that cpu 800 */ 801 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 802 { 803 int cpu = t->cpu_id; 804 unsigned long long msr; 805 806 if (cpu_migrate(cpu)) { 807 fprintf(stderr, "Could not migrate to CPU %d\n", cpu); 808 return -1; 809 } 810 811 t->tsc = rdtsc(); /* we are running on local CPU of interest */ 812 813 if (has_aperf) { 814 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) 815 return -3; 816 if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) 817 return -4; 818 } 819 820 if (do_smi) { 821 if (get_msr(cpu, MSR_SMI_COUNT, &msr)) 822 return -5; 823 t->smi_count = msr & 0xFFFFFFFF; 824 } 825 if (extra_delta_offset32) { 826 if (get_msr(cpu, extra_delta_offset32, &msr)) 827 return -5; 828 t->extra_delta32 = msr & 0xFFFFFFFF; 829 } 830 831 if (extra_delta_offset64) 832 if (get_msr(cpu, extra_delta_offset64, &t->extra_delta64)) 833 return -5; 834 835 if (extra_msr_offset32) { 836 if (get_msr(cpu, extra_msr_offset32, &msr)) 837 return -5; 838 t->extra_msr32 = msr & 0xFFFFFFFF; 839 } 840 841 if (extra_msr_offset64) 842 if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64)) 843 return -5; 844 845 /* collect core counters only for 1st thread in core */ 846 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 847 return 0; 848 849 if (do_nhm_cstates) { 850 if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) 851 return -6; 852 if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) 853 return -7; 854 } 855 856 if (do_snb_cstates) 857 if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) 858 return -8; 859 860 if (do_dts) { 861 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) 862 return -9; 863 c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); 864 } 865 866 867 /* collect package counters only for 1st core in package */ 868 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 869 return 0; 870 871 if (do_nhm_cstates) { 872 if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) 873 return -9; 874 if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) 875 return -10; 876 } 877 if (do_snb_cstates) { 878 if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2)) 879 return -11; 880 if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) 881 return -12; 882 } 883 if (do_rapl & RAPL_PKG) { 884 if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr)) 885 return -13; 886 p->energy_pkg = msr & 0xFFFFFFFF; 887 } 888 if (do_rapl & RAPL_CORES) { 889 if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr)) 890 return -14; 891 p->energy_cores = msr & 0xFFFFFFFF; 892 } 893 if (do_rapl & RAPL_DRAM) { 894 if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr)) 895 return -15; 896 p->energy_dram = msr & 0xFFFFFFFF; 897 } 898 if (do_rapl & RAPL_GFX) { 899 if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr)) 900 return -16; 901 p->energy_gfx = msr & 0xFFFFFFFF; 902 } 903 if (do_rapl & RAPL_PKG_PERF_STATUS) { 904 if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr)) 905 return -16; 906 p->rapl_pkg_perf_status = msr & 0xFFFFFFFF; 907 } 908 if (do_rapl & RAPL_DRAM_PERF_STATUS) { 909 if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr)) 910 return -16; 911 p->rapl_dram_perf_status = msr & 0xFFFFFFFF; 912 } 913 if (do_ptm) { 914 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) 915 return -17; 916 p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); 917 } 918 return 0; 919 } 920 921 void print_verbose_header(void) 922 { 923 unsigned long long msr; 924 unsigned int ratio; 925 926 if (!do_nehalem_platform_info) 927 return; 928 929 get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); 930 931 fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr); 932 933 ratio = (msr >> 40) & 0xFF; 934 fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", 935 ratio, bclk, ratio * bclk); 936 937 ratio = (msr >> 8) & 0xFF; 938 fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n", 939 ratio, bclk, ratio * bclk); 940 941 get_msr(0, MSR_IA32_POWER_CTL, &msr); 942 fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E: %sabled)\n", 943 msr, msr & 0x2 ? "EN" : "DIS"); 944 945 if (!do_ivt_turbo_ratio_limit) 946 goto print_nhm_turbo_ratio_limits; 947 948 get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); 949 950 fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); 951 952 ratio = (msr >> 56) & 0xFF; 953 if (ratio) 954 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", 955 ratio, bclk, ratio * bclk); 956 957 ratio = (msr >> 48) & 0xFF; 958 if (ratio) 959 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", 960 ratio, bclk, ratio * bclk); 961 962 ratio = (msr >> 40) & 0xFF; 963 if (ratio) 964 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", 965 ratio, bclk, ratio * bclk); 966 967 ratio = (msr >> 32) & 0xFF; 968 if (ratio) 969 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", 970 ratio, bclk, ratio * bclk); 971 972 ratio = (msr >> 24) & 0xFF; 973 if (ratio) 974 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", 975 ratio, bclk, ratio * bclk); 976 977 ratio = (msr >> 16) & 0xFF; 978 if (ratio) 979 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", 980 ratio, bclk, ratio * bclk); 981 982 ratio = (msr >> 8) & 0xFF; 983 if (ratio) 984 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", 985 ratio, bclk, ratio * bclk); 986 987 ratio = (msr >> 0) & 0xFF; 988 if (ratio) 989 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", 990 ratio, bclk, ratio * bclk); 991 992 print_nhm_turbo_ratio_limits: 993 get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); 994 995 #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) 996 #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) 997 998 fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr); 999 1000 fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: ", 1001 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", 1002 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", 1003 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", 1004 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", 1005 (msr & (1 << 15)) ? "" : "UN", 1006 (unsigned int)msr & 7); 1007 1008 1009 switch(msr & 0x7) { 1010 case 0: 1011 fprintf(stderr, "pc0"); 1012 break; 1013 case 1: 1014 fprintf(stderr, do_snb_cstates ? "pc2" : "pc0"); 1015 break; 1016 case 2: 1017 fprintf(stderr, do_snb_cstates ? "pc6-noret" : "pc3"); 1018 break; 1019 case 3: 1020 fprintf(stderr, "pc6"); 1021 break; 1022 case 4: 1023 fprintf(stderr, "pc7"); 1024 break; 1025 case 5: 1026 fprintf(stderr, do_snb_cstates ? "pc7s" : "invalid"); 1027 break; 1028 case 7: 1029 fprintf(stderr, "unlimited"); 1030 break; 1031 default: 1032 fprintf(stderr, "invalid"); 1033 } 1034 fprintf(stderr, ")\n"); 1035 1036 if (!do_nehalem_turbo_ratio_limit) 1037 return; 1038 1039 get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); 1040 1041 fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); 1042 1043 ratio = (msr >> 56) & 0xFF; 1044 if (ratio) 1045 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", 1046 ratio, bclk, ratio * bclk); 1047 1048 ratio = (msr >> 48) & 0xFF; 1049 if (ratio) 1050 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", 1051 ratio, bclk, ratio * bclk); 1052 1053 ratio = (msr >> 40) & 0xFF; 1054 if (ratio) 1055 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", 1056 ratio, bclk, ratio * bclk); 1057 1058 ratio = (msr >> 32) & 0xFF; 1059 if (ratio) 1060 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", 1061 ratio, bclk, ratio * bclk); 1062 1063 ratio = (msr >> 24) & 0xFF; 1064 if (ratio) 1065 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", 1066 ratio, bclk, ratio * bclk); 1067 1068 ratio = (msr >> 16) & 0xFF; 1069 if (ratio) 1070 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", 1071 ratio, bclk, ratio * bclk); 1072 1073 ratio = (msr >> 8) & 0xFF; 1074 if (ratio) 1075 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", 1076 ratio, bclk, ratio * bclk); 1077 1078 ratio = (msr >> 0) & 0xFF; 1079 if (ratio) 1080 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", 1081 ratio, bclk, ratio * bclk); 1082 } 1083 1084 void free_all_buffers(void) 1085 { 1086 CPU_FREE(cpu_present_set); 1087 cpu_present_set = NULL; 1088 cpu_present_set = 0; 1089 1090 CPU_FREE(cpu_affinity_set); 1091 cpu_affinity_set = NULL; 1092 cpu_affinity_setsize = 0; 1093 1094 free(thread_even); 1095 free(core_even); 1096 free(package_even); 1097 1098 thread_even = NULL; 1099 core_even = NULL; 1100 package_even = NULL; 1101 1102 free(thread_odd); 1103 free(core_odd); 1104 free(package_odd); 1105 1106 thread_odd = NULL; 1107 core_odd = NULL; 1108 package_odd = NULL; 1109 1110 free(output_buffer); 1111 output_buffer = NULL; 1112 outp = NULL; 1113 } 1114 1115 /* 1116 * cpu_is_first_sibling_in_core(cpu) 1117 * return 1 if given CPU is 1st HT sibling in the core 1118 */ 1119 int cpu_is_first_sibling_in_core(int cpu) 1120 { 1121 char path[64]; 1122 FILE *filep; 1123 int first_cpu; 1124 1125 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); 1126 filep = fopen(path, "r"); 1127 if (filep == NULL) { 1128 perror(path); 1129 exit(1); 1130 } 1131 fscanf(filep, "%d", &first_cpu); 1132 fclose(filep); 1133 return (cpu == first_cpu); 1134 } 1135 1136 /* 1137 * cpu_is_first_core_in_package(cpu) 1138 * return 1 if given CPU is 1st core in package 1139 */ 1140 int cpu_is_first_core_in_package(int cpu) 1141 { 1142 char path[64]; 1143 FILE *filep; 1144 int first_cpu; 1145 1146 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu); 1147 filep = fopen(path, "r"); 1148 if (filep == NULL) { 1149 perror(path); 1150 exit(1); 1151 } 1152 fscanf(filep, "%d", &first_cpu); 1153 fclose(filep); 1154 return (cpu == first_cpu); 1155 } 1156 1157 int get_physical_package_id(int cpu) 1158 { 1159 char path[80]; 1160 FILE *filep; 1161 int pkg; 1162 1163 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); 1164 filep = fopen(path, "r"); 1165 if (filep == NULL) { 1166 perror(path); 1167 exit(1); 1168 } 1169 fscanf(filep, "%d", &pkg); 1170 fclose(filep); 1171 return pkg; 1172 } 1173 1174 int get_core_id(int cpu) 1175 { 1176 char path[80]; 1177 FILE *filep; 1178 int core; 1179 1180 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); 1181 filep = fopen(path, "r"); 1182 if (filep == NULL) { 1183 perror(path); 1184 exit(1); 1185 } 1186 fscanf(filep, "%d", &core); 1187 fclose(filep); 1188 return core; 1189 } 1190 1191 int get_num_ht_siblings(int cpu) 1192 { 1193 char path[80]; 1194 FILE *filep; 1195 int sib1, sib2; 1196 int matches; 1197 char character; 1198 1199 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); 1200 filep = fopen(path, "r"); 1201 if (filep == NULL) { 1202 perror(path); 1203 exit(1); 1204 } 1205 /* 1206 * file format: 1207 * if a pair of number with a character between: 2 siblings (eg. 1-2, or 1,4) 1208 * otherwinse 1 sibling (self). 1209 */ 1210 matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2); 1211 1212 fclose(filep); 1213 1214 if (matches == 3) 1215 return 2; 1216 else 1217 return 1; 1218 } 1219 1220 /* 1221 * run func(thread, core, package) in topology order 1222 * skip non-present cpus 1223 */ 1224 1225 int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *, 1226 struct pkg_data *, struct thread_data *, struct core_data *, 1227 struct pkg_data *), struct thread_data *thread_base, 1228 struct core_data *core_base, struct pkg_data *pkg_base, 1229 struct thread_data *thread_base2, struct core_data *core_base2, 1230 struct pkg_data *pkg_base2) 1231 { 1232 int retval, pkg_no, core_no, thread_no; 1233 1234 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 1235 for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { 1236 for (thread_no = 0; thread_no < 1237 topo.num_threads_per_core; ++thread_no) { 1238 struct thread_data *t, *t2; 1239 struct core_data *c, *c2; 1240 struct pkg_data *p, *p2; 1241 1242 t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); 1243 1244 if (cpu_is_not_present(t->cpu_id)) 1245 continue; 1246 1247 t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no); 1248 1249 c = GET_CORE(core_base, core_no, pkg_no); 1250 c2 = GET_CORE(core_base2, core_no, pkg_no); 1251 1252 p = GET_PKG(pkg_base, pkg_no); 1253 p2 = GET_PKG(pkg_base2, pkg_no); 1254 1255 retval = func(t, c, p, t2, c2, p2); 1256 if (retval) 1257 return retval; 1258 } 1259 } 1260 } 1261 return 0; 1262 } 1263 1264 /* 1265 * run func(cpu) on every cpu in /proc/stat 1266 * return max_cpu number 1267 */ 1268 int for_all_proc_cpus(int (func)(int)) 1269 { 1270 FILE *fp; 1271 int cpu_num; 1272 int retval; 1273 1274 fp = fopen(proc_stat, "r"); 1275 if (fp == NULL) { 1276 perror(proc_stat); 1277 exit(1); 1278 } 1279 1280 retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n"); 1281 if (retval != 0) { 1282 perror("/proc/stat format"); 1283 exit(1); 1284 } 1285 1286 while (1) { 1287 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num); 1288 if (retval != 1) 1289 break; 1290 1291 retval = func(cpu_num); 1292 if (retval) { 1293 fclose(fp); 1294 return(retval); 1295 } 1296 } 1297 fclose(fp); 1298 return 0; 1299 } 1300 1301 void re_initialize(void) 1302 { 1303 free_all_buffers(); 1304 setup_all_buffers(); 1305 printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); 1306 } 1307 1308 1309 /* 1310 * count_cpus() 1311 * remember the last one seen, it will be the max 1312 */ 1313 int count_cpus(int cpu) 1314 { 1315 if (topo.max_cpu_num < cpu) 1316 topo.max_cpu_num = cpu; 1317 1318 topo.num_cpus += 1; 1319 return 0; 1320 } 1321 int mark_cpu_present(int cpu) 1322 { 1323 CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); 1324 return 0; 1325 } 1326 1327 void turbostat_loop() 1328 { 1329 int retval; 1330 int restarted = 0; 1331 1332 restart: 1333 restarted++; 1334 1335 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 1336 if (retval < -1) { 1337 exit(retval); 1338 } else if (retval == -1) { 1339 if (restarted > 1) { 1340 exit(retval); 1341 } 1342 re_initialize(); 1343 goto restart; 1344 } 1345 restarted = 0; 1346 gettimeofday(&tv_even, (struct timezone *)NULL); 1347 1348 while (1) { 1349 if (for_all_proc_cpus(cpu_is_not_present)) { 1350 re_initialize(); 1351 goto restart; 1352 } 1353 sleep(interval_sec); 1354 retval = for_all_cpus(get_counters, ODD_COUNTERS); 1355 if (retval < -1) { 1356 exit(retval); 1357 } else if (retval == -1) { 1358 re_initialize(); 1359 goto restart; 1360 } 1361 gettimeofday(&tv_odd, (struct timezone *)NULL); 1362 timersub(&tv_odd, &tv_even, &tv_delta); 1363 for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); 1364 compute_average(EVEN_COUNTERS); 1365 format_all_counters(EVEN_COUNTERS); 1366 flush_stdout(); 1367 sleep(interval_sec); 1368 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 1369 if (retval < -1) { 1370 exit(retval); 1371 } else if (retval == -1) { 1372 re_initialize(); 1373 goto restart; 1374 } 1375 gettimeofday(&tv_even, (struct timezone *)NULL); 1376 timersub(&tv_even, &tv_odd, &tv_delta); 1377 for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); 1378 compute_average(ODD_COUNTERS); 1379 format_all_counters(ODD_COUNTERS); 1380 flush_stdout(); 1381 } 1382 } 1383 1384 void check_dev_msr() 1385 { 1386 struct stat sb; 1387 1388 if (stat("/dev/cpu/0/msr", &sb)) { 1389 fprintf(stderr, "no /dev/cpu/0/msr\n"); 1390 fprintf(stderr, "Try \"# modprobe msr\"\n"); 1391 exit(-5); 1392 } 1393 } 1394 1395 void check_super_user() 1396 { 1397 if (getuid() != 0) { 1398 fprintf(stderr, "must be root\n"); 1399 exit(-6); 1400 } 1401 } 1402 1403 int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) 1404 { 1405 if (!genuine_intel) 1406 return 0; 1407 1408 if (family != 6) 1409 return 0; 1410 1411 switch (model) { 1412 case 0x1A: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */ 1413 case 0x1E: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ 1414 case 0x1F: /* Core i7 and i5 Processor - Nehalem */ 1415 case 0x25: /* Westmere Client - Clarkdale, Arrandale */ 1416 case 0x2C: /* Westmere EP - Gulftown */ 1417 case 0x2A: /* SNB */ 1418 case 0x2D: /* SNB Xeon */ 1419 case 0x3A: /* IVB */ 1420 case 0x3E: /* IVB Xeon */ 1421 case 0x3C: /* HSW */ 1422 case 0x3F: /* HSW */ 1423 case 0x45: /* HSW */ 1424 return 1; 1425 case 0x2E: /* Nehalem-EX Xeon - Beckton */ 1426 case 0x2F: /* Westmere-EX Xeon - Eagleton */ 1427 default: 1428 return 0; 1429 } 1430 } 1431 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) 1432 { 1433 if (!genuine_intel) 1434 return 0; 1435 1436 if (family != 6) 1437 return 0; 1438 1439 switch (model) { 1440 case 0x3E: /* IVB Xeon */ 1441 return 1; 1442 default: 1443 return 0; 1444 } 1445 } 1446 1447 /* 1448 * print_epb() 1449 * Decode the ENERGY_PERF_BIAS MSR 1450 */ 1451 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1452 { 1453 unsigned long long msr; 1454 char *epb_string; 1455 int cpu; 1456 1457 if (!has_epb) 1458 return 0; 1459 1460 cpu = t->cpu_id; 1461 1462 /* EPB is per-package */ 1463 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 1464 return 0; 1465 1466 if (cpu_migrate(cpu)) { 1467 fprintf(stderr, "Could not migrate to CPU %d\n", cpu); 1468 return -1; 1469 } 1470 1471 if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr)) 1472 return 0; 1473 1474 switch (msr & 0x7) { 1475 case ENERGY_PERF_BIAS_PERFORMANCE: 1476 epb_string = "performance"; 1477 break; 1478 case ENERGY_PERF_BIAS_NORMAL: 1479 epb_string = "balanced"; 1480 break; 1481 case ENERGY_PERF_BIAS_POWERSAVE: 1482 epb_string = "powersave"; 1483 break; 1484 default: 1485 epb_string = "custom"; 1486 break; 1487 } 1488 fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string); 1489 1490 return 0; 1491 } 1492 1493 #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ 1494 #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ 1495 1496 /* 1497 * rapl_probe() 1498 * 1499 * sets do_rapl 1500 */ 1501 void rapl_probe(unsigned int family, unsigned int model) 1502 { 1503 unsigned long long msr; 1504 double tdp; 1505 1506 if (!genuine_intel) 1507 return; 1508 1509 if (family != 6) 1510 return; 1511 1512 switch (model) { 1513 case 0x2A: 1514 case 0x3A: 1515 case 0x3C: /* HSW */ 1516 case 0x3F: /* HSW */ 1517 case 0x45: /* HSW */ 1518 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX; 1519 break; 1520 case 0x2D: 1521 case 0x3E: 1522 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS; 1523 break; 1524 default: 1525 return; 1526 } 1527 1528 /* units on package 0, verify later other packages match */ 1529 if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr)) 1530 return; 1531 1532 rapl_power_units = 1.0 / (1 << (msr & 0xF)); 1533 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); 1534 rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF)); 1535 1536 /* get TDP to determine energy counter range */ 1537 if (get_msr(0, MSR_PKG_POWER_INFO, &msr)) 1538 return; 1539 1540 tdp = ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; 1541 1542 rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; 1543 1544 if (verbose) 1545 fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range\n", rapl_joule_counter_range); 1546 1547 return; 1548 } 1549 1550 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1551 { 1552 unsigned long long msr; 1553 unsigned int dts; 1554 int cpu; 1555 1556 if (!(do_dts || do_ptm)) 1557 return 0; 1558 1559 cpu = t->cpu_id; 1560 1561 /* DTS is per-core, no need to print for each thread */ 1562 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 1563 return 0; 1564 1565 if (cpu_migrate(cpu)) { 1566 fprintf(stderr, "Could not migrate to CPU %d\n", cpu); 1567 return -1; 1568 } 1569 1570 if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) { 1571 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) 1572 return 0; 1573 1574 dts = (msr >> 16) & 0x7F; 1575 fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", 1576 cpu, msr, tcc_activation_temp - dts); 1577 1578 #ifdef THERM_DEBUG 1579 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr)) 1580 return 0; 1581 1582 dts = (msr >> 16) & 0x7F; 1583 dts2 = (msr >> 8) & 0x7F; 1584 fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", 1585 cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); 1586 #endif 1587 } 1588 1589 1590 if (do_dts) { 1591 unsigned int resolution; 1592 1593 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) 1594 return 0; 1595 1596 dts = (msr >> 16) & 0x7F; 1597 resolution = (msr >> 27) & 0xF; 1598 fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", 1599 cpu, msr, tcc_activation_temp - dts, resolution); 1600 1601 #ifdef THERM_DEBUG 1602 if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) 1603 return 0; 1604 1605 dts = (msr >> 16) & 0x7F; 1606 dts2 = (msr >> 8) & 0x7F; 1607 fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", 1608 cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); 1609 #endif 1610 } 1611 1612 return 0; 1613 } 1614 1615 void print_power_limit_msr(int cpu, unsigned long long msr, char *label) 1616 { 1617 fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", 1618 cpu, label, 1619 ((msr >> 15) & 1) ? "EN" : "DIS", 1620 ((msr >> 0) & 0x7FFF) * rapl_power_units, 1621 (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, 1622 (((msr >> 16) & 1) ? "EN" : "DIS")); 1623 1624 return; 1625 } 1626 1627 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1628 { 1629 unsigned long long msr; 1630 int cpu; 1631 double local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units; 1632 1633 if (!do_rapl) 1634 return 0; 1635 1636 /* RAPL counters are per package, so print only for 1st thread/package */ 1637 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 1638 return 0; 1639 1640 cpu = t->cpu_id; 1641 if (cpu_migrate(cpu)) { 1642 fprintf(stderr, "Could not migrate to CPU %d\n", cpu); 1643 return -1; 1644 } 1645 1646 if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) 1647 return -1; 1648 1649 local_rapl_power_units = 1.0 / (1 << (msr & 0xF)); 1650 local_rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); 1651 local_rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF)); 1652 1653 if (local_rapl_power_units != rapl_power_units) 1654 fprintf(stderr, "cpu%d, ERROR: Power units mis-match\n", cpu); 1655 if (local_rapl_energy_units != rapl_energy_units) 1656 fprintf(stderr, "cpu%d, ERROR: Energy units mis-match\n", cpu); 1657 if (local_rapl_time_units != rapl_time_units) 1658 fprintf(stderr, "cpu%d, ERROR: Time units mis-match\n", cpu); 1659 1660 if (verbose) { 1661 fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " 1662 "(%f Watts, %f Joules, %f sec.)\n", cpu, msr, 1663 local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units); 1664 } 1665 if (do_rapl & RAPL_PKG) { 1666 if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) 1667 return -5; 1668 1669 1670 fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 1671 cpu, msr, 1672 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 1673 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, 1674 ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, 1675 ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); 1676 1677 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) 1678 return -9; 1679 1680 fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", 1681 cpu, msr, (msr >> 63) & 1 ? "": "UN"); 1682 1683 print_power_limit_msr(cpu, msr, "PKG Limit #1"); 1684 fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", 1685 cpu, 1686 ((msr >> 47) & 1) ? "EN" : "DIS", 1687 ((msr >> 32) & 0x7FFF) * rapl_power_units, 1688 (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, 1689 ((msr >> 48) & 1) ? "EN" : "DIS"); 1690 } 1691 1692 if (do_rapl & RAPL_DRAM) { 1693 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) 1694 return -6; 1695 1696 1697 fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 1698 cpu, msr, 1699 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 1700 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, 1701 ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, 1702 ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); 1703 1704 1705 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) 1706 return -9; 1707 fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", 1708 cpu, msr, (msr >> 31) & 1 ? "": "UN"); 1709 1710 print_power_limit_msr(cpu, msr, "DRAM Limit"); 1711 } 1712 if (do_rapl & RAPL_CORES) { 1713 if (verbose) { 1714 if (get_msr(cpu, MSR_PP0_POLICY, &msr)) 1715 return -7; 1716 1717 fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); 1718 1719 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) 1720 return -9; 1721 fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", 1722 cpu, msr, (msr >> 31) & 1 ? "": "UN"); 1723 print_power_limit_msr(cpu, msr, "Cores Limit"); 1724 } 1725 } 1726 if (do_rapl & RAPL_GFX) { 1727 if (verbose) { 1728 if (get_msr(cpu, MSR_PP1_POLICY, &msr)) 1729 return -8; 1730 1731 fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); 1732 1733 if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) 1734 return -9; 1735 fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", 1736 cpu, msr, (msr >> 31) & 1 ? "": "UN"); 1737 print_power_limit_msr(cpu, msr, "GFX Limit"); 1738 } 1739 } 1740 return 0; 1741 } 1742 1743 1744 int is_snb(unsigned int family, unsigned int model) 1745 { 1746 if (!genuine_intel) 1747 return 0; 1748 1749 switch (model) { 1750 case 0x2A: 1751 case 0x2D: 1752 case 0x3A: /* IVB */ 1753 case 0x3E: /* IVB Xeon */ 1754 case 0x3C: /* HSW */ 1755 case 0x3F: /* HSW */ 1756 case 0x45: /* HSW */ 1757 return 1; 1758 } 1759 return 0; 1760 } 1761 1762 double discover_bclk(unsigned int family, unsigned int model) 1763 { 1764 if (is_snb(family, model)) 1765 return 100.00; 1766 else 1767 return 133.33; 1768 } 1769 1770 /* 1771 * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where 1772 * the Thermal Control Circuit (TCC) activates. 1773 * This is usually equal to tjMax. 1774 * 1775 * Older processors do not have this MSR, so there we guess, 1776 * but also allow cmdline over-ride with -T. 1777 * 1778 * Several MSR temperature values are in units of degrees-C 1779 * below this value, including the Digital Thermal Sensor (DTS), 1780 * Package Thermal Management Sensor (PTM), and thermal event thresholds. 1781 */ 1782 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) 1783 { 1784 unsigned long long msr; 1785 unsigned int target_c_local; 1786 int cpu; 1787 1788 /* tcc_activation_temp is used only for dts or ptm */ 1789 if (!(do_dts || do_ptm)) 1790 return 0; 1791 1792 /* this is a per-package concept */ 1793 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 1794 return 0; 1795 1796 cpu = t->cpu_id; 1797 if (cpu_migrate(cpu)) { 1798 fprintf(stderr, "Could not migrate to CPU %d\n", cpu); 1799 return -1; 1800 } 1801 1802 if (tcc_activation_temp_override != 0) { 1803 tcc_activation_temp = tcc_activation_temp_override; 1804 fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n", 1805 cpu, tcc_activation_temp); 1806 return 0; 1807 } 1808 1809 /* Temperature Target MSR is Nehalem and newer only */ 1810 if (!do_nehalem_platform_info) 1811 goto guess; 1812 1813 if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr)) 1814 goto guess; 1815 1816 target_c_local = (msr >> 16) & 0x7F; 1817 1818 if (verbose) 1819 fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", 1820 cpu, msr, target_c_local); 1821 1822 if (target_c_local < 85 || target_c_local > 120) 1823 goto guess; 1824 1825 tcc_activation_temp = target_c_local; 1826 1827 return 0; 1828 1829 guess: 1830 tcc_activation_temp = TJMAX_DEFAULT; 1831 fprintf(stderr, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", 1832 cpu, tcc_activation_temp); 1833 1834 return 0; 1835 } 1836 void check_cpuid() 1837 { 1838 unsigned int eax, ebx, ecx, edx, max_level; 1839 unsigned int fms, family, model, stepping; 1840 1841 eax = ebx = ecx = edx = 0; 1842 1843 asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0)); 1844 1845 if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) 1846 genuine_intel = 1; 1847 1848 if (verbose) 1849 fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ", 1850 (char *)&ebx, (char *)&edx, (char *)&ecx); 1851 1852 asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); 1853 family = (fms >> 8) & 0xf; 1854 model = (fms >> 4) & 0xf; 1855 stepping = fms & 0xf; 1856 if (family == 6 || family == 0xf) 1857 model += ((fms >> 16) & 0xf) << 4; 1858 1859 if (verbose) 1860 fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", 1861 max_level, family, model, stepping, family, model, stepping); 1862 1863 if (!(edx & (1 << 5))) { 1864 fprintf(stderr, "CPUID: no MSR\n"); 1865 exit(1); 1866 } 1867 1868 /* 1869 * check max extended function levels of CPUID. 1870 * This is needed to check for invariant TSC. 1871 * This check is valid for both Intel and AMD. 1872 */ 1873 ebx = ecx = edx = 0; 1874 asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000000)); 1875 1876 if (max_level < 0x80000007) { 1877 fprintf(stderr, "CPUID: no invariant TSC (max_level 0x%x)\n", max_level); 1878 exit(1); 1879 } 1880 1881 /* 1882 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 1883 * this check is valid for both Intel and AMD 1884 */ 1885 asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000007)); 1886 has_invariant_tsc = edx & (1 << 8); 1887 1888 if (!has_invariant_tsc) { 1889 fprintf(stderr, "No invariant TSC\n"); 1890 exit(1); 1891 } 1892 1893 /* 1894 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 1895 * this check is valid for both Intel and AMD 1896 */ 1897 1898 asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6)); 1899 has_aperf = ecx & (1 << 0); 1900 do_dts = eax & (1 << 0); 1901 do_ptm = eax & (1 << 6); 1902 has_epb = ecx & (1 << 3); 1903 1904 if (verbose) 1905 fprintf(stderr, "CPUID(6): %s%s%s%s\n", 1906 has_aperf ? "APERF" : "No APERF!", 1907 do_dts ? ", DTS" : "", 1908 do_ptm ? ", PTM": "", 1909 has_epb ? ", EPB": ""); 1910 1911 if (!has_aperf) 1912 exit(-1); 1913 1914 do_nehalem_platform_info = genuine_intel && has_invariant_tsc; 1915 do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */ 1916 do_smi = do_nhm_cstates; 1917 do_snb_cstates = is_snb(family, model); 1918 bclk = discover_bclk(family, model); 1919 1920 do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); 1921 do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); 1922 rapl_probe(family, model); 1923 1924 return; 1925 } 1926 1927 1928 void usage() 1929 { 1930 fprintf(stderr, "%s: [-v][-R][-T][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n", 1931 progname); 1932 exit(1); 1933 } 1934 1935 1936 /* 1937 * in /dev/cpu/ return success for names that are numbers 1938 * ie. filter out ".", "..", "microcode". 1939 */ 1940 int dir_filter(const struct dirent *dirp) 1941 { 1942 if (isdigit(dirp->d_name[0])) 1943 return 1; 1944 else 1945 return 0; 1946 } 1947 1948 int open_dev_cpu_msr(int dummy1) 1949 { 1950 return 0; 1951 } 1952 1953 void topology_probe() 1954 { 1955 int i; 1956 int max_core_id = 0; 1957 int max_package_id = 0; 1958 int max_siblings = 0; 1959 struct cpu_topology { 1960 int core_id; 1961 int physical_package_id; 1962 } *cpus; 1963 1964 /* Initialize num_cpus, max_cpu_num */ 1965 topo.num_cpus = 0; 1966 topo.max_cpu_num = 0; 1967 for_all_proc_cpus(count_cpus); 1968 if (!summary_only && topo.num_cpus > 1) 1969 show_cpu = 1; 1970 1971 if (verbose > 1) 1972 fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); 1973 1974 cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); 1975 if (cpus == NULL) { 1976 perror("calloc cpus"); 1977 exit(1); 1978 } 1979 1980 /* 1981 * Allocate and initialize cpu_present_set 1982 */ 1983 cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1)); 1984 if (cpu_present_set == NULL) { 1985 perror("CPU_ALLOC"); 1986 exit(3); 1987 } 1988 cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 1989 CPU_ZERO_S(cpu_present_setsize, cpu_present_set); 1990 for_all_proc_cpus(mark_cpu_present); 1991 1992 /* 1993 * Allocate and initialize cpu_affinity_set 1994 */ 1995 cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1)); 1996 if (cpu_affinity_set == NULL) { 1997 perror("CPU_ALLOC"); 1998 exit(3); 1999 } 2000 cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 2001 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 2002 2003 2004 /* 2005 * For online cpus 2006 * find max_core_id, max_package_id 2007 */ 2008 for (i = 0; i <= topo.max_cpu_num; ++i) { 2009 int siblings; 2010 2011 if (cpu_is_not_present(i)) { 2012 if (verbose > 1) 2013 fprintf(stderr, "cpu%d NOT PRESENT\n", i); 2014 continue; 2015 } 2016 cpus[i].core_id = get_core_id(i); 2017 if (cpus[i].core_id > max_core_id) 2018 max_core_id = cpus[i].core_id; 2019 2020 cpus[i].physical_package_id = get_physical_package_id(i); 2021 if (cpus[i].physical_package_id > max_package_id) 2022 max_package_id = cpus[i].physical_package_id; 2023 2024 siblings = get_num_ht_siblings(i); 2025 if (siblings > max_siblings) 2026 max_siblings = siblings; 2027 if (verbose > 1) 2028 fprintf(stderr, "cpu %d pkg %d core %d\n", 2029 i, cpus[i].physical_package_id, cpus[i].core_id); 2030 } 2031 topo.num_cores_per_pkg = max_core_id + 1; 2032 if (verbose > 1) 2033 fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", 2034 max_core_id, topo.num_cores_per_pkg); 2035 if (!summary_only && topo.num_cores_per_pkg > 1) 2036 show_core = 1; 2037 2038 topo.num_packages = max_package_id + 1; 2039 if (verbose > 1) 2040 fprintf(stderr, "max_package_id %d, sizing for %d packages\n", 2041 max_package_id, topo.num_packages); 2042 if (!summary_only && topo.num_packages > 1) 2043 show_pkg = 1; 2044 2045 topo.num_threads_per_core = max_siblings; 2046 if (verbose > 1) 2047 fprintf(stderr, "max_siblings %d\n", max_siblings); 2048 2049 free(cpus); 2050 } 2051 2052 void 2053 allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) 2054 { 2055 int i; 2056 2057 *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg * 2058 topo.num_packages, sizeof(struct thread_data)); 2059 if (*t == NULL) 2060 goto error; 2061 2062 for (i = 0; i < topo.num_threads_per_core * 2063 topo.num_cores_per_pkg * topo.num_packages; i++) 2064 (*t)[i].cpu_id = -1; 2065 2066 *c = calloc(topo.num_cores_per_pkg * topo.num_packages, 2067 sizeof(struct core_data)); 2068 if (*c == NULL) 2069 goto error; 2070 2071 for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++) 2072 (*c)[i].core_id = -1; 2073 2074 *p = calloc(topo.num_packages, sizeof(struct pkg_data)); 2075 if (*p == NULL) 2076 goto error; 2077 2078 for (i = 0; i < topo.num_packages; i++) 2079 (*p)[i].package_id = i; 2080 2081 return; 2082 error: 2083 perror("calloc counters"); 2084 exit(1); 2085 } 2086 /* 2087 * init_counter() 2088 * 2089 * set cpu_id, core_num, pkg_num 2090 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE 2091 * 2092 * increment topo.num_cores when 1st core in pkg seen 2093 */ 2094 void init_counter(struct thread_data *thread_base, struct core_data *core_base, 2095 struct pkg_data *pkg_base, int thread_num, int core_num, 2096 int pkg_num, int cpu_id) 2097 { 2098 struct thread_data *t; 2099 struct core_data *c; 2100 struct pkg_data *p; 2101 2102 t = GET_THREAD(thread_base, thread_num, core_num, pkg_num); 2103 c = GET_CORE(core_base, core_num, pkg_num); 2104 p = GET_PKG(pkg_base, pkg_num); 2105 2106 t->cpu_id = cpu_id; 2107 if (thread_num == 0) { 2108 t->flags |= CPU_IS_FIRST_THREAD_IN_CORE; 2109 if (cpu_is_first_core_in_package(cpu_id)) 2110 t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE; 2111 } 2112 2113 c->core_id = core_num; 2114 p->package_id = pkg_num; 2115 } 2116 2117 2118 int initialize_counters(int cpu_id) 2119 { 2120 int my_thread_id, my_core_id, my_package_id; 2121 2122 my_package_id = get_physical_package_id(cpu_id); 2123 my_core_id = get_core_id(cpu_id); 2124 2125 if (cpu_is_first_sibling_in_core(cpu_id)) { 2126 my_thread_id = 0; 2127 topo.num_cores++; 2128 } else { 2129 my_thread_id = 1; 2130 } 2131 2132 init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); 2133 init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); 2134 return 0; 2135 } 2136 2137 void allocate_output_buffer() 2138 { 2139 output_buffer = calloc(1, (1 + topo.num_cpus) * 128); 2140 outp = output_buffer; 2141 if (outp == NULL) { 2142 perror("calloc"); 2143 exit(-1); 2144 } 2145 } 2146 2147 void setup_all_buffers(void) 2148 { 2149 topology_probe(); 2150 allocate_counters(&thread_even, &core_even, &package_even); 2151 allocate_counters(&thread_odd, &core_odd, &package_odd); 2152 allocate_output_buffer(); 2153 for_all_proc_cpus(initialize_counters); 2154 } 2155 void turbostat_init() 2156 { 2157 check_cpuid(); 2158 2159 check_dev_msr(); 2160 check_super_user(); 2161 2162 setup_all_buffers(); 2163 2164 if (verbose) 2165 print_verbose_header(); 2166 2167 if (verbose) 2168 for_all_cpus(print_epb, ODD_COUNTERS); 2169 2170 if (verbose) 2171 for_all_cpus(print_rapl, ODD_COUNTERS); 2172 2173 for_all_cpus(set_temperature_target, ODD_COUNTERS); 2174 2175 if (verbose) 2176 for_all_cpus(print_thermal, ODD_COUNTERS); 2177 } 2178 2179 int fork_it(char **argv) 2180 { 2181 pid_t child_pid; 2182 int status; 2183 2184 status = for_all_cpus(get_counters, EVEN_COUNTERS); 2185 if (status) 2186 exit(status); 2187 /* clear affinity side-effect of get_counters() */ 2188 sched_setaffinity(0, cpu_present_setsize, cpu_present_set); 2189 gettimeofday(&tv_even, (struct timezone *)NULL); 2190 2191 child_pid = fork(); 2192 if (!child_pid) { 2193 /* child */ 2194 execvp(argv[0], argv); 2195 } else { 2196 2197 /* parent */ 2198 if (child_pid == -1) { 2199 perror("fork"); 2200 exit(1); 2201 } 2202 2203 signal(SIGINT, SIG_IGN); 2204 signal(SIGQUIT, SIG_IGN); 2205 if (waitpid(child_pid, &status, 0) == -1) { 2206 perror("wait"); 2207 exit(status); 2208 } 2209 } 2210 /* 2211 * n.b. fork_it() does not check for errors from for_all_cpus() 2212 * because re-starting is problematic when forking 2213 */ 2214 for_all_cpus(get_counters, ODD_COUNTERS); 2215 gettimeofday(&tv_odd, (struct timezone *)NULL); 2216 timersub(&tv_odd, &tv_even, &tv_delta); 2217 for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); 2218 compute_average(EVEN_COUNTERS); 2219 format_all_counters(EVEN_COUNTERS); 2220 flush_stderr(); 2221 2222 fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); 2223 2224 return status; 2225 } 2226 2227 void cmdline(int argc, char **argv) 2228 { 2229 int opt; 2230 2231 progname = argv[0]; 2232 2233 while ((opt = getopt(argc, argv, "+pPSvi:sc:sC:m:M:RT:")) != -1) { 2234 switch (opt) { 2235 case 'p': 2236 show_core_only++; 2237 break; 2238 case 'P': 2239 show_pkg_only++; 2240 break; 2241 case 'S': 2242 summary_only++; 2243 break; 2244 case 'v': 2245 verbose++; 2246 break; 2247 case 'i': 2248 interval_sec = atoi(optarg); 2249 break; 2250 case 'c': 2251 sscanf(optarg, "%x", &extra_delta_offset32); 2252 break; 2253 case 'C': 2254 sscanf(optarg, "%x", &extra_delta_offset64); 2255 break; 2256 case 'm': 2257 sscanf(optarg, "%x", &extra_msr_offset32); 2258 break; 2259 case 'M': 2260 sscanf(optarg, "%x", &extra_msr_offset64); 2261 break; 2262 case 'R': 2263 rapl_verbose++; 2264 break; 2265 case 'T': 2266 tcc_activation_temp_override = atoi(optarg); 2267 break; 2268 default: 2269 usage(); 2270 } 2271 } 2272 } 2273 2274 int main(int argc, char **argv) 2275 { 2276 cmdline(argc, argv); 2277 2278 if (verbose) 2279 fprintf(stderr, "turbostat v3.2 February 11, 2013" 2280 " - Len Brown <lenb@kernel.org>\n"); 2281 2282 turbostat_init(); 2283 2284 /* 2285 * if any params left, it must be a command to fork 2286 */ 2287 if (argc - optind) 2288 return fork_it(argv + optind); 2289 else 2290 turbostat_loop(); 2291 2292 return 0; 2293 } 2294