/*
 * turbostat -- show CPU frequency and C-state residency
 * on modern Intel turbo-capable processors.
 *
 * Copyright (c) 2012 Intel Corporation.
 * Len Brown <len.brown@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */

#define _GNU_SOURCE
#include <asm/msr.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/resource.h>
#include <fcntl.h>
#include <signal.h>
#include <sys/time.h>
#include <stdlib.h>
#include <dirent.h>
#include <string.h>
#include <ctype.h>
#include <sched.h>

char *proc_stat = "/proc/stat";
unsigned int interval_sec = 5;	/* set with -i interval_sec */
unsigned int verbose;		/* set with -v */
unsigned int rapl_verbose;	/* set with -R */
unsigned int thermal_verbose;	/* set with -T */
unsigned int summary_only;	/* set with -S */
unsigned int skip_c0;
unsigned int skip_c1;
unsigned int do_nhm_cstates;
unsigned int do_snb_cstates;
unsigned int has_aperf;
unsigned int has_epb;
unsigned int units = 1000000000;	/* GHz etc */
unsigned int genuine_intel;
unsigned int has_invariant_tsc;
unsigned int do_nehalem_platform_info;
unsigned int do_nehalem_turbo_ratio_limit;
unsigned int do_ivt_turbo_ratio_limit;
unsigned int extra_msr_offset32;
unsigned int extra_msr_offset64;
unsigned int extra_delta_offset32;
unsigned int extra_delta_offset64;
double bclk;
unsigned int show_pkg;
unsigned int show_core;
unsigned int show_cpu;
unsigned int show_pkg_only;
unsigned int show_core_only;
char *output_buffer, *outp;
unsigned int do_rapl;
unsigned int do_dts;
unsigned int do_ptm;
unsigned int tcc_activation_temp;
unsigned int tcc_activation_temp_override;
double rapl_power_units, rapl_energy_units, rapl_time_units;
double rapl_joule_counter_range;

#define RAPL_PKG		(1 << 0)
#define RAPL_CORES		(1 << 1)
#define RAPL_GFX		(1 << 2)
#define RAPL_DRAM		(1 << 3)
#define RAPL_PKG_PERF_STATUS	(1 << 4)
#define RAPL_DRAM_PERF_STATUS	(1 << 5)
#define TJMAX_DEFAULT	100

#define MAX(a, b) ((a) > (b) ? (a) : (b))

int aperf_mperf_unstable;
int backwards_count;
char *progname;

cpu_set_t *cpu_present_set, *cpu_affinity_set;
size_t cpu_present_setsize, cpu_affinity_setsize;
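/*
 * Counters are collected at three levels of the topology: per-thread,
 * per-core, and per-package.  Two complete sets ("even" and "odd") are
 * kept so that one snapshot can be subtracted from the previous one to
 * produce per-interval deltas.
 */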
struct thread_data {
	unsigned long long tsc;
	unsigned long long aperf;
	unsigned long long mperf;
	unsigned long long c1;	/* derived */
	unsigned long long extra_msr64;
	unsigned long long extra_delta64;
	unsigned long long extra_msr32;
	unsigned long long extra_delta32;
	unsigned int cpu_id;
	unsigned int flags;
#define CPU_IS_FIRST_THREAD_IN_CORE	0x2
#define CPU_IS_FIRST_CORE_IN_PACKAGE	0x4
} *thread_even, *thread_odd;

struct core_data {
	unsigned long long c3;
	unsigned long long c6;
	unsigned long long c7;
	unsigned int core_temp_c;
	unsigned int core_id;
} *core_even, *core_odd;

struct pkg_data {
	unsigned long long pc2;
	unsigned long long pc3;
	unsigned long long pc6;
	unsigned long long pc7;
	unsigned int package_id;
	unsigned int energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
	unsigned int energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
	unsigned int energy_cores;	/* MSR_PP0_ENERGY_STATUS */
	unsigned int energy_gfx;	/* MSR_PP1_ENERGY_STATUS */
	unsigned int rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */
	unsigned int rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */
	unsigned int pkg_temp_c;

} *package_even, *package_odd;

#define ODD_COUNTERS thread_odd, core_odd, package_odd
#define EVEN_COUNTERS thread_even, core_even, package_even

#define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \
	(thread_base + (pkg_no) * topo.num_cores_per_pkg * \
		topo.num_threads_per_core + \
		(core_no) * topo.num_threads_per_core + (thread_no))
#define GET_CORE(core_base, core_no, pkg_no) \
	(core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no))
#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)

struct system_summary {
	struct thread_data threads;
	struct core_data cores;
	struct pkg_data packages;
} sum, average;


struct topo_params {
	int num_packages;
	int num_cpus;
	int num_cores;
	int max_cpu_num;
	int num_cores_per_pkg;
	int num_threads_per_core;
} topo;

struct timeval tv_even, tv_odd, tv_delta;

void setup_all_buffers(void);

int cpu_is_not_present(int cpu)
{
	return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
}
/*
 * run func(thread, core, package) in topology order
 * skip non-present cpus
 */

int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
	struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
{
	int retval, pkg_no, core_no, thread_no;

	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
			for (thread_no = 0; thread_no <
				topo.num_threads_per_core; ++thread_no) {
				struct thread_data *t;
				struct core_data *c;
				struct pkg_data *p;

				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);

				if (cpu_is_not_present(t->cpu_id))
					continue;

				c = GET_CORE(core_base, core_no, pkg_no);
				p = GET_PKG(pkg_base, pkg_no);

				retval = func(t, c, p);
				if (retval)
					return retval;
			}
		}
	}
	return 0;
}
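/*
 * MSR access: bind the calling thread to the target CPU with
 * sched_setaffinity(), then pread() the MSR offset from /dev/cpu/N/msr.
 * This requires the msr driver (see check_dev_msr()) and root
 * privilege (see check_super_user()).
 */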
int cpu_migrate(int cpu)
{
	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
	CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
	if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
		return -1;
	else
		return 0;
}

int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
	ssize_t retval;
	char pathname[32];
	int fd;

	sprintf(pathname, "/dev/cpu/%d/msr", cpu);
	fd = open(pathname, O_RDONLY);
	if (fd < 0)
		return -1;

	retval = pread(fd, msr, sizeof *msr, offset);
	close(fd);

	if (retval != sizeof *msr) {
		fprintf(stderr, "%s offset 0x%zx read failed\n", pathname, offset);
		return -1;
	}

	return 0;
}

void print_header(void)
{
	if (show_pkg)
		outp += sprintf(outp, "pk");
	if (show_pkg)
		outp += sprintf(outp, " ");
	if (show_core)
		outp += sprintf(outp, "cor");
	if (show_cpu)
		outp += sprintf(outp, " CPU");
	if (show_pkg || show_core || show_cpu)
		outp += sprintf(outp, " ");
	if (do_nhm_cstates)
		outp += sprintf(outp, " %%c0");
	if (has_aperf)
		outp += sprintf(outp, " GHz");
	outp += sprintf(outp, " TSC");
	if (extra_delta_offset32)
		outp += sprintf(outp, " count 0x%03X", extra_delta_offset32);
	if (extra_delta_offset64)
		outp += sprintf(outp, " COUNT 0x%03X", extra_delta_offset64);
	if (extra_msr_offset32)
		outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32);
	if (extra_msr_offset64)
		outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64);
	if (do_nhm_cstates)
		outp += sprintf(outp, " %%c1");
	if (do_nhm_cstates)
		outp += sprintf(outp, " %%c3");
	if (do_nhm_cstates)
		outp += sprintf(outp, " %%c6");
	if (do_snb_cstates)
		outp += sprintf(outp, " %%c7");

	if (do_dts)
		outp += sprintf(outp, " CTMP");
	if (do_ptm)
		outp += sprintf(outp, " PTMP");

	if (do_snb_cstates)
		outp += sprintf(outp, " %%pc2");
	if (do_nhm_cstates)
		outp += sprintf(outp, " %%pc3");
	if (do_nhm_cstates)
		outp += sprintf(outp, " %%pc6");
	if (do_snb_cstates)
		outp += sprintf(outp, " %%pc7");

	if (do_rapl & RAPL_PKG)
		outp += sprintf(outp, " Pkg_W");
	if (do_rapl & RAPL_CORES)
		outp += sprintf(outp, " Cor_W");
	if (do_rapl & RAPL_GFX)
		outp += sprintf(outp, " GFX_W");
	if (do_rapl & RAPL_DRAM)
		outp += sprintf(outp, " RAM_W");
	if (do_rapl & RAPL_PKG_PERF_STATUS)
		outp += sprintf(outp, " PKG_%%");
	if (do_rapl & RAPL_DRAM_PERF_STATUS)
		outp += sprintf(outp, " RAM_%%");

	outp += sprintf(outp, "\n");
}
int dump_counters(struct thread_data *t, struct core_data *c,
	struct pkg_data *p)
{
	fprintf(stderr, "t %p, c %p, p %p\n", t, c, p);

	if (t) {
		fprintf(stderr, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags);
		fprintf(stderr, "TSC: %016llX\n", t->tsc);
		fprintf(stderr, "aperf: %016llX\n", t->aperf);
		fprintf(stderr, "mperf: %016llX\n", t->mperf);
		fprintf(stderr, "c1: %016llX\n", t->c1);
		fprintf(stderr, "msr0x%x: %08llX\n",
			extra_delta_offset32, t->extra_delta32);
		fprintf(stderr, "msr0x%x: %016llX\n",
			extra_delta_offset64, t->extra_delta64);
		fprintf(stderr, "msr0x%x: %08llX\n",
			extra_msr_offset32, t->extra_msr32);
		fprintf(stderr, "msr0x%x: %016llX\n",
			extra_msr_offset64, t->extra_msr64);
	}

	if (c) {
		fprintf(stderr, "core: %d\n", c->core_id);
		fprintf(stderr, "c3: %016llX\n", c->c3);
		fprintf(stderr, "c6: %016llX\n", c->c6);
		fprintf(stderr, "c7: %016llX\n", c->c7);
		fprintf(stderr, "DTS: %dC\n", c->core_temp_c);
	}

	if (p) {
		fprintf(stderr, "package: %d\n", p->package_id);
		fprintf(stderr, "pc2: %016llX\n", p->pc2);
		fprintf(stderr, "pc3: %016llX\n", p->pc3);
		fprintf(stderr, "pc6: %016llX\n", p->pc6);
		fprintf(stderr, "pc7: %016llX\n", p->pc7);
		fprintf(stderr, "Joules PKG: %0X\n", p->energy_pkg);
		fprintf(stderr, "Joules COR: %0X\n", p->energy_cores);
		fprintf(stderr, "Joules GFX: %0X\n", p->energy_gfx);
		fprintf(stderr, "Joules RAM: %0X\n", p->energy_dram);
		fprintf(stderr, "Throttle PKG: %0X\n", p->rapl_pkg_perf_status);
		fprintf(stderr, "Throttle RAM: %0X\n", p->rapl_dram_perf_status);
		fprintf(stderr, "PTM: %dC\n", p->pkg_temp_c);
	}
	return 0;
}

/*
 * column formatting convention & formats
 * package: "pk" 2 columns %2d
 * core: "cor" 3 columns %3d
 * CPU: "CPU" 3 columns %3d
 * Pkg_W: %6.2
 * Cor_W: %6.2
 * GFX_W: %5.2
 * RAM_W: %5.2
 * GHz: "GHz" 3 columns %3.2
 * TSC: "TSC" 3 columns %3.2
 * percentage " %pc3" %6.2
 * Perf Status percentage: %5.2
 * "CTMP" 4 columns %4d
 */
int format_counters(struct thread_data *t, struct core_data *c,
	struct pkg_data *p)
{
	double interval_float;
	char *fmt5, *fmt6;

	/* if showing only 1st thread in core and this isn't one, bail out */
	if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
		return 0;

	/* if showing only 1st thread in pkg and this isn't one, bail out */
	if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

	interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;

	/* topo columns, print blanks on 1st (average) line */
	if (t == &average.threads) {
		if (show_pkg)
			outp += sprintf(outp, " ");
		if (show_pkg && show_core)
			outp += sprintf(outp, " ");
		if (show_core)
			outp += sprintf(outp, " ");
		if (show_cpu)
			outp += sprintf(outp, " " " ");
	} else {
		if (show_pkg) {
			if (p)
				outp += sprintf(outp, "%2d", p->package_id);
			else
				outp += sprintf(outp, " ");
		}
		if (show_pkg && show_core)
			outp += sprintf(outp, " ");
		if (show_core) {
			if (c)
				outp += sprintf(outp, "%3d", c->core_id);
			else
				outp += sprintf(outp, " ");
		}
		if (show_cpu)
			outp += sprintf(outp, " %3d", t->cpu_id);
	}
	/* %c0 */
	if (do_nhm_cstates) {
		if (show_pkg || show_core || show_cpu)
			outp += sprintf(outp, " ");
		if (!skip_c0)
			outp += sprintf(outp, "%6.2f", 100.0 * t->mperf/t->tsc);
		else
			outp += sprintf(outp, " ****");
	}

	/* GHz */
	if (has_aperf) {
		if (!aperf_mperf_unstable) {
			outp += sprintf(outp, " %3.2f",
				1.0 * t->tsc / units * t->aperf /
				t->mperf / interval_float);
		} else {
			if (t->aperf > t->tsc || t->mperf > t->tsc) {
				outp += sprintf(outp, " ***");
			} else {
				outp += sprintf(outp, "%3.1f*",
					1.0 * t->tsc /
					units * t->aperf /
					t->mperf / interval_float);
			}
		}
	}

	/* TSC */
	outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float);

	/* delta */
	if (extra_delta_offset32)
		outp += sprintf(outp, " %11llu", t->extra_delta32);

	/* DELTA */
	if (extra_delta_offset64)
		outp += sprintf(outp, " %11llu", t->extra_delta64);
	/* msr */
	if (extra_msr_offset32)
		outp += sprintf(outp, " 0x%08llx", t->extra_msr32);

	/* MSR */
	if (extra_msr_offset64)
		outp += sprintf(outp, " 0x%016llx", t->extra_msr64);

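	/*
	 * C-state residency columns: each residency MSR delta is shown
	 * as a percentage of the TSC delta over the same interval.
	 */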
	if (do_nhm_cstates) {
		if (!skip_c1)
			outp += sprintf(outp, " %6.2f", 100.0 * t->c1/t->tsc);
		else
			outp += sprintf(outp, " ****");
	}

	/* print per-core data only for 1st thread in core */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
		goto done;

	if (do_nhm_cstates)
		outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc);
	if (do_nhm_cstates)
		outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc);
	if (do_snb_cstates)
		outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc);

	if (do_dts)
		outp += sprintf(outp, " %4d", c->core_temp_c);

	/* print per-package data only for 1st core in package */
	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		goto done;

	if (do_ptm)
		outp += sprintf(outp, " %4d", p->pkg_temp_c);

	if (do_snb_cstates)
		outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc);
	if (do_nhm_cstates)
		outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc);
	if (do_nhm_cstates)
		outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc);
	if (do_snb_cstates)
		outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc);

	/*
	 * If measurement interval exceeds minimum RAPL Joule Counter range,
	 * indicate that results are suspect by printing "**" in fraction place.
	 */
	if (interval_float < rapl_joule_counter_range) {
		fmt5 = " %5.2f";
		fmt6 = " %6.2f";
	} else {
		fmt5 = " %3.0f**";
		fmt6 = " %4.0f**";
	}

	if (do_rapl & RAPL_PKG)
		outp += sprintf(outp, fmt6, p->energy_pkg * rapl_energy_units / interval_float);
	if (do_rapl & RAPL_CORES)
		outp += sprintf(outp, fmt6, p->energy_cores * rapl_energy_units / interval_float);
	if (do_rapl & RAPL_GFX)
		outp += sprintf(outp, fmt5, p->energy_gfx * rapl_energy_units / interval_float);
	if (do_rapl & RAPL_DRAM)
		outp += sprintf(outp, fmt5, p->energy_dram * rapl_energy_units / interval_float);
	if (do_rapl & RAPL_PKG_PERF_STATUS)
		outp += sprintf(outp, fmt5, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
	if (do_rapl & RAPL_DRAM_PERF_STATUS)
		outp += sprintf(outp, fmt5, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);

done:
	outp += sprintf(outp, "\n");

	return 0;
}

void flush_stdout()
{
	fputs(output_buffer, stdout);
	fflush(stdout);
	outp = output_buffer;
}
void flush_stderr()
{
	fputs(output_buffer, stderr);
	outp = output_buffer;
}
void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	static int printed;

	if (!printed || !summary_only)
		print_header();

	if (topo.num_cpus > 1)
		format_counters(&average.threads, &average.cores,
			&average.packages);

	printed = 1;

	if (summary_only)
		return;

	for_all_cpus(format_counters, t, c, p);
}

#define DELTA_WRAP32(new, old)			\
	if (new > old) {			\
		old = new - old;		\
	} else {				\
		old = 0x100000000 + new - old;	\
	}
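/*
 * The RAPL energy and perf-status counters are only 32 bits wide and
 * wrap; DELTA_WRAP32 computes (new - old) modulo 2^32 so a per-interval
 * delta survives one wrap of the hardware counter.
 */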
void
delta_package(struct pkg_data *new, struct pkg_data *old)
{
	old->pc2 = new->pc2 - old->pc2;
	old->pc3 = new->pc3 - old->pc3;
	old->pc6 = new->pc6 - old->pc6;
	old->pc7 = new->pc7 - old->pc7;
	old->pkg_temp_c = new->pkg_temp_c;

	DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
	DELTA_WRAP32(new->energy_cores, old->energy_cores);
	DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
	DELTA_WRAP32(new->energy_dram, old->energy_dram);
	DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
	DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
}

void
delta_core(struct core_data *new, struct core_data *old)
{
	old->c3 = new->c3 - old->c3;
	old->c6 = new->c6 - old->c6;
	old->c7 = new->c7 - old->c7;
	old->core_temp_c = new->core_temp_c;
}

/*
 * old = new - old
 */
void
delta_thread(struct thread_data *new, struct thread_data *old,
	struct core_data *core_delta)
{
	old->tsc = new->tsc - old->tsc;

	/* check for TSC < 1 Mcycles over interval */
	if (old->tsc < (1000 * 1000)) {
		fprintf(stderr, "Insanely slow TSC rate, TSC stops in idle?\n");
		fprintf(stderr, "You can disable all c-states by booting with \"idle=poll\"\n");
		fprintf(stderr, "or just the deep ones with \"processor.max_cstate=1\"\n");
		exit(-3);
	}

	old->c1 = new->c1 - old->c1;

	if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
		old->aperf = new->aperf - old->aperf;
		old->mperf = new->mperf - old->mperf;
	} else {

		if (!aperf_mperf_unstable) {
			fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname);
			fprintf(stderr, "* Frequency results do not cover entire interval *\n");
			fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n");

			aperf_mperf_unstable = 1;
		}
		/*
		 * mperf delta is likely a huge "positive" number
		 * cannot use it for calculating c0 time
		 */
		skip_c0 = 1;
		skip_c1 = 1;
	}


	/*
	 * As counter collection is not atomic,
	 * it is possible for mperf's non-halted cycles + idle states
	 * to exceed TSC's all cycles: show c1 = 0% in that case.
	 */
	if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
		old->c1 = 0;
	else {
		/* normal case, derive c1 */
		old->c1 = old->tsc - old->mperf - core_delta->c3
			- core_delta->c6 - core_delta->c7;
	}

	if (old->mperf == 0) {
		if (verbose > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id);
		old->mperf = 1;	/* divide by 0 protection */
	}

	old->extra_delta32 = new->extra_delta32 - old->extra_delta32;
	old->extra_delta32 &= 0xFFFFFFFF;

	old->extra_delta64 = new->extra_delta64 - old->extra_delta64;

	/*
	 * Extra MSR is just a snapshot, simply copy latest w/o subtracting
	 */
	old->extra_msr32 = new->extra_msr32;
	old->extra_msr64 = new->extra_msr64;
}

int delta_cpu(struct thread_data *t, struct core_data *c,
	struct pkg_data *p, struct thread_data *t2,
	struct core_data *c2, struct pkg_data *p2)
{
	/* calculate core delta only for 1st thread in core */
	if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
		delta_core(c, c2);

	/* always calculate thread delta */
	delta_thread(t, t2, c2);	/* c2 is core delta */

	/* calculate package delta only for 1st core in package */
	if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
		delta_package(p, p2);

	return 0;
}
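/*
 * Summary ("average") row support: clear_counters() zeroes the
 * accumulators, sum_counters() adds in each CPU's deltas, and
 * compute_average() divides by the number of CPUs, cores, and packages.
 */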
void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	t->tsc = 0;
	t->aperf = 0;
	t->mperf = 0;
	t->c1 = 0;

	t->extra_delta32 = 0;
	t->extra_delta64 = 0;

	/* tells format_counters to dump all fields from this set */
	t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;

	c->c3 = 0;
	c->c6 = 0;
	c->c7 = 0;
	c->core_temp_c = 0;

	p->pc2 = 0;
	p->pc3 = 0;
	p->pc6 = 0;
	p->pc7 = 0;

	p->energy_pkg = 0;
	p->energy_dram = 0;
	p->energy_cores = 0;
	p->energy_gfx = 0;
	p->rapl_pkg_perf_status = 0;
	p->rapl_dram_perf_status = 0;
	p->pkg_temp_c = 0;
}
int sum_counters(struct thread_data *t, struct core_data *c,
	struct pkg_data *p)
{
	average.threads.tsc += t->tsc;
	average.threads.aperf += t->aperf;
	average.threads.mperf += t->mperf;
	average.threads.c1 += t->c1;

	average.threads.extra_delta32 += t->extra_delta32;
	average.threads.extra_delta64 += t->extra_delta64;

	/* sum per-core values only for 1st thread in core */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
		return 0;

	average.cores.c3 += c->c3;
	average.cores.c6 += c->c6;
	average.cores.c7 += c->c7;

	average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);

	/* sum per-pkg values only for 1st core in pkg */
	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

	average.packages.pc2 += p->pc2;
	average.packages.pc3 += p->pc3;
	average.packages.pc6 += p->pc6;
	average.packages.pc7 += p->pc7;

	average.packages.energy_pkg += p->energy_pkg;
	average.packages.energy_dram += p->energy_dram;
	average.packages.energy_cores += p->energy_cores;
	average.packages.energy_gfx += p->energy_gfx;

	average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);

	average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
	average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
	return 0;
}
/*
 * sum the counters for all cpus in the system
 * compute the weighted average
 */
void compute_average(struct thread_data *t, struct core_data *c,
	struct pkg_data *p)
{
	clear_counters(&average.threads, &average.cores, &average.packages);

	for_all_cpus(sum_counters, t, c, p);

	average.threads.tsc /= topo.num_cpus;
	average.threads.aperf /= topo.num_cpus;
	average.threads.mperf /= topo.num_cpus;
	average.threads.c1 /= topo.num_cpus;

	average.threads.extra_delta32 /= topo.num_cpus;
	average.threads.extra_delta32 &= 0xFFFFFFFF;

	average.threads.extra_delta64 /= topo.num_cpus;

	average.cores.c3 /= topo.num_cores;
	average.cores.c6 /= topo.num_cores;
	average.cores.c7 /= topo.num_cores;

	average.packages.pc2 /= topo.num_packages;
	average.packages.pc3 /= topo.num_packages;
	average.packages.pc6 /= topo.num_packages;
	average.packages.pc7 /= topo.num_packages;
}
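/*
 * rdtsc(): read the Time Stamp Counter directly; the instruction
 * returns the 64-bit count in EDX:EAX, combined below.
 */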
static unsigned long long rdtsc(void)
{
	unsigned int low, high;

	asm volatile("rdtsc" : "=a" (low), "=d" (high));

	return low | ((unsigned long long)high) << 32;
}


/*
 * get_counters(...)
 * migrate to cpu
 * acquire and record local counters for that cpu
 */
int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int cpu = t->cpu_id;
	unsigned long long msr;

	if (cpu_migrate(cpu)) {
		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	t->tsc = rdtsc();	/* we are running on local CPU of interest */

	if (has_aperf) {
		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
			return -3;
		if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
			return -4;
	}

	if (extra_delta_offset32) {
		if (get_msr(cpu, extra_delta_offset32, &msr))
			return -5;
		t->extra_delta32 = msr & 0xFFFFFFFF;
	}

	if (extra_delta_offset64)
		if (get_msr(cpu, extra_delta_offset64, &t->extra_delta64))
			return -5;

	if (extra_msr_offset32) {
		if (get_msr(cpu, extra_msr_offset32, &msr))
			return -5;
		t->extra_msr32 = msr & 0xFFFFFFFF;
	}

	if (extra_msr_offset64)
		if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64))
			return -5;

	/* collect core counters only for 1st thread in core */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
		return 0;

	if (do_nhm_cstates) {
		if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
			return -6;
		if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
			return -7;
	}

	if (do_snb_cstates)
		if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
			return -8;

	if (do_dts) {
		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
			return -9;
		c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
	}


	/* collect package counters only for 1st core in package */
	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

	if (do_nhm_cstates) {
		if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
			return -9;
		if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
			return -10;
	}
	if (do_snb_cstates) {
		if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
			return -11;
		if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
			return -12;
	}
	if (do_rapl & RAPL_PKG) {
		if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
			return -13;
		p->energy_pkg = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_CORES) {
		if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
			return -14;
		p->energy_cores = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_DRAM) {
		if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
			return -15;
		p->energy_dram = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_GFX) {
		if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
			return -16;
		p->energy_gfx = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_PKG_PERF_STATUS) {
		if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
			return -16;
		p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
	}
	if (do_rapl & RAPL_DRAM_PERF_STATUS) {
		if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
			return -16;
		p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
	}
	if (do_ptm) {
		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
			return -17;
		p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
	}
	return 0;
}
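/*
 * print_verbose_header() decodes MSR_NHM_PLATFORM_INFO and the turbo
 * ratio limit MSRs.  In the turbo ratio MSRs each byte holds one ratio:
 * bits 7:0 apply with 1 core active, 15:8 with 2, and so on (the IVT
 * MSR extends this to 16 active cores).  Ratios are multiplied by bclk
 * to get MHz.
 */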
void print_verbose_header(void)
{
	unsigned long long msr;
	unsigned int ratio;

	if (!do_nehalem_platform_info)
		return;

	get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);

	if (verbose)
		fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);

	ratio = (msr >> 40) & 0xFF;
	fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n",
		ratio, bclk, ratio * bclk);

	ratio = (msr >> 8) & 0xFF;
	fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n",
		ratio, bclk, ratio * bclk);

	if (!do_ivt_turbo_ratio_limit)
		goto print_nhm_turbo_ratio_limits;

	get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr);

	if (verbose)
		fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);

	ratio = (msr >> 56) & 0xFF;
	if (ratio)
		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n",
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 48) & 0xFF;
	if (ratio)
		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n",
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 40) & 0xFF;
	if (ratio)
		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n",
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 32) & 0xFF;
	if (ratio)
		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n",
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 24) & 0xFF;
	if (ratio)
		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n",
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 16) & 0xFF;
	if (ratio)
		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n",
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 8) & 0xFF;
	if (ratio)
		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n",
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 0) & 0xFF;
	if (ratio)
		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
			ratio, bclk, ratio * bclk);

print_nhm_turbo_ratio_limits:
	get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);

#define SNB_C1_AUTO_UNDEMOTE	(1UL << 27)
#define SNB_C3_AUTO_UNDEMOTE	(1UL << 28)

	fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr);

	fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: ",
		(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
		(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
		(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
		(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
		(msr & (1 << 15)) ? "" : "UN",
		(unsigned int)msr & 7);

	/* the low 3 bits select the deepest package C-state allowed */
	switch (msr & 0x7) {
	case 0:
		fprintf(stderr, "pc0");
		break;
	case 1:
		fprintf(stderr, do_snb_cstates ? "pc2" : "pc0");
		break;
	case 2:
		fprintf(stderr, do_snb_cstates ? "pc6-noret" : "pc3");
		break;
	case 3:
		fprintf(stderr, "pc6");
		break;
	case 4:
		fprintf(stderr, "pc7");
		break;
	case 5:
		fprintf(stderr, do_snb_cstates ? "pc7s" : "invalid");
		break;
	case 7:
		fprintf(stderr, "unlimited");
		break;
	default:
		fprintf(stderr, "invalid");
	}
	fprintf(stderr, ")\n");

	if (!do_nehalem_turbo_ratio_limit)
		return;

	get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr);

	if (verbose)
		fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);

	ratio = (msr >> 56) & 0xFF;
	if (ratio)
		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n",
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 48) & 0xFF;
	if (ratio)
		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n",
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 40) & 0xFF;
	if (ratio)
		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 6 active cores\n",
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 32) & 0xFF;
	if (ratio)
		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n",
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 24) & 0xFF;
	if (ratio)
		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 16) & 0xFF;
	if (ratio)
		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 8) & 0xFF;
	if (ratio)
		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
			ratio, bclk, ratio * bclk);

	ratio = (msr >> 0) & 0xFF;
	if (ratio)
		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
			ratio, bclk, ratio * bclk);
}

void free_all_buffers(void)
{
	CPU_FREE(cpu_present_set);
	cpu_present_set = NULL;
	cpu_present_setsize = 0;

	CPU_FREE(cpu_affinity_set);
	cpu_affinity_set = NULL;
	cpu_affinity_setsize = 0;

	free(thread_even);
	free(core_even);
	free(package_even);

	thread_even = NULL;
	core_even = NULL;
	package_even = NULL;

	free(thread_odd);
	free(core_odd);
	free(package_odd);

	thread_odd = NULL;
	core_odd = NULL;
	package_odd = NULL;

	free(output_buffer);
	output_buffer = NULL;
	outp = NULL;
}

/*
 * cpu_is_first_sibling_in_core(cpu)
 * return 1 if given CPU is 1st HT sibling in the core
 */
int cpu_is_first_sibling_in_core(int cpu)
{
	char path[64];
	FILE *filep;
	int first_cpu;

	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
	filep = fopen(path, "r");
	if (filep == NULL) {
		perror(path);
		exit(1);
	}
	fscanf(filep, "%d", &first_cpu);
	fclose(filep);
	return (cpu == first_cpu);
}

/*
 * cpu_is_first_core_in_package(cpu)
 * return 1 if given CPU is 1st core in package
 */
int cpu_is_first_core_in_package(int cpu)
{
	char path[64];
	FILE *filep;
	int first_cpu;

	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);
	filep = fopen(path, "r");
	if (filep == NULL) {
		perror(path);
		exit(1);
	}
	fscanf(filep, "%d", &first_cpu);
	fclose(filep);
	return (cpu == first_cpu);
}
/*
 * CPU topology is read from sysfs:
 * /sys/devices/system/cpu/cpuN/topology/{physical_package_id,core_id,
 * thread_siblings_list} supply the package, core, and HT sibling
 * information used to size and index the counter arrays.
 */
int get_physical_package_id(int cpu)
{
	char path[80];
	FILE *filep;
	int pkg;

	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
	filep = fopen(path, "r");
	if (filep == NULL) {
		perror(path);
		exit(1);
	}
	fscanf(filep, "%d", &pkg);
	fclose(filep);
	return pkg;
}

int get_core_id(int cpu)
{
	char path[80];
	FILE *filep;
	int core;

	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
	filep = fopen(path, "r");
	if (filep == NULL) {
		perror(path);
		exit(1);
	}
	fscanf(filep, "%d", &core);
	fclose(filep);
	return core;
}

int get_num_ht_siblings(int cpu)
{
	char path[80];
	FILE *filep;
	int sib1, sib2;
	int matches;
	char character;

	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
	filep = fopen(path, "r");
	if (filep == NULL) {
		perror(path);
		exit(1);
	}
	/*
	 * file format:
	 * if a pair of numbers with a character between: 2 siblings (eg. 1-2, or 1,4)
	 * otherwise 1 sibling (self).
	 */
	matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2);

	fclose(filep);

	if (matches == 3)
		return 2;
	else
		return 1;
}

/*
 * run func(thread, core, package) in topology order
 * skip non-present cpus
 */

int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
	struct pkg_data *, struct thread_data *, struct core_data *,
	struct pkg_data *), struct thread_data *thread_base,
	struct core_data *core_base, struct pkg_data *pkg_base,
	struct thread_data *thread_base2, struct core_data *core_base2,
	struct pkg_data *pkg_base2)
{
	int retval, pkg_no, core_no, thread_no;

	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
			for (thread_no = 0; thread_no <
				topo.num_threads_per_core; ++thread_no) {
				struct thread_data *t, *t2;
				struct core_data *c, *c2;
				struct pkg_data *p, *p2;

				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);

				if (cpu_is_not_present(t->cpu_id))
					continue;

				t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no);

				c = GET_CORE(core_base, core_no, pkg_no);
				c2 = GET_CORE(core_base2, core_no, pkg_no);

				p = GET_PKG(pkg_base, pkg_no);
				p2 = GET_PKG(pkg_base2, pkg_no);

				retval = func(t, c, p, t2, c2, p2);
				if (retval)
					return retval;
			}
		}
	}
	return 0;
}

/*
 * run func(cpu) on every cpu in /proc/stat
 * return max_cpu number
 */
int for_all_proc_cpus(int (func)(int))
{
	FILE *fp;
	int cpu_num;
	int retval;

	fp = fopen(proc_stat, "r");
	if (fp == NULL) {
		perror(proc_stat);
		exit(1);
	}

	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
	if (retval != 0) {
		perror("/proc/stat format");
		exit(1);
	}

	while (1) {
		retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
		if (retval != 1)
			break;

		retval = func(cpu_num);
		if (retval) {
			fclose(fp);
			return(retval);
		}
	}
	fclose(fp);
	return 0;
}

void re_initialize(void)
{
	free_all_buffers();
	setup_all_buffers();
	printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
}


/*
 * count_cpus()
 * remember the last one seen, it will be the max
 */
int count_cpus(int cpu)
{
	if (topo.max_cpu_num < cpu)
		topo.max_cpu_num = cpu;

	topo.num_cpus += 1;
	return 0;
}
int mark_cpu_present(int cpu)
{
	CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
	return 0;
}
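/*
 * turbostat_loop() alternates between the EVEN and ODD counter sets:
 * take a snapshot, sleep for the interval, take the other snapshot,
 * compute new-minus-old deltas, and print one block of output.  If the
 * set of present CPUs changes, re-initialize and start over.
 */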
void turbostat_loop()
{
	int retval;
	int restarted = 0;

restart:
	restarted++;

	retval = for_all_cpus(get_counters, EVEN_COUNTERS);
	if (retval < -1) {
		exit(retval);
	} else if (retval == -1) {
		if (restarted > 1) {
			exit(retval);
		}
		re_initialize();
		goto restart;
	}
	restarted = 0;
	gettimeofday(&tv_even, (struct timezone *)NULL);

	while (1) {
		if (for_all_proc_cpus(cpu_is_not_present)) {
			re_initialize();
			goto restart;
		}
		sleep(interval_sec);
		retval = for_all_cpus(get_counters, ODD_COUNTERS);
		if (retval < -1) {
			exit(retval);
		} else if (retval == -1) {
			re_initialize();
			goto restart;
		}
		gettimeofday(&tv_odd, (struct timezone *)NULL);
		timersub(&tv_odd, &tv_even, &tv_delta);
		for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
		compute_average(EVEN_COUNTERS);
		format_all_counters(EVEN_COUNTERS);
		flush_stdout();
		sleep(interval_sec);
		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
		if (retval < -1) {
			exit(retval);
		} else if (retval == -1) {
			re_initialize();
			goto restart;
		}
		gettimeofday(&tv_even, (struct timezone *)NULL);
		timersub(&tv_even, &tv_odd, &tv_delta);
		for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS);
		compute_average(ODD_COUNTERS);
		format_all_counters(ODD_COUNTERS);
		flush_stdout();
	}
}

void check_dev_msr()
{
	struct stat sb;

	if (stat("/dev/cpu/0/msr", &sb)) {
		fprintf(stderr, "no /dev/cpu/0/msr\n");
		fprintf(stderr, "Try \"# modprobe msr\"\n");
		exit(-5);
	}
}

void check_super_user()
{
	if (getuid() != 0) {
		fprintf(stderr, "must be root\n");
		exit(-6);
	}
}

int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;

	if (family != 6)
		return 0;

	switch (model) {
	case 0x1A:	/* Core i7, Xeon 5500 series - Bloomfield, Gainestown NHM-EP */
	case 0x1E:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
	case 0x1F:	/* Core i7 and i5 Processor - Nehalem */
	case 0x25:	/* Westmere Client - Clarkdale, Arrandale */
	case 0x2C:	/* Westmere EP - Gulftown */
	case 0x2A:	/* SNB */
	case 0x2D:	/* SNB Xeon */
	case 0x3A:	/* IVB */
	case 0x3E:	/* IVB Xeon */
		return 1;
	case 0x2E:	/* Nehalem-EX Xeon - Beckton */
	case 0x2F:	/* Westmere-EX Xeon - Eagleton */
	default:
		return 0;
	}
}
int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;

	if (family != 6)
		return 0;

	switch (model) {
	case 0x3E:	/* IVB Xeon */
		return 1;
	default:
		return 0;
	}
}
/*
 * print_epb()
 * Decode the ENERGY_PERF_BIAS MSR
 */
int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	char *epb_string;
	int cpu;

	if (!has_epb)
		return 0;

	cpu = t->cpu_id;

	/* EPB is per-package */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

	if (cpu_migrate(cpu)) {
		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
		return 0;

	switch (msr & 0x7) {
	case ENERGY_PERF_BIAS_PERFORMANCE:
		epb_string = "performance";
		break;
	case ENERGY_PERF_BIAS_NORMAL:
		epb_string = "balanced";
		break;
	case ENERGY_PERF_BIAS_POWERSAVE:
		epb_string = "powersave";
		break;
	default:
		epb_string = "custom";
		break;
	}
	fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);

	return 0;
}

#define	RAPL_POWER_GRANULARITY	0x7FFF	/* 15 bit power granularity */
#define	RAPL_TIME_GRANULARITY	0x3F	/* 6 bit time granularity */

/*
 * rapl_probe()
 *
 * sets do_rapl
 */
void rapl_probe(unsigned int family, unsigned int model)
{
	unsigned long long msr;
	double tdp;

	if (!genuine_intel)
		return;

	if (family != 6)
		return;

	switch (model) {
	case 0x2A:
	case 0x3A:
		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX;
		break;
	case 0x2D:
	case 0x3E:
		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS;
		break;
	default:
		return;
	}

	/* units on package 0, verify later other packages match */
	if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr))
		return;

	rapl_power_units = 1.0 / (1 << (msr & 0xF));
	rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
	rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF));

	/* get TDP to determine energy counter range */
	if (get_msr(0, MSR_PKG_POWER_INFO, &msr))
		return;

	tdp = ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;

	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;

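	/*
	 * The energy status counter is 32 bits of rapl_energy_units; at
	 * sustained TDP power it wraps after rapl_joule_counter_range
	 * seconds, so longer measurement intervals are flagged with "**"
	 * by format_counters().
	 */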
	if (verbose)
		fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range\n", rapl_joule_counter_range);

	return;
}

int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	unsigned int dts;
	int cpu;

	if (!(do_dts || do_ptm))
		return 0;

	cpu = t->cpu_id;

	/* DTS is per-core, no need to print for each thread */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
		return 0;

	if (cpu_migrate(cpu)) {
		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
			return 0;

		dts = (msr >> 16) & 0x7F;
		fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
			cpu, msr, tcc_activation_temp - dts);

#ifdef THERM_DEBUG
		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
			return 0;

		dts = (msr >> 16) & 0x7F;
		dts2 = (msr >> 8) & 0x7F;
		fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
#endif
	}


	if (do_dts) {
		unsigned int resolution;

		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
			return 0;

		dts = (msr >> 16) & 0x7F;
		resolution = (msr >> 27) & 0xF;
		fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
			cpu, msr, tcc_activation_temp - dts, resolution);

#ifdef THERM_DEBUG
		if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
			return 0;

		dts = (msr >> 16) & 0x7F;
		dts2 = (msr >> 8) & 0x7F;
		fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
#endif
	}

	return 0;
}

/*
 * Decode a RAPL power limit register: limit in bits 14:0 (power units),
 * enable in bit 15, clamp in bit 16, and a time window of
 * (1 + y/4) * 2^x time units with y in bits 23:22 and x in bits 21:17.
 */
void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
{
	fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
		cpu, label,
		((msr >> 15) & 1) ? "EN" : "DIS",
		((msr >> 0) & 0x7FFF) * rapl_power_units,
		(1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
		(((msr >> 16) & 1) ? "EN" : "DIS"));

	return;
}

int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	int cpu;
	double local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units;

	if (!do_rapl)
		return 0;

	/* RAPL counters are per package, so print only for 1st thread/package */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

	cpu = t->cpu_id;
	if (cpu_migrate(cpu)) {
		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
		return -1;

	local_rapl_power_units = 1.0 / (1 << (msr & 0xF));
	local_rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
	local_rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF));

	if (local_rapl_power_units != rapl_power_units)
		fprintf(stderr, "cpu%d, ERROR: Power units mis-match\n", cpu);
	if (local_rapl_energy_units != rapl_energy_units)
		fprintf(stderr, "cpu%d, ERROR: Energy units mis-match\n", cpu);
	if (local_rapl_time_units != rapl_time_units)
		fprintf(stderr, "cpu%d, ERROR: Time units mis-match\n", cpu);

	if (verbose) {
		fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
			"(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
			local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units);
	}
	if (do_rapl & RAPL_PKG) {
		if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
			return -5;


		fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
			cpu, msr,
			((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);

		if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
			return -9;

		fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
			cpu, msr, (msr >> 63) & 1 ? "" : "UN");

		print_power_limit_msr(cpu, msr, "PKG Limit #1");
		fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
			cpu,
			((msr >> 47) & 1) ? "EN" : "DIS",
			((msr >> 32) & 0x7FFF) * rapl_power_units,
			(1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
			((msr >> 48) & 1) ? "EN" : "DIS");
	}

	if (do_rapl & RAPL_DRAM) {
		if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
			return -6;


		fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
			cpu, msr,
			((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);


		if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
			return -9;
		fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
			cpu, msr, (msr >> 31) & 1 ? "" : "UN");

		print_power_limit_msr(cpu, msr, "DRAM Limit");
	}
	if (do_rapl & RAPL_CORES) {
		if (verbose) {
			if (get_msr(cpu, MSR_PP0_POLICY, &msr))
				return -7;

			fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);

			if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
				return -9;
			fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
				cpu, msr, (msr >> 31) & 1 ? "" : "UN");
			print_power_limit_msr(cpu, msr, "Cores Limit");
		}
	}
	if (do_rapl & RAPL_GFX) {
		if (verbose) {
			if (get_msr(cpu, MSR_PP1_POLICY, &msr))
				return -8;

			fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);

			if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
				return -9;
			fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
				cpu, msr, (msr >> 31) & 1 ? "" : "UN");
			print_power_limit_msr(cpu, msr, "GFX Limit");
		}
	}
	return 0;
}

/*
 * Base clock (bclk): 100 MHz on SNB and later, 133.33 MHz on earlier
 * Nehalem/Westmere parts.  Ratios read from the platform-info and
 * turbo-limit MSRs are multiplied by bclk to obtain MHz.
 */
int is_snb(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;

	switch (model) {
	case 0x2A:
	case 0x2D:
	case 0x3A:	/* IVB */
	case 0x3E:	/* IVB Xeon */
		return 1;
	}
	return 0;
}

double discover_bclk(unsigned int family, unsigned int model)
{
	if (is_snb(family, model))
		return 100.00;
	else
		return 133.33;
}

/*
 * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
 * the Thermal Control Circuit (TCC) activates.
 * This is usually equal to tjMax.
 *
 * Older processors do not have this MSR, so there we guess,
 * but also allow cmdline over-ride with -T.
 *
 * Several MSR temperature values are in units of degrees-C
 * below this value, including the Digital Thermal Sensor (DTS),
 * Package Thermal Management Sensor (PTM), and thermal event thresholds.
 */
int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	unsigned int target_c_local;
	int cpu;

	/* tcc_activation_temp is used only for dts or ptm */
	if (!(do_dts || do_ptm))
		return 0;

	/* this is a per-package concept */
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

	cpu = t->cpu_id;
	if (cpu_migrate(cpu)) {
		fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	if (tcc_activation_temp_override != 0) {
		tcc_activation_temp = tcc_activation_temp_override;
		fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n",
			cpu, tcc_activation_temp);
		return 0;
	}

	/* Temperature Target MSR is Nehalem and newer only */
	if (!do_nehalem_platform_info)
		goto guess;

	if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr))
		goto guess;

	target_c_local = (msr >> 16) & 0x7F;

	if (verbose)
		fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
			cpu, msr, target_c_local);

	if (target_c_local < 85 || target_c_local > 120)
		goto guess;

	tcc_activation_temp = target_c_local;

	return 0;

guess:
	tcc_activation_temp = TJMAX_DEFAULT;
	fprintf(stderr, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
		cpu, tcc_activation_temp);

	return 0;
}

/*
 * check_cpuid() identifies the CPU and detects the features this tool
 * depends on: invariant TSC (CPUID.80000007H:EDX[8]), APERF/MPERF
 * (CPUID.06H:ECX[0]), DTS (CPUID.06H:EAX[0]), PTM (CPUID.06H:EAX[6]),
 * and ENERGY_PERF_BIAS (CPUID.06H:ECX[3]).
 */
void check_cpuid()
{
	unsigned int eax, ebx, ecx, edx, max_level;
	unsigned int fms, family, model, stepping;

	eax = ebx = ecx = edx = 0;

	asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0));

	if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
		genuine_intel = 1;

	if (verbose)
		fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ",
			(char *)&ebx, (char *)&edx, (char *)&ecx);

	asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx");
	family = (fms >> 8) & 0xf;
	model = (fms >> 4) & 0xf;
	stepping = fms & 0xf;
	if (family == 6 || family == 0xf)
		model += ((fms >> 16) & 0xf) << 4;

	if (verbose)
		fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
			max_level, family, model, stepping, family, model, stepping);

	if (!(edx & (1 << 5))) {
		fprintf(stderr, "CPUID: no MSR\n");
		exit(1);
	}

	/*
	 * check max extended function levels of CPUID.
	 * This is needed to check for invariant TSC.
	 * This check is valid for both Intel and AMD.
	 */
	ebx = ecx = edx = 0;
	asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000000));

	if (max_level < 0x80000007) {
		fprintf(stderr, "CPUID: no invariant TSC (max_level 0x%x)\n", max_level);
		exit(1);
	}

	/*
	 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
	 * this check is valid for both Intel and AMD
	 */
	asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000007));
	has_invariant_tsc = edx & (1 << 8);

	if (!has_invariant_tsc) {
		fprintf(stderr, "No invariant TSC\n");
		exit(1);
	}

	/*
	 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
	 * this check is valid for both Intel and AMD
	 */

	asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6));
	has_aperf = ecx & (1 << 0);
	do_dts = eax & (1 << 0);
	do_ptm = eax & (1 << 6);
	has_epb = ecx & (1 << 3);

	if (verbose)
		fprintf(stderr, "CPUID(6): %s%s%s%s\n",
			has_aperf ? "APERF" : "No APERF!",
			do_dts ? ", DTS" : "",
			do_ptm ? ", PTM" : "",
			has_epb ? ", EPB" : "");

	if (!has_aperf)
		exit(-1);

	do_nehalem_platform_info = genuine_intel && has_invariant_tsc;
	do_nhm_cstates = genuine_intel;	/* all Intel w/ non-stop TSC have NHM counters */
	do_snb_cstates = is_snb(family, model);
	bclk = discover_bclk(family, model);

	do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model);
	do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model);
	rapl_probe(family, model);

	return;
}

void usage()
{
	fprintf(stderr, "%s: [-v][-R][-T][-p|-P|-S][-c MSR# | -s][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n",
		progname);
	exit(1);
}


/*
 * in /dev/cpu/ return success for names that are numbers
 * ie. filter out ".", "..", "microcode".
 */
int dir_filter(const struct dirent *dirp)
{
	if (isdigit(dirp->d_name[0]))
		return 1;
	else
		return 0;
}

int open_dev_cpu_msr(int dummy1)
{
	return 0;
}

void topology_probe()
{
	int i;
	int max_core_id = 0;
	int max_package_id = 0;
	int max_siblings = 0;
	struct cpu_topology {
		int core_id;
		int physical_package_id;
	} *cpus;

	/* Initialize num_cpus, max_cpu_num */
	topo.num_cpus = 0;
	topo.max_cpu_num = 0;
	for_all_proc_cpus(count_cpus);
	if (!summary_only && topo.num_cpus > 1)
		show_cpu = 1;

	if (verbose > 1)
		fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);

	cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology));
	if (cpus == NULL) {
		perror("calloc cpus");
		exit(1);
	}

	/*
	 * Allocate and initialize cpu_present_set
	 */
	cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
	if (cpu_present_set == NULL) {
		perror("CPU_ALLOC");
		exit(3);
	}
	cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
	for_all_proc_cpus(mark_cpu_present);

	/*
	 * Allocate and initialize cpu_affinity_set
	 */
	cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
	if (cpu_affinity_set == NULL) {
		perror("CPU_ALLOC");
		exit(3);
	}
	cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);


	/*
	 * For online cpus
	 * find max_core_id, max_package_id
	 */
	for (i = 0; i <= topo.max_cpu_num; ++i) {
		int siblings;

		if (cpu_is_not_present(i)) {
			if (verbose > 1)
				fprintf(stderr, "cpu%d NOT PRESENT\n", i);
			continue;
		}
		cpus[i].core_id = get_core_id(i);
		if (cpus[i].core_id > max_core_id)
			max_core_id = cpus[i].core_id;

		cpus[i].physical_package_id = get_physical_package_id(i);
		if (cpus[i].physical_package_id > max_package_id)
			max_package_id = cpus[i].physical_package_id;

		siblings = get_num_ht_siblings(i);
		if (siblings > max_siblings)
			max_siblings = siblings;
		if (verbose > 1)
			fprintf(stderr, "cpu %d pkg %d core %d\n",
				i, cpus[i].physical_package_id, cpus[i].core_id);
	}
	topo.num_cores_per_pkg = max_core_id + 1;
	if (verbose > 1)
		fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n",
			max_core_id, topo.num_cores_per_pkg);
	if (!summary_only && topo.num_cores_per_pkg > 1)
		show_core = 1;

	topo.num_packages = max_package_id + 1;
	if (verbose > 1)
		fprintf(stderr, "max_package_id %d, sizing for %d packages\n",
			max_package_id, topo.num_packages);
	if (!summary_only && topo.num_packages > 1)
		show_pkg = 1;

	topo.num_threads_per_core = max_siblings;
	if (verbose > 1)
		fprintf(stderr, "max_siblings %d\n", max_siblings);

	free(cpus);
}
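/*
 * The counter arrays are allocated densely: one thread_data per
 * (package, core, thread) slot, one core_data per (package, core), and
 * one pkg_data per package.  The GET_THREAD/GET_CORE/GET_PKG macros
 * index into them using the sizes discovered by topology_probe().
 */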
void
allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
{
	int i;

	*t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg *
		topo.num_packages, sizeof(struct thread_data));
	if (*t == NULL)
		goto error;

	for (i = 0; i < topo.num_threads_per_core *
		topo.num_cores_per_pkg * topo.num_packages; i++)
		(*t)[i].cpu_id = -1;

	*c = calloc(topo.num_cores_per_pkg * topo.num_packages,
		sizeof(struct core_data));
	if (*c == NULL)
		goto error;

	for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++)
		(*c)[i].core_id = -1;

	*p = calloc(topo.num_packages, sizeof(struct pkg_data));
	if (*p == NULL)
		goto error;

	for (i = 0; i < topo.num_packages; i++)
		(*p)[i].package_id = i;

	return;
error:
	perror("calloc counters");
	exit(1);
}
/*
 * init_counter()
 *
 * set cpu_id, core_num, pkg_num
 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
 *
 * increment topo.num_cores when 1st core in pkg seen
 */
void init_counter(struct thread_data *thread_base, struct core_data *core_base,
	struct pkg_data *pkg_base, int thread_num, int core_num,
	int pkg_num, int cpu_id)
{
	struct thread_data *t;
	struct core_data *c;
	struct pkg_data *p;

	t = GET_THREAD(thread_base, thread_num, core_num, pkg_num);
	c = GET_CORE(core_base, core_num, pkg_num);
	p = GET_PKG(pkg_base, pkg_num);

	t->cpu_id = cpu_id;
	if (thread_num == 0) {
		t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
		if (cpu_is_first_core_in_package(cpu_id))
			t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
	}

	c->core_id = core_num;
	p->package_id = pkg_num;
}


int initialize_counters(int cpu_id)
{
	int my_thread_id, my_core_id, my_package_id;

	my_package_id = get_physical_package_id(cpu_id);
	my_core_id = get_core_id(cpu_id);

	if (cpu_is_first_sibling_in_core(cpu_id)) {
		my_thread_id = 0;
		topo.num_cores++;
	} else {
		my_thread_id = 1;
	}

	init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
	init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
	return 0;
}

void allocate_output_buffer()
{
	output_buffer = calloc(1, (1 + topo.num_cpus) * 128);
	outp = output_buffer;
	if (outp == NULL) {
		perror("calloc");
		exit(-1);
	}
}

void setup_all_buffers(void)
{
	topology_probe();
	allocate_counters(&thread_even, &core_even, &package_even);
	allocate_counters(&thread_odd, &core_odd, &package_odd);
	allocate_output_buffer();
	for_all_proc_cpus(initialize_counters);
}
void turbostat_init()
{
	check_cpuid();

	check_dev_msr();
	check_super_user();

	setup_all_buffers();

	if (verbose)
		print_verbose_header();

	if (verbose)
		for_all_cpus(print_epb, ODD_COUNTERS);

	if (verbose)
		for_all_cpus(print_rapl, ODD_COUNTERS);

	for_all_cpus(set_temperature_target, ODD_COUNTERS);

	if (verbose)
		for_all_cpus(print_thermal, ODD_COUNTERS);
}
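/*
 * fork_it(): one-shot mode.  Snapshot the counters, run the given
 * command to completion, snapshot again, and print a single set of
 * statistics (on stderr) covering the command's run time.
 */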
int fork_it(char **argv)
{
	pid_t child_pid;
	int status;

	status = for_all_cpus(get_counters, EVEN_COUNTERS);
	if (status)
		exit(status);
	/* clear affinity side-effect of get_counters() */
	sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
	gettimeofday(&tv_even, (struct timezone *)NULL);

	child_pid = fork();
	if (!child_pid) {
		/* child */
		execvp(argv[0], argv);
	} else {

		/* parent */
		if (child_pid == -1) {
			perror("fork");
			exit(1);
		}

		signal(SIGINT, SIG_IGN);
		signal(SIGQUIT, SIG_IGN);
		if (waitpid(child_pid, &status, 0) == -1) {
			perror("wait");
			exit(status);
		}
	}
	/*
	 * n.b. fork_it() does not check for errors from for_all_cpus()
	 * because re-starting is problematic when forking
	 */
	for_all_cpus(get_counters, ODD_COUNTERS);
	gettimeofday(&tv_odd, (struct timezone *)NULL);
	timersub(&tv_odd, &tv_even, &tv_delta);
	for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
	compute_average(EVEN_COUNTERS);
	format_all_counters(EVEN_COUNTERS);
	flush_stderr();

	fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);

	return status;
}

void cmdline(int argc, char **argv)
{
	int opt;

	progname = argv[0];

	while ((opt = getopt(argc, argv, "+pPSvi:sc:sC:m:M:RT:")) != -1) {
		switch (opt) {
		case 'p':
			show_core_only++;
			break;
		case 'P':
			show_pkg_only++;
			break;
		case 'S':
			summary_only++;
			break;
		case 'v':
			verbose++;
			break;
		case 'i':
			interval_sec = atoi(optarg);
			break;
		case 'c':
			sscanf(optarg, "%x", &extra_delta_offset32);
			break;
		case 's':
			extra_delta_offset32 = 0x34;	/* SMI counter */
			break;
		case 'C':
			sscanf(optarg, "%x", &extra_delta_offset64);
			break;
		case 'm':
			sscanf(optarg, "%x", &extra_msr_offset32);
			break;
		case 'M':
			sscanf(optarg, "%x", &extra_msr_offset64);
			break;
		case 'R':
			rapl_verbose++;
			break;
		case 'T':
			tcc_activation_temp_override = atoi(optarg);
			break;
		default:
			usage();
		}
	}
}

int main(int argc, char **argv)
{
	cmdline(argc, argv);

	if (verbose)
		fprintf(stderr, "turbostat v3.0 November 23, 2012"
			" - Len Brown <lenb@kernel.org>\n");

	turbostat_init();

	/*
	 * if any params left, it must be a command to fork
	 */
	if (argc - optind)
		return fork_it(argv + optind);
	else
		turbostat_loop();

	return 0;
}