1 /* 2 * builtin-stat.c 3 * 4 * Builtin stat command: Give a precise performance counters summary 5 * overview about any workload, CPU or specific PID. 6 * 7 * Sample output: 8 9 $ perf stat ./hackbench 10 10 11 Time: 0.118 12 13 Performance counter stats for './hackbench 10': 14 15 1708.761321 task-clock # 11.037 CPUs utilized 16 41,190 context-switches # 0.024 M/sec 17 6,735 CPU-migrations # 0.004 M/sec 18 17,318 page-faults # 0.010 M/sec 19 5,205,202,243 cycles # 3.046 GHz 20 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle 21 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle 22 2,603,501,247 instructions # 0.50 insns per cycle 23 # 1.48 stalled cycles per insn 24 484,357,498 branches # 283.455 M/sec 25 6,388,934 branch-misses # 1.32% of all branches 26 27 0.154822978 seconds time elapsed 28 29 * 30 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 31 * 32 * Improvements and fixes by: 33 * 34 * Arjan van de Ven <arjan@linux.intel.com> 35 * Yanmin Zhang <yanmin.zhang@intel.com> 36 * Wu Fengguang <fengguang.wu@intel.com> 37 * Mike Galbraith <efault@gmx.de> 38 * Paul Mackerras <paulus@samba.org> 39 * Jaswinder Singh Rajput <jaswinder@kernel.org> 40 * 41 * Released under the GPL v2. (and only v2, not any later version) 42 */ 43 44 #include "perf.h" 45 #include "builtin.h" 46 #include "util/cgroup.h" 47 #include "util/util.h" 48 #include "util/parse-options.h" 49 #include "util/parse-events.h" 50 #include "util/pmu.h" 51 #include "util/event.h" 52 #include "util/evlist.h" 53 #include "util/evsel.h" 54 #include "util/debug.h" 55 #include "util/color.h" 56 #include "util/stat.h" 57 #include "util/header.h" 58 #include "util/cpumap.h" 59 #include "util/thread.h" 60 #include "util/thread_map.h" 61 #include "util/counts.h" 62 63 #include <stdlib.h> 64 #include <sys/prctl.h> 65 #include <locale.h> 66 67 #define DEFAULT_SEPARATOR " " 68 #define CNTR_NOT_SUPPORTED "<not supported>" 69 #define CNTR_NOT_COUNTED "<not counted>" 70 71 static void print_counters(struct timespec *ts, int argc, const char **argv); 72 73 /* Default events used for perf stat -T */ 74 static const char *transaction_attrs = { 75 "task-clock," 76 "{" 77 "instructions," 78 "cycles," 79 "cpu/cycles-t/," 80 "cpu/tx-start/," 81 "cpu/el-start/," 82 "cpu/cycles-ct/" 83 "}" 84 }; 85 86 /* More limited version when the CPU does not have all events. */ 87 static const char * transaction_limited_attrs = { 88 "task-clock," 89 "{" 90 "instructions," 91 "cycles," 92 "cpu/cycles-t/," 93 "cpu/tx-start/" 94 "}" 95 }; 96 97 static struct perf_evlist *evsel_list; 98 99 static struct target target = { 100 .uid = UINT_MAX, 101 }; 102 103 static int run_count = 1; 104 static bool no_inherit = false; 105 static volatile pid_t child_pid = -1; 106 static bool null_run = false; 107 static int detailed_run = 0; 108 static bool transaction_run; 109 static bool big_num = true; 110 static int big_num_opt = -1; 111 static const char *csv_sep = NULL; 112 static bool csv_output = false; 113 static bool group = false; 114 static const char *pre_cmd = NULL; 115 static const char *post_cmd = NULL; 116 static bool sync_run = false; 117 static unsigned int initial_delay = 0; 118 static unsigned int unit_width = 4; /* strlen("unit") */ 119 static bool forever = false; 120 static struct timespec ref_time; 121 static struct cpu_map *aggr_map; 122 static int (*aggr_get_id)(struct cpu_map *m, int cpu); 123 124 static volatile int done = 0; 125 126 static struct perf_stat_config stat_config = { 127 .aggr_mode = AGGR_GLOBAL, 128 .scale = true, 129 }; 130 131 static inline void diff_timespec(struct timespec *r, struct timespec *a, 132 struct timespec *b) 133 { 134 r->tv_sec = a->tv_sec - b->tv_sec; 135 if (a->tv_nsec < b->tv_nsec) { 136 r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec; 137 r->tv_sec--; 138 } else { 139 r->tv_nsec = a->tv_nsec - b->tv_nsec ; 140 } 141 } 142 143 static void perf_stat__reset_stats(void) 144 { 145 perf_evlist__reset_stats(evsel_list); 146 perf_stat__reset_shadow_stats(); 147 } 148 149 static int create_perf_stat_counter(struct perf_evsel *evsel) 150 { 151 struct perf_event_attr *attr = &evsel->attr; 152 153 if (stat_config.scale) 154 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 155 PERF_FORMAT_TOTAL_TIME_RUNNING; 156 157 attr->inherit = !no_inherit; 158 159 if (target__has_cpu(&target)) 160 return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); 161 162 if (!target__has_task(&target) && perf_evsel__is_group_leader(evsel)) { 163 attr->disabled = 1; 164 if (!initial_delay) 165 attr->enable_on_exec = 1; 166 } 167 168 return perf_evsel__open_per_thread(evsel, evsel_list->threads); 169 } 170 171 /* 172 * Does the counter have nsecs as a unit? 173 */ 174 static inline int nsec_counter(struct perf_evsel *evsel) 175 { 176 if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || 177 perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 178 return 1; 179 180 return 0; 181 } 182 183 /* 184 * Read out the results of a single counter: 185 * do not aggregate counts across CPUs in system-wide mode 186 */ 187 static int read_counter(struct perf_evsel *counter) 188 { 189 int nthreads = thread_map__nr(evsel_list->threads); 190 int ncpus = perf_evsel__nr_cpus(counter); 191 int cpu, thread; 192 193 if (!counter->supported) 194 return -ENOENT; 195 196 if (counter->system_wide) 197 nthreads = 1; 198 199 for (thread = 0; thread < nthreads; thread++) { 200 for (cpu = 0; cpu < ncpus; cpu++) { 201 struct perf_counts_values *count; 202 203 count = perf_counts(counter->counts, cpu, thread); 204 if (perf_evsel__read(counter, cpu, thread, count)) 205 return -1; 206 } 207 } 208 209 return 0; 210 } 211 212 static void read_counters(bool close_counters) 213 { 214 struct perf_evsel *counter; 215 216 evlist__for_each(evsel_list, counter) { 217 if (read_counter(counter)) 218 pr_warning("failed to read counter %s\n", counter->name); 219 220 if (perf_stat_process_counter(&stat_config, counter)) 221 pr_warning("failed to process counter %s\n", counter->name); 222 223 if (close_counters) { 224 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 225 thread_map__nr(evsel_list->threads)); 226 } 227 } 228 } 229 230 static void process_interval(void) 231 { 232 struct timespec ts, rs; 233 234 read_counters(false); 235 236 clock_gettime(CLOCK_MONOTONIC, &ts); 237 diff_timespec(&rs, &ts, &ref_time); 238 239 print_counters(&rs, 0, NULL); 240 } 241 242 static void handle_initial_delay(void) 243 { 244 struct perf_evsel *counter; 245 246 if (initial_delay) { 247 const int ncpus = cpu_map__nr(evsel_list->cpus), 248 nthreads = thread_map__nr(evsel_list->threads); 249 250 usleep(initial_delay * 1000); 251 evlist__for_each(evsel_list, counter) 252 perf_evsel__enable(counter, ncpus, nthreads); 253 } 254 } 255 256 static volatile int workload_exec_errno; 257 258 /* 259 * perf_evlist__prepare_workload will send a SIGUSR1 260 * if the fork fails, since we asked by setting its 261 * want_signal to true. 262 */ 263 static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info, 264 void *ucontext __maybe_unused) 265 { 266 workload_exec_errno = info->si_value.sival_int; 267 } 268 269 static int __run_perf_stat(int argc, const char **argv) 270 { 271 int interval = stat_config.interval; 272 char msg[512]; 273 unsigned long long t0, t1; 274 struct perf_evsel *counter; 275 struct timespec ts; 276 size_t l; 277 int status = 0; 278 const bool forks = (argc > 0); 279 280 if (interval) { 281 ts.tv_sec = interval / 1000; 282 ts.tv_nsec = (interval % 1000) * 1000000; 283 } else { 284 ts.tv_sec = 1; 285 ts.tv_nsec = 0; 286 } 287 288 if (forks) { 289 if (perf_evlist__prepare_workload(evsel_list, &target, argv, false, 290 workload_exec_failed_signal) < 0) { 291 perror("failed to prepare workload"); 292 return -1; 293 } 294 child_pid = evsel_list->workload.pid; 295 } 296 297 if (group) 298 perf_evlist__set_leader(evsel_list); 299 300 evlist__for_each(evsel_list, counter) { 301 if (create_perf_stat_counter(counter) < 0) { 302 /* 303 * PPC returns ENXIO for HW counters until 2.6.37 304 * (behavior changed with commit b0a873e). 305 */ 306 if (errno == EINVAL || errno == ENOSYS || 307 errno == ENOENT || errno == EOPNOTSUPP || 308 errno == ENXIO) { 309 if (verbose) 310 ui__warning("%s event is not supported by the kernel.\n", 311 perf_evsel__name(counter)); 312 counter->supported = false; 313 314 if ((counter->leader != counter) || 315 !(counter->leader->nr_members > 1)) 316 continue; 317 } 318 319 perf_evsel__open_strerror(counter, &target, 320 errno, msg, sizeof(msg)); 321 ui__error("%s\n", msg); 322 323 if (child_pid != -1) 324 kill(child_pid, SIGTERM); 325 326 return -1; 327 } 328 counter->supported = true; 329 330 l = strlen(counter->unit); 331 if (l > unit_width) 332 unit_width = l; 333 } 334 335 if (perf_evlist__apply_filters(evsel_list, &counter)) { 336 error("failed to set filter \"%s\" on event %s with %d (%s)\n", 337 counter->filter, perf_evsel__name(counter), errno, 338 strerror_r(errno, msg, sizeof(msg))); 339 return -1; 340 } 341 342 /* 343 * Enable counters and exec the command: 344 */ 345 t0 = rdclock(); 346 clock_gettime(CLOCK_MONOTONIC, &ref_time); 347 348 if (forks) { 349 perf_evlist__start_workload(evsel_list); 350 handle_initial_delay(); 351 352 if (interval) { 353 while (!waitpid(child_pid, &status, WNOHANG)) { 354 nanosleep(&ts, NULL); 355 process_interval(); 356 } 357 } 358 wait(&status); 359 360 if (workload_exec_errno) { 361 const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg)); 362 pr_err("Workload failed: %s\n", emsg); 363 return -1; 364 } 365 366 if (WIFSIGNALED(status)) 367 psignal(WTERMSIG(status), argv[0]); 368 } else { 369 handle_initial_delay(); 370 while (!done) { 371 nanosleep(&ts, NULL); 372 if (interval) 373 process_interval(); 374 } 375 } 376 377 t1 = rdclock(); 378 379 update_stats(&walltime_nsecs_stats, t1 - t0); 380 381 read_counters(true); 382 383 return WEXITSTATUS(status); 384 } 385 386 static int run_perf_stat(int argc, const char **argv) 387 { 388 int ret; 389 390 if (pre_cmd) { 391 ret = system(pre_cmd); 392 if (ret) 393 return ret; 394 } 395 396 if (sync_run) 397 sync(); 398 399 ret = __run_perf_stat(argc, argv); 400 if (ret) 401 return ret; 402 403 if (post_cmd) { 404 ret = system(post_cmd); 405 if (ret) 406 return ret; 407 } 408 409 return ret; 410 } 411 412 static void print_running(u64 run, u64 ena) 413 { 414 if (csv_output) { 415 fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", 416 csv_sep, 417 run, 418 csv_sep, 419 ena ? 100.0 * run / ena : 100.0); 420 } else if (run != ena) { 421 fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); 422 } 423 } 424 425 static void print_noise_pct(double total, double avg) 426 { 427 double pct = rel_stddev_stats(total, avg); 428 429 if (csv_output) 430 fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct); 431 else if (pct) 432 fprintf(stat_config.output, " ( +-%6.2f%% )", pct); 433 } 434 435 static void print_noise(struct perf_evsel *evsel, double avg) 436 { 437 struct perf_stat *ps; 438 439 if (run_count == 1) 440 return; 441 442 ps = evsel->priv; 443 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 444 } 445 446 static void aggr_printout(struct perf_evsel *evsel, int id, int nr) 447 { 448 switch (stat_config.aggr_mode) { 449 case AGGR_CORE: 450 fprintf(stat_config.output, "S%d-C%*d%s%*d%s", 451 cpu_map__id_to_socket(id), 452 csv_output ? 0 : -8, 453 cpu_map__id_to_cpu(id), 454 csv_sep, 455 csv_output ? 0 : 4, 456 nr, 457 csv_sep); 458 break; 459 case AGGR_SOCKET: 460 fprintf(stat_config.output, "S%*d%s%*d%s", 461 csv_output ? 0 : -5, 462 id, 463 csv_sep, 464 csv_output ? 0 : 4, 465 nr, 466 csv_sep); 467 break; 468 case AGGR_NONE: 469 fprintf(stat_config.output, "CPU%*d%s", 470 csv_output ? 0 : -4, 471 perf_evsel__cpus(evsel)->map[id], csv_sep); 472 break; 473 case AGGR_THREAD: 474 fprintf(stat_config.output, "%*s-%*d%s", 475 csv_output ? 0 : 16, 476 thread_map__comm(evsel->threads, id), 477 csv_output ? 0 : -8, 478 thread_map__pid(evsel->threads, id), 479 csv_sep); 480 break; 481 case AGGR_GLOBAL: 482 default: 483 break; 484 } 485 } 486 487 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) 488 { 489 FILE *output = stat_config.output; 490 double msecs = avg / 1e6; 491 const char *fmt_v, *fmt_n; 492 char name[25]; 493 494 fmt_v = csv_output ? "%.6f%s" : "%18.6f%s"; 495 fmt_n = csv_output ? "%s" : "%-25s"; 496 497 aggr_printout(evsel, id, nr); 498 499 scnprintf(name, sizeof(name), "%s%s", 500 perf_evsel__name(evsel), csv_output ? "" : " (msec)"); 501 502 fprintf(output, fmt_v, msecs, csv_sep); 503 504 if (csv_output) 505 fprintf(output, "%s%s", evsel->unit, csv_sep); 506 else 507 fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep); 508 509 fprintf(output, fmt_n, name); 510 511 if (evsel->cgrp) 512 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 513 514 if (csv_output || stat_config.interval) 515 return; 516 517 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 518 fprintf(output, " # %8.3f CPUs utilized ", 519 avg / avg_stats(&walltime_nsecs_stats)); 520 else 521 fprintf(output, " "); 522 } 523 524 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 525 { 526 FILE *output = stat_config.output; 527 double sc = evsel->scale; 528 const char *fmt; 529 int cpu = cpu_map__id_to_cpu(id); 530 531 if (csv_output) { 532 fmt = sc != 1.0 ? "%.2f%s" : "%.0f%s"; 533 } else { 534 if (big_num) 535 fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s"; 536 else 537 fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s"; 538 } 539 540 aggr_printout(evsel, id, nr); 541 542 if (stat_config.aggr_mode == AGGR_GLOBAL) 543 cpu = 0; 544 545 fprintf(output, fmt, avg, csv_sep); 546 547 if (evsel->unit) 548 fprintf(output, "%-*s%s", 549 csv_output ? 0 : unit_width, 550 evsel->unit, csv_sep); 551 552 fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); 553 554 if (evsel->cgrp) 555 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 556 557 if (csv_output || stat_config.interval) 558 return; 559 560 perf_stat__print_shadow_stats(output, evsel, avg, cpu, 561 stat_config.aggr_mode); 562 } 563 564 static void print_aggr(char *prefix) 565 { 566 FILE *output = stat_config.output; 567 struct perf_evsel *counter; 568 int cpu, s, s2, id, nr; 569 double uval; 570 u64 ena, run, val; 571 572 if (!(aggr_map || aggr_get_id)) 573 return; 574 575 for (s = 0; s < aggr_map->nr; s++) { 576 id = aggr_map->map[s]; 577 evlist__for_each(evsel_list, counter) { 578 val = ena = run = 0; 579 nr = 0; 580 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 581 s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); 582 if (s2 != id) 583 continue; 584 val += perf_counts(counter->counts, cpu, 0)->val; 585 ena += perf_counts(counter->counts, cpu, 0)->ena; 586 run += perf_counts(counter->counts, cpu, 0)->run; 587 nr++; 588 } 589 if (prefix) 590 fprintf(output, "%s", prefix); 591 592 if (run == 0 || ena == 0) { 593 aggr_printout(counter, id, nr); 594 595 fprintf(output, "%*s%s", 596 csv_output ? 0 : 18, 597 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 598 csv_sep); 599 600 fprintf(output, "%-*s%s", 601 csv_output ? 0 : unit_width, 602 counter->unit, csv_sep); 603 604 fprintf(output, "%*s", 605 csv_output ? 0 : -25, 606 perf_evsel__name(counter)); 607 608 if (counter->cgrp) 609 fprintf(output, "%s%s", 610 csv_sep, counter->cgrp->name); 611 612 print_running(run, ena); 613 fputc('\n', output); 614 continue; 615 } 616 uval = val * counter->scale; 617 618 if (nsec_counter(counter)) 619 nsec_printout(id, nr, counter, uval); 620 else 621 abs_printout(id, nr, counter, uval); 622 623 if (!csv_output) 624 print_noise(counter, 1.0); 625 626 print_running(run, ena); 627 fputc('\n', output); 628 } 629 } 630 } 631 632 static void print_aggr_thread(struct perf_evsel *counter, char *prefix) 633 { 634 FILE *output = stat_config.output; 635 int nthreads = thread_map__nr(counter->threads); 636 int ncpus = cpu_map__nr(counter->cpus); 637 int cpu, thread; 638 double uval; 639 640 for (thread = 0; thread < nthreads; thread++) { 641 u64 ena = 0, run = 0, val = 0; 642 643 for (cpu = 0; cpu < ncpus; cpu++) { 644 val += perf_counts(counter->counts, cpu, thread)->val; 645 ena += perf_counts(counter->counts, cpu, thread)->ena; 646 run += perf_counts(counter->counts, cpu, thread)->run; 647 } 648 649 if (prefix) 650 fprintf(output, "%s", prefix); 651 652 uval = val * counter->scale; 653 654 if (nsec_counter(counter)) 655 nsec_printout(thread, 0, counter, uval); 656 else 657 abs_printout(thread, 0, counter, uval); 658 659 if (!csv_output) 660 print_noise(counter, 1.0); 661 662 print_running(run, ena); 663 fputc('\n', output); 664 } 665 } 666 667 /* 668 * Print out the results of a single counter: 669 * aggregated counts in system-wide mode 670 */ 671 static void print_counter_aggr(struct perf_evsel *counter, char *prefix) 672 { 673 FILE *output = stat_config.output; 674 struct perf_stat *ps = counter->priv; 675 double avg = avg_stats(&ps->res_stats[0]); 676 int scaled = counter->counts->scaled; 677 double uval; 678 double avg_enabled, avg_running; 679 680 avg_enabled = avg_stats(&ps->res_stats[1]); 681 avg_running = avg_stats(&ps->res_stats[2]); 682 683 if (prefix) 684 fprintf(output, "%s", prefix); 685 686 if (scaled == -1 || !counter->supported) { 687 fprintf(output, "%*s%s", 688 csv_output ? 0 : 18, 689 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 690 csv_sep); 691 fprintf(output, "%-*s%s", 692 csv_output ? 0 : unit_width, 693 counter->unit, csv_sep); 694 fprintf(output, "%*s", 695 csv_output ? 0 : -25, 696 perf_evsel__name(counter)); 697 698 if (counter->cgrp) 699 fprintf(output, "%s%s", csv_sep, counter->cgrp->name); 700 701 print_running(avg_running, avg_enabled); 702 fputc('\n', output); 703 return; 704 } 705 706 uval = avg * counter->scale; 707 708 if (nsec_counter(counter)) 709 nsec_printout(-1, 0, counter, uval); 710 else 711 abs_printout(-1, 0, counter, uval); 712 713 print_noise(counter, avg); 714 715 print_running(avg_running, avg_enabled); 716 fprintf(output, "\n"); 717 } 718 719 /* 720 * Print out the results of a single counter: 721 * does not use aggregated count in system-wide 722 */ 723 static void print_counter(struct perf_evsel *counter, char *prefix) 724 { 725 FILE *output = stat_config.output; 726 u64 ena, run, val; 727 double uval; 728 int cpu; 729 730 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 731 val = perf_counts(counter->counts, cpu, 0)->val; 732 ena = perf_counts(counter->counts, cpu, 0)->ena; 733 run = perf_counts(counter->counts, cpu, 0)->run; 734 735 if (prefix) 736 fprintf(output, "%s", prefix); 737 738 if (run == 0 || ena == 0) { 739 fprintf(output, "CPU%*d%s%*s%s", 740 csv_output ? 0 : -4, 741 perf_evsel__cpus(counter)->map[cpu], csv_sep, 742 csv_output ? 0 : 18, 743 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 744 csv_sep); 745 746 fprintf(output, "%-*s%s", 747 csv_output ? 0 : unit_width, 748 counter->unit, csv_sep); 749 750 fprintf(output, "%*s", 751 csv_output ? 0 : -25, 752 perf_evsel__name(counter)); 753 754 if (counter->cgrp) 755 fprintf(output, "%s%s", 756 csv_sep, counter->cgrp->name); 757 758 print_running(run, ena); 759 fputc('\n', output); 760 continue; 761 } 762 763 uval = val * counter->scale; 764 765 if (nsec_counter(counter)) 766 nsec_printout(cpu, 0, counter, uval); 767 else 768 abs_printout(cpu, 0, counter, uval); 769 770 if (!csv_output) 771 print_noise(counter, 1.0); 772 print_running(run, ena); 773 774 fputc('\n', output); 775 } 776 } 777 778 static void print_interval(char *prefix, struct timespec *ts) 779 { 780 FILE *output = stat_config.output; 781 static int num_print_interval; 782 783 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); 784 785 if (num_print_interval == 0 && !csv_output) { 786 switch (stat_config.aggr_mode) { 787 case AGGR_SOCKET: 788 fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); 789 break; 790 case AGGR_CORE: 791 fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit"); 792 break; 793 case AGGR_NONE: 794 fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); 795 break; 796 case AGGR_THREAD: 797 fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit"); 798 break; 799 case AGGR_GLOBAL: 800 default: 801 fprintf(output, "# time counts %*s events\n", unit_width, "unit"); 802 } 803 } 804 805 if (++num_print_interval == 25) 806 num_print_interval = 0; 807 } 808 809 static void print_header(int argc, const char **argv) 810 { 811 FILE *output = stat_config.output; 812 int i; 813 814 fflush(stdout); 815 816 if (!csv_output) { 817 fprintf(output, "\n"); 818 fprintf(output, " Performance counter stats for "); 819 if (target.system_wide) 820 fprintf(output, "\'system wide"); 821 else if (target.cpu_list) 822 fprintf(output, "\'CPU(s) %s", target.cpu_list); 823 else if (!target__has_task(&target)) { 824 fprintf(output, "\'%s", argv[0]); 825 for (i = 1; i < argc; i++) 826 fprintf(output, " %s", argv[i]); 827 } else if (target.pid) 828 fprintf(output, "process id \'%s", target.pid); 829 else 830 fprintf(output, "thread id \'%s", target.tid); 831 832 fprintf(output, "\'"); 833 if (run_count > 1) 834 fprintf(output, " (%d runs)", run_count); 835 fprintf(output, ":\n\n"); 836 } 837 } 838 839 static void print_footer(void) 840 { 841 FILE *output = stat_config.output; 842 843 if (!null_run) 844 fprintf(output, "\n"); 845 fprintf(output, " %17.9f seconds time elapsed", 846 avg_stats(&walltime_nsecs_stats)/1e9); 847 if (run_count > 1) { 848 fprintf(output, " "); 849 print_noise_pct(stddev_stats(&walltime_nsecs_stats), 850 avg_stats(&walltime_nsecs_stats)); 851 } 852 fprintf(output, "\n\n"); 853 } 854 855 static void print_counters(struct timespec *ts, int argc, const char **argv) 856 { 857 int interval = stat_config.interval; 858 struct perf_evsel *counter; 859 char buf[64], *prefix = NULL; 860 861 if (interval) 862 print_interval(prefix = buf, ts); 863 else 864 print_header(argc, argv); 865 866 switch (stat_config.aggr_mode) { 867 case AGGR_CORE: 868 case AGGR_SOCKET: 869 print_aggr(prefix); 870 break; 871 case AGGR_THREAD: 872 evlist__for_each(evsel_list, counter) 873 print_aggr_thread(counter, prefix); 874 break; 875 case AGGR_GLOBAL: 876 evlist__for_each(evsel_list, counter) 877 print_counter_aggr(counter, prefix); 878 break; 879 case AGGR_NONE: 880 evlist__for_each(evsel_list, counter) 881 print_counter(counter, prefix); 882 break; 883 default: 884 break; 885 } 886 887 if (!interval && !csv_output) 888 print_footer(); 889 890 fflush(stat_config.output); 891 } 892 893 static volatile int signr = -1; 894 895 static void skip_signal(int signo) 896 { 897 if ((child_pid == -1) || stat_config.interval) 898 done = 1; 899 900 signr = signo; 901 /* 902 * render child_pid harmless 903 * won't send SIGTERM to a random 904 * process in case of race condition 905 * and fast PID recycling 906 */ 907 child_pid = -1; 908 } 909 910 static void sig_atexit(void) 911 { 912 sigset_t set, oset; 913 914 /* 915 * avoid race condition with SIGCHLD handler 916 * in skip_signal() which is modifying child_pid 917 * goal is to avoid send SIGTERM to a random 918 * process 919 */ 920 sigemptyset(&set); 921 sigaddset(&set, SIGCHLD); 922 sigprocmask(SIG_BLOCK, &set, &oset); 923 924 if (child_pid != -1) 925 kill(child_pid, SIGTERM); 926 927 sigprocmask(SIG_SETMASK, &oset, NULL); 928 929 if (signr == -1) 930 return; 931 932 signal(signr, SIG_DFL); 933 kill(getpid(), signr); 934 } 935 936 static int stat__set_big_num(const struct option *opt __maybe_unused, 937 const char *s __maybe_unused, int unset) 938 { 939 big_num_opt = unset ? 0 : 1; 940 return 0; 941 } 942 943 static int perf_stat_init_aggr_mode(void) 944 { 945 switch (stat_config.aggr_mode) { 946 case AGGR_SOCKET: 947 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { 948 perror("cannot build socket map"); 949 return -1; 950 } 951 aggr_get_id = cpu_map__get_socket; 952 break; 953 case AGGR_CORE: 954 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { 955 perror("cannot build core map"); 956 return -1; 957 } 958 aggr_get_id = cpu_map__get_core; 959 break; 960 case AGGR_NONE: 961 case AGGR_GLOBAL: 962 case AGGR_THREAD: 963 default: 964 break; 965 } 966 return 0; 967 } 968 969 /* 970 * Add default attributes, if there were no attributes specified or 971 * if -d/--detailed, -d -d or -d -d -d is used: 972 */ 973 static int add_default_attributes(void) 974 { 975 struct perf_event_attr default_attrs[] = { 976 977 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 978 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, 979 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, 980 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 981 982 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, 983 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, 984 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, 985 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, 986 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 987 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, 988 989 }; 990 991 /* 992 * Detailed stats (-d), covering the L1 and last level data caches: 993 */ 994 struct perf_event_attr detailed_attrs[] = { 995 996 { .type = PERF_TYPE_HW_CACHE, 997 .config = 998 PERF_COUNT_HW_CACHE_L1D << 0 | 999 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1000 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1001 1002 { .type = PERF_TYPE_HW_CACHE, 1003 .config = 1004 PERF_COUNT_HW_CACHE_L1D << 0 | 1005 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1006 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1007 1008 { .type = PERF_TYPE_HW_CACHE, 1009 .config = 1010 PERF_COUNT_HW_CACHE_LL << 0 | 1011 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1012 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1013 1014 { .type = PERF_TYPE_HW_CACHE, 1015 .config = 1016 PERF_COUNT_HW_CACHE_LL << 0 | 1017 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1018 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1019 }; 1020 1021 /* 1022 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: 1023 */ 1024 struct perf_event_attr very_detailed_attrs[] = { 1025 1026 { .type = PERF_TYPE_HW_CACHE, 1027 .config = 1028 PERF_COUNT_HW_CACHE_L1I << 0 | 1029 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1030 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1031 1032 { .type = PERF_TYPE_HW_CACHE, 1033 .config = 1034 PERF_COUNT_HW_CACHE_L1I << 0 | 1035 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1036 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1037 1038 { .type = PERF_TYPE_HW_CACHE, 1039 .config = 1040 PERF_COUNT_HW_CACHE_DTLB << 0 | 1041 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1042 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1043 1044 { .type = PERF_TYPE_HW_CACHE, 1045 .config = 1046 PERF_COUNT_HW_CACHE_DTLB << 0 | 1047 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1048 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1049 1050 { .type = PERF_TYPE_HW_CACHE, 1051 .config = 1052 PERF_COUNT_HW_CACHE_ITLB << 0 | 1053 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1054 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1055 1056 { .type = PERF_TYPE_HW_CACHE, 1057 .config = 1058 PERF_COUNT_HW_CACHE_ITLB << 0 | 1059 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1060 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1061 1062 }; 1063 1064 /* 1065 * Very, very detailed stats (-d -d -d), adding prefetch events: 1066 */ 1067 struct perf_event_attr very_very_detailed_attrs[] = { 1068 1069 { .type = PERF_TYPE_HW_CACHE, 1070 .config = 1071 PERF_COUNT_HW_CACHE_L1D << 0 | 1072 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 1073 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1074 1075 { .type = PERF_TYPE_HW_CACHE, 1076 .config = 1077 PERF_COUNT_HW_CACHE_L1D << 0 | 1078 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 1079 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1080 }; 1081 1082 /* Set attrs if no event is selected and !null_run: */ 1083 if (null_run) 1084 return 0; 1085 1086 if (transaction_run) { 1087 int err; 1088 if (pmu_have_event("cpu", "cycles-ct") && 1089 pmu_have_event("cpu", "el-start")) 1090 err = parse_events(evsel_list, transaction_attrs, NULL); 1091 else 1092 err = parse_events(evsel_list, transaction_limited_attrs, NULL); 1093 if (err) { 1094 fprintf(stderr, "Cannot set up transaction events\n"); 1095 return -1; 1096 } 1097 return 0; 1098 } 1099 1100 if (!evsel_list->nr_entries) { 1101 if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0) 1102 return -1; 1103 } 1104 1105 /* Detailed events get appended to the event list: */ 1106 1107 if (detailed_run < 1) 1108 return 0; 1109 1110 /* Append detailed run extra attributes: */ 1111 if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) 1112 return -1; 1113 1114 if (detailed_run < 2) 1115 return 0; 1116 1117 /* Append very detailed run extra attributes: */ 1118 if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) 1119 return -1; 1120 1121 if (detailed_run < 3) 1122 return 0; 1123 1124 /* Append very, very detailed run extra attributes: */ 1125 return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); 1126 } 1127 1128 int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) 1129 { 1130 bool append_file = false; 1131 int output_fd = 0; 1132 const char *output_name = NULL; 1133 const struct option options[] = { 1134 OPT_BOOLEAN('T', "transaction", &transaction_run, 1135 "hardware transaction statistics"), 1136 OPT_CALLBACK('e', "event", &evsel_list, "event", 1137 "event selector. use 'perf list' to list available events", 1138 parse_events_option), 1139 OPT_CALLBACK(0, "filter", &evsel_list, "filter", 1140 "event filter", parse_filter), 1141 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 1142 "child tasks do not inherit counters"), 1143 OPT_STRING('p', "pid", &target.pid, "pid", 1144 "stat events on existing process id"), 1145 OPT_STRING('t', "tid", &target.tid, "tid", 1146 "stat events on existing thread id"), 1147 OPT_BOOLEAN('a', "all-cpus", &target.system_wide, 1148 "system-wide collection from all CPUs"), 1149 OPT_BOOLEAN('g', "group", &group, 1150 "put the counters into a counter group"), 1151 OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), 1152 OPT_INCR('v', "verbose", &verbose, 1153 "be more verbose (show counter open errors, etc)"), 1154 OPT_INTEGER('r', "repeat", &run_count, 1155 "repeat command and print average + stddev (max: 100, forever: 0)"), 1156 OPT_BOOLEAN('n', "null", &null_run, 1157 "null run - dont start any counters"), 1158 OPT_INCR('d', "detailed", &detailed_run, 1159 "detailed run - start a lot of events"), 1160 OPT_BOOLEAN('S', "sync", &sync_run, 1161 "call sync() before starting a run"), 1162 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 1163 "print large numbers with thousands\' separators", 1164 stat__set_big_num), 1165 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 1166 "list of cpus to monitor in system-wide"), 1167 OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, 1168 "disable CPU count aggregation", AGGR_NONE), 1169 OPT_STRING('x', "field-separator", &csv_sep, "separator", 1170 "print counts with custom separator"), 1171 OPT_CALLBACK('G', "cgroup", &evsel_list, "name", 1172 "monitor event in cgroup name only", parse_cgroups), 1173 OPT_STRING('o', "output", &output_name, "file", "output file name"), 1174 OPT_BOOLEAN(0, "append", &append_file, "append to the output file"), 1175 OPT_INTEGER(0, "log-fd", &output_fd, 1176 "log output to fd, instead of stderr"), 1177 OPT_STRING(0, "pre", &pre_cmd, "command", 1178 "command to run prior to the measured command"), 1179 OPT_STRING(0, "post", &post_cmd, "command", 1180 "command to run after to the measured command"), 1181 OPT_UINTEGER('I', "interval-print", &stat_config.interval, 1182 "print counts at regular interval in ms (>= 100)"), 1183 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, 1184 "aggregate counts per processor socket", AGGR_SOCKET), 1185 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, 1186 "aggregate counts per physical processor core", AGGR_CORE), 1187 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, 1188 "aggregate counts per thread", AGGR_THREAD), 1189 OPT_UINTEGER('D', "delay", &initial_delay, 1190 "ms to wait before starting measurement after program start"), 1191 OPT_END() 1192 }; 1193 const char * const stat_usage[] = { 1194 "perf stat [<options>] [<command>]", 1195 NULL 1196 }; 1197 int status = -EINVAL, run_idx; 1198 const char *mode; 1199 FILE *output = stderr; 1200 unsigned int interval; 1201 1202 setlocale(LC_ALL, ""); 1203 1204 evsel_list = perf_evlist__new(); 1205 if (evsel_list == NULL) 1206 return -ENOMEM; 1207 1208 argc = parse_options(argc, argv, options, stat_usage, 1209 PARSE_OPT_STOP_AT_NON_OPTION); 1210 1211 interval = stat_config.interval; 1212 1213 if (output_name && strcmp(output_name, "-")) 1214 output = NULL; 1215 1216 if (output_name && output_fd) { 1217 fprintf(stderr, "cannot use both --output and --log-fd\n"); 1218 parse_options_usage(stat_usage, options, "o", 1); 1219 parse_options_usage(NULL, options, "log-fd", 0); 1220 goto out; 1221 } 1222 1223 if (output_fd < 0) { 1224 fprintf(stderr, "argument to --log-fd must be a > 0\n"); 1225 parse_options_usage(stat_usage, options, "log-fd", 0); 1226 goto out; 1227 } 1228 1229 if (!output) { 1230 struct timespec tm; 1231 mode = append_file ? "a" : "w"; 1232 1233 output = fopen(output_name, mode); 1234 if (!output) { 1235 perror("failed to create output file"); 1236 return -1; 1237 } 1238 clock_gettime(CLOCK_REALTIME, &tm); 1239 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); 1240 } else if (output_fd > 0) { 1241 mode = append_file ? "a" : "w"; 1242 output = fdopen(output_fd, mode); 1243 if (!output) { 1244 perror("Failed opening logfd"); 1245 return -errno; 1246 } 1247 } 1248 1249 stat_config.output = output; 1250 1251 if (csv_sep) { 1252 csv_output = true; 1253 if (!strcmp(csv_sep, "\\t")) 1254 csv_sep = "\t"; 1255 } else 1256 csv_sep = DEFAULT_SEPARATOR; 1257 1258 /* 1259 * let the spreadsheet do the pretty-printing 1260 */ 1261 if (csv_output) { 1262 /* User explicitly passed -B? */ 1263 if (big_num_opt == 1) { 1264 fprintf(stderr, "-B option not supported with -x\n"); 1265 parse_options_usage(stat_usage, options, "B", 1); 1266 parse_options_usage(NULL, options, "x", 1); 1267 goto out; 1268 } else /* Nope, so disable big number formatting */ 1269 big_num = false; 1270 } else if (big_num_opt == 0) /* User passed --no-big-num */ 1271 big_num = false; 1272 1273 if (!argc && target__none(&target)) 1274 usage_with_options(stat_usage, options); 1275 1276 if (run_count < 0) { 1277 pr_err("Run count must be a positive number\n"); 1278 parse_options_usage(stat_usage, options, "r", 1); 1279 goto out; 1280 } else if (run_count == 0) { 1281 forever = true; 1282 run_count = 1; 1283 } 1284 1285 if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { 1286 fprintf(stderr, "The --per-thread option is only available " 1287 "when monitoring via -p -t options.\n"); 1288 parse_options_usage(NULL, options, "p", 1); 1289 parse_options_usage(NULL, options, "t", 1); 1290 goto out; 1291 } 1292 1293 /* 1294 * no_aggr, cgroup are for system-wide only 1295 * --per-thread is aggregated per thread, we dont mix it with cpu mode 1296 */ 1297 if (((stat_config.aggr_mode != AGGR_GLOBAL && 1298 stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) && 1299 !target__has_cpu(&target)) { 1300 fprintf(stderr, "both cgroup and no-aggregation " 1301 "modes only available in system-wide mode\n"); 1302 1303 parse_options_usage(stat_usage, options, "G", 1); 1304 parse_options_usage(NULL, options, "A", 1); 1305 parse_options_usage(NULL, options, "a", 1); 1306 goto out; 1307 } 1308 1309 if (add_default_attributes()) 1310 goto out; 1311 1312 target__validate(&target); 1313 1314 if (perf_evlist__create_maps(evsel_list, &target) < 0) { 1315 if (target__has_task(&target)) { 1316 pr_err("Problems finding threads of monitor\n"); 1317 parse_options_usage(stat_usage, options, "p", 1); 1318 parse_options_usage(NULL, options, "t", 1); 1319 } else if (target__has_cpu(&target)) { 1320 perror("failed to parse CPUs map"); 1321 parse_options_usage(stat_usage, options, "C", 1); 1322 parse_options_usage(NULL, options, "a", 1); 1323 } 1324 goto out; 1325 } 1326 1327 /* 1328 * Initialize thread_map with comm names, 1329 * so we could print it out on output. 1330 */ 1331 if (stat_config.aggr_mode == AGGR_THREAD) 1332 thread_map__read_comms(evsel_list->threads); 1333 1334 if (interval && interval < 100) { 1335 pr_err("print interval must be >= 100ms\n"); 1336 parse_options_usage(stat_usage, options, "I", 1); 1337 goto out; 1338 } 1339 1340 if (perf_evlist__alloc_stats(evsel_list, interval)) 1341 goto out; 1342 1343 if (perf_stat_init_aggr_mode()) 1344 goto out; 1345 1346 /* 1347 * We dont want to block the signals - that would cause 1348 * child tasks to inherit that and Ctrl-C would not work. 1349 * What we want is for Ctrl-C to work in the exec()-ed 1350 * task, but being ignored by perf stat itself: 1351 */ 1352 atexit(sig_atexit); 1353 if (!forever) 1354 signal(SIGINT, skip_signal); 1355 signal(SIGCHLD, skip_signal); 1356 signal(SIGALRM, skip_signal); 1357 signal(SIGABRT, skip_signal); 1358 1359 status = 0; 1360 for (run_idx = 0; forever || run_idx < run_count; run_idx++) { 1361 if (run_count != 1 && verbose) 1362 fprintf(output, "[ perf stat: executing run #%d ... ]\n", 1363 run_idx + 1); 1364 1365 status = run_perf_stat(argc, argv); 1366 if (forever && status != -1) { 1367 print_counters(NULL, argc, argv); 1368 perf_stat__reset_stats(); 1369 } 1370 } 1371 1372 if (!forever && status != -1 && !interval) 1373 print_counters(NULL, argc, argv); 1374 1375 perf_evlist__free_stats(evsel_list); 1376 out: 1377 perf_evlist__delete(evsel_list); 1378 return status; 1379 } 1380