1 /* 2 * builtin-stat.c 3 * 4 * Builtin stat command: Give a precise performance counters summary 5 * overview about any workload, CPU or specific PID. 6 * 7 * Sample output: 8 9 $ perf stat ./hackbench 10 10 11 Time: 0.118 12 13 Performance counter stats for './hackbench 10': 14 15 1708.761321 task-clock # 11.037 CPUs utilized 16 41,190 context-switches # 0.024 M/sec 17 6,735 CPU-migrations # 0.004 M/sec 18 17,318 page-faults # 0.010 M/sec 19 5,205,202,243 cycles # 3.046 GHz 20 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle 21 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle 22 2,603,501,247 instructions # 0.50 insns per cycle 23 # 1.48 stalled cycles per insn 24 484,357,498 branches # 283.455 M/sec 25 6,388,934 branch-misses # 1.32% of all branches 26 27 0.154822978 seconds time elapsed 28 29 * 30 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 31 * 32 * Improvements and fixes by: 33 * 34 * Arjan van de Ven <arjan@linux.intel.com> 35 * Yanmin Zhang <yanmin.zhang@intel.com> 36 * Wu Fengguang <fengguang.wu@intel.com> 37 * Mike Galbraith <efault@gmx.de> 38 * Paul Mackerras <paulus@samba.org> 39 * Jaswinder Singh Rajput <jaswinder@kernel.org> 40 * 41 * Released under the GPL v2. (and only v2, not any later version) 42 */ 43 44 #include "perf.h" 45 #include "builtin.h" 46 #include "util/cgroup.h" 47 #include "util/util.h" 48 #include "util/parse-options.h" 49 #include "util/parse-events.h" 50 #include "util/pmu.h" 51 #include "util/event.h" 52 #include "util/evlist.h" 53 #include "util/evsel.h" 54 #include "util/debug.h" 55 #include "util/color.h" 56 #include "util/stat.h" 57 #include "util/header.h" 58 #include "util/cpumap.h" 59 #include "util/thread.h" 60 #include "util/thread_map.h" 61 62 #include <stdlib.h> 63 #include <sys/prctl.h> 64 #include <locale.h> 65 66 #define DEFAULT_SEPARATOR " " 67 #define CNTR_NOT_SUPPORTED "<not supported>" 68 #define CNTR_NOT_COUNTED "<not counted>" 69 70 static void print_counters(struct timespec *ts, int argc, const char **argv); 71 72 /* Default events used for perf stat -T */ 73 static const char *transaction_attrs = { 74 "task-clock," 75 "{" 76 "instructions," 77 "cycles," 78 "cpu/cycles-t/," 79 "cpu/tx-start/," 80 "cpu/el-start/," 81 "cpu/cycles-ct/" 82 "}" 83 }; 84 85 /* More limited version when the CPU does not have all events. */ 86 static const char * transaction_limited_attrs = { 87 "task-clock," 88 "{" 89 "instructions," 90 "cycles," 91 "cpu/cycles-t/," 92 "cpu/tx-start/" 93 "}" 94 }; 95 96 static struct perf_evlist *evsel_list; 97 98 static struct target target = { 99 .uid = UINT_MAX, 100 }; 101 102 static int run_count = 1; 103 static bool no_inherit = false; 104 static bool scale = true; 105 static enum aggr_mode aggr_mode = AGGR_GLOBAL; 106 static volatile pid_t child_pid = -1; 107 static bool null_run = false; 108 static int detailed_run = 0; 109 static bool transaction_run; 110 static bool big_num = true; 111 static int big_num_opt = -1; 112 static const char *csv_sep = NULL; 113 static bool csv_output = false; 114 static bool group = false; 115 static FILE *output = NULL; 116 static const char *pre_cmd = NULL; 117 static const char *post_cmd = NULL; 118 static bool sync_run = false; 119 static unsigned int interval = 0; 120 static unsigned int initial_delay = 0; 121 static unsigned int unit_width = 4; /* strlen("unit") */ 122 static bool forever = false; 123 static struct timespec ref_time; 124 static struct cpu_map *aggr_map; 125 static int (*aggr_get_id)(struct cpu_map *m, int cpu); 126 127 static volatile int done = 0; 128 129 static inline void diff_timespec(struct timespec *r, struct timespec *a, 130 struct timespec *b) 131 { 132 r->tv_sec = a->tv_sec - b->tv_sec; 133 if (a->tv_nsec < b->tv_nsec) { 134 r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec; 135 r->tv_sec--; 136 } else { 137 r->tv_nsec = a->tv_nsec - b->tv_nsec ; 138 } 139 } 140 141 static void perf_stat__reset_stats(void) 142 { 143 perf_evlist__reset_stats(evsel_list); 144 perf_stat__reset_shadow_stats(); 145 } 146 147 static int create_perf_stat_counter(struct perf_evsel *evsel) 148 { 149 struct perf_event_attr *attr = &evsel->attr; 150 151 if (scale) 152 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 153 PERF_FORMAT_TOTAL_TIME_RUNNING; 154 155 attr->inherit = !no_inherit; 156 157 if (target__has_cpu(&target)) 158 return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); 159 160 if (!target__has_task(&target) && perf_evsel__is_group_leader(evsel)) { 161 attr->disabled = 1; 162 if (!initial_delay) 163 attr->enable_on_exec = 1; 164 } 165 166 return perf_evsel__open_per_thread(evsel, evsel_list->threads); 167 } 168 169 /* 170 * Does the counter have nsecs as a unit? 171 */ 172 static inline int nsec_counter(struct perf_evsel *evsel) 173 { 174 if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || 175 perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 176 return 1; 177 178 return 0; 179 } 180 181 static void zero_per_pkg(struct perf_evsel *counter) 182 { 183 if (counter->per_pkg_mask) 184 memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); 185 } 186 187 static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) 188 { 189 unsigned long *mask = counter->per_pkg_mask; 190 struct cpu_map *cpus = perf_evsel__cpus(counter); 191 int s; 192 193 *skip = false; 194 195 if (!counter->per_pkg) 196 return 0; 197 198 if (cpu_map__empty(cpus)) 199 return 0; 200 201 if (!mask) { 202 mask = zalloc(MAX_NR_CPUS); 203 if (!mask) 204 return -ENOMEM; 205 206 counter->per_pkg_mask = mask; 207 } 208 209 s = cpu_map__get_socket(cpus, cpu); 210 if (s < 0) 211 return -1; 212 213 *skip = test_and_set_bit(s, mask) == 1; 214 return 0; 215 } 216 217 static int 218 process_counter_values(struct perf_evsel *evsel, int cpu, int thread, 219 struct perf_counts_values *count) 220 { 221 struct perf_counts_values *aggr = &evsel->counts->aggr; 222 static struct perf_counts_values zero; 223 bool skip = false; 224 225 if (check_per_pkg(evsel, cpu, &skip)) { 226 pr_err("failed to read per-pkg counter\n"); 227 return -1; 228 } 229 230 if (skip) 231 count = &zero; 232 233 switch (aggr_mode) { 234 case AGGR_THREAD: 235 case AGGR_CORE: 236 case AGGR_SOCKET: 237 case AGGR_NONE: 238 if (!evsel->snapshot) 239 perf_evsel__compute_deltas(evsel, cpu, thread, count); 240 perf_counts_values__scale(count, scale, NULL); 241 if (aggr_mode == AGGR_NONE) 242 perf_stat__update_shadow_stats(evsel, count->values, cpu); 243 break; 244 case AGGR_GLOBAL: 245 aggr->val += count->val; 246 if (scale) { 247 aggr->ena += count->ena; 248 aggr->run += count->run; 249 } 250 default: 251 break; 252 } 253 254 return 0; 255 } 256 257 static int process_counter_maps(struct perf_evsel *counter) 258 { 259 int nthreads = thread_map__nr(counter->threads); 260 int ncpus = perf_evsel__nr_cpus(counter); 261 int cpu, thread; 262 263 if (counter->system_wide) 264 nthreads = 1; 265 266 for (thread = 0; thread < nthreads; thread++) { 267 for (cpu = 0; cpu < ncpus; cpu++) { 268 if (process_counter_values(counter, cpu, thread, 269 perf_counts(counter->counts, cpu, thread))) 270 return -1; 271 } 272 } 273 274 return 0; 275 } 276 277 static int process_counter(struct perf_evsel *counter) 278 { 279 struct perf_counts_values *aggr = &counter->counts->aggr; 280 struct perf_stat *ps = counter->priv; 281 u64 *count = counter->counts->aggr.values; 282 int i, ret; 283 284 aggr->val = aggr->ena = aggr->run = 0; 285 init_stats(ps->res_stats); 286 287 if (counter->per_pkg) 288 zero_per_pkg(counter); 289 290 ret = process_counter_maps(counter); 291 if (ret) 292 return ret; 293 294 if (aggr_mode != AGGR_GLOBAL) 295 return 0; 296 297 if (!counter->snapshot) 298 perf_evsel__compute_deltas(counter, -1, -1, aggr); 299 perf_counts_values__scale(aggr, scale, &counter->counts->scaled); 300 301 for (i = 0; i < 3; i++) 302 update_stats(&ps->res_stats[i], count[i]); 303 304 if (verbose) { 305 fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", 306 perf_evsel__name(counter), count[0], count[1], count[2]); 307 } 308 309 /* 310 * Save the full runtime - to allow normalization during printout: 311 */ 312 perf_stat__update_shadow_stats(counter, count, 0); 313 314 return 0; 315 } 316 317 /* 318 * Read out the results of a single counter: 319 * do not aggregate counts across CPUs in system-wide mode 320 */ 321 static int read_counter(struct perf_evsel *counter) 322 { 323 int nthreads = thread_map__nr(evsel_list->threads); 324 int ncpus = perf_evsel__nr_cpus(counter); 325 int cpu, thread; 326 327 if (!counter->supported) 328 return -ENOENT; 329 330 if (counter->system_wide) 331 nthreads = 1; 332 333 for (thread = 0; thread < nthreads; thread++) { 334 for (cpu = 0; cpu < ncpus; cpu++) { 335 struct perf_counts_values *count; 336 337 count = perf_counts(counter->counts, cpu, thread); 338 if (perf_evsel__read(counter, cpu, thread, count)) 339 return -1; 340 } 341 } 342 343 return 0; 344 } 345 346 static void read_counters(bool close_counters) 347 { 348 struct perf_evsel *counter; 349 350 evlist__for_each(evsel_list, counter) { 351 if (read_counter(counter)) 352 pr_warning("failed to read counter %s\n", counter->name); 353 354 if (process_counter(counter)) 355 pr_warning("failed to process counter %s\n", counter->name); 356 357 if (close_counters) { 358 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 359 thread_map__nr(evsel_list->threads)); 360 } 361 } 362 } 363 364 static void process_interval(void) 365 { 366 struct timespec ts, rs; 367 368 read_counters(false); 369 370 clock_gettime(CLOCK_MONOTONIC, &ts); 371 diff_timespec(&rs, &ts, &ref_time); 372 373 print_counters(&rs, 0, NULL); 374 } 375 376 static void handle_initial_delay(void) 377 { 378 struct perf_evsel *counter; 379 380 if (initial_delay) { 381 const int ncpus = cpu_map__nr(evsel_list->cpus), 382 nthreads = thread_map__nr(evsel_list->threads); 383 384 usleep(initial_delay * 1000); 385 evlist__for_each(evsel_list, counter) 386 perf_evsel__enable(counter, ncpus, nthreads); 387 } 388 } 389 390 static volatile int workload_exec_errno; 391 392 /* 393 * perf_evlist__prepare_workload will send a SIGUSR1 394 * if the fork fails, since we asked by setting its 395 * want_signal to true. 396 */ 397 static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info, 398 void *ucontext __maybe_unused) 399 { 400 workload_exec_errno = info->si_value.sival_int; 401 } 402 403 static int __run_perf_stat(int argc, const char **argv) 404 { 405 char msg[512]; 406 unsigned long long t0, t1; 407 struct perf_evsel *counter; 408 struct timespec ts; 409 size_t l; 410 int status = 0; 411 const bool forks = (argc > 0); 412 413 if (interval) { 414 ts.tv_sec = interval / 1000; 415 ts.tv_nsec = (interval % 1000) * 1000000; 416 } else { 417 ts.tv_sec = 1; 418 ts.tv_nsec = 0; 419 } 420 421 if (forks) { 422 if (perf_evlist__prepare_workload(evsel_list, &target, argv, false, 423 workload_exec_failed_signal) < 0) { 424 perror("failed to prepare workload"); 425 return -1; 426 } 427 child_pid = evsel_list->workload.pid; 428 } 429 430 if (group) 431 perf_evlist__set_leader(evsel_list); 432 433 evlist__for_each(evsel_list, counter) { 434 if (create_perf_stat_counter(counter) < 0) { 435 /* 436 * PPC returns ENXIO for HW counters until 2.6.37 437 * (behavior changed with commit b0a873e). 438 */ 439 if (errno == EINVAL || errno == ENOSYS || 440 errno == ENOENT || errno == EOPNOTSUPP || 441 errno == ENXIO) { 442 if (verbose) 443 ui__warning("%s event is not supported by the kernel.\n", 444 perf_evsel__name(counter)); 445 counter->supported = false; 446 447 if ((counter->leader != counter) || 448 !(counter->leader->nr_members > 1)) 449 continue; 450 } 451 452 perf_evsel__open_strerror(counter, &target, 453 errno, msg, sizeof(msg)); 454 ui__error("%s\n", msg); 455 456 if (child_pid != -1) 457 kill(child_pid, SIGTERM); 458 459 return -1; 460 } 461 counter->supported = true; 462 463 l = strlen(counter->unit); 464 if (l > unit_width) 465 unit_width = l; 466 } 467 468 if (perf_evlist__apply_filters(evsel_list, &counter)) { 469 error("failed to set filter \"%s\" on event %s with %d (%s)\n", 470 counter->filter, perf_evsel__name(counter), errno, 471 strerror_r(errno, msg, sizeof(msg))); 472 return -1; 473 } 474 475 /* 476 * Enable counters and exec the command: 477 */ 478 t0 = rdclock(); 479 clock_gettime(CLOCK_MONOTONIC, &ref_time); 480 481 if (forks) { 482 perf_evlist__start_workload(evsel_list); 483 handle_initial_delay(); 484 485 if (interval) { 486 while (!waitpid(child_pid, &status, WNOHANG)) { 487 nanosleep(&ts, NULL); 488 process_interval(); 489 } 490 } 491 wait(&status); 492 493 if (workload_exec_errno) { 494 const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg)); 495 pr_err("Workload failed: %s\n", emsg); 496 return -1; 497 } 498 499 if (WIFSIGNALED(status)) 500 psignal(WTERMSIG(status), argv[0]); 501 } else { 502 handle_initial_delay(); 503 while (!done) { 504 nanosleep(&ts, NULL); 505 if (interval) 506 process_interval(); 507 } 508 } 509 510 t1 = rdclock(); 511 512 update_stats(&walltime_nsecs_stats, t1 - t0); 513 514 read_counters(true); 515 516 return WEXITSTATUS(status); 517 } 518 519 static int run_perf_stat(int argc, const char **argv) 520 { 521 int ret; 522 523 if (pre_cmd) { 524 ret = system(pre_cmd); 525 if (ret) 526 return ret; 527 } 528 529 if (sync_run) 530 sync(); 531 532 ret = __run_perf_stat(argc, argv); 533 if (ret) 534 return ret; 535 536 if (post_cmd) { 537 ret = system(post_cmd); 538 if (ret) 539 return ret; 540 } 541 542 return ret; 543 } 544 545 static void print_running(u64 run, u64 ena) 546 { 547 if (csv_output) { 548 fprintf(output, "%s%" PRIu64 "%s%.2f", 549 csv_sep, 550 run, 551 csv_sep, 552 ena ? 100.0 * run / ena : 100.0); 553 } else if (run != ena) { 554 fprintf(output, " (%.2f%%)", 100.0 * run / ena); 555 } 556 } 557 558 static void print_noise_pct(double total, double avg) 559 { 560 double pct = rel_stddev_stats(total, avg); 561 562 if (csv_output) 563 fprintf(output, "%s%.2f%%", csv_sep, pct); 564 else if (pct) 565 fprintf(output, " ( +-%6.2f%% )", pct); 566 } 567 568 static void print_noise(struct perf_evsel *evsel, double avg) 569 { 570 struct perf_stat *ps; 571 572 if (run_count == 1) 573 return; 574 575 ps = evsel->priv; 576 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 577 } 578 579 static void aggr_printout(struct perf_evsel *evsel, int id, int nr) 580 { 581 switch (aggr_mode) { 582 case AGGR_CORE: 583 fprintf(output, "S%d-C%*d%s%*d%s", 584 cpu_map__id_to_socket(id), 585 csv_output ? 0 : -8, 586 cpu_map__id_to_cpu(id), 587 csv_sep, 588 csv_output ? 0 : 4, 589 nr, 590 csv_sep); 591 break; 592 case AGGR_SOCKET: 593 fprintf(output, "S%*d%s%*d%s", 594 csv_output ? 0 : -5, 595 id, 596 csv_sep, 597 csv_output ? 0 : 4, 598 nr, 599 csv_sep); 600 break; 601 case AGGR_NONE: 602 fprintf(output, "CPU%*d%s", 603 csv_output ? 0 : -4, 604 perf_evsel__cpus(evsel)->map[id], csv_sep); 605 break; 606 case AGGR_THREAD: 607 fprintf(output, "%*s-%*d%s", 608 csv_output ? 0 : 16, 609 thread_map__comm(evsel->threads, id), 610 csv_output ? 0 : -8, 611 thread_map__pid(evsel->threads, id), 612 csv_sep); 613 break; 614 case AGGR_GLOBAL: 615 default: 616 break; 617 } 618 } 619 620 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) 621 { 622 double msecs = avg / 1e6; 623 const char *fmt_v, *fmt_n; 624 char name[25]; 625 626 fmt_v = csv_output ? "%.6f%s" : "%18.6f%s"; 627 fmt_n = csv_output ? "%s" : "%-25s"; 628 629 aggr_printout(evsel, id, nr); 630 631 scnprintf(name, sizeof(name), "%s%s", 632 perf_evsel__name(evsel), csv_output ? "" : " (msec)"); 633 634 fprintf(output, fmt_v, msecs, csv_sep); 635 636 if (csv_output) 637 fprintf(output, "%s%s", evsel->unit, csv_sep); 638 else 639 fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep); 640 641 fprintf(output, fmt_n, name); 642 643 if (evsel->cgrp) 644 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 645 646 if (csv_output || interval) 647 return; 648 649 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 650 fprintf(output, " # %8.3f CPUs utilized ", 651 avg / avg_stats(&walltime_nsecs_stats)); 652 else 653 fprintf(output, " "); 654 } 655 656 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 657 { 658 double sc = evsel->scale; 659 const char *fmt; 660 int cpu = cpu_map__id_to_cpu(id); 661 662 if (csv_output) { 663 fmt = sc != 1.0 ? "%.2f%s" : "%.0f%s"; 664 } else { 665 if (big_num) 666 fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s"; 667 else 668 fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s"; 669 } 670 671 aggr_printout(evsel, id, nr); 672 673 if (aggr_mode == AGGR_GLOBAL) 674 cpu = 0; 675 676 fprintf(output, fmt, avg, csv_sep); 677 678 if (evsel->unit) 679 fprintf(output, "%-*s%s", 680 csv_output ? 0 : unit_width, 681 evsel->unit, csv_sep); 682 683 fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); 684 685 if (evsel->cgrp) 686 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 687 688 if (csv_output || interval) 689 return; 690 691 perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode); 692 } 693 694 static void print_aggr(char *prefix) 695 { 696 struct perf_evsel *counter; 697 int cpu, cpu2, s, s2, id, nr; 698 double uval; 699 u64 ena, run, val; 700 701 if (!(aggr_map || aggr_get_id)) 702 return; 703 704 for (s = 0; s < aggr_map->nr; s++) { 705 id = aggr_map->map[s]; 706 evlist__for_each(evsel_list, counter) { 707 val = ena = run = 0; 708 nr = 0; 709 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 710 cpu2 = perf_evsel__cpus(counter)->map[cpu]; 711 s2 = aggr_get_id(evsel_list->cpus, cpu2); 712 if (s2 != id) 713 continue; 714 val += perf_counts(counter->counts, cpu, 0)->val; 715 ena += perf_counts(counter->counts, cpu, 0)->ena; 716 run += perf_counts(counter->counts, cpu, 0)->run; 717 nr++; 718 } 719 if (prefix) 720 fprintf(output, "%s", prefix); 721 722 if (run == 0 || ena == 0) { 723 aggr_printout(counter, id, nr); 724 725 fprintf(output, "%*s%s", 726 csv_output ? 0 : 18, 727 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 728 csv_sep); 729 730 fprintf(output, "%-*s%s", 731 csv_output ? 0 : unit_width, 732 counter->unit, csv_sep); 733 734 fprintf(output, "%*s", 735 csv_output ? 0 : -25, 736 perf_evsel__name(counter)); 737 738 if (counter->cgrp) 739 fprintf(output, "%s%s", 740 csv_sep, counter->cgrp->name); 741 742 print_running(run, ena); 743 fputc('\n', output); 744 continue; 745 } 746 uval = val * counter->scale; 747 748 if (nsec_counter(counter)) 749 nsec_printout(id, nr, counter, uval); 750 else 751 abs_printout(id, nr, counter, uval); 752 753 if (!csv_output) 754 print_noise(counter, 1.0); 755 756 print_running(run, ena); 757 fputc('\n', output); 758 } 759 } 760 } 761 762 static void print_aggr_thread(struct perf_evsel *counter, char *prefix) 763 { 764 int nthreads = thread_map__nr(counter->threads); 765 int ncpus = cpu_map__nr(counter->cpus); 766 int cpu, thread; 767 double uval; 768 769 for (thread = 0; thread < nthreads; thread++) { 770 u64 ena = 0, run = 0, val = 0; 771 772 for (cpu = 0; cpu < ncpus; cpu++) { 773 val += perf_counts(counter->counts, cpu, thread)->val; 774 ena += perf_counts(counter->counts, cpu, thread)->ena; 775 run += perf_counts(counter->counts, cpu, thread)->run; 776 } 777 778 if (prefix) 779 fprintf(output, "%s", prefix); 780 781 uval = val * counter->scale; 782 783 if (nsec_counter(counter)) 784 nsec_printout(thread, 0, counter, uval); 785 else 786 abs_printout(thread, 0, counter, uval); 787 788 if (!csv_output) 789 print_noise(counter, 1.0); 790 791 print_running(run, ena); 792 fputc('\n', output); 793 } 794 } 795 796 /* 797 * Print out the results of a single counter: 798 * aggregated counts in system-wide mode 799 */ 800 static void print_counter_aggr(struct perf_evsel *counter, char *prefix) 801 { 802 struct perf_stat *ps = counter->priv; 803 double avg = avg_stats(&ps->res_stats[0]); 804 int scaled = counter->counts->scaled; 805 double uval; 806 double avg_enabled, avg_running; 807 808 avg_enabled = avg_stats(&ps->res_stats[1]); 809 avg_running = avg_stats(&ps->res_stats[2]); 810 811 if (prefix) 812 fprintf(output, "%s", prefix); 813 814 if (scaled == -1 || !counter->supported) { 815 fprintf(output, "%*s%s", 816 csv_output ? 0 : 18, 817 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 818 csv_sep); 819 fprintf(output, "%-*s%s", 820 csv_output ? 0 : unit_width, 821 counter->unit, csv_sep); 822 fprintf(output, "%*s", 823 csv_output ? 0 : -25, 824 perf_evsel__name(counter)); 825 826 if (counter->cgrp) 827 fprintf(output, "%s%s", csv_sep, counter->cgrp->name); 828 829 print_running(avg_running, avg_enabled); 830 fputc('\n', output); 831 return; 832 } 833 834 uval = avg * counter->scale; 835 836 if (nsec_counter(counter)) 837 nsec_printout(-1, 0, counter, uval); 838 else 839 abs_printout(-1, 0, counter, uval); 840 841 print_noise(counter, avg); 842 843 print_running(avg_running, avg_enabled); 844 fprintf(output, "\n"); 845 } 846 847 /* 848 * Print out the results of a single counter: 849 * does not use aggregated count in system-wide 850 */ 851 static void print_counter(struct perf_evsel *counter, char *prefix) 852 { 853 u64 ena, run, val; 854 double uval; 855 int cpu; 856 857 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 858 val = perf_counts(counter->counts, cpu, 0)->val; 859 ena = perf_counts(counter->counts, cpu, 0)->ena; 860 run = perf_counts(counter->counts, cpu, 0)->run; 861 862 if (prefix) 863 fprintf(output, "%s", prefix); 864 865 if (run == 0 || ena == 0) { 866 fprintf(output, "CPU%*d%s%*s%s", 867 csv_output ? 0 : -4, 868 perf_evsel__cpus(counter)->map[cpu], csv_sep, 869 csv_output ? 0 : 18, 870 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 871 csv_sep); 872 873 fprintf(output, "%-*s%s", 874 csv_output ? 0 : unit_width, 875 counter->unit, csv_sep); 876 877 fprintf(output, "%*s", 878 csv_output ? 0 : -25, 879 perf_evsel__name(counter)); 880 881 if (counter->cgrp) 882 fprintf(output, "%s%s", 883 csv_sep, counter->cgrp->name); 884 885 print_running(run, ena); 886 fputc('\n', output); 887 continue; 888 } 889 890 uval = val * counter->scale; 891 892 if (nsec_counter(counter)) 893 nsec_printout(cpu, 0, counter, uval); 894 else 895 abs_printout(cpu, 0, counter, uval); 896 897 if (!csv_output) 898 print_noise(counter, 1.0); 899 print_running(run, ena); 900 901 fputc('\n', output); 902 } 903 } 904 905 static void print_interval(char *prefix, struct timespec *ts) 906 { 907 static int num_print_interval; 908 909 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); 910 911 if (num_print_interval == 0 && !csv_output) { 912 switch (aggr_mode) { 913 case AGGR_SOCKET: 914 fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); 915 break; 916 case AGGR_CORE: 917 fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit"); 918 break; 919 case AGGR_NONE: 920 fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); 921 break; 922 case AGGR_THREAD: 923 fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit"); 924 break; 925 case AGGR_GLOBAL: 926 default: 927 fprintf(output, "# time counts %*s events\n", unit_width, "unit"); 928 } 929 } 930 931 if (++num_print_interval == 25) 932 num_print_interval = 0; 933 } 934 935 static void print_header(int argc, const char **argv) 936 { 937 int i; 938 939 fflush(stdout); 940 941 if (!csv_output) { 942 fprintf(output, "\n"); 943 fprintf(output, " Performance counter stats for "); 944 if (target.system_wide) 945 fprintf(output, "\'system wide"); 946 else if (target.cpu_list) 947 fprintf(output, "\'CPU(s) %s", target.cpu_list); 948 else if (!target__has_task(&target)) { 949 fprintf(output, "\'%s", argv[0]); 950 for (i = 1; i < argc; i++) 951 fprintf(output, " %s", argv[i]); 952 } else if (target.pid) 953 fprintf(output, "process id \'%s", target.pid); 954 else 955 fprintf(output, "thread id \'%s", target.tid); 956 957 fprintf(output, "\'"); 958 if (run_count > 1) 959 fprintf(output, " (%d runs)", run_count); 960 fprintf(output, ":\n\n"); 961 } 962 } 963 964 static void print_footer(void) 965 { 966 if (!null_run) 967 fprintf(output, "\n"); 968 fprintf(output, " %17.9f seconds time elapsed", 969 avg_stats(&walltime_nsecs_stats)/1e9); 970 if (run_count > 1) { 971 fprintf(output, " "); 972 print_noise_pct(stddev_stats(&walltime_nsecs_stats), 973 avg_stats(&walltime_nsecs_stats)); 974 } 975 fprintf(output, "\n\n"); 976 } 977 978 static void print_counters(struct timespec *ts, int argc, const char **argv) 979 { 980 struct perf_evsel *counter; 981 char buf[64], *prefix = NULL; 982 983 if (interval) 984 print_interval(prefix = buf, ts); 985 else 986 print_header(argc, argv); 987 988 switch (aggr_mode) { 989 case AGGR_CORE: 990 case AGGR_SOCKET: 991 print_aggr(prefix); 992 break; 993 case AGGR_THREAD: 994 evlist__for_each(evsel_list, counter) 995 print_aggr_thread(counter, prefix); 996 break; 997 case AGGR_GLOBAL: 998 evlist__for_each(evsel_list, counter) 999 print_counter_aggr(counter, prefix); 1000 break; 1001 case AGGR_NONE: 1002 evlist__for_each(evsel_list, counter) 1003 print_counter(counter, prefix); 1004 break; 1005 default: 1006 break; 1007 } 1008 1009 if (!interval && !csv_output) 1010 print_footer(); 1011 1012 fflush(output); 1013 } 1014 1015 static volatile int signr = -1; 1016 1017 static void skip_signal(int signo) 1018 { 1019 if ((child_pid == -1) || interval) 1020 done = 1; 1021 1022 signr = signo; 1023 /* 1024 * render child_pid harmless 1025 * won't send SIGTERM to a random 1026 * process in case of race condition 1027 * and fast PID recycling 1028 */ 1029 child_pid = -1; 1030 } 1031 1032 static void sig_atexit(void) 1033 { 1034 sigset_t set, oset; 1035 1036 /* 1037 * avoid race condition with SIGCHLD handler 1038 * in skip_signal() which is modifying child_pid 1039 * goal is to avoid send SIGTERM to a random 1040 * process 1041 */ 1042 sigemptyset(&set); 1043 sigaddset(&set, SIGCHLD); 1044 sigprocmask(SIG_BLOCK, &set, &oset); 1045 1046 if (child_pid != -1) 1047 kill(child_pid, SIGTERM); 1048 1049 sigprocmask(SIG_SETMASK, &oset, NULL); 1050 1051 if (signr == -1) 1052 return; 1053 1054 signal(signr, SIG_DFL); 1055 kill(getpid(), signr); 1056 } 1057 1058 static int stat__set_big_num(const struct option *opt __maybe_unused, 1059 const char *s __maybe_unused, int unset) 1060 { 1061 big_num_opt = unset ? 0 : 1; 1062 return 0; 1063 } 1064 1065 static int perf_stat_init_aggr_mode(void) 1066 { 1067 switch (aggr_mode) { 1068 case AGGR_SOCKET: 1069 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { 1070 perror("cannot build socket map"); 1071 return -1; 1072 } 1073 aggr_get_id = cpu_map__get_socket; 1074 break; 1075 case AGGR_CORE: 1076 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { 1077 perror("cannot build core map"); 1078 return -1; 1079 } 1080 aggr_get_id = cpu_map__get_core; 1081 break; 1082 case AGGR_NONE: 1083 case AGGR_GLOBAL: 1084 case AGGR_THREAD: 1085 default: 1086 break; 1087 } 1088 return 0; 1089 } 1090 1091 /* 1092 * Add default attributes, if there were no attributes specified or 1093 * if -d/--detailed, -d -d or -d -d -d is used: 1094 */ 1095 static int add_default_attributes(void) 1096 { 1097 struct perf_event_attr default_attrs[] = { 1098 1099 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 1100 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, 1101 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, 1102 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 1103 1104 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, 1105 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, 1106 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, 1107 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, 1108 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 1109 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, 1110 1111 }; 1112 1113 /* 1114 * Detailed stats (-d), covering the L1 and last level data caches: 1115 */ 1116 struct perf_event_attr detailed_attrs[] = { 1117 1118 { .type = PERF_TYPE_HW_CACHE, 1119 .config = 1120 PERF_COUNT_HW_CACHE_L1D << 0 | 1121 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1122 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1123 1124 { .type = PERF_TYPE_HW_CACHE, 1125 .config = 1126 PERF_COUNT_HW_CACHE_L1D << 0 | 1127 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1128 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1129 1130 { .type = PERF_TYPE_HW_CACHE, 1131 .config = 1132 PERF_COUNT_HW_CACHE_LL << 0 | 1133 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1134 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1135 1136 { .type = PERF_TYPE_HW_CACHE, 1137 .config = 1138 PERF_COUNT_HW_CACHE_LL << 0 | 1139 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1140 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1141 }; 1142 1143 /* 1144 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: 1145 */ 1146 struct perf_event_attr very_detailed_attrs[] = { 1147 1148 { .type = PERF_TYPE_HW_CACHE, 1149 .config = 1150 PERF_COUNT_HW_CACHE_L1I << 0 | 1151 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1152 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1153 1154 { .type = PERF_TYPE_HW_CACHE, 1155 .config = 1156 PERF_COUNT_HW_CACHE_L1I << 0 | 1157 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1158 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1159 1160 { .type = PERF_TYPE_HW_CACHE, 1161 .config = 1162 PERF_COUNT_HW_CACHE_DTLB << 0 | 1163 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1164 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1165 1166 { .type = PERF_TYPE_HW_CACHE, 1167 .config = 1168 PERF_COUNT_HW_CACHE_DTLB << 0 | 1169 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1170 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1171 1172 { .type = PERF_TYPE_HW_CACHE, 1173 .config = 1174 PERF_COUNT_HW_CACHE_ITLB << 0 | 1175 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1176 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1177 1178 { .type = PERF_TYPE_HW_CACHE, 1179 .config = 1180 PERF_COUNT_HW_CACHE_ITLB << 0 | 1181 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1182 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1183 1184 }; 1185 1186 /* 1187 * Very, very detailed stats (-d -d -d), adding prefetch events: 1188 */ 1189 struct perf_event_attr very_very_detailed_attrs[] = { 1190 1191 { .type = PERF_TYPE_HW_CACHE, 1192 .config = 1193 PERF_COUNT_HW_CACHE_L1D << 0 | 1194 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 1195 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1196 1197 { .type = PERF_TYPE_HW_CACHE, 1198 .config = 1199 PERF_COUNT_HW_CACHE_L1D << 0 | 1200 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 1201 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1202 }; 1203 1204 /* Set attrs if no event is selected and !null_run: */ 1205 if (null_run) 1206 return 0; 1207 1208 if (transaction_run) { 1209 int err; 1210 if (pmu_have_event("cpu", "cycles-ct") && 1211 pmu_have_event("cpu", "el-start")) 1212 err = parse_events(evsel_list, transaction_attrs, NULL); 1213 else 1214 err = parse_events(evsel_list, transaction_limited_attrs, NULL); 1215 if (err) { 1216 fprintf(stderr, "Cannot set up transaction events\n"); 1217 return -1; 1218 } 1219 return 0; 1220 } 1221 1222 if (!evsel_list->nr_entries) { 1223 if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0) 1224 return -1; 1225 } 1226 1227 /* Detailed events get appended to the event list: */ 1228 1229 if (detailed_run < 1) 1230 return 0; 1231 1232 /* Append detailed run extra attributes: */ 1233 if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) 1234 return -1; 1235 1236 if (detailed_run < 2) 1237 return 0; 1238 1239 /* Append very detailed run extra attributes: */ 1240 if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) 1241 return -1; 1242 1243 if (detailed_run < 3) 1244 return 0; 1245 1246 /* Append very, very detailed run extra attributes: */ 1247 return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); 1248 } 1249 1250 int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) 1251 { 1252 bool append_file = false; 1253 int output_fd = 0; 1254 const char *output_name = NULL; 1255 const struct option options[] = { 1256 OPT_BOOLEAN('T', "transaction", &transaction_run, 1257 "hardware transaction statistics"), 1258 OPT_CALLBACK('e', "event", &evsel_list, "event", 1259 "event selector. use 'perf list' to list available events", 1260 parse_events_option), 1261 OPT_CALLBACK(0, "filter", &evsel_list, "filter", 1262 "event filter", parse_filter), 1263 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 1264 "child tasks do not inherit counters"), 1265 OPT_STRING('p', "pid", &target.pid, "pid", 1266 "stat events on existing process id"), 1267 OPT_STRING('t', "tid", &target.tid, "tid", 1268 "stat events on existing thread id"), 1269 OPT_BOOLEAN('a', "all-cpus", &target.system_wide, 1270 "system-wide collection from all CPUs"), 1271 OPT_BOOLEAN('g', "group", &group, 1272 "put the counters into a counter group"), 1273 OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"), 1274 OPT_INCR('v', "verbose", &verbose, 1275 "be more verbose (show counter open errors, etc)"), 1276 OPT_INTEGER('r', "repeat", &run_count, 1277 "repeat command and print average + stddev (max: 100, forever: 0)"), 1278 OPT_BOOLEAN('n', "null", &null_run, 1279 "null run - dont start any counters"), 1280 OPT_INCR('d', "detailed", &detailed_run, 1281 "detailed run - start a lot of events"), 1282 OPT_BOOLEAN('S', "sync", &sync_run, 1283 "call sync() before starting a run"), 1284 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 1285 "print large numbers with thousands\' separators", 1286 stat__set_big_num), 1287 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 1288 "list of cpus to monitor in system-wide"), 1289 OPT_SET_UINT('A', "no-aggr", &aggr_mode, 1290 "disable CPU count aggregation", AGGR_NONE), 1291 OPT_STRING('x', "field-separator", &csv_sep, "separator", 1292 "print counts with custom separator"), 1293 OPT_CALLBACK('G', "cgroup", &evsel_list, "name", 1294 "monitor event in cgroup name only", parse_cgroups), 1295 OPT_STRING('o', "output", &output_name, "file", "output file name"), 1296 OPT_BOOLEAN(0, "append", &append_file, "append to the output file"), 1297 OPT_INTEGER(0, "log-fd", &output_fd, 1298 "log output to fd, instead of stderr"), 1299 OPT_STRING(0, "pre", &pre_cmd, "command", 1300 "command to run prior to the measured command"), 1301 OPT_STRING(0, "post", &post_cmd, "command", 1302 "command to run after to the measured command"), 1303 OPT_UINTEGER('I', "interval-print", &interval, 1304 "print counts at regular interval in ms (>= 100)"), 1305 OPT_SET_UINT(0, "per-socket", &aggr_mode, 1306 "aggregate counts per processor socket", AGGR_SOCKET), 1307 OPT_SET_UINT(0, "per-core", &aggr_mode, 1308 "aggregate counts per physical processor core", AGGR_CORE), 1309 OPT_SET_UINT(0, "per-thread", &aggr_mode, 1310 "aggregate counts per thread", AGGR_THREAD), 1311 OPT_UINTEGER('D', "delay", &initial_delay, 1312 "ms to wait before starting measurement after program start"), 1313 OPT_END() 1314 }; 1315 const char * const stat_usage[] = { 1316 "perf stat [<options>] [<command>]", 1317 NULL 1318 }; 1319 int status = -EINVAL, run_idx; 1320 const char *mode; 1321 1322 setlocale(LC_ALL, ""); 1323 1324 evsel_list = perf_evlist__new(); 1325 if (evsel_list == NULL) 1326 return -ENOMEM; 1327 1328 argc = parse_options(argc, argv, options, stat_usage, 1329 PARSE_OPT_STOP_AT_NON_OPTION); 1330 1331 output = stderr; 1332 if (output_name && strcmp(output_name, "-")) 1333 output = NULL; 1334 1335 if (output_name && output_fd) { 1336 fprintf(stderr, "cannot use both --output and --log-fd\n"); 1337 parse_options_usage(stat_usage, options, "o", 1); 1338 parse_options_usage(NULL, options, "log-fd", 0); 1339 goto out; 1340 } 1341 1342 if (output_fd < 0) { 1343 fprintf(stderr, "argument to --log-fd must be a > 0\n"); 1344 parse_options_usage(stat_usage, options, "log-fd", 0); 1345 goto out; 1346 } 1347 1348 if (!output) { 1349 struct timespec tm; 1350 mode = append_file ? "a" : "w"; 1351 1352 output = fopen(output_name, mode); 1353 if (!output) { 1354 perror("failed to create output file"); 1355 return -1; 1356 } 1357 clock_gettime(CLOCK_REALTIME, &tm); 1358 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); 1359 } else if (output_fd > 0) { 1360 mode = append_file ? "a" : "w"; 1361 output = fdopen(output_fd, mode); 1362 if (!output) { 1363 perror("Failed opening logfd"); 1364 return -errno; 1365 } 1366 } 1367 1368 if (csv_sep) { 1369 csv_output = true; 1370 if (!strcmp(csv_sep, "\\t")) 1371 csv_sep = "\t"; 1372 } else 1373 csv_sep = DEFAULT_SEPARATOR; 1374 1375 /* 1376 * let the spreadsheet do the pretty-printing 1377 */ 1378 if (csv_output) { 1379 /* User explicitly passed -B? */ 1380 if (big_num_opt == 1) { 1381 fprintf(stderr, "-B option not supported with -x\n"); 1382 parse_options_usage(stat_usage, options, "B", 1); 1383 parse_options_usage(NULL, options, "x", 1); 1384 goto out; 1385 } else /* Nope, so disable big number formatting */ 1386 big_num = false; 1387 } else if (big_num_opt == 0) /* User passed --no-big-num */ 1388 big_num = false; 1389 1390 if (!argc && target__none(&target)) 1391 usage_with_options(stat_usage, options); 1392 1393 if (run_count < 0) { 1394 pr_err("Run count must be a positive number\n"); 1395 parse_options_usage(stat_usage, options, "r", 1); 1396 goto out; 1397 } else if (run_count == 0) { 1398 forever = true; 1399 run_count = 1; 1400 } 1401 1402 if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { 1403 fprintf(stderr, "The --per-thread option is only available " 1404 "when monitoring via -p -t options.\n"); 1405 parse_options_usage(NULL, options, "p", 1); 1406 parse_options_usage(NULL, options, "t", 1); 1407 goto out; 1408 } 1409 1410 /* 1411 * no_aggr, cgroup are for system-wide only 1412 * --per-thread is aggregated per thread, we dont mix it with cpu mode 1413 */ 1414 if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) && 1415 !target__has_cpu(&target)) { 1416 fprintf(stderr, "both cgroup and no-aggregation " 1417 "modes only available in system-wide mode\n"); 1418 1419 parse_options_usage(stat_usage, options, "G", 1); 1420 parse_options_usage(NULL, options, "A", 1); 1421 parse_options_usage(NULL, options, "a", 1); 1422 goto out; 1423 } 1424 1425 if (add_default_attributes()) 1426 goto out; 1427 1428 target__validate(&target); 1429 1430 if (perf_evlist__create_maps(evsel_list, &target) < 0) { 1431 if (target__has_task(&target)) { 1432 pr_err("Problems finding threads of monitor\n"); 1433 parse_options_usage(stat_usage, options, "p", 1); 1434 parse_options_usage(NULL, options, "t", 1); 1435 } else if (target__has_cpu(&target)) { 1436 perror("failed to parse CPUs map"); 1437 parse_options_usage(stat_usage, options, "C", 1); 1438 parse_options_usage(NULL, options, "a", 1); 1439 } 1440 goto out; 1441 } 1442 1443 /* 1444 * Initialize thread_map with comm names, 1445 * so we could print it out on output. 1446 */ 1447 if (aggr_mode == AGGR_THREAD) 1448 thread_map__read_comms(evsel_list->threads); 1449 1450 if (interval && interval < 100) { 1451 pr_err("print interval must be >= 100ms\n"); 1452 parse_options_usage(stat_usage, options, "I", 1); 1453 goto out; 1454 } 1455 1456 if (perf_evlist__alloc_stats(evsel_list, interval)) 1457 goto out; 1458 1459 if (perf_stat_init_aggr_mode()) 1460 goto out; 1461 1462 /* 1463 * We dont want to block the signals - that would cause 1464 * child tasks to inherit that and Ctrl-C would not work. 1465 * What we want is for Ctrl-C to work in the exec()-ed 1466 * task, but being ignored by perf stat itself: 1467 */ 1468 atexit(sig_atexit); 1469 if (!forever) 1470 signal(SIGINT, skip_signal); 1471 signal(SIGCHLD, skip_signal); 1472 signal(SIGALRM, skip_signal); 1473 signal(SIGABRT, skip_signal); 1474 1475 status = 0; 1476 for (run_idx = 0; forever || run_idx < run_count; run_idx++) { 1477 if (run_count != 1 && verbose) 1478 fprintf(output, "[ perf stat: executing run #%d ... ]\n", 1479 run_idx + 1); 1480 1481 status = run_perf_stat(argc, argv); 1482 if (forever && status != -1) { 1483 print_counters(NULL, argc, argv); 1484 perf_stat__reset_stats(); 1485 } 1486 } 1487 1488 if (!forever && status != -1 && !interval) 1489 print_counters(NULL, argc, argv); 1490 1491 perf_evlist__free_stats(evsel_list); 1492 out: 1493 perf_evlist__delete(evsel_list); 1494 return status; 1495 } 1496