/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ./hackbench 10

  Time: 0.118

  Performance counter stats for './hackbench 10':

       1708.761321 task-clock                #   11.037 CPUs utilized
            41,190 context-switches          #    0.024 M/sec
             6,735 CPU-migrations            #    0.004 M/sec
            17,318 page-faults               #    0.010 M/sec
     5,205,202,243 cycles                    #    3.046 GHz
     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     1,600,790,871 stalled-cycles-backend    #   30.75% backend cycles idle
     2,603,501,247 instructions              #    0.50  insns per cycle
                                             #    1.48  stalled cycles per insn
       484,357,498 branches                  #  283.455 M/sec
         6,388,934 branch-misses             #    1.32% of all branches

        0.154822978  seconds time elapsed

 *
 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include "perf.h"
#include "builtin.h"
#include "util/cgroup.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/pmu.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/color.h"
#include "util/stat.h"
#include "util/header.h"
#include "util/cpumap.h"
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/counts.h"
#include "util/session.h"
#include "util/tool.h"
#include "asm/bug.h"

#include <stdlib.h>
#include <sys/prctl.h>
#include <locale.h>

/* NOTE(review): presumably the field separator used when none is given - confirm against the option parsing code. */
#define DEFAULT_SEPARATOR	" "
/* Placeholders printed in the count column by printout() when a counter has no usable value. */
#define CNTR_NOT_SUPPORTED	"<not supported>"
#define CNTR_NOT_COUNTED	"<not counted>"

/* Forward declaration: process_interval() calls this before it is defined. */
static void print_counters(struct timespec *ts, int argc, const char **argv);

/*
 * Default events used for perf stat -T
 *
 * The adjacent string literals are concatenated by the compiler into a
 * single event specification string.
 */
static const char *transaction_attrs = {
	"task-clock,"
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/,"
	"cpu/el-start/,"
	"cpu/cycles-ct/"
	"}"
};

/* More limited version when the CPU does not have all events.
*/ 90 static const char * transaction_limited_attrs = { 91 "task-clock," 92 "{" 93 "instructions," 94 "cycles," 95 "cpu/cycles-t/," 96 "cpu/tx-start/" 97 "}" 98 }; 99 100 static struct perf_evlist *evsel_list; 101 102 static struct target target = { 103 .uid = UINT_MAX, 104 }; 105 106 typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu); 107 108 static int run_count = 1; 109 static bool no_inherit = false; 110 static volatile pid_t child_pid = -1; 111 static bool null_run = false; 112 static int detailed_run = 0; 113 static bool transaction_run; 114 static bool big_num = true; 115 static int big_num_opt = -1; 116 static const char *csv_sep = NULL; 117 static bool csv_output = false; 118 static bool group = false; 119 static const char *pre_cmd = NULL; 120 static const char *post_cmd = NULL; 121 static bool sync_run = false; 122 static unsigned int initial_delay = 0; 123 static unsigned int unit_width = 4; /* strlen("unit") */ 124 static bool forever = false; 125 static bool metric_only = false; 126 static struct timespec ref_time; 127 static struct cpu_map *aggr_map; 128 static aggr_get_id_t aggr_get_id; 129 static bool append_file; 130 static const char *output_name; 131 static int output_fd; 132 133 struct perf_stat { 134 bool record; 135 struct perf_data_file file; 136 struct perf_session *session; 137 u64 bytes_written; 138 struct perf_tool tool; 139 bool maps_allocated; 140 struct cpu_map *cpus; 141 struct thread_map *threads; 142 enum aggr_mode aggr_mode; 143 }; 144 145 static struct perf_stat perf_stat; 146 #define STAT_RECORD perf_stat.record 147 148 static volatile int done = 0; 149 150 static struct perf_stat_config stat_config = { 151 .aggr_mode = AGGR_GLOBAL, 152 .scale = true, 153 }; 154 155 static inline void diff_timespec(struct timespec *r, struct timespec *a, 156 struct timespec *b) 157 { 158 r->tv_sec = a->tv_sec - b->tv_sec; 159 if (a->tv_nsec < b->tv_nsec) { 160 r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec; 161 r->tv_sec--; 162 } else 
{ 163 r->tv_nsec = a->tv_nsec - b->tv_nsec ; 164 } 165 } 166 167 static void perf_stat__reset_stats(void) 168 { 169 perf_evlist__reset_stats(evsel_list); 170 perf_stat__reset_shadow_stats(); 171 } 172 173 static int create_perf_stat_counter(struct perf_evsel *evsel) 174 { 175 struct perf_event_attr *attr = &evsel->attr; 176 177 if (stat_config.scale) 178 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 179 PERF_FORMAT_TOTAL_TIME_RUNNING; 180 181 attr->inherit = !no_inherit; 182 183 /* 184 * Some events get initialized with sample_(period/type) set, 185 * like tracepoints. Clear it up for counting. 186 */ 187 attr->sample_period = 0; 188 189 /* 190 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless 191 * while avoiding that older tools show confusing messages. 192 * 193 * However for pipe sessions we need to keep it zero, 194 * because script's perf_evsel__check_attr is triggered 195 * by attr->sample_type != 0, and we can't run it on 196 * stat sessions. 197 */ 198 if (!(STAT_RECORD && perf_stat.file.is_pipe)) 199 attr->sample_type = PERF_SAMPLE_IDENTIFIER; 200 201 /* 202 * Disabling all counters initially, they will be enabled 203 * either manually by us or by kernel via enable_on_exec 204 * set later. 205 */ 206 if (perf_evsel__is_group_leader(evsel)) { 207 attr->disabled = 1; 208 209 /* 210 * In case of initial_delay we enable tracee 211 * events manually. 212 */ 213 if (target__none(&target) && !initial_delay) 214 attr->enable_on_exec = 1; 215 } 216 217 if (target__has_cpu(&target)) 218 return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); 219 220 return perf_evsel__open_per_thread(evsel, evsel_list->threads); 221 } 222 223 /* 224 * Does the counter have nsecs as a unit? 
225 */ 226 static inline int nsec_counter(struct perf_evsel *evsel) 227 { 228 if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || 229 perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 230 return 1; 231 232 return 0; 233 } 234 235 static int process_synthesized_event(struct perf_tool *tool __maybe_unused, 236 union perf_event *event, 237 struct perf_sample *sample __maybe_unused, 238 struct machine *machine __maybe_unused) 239 { 240 if (perf_data_file__write(&perf_stat.file, event, event->header.size) < 0) { 241 pr_err("failed to write perf data, error: %m\n"); 242 return -1; 243 } 244 245 perf_stat.bytes_written += event->header.size; 246 return 0; 247 } 248 249 static int write_stat_round_event(u64 tm, u64 type) 250 { 251 return perf_event__synthesize_stat_round(NULL, tm, type, 252 process_synthesized_event, 253 NULL); 254 } 255 256 #define WRITE_STAT_ROUND_EVENT(time, interval) \ 257 write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval) 258 259 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) 260 261 static int 262 perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread, 263 struct perf_counts_values *count) 264 { 265 struct perf_sample_id *sid = SID(counter, cpu, thread); 266 267 return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count, 268 process_synthesized_event, NULL); 269 } 270 271 /* 272 * Read out the results of a single counter: 273 * do not aggregate counts across CPUs in system-wide mode 274 */ 275 static int read_counter(struct perf_evsel *counter) 276 { 277 int nthreads = thread_map__nr(evsel_list->threads); 278 int ncpus = perf_evsel__nr_cpus(counter); 279 int cpu, thread; 280 281 if (!counter->supported) 282 return -ENOENT; 283 284 if (counter->system_wide) 285 nthreads = 1; 286 287 for (thread = 0; thread < nthreads; thread++) { 288 for (cpu = 0; cpu < ncpus; cpu++) { 289 struct perf_counts_values *count; 290 291 count = perf_counts(counter->counts, cpu, thread); 292 if 
(perf_evsel__read(counter, cpu, thread, count)) 293 return -1; 294 295 if (STAT_RECORD) { 296 if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { 297 pr_err("failed to write stat event\n"); 298 return -1; 299 } 300 } 301 } 302 } 303 304 return 0; 305 } 306 307 static void read_counters(bool close_counters) 308 { 309 struct perf_evsel *counter; 310 311 evlist__for_each(evsel_list, counter) { 312 if (read_counter(counter)) 313 pr_debug("failed to read counter %s\n", counter->name); 314 315 if (perf_stat_process_counter(&stat_config, counter)) 316 pr_warning("failed to process counter %s\n", counter->name); 317 318 if (close_counters) { 319 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 320 thread_map__nr(evsel_list->threads)); 321 } 322 } 323 } 324 325 static void process_interval(void) 326 { 327 struct timespec ts, rs; 328 329 read_counters(false); 330 331 clock_gettime(CLOCK_MONOTONIC, &ts); 332 diff_timespec(&rs, &ts, &ref_time); 333 334 if (STAT_RECORD) { 335 if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSECS_PER_SEC + rs.tv_nsec, INTERVAL)) 336 pr_err("failed to write stat round event\n"); 337 } 338 339 print_counters(&rs, 0, NULL); 340 } 341 342 static void enable_counters(void) 343 { 344 if (initial_delay) 345 usleep(initial_delay * 1000); 346 347 /* 348 * We need to enable counters only if: 349 * - we don't have tracee (attaching to task or cpu) 350 * - we have initial delay configured 351 */ 352 if (!target__none(&target) || initial_delay) 353 perf_evlist__enable(evsel_list); 354 } 355 356 static volatile int workload_exec_errno; 357 358 /* 359 * perf_evlist__prepare_workload will send a SIGUSR1 360 * if the fork fails, since we asked by setting its 361 * want_signal to true. 
362 */ 363 static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info, 364 void *ucontext __maybe_unused) 365 { 366 workload_exec_errno = info->si_value.sival_int; 367 } 368 369 static bool has_unit(struct perf_evsel *counter) 370 { 371 return counter->unit && *counter->unit; 372 } 373 374 static bool has_scale(struct perf_evsel *counter) 375 { 376 return counter->scale != 1; 377 } 378 379 static int perf_stat_synthesize_config(bool is_pipe) 380 { 381 struct perf_evsel *counter; 382 int err; 383 384 if (is_pipe) { 385 err = perf_event__synthesize_attrs(NULL, perf_stat.session, 386 process_synthesized_event); 387 if (err < 0) { 388 pr_err("Couldn't synthesize attrs.\n"); 389 return err; 390 } 391 } 392 393 /* 394 * Synthesize other events stuff not carried within 395 * attr event - unit, scale, name 396 */ 397 evlist__for_each(evsel_list, counter) { 398 if (!counter->supported) 399 continue; 400 401 /* 402 * Synthesize unit and scale only if it's defined. 403 */ 404 if (has_unit(counter)) { 405 err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event); 406 if (err < 0) { 407 pr_err("Couldn't synthesize evsel unit.\n"); 408 return err; 409 } 410 } 411 412 if (has_scale(counter)) { 413 err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event); 414 if (err < 0) { 415 pr_err("Couldn't synthesize evsel scale.\n"); 416 return err; 417 } 418 } 419 420 if (counter->own_cpus) { 421 err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event); 422 if (err < 0) { 423 pr_err("Couldn't synthesize evsel scale.\n"); 424 return err; 425 } 426 } 427 428 /* 429 * Name is needed only for pipe output, 430 * perf.data carries event names. 
431 */ 432 if (is_pipe) { 433 err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event); 434 if (err < 0) { 435 pr_err("Couldn't synthesize evsel name.\n"); 436 return err; 437 } 438 } 439 } 440 441 err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads, 442 process_synthesized_event, 443 NULL); 444 if (err < 0) { 445 pr_err("Couldn't synthesize thread map.\n"); 446 return err; 447 } 448 449 err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus, 450 process_synthesized_event, NULL); 451 if (err < 0) { 452 pr_err("Couldn't synthesize thread map.\n"); 453 return err; 454 } 455 456 err = perf_event__synthesize_stat_config(NULL, &stat_config, 457 process_synthesized_event, NULL); 458 if (err < 0) { 459 pr_err("Couldn't synthesize config.\n"); 460 return err; 461 } 462 463 return 0; 464 } 465 466 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 467 468 static int __store_counter_ids(struct perf_evsel *counter, 469 struct cpu_map *cpus, 470 struct thread_map *threads) 471 { 472 int cpu, thread; 473 474 for (cpu = 0; cpu < cpus->nr; cpu++) { 475 for (thread = 0; thread < threads->nr; thread++) { 476 int fd = FD(counter, cpu, thread); 477 478 if (perf_evlist__id_add_fd(evsel_list, counter, 479 cpu, thread, fd) < 0) 480 return -1; 481 } 482 } 483 484 return 0; 485 } 486 487 static int store_counter_ids(struct perf_evsel *counter) 488 { 489 struct cpu_map *cpus = counter->cpus; 490 struct thread_map *threads = counter->threads; 491 492 if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr)) 493 return -ENOMEM; 494 495 return __store_counter_ids(counter, cpus, threads); 496 } 497 498 static int __run_perf_stat(int argc, const char **argv) 499 { 500 int interval = stat_config.interval; 501 char msg[512]; 502 unsigned long long t0, t1; 503 struct perf_evsel *counter; 504 struct timespec ts; 505 size_t l; 506 int status = 0; 507 const bool forks = (argc > 0); 508 bool is_pipe = STAT_RECORD ? 
perf_stat.file.is_pipe : false; 509 510 if (interval) { 511 ts.tv_sec = interval / 1000; 512 ts.tv_nsec = (interval % 1000) * 1000000; 513 } else { 514 ts.tv_sec = 1; 515 ts.tv_nsec = 0; 516 } 517 518 if (forks) { 519 if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe, 520 workload_exec_failed_signal) < 0) { 521 perror("failed to prepare workload"); 522 return -1; 523 } 524 child_pid = evsel_list->workload.pid; 525 } 526 527 if (group) 528 perf_evlist__set_leader(evsel_list); 529 530 evlist__for_each(evsel_list, counter) { 531 if (create_perf_stat_counter(counter) < 0) { 532 /* 533 * PPC returns ENXIO for HW counters until 2.6.37 534 * (behavior changed with commit b0a873e). 535 */ 536 if (errno == EINVAL || errno == ENOSYS || 537 errno == ENOENT || errno == EOPNOTSUPP || 538 errno == ENXIO) { 539 if (verbose) 540 ui__warning("%s event is not supported by the kernel.\n", 541 perf_evsel__name(counter)); 542 counter->supported = false; 543 544 if ((counter->leader != counter) || 545 !(counter->leader->nr_members > 1)) 546 continue; 547 } 548 549 perf_evsel__open_strerror(counter, &target, 550 errno, msg, sizeof(msg)); 551 ui__error("%s\n", msg); 552 553 if (child_pid != -1) 554 kill(child_pid, SIGTERM); 555 556 return -1; 557 } 558 counter->supported = true; 559 560 l = strlen(counter->unit); 561 if (l > unit_width) 562 unit_width = l; 563 564 if (STAT_RECORD && store_counter_ids(counter)) 565 return -1; 566 } 567 568 if (perf_evlist__apply_filters(evsel_list, &counter)) { 569 error("failed to set filter \"%s\" on event %s with %d (%s)\n", 570 counter->filter, perf_evsel__name(counter), errno, 571 strerror_r(errno, msg, sizeof(msg))); 572 return -1; 573 } 574 575 if (STAT_RECORD) { 576 int err, fd = perf_data_file__fd(&perf_stat.file); 577 578 if (is_pipe) { 579 err = perf_header__write_pipe(perf_data_file__fd(&perf_stat.file)); 580 } else { 581 err = perf_session__write_header(perf_stat.session, evsel_list, 582 fd, false); 583 } 584 585 if (err < 
0) 586 return err; 587 588 err = perf_stat_synthesize_config(is_pipe); 589 if (err < 0) 590 return err; 591 } 592 593 /* 594 * Enable counters and exec the command: 595 */ 596 t0 = rdclock(); 597 clock_gettime(CLOCK_MONOTONIC, &ref_time); 598 599 if (forks) { 600 perf_evlist__start_workload(evsel_list); 601 enable_counters(); 602 603 if (interval) { 604 while (!waitpid(child_pid, &status, WNOHANG)) { 605 nanosleep(&ts, NULL); 606 process_interval(); 607 } 608 } 609 wait(&status); 610 611 if (workload_exec_errno) { 612 const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg)); 613 pr_err("Workload failed: %s\n", emsg); 614 return -1; 615 } 616 617 if (WIFSIGNALED(status)) 618 psignal(WTERMSIG(status), argv[0]); 619 } else { 620 enable_counters(); 621 while (!done) { 622 nanosleep(&ts, NULL); 623 if (interval) 624 process_interval(); 625 } 626 } 627 628 t1 = rdclock(); 629 630 update_stats(&walltime_nsecs_stats, t1 - t0); 631 632 read_counters(true); 633 634 return WEXITSTATUS(status); 635 } 636 637 static int run_perf_stat(int argc, const char **argv) 638 { 639 int ret; 640 641 if (pre_cmd) { 642 ret = system(pre_cmd); 643 if (ret) 644 return ret; 645 } 646 647 if (sync_run) 648 sync(); 649 650 ret = __run_perf_stat(argc, argv); 651 if (ret) 652 return ret; 653 654 if (post_cmd) { 655 ret = system(post_cmd); 656 if (ret) 657 return ret; 658 } 659 660 return ret; 661 } 662 663 static void print_running(u64 run, u64 ena) 664 { 665 if (csv_output) { 666 fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", 667 csv_sep, 668 run, 669 csv_sep, 670 ena ? 
100.0 * run / ena : 100.0); 671 } else if (run != ena) { 672 fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); 673 } 674 } 675 676 static void print_noise_pct(double total, double avg) 677 { 678 double pct = rel_stddev_stats(total, avg); 679 680 if (csv_output) 681 fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct); 682 else if (pct) 683 fprintf(stat_config.output, " ( +-%6.2f%% )", pct); 684 } 685 686 static void print_noise(struct perf_evsel *evsel, double avg) 687 { 688 struct perf_stat_evsel *ps; 689 690 if (run_count == 1) 691 return; 692 693 ps = evsel->priv; 694 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 695 } 696 697 static void aggr_printout(struct perf_evsel *evsel, int id, int nr) 698 { 699 switch (stat_config.aggr_mode) { 700 case AGGR_CORE: 701 fprintf(stat_config.output, "S%d-C%*d%s%*d%s", 702 cpu_map__id_to_socket(id), 703 csv_output ? 0 : -8, 704 cpu_map__id_to_cpu(id), 705 csv_sep, 706 csv_output ? 0 : 4, 707 nr, 708 csv_sep); 709 break; 710 case AGGR_SOCKET: 711 fprintf(stat_config.output, "S%*d%s%*d%s", 712 csv_output ? 0 : -5, 713 id, 714 csv_sep, 715 csv_output ? 0 : 4, 716 nr, 717 csv_sep); 718 break; 719 case AGGR_NONE: 720 fprintf(stat_config.output, "CPU%*d%s", 721 csv_output ? 0 : -4, 722 perf_evsel__cpus(evsel)->map[id], csv_sep); 723 break; 724 case AGGR_THREAD: 725 fprintf(stat_config.output, "%*s-%*d%s", 726 csv_output ? 0 : 16, 727 thread_map__comm(evsel->threads, id), 728 csv_output ? 
0 : -8, 729 thread_map__pid(evsel->threads, id), 730 csv_sep); 731 break; 732 case AGGR_GLOBAL: 733 case AGGR_UNSET: 734 default: 735 break; 736 } 737 } 738 739 struct outstate { 740 FILE *fh; 741 bool newline; 742 const char *prefix; 743 int nfields; 744 int id, nr; 745 struct perf_evsel *evsel; 746 }; 747 748 #define METRIC_LEN 35 749 750 static void new_line_std(void *ctx) 751 { 752 struct outstate *os = ctx; 753 754 os->newline = true; 755 } 756 757 static void do_new_line_std(struct outstate *os) 758 { 759 fputc('\n', os->fh); 760 fputs(os->prefix, os->fh); 761 aggr_printout(os->evsel, os->id, os->nr); 762 if (stat_config.aggr_mode == AGGR_NONE) 763 fprintf(os->fh, " "); 764 fprintf(os->fh, " "); 765 } 766 767 static void print_metric_std(void *ctx, const char *color, const char *fmt, 768 const char *unit, double val) 769 { 770 struct outstate *os = ctx; 771 FILE *out = os->fh; 772 int n; 773 bool newline = os->newline; 774 775 os->newline = false; 776 777 if (unit == NULL || fmt == NULL) { 778 fprintf(out, "%-*s", METRIC_LEN, ""); 779 return; 780 } 781 782 if (newline) 783 do_new_line_std(os); 784 785 n = fprintf(out, " # "); 786 if (color) 787 n += color_fprintf(out, color, fmt, val); 788 else 789 n += fprintf(out, fmt, val); 790 fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); 791 } 792 793 static void new_line_csv(void *ctx) 794 { 795 struct outstate *os = ctx; 796 int i; 797 798 fputc('\n', os->fh); 799 if (os->prefix) 800 fprintf(os->fh, "%s%s", os->prefix, csv_sep); 801 aggr_printout(os->evsel, os->id, os->nr); 802 for (i = 0; i < os->nfields; i++) 803 fputs(csv_sep, os->fh); 804 } 805 806 static void print_metric_csv(void *ctx, 807 const char *color __maybe_unused, 808 const char *fmt, const char *unit, double val) 809 { 810 struct outstate *os = ctx; 811 FILE *out = os->fh; 812 char buf[64], *vals, *ends; 813 814 if (unit == NULL || fmt == NULL) { 815 fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep); 816 return; 817 } 818 snprintf(buf, 
sizeof(buf), fmt, val); 819 vals = buf; 820 while (isspace(*vals)) 821 vals++; 822 ends = vals; 823 while (isdigit(*ends) || *ends == '.') 824 ends++; 825 *ends = 0; 826 while (isspace(*unit)) 827 unit++; 828 fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); 829 } 830 831 #define METRIC_ONLY_LEN 20 832 833 /* Filter out some columns that don't work well in metrics only mode */ 834 835 static bool valid_only_metric(const char *unit) 836 { 837 if (!unit) 838 return false; 839 if (strstr(unit, "/sec") || 840 strstr(unit, "hz") || 841 strstr(unit, "Hz") || 842 strstr(unit, "CPUs utilized")) 843 return false; 844 return true; 845 } 846 847 static const char *fixunit(char *buf, struct perf_evsel *evsel, 848 const char *unit) 849 { 850 if (!strncmp(unit, "of all", 6)) { 851 snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), 852 unit); 853 return buf; 854 } 855 return unit; 856 } 857 858 static void print_metric_only(void *ctx, const char *color, const char *fmt, 859 const char *unit, double val) 860 { 861 struct outstate *os = ctx; 862 FILE *out = os->fh; 863 int n; 864 char buf[1024]; 865 unsigned mlen = METRIC_ONLY_LEN; 866 867 if (!valid_only_metric(unit)) 868 return; 869 unit = fixunit(buf, os->evsel, unit); 870 if (color) 871 n = color_fprintf(out, color, fmt, val); 872 else 873 n = fprintf(out, fmt, val); 874 if (n > METRIC_ONLY_LEN) 875 n = METRIC_ONLY_LEN; 876 if (mlen < strlen(unit)) 877 mlen = strlen(unit) + 1; 878 fprintf(out, "%*s", mlen - n, ""); 879 } 880 881 static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, 882 const char *fmt, 883 const char *unit, double val) 884 { 885 struct outstate *os = ctx; 886 FILE *out = os->fh; 887 char buf[64], *vals, *ends; 888 char tbuf[1024]; 889 890 if (!valid_only_metric(unit)) 891 return; 892 unit = fixunit(tbuf, os->evsel, unit); 893 snprintf(buf, sizeof buf, fmt, val); 894 vals = buf; 895 while (isspace(*vals)) 896 vals++; 897 ends = vals; 898 while (isdigit(*ends) || *ends == '.') 
899 ends++; 900 *ends = 0; 901 fprintf(out, "%s%s", vals, csv_sep); 902 } 903 904 static void new_line_metric(void *ctx __maybe_unused) 905 { 906 } 907 908 static void print_metric_header(void *ctx, const char *color __maybe_unused, 909 const char *fmt __maybe_unused, 910 const char *unit, double val __maybe_unused) 911 { 912 struct outstate *os = ctx; 913 char tbuf[1024]; 914 915 if (!valid_only_metric(unit)) 916 return; 917 unit = fixunit(tbuf, os->evsel, unit); 918 if (csv_output) 919 fprintf(os->fh, "%s%s", unit, csv_sep); 920 else 921 fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit); 922 } 923 924 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) 925 { 926 FILE *output = stat_config.output; 927 double msecs = avg / 1e6; 928 const char *fmt_v, *fmt_n; 929 char name[25]; 930 931 fmt_v = csv_output ? "%.6f%s" : "%18.6f%s"; 932 fmt_n = csv_output ? "%s" : "%-25s"; 933 934 aggr_printout(evsel, id, nr); 935 936 scnprintf(name, sizeof(name), "%s%s", 937 perf_evsel__name(evsel), csv_output ? 
"" : " (msec)"); 938 939 fprintf(output, fmt_v, msecs, csv_sep); 940 941 if (csv_output) 942 fprintf(output, "%s%s", evsel->unit, csv_sep); 943 else 944 fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep); 945 946 fprintf(output, fmt_n, name); 947 948 if (evsel->cgrp) 949 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 950 } 951 952 static int first_shadow_cpu(struct perf_evsel *evsel, int id) 953 { 954 int i; 955 956 if (!aggr_get_id) 957 return 0; 958 959 if (stat_config.aggr_mode == AGGR_NONE) 960 return id; 961 962 if (stat_config.aggr_mode == AGGR_GLOBAL) 963 return 0; 964 965 for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { 966 int cpu2 = perf_evsel__cpus(evsel)->map[i]; 967 968 if (aggr_get_id(evsel_list->cpus, cpu2) == id) 969 return cpu2; 970 } 971 return 0; 972 } 973 974 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 975 { 976 FILE *output = stat_config.output; 977 double sc = evsel->scale; 978 const char *fmt; 979 980 if (csv_output) { 981 fmt = sc != 1.0 ? "%.2f%s" : "%.0f%s"; 982 } else { 983 if (big_num) 984 fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s"; 985 else 986 fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s"; 987 } 988 989 aggr_printout(evsel, id, nr); 990 991 fprintf(output, fmt, avg, csv_sep); 992 993 if (evsel->unit) 994 fprintf(output, "%-*s%s", 995 csv_output ? 0 : unit_width, 996 evsel->unit, csv_sep); 997 998 fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); 999 1000 if (evsel->cgrp) 1001 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1002 } 1003 1004 static void printout(int id, int nr, struct perf_evsel *counter, double uval, 1005 char *prefix, u64 run, u64 ena, double noise) 1006 { 1007 struct perf_stat_output_ctx out; 1008 struct outstate os = { 1009 .fh = stat_config.output, 1010 .prefix = prefix ? 
prefix : "", 1011 .id = id, 1012 .nr = nr, 1013 .evsel = counter, 1014 }; 1015 print_metric_t pm = print_metric_std; 1016 void (*nl)(void *); 1017 1018 if (metric_only) { 1019 nl = new_line_metric; 1020 if (csv_output) 1021 pm = print_metric_only_csv; 1022 else 1023 pm = print_metric_only; 1024 } else 1025 nl = new_line_std; 1026 1027 if (csv_output && !metric_only) { 1028 static int aggr_fields[] = { 1029 [AGGR_GLOBAL] = 0, 1030 [AGGR_THREAD] = 1, 1031 [AGGR_NONE] = 1, 1032 [AGGR_SOCKET] = 2, 1033 [AGGR_CORE] = 2, 1034 }; 1035 1036 pm = print_metric_csv; 1037 nl = new_line_csv; 1038 os.nfields = 3; 1039 os.nfields += aggr_fields[stat_config.aggr_mode]; 1040 if (counter->cgrp) 1041 os.nfields++; 1042 } 1043 if (run == 0 || ena == 0 || counter->counts->scaled == -1) { 1044 if (metric_only) { 1045 pm(&os, NULL, "", "", 0); 1046 return; 1047 } 1048 aggr_printout(counter, id, nr); 1049 1050 fprintf(stat_config.output, "%*s%s", 1051 csv_output ? 0 : 18, 1052 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 1053 csv_sep); 1054 1055 fprintf(stat_config.output, "%-*s%s", 1056 csv_output ? 0 : unit_width, 1057 counter->unit, csv_sep); 1058 1059 fprintf(stat_config.output, "%*s", 1060 csv_output ? 
0 : -25, 1061 perf_evsel__name(counter)); 1062 1063 if (counter->cgrp) 1064 fprintf(stat_config.output, "%s%s", 1065 csv_sep, counter->cgrp->name); 1066 1067 if (!csv_output) 1068 pm(&os, NULL, NULL, "", 0); 1069 print_noise(counter, noise); 1070 print_running(run, ena); 1071 if (csv_output) 1072 pm(&os, NULL, NULL, "", 0); 1073 return; 1074 } 1075 1076 if (metric_only) 1077 /* nothing */; 1078 else if (nsec_counter(counter)) 1079 nsec_printout(id, nr, counter, uval); 1080 else 1081 abs_printout(id, nr, counter, uval); 1082 1083 out.print_metric = pm; 1084 out.new_line = nl; 1085 out.ctx = &os; 1086 1087 if (csv_output && !metric_only) { 1088 print_noise(counter, noise); 1089 print_running(run, ena); 1090 } 1091 1092 perf_stat__print_shadow_stats(counter, uval, 1093 first_shadow_cpu(counter, id), 1094 &out); 1095 if (!csv_output && !metric_only) { 1096 print_noise(counter, noise); 1097 print_running(run, ena); 1098 } 1099 } 1100 1101 static void aggr_update_shadow(void) 1102 { 1103 int cpu, s2, id, s; 1104 u64 val; 1105 struct perf_evsel *counter; 1106 1107 for (s = 0; s < aggr_map->nr; s++) { 1108 id = aggr_map->map[s]; 1109 evlist__for_each(evsel_list, counter) { 1110 val = 0; 1111 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1112 s2 = aggr_get_id(evsel_list->cpus, cpu); 1113 if (s2 != id) 1114 continue; 1115 val += perf_counts(counter->counts, cpu, 0)->val; 1116 } 1117 val = val * counter->scale; 1118 perf_stat__update_shadow_stats(counter, &val, 1119 first_shadow_cpu(counter, id)); 1120 } 1121 } 1122 } 1123 1124 static void print_aggr(char *prefix) 1125 { 1126 FILE *output = stat_config.output; 1127 struct perf_evsel *counter; 1128 int cpu, s, s2, id, nr; 1129 double uval; 1130 u64 ena, run, val; 1131 bool first; 1132 1133 if (!(aggr_map || aggr_get_id)) 1134 return; 1135 1136 aggr_update_shadow(); 1137 1138 /* 1139 * With metric_only everything is on a single line. 1140 * Without each counter has its own line. 
1141 */ 1142 for (s = 0; s < aggr_map->nr; s++) { 1143 if (prefix && metric_only) 1144 fprintf(output, "%s", prefix); 1145 1146 id = aggr_map->map[s]; 1147 first = true; 1148 evlist__for_each(evsel_list, counter) { 1149 val = ena = run = 0; 1150 nr = 0; 1151 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1152 s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); 1153 if (s2 != id) 1154 continue; 1155 val += perf_counts(counter->counts, cpu, 0)->val; 1156 ena += perf_counts(counter->counts, cpu, 0)->ena; 1157 run += perf_counts(counter->counts, cpu, 0)->run; 1158 nr++; 1159 } 1160 if (first && metric_only) { 1161 first = false; 1162 aggr_printout(counter, id, nr); 1163 } 1164 if (prefix && !metric_only) 1165 fprintf(output, "%s", prefix); 1166 1167 uval = val * counter->scale; 1168 printout(id, nr, counter, uval, prefix, run, ena, 1.0); 1169 if (!metric_only) 1170 fputc('\n', output); 1171 } 1172 if (metric_only) 1173 fputc('\n', output); 1174 } 1175 } 1176 1177 static void print_aggr_thread(struct perf_evsel *counter, char *prefix) 1178 { 1179 FILE *output = stat_config.output; 1180 int nthreads = thread_map__nr(counter->threads); 1181 int ncpus = cpu_map__nr(counter->cpus); 1182 int cpu, thread; 1183 double uval; 1184 1185 for (thread = 0; thread < nthreads; thread++) { 1186 u64 ena = 0, run = 0, val = 0; 1187 1188 for (cpu = 0; cpu < ncpus; cpu++) { 1189 val += perf_counts(counter->counts, cpu, thread)->val; 1190 ena += perf_counts(counter->counts, cpu, thread)->ena; 1191 run += perf_counts(counter->counts, cpu, thread)->run; 1192 } 1193 1194 if (prefix) 1195 fprintf(output, "%s", prefix); 1196 1197 uval = val * counter->scale; 1198 printout(thread, 0, counter, uval, prefix, run, ena, 1.0); 1199 fputc('\n', output); 1200 } 1201 } 1202 1203 /* 1204 * Print out the results of a single counter: 1205 * aggregated counts in system-wide mode 1206 */ 1207 static void print_counter_aggr(struct perf_evsel *counter, char *prefix) 1208 { 1209 FILE *output = 
stat_config.output; 1210 struct perf_stat_evsel *ps = counter->priv; 1211 double avg = avg_stats(&ps->res_stats[0]); 1212 double uval; 1213 double avg_enabled, avg_running; 1214 1215 avg_enabled = avg_stats(&ps->res_stats[1]); 1216 avg_running = avg_stats(&ps->res_stats[2]); 1217 1218 if (prefix && !metric_only) 1219 fprintf(output, "%s", prefix); 1220 1221 uval = avg * counter->scale; 1222 printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg); 1223 if (!metric_only) 1224 fprintf(output, "\n"); 1225 } 1226 1227 /* 1228 * Print out the results of a single counter: 1229 * does not use aggregated count in system-wide 1230 */ 1231 static void print_counter(struct perf_evsel *counter, char *prefix) 1232 { 1233 FILE *output = stat_config.output; 1234 u64 ena, run, val; 1235 double uval; 1236 int cpu; 1237 1238 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1239 val = perf_counts(counter->counts, cpu, 0)->val; 1240 ena = perf_counts(counter->counts, cpu, 0)->ena; 1241 run = perf_counts(counter->counts, cpu, 0)->run; 1242 1243 if (prefix) 1244 fprintf(output, "%s", prefix); 1245 1246 uval = val * counter->scale; 1247 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); 1248 1249 fputc('\n', output); 1250 } 1251 } 1252 1253 static void print_no_aggr_metric(char *prefix) 1254 { 1255 int cpu; 1256 int nrcpus = 0; 1257 struct perf_evsel *counter; 1258 u64 ena, run, val; 1259 double uval; 1260 1261 nrcpus = evsel_list->cpus->nr; 1262 for (cpu = 0; cpu < nrcpus; cpu++) { 1263 bool first = true; 1264 1265 if (prefix) 1266 fputs(prefix, stat_config.output); 1267 evlist__for_each(evsel_list, counter) { 1268 if (first) { 1269 aggr_printout(counter, cpu, 0); 1270 first = false; 1271 } 1272 val = perf_counts(counter->counts, cpu, 0)->val; 1273 ena = perf_counts(counter->counts, cpu, 0)->ena; 1274 run = perf_counts(counter->counts, cpu, 0)->run; 1275 1276 uval = val * counter->scale; 1277 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); 1278 } 1279 
fputc('\n', stat_config.output); 1280 } 1281 } 1282 1283 static int aggr_header_lens[] = { 1284 [AGGR_CORE] = 18, 1285 [AGGR_SOCKET] = 12, 1286 [AGGR_NONE] = 6, 1287 [AGGR_THREAD] = 24, 1288 [AGGR_GLOBAL] = 0, 1289 }; 1290 1291 static void print_metric_headers(char *prefix) 1292 { 1293 struct perf_stat_output_ctx out; 1294 struct perf_evsel *counter; 1295 struct outstate os = { 1296 .fh = stat_config.output 1297 }; 1298 1299 if (prefix) 1300 fprintf(stat_config.output, "%s", prefix); 1301 1302 if (!csv_output) 1303 fprintf(stat_config.output, "%*s", 1304 aggr_header_lens[stat_config.aggr_mode], ""); 1305 1306 /* Print metrics headers only */ 1307 evlist__for_each(evsel_list, counter) { 1308 os.evsel = counter; 1309 out.ctx = &os; 1310 out.print_metric = print_metric_header; 1311 out.new_line = new_line_metric; 1312 os.evsel = counter; 1313 perf_stat__print_shadow_stats(counter, 0, 1314 0, 1315 &out); 1316 } 1317 fputc('\n', stat_config.output); 1318 } 1319 1320 static void print_interval(char *prefix, struct timespec *ts) 1321 { 1322 FILE *output = stat_config.output; 1323 static int num_print_interval; 1324 1325 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); 1326 1327 if (num_print_interval == 0 && !csv_output && !metric_only) { 1328 switch (stat_config.aggr_mode) { 1329 case AGGR_SOCKET: 1330 fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); 1331 break; 1332 case AGGR_CORE: 1333 fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit"); 1334 break; 1335 case AGGR_NONE: 1336 fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); 1337 break; 1338 case AGGR_THREAD: 1339 fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit"); 1340 break; 1341 case AGGR_GLOBAL: 1342 default: 1343 fprintf(output, "# time counts %*s events\n", unit_width, "unit"); 1344 case AGGR_UNSET: 1345 break; 1346 } 1347 } 1348 1349 if (++num_print_interval == 25) 1350 num_print_interval = 0; 1351 
} 1352 1353 static void print_header(int argc, const char **argv) 1354 { 1355 FILE *output = stat_config.output; 1356 int i; 1357 1358 fflush(stdout); 1359 1360 if (!csv_output) { 1361 fprintf(output, "\n"); 1362 fprintf(output, " Performance counter stats for "); 1363 if (target.system_wide) 1364 fprintf(output, "\'system wide"); 1365 else if (target.cpu_list) 1366 fprintf(output, "\'CPU(s) %s", target.cpu_list); 1367 else if (!target__has_task(&target)) { 1368 fprintf(output, "\'%s", argv ? argv[0] : "pipe"); 1369 for (i = 1; argv && (i < argc); i++) 1370 fprintf(output, " %s", argv[i]); 1371 } else if (target.pid) 1372 fprintf(output, "process id \'%s", target.pid); 1373 else 1374 fprintf(output, "thread id \'%s", target.tid); 1375 1376 fprintf(output, "\'"); 1377 if (run_count > 1) 1378 fprintf(output, " (%d runs)", run_count); 1379 fprintf(output, ":\n\n"); 1380 } 1381 } 1382 1383 static void print_footer(void) 1384 { 1385 FILE *output = stat_config.output; 1386 1387 if (!null_run) 1388 fprintf(output, "\n"); 1389 fprintf(output, " %17.9f seconds time elapsed", 1390 avg_stats(&walltime_nsecs_stats)/1e9); 1391 if (run_count > 1) { 1392 fprintf(output, " "); 1393 print_noise_pct(stddev_stats(&walltime_nsecs_stats), 1394 avg_stats(&walltime_nsecs_stats)); 1395 } 1396 fprintf(output, "\n\n"); 1397 } 1398 1399 static void print_counters(struct timespec *ts, int argc, const char **argv) 1400 { 1401 int interval = stat_config.interval; 1402 struct perf_evsel *counter; 1403 char buf[64], *prefix = NULL; 1404 1405 /* Do not print anything if we record to the pipe. 
*/ 1406 if (STAT_RECORD && perf_stat.file.is_pipe) 1407 return; 1408 1409 if (interval) 1410 print_interval(prefix = buf, ts); 1411 else 1412 print_header(argc, argv); 1413 1414 if (metric_only) { 1415 static int num_print_iv; 1416 1417 if (num_print_iv == 0) 1418 print_metric_headers(prefix); 1419 if (num_print_iv++ == 25) 1420 num_print_iv = 0; 1421 if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) 1422 fprintf(stat_config.output, "%s", prefix); 1423 } 1424 1425 switch (stat_config.aggr_mode) { 1426 case AGGR_CORE: 1427 case AGGR_SOCKET: 1428 print_aggr(prefix); 1429 break; 1430 case AGGR_THREAD: 1431 evlist__for_each(evsel_list, counter) 1432 print_aggr_thread(counter, prefix); 1433 break; 1434 case AGGR_GLOBAL: 1435 evlist__for_each(evsel_list, counter) 1436 print_counter_aggr(counter, prefix); 1437 if (metric_only) 1438 fputc('\n', stat_config.output); 1439 break; 1440 case AGGR_NONE: 1441 if (metric_only) 1442 print_no_aggr_metric(prefix); 1443 else { 1444 evlist__for_each(evsel_list, counter) 1445 print_counter(counter, prefix); 1446 } 1447 break; 1448 case AGGR_UNSET: 1449 default: 1450 break; 1451 } 1452 1453 if (!interval && !csv_output) 1454 print_footer(); 1455 1456 fflush(stat_config.output); 1457 } 1458 1459 static volatile int signr = -1; 1460 1461 static void skip_signal(int signo) 1462 { 1463 if ((child_pid == -1) || stat_config.interval) 1464 done = 1; 1465 1466 signr = signo; 1467 /* 1468 * render child_pid harmless 1469 * won't send SIGTERM to a random 1470 * process in case of race condition 1471 * and fast PID recycling 1472 */ 1473 child_pid = -1; 1474 } 1475 1476 static void sig_atexit(void) 1477 { 1478 sigset_t set, oset; 1479 1480 /* 1481 * avoid race condition with SIGCHLD handler 1482 * in skip_signal() which is modifying child_pid 1483 * goal is to avoid send SIGTERM to a random 1484 * process 1485 */ 1486 sigemptyset(&set); 1487 sigaddset(&set, SIGCHLD); 1488 sigprocmask(SIG_BLOCK, &set, &oset); 1489 1490 if (child_pid != -1) 1491 
kill(child_pid, SIGTERM); 1492 1493 sigprocmask(SIG_SETMASK, &oset, NULL); 1494 1495 if (signr == -1) 1496 return; 1497 1498 signal(signr, SIG_DFL); 1499 kill(getpid(), signr); 1500 } 1501 1502 static int stat__set_big_num(const struct option *opt __maybe_unused, 1503 const char *s __maybe_unused, int unset) 1504 { 1505 big_num_opt = unset ? 0 : 1; 1506 return 0; 1507 } 1508 1509 static const struct option stat_options[] = { 1510 OPT_BOOLEAN('T', "transaction", &transaction_run, 1511 "hardware transaction statistics"), 1512 OPT_CALLBACK('e', "event", &evsel_list, "event", 1513 "event selector. use 'perf list' to list available events", 1514 parse_events_option), 1515 OPT_CALLBACK(0, "filter", &evsel_list, "filter", 1516 "event filter", parse_filter), 1517 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 1518 "child tasks do not inherit counters"), 1519 OPT_STRING('p', "pid", &target.pid, "pid", 1520 "stat events on existing process id"), 1521 OPT_STRING('t', "tid", &target.tid, "tid", 1522 "stat events on existing thread id"), 1523 OPT_BOOLEAN('a', "all-cpus", &target.system_wide, 1524 "system-wide collection from all CPUs"), 1525 OPT_BOOLEAN('g', "group", &group, 1526 "put the counters into a counter group"), 1527 OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), 1528 OPT_INCR('v', "verbose", &verbose, 1529 "be more verbose (show counter open errors, etc)"), 1530 OPT_INTEGER('r', "repeat", &run_count, 1531 "repeat command and print average + stddev (max: 100, forever: 0)"), 1532 OPT_BOOLEAN('n', "null", &null_run, 1533 "null run - dont start any counters"), 1534 OPT_INCR('d', "detailed", &detailed_run, 1535 "detailed run - start a lot of events"), 1536 OPT_BOOLEAN('S', "sync", &sync_run, 1537 "call sync() before starting a run"), 1538 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 1539 "print large numbers with thousands\' separators", 1540 stat__set_big_num), 1541 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 1542 "list of cpus to monitor in 
system-wide"), 1543 OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, 1544 "disable CPU count aggregation", AGGR_NONE), 1545 OPT_STRING('x', "field-separator", &csv_sep, "separator", 1546 "print counts with custom separator"), 1547 OPT_CALLBACK('G', "cgroup", &evsel_list, "name", 1548 "monitor event in cgroup name only", parse_cgroups), 1549 OPT_STRING('o', "output", &output_name, "file", "output file name"), 1550 OPT_BOOLEAN(0, "append", &append_file, "append to the output file"), 1551 OPT_INTEGER(0, "log-fd", &output_fd, 1552 "log output to fd, instead of stderr"), 1553 OPT_STRING(0, "pre", &pre_cmd, "command", 1554 "command to run prior to the measured command"), 1555 OPT_STRING(0, "post", &post_cmd, "command", 1556 "command to run after to the measured command"), 1557 OPT_UINTEGER('I', "interval-print", &stat_config.interval, 1558 "print counts at regular interval in ms (>= 10)"), 1559 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, 1560 "aggregate counts per processor socket", AGGR_SOCKET), 1561 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, 1562 "aggregate counts per physical processor core", AGGR_CORE), 1563 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, 1564 "aggregate counts per thread", AGGR_THREAD), 1565 OPT_UINTEGER('D', "delay", &initial_delay, 1566 "ms to wait before starting measurement after program start"), 1567 OPT_BOOLEAN(0, "metric-only", &metric_only, 1568 "Only print computed metrics. 
No raw values"), 1569 OPT_END() 1570 }; 1571 1572 static int perf_stat__get_socket(struct cpu_map *map, int cpu) 1573 { 1574 return cpu_map__get_socket(map, cpu, NULL); 1575 } 1576 1577 static int perf_stat__get_core(struct cpu_map *map, int cpu) 1578 { 1579 return cpu_map__get_core(map, cpu, NULL); 1580 } 1581 1582 static int cpu_map__get_max(struct cpu_map *map) 1583 { 1584 int i, max = -1; 1585 1586 for (i = 0; i < map->nr; i++) { 1587 if (map->map[i] > max) 1588 max = map->map[i]; 1589 } 1590 1591 return max; 1592 } 1593 1594 static struct cpu_map *cpus_aggr_map; 1595 1596 static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx) 1597 { 1598 int cpu; 1599 1600 if (idx >= map->nr) 1601 return -1; 1602 1603 cpu = map->map[idx]; 1604 1605 if (cpus_aggr_map->map[cpu] == -1) 1606 cpus_aggr_map->map[cpu] = get_id(map, idx); 1607 1608 return cpus_aggr_map->map[cpu]; 1609 } 1610 1611 static int perf_stat__get_socket_cached(struct cpu_map *map, int idx) 1612 { 1613 return perf_stat__get_aggr(perf_stat__get_socket, map, idx); 1614 } 1615 1616 static int perf_stat__get_core_cached(struct cpu_map *map, int idx) 1617 { 1618 return perf_stat__get_aggr(perf_stat__get_core, map, idx); 1619 } 1620 1621 static int perf_stat_init_aggr_mode(void) 1622 { 1623 int nr; 1624 1625 switch (stat_config.aggr_mode) { 1626 case AGGR_SOCKET: 1627 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { 1628 perror("cannot build socket map"); 1629 return -1; 1630 } 1631 aggr_get_id = perf_stat__get_socket_cached; 1632 break; 1633 case AGGR_CORE: 1634 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { 1635 perror("cannot build core map"); 1636 return -1; 1637 } 1638 aggr_get_id = perf_stat__get_core_cached; 1639 break; 1640 case AGGR_NONE: 1641 case AGGR_GLOBAL: 1642 case AGGR_THREAD: 1643 case AGGR_UNSET: 1644 default: 1645 break; 1646 } 1647 1648 /* 1649 * The evsel_list->cpus is the base we operate on, 1650 * taking the highest cpu number to be the 
size of 1651 * the aggregation translate cpumap. 1652 */ 1653 nr = cpu_map__get_max(evsel_list->cpus); 1654 cpus_aggr_map = cpu_map__empty_new(nr + 1); 1655 return cpus_aggr_map ? 0 : -ENOMEM; 1656 } 1657 1658 static void perf_stat__exit_aggr_mode(void) 1659 { 1660 cpu_map__put(aggr_map); 1661 cpu_map__put(cpus_aggr_map); 1662 aggr_map = NULL; 1663 cpus_aggr_map = NULL; 1664 } 1665 1666 static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx) 1667 { 1668 int cpu; 1669 1670 if (idx > map->nr) 1671 return -1; 1672 1673 cpu = map->map[idx]; 1674 1675 if (cpu >= env->nr_cpus_online) 1676 return -1; 1677 1678 return cpu; 1679 } 1680 1681 static int perf_env__get_socket(struct cpu_map *map, int idx, void *data) 1682 { 1683 struct perf_env *env = data; 1684 int cpu = perf_env__get_cpu(env, map, idx); 1685 1686 return cpu == -1 ? -1 : env->cpu[cpu].socket_id; 1687 } 1688 1689 static int perf_env__get_core(struct cpu_map *map, int idx, void *data) 1690 { 1691 struct perf_env *env = data; 1692 int core = -1, cpu = perf_env__get_cpu(env, map, idx); 1693 1694 if (cpu != -1) { 1695 int socket_id = env->cpu[cpu].socket_id; 1696 1697 /* 1698 * Encode socket in upper 16 bits 1699 * core_id is relative to socket, and 1700 * we need a global id. So we combine 1701 * socket + core id. 
1702 */ 1703 core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff); 1704 } 1705 1706 return core; 1707 } 1708 1709 static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus, 1710 struct cpu_map **sockp) 1711 { 1712 return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); 1713 } 1714 1715 static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, 1716 struct cpu_map **corep) 1717 { 1718 return cpu_map__build_map(cpus, corep, perf_env__get_core, env); 1719 } 1720 1721 static int perf_stat__get_socket_file(struct cpu_map *map, int idx) 1722 { 1723 return perf_env__get_socket(map, idx, &perf_stat.session->header.env); 1724 } 1725 1726 static int perf_stat__get_core_file(struct cpu_map *map, int idx) 1727 { 1728 return perf_env__get_core(map, idx, &perf_stat.session->header.env); 1729 } 1730 1731 static int perf_stat_init_aggr_mode_file(struct perf_stat *st) 1732 { 1733 struct perf_env *env = &st->session->header.env; 1734 1735 switch (stat_config.aggr_mode) { 1736 case AGGR_SOCKET: 1737 if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) { 1738 perror("cannot build socket map"); 1739 return -1; 1740 } 1741 aggr_get_id = perf_stat__get_socket_file; 1742 break; 1743 case AGGR_CORE: 1744 if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) { 1745 perror("cannot build core map"); 1746 return -1; 1747 } 1748 aggr_get_id = perf_stat__get_core_file; 1749 break; 1750 case AGGR_NONE: 1751 case AGGR_GLOBAL: 1752 case AGGR_THREAD: 1753 case AGGR_UNSET: 1754 default: 1755 break; 1756 } 1757 1758 return 0; 1759 } 1760 1761 /* 1762 * Add default attributes, if there were no attributes specified or 1763 * if -d/--detailed, -d -d or -d -d -d is used: 1764 */ 1765 static int add_default_attributes(void) 1766 { 1767 struct perf_event_attr default_attrs0[] = { 1768 1769 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 1770 { .type = PERF_TYPE_SOFTWARE, .config = 
PERF_COUNT_SW_CONTEXT_SWITCHES }, 1771 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, 1772 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 1773 1774 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, 1775 }; 1776 struct perf_event_attr frontend_attrs[] = { 1777 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, 1778 }; 1779 struct perf_event_attr backend_attrs[] = { 1780 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, 1781 }; 1782 struct perf_event_attr default_attrs1[] = { 1783 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, 1784 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 1785 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, 1786 1787 }; 1788 1789 /* 1790 * Detailed stats (-d), covering the L1 and last level data caches: 1791 */ 1792 struct perf_event_attr detailed_attrs[] = { 1793 1794 { .type = PERF_TYPE_HW_CACHE, 1795 .config = 1796 PERF_COUNT_HW_CACHE_L1D << 0 | 1797 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1798 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1799 1800 { .type = PERF_TYPE_HW_CACHE, 1801 .config = 1802 PERF_COUNT_HW_CACHE_L1D << 0 | 1803 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1804 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1805 1806 { .type = PERF_TYPE_HW_CACHE, 1807 .config = 1808 PERF_COUNT_HW_CACHE_LL << 0 | 1809 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1810 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1811 1812 { .type = PERF_TYPE_HW_CACHE, 1813 .config = 1814 PERF_COUNT_HW_CACHE_LL << 0 | 1815 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1816 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1817 }; 1818 1819 /* 1820 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: 1821 */ 1822 struct perf_event_attr very_detailed_attrs[] = { 1823 1824 { .type = PERF_TYPE_HW_CACHE, 1825 .config = 1826 PERF_COUNT_HW_CACHE_L1I << 0 | 1827 
(PERF_COUNT_HW_CACHE_OP_READ << 8) | 1828 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1829 1830 { .type = PERF_TYPE_HW_CACHE, 1831 .config = 1832 PERF_COUNT_HW_CACHE_L1I << 0 | 1833 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1834 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1835 1836 { .type = PERF_TYPE_HW_CACHE, 1837 .config = 1838 PERF_COUNT_HW_CACHE_DTLB << 0 | 1839 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1840 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1841 1842 { .type = PERF_TYPE_HW_CACHE, 1843 .config = 1844 PERF_COUNT_HW_CACHE_DTLB << 0 | 1845 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1846 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1847 1848 { .type = PERF_TYPE_HW_CACHE, 1849 .config = 1850 PERF_COUNT_HW_CACHE_ITLB << 0 | 1851 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1852 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1853 1854 { .type = PERF_TYPE_HW_CACHE, 1855 .config = 1856 PERF_COUNT_HW_CACHE_ITLB << 0 | 1857 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1858 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1859 1860 }; 1861 1862 /* 1863 * Very, very detailed stats (-d -d -d), adding prefetch events: 1864 */ 1865 struct perf_event_attr very_very_detailed_attrs[] = { 1866 1867 { .type = PERF_TYPE_HW_CACHE, 1868 .config = 1869 PERF_COUNT_HW_CACHE_L1D << 0 | 1870 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 1871 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1872 1873 { .type = PERF_TYPE_HW_CACHE, 1874 .config = 1875 PERF_COUNT_HW_CACHE_L1D << 0 | 1876 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 1877 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1878 }; 1879 1880 /* Set attrs if no event is selected and !null_run: */ 1881 if (null_run) 1882 return 0; 1883 1884 if (transaction_run) { 1885 int err; 1886 if (pmu_have_event("cpu", "cycles-ct") && 1887 pmu_have_event("cpu", "el-start")) 1888 err = parse_events(evsel_list, transaction_attrs, NULL); 1889 else 1890 err = parse_events(evsel_list, transaction_limited_attrs, NULL); 1891 if (err) { 1892 fprintf(stderr, "Cannot set up transaction events\n"); 1893 
return -1; 1894 } 1895 return 0; 1896 } 1897 1898 if (!evsel_list->nr_entries) { 1899 if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) 1900 return -1; 1901 if (pmu_have_event("cpu", "stalled-cycles-frontend")) { 1902 if (perf_evlist__add_default_attrs(evsel_list, 1903 frontend_attrs) < 0) 1904 return -1; 1905 } 1906 if (pmu_have_event("cpu", "stalled-cycles-backend")) { 1907 if (perf_evlist__add_default_attrs(evsel_list, 1908 backend_attrs) < 0) 1909 return -1; 1910 } 1911 if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0) 1912 return -1; 1913 } 1914 1915 /* Detailed events get appended to the event list: */ 1916 1917 if (detailed_run < 1) 1918 return 0; 1919 1920 /* Append detailed run extra attributes: */ 1921 if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) 1922 return -1; 1923 1924 if (detailed_run < 2) 1925 return 0; 1926 1927 /* Append very detailed run extra attributes: */ 1928 if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) 1929 return -1; 1930 1931 if (detailed_run < 3) 1932 return 0; 1933 1934 /* Append very, very detailed run extra attributes: */ 1935 return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); 1936 } 1937 1938 static const char * const stat_record_usage[] = { 1939 "perf stat record [<options>]", 1940 NULL, 1941 }; 1942 1943 static void init_features(struct perf_session *session) 1944 { 1945 int feat; 1946 1947 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 1948 perf_header__set_feat(&session->header, feat); 1949 1950 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 1951 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 1952 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 1953 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 1954 } 1955 1956 static int __cmd_record(int argc, const char **argv) 1957 { 1958 struct perf_session *session; 1959 struct perf_data_file *file = 
&perf_stat.file; 1960 1961 argc = parse_options(argc, argv, stat_options, stat_record_usage, 1962 PARSE_OPT_STOP_AT_NON_OPTION); 1963 1964 if (output_name) 1965 file->path = output_name; 1966 1967 if (run_count != 1 || forever) { 1968 pr_err("Cannot use -r option with perf stat record.\n"); 1969 return -1; 1970 } 1971 1972 session = perf_session__new(file, false, NULL); 1973 if (session == NULL) { 1974 pr_err("Perf session creation failed.\n"); 1975 return -1; 1976 } 1977 1978 init_features(session); 1979 1980 session->evlist = evsel_list; 1981 perf_stat.session = session; 1982 perf_stat.record = true; 1983 return argc; 1984 } 1985 1986 static int process_stat_round_event(struct perf_tool *tool __maybe_unused, 1987 union perf_event *event, 1988 struct perf_session *session) 1989 { 1990 struct stat_round_event *round = &event->stat_round; 1991 struct perf_evsel *counter; 1992 struct timespec tsh, *ts = NULL; 1993 const char **argv = session->header.env.cmdline_argv; 1994 int argc = session->header.env.nr_cmdline; 1995 1996 evlist__for_each(evsel_list, counter) 1997 perf_stat_process_counter(&stat_config, counter); 1998 1999 if (round->type == PERF_STAT_ROUND_TYPE__FINAL) 2000 update_stats(&walltime_nsecs_stats, round->time); 2001 2002 if (stat_config.interval && round->time) { 2003 tsh.tv_sec = round->time / NSECS_PER_SEC; 2004 tsh.tv_nsec = round->time % NSECS_PER_SEC; 2005 ts = &tsh; 2006 } 2007 2008 print_counters(ts, argc, argv); 2009 return 0; 2010 } 2011 2012 static 2013 int process_stat_config_event(struct perf_tool *tool __maybe_unused, 2014 union perf_event *event, 2015 struct perf_session *session __maybe_unused) 2016 { 2017 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2018 2019 perf_event__read_stat_config(&stat_config, &event->stat_config); 2020 2021 if (cpu_map__empty(st->cpus)) { 2022 if (st->aggr_mode != AGGR_UNSET) 2023 pr_warning("warning: processing task data, aggregation mode not set\n"); 2024 return 0; 2025 } 2026 2027 if 
(st->aggr_mode != AGGR_UNSET) 2028 stat_config.aggr_mode = st->aggr_mode; 2029 2030 if (perf_stat.file.is_pipe) 2031 perf_stat_init_aggr_mode(); 2032 else 2033 perf_stat_init_aggr_mode_file(st); 2034 2035 return 0; 2036 } 2037 2038 static int set_maps(struct perf_stat *st) 2039 { 2040 if (!st->cpus || !st->threads) 2041 return 0; 2042 2043 if (WARN_ONCE(st->maps_allocated, "stats double allocation\n")) 2044 return -EINVAL; 2045 2046 perf_evlist__set_maps(evsel_list, st->cpus, st->threads); 2047 2048 if (perf_evlist__alloc_stats(evsel_list, true)) 2049 return -ENOMEM; 2050 2051 st->maps_allocated = true; 2052 return 0; 2053 } 2054 2055 static 2056 int process_thread_map_event(struct perf_tool *tool __maybe_unused, 2057 union perf_event *event, 2058 struct perf_session *session __maybe_unused) 2059 { 2060 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2061 2062 if (st->threads) { 2063 pr_warning("Extra thread map event, ignoring.\n"); 2064 return 0; 2065 } 2066 2067 st->threads = thread_map__new_event(&event->thread_map); 2068 if (!st->threads) 2069 return -ENOMEM; 2070 2071 return set_maps(st); 2072 } 2073 2074 static 2075 int process_cpu_map_event(struct perf_tool *tool __maybe_unused, 2076 union perf_event *event, 2077 struct perf_session *session __maybe_unused) 2078 { 2079 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2080 struct cpu_map *cpus; 2081 2082 if (st->cpus) { 2083 pr_warning("Extra cpu map event, ignoring.\n"); 2084 return 0; 2085 } 2086 2087 cpus = cpu_map__new_data(&event->cpu_map.data); 2088 if (!cpus) 2089 return -ENOMEM; 2090 2091 st->cpus = cpus; 2092 return set_maps(st); 2093 } 2094 2095 static const char * const stat_report_usage[] = { 2096 "perf stat report [<options>]", 2097 NULL, 2098 }; 2099 2100 static struct perf_stat perf_stat = { 2101 .tool = { 2102 .attr = perf_event__process_attr, 2103 .event_update = perf_event__process_event_update, 2104 .thread_map = process_thread_map_event, 2105 .cpu_map 
= process_cpu_map_event, 2106 .stat_config = process_stat_config_event, 2107 .stat = perf_event__process_stat_event, 2108 .stat_round = process_stat_round_event, 2109 }, 2110 .aggr_mode = AGGR_UNSET, 2111 }; 2112 2113 static int __cmd_report(int argc, const char **argv) 2114 { 2115 struct perf_session *session; 2116 const struct option options[] = { 2117 OPT_STRING('i', "input", &input_name, "file", "input file name"), 2118 OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, 2119 "aggregate counts per processor socket", AGGR_SOCKET), 2120 OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, 2121 "aggregate counts per physical processor core", AGGR_CORE), 2122 OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, 2123 "disable CPU count aggregation", AGGR_NONE), 2124 OPT_END() 2125 }; 2126 struct stat st; 2127 int ret; 2128 2129 argc = parse_options(argc, argv, options, stat_report_usage, 0); 2130 2131 if (!input_name || !strlen(input_name)) { 2132 if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) 2133 input_name = "-"; 2134 else 2135 input_name = "perf.data"; 2136 } 2137 2138 perf_stat.file.path = input_name; 2139 perf_stat.file.mode = PERF_DATA_MODE_READ; 2140 2141 session = perf_session__new(&perf_stat.file, false, &perf_stat.tool); 2142 if (session == NULL) 2143 return -1; 2144 2145 perf_stat.session = session; 2146 stat_config.output = stderr; 2147 evsel_list = session->evlist; 2148 2149 ret = perf_session__process_events(session); 2150 if (ret) 2151 return ret; 2152 2153 perf_session__delete(session); 2154 return 0; 2155 } 2156 2157 int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) 2158 { 2159 const char * const stat_usage[] = { 2160 "perf stat [<options>] [<command>]", 2161 NULL 2162 }; 2163 int status = -EINVAL, run_idx; 2164 const char *mode; 2165 FILE *output = stderr; 2166 unsigned int interval; 2167 const char * const stat_subcommands[] = { "record", "report" }; 2168 2169 setlocale(LC_ALL, ""); 2170 2171 evsel_list = 
perf_evlist__new(); 2172 if (evsel_list == NULL) 2173 return -ENOMEM; 2174 2175 parse_events__shrink_config_terms(); 2176 argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, 2177 (const char **) stat_usage, 2178 PARSE_OPT_STOP_AT_NON_OPTION); 2179 perf_stat__init_shadow_stats(); 2180 2181 if (csv_sep) { 2182 csv_output = true; 2183 if (!strcmp(csv_sep, "\\t")) 2184 csv_sep = "\t"; 2185 } else 2186 csv_sep = DEFAULT_SEPARATOR; 2187 2188 if (argc && !strncmp(argv[0], "rec", 3)) { 2189 argc = __cmd_record(argc, argv); 2190 if (argc < 0) 2191 return -1; 2192 } else if (argc && !strncmp(argv[0], "rep", 3)) 2193 return __cmd_report(argc, argv); 2194 2195 interval = stat_config.interval; 2196 2197 /* 2198 * For record command the -o is already taken care of. 2199 */ 2200 if (!STAT_RECORD && output_name && strcmp(output_name, "-")) 2201 output = NULL; 2202 2203 if (output_name && output_fd) { 2204 fprintf(stderr, "cannot use both --output and --log-fd\n"); 2205 parse_options_usage(stat_usage, stat_options, "o", 1); 2206 parse_options_usage(NULL, stat_options, "log-fd", 0); 2207 goto out; 2208 } 2209 2210 if (metric_only && stat_config.aggr_mode == AGGR_THREAD) { 2211 fprintf(stderr, "--metric-only is not supported with --per-thread\n"); 2212 goto out; 2213 } 2214 2215 if (metric_only && run_count > 1) { 2216 fprintf(stderr, "--metric-only is not supported with -r\n"); 2217 goto out; 2218 } 2219 2220 if (output_fd < 0) { 2221 fprintf(stderr, "argument to --log-fd must be a > 0\n"); 2222 parse_options_usage(stat_usage, stat_options, "log-fd", 0); 2223 goto out; 2224 } 2225 2226 if (!output) { 2227 struct timespec tm; 2228 mode = append_file ? "a" : "w"; 2229 2230 output = fopen(output_name, mode); 2231 if (!output) { 2232 perror("failed to create output file"); 2233 return -1; 2234 } 2235 clock_gettime(CLOCK_REALTIME, &tm); 2236 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); 2237 } else if (output_fd > 0) { 2238 mode = append_file ? 
"a" : "w"; 2239 output = fdopen(output_fd, mode); 2240 if (!output) { 2241 perror("Failed opening logfd"); 2242 return -errno; 2243 } 2244 } 2245 2246 stat_config.output = output; 2247 2248 /* 2249 * let the spreadsheet do the pretty-printing 2250 */ 2251 if (csv_output) { 2252 /* User explicitly passed -B? */ 2253 if (big_num_opt == 1) { 2254 fprintf(stderr, "-B option not supported with -x\n"); 2255 parse_options_usage(stat_usage, stat_options, "B", 1); 2256 parse_options_usage(NULL, stat_options, "x", 1); 2257 goto out; 2258 } else /* Nope, so disable big number formatting */ 2259 big_num = false; 2260 } else if (big_num_opt == 0) /* User passed --no-big-num */ 2261 big_num = false; 2262 2263 if (!argc && target__none(&target)) 2264 usage_with_options(stat_usage, stat_options); 2265 2266 if (run_count < 0) { 2267 pr_err("Run count must be a positive number\n"); 2268 parse_options_usage(stat_usage, stat_options, "r", 1); 2269 goto out; 2270 } else if (run_count == 0) { 2271 forever = true; 2272 run_count = 1; 2273 } 2274 2275 if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { 2276 fprintf(stderr, "The --per-thread option is only available " 2277 "when monitoring via -p -t options.\n"); 2278 parse_options_usage(NULL, stat_options, "p", 1); 2279 parse_options_usage(NULL, stat_options, "t", 1); 2280 goto out; 2281 } 2282 2283 /* 2284 * no_aggr, cgroup are for system-wide only 2285 * --per-thread is aggregated per thread, we dont mix it with cpu mode 2286 */ 2287 if (((stat_config.aggr_mode != AGGR_GLOBAL && 2288 stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) && 2289 !target__has_cpu(&target)) { 2290 fprintf(stderr, "both cgroup and no-aggregation " 2291 "modes only available in system-wide mode\n"); 2292 2293 parse_options_usage(stat_usage, stat_options, "G", 1); 2294 parse_options_usage(NULL, stat_options, "A", 1); 2295 parse_options_usage(NULL, stat_options, "a", 1); 2296 goto out; 2297 } 2298 2299 if (add_default_attributes()) 2300 
goto out; 2301 2302 target__validate(&target); 2303 2304 if (perf_evlist__create_maps(evsel_list, &target) < 0) { 2305 if (target__has_task(&target)) { 2306 pr_err("Problems finding threads of monitor\n"); 2307 parse_options_usage(stat_usage, stat_options, "p", 1); 2308 parse_options_usage(NULL, stat_options, "t", 1); 2309 } else if (target__has_cpu(&target)) { 2310 perror("failed to parse CPUs map"); 2311 parse_options_usage(stat_usage, stat_options, "C", 1); 2312 parse_options_usage(NULL, stat_options, "a", 1); 2313 } 2314 goto out; 2315 } 2316 2317 /* 2318 * Initialize thread_map with comm names, 2319 * so we could print it out on output. 2320 */ 2321 if (stat_config.aggr_mode == AGGR_THREAD) 2322 thread_map__read_comms(evsel_list->threads); 2323 2324 if (interval && interval < 100) { 2325 if (interval < 10) { 2326 pr_err("print interval must be >= 10ms\n"); 2327 parse_options_usage(stat_usage, stat_options, "I", 1); 2328 goto out; 2329 } else 2330 pr_warning("print interval < 100ms. " 2331 "The overhead percentage could be high in some cases. " 2332 "Please proceed with caution.\n"); 2333 } 2334 2335 if (perf_evlist__alloc_stats(evsel_list, interval)) 2336 goto out; 2337 2338 if (perf_stat_init_aggr_mode()) 2339 goto out; 2340 2341 /* 2342 * We dont want to block the signals - that would cause 2343 * child tasks to inherit that and Ctrl-C would not work. 2344 * What we want is for Ctrl-C to work in the exec()-ed 2345 * task, but being ignored by perf stat itself: 2346 */ 2347 atexit(sig_atexit); 2348 if (!forever) 2349 signal(SIGINT, skip_signal); 2350 signal(SIGCHLD, skip_signal); 2351 signal(SIGALRM, skip_signal); 2352 signal(SIGABRT, skip_signal); 2353 2354 status = 0; 2355 for (run_idx = 0; forever || run_idx < run_count; run_idx++) { 2356 if (run_count != 1 && verbose) 2357 fprintf(output, "[ perf stat: executing run #%d ... 
]\n", 2358 run_idx + 1); 2359 2360 status = run_perf_stat(argc, argv); 2361 if (forever && status != -1) { 2362 print_counters(NULL, argc, argv); 2363 perf_stat__reset_stats(); 2364 } 2365 } 2366 2367 if (!forever && status != -1 && !interval) 2368 print_counters(NULL, argc, argv); 2369 2370 if (STAT_RECORD) { 2371 /* 2372 * We synthesize the kernel mmap record just so that older tools 2373 * don't emit warnings about not being able to resolve symbols 2374 * due to /proc/sys/kernel/kptr_restrict settings and instear provide 2375 * a saner message about no samples being in the perf.data file. 2376 * 2377 * This also serves to suppress a warning about f_header.data.size == 0 2378 * in header.c at the moment 'perf stat record' gets introduced, which 2379 * is not really needed once we start adding the stat specific PERF_RECORD_ 2380 * records, but the need to suppress the kptr_restrict messages in older 2381 * tools remain -acme 2382 */ 2383 int fd = perf_data_file__fd(&perf_stat.file); 2384 int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, 2385 process_synthesized_event, 2386 &perf_stat.session->machines.host); 2387 if (err) { 2388 pr_warning("Couldn't synthesize the kernel mmap record, harmless, " 2389 "older tools may produce warnings about this file\n."); 2390 } 2391 2392 if (!interval) { 2393 if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL)) 2394 pr_err("failed to write stat round event\n"); 2395 } 2396 2397 if (!perf_stat.file.is_pipe) { 2398 perf_stat.session->header.data_size += perf_stat.bytes_written; 2399 perf_session__write_header(perf_stat.session, evsel_list, fd, true); 2400 } 2401 2402 perf_session__delete(perf_stat.session); 2403 } 2404 2405 perf_stat__exit_aggr_mode(); 2406 perf_evlist__free_stats(evsel_list); 2407 out: 2408 perf_evlist__delete(evsel_list); 2409 return status; 2410 } 2411