1 /* 2 * builtin-stat.c 3 * 4 * Builtin stat command: Give a precise performance counters summary 5 * overview about any workload, CPU or specific PID. 6 * 7 * Sample output: 8 9 $ perf stat ./hackbench 10 10 11 Time: 0.118 12 13 Performance counter stats for './hackbench 10': 14 15 1708.761321 task-clock # 11.037 CPUs utilized 16 41,190 context-switches # 0.024 M/sec 17 6,735 CPU-migrations # 0.004 M/sec 18 17,318 page-faults # 0.010 M/sec 19 5,205,202,243 cycles # 3.046 GHz 20 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle 21 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle 22 2,603,501,247 instructions # 0.50 insns per cycle 23 # 1.48 stalled cycles per insn 24 484,357,498 branches # 283.455 M/sec 25 6,388,934 branch-misses # 1.32% of all branches 26 27 0.154822978 seconds time elapsed 28 29 * 30 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 31 * 32 * Improvements and fixes by: 33 * 34 * Arjan van de Ven <arjan@linux.intel.com> 35 * Yanmin Zhang <yanmin.zhang@intel.com> 36 * Wu Fengguang <fengguang.wu@intel.com> 37 * Mike Galbraith <efault@gmx.de> 38 * Paul Mackerras <paulus@samba.org> 39 * Jaswinder Singh Rajput <jaswinder@kernel.org> 40 * 41 * Released under the GPL v2. (and only v2, not any later version) 42 */ 43 44 #include "perf.h" 45 #include "builtin.h" 46 #include "util/cgroup.h" 47 #include "util/util.h" 48 #include <subcmd/parse-options.h> 49 #include "util/parse-events.h" 50 #include "util/pmu.h" 51 #include "util/event.h" 52 #include "util/evlist.h" 53 #include "util/evsel.h" 54 #include "util/debug.h" 55 #include "util/color.h" 56 #include "util/stat.h" 57 #include "util/header.h" 58 #include "util/cpumap.h" 59 #include "util/thread.h" 60 #include "util/thread_map.h" 61 #include "util/counts.h" 62 #include "util/group.h" 63 #include "util/session.h" 64 #include "util/tool.h" 65 #include "util/group.h" 66 #include "asm/bug.h" 67 68 #include <api/fs/fs.h> 69 #include <stdlib.h> 70 #include <sys/prctl.h> 71 #include <locale.h> 72 #include <math.h> 73 74 #define DEFAULT_SEPARATOR " " 75 #define CNTR_NOT_SUPPORTED "<not supported>" 76 #define CNTR_NOT_COUNTED "<not counted>" 77 78 static void print_counters(struct timespec *ts, int argc, const char **argv); 79 80 /* Default events used for perf stat -T */ 81 static const char *transaction_attrs = { 82 "task-clock," 83 "{" 84 "instructions," 85 "cycles," 86 "cpu/cycles-t/," 87 "cpu/tx-start/," 88 "cpu/el-start/," 89 "cpu/cycles-ct/" 90 "}" 91 }; 92 93 /* More limited version when the CPU does not have all events. */ 94 static const char * transaction_limited_attrs = { 95 "task-clock," 96 "{" 97 "instructions," 98 "cycles," 99 "cpu/cycles-t/," 100 "cpu/tx-start/" 101 "}" 102 }; 103 104 static const char * topdown_attrs[] = { 105 "topdown-total-slots", 106 "topdown-slots-retired", 107 "topdown-recovery-bubbles", 108 "topdown-fetch-bubbles", 109 "topdown-slots-issued", 110 NULL, 111 }; 112 113 static struct perf_evlist *evsel_list; 114 115 static struct target target = { 116 .uid = UINT_MAX, 117 }; 118 119 typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu); 120 121 static int run_count = 1; 122 static bool no_inherit = false; 123 static volatile pid_t child_pid = -1; 124 static bool null_run = false; 125 static int detailed_run = 0; 126 static bool transaction_run; 127 static bool topdown_run = false; 128 static bool big_num = true; 129 static int big_num_opt = -1; 130 static const char *csv_sep = NULL; 131 static bool csv_output = false; 132 static bool group = false; 133 static const char *pre_cmd = NULL; 134 static const char *post_cmd = NULL; 135 static bool sync_run = false; 136 static unsigned int initial_delay = 0; 137 static unsigned int unit_width = 4; /* strlen("unit") */ 138 static bool forever = false; 139 static bool metric_only = false; 140 static bool force_metric_only = false; 141 static struct timespec ref_time; 142 static struct cpu_map *aggr_map; 143 static aggr_get_id_t aggr_get_id; 144 static bool append_file; 145 static const char *output_name; 146 static int output_fd; 147 148 struct perf_stat { 149 bool record; 150 struct perf_data_file file; 151 struct perf_session *session; 152 u64 bytes_written; 153 struct perf_tool tool; 154 bool maps_allocated; 155 struct cpu_map *cpus; 156 struct thread_map *threads; 157 enum aggr_mode aggr_mode; 158 }; 159 160 static struct perf_stat perf_stat; 161 #define STAT_RECORD perf_stat.record 162 163 static volatile int done = 0; 164 165 static struct perf_stat_config stat_config = { 166 .aggr_mode = AGGR_GLOBAL, 167 .scale = true, 168 }; 169 170 static inline void diff_timespec(struct timespec *r, struct timespec *a, 171 struct timespec *b) 172 { 173 r->tv_sec = a->tv_sec - b->tv_sec; 174 if (a->tv_nsec < b->tv_nsec) { 175 r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec; 176 r->tv_sec--; 177 } else { 178 r->tv_nsec = a->tv_nsec - b->tv_nsec ; 179 } 180 } 181 182 static void perf_stat__reset_stats(void) 183 { 184 perf_evlist__reset_stats(evsel_list); 185 perf_stat__reset_shadow_stats(); 186 } 187 188 static int create_perf_stat_counter(struct perf_evsel *evsel) 189 { 190 struct perf_event_attr *attr = &evsel->attr; 191 192 if (stat_config.scale) 193 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 194 PERF_FORMAT_TOTAL_TIME_RUNNING; 195 196 attr->inherit = !no_inherit; 197 198 /* 199 * Some events get initialized with sample_(period/type) set, 200 * like tracepoints. Clear it up for counting. 201 */ 202 attr->sample_period = 0; 203 204 /* 205 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless 206 * while avoiding that older tools show confusing messages. 207 * 208 * However for pipe sessions we need to keep it zero, 209 * because script's perf_evsel__check_attr is triggered 210 * by attr->sample_type != 0, and we can't run it on 211 * stat sessions. 212 */ 213 if (!(STAT_RECORD && perf_stat.file.is_pipe)) 214 attr->sample_type = PERF_SAMPLE_IDENTIFIER; 215 216 /* 217 * Disabling all counters initially, they will be enabled 218 * either manually by us or by kernel via enable_on_exec 219 * set later. 220 */ 221 if (perf_evsel__is_group_leader(evsel)) { 222 attr->disabled = 1; 223 224 /* 225 * In case of initial_delay we enable tracee 226 * events manually. 227 */ 228 if (target__none(&target) && !initial_delay) 229 attr->enable_on_exec = 1; 230 } 231 232 if (target__has_cpu(&target)) 233 return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); 234 235 return perf_evsel__open_per_thread(evsel, evsel_list->threads); 236 } 237 238 /* 239 * Does the counter have nsecs as a unit? 240 */ 241 static inline int nsec_counter(struct perf_evsel *evsel) 242 { 243 if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || 244 perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 245 return 1; 246 247 return 0; 248 } 249 250 static int process_synthesized_event(struct perf_tool *tool __maybe_unused, 251 union perf_event *event, 252 struct perf_sample *sample __maybe_unused, 253 struct machine *machine __maybe_unused) 254 { 255 if (perf_data_file__write(&perf_stat.file, event, event->header.size) < 0) { 256 pr_err("failed to write perf data, error: %m\n"); 257 return -1; 258 } 259 260 perf_stat.bytes_written += event->header.size; 261 return 0; 262 } 263 264 static int write_stat_round_event(u64 tm, u64 type) 265 { 266 return perf_event__synthesize_stat_round(NULL, tm, type, 267 process_synthesized_event, 268 NULL); 269 } 270 271 #define WRITE_STAT_ROUND_EVENT(time, interval) \ 272 write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval) 273 274 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) 275 276 static int 277 perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread, 278 struct perf_counts_values *count) 279 { 280 struct perf_sample_id *sid = SID(counter, cpu, thread); 281 282 return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count, 283 process_synthesized_event, NULL); 284 } 285 286 /* 287 * Read out the results of a single counter: 288 * do not aggregate counts across CPUs in system-wide mode 289 */ 290 static int read_counter(struct perf_evsel *counter) 291 { 292 int nthreads = thread_map__nr(evsel_list->threads); 293 int ncpus, cpu, thread; 294 295 if (target__has_cpu(&target)) 296 ncpus = perf_evsel__nr_cpus(counter); 297 else 298 ncpus = 1; 299 300 if (!counter->supported) 301 return -ENOENT; 302 303 if (counter->system_wide) 304 nthreads = 1; 305 306 for (thread = 0; thread < nthreads; thread++) { 307 for (cpu = 0; cpu < ncpus; cpu++) { 308 struct perf_counts_values *count; 309 310 count = perf_counts(counter->counts, cpu, thread); 311 if (perf_evsel__read(counter, cpu, thread, count)) 312 return -1; 313 314 if (STAT_RECORD) { 315 if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { 316 pr_err("failed to write stat event\n"); 317 return -1; 318 } 319 } 320 321 if (verbose > 1) { 322 fprintf(stat_config.output, 323 "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", 324 perf_evsel__name(counter), 325 cpu, 326 count->val, count->ena, count->run); 327 } 328 } 329 } 330 331 return 0; 332 } 333 334 static void read_counters(void) 335 { 336 struct perf_evsel *counter; 337 338 evlist__for_each_entry(evsel_list, counter) { 339 if (read_counter(counter)) 340 pr_debug("failed to read counter %s\n", counter->name); 341 342 if (perf_stat_process_counter(&stat_config, counter)) 343 pr_warning("failed to process counter %s\n", counter->name); 344 } 345 } 346 347 static void process_interval(void) 348 { 349 struct timespec ts, rs; 350 351 read_counters(); 352 353 clock_gettime(CLOCK_MONOTONIC, &ts); 354 diff_timespec(&rs, &ts, &ref_time); 355 356 if (STAT_RECORD) { 357 if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSECS_PER_SEC + rs.tv_nsec, INTERVAL)) 358 pr_err("failed to write stat round event\n"); 359 } 360 361 print_counters(&rs, 0, NULL); 362 } 363 364 static void enable_counters(void) 365 { 366 if (initial_delay) 367 usleep(initial_delay * 1000); 368 369 /* 370 * We need to enable counters only if: 371 * - we don't have tracee (attaching to task or cpu) 372 * - we have initial delay configured 373 */ 374 if (!target__none(&target) || initial_delay) 375 perf_evlist__enable(evsel_list); 376 } 377 378 static void disable_counters(void) 379 { 380 /* 381 * If we don't have tracee (attaching to task or cpu), counters may 382 * still be running. To get accurate group ratios, we must stop groups 383 * from counting before reading their constituent counters. 384 */ 385 if (!target__none(&target)) 386 perf_evlist__disable(evsel_list); 387 } 388 389 static volatile int workload_exec_errno; 390 391 /* 392 * perf_evlist__prepare_workload will send a SIGUSR1 393 * if the fork fails, since we asked by setting its 394 * want_signal to true. 395 */ 396 static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info, 397 void *ucontext __maybe_unused) 398 { 399 workload_exec_errno = info->si_value.sival_int; 400 } 401 402 static bool has_unit(struct perf_evsel *counter) 403 { 404 return counter->unit && *counter->unit; 405 } 406 407 static bool has_scale(struct perf_evsel *counter) 408 { 409 return counter->scale != 1; 410 } 411 412 static int perf_stat_synthesize_config(bool is_pipe) 413 { 414 struct perf_evsel *counter; 415 int err; 416 417 if (is_pipe) { 418 err = perf_event__synthesize_attrs(NULL, perf_stat.session, 419 process_synthesized_event); 420 if (err < 0) { 421 pr_err("Couldn't synthesize attrs.\n"); 422 return err; 423 } 424 } 425 426 /* 427 * Synthesize other events stuff not carried within 428 * attr event - unit, scale, name 429 */ 430 evlist__for_each_entry(evsel_list, counter) { 431 if (!counter->supported) 432 continue; 433 434 /* 435 * Synthesize unit and scale only if it's defined. 436 */ 437 if (has_unit(counter)) { 438 err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event); 439 if (err < 0) { 440 pr_err("Couldn't synthesize evsel unit.\n"); 441 return err; 442 } 443 } 444 445 if (has_scale(counter)) { 446 err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event); 447 if (err < 0) { 448 pr_err("Couldn't synthesize evsel scale.\n"); 449 return err; 450 } 451 } 452 453 if (counter->own_cpus) { 454 err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event); 455 if (err < 0) { 456 pr_err("Couldn't synthesize evsel scale.\n"); 457 return err; 458 } 459 } 460 461 /* 462 * Name is needed only for pipe output, 463 * perf.data carries event names. 464 */ 465 if (is_pipe) { 466 err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event); 467 if (err < 0) { 468 pr_err("Couldn't synthesize evsel name.\n"); 469 return err; 470 } 471 } 472 } 473 474 err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads, 475 process_synthesized_event, 476 NULL); 477 if (err < 0) { 478 pr_err("Couldn't synthesize thread map.\n"); 479 return err; 480 } 481 482 err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus, 483 process_synthesized_event, NULL); 484 if (err < 0) { 485 pr_err("Couldn't synthesize thread map.\n"); 486 return err; 487 } 488 489 err = perf_event__synthesize_stat_config(NULL, &stat_config, 490 process_synthesized_event, NULL); 491 if (err < 0) { 492 pr_err("Couldn't synthesize config.\n"); 493 return err; 494 } 495 496 return 0; 497 } 498 499 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 500 501 static int __store_counter_ids(struct perf_evsel *counter, 502 struct cpu_map *cpus, 503 struct thread_map *threads) 504 { 505 int cpu, thread; 506 507 for (cpu = 0; cpu < cpus->nr; cpu++) { 508 for (thread = 0; thread < threads->nr; thread++) { 509 int fd = FD(counter, cpu, thread); 510 511 if (perf_evlist__id_add_fd(evsel_list, counter, 512 cpu, thread, fd) < 0) 513 return -1; 514 } 515 } 516 517 return 0; 518 } 519 520 static int store_counter_ids(struct perf_evsel *counter) 521 { 522 struct cpu_map *cpus = counter->cpus; 523 struct thread_map *threads = counter->threads; 524 525 if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr)) 526 return -ENOMEM; 527 528 return __store_counter_ids(counter, cpus, threads); 529 } 530 531 static int __run_perf_stat(int argc, const char **argv) 532 { 533 int interval = stat_config.interval; 534 char msg[512]; 535 unsigned long long t0, t1; 536 struct perf_evsel *counter; 537 struct timespec ts; 538 size_t l; 539 int status = 0; 540 const bool forks = (argc > 0); 541 bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false; 542 543 if (interval) { 544 ts.tv_sec = interval / 1000; 545 ts.tv_nsec = (interval % 1000) * 1000000; 546 } else { 547 ts.tv_sec = 1; 548 ts.tv_nsec = 0; 549 } 550 551 if (forks) { 552 if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe, 553 workload_exec_failed_signal) < 0) { 554 perror("failed to prepare workload"); 555 return -1; 556 } 557 child_pid = evsel_list->workload.pid; 558 } 559 560 if (group) 561 perf_evlist__set_leader(evsel_list); 562 563 evlist__for_each_entry(evsel_list, counter) { 564 try_again: 565 if (create_perf_stat_counter(counter) < 0) { 566 /* 567 * PPC returns ENXIO for HW counters until 2.6.37 568 * (behavior changed with commit b0a873e). 569 */ 570 if (errno == EINVAL || errno == ENOSYS || 571 errno == ENOENT || errno == EOPNOTSUPP || 572 errno == ENXIO) { 573 if (verbose) 574 ui__warning("%s event is not supported by the kernel.\n", 575 perf_evsel__name(counter)); 576 counter->supported = false; 577 578 if ((counter->leader != counter) || 579 !(counter->leader->nr_members > 1)) 580 continue; 581 } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { 582 if (verbose) 583 ui__warning("%s\n", msg); 584 goto try_again; 585 } 586 587 perf_evsel__open_strerror(counter, &target, 588 errno, msg, sizeof(msg)); 589 ui__error("%s\n", msg); 590 591 if (child_pid != -1) 592 kill(child_pid, SIGTERM); 593 594 return -1; 595 } 596 counter->supported = true; 597 598 l = strlen(counter->unit); 599 if (l > unit_width) 600 unit_width = l; 601 602 if (STAT_RECORD && store_counter_ids(counter)) 603 return -1; 604 } 605 606 if (perf_evlist__apply_filters(evsel_list, &counter)) { 607 error("failed to set filter \"%s\" on event %s with %d (%s)\n", 608 counter->filter, perf_evsel__name(counter), errno, 609 str_error_r(errno, msg, sizeof(msg))); 610 return -1; 611 } 612 613 if (STAT_RECORD) { 614 int err, fd = perf_data_file__fd(&perf_stat.file); 615 616 if (is_pipe) { 617 err = perf_header__write_pipe(perf_data_file__fd(&perf_stat.file)); 618 } else { 619 err = perf_session__write_header(perf_stat.session, evsel_list, 620 fd, false); 621 } 622 623 if (err < 0) 624 return err; 625 626 err = perf_stat_synthesize_config(is_pipe); 627 if (err < 0) 628 return err; 629 } 630 631 /* 632 * Enable counters and exec the command: 633 */ 634 t0 = rdclock(); 635 clock_gettime(CLOCK_MONOTONIC, &ref_time); 636 637 if (forks) { 638 perf_evlist__start_workload(evsel_list); 639 enable_counters(); 640 641 if (interval) { 642 while (!waitpid(child_pid, &status, WNOHANG)) { 643 nanosleep(&ts, NULL); 644 process_interval(); 645 } 646 } 647 wait(&status); 648 649 if (workload_exec_errno) { 650 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 651 pr_err("Workload failed: %s\n", emsg); 652 return -1; 653 } 654 655 if (WIFSIGNALED(status)) 656 psignal(WTERMSIG(status), argv[0]); 657 } else { 658 enable_counters(); 659 while (!done) { 660 nanosleep(&ts, NULL); 661 if (interval) 662 process_interval(); 663 } 664 } 665 666 disable_counters(); 667 668 t1 = rdclock(); 669 670 update_stats(&walltime_nsecs_stats, t1 - t0); 671 672 /* 673 * Closing a group leader splits the group, and as we only disable 674 * group leaders, results in remaining events becoming enabled. To 675 * avoid arbitrary skew, we must read all counters before closing any 676 * group leaders. 677 */ 678 read_counters(); 679 perf_evlist__close(evsel_list); 680 681 return WEXITSTATUS(status); 682 } 683 684 static int run_perf_stat(int argc, const char **argv) 685 { 686 int ret; 687 688 if (pre_cmd) { 689 ret = system(pre_cmd); 690 if (ret) 691 return ret; 692 } 693 694 if (sync_run) 695 sync(); 696 697 ret = __run_perf_stat(argc, argv); 698 if (ret) 699 return ret; 700 701 if (post_cmd) { 702 ret = system(post_cmd); 703 if (ret) 704 return ret; 705 } 706 707 return ret; 708 } 709 710 static void print_running(u64 run, u64 ena) 711 { 712 if (csv_output) { 713 fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", 714 csv_sep, 715 run, 716 csv_sep, 717 ena ? 100.0 * run / ena : 100.0); 718 } else if (run != ena) { 719 fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); 720 } 721 } 722 723 static void print_noise_pct(double total, double avg) 724 { 725 double pct = rel_stddev_stats(total, avg); 726 727 if (csv_output) 728 fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct); 729 else if (pct) 730 fprintf(stat_config.output, " ( +-%6.2f%% )", pct); 731 } 732 733 static void print_noise(struct perf_evsel *evsel, double avg) 734 { 735 struct perf_stat_evsel *ps; 736 737 if (run_count == 1) 738 return; 739 740 ps = evsel->priv; 741 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 742 } 743 744 static void aggr_printout(struct perf_evsel *evsel, int id, int nr) 745 { 746 switch (stat_config.aggr_mode) { 747 case AGGR_CORE: 748 fprintf(stat_config.output, "S%d-C%*d%s%*d%s", 749 cpu_map__id_to_socket(id), 750 csv_output ? 0 : -8, 751 cpu_map__id_to_cpu(id), 752 csv_sep, 753 csv_output ? 0 : 4, 754 nr, 755 csv_sep); 756 break; 757 case AGGR_SOCKET: 758 fprintf(stat_config.output, "S%*d%s%*d%s", 759 csv_output ? 0 : -5, 760 id, 761 csv_sep, 762 csv_output ? 0 : 4, 763 nr, 764 csv_sep); 765 break; 766 case AGGR_NONE: 767 fprintf(stat_config.output, "CPU%*d%s", 768 csv_output ? 0 : -4, 769 perf_evsel__cpus(evsel)->map[id], csv_sep); 770 break; 771 case AGGR_THREAD: 772 fprintf(stat_config.output, "%*s-%*d%s", 773 csv_output ? 0 : 16, 774 thread_map__comm(evsel->threads, id), 775 csv_output ? 0 : -8, 776 thread_map__pid(evsel->threads, id), 777 csv_sep); 778 break; 779 case AGGR_GLOBAL: 780 case AGGR_UNSET: 781 default: 782 break; 783 } 784 } 785 786 struct outstate { 787 FILE *fh; 788 bool newline; 789 const char *prefix; 790 int nfields; 791 int id, nr; 792 struct perf_evsel *evsel; 793 }; 794 795 #define METRIC_LEN 35 796 797 static void new_line_std(void *ctx) 798 { 799 struct outstate *os = ctx; 800 801 os->newline = true; 802 } 803 804 static void do_new_line_std(struct outstate *os) 805 { 806 fputc('\n', os->fh); 807 fputs(os->prefix, os->fh); 808 aggr_printout(os->evsel, os->id, os->nr); 809 if (stat_config.aggr_mode == AGGR_NONE) 810 fprintf(os->fh, " "); 811 fprintf(os->fh, " "); 812 } 813 814 static void print_metric_std(void *ctx, const char *color, const char *fmt, 815 const char *unit, double val) 816 { 817 struct outstate *os = ctx; 818 FILE *out = os->fh; 819 int n; 820 bool newline = os->newline; 821 822 os->newline = false; 823 824 if (unit == NULL || fmt == NULL) { 825 fprintf(out, "%-*s", METRIC_LEN, ""); 826 return; 827 } 828 829 if (newline) 830 do_new_line_std(os); 831 832 n = fprintf(out, " # "); 833 if (color) 834 n += color_fprintf(out, color, fmt, val); 835 else 836 n += fprintf(out, fmt, val); 837 fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); 838 } 839 840 static void new_line_csv(void *ctx) 841 { 842 struct outstate *os = ctx; 843 int i; 844 845 fputc('\n', os->fh); 846 if (os->prefix) 847 fprintf(os->fh, "%s%s", os->prefix, csv_sep); 848 aggr_printout(os->evsel, os->id, os->nr); 849 for (i = 0; i < os->nfields; i++) 850 fputs(csv_sep, os->fh); 851 } 852 853 static void print_metric_csv(void *ctx, 854 const char *color __maybe_unused, 855 const char *fmt, const char *unit, double val) 856 { 857 struct outstate *os = ctx; 858 FILE *out = os->fh; 859 char buf[64], *vals, *ends; 860 861 if (unit == NULL || fmt == NULL) { 862 fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep); 863 return; 864 } 865 snprintf(buf, sizeof(buf), fmt, val); 866 vals = buf; 867 while (isspace(*vals)) 868 vals++; 869 ends = vals; 870 while (isdigit(*ends) || *ends == '.') 871 ends++; 872 *ends = 0; 873 while (isspace(*unit)) 874 unit++; 875 fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); 876 } 877 878 #define METRIC_ONLY_LEN 20 879 880 /* Filter out some columns that don't work well in metrics only mode */ 881 882 static bool valid_only_metric(const char *unit) 883 { 884 if (!unit) 885 return false; 886 if (strstr(unit, "/sec") || 887 strstr(unit, "hz") || 888 strstr(unit, "Hz") || 889 strstr(unit, "CPUs utilized")) 890 return false; 891 return true; 892 } 893 894 static const char *fixunit(char *buf, struct perf_evsel *evsel, 895 const char *unit) 896 { 897 if (!strncmp(unit, "of all", 6)) { 898 snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), 899 unit); 900 return buf; 901 } 902 return unit; 903 } 904 905 static void print_metric_only(void *ctx, const char *color, const char *fmt, 906 const char *unit, double val) 907 { 908 struct outstate *os = ctx; 909 FILE *out = os->fh; 910 int n; 911 char buf[1024]; 912 unsigned mlen = METRIC_ONLY_LEN; 913 914 if (!valid_only_metric(unit)) 915 return; 916 unit = fixunit(buf, os->evsel, unit); 917 if (color) 918 n = color_fprintf(out, color, fmt, val); 919 else 920 n = fprintf(out, fmt, val); 921 if (n > METRIC_ONLY_LEN) 922 n = METRIC_ONLY_LEN; 923 if (mlen < strlen(unit)) 924 mlen = strlen(unit) + 1; 925 fprintf(out, "%*s", mlen - n, ""); 926 } 927 928 static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, 929 const char *fmt, 930 const char *unit, double val) 931 { 932 struct outstate *os = ctx; 933 FILE *out = os->fh; 934 char buf[64], *vals, *ends; 935 char tbuf[1024]; 936 937 if (!valid_only_metric(unit)) 938 return; 939 unit = fixunit(tbuf, os->evsel, unit); 940 snprintf(buf, sizeof buf, fmt, val); 941 vals = buf; 942 while (isspace(*vals)) 943 vals++; 944 ends = vals; 945 while (isdigit(*ends) || *ends == '.') 946 ends++; 947 *ends = 0; 948 fprintf(out, "%s%s", vals, csv_sep); 949 } 950 951 static void new_line_metric(void *ctx __maybe_unused) 952 { 953 } 954 955 static void print_metric_header(void *ctx, const char *color __maybe_unused, 956 const char *fmt __maybe_unused, 957 const char *unit, double val __maybe_unused) 958 { 959 struct outstate *os = ctx; 960 char tbuf[1024]; 961 962 if (!valid_only_metric(unit)) 963 return; 964 unit = fixunit(tbuf, os->evsel, unit); 965 if (csv_output) 966 fprintf(os->fh, "%s%s", unit, csv_sep); 967 else 968 fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit); 969 } 970 971 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) 972 { 973 FILE *output = stat_config.output; 974 double msecs = avg / 1e6; 975 const char *fmt_v, *fmt_n; 976 char name[25]; 977 978 fmt_v = csv_output ? "%.6f%s" : "%18.6f%s"; 979 fmt_n = csv_output ? "%s" : "%-25s"; 980 981 aggr_printout(evsel, id, nr); 982 983 scnprintf(name, sizeof(name), "%s%s", 984 perf_evsel__name(evsel), csv_output ? "" : " (msec)"); 985 986 fprintf(output, fmt_v, msecs, csv_sep); 987 988 if (csv_output) 989 fprintf(output, "%s%s", evsel->unit, csv_sep); 990 else 991 fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep); 992 993 fprintf(output, fmt_n, name); 994 995 if (evsel->cgrp) 996 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 997 } 998 999 static int first_shadow_cpu(struct perf_evsel *evsel, int id) 1000 { 1001 int i; 1002 1003 if (!aggr_get_id) 1004 return 0; 1005 1006 if (stat_config.aggr_mode == AGGR_NONE) 1007 return id; 1008 1009 if (stat_config.aggr_mode == AGGR_GLOBAL) 1010 return 0; 1011 1012 for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { 1013 int cpu2 = perf_evsel__cpus(evsel)->map[i]; 1014 1015 if (aggr_get_id(evsel_list->cpus, cpu2) == id) 1016 return cpu2; 1017 } 1018 return 0; 1019 } 1020 1021 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 1022 { 1023 FILE *output = stat_config.output; 1024 double sc = evsel->scale; 1025 const char *fmt; 1026 1027 if (csv_output) { 1028 fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; 1029 } else { 1030 if (big_num) 1031 fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; 1032 else 1033 fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; 1034 } 1035 1036 aggr_printout(evsel, id, nr); 1037 1038 fprintf(output, fmt, avg, csv_sep); 1039 1040 if (evsel->unit) 1041 fprintf(output, "%-*s%s", 1042 csv_output ? 0 : unit_width, 1043 evsel->unit, csv_sep); 1044 1045 fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); 1046 1047 if (evsel->cgrp) 1048 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1049 } 1050 1051 static void printout(int id, int nr, struct perf_evsel *counter, double uval, 1052 char *prefix, u64 run, u64 ena, double noise) 1053 { 1054 struct perf_stat_output_ctx out; 1055 struct outstate os = { 1056 .fh = stat_config.output, 1057 .prefix = prefix ? prefix : "", 1058 .id = id, 1059 .nr = nr, 1060 .evsel = counter, 1061 }; 1062 print_metric_t pm = print_metric_std; 1063 void (*nl)(void *); 1064 1065 if (metric_only) { 1066 nl = new_line_metric; 1067 if (csv_output) 1068 pm = print_metric_only_csv; 1069 else 1070 pm = print_metric_only; 1071 } else 1072 nl = new_line_std; 1073 1074 if (csv_output && !metric_only) { 1075 static int aggr_fields[] = { 1076 [AGGR_GLOBAL] = 0, 1077 [AGGR_THREAD] = 1, 1078 [AGGR_NONE] = 1, 1079 [AGGR_SOCKET] = 2, 1080 [AGGR_CORE] = 2, 1081 }; 1082 1083 pm = print_metric_csv; 1084 nl = new_line_csv; 1085 os.nfields = 3; 1086 os.nfields += aggr_fields[stat_config.aggr_mode]; 1087 if (counter->cgrp) 1088 os.nfields++; 1089 } 1090 if (run == 0 || ena == 0 || counter->counts->scaled == -1) { 1091 if (metric_only) { 1092 pm(&os, NULL, "", "", 0); 1093 return; 1094 } 1095 aggr_printout(counter, id, nr); 1096 1097 fprintf(stat_config.output, "%*s%s", 1098 csv_output ? 0 : 18, 1099 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 1100 csv_sep); 1101 1102 fprintf(stat_config.output, "%-*s%s", 1103 csv_output ? 0 : unit_width, 1104 counter->unit, csv_sep); 1105 1106 fprintf(stat_config.output, "%*s", 1107 csv_output ? 0 : -25, 1108 perf_evsel__name(counter)); 1109 1110 if (counter->cgrp) 1111 fprintf(stat_config.output, "%s%s", 1112 csv_sep, counter->cgrp->name); 1113 1114 if (!csv_output) 1115 pm(&os, NULL, NULL, "", 0); 1116 print_noise(counter, noise); 1117 print_running(run, ena); 1118 if (csv_output) 1119 pm(&os, NULL, NULL, "", 0); 1120 return; 1121 } 1122 1123 if (metric_only) 1124 /* nothing */; 1125 else if (nsec_counter(counter)) 1126 nsec_printout(id, nr, counter, uval); 1127 else 1128 abs_printout(id, nr, counter, uval); 1129 1130 out.print_metric = pm; 1131 out.new_line = nl; 1132 out.ctx = &os; 1133 1134 if (csv_output && !metric_only) { 1135 print_noise(counter, noise); 1136 print_running(run, ena); 1137 } 1138 1139 perf_stat__print_shadow_stats(counter, uval, 1140 first_shadow_cpu(counter, id), 1141 &out); 1142 if (!csv_output && !metric_only) { 1143 print_noise(counter, noise); 1144 print_running(run, ena); 1145 } 1146 } 1147 1148 static void aggr_update_shadow(void) 1149 { 1150 int cpu, s2, id, s; 1151 u64 val; 1152 struct perf_evsel *counter; 1153 1154 for (s = 0; s < aggr_map->nr; s++) { 1155 id = aggr_map->map[s]; 1156 evlist__for_each_entry(evsel_list, counter) { 1157 val = 0; 1158 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1159 s2 = aggr_get_id(evsel_list->cpus, cpu); 1160 if (s2 != id) 1161 continue; 1162 val += perf_counts(counter->counts, cpu, 0)->val; 1163 } 1164 val = val * counter->scale; 1165 perf_stat__update_shadow_stats(counter, &val, 1166 first_shadow_cpu(counter, id)); 1167 } 1168 } 1169 } 1170 1171 static void print_aggr(char *prefix) 1172 { 1173 FILE *output = stat_config.output; 1174 struct perf_evsel *counter; 1175 int cpu, s, s2, id, nr; 1176 double uval; 1177 u64 ena, run, val; 1178 bool first; 1179 1180 if (!(aggr_map || aggr_get_id)) 1181 return; 1182 1183 aggr_update_shadow(); 1184 1185 /* 1186 * With metric_only everything is on a single line. 1187 * Without each counter has its own line. 1188 */ 1189 for (s = 0; s < aggr_map->nr; s++) { 1190 if (prefix && metric_only) 1191 fprintf(output, "%s", prefix); 1192 1193 id = aggr_map->map[s]; 1194 first = true; 1195 evlist__for_each_entry(evsel_list, counter) { 1196 val = ena = run = 0; 1197 nr = 0; 1198 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1199 s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); 1200 if (s2 != id) 1201 continue; 1202 val += perf_counts(counter->counts, cpu, 0)->val; 1203 ena += perf_counts(counter->counts, cpu, 0)->ena; 1204 run += perf_counts(counter->counts, cpu, 0)->run; 1205 nr++; 1206 } 1207 if (first && metric_only) { 1208 first = false; 1209 aggr_printout(counter, id, nr); 1210 } 1211 if (prefix && !metric_only) 1212 fprintf(output, "%s", prefix); 1213 1214 uval = val * counter->scale; 1215 printout(id, nr, counter, uval, prefix, run, ena, 1.0); 1216 if (!metric_only) 1217 fputc('\n', output); 1218 } 1219 if (metric_only) 1220 fputc('\n', output); 1221 } 1222 } 1223 1224 static void print_aggr_thread(struct perf_evsel *counter, char *prefix) 1225 { 1226 FILE *output = stat_config.output; 1227 int nthreads = thread_map__nr(counter->threads); 1228 int ncpus = cpu_map__nr(counter->cpus); 1229 int cpu, thread; 1230 double uval; 1231 1232 for (thread = 0; thread < nthreads; thread++) { 1233 u64 ena = 0, run = 0, val = 0; 1234 1235 for (cpu = 0; cpu < ncpus; cpu++) { 1236 val += perf_counts(counter->counts, cpu, thread)->val; 1237 ena += perf_counts(counter->counts, cpu, thread)->ena; 1238 run += perf_counts(counter->counts, cpu, thread)->run; 1239 } 1240 1241 if (prefix) 1242 fprintf(output, "%s", prefix); 1243 1244 uval = val * counter->scale; 1245 printout(thread, 0, counter, uval, prefix, run, ena, 1.0); 1246 fputc('\n', output); 1247 } 1248 } 1249 1250 /* 1251 * Print out the results of a single counter: 1252 * aggregated counts in system-wide mode 1253 */ 1254 static void print_counter_aggr(struct perf_evsel *counter, char *prefix) 1255 { 1256 FILE *output = stat_config.output; 1257 struct perf_stat_evsel *ps = counter->priv; 1258 double avg = avg_stats(&ps->res_stats[0]); 1259 double uval; 1260 double avg_enabled, avg_running; 1261 1262 avg_enabled = avg_stats(&ps->res_stats[1]); 1263 avg_running = avg_stats(&ps->res_stats[2]); 1264 1265 if (prefix && !metric_only) 1266 fprintf(output, "%s", prefix); 1267 1268 uval = avg * counter->scale; 1269 printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg); 1270 if (!metric_only) 1271 fprintf(output, "\n"); 1272 } 1273 1274 /* 1275 * Print out the results of a single counter: 1276 * does not use aggregated count in system-wide 1277 */ 1278 static void print_counter(struct perf_evsel *counter, char *prefix) 1279 { 1280 FILE *output = stat_config.output; 1281 u64 ena, run, val; 1282 double uval; 1283 int cpu; 1284 1285 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1286 val = perf_counts(counter->counts, cpu, 0)->val; 1287 ena = perf_counts(counter->counts, cpu, 0)->ena; 1288 run = perf_counts(counter->counts, cpu, 0)->run; 1289 1290 if (prefix) 1291 fprintf(output, "%s", prefix); 1292 1293 uval = val * counter->scale; 1294 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); 1295 1296 fputc('\n', output); 1297 } 1298 } 1299 1300 static void print_no_aggr_metric(char *prefix) 1301 { 1302 int cpu; 1303 int nrcpus = 0; 1304 struct perf_evsel *counter; 1305 u64 ena, run, val; 1306 double uval; 1307 1308 nrcpus = evsel_list->cpus->nr; 1309 for (cpu = 0; cpu < nrcpus; cpu++) { 1310 bool first = true; 1311 1312 if (prefix) 1313 fputs(prefix, stat_config.output); 1314 evlist__for_each_entry(evsel_list, counter) { 1315 if (first) { 1316 aggr_printout(counter, cpu, 0); 1317 first = false; 1318 } 1319 val = perf_counts(counter->counts, cpu, 0)->val; 1320 ena = perf_counts(counter->counts, cpu, 0)->ena; 1321 run = perf_counts(counter->counts, cpu, 0)->run; 1322 1323 uval = val * counter->scale; 1324 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); 1325 } 1326 fputc('\n', stat_config.output); 1327 } 1328 } 1329 1330 static int aggr_header_lens[] = { 1331 [AGGR_CORE] = 18, 1332 [AGGR_SOCKET] = 12, 1333 [AGGR_NONE] = 6, 1334 [AGGR_THREAD] = 24, 1335 [AGGR_GLOBAL] = 0, 1336 }; 1337 1338 static const char *aggr_header_csv[] = { 1339 [AGGR_CORE] = "core,cpus,", 1340 [AGGR_SOCKET] = "socket,cpus", 1341 [AGGR_NONE] = "cpu,", 1342 [AGGR_THREAD] = "comm-pid,", 1343 [AGGR_GLOBAL] = "" 1344 }; 1345 1346 static void print_metric_headers(const char *prefix, bool no_indent) 1347 { 1348 struct perf_stat_output_ctx out; 1349 struct perf_evsel *counter; 1350 struct outstate os = { 1351 .fh = stat_config.output 1352 }; 1353 1354 if (prefix) 1355 fprintf(stat_config.output, "%s", prefix); 1356 1357 if (!csv_output && !no_indent) 1358 fprintf(stat_config.output, "%*s", 1359 aggr_header_lens[stat_config.aggr_mode], ""); 1360 if (csv_output) { 1361 if (stat_config.interval) 1362 fputs("time,", stat_config.output); 1363 fputs(aggr_header_csv[stat_config.aggr_mode], 1364 stat_config.output); 1365 } 1366 1367 /* Print metrics headers only */ 1368 evlist__for_each_entry(evsel_list, counter) { 1369 os.evsel = counter; 1370 out.ctx = &os; 1371 out.print_metric = print_metric_header; 1372 out.new_line = new_line_metric; 1373 os.evsel = counter; 1374 perf_stat__print_shadow_stats(counter, 0, 1375 0, 1376 &out); 1377 } 1378 fputc('\n', stat_config.output); 1379 } 1380 1381 static void print_interval(char *prefix, struct timespec *ts) 1382 { 1383 FILE *output = stat_config.output; 1384 static int num_print_interval; 1385 1386 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); 1387 1388 if (num_print_interval == 0 && !csv_output) { 1389 switch (stat_config.aggr_mode) { 1390 case AGGR_SOCKET: 1391 fprintf(output, "# time socket cpus"); 1392 if (!metric_only) 1393 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1394 break; 1395 case AGGR_CORE: 1396 fprintf(output, "# time core cpus"); 1397 if (!metric_only) 1398 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1399 break; 1400 case AGGR_NONE: 1401 fprintf(output, "# time CPU"); 1402 if (!metric_only) 1403 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1404 break; 1405 case AGGR_THREAD: 1406 fprintf(output, "# time comm-pid"); 1407 if (!metric_only) 1408 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1409 break; 1410 case AGGR_GLOBAL: 1411 default: 1412 fprintf(output, "# time"); 1413 if (!metric_only) 1414 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1415 case AGGR_UNSET: 1416 break; 1417 } 1418 } 1419 1420 if (num_print_interval == 0 && metric_only) 1421 print_metric_headers(" ", true); 1422 if (++num_print_interval == 25) 1423 num_print_interval = 0; 1424 } 1425 1426 static void print_header(int argc, const char **argv) 1427 { 1428 FILE *output = stat_config.output; 1429 int i; 1430 1431 fflush(stdout); 1432 1433 if (!csv_output) { 1434 fprintf(output, "\n"); 1435 fprintf(output, " Performance counter stats for "); 1436 if (target.system_wide) 1437 fprintf(output, "\'system wide"); 1438 else if (target.cpu_list) 1439 fprintf(output, "\'CPU(s) %s", target.cpu_list); 1440 else if (!target__has_task(&target)) { 1441 fprintf(output, "\'%s", argv ? argv[0] : "pipe"); 1442 for (i = 1; argv && (i < argc); i++) 1443 fprintf(output, " %s", argv[i]); 1444 } else if (target.pid) 1445 fprintf(output, "process id \'%s", target.pid); 1446 else 1447 fprintf(output, "thread id \'%s", target.tid); 1448 1449 fprintf(output, "\'"); 1450 if (run_count > 1) 1451 fprintf(output, " (%d runs)", run_count); 1452 fprintf(output, ":\n\n"); 1453 } 1454 } 1455 1456 static void print_footer(void) 1457 { 1458 FILE *output = stat_config.output; 1459 1460 if (!null_run) 1461 fprintf(output, "\n"); 1462 fprintf(output, " %17.9f seconds time elapsed", 1463 avg_stats(&walltime_nsecs_stats)/1e9); 1464 if (run_count > 1) { 1465 fprintf(output, " "); 1466 print_noise_pct(stddev_stats(&walltime_nsecs_stats), 1467 avg_stats(&walltime_nsecs_stats)); 1468 } 1469 fprintf(output, "\n\n"); 1470 } 1471 1472 static void print_counters(struct timespec *ts, int argc, const char **argv) 1473 { 1474 int interval = stat_config.interval; 1475 struct perf_evsel *counter; 1476 char buf[64], *prefix = NULL; 1477 1478 /* Do not print anything if we record to the pipe. */ 1479 if (STAT_RECORD && perf_stat.file.is_pipe) 1480 return; 1481 1482 if (interval) 1483 print_interval(prefix = buf, ts); 1484 else 1485 print_header(argc, argv); 1486 1487 if (metric_only) { 1488 static int num_print_iv; 1489 1490 if (num_print_iv == 0 && !interval) 1491 print_metric_headers(prefix, false); 1492 if (num_print_iv++ == 25) 1493 num_print_iv = 0; 1494 if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) 1495 fprintf(stat_config.output, "%s", prefix); 1496 } 1497 1498 switch (stat_config.aggr_mode) { 1499 case AGGR_CORE: 1500 case AGGR_SOCKET: 1501 print_aggr(prefix); 1502 break; 1503 case AGGR_THREAD: 1504 evlist__for_each_entry(evsel_list, counter) 1505 print_aggr_thread(counter, prefix); 1506 break; 1507 case AGGR_GLOBAL: 1508 evlist__for_each_entry(evsel_list, counter) 1509 print_counter_aggr(counter, prefix); 1510 if (metric_only) 1511 fputc('\n', stat_config.output); 1512 break; 1513 case AGGR_NONE: 1514 if (metric_only) 1515 print_no_aggr_metric(prefix); 1516 else { 1517 evlist__for_each_entry(evsel_list, counter) 1518 print_counter(counter, prefix); 1519 } 1520 break; 1521 case AGGR_UNSET: 1522 default: 1523 break; 1524 } 1525 1526 if (!interval && !csv_output) 1527 print_footer(); 1528 1529 fflush(stat_config.output); 1530 } 1531 1532 static volatile int signr = -1; 1533 1534 static void skip_signal(int signo) 1535 { 1536 if ((child_pid == -1) || stat_config.interval) 1537 done = 1; 1538 1539 signr = signo; 1540 /* 1541 * render child_pid harmless 1542 * won't send SIGTERM to a random 1543 * process in case of race condition 1544 * and fast PID recycling 1545 */ 1546 child_pid = -1; 1547 } 1548 1549 static void sig_atexit(void) 1550 { 1551 sigset_t set, oset; 1552 1553 /* 1554 * avoid race condition with SIGCHLD handler 1555 * in skip_signal() which is modifying child_pid 1556 * goal is to avoid send SIGTERM to a random 1557 * process 1558 */ 1559 sigemptyset(&set); 1560 sigaddset(&set, SIGCHLD); 1561 sigprocmask(SIG_BLOCK, &set, &oset); 1562 1563 if (child_pid != -1) 1564 kill(child_pid, SIGTERM); 1565 1566 sigprocmask(SIG_SETMASK, &oset, NULL); 1567 1568 if (signr == -1) 1569 return; 1570 1571 signal(signr, SIG_DFL); 1572 kill(getpid(), signr); 1573 } 1574 1575 static int stat__set_big_num(const struct option *opt __maybe_unused, 1576 const char *s __maybe_unused, int unset) 1577 { 1578 big_num_opt = unset ? 0 : 1; 1579 return 0; 1580 } 1581 1582 static int enable_metric_only(const struct option *opt __maybe_unused, 1583 const char *s __maybe_unused, int unset) 1584 { 1585 force_metric_only = true; 1586 metric_only = !unset; 1587 return 0; 1588 } 1589 1590 static const struct option stat_options[] = { 1591 OPT_BOOLEAN('T', "transaction", &transaction_run, 1592 "hardware transaction statistics"), 1593 OPT_CALLBACK('e', "event", &evsel_list, "event", 1594 "event selector. use 'perf list' to list available events", 1595 parse_events_option), 1596 OPT_CALLBACK(0, "filter", &evsel_list, "filter", 1597 "event filter", parse_filter), 1598 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 1599 "child tasks do not inherit counters"), 1600 OPT_STRING('p', "pid", &target.pid, "pid", 1601 "stat events on existing process id"), 1602 OPT_STRING('t', "tid", &target.tid, "tid", 1603 "stat events on existing thread id"), 1604 OPT_BOOLEAN('a', "all-cpus", &target.system_wide, 1605 "system-wide collection from all CPUs"), 1606 OPT_BOOLEAN('g', "group", &group, 1607 "put the counters into a counter group"), 1608 OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), 1609 OPT_INCR('v', "verbose", &verbose, 1610 "be more verbose (show counter open errors, etc)"), 1611 OPT_INTEGER('r', "repeat", &run_count, 1612 "repeat command and print average + stddev (max: 100, forever: 0)"), 1613 OPT_BOOLEAN('n', "null", &null_run, 1614 "null run - dont start any counters"), 1615 OPT_INCR('d', "detailed", &detailed_run, 1616 "detailed run - start a lot of events"), 1617 OPT_BOOLEAN('S', "sync", &sync_run, 1618 "call sync() before starting a run"), 1619 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 1620 "print large numbers with thousands\' separators", 1621 stat__set_big_num), 1622 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 1623 "list of cpus to monitor in system-wide"), 1624 OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, 1625 "disable CPU count aggregation", AGGR_NONE), 1626 OPT_STRING('x', "field-separator", &csv_sep, "separator", 1627 "print counts with custom separator"), 1628 OPT_CALLBACK('G', "cgroup", &evsel_list, "name", 1629 "monitor event in cgroup name only", parse_cgroups), 1630 OPT_STRING('o', "output", &output_name, "file", "output file name"), 1631 OPT_BOOLEAN(0, "append", &append_file, "append to the output file"), 1632 OPT_INTEGER(0, "log-fd", &output_fd, 1633 "log output to fd, instead of stderr"), 1634 OPT_STRING(0, "pre", &pre_cmd, "command", 1635 "command to run prior to the measured command"), 1636 OPT_STRING(0, "post", &post_cmd, "command", 1637 "command to run after to the measured command"), 1638 OPT_UINTEGER('I', "interval-print", &stat_config.interval, 1639 "print counts at regular interval in ms (>= 10)"), 1640 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, 1641 "aggregate counts per processor socket", AGGR_SOCKET), 1642 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, 1643 "aggregate counts per physical processor core", AGGR_CORE), 1644 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, 1645 "aggregate counts per thread", AGGR_THREAD), 1646 OPT_UINTEGER('D', "delay", &initial_delay, 1647 "ms to wait before starting measurement after program start"), 1648 OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL, 1649 "Only print computed metrics. No raw values", enable_metric_only), 1650 OPT_BOOLEAN(0, "topdown", &topdown_run, 1651 "measure topdown level 1 statistics"), 1652 OPT_END() 1653 }; 1654 1655 static int perf_stat__get_socket(struct cpu_map *map, int cpu) 1656 { 1657 return cpu_map__get_socket(map, cpu, NULL); 1658 } 1659 1660 static int perf_stat__get_core(struct cpu_map *map, int cpu) 1661 { 1662 return cpu_map__get_core(map, cpu, NULL); 1663 } 1664 1665 static int cpu_map__get_max(struct cpu_map *map) 1666 { 1667 int i, max = -1; 1668 1669 for (i = 0; i < map->nr; i++) { 1670 if (map->map[i] > max) 1671 max = map->map[i]; 1672 } 1673 1674 return max; 1675 } 1676 1677 static struct cpu_map *cpus_aggr_map; 1678 1679 static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx) 1680 { 1681 int cpu; 1682 1683 if (idx >= map->nr) 1684 return -1; 1685 1686 cpu = map->map[idx]; 1687 1688 if (cpus_aggr_map->map[cpu] == -1) 1689 cpus_aggr_map->map[cpu] = get_id(map, idx); 1690 1691 return cpus_aggr_map->map[cpu]; 1692 } 1693 1694 static int perf_stat__get_socket_cached(struct cpu_map *map, int idx) 1695 { 1696 return perf_stat__get_aggr(perf_stat__get_socket, map, idx); 1697 } 1698 1699 static int perf_stat__get_core_cached(struct cpu_map *map, int idx) 1700 { 1701 return perf_stat__get_aggr(perf_stat__get_core, map, idx); 1702 } 1703 1704 static int perf_stat_init_aggr_mode(void) 1705 { 1706 int nr; 1707 1708 switch (stat_config.aggr_mode) { 1709 case AGGR_SOCKET: 1710 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { 1711 perror("cannot build socket map"); 1712 return -1; 1713 } 1714 aggr_get_id = perf_stat__get_socket_cached; 1715 break; 1716 case AGGR_CORE: 1717 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { 1718 perror("cannot build core map"); 1719 return -1; 1720 } 1721 aggr_get_id = perf_stat__get_core_cached; 1722 break; 1723 case AGGR_NONE: 1724 case AGGR_GLOBAL: 1725 case AGGR_THREAD: 1726 case AGGR_UNSET: 1727 default: 1728 break; 1729 } 1730 1731 /* 1732 * The evsel_list->cpus is the base we operate on, 1733 * taking the highest cpu number to be the size of 1734 * the aggregation translate cpumap. 1735 */ 1736 nr = cpu_map__get_max(evsel_list->cpus); 1737 cpus_aggr_map = cpu_map__empty_new(nr + 1); 1738 return cpus_aggr_map ? 0 : -ENOMEM; 1739 } 1740 1741 static void perf_stat__exit_aggr_mode(void) 1742 { 1743 cpu_map__put(aggr_map); 1744 cpu_map__put(cpus_aggr_map); 1745 aggr_map = NULL; 1746 cpus_aggr_map = NULL; 1747 } 1748 1749 static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx) 1750 { 1751 int cpu; 1752 1753 if (idx > map->nr) 1754 return -1; 1755 1756 cpu = map->map[idx]; 1757 1758 if (cpu >= env->nr_cpus_online) 1759 return -1; 1760 1761 return cpu; 1762 } 1763 1764 static int perf_env__get_socket(struct cpu_map *map, int idx, void *data) 1765 { 1766 struct perf_env *env = data; 1767 int cpu = perf_env__get_cpu(env, map, idx); 1768 1769 return cpu == -1 ? -1 : env->cpu[cpu].socket_id; 1770 } 1771 1772 static int perf_env__get_core(struct cpu_map *map, int idx, void *data) 1773 { 1774 struct perf_env *env = data; 1775 int core = -1, cpu = perf_env__get_cpu(env, map, idx); 1776 1777 if (cpu != -1) { 1778 int socket_id = env->cpu[cpu].socket_id; 1779 1780 /* 1781 * Encode socket in upper 16 bits 1782 * core_id is relative to socket, and 1783 * we need a global id. So we combine 1784 * socket + core id. 1785 */ 1786 core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff); 1787 } 1788 1789 return core; 1790 } 1791 1792 static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus, 1793 struct cpu_map **sockp) 1794 { 1795 return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); 1796 } 1797 1798 static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, 1799 struct cpu_map **corep) 1800 { 1801 return cpu_map__build_map(cpus, corep, perf_env__get_core, env); 1802 } 1803 1804 static int perf_stat__get_socket_file(struct cpu_map *map, int idx) 1805 { 1806 return perf_env__get_socket(map, idx, &perf_stat.session->header.env); 1807 } 1808 1809 static int perf_stat__get_core_file(struct cpu_map *map, int idx) 1810 { 1811 return perf_env__get_core(map, idx, &perf_stat.session->header.env); 1812 } 1813 1814 static int perf_stat_init_aggr_mode_file(struct perf_stat *st) 1815 { 1816 struct perf_env *env = &st->session->header.env; 1817 1818 switch (stat_config.aggr_mode) { 1819 case AGGR_SOCKET: 1820 if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) { 1821 perror("cannot build socket map"); 1822 return -1; 1823 } 1824 aggr_get_id = perf_stat__get_socket_file; 1825 break; 1826 case AGGR_CORE: 1827 if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) { 1828 perror("cannot build core map"); 1829 return -1; 1830 } 1831 aggr_get_id = perf_stat__get_core_file; 1832 break; 1833 case AGGR_NONE: 1834 case AGGR_GLOBAL: 1835 case AGGR_THREAD: 1836 case AGGR_UNSET: 1837 default: 1838 break; 1839 } 1840 1841 return 0; 1842 } 1843 1844 static int topdown_filter_events(const char **attr, char **str, bool use_group) 1845 { 1846 int off = 0; 1847 int i; 1848 int len = 0; 1849 char *s; 1850 1851 for (i = 0; attr[i]; i++) { 1852 if (pmu_have_event("cpu", attr[i])) { 1853 len += strlen(attr[i]) + 1; 1854 attr[i - off] = attr[i]; 1855 } else 1856 off++; 1857 } 1858 attr[i - off] = NULL; 1859 1860 *str = malloc(len + 1 + 2); 1861 if (!*str) 1862 return -1; 1863 s = *str; 1864 if (i - off == 0) { 1865 *s = 0; 1866 return 0; 1867 } 1868 if (use_group) 1869 *s++ = '{'; 1870 for (i = 0; attr[i]; i++) { 1871 strcpy(s, attr[i]); 1872 s += strlen(s); 1873 *s++ = ','; 1874 } 1875 if (use_group) { 1876 s[-1] = '}'; 1877 *s = 0; 1878 } else 1879 s[-1] = 0; 1880 return 0; 1881 } 1882 1883 __weak bool arch_topdown_check_group(bool *warn) 1884 { 1885 *warn = false; 1886 return false; 1887 } 1888 1889 __weak void arch_topdown_group_warn(void) 1890 { 1891 } 1892 1893 /* 1894 * Add default attributes, if there were no attributes specified or 1895 * if -d/--detailed, -d -d or -d -d -d is used: 1896 */ 1897 static int add_default_attributes(void) 1898 { 1899 int err; 1900 struct perf_event_attr default_attrs0[] = { 1901 1902 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 1903 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, 1904 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, 1905 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 1906 1907 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, 1908 }; 1909 struct perf_event_attr frontend_attrs[] = { 1910 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, 1911 }; 1912 struct perf_event_attr backend_attrs[] = { 1913 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, 1914 }; 1915 struct perf_event_attr default_attrs1[] = { 1916 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, 1917 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 1918 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, 1919 1920 }; 1921 1922 /* 1923 * Detailed stats (-d), covering the L1 and last level data caches: 1924 */ 1925 struct perf_event_attr detailed_attrs[] = { 1926 1927 { .type = PERF_TYPE_HW_CACHE, 1928 .config = 1929 PERF_COUNT_HW_CACHE_L1D << 0 | 1930 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1931 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1932 1933 { .type = PERF_TYPE_HW_CACHE, 1934 .config = 1935 PERF_COUNT_HW_CACHE_L1D << 0 | 1936 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1937 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1938 1939 { .type = PERF_TYPE_HW_CACHE, 1940 .config = 1941 PERF_COUNT_HW_CACHE_LL << 0 | 1942 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1943 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1944 1945 { .type = PERF_TYPE_HW_CACHE, 1946 .config = 1947 PERF_COUNT_HW_CACHE_LL << 0 | 1948 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1949 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1950 }; 1951 1952 /* 1953 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: 1954 */ 1955 struct perf_event_attr very_detailed_attrs[] = { 1956 1957 { .type = PERF_TYPE_HW_CACHE, 1958 .config = 1959 PERF_COUNT_HW_CACHE_L1I << 0 | 1960 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1961 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1962 1963 { .type = PERF_TYPE_HW_CACHE, 1964 .config = 1965 PERF_COUNT_HW_CACHE_L1I << 0 | 1966 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1967 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1968 1969 { .type = PERF_TYPE_HW_CACHE, 1970 .config = 1971 PERF_COUNT_HW_CACHE_DTLB << 0 | 1972 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1973 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1974 1975 { .type = PERF_TYPE_HW_CACHE, 1976 .config = 1977 PERF_COUNT_HW_CACHE_DTLB << 0 | 1978 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1979 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1980 1981 { .type = PERF_TYPE_HW_CACHE, 1982 .config = 1983 PERF_COUNT_HW_CACHE_ITLB << 0 | 1984 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1985 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1986 1987 { .type = PERF_TYPE_HW_CACHE, 1988 .config = 1989 PERF_COUNT_HW_CACHE_ITLB << 0 | 1990 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1991 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1992 1993 }; 1994 1995 /* 1996 * Very, very detailed stats (-d -d -d), adding prefetch events: 1997 */ 1998 struct perf_event_attr very_very_detailed_attrs[] = { 1999 2000 { .type = PERF_TYPE_HW_CACHE, 2001 .config = 2002 PERF_COUNT_HW_CACHE_L1D << 0 | 2003 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 2004 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2005 2006 { .type = PERF_TYPE_HW_CACHE, 2007 .config = 2008 PERF_COUNT_HW_CACHE_L1D << 0 | 2009 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 2010 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2011 }; 2012 2013 /* Set attrs if no event is selected and !null_run: */ 2014 if (null_run) 2015 return 0; 2016 2017 if (transaction_run) { 2018 if (pmu_have_event("cpu", "cycles-ct") && 2019 pmu_have_event("cpu", "el-start")) 2020 err = parse_events(evsel_list, transaction_attrs, NULL); 2021 else 2022 err = parse_events(evsel_list, transaction_limited_attrs, NULL); 2023 if (err) { 2024 fprintf(stderr, "Cannot set up transaction events\n"); 2025 return -1; 2026 } 2027 return 0; 2028 } 2029 2030 if (topdown_run) { 2031 char *str = NULL; 2032 bool warn = false; 2033 2034 if (stat_config.aggr_mode != AGGR_GLOBAL && 2035 stat_config.aggr_mode != AGGR_CORE) { 2036 pr_err("top down event configuration requires --per-core mode\n"); 2037 return -1; 2038 } 2039 stat_config.aggr_mode = AGGR_CORE; 2040 if (nr_cgroups || !target__has_cpu(&target)) { 2041 pr_err("top down event configuration requires system-wide mode (-a)\n"); 2042 return -1; 2043 } 2044 2045 if (!force_metric_only) 2046 metric_only = true; 2047 if (topdown_filter_events(topdown_attrs, &str, 2048 arch_topdown_check_group(&warn)) < 0) { 2049 pr_err("Out of memory\n"); 2050 return -1; 2051 } 2052 if (topdown_attrs[0] && str) { 2053 if (warn) 2054 arch_topdown_group_warn(); 2055 err = parse_events(evsel_list, str, NULL); 2056 if (err) { 2057 fprintf(stderr, 2058 "Cannot set up top down events %s: %d\n", 2059 str, err); 2060 free(str); 2061 return -1; 2062 } 2063 } else { 2064 fprintf(stderr, "System does not support topdown\n"); 2065 return -1; 2066 } 2067 free(str); 2068 } 2069 2070 if (!evsel_list->nr_entries) { 2071 if (target__has_cpu(&target)) 2072 default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; 2073 2074 if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) 2075 return -1; 2076 if (pmu_have_event("cpu", "stalled-cycles-frontend")) { 2077 if (perf_evlist__add_default_attrs(evsel_list, 2078 frontend_attrs) < 0) 2079 return -1; 2080 } 2081 if (pmu_have_event("cpu", "stalled-cycles-backend")) { 2082 if (perf_evlist__add_default_attrs(evsel_list, 2083 backend_attrs) < 0) 2084 return -1; 2085 } 2086 if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0) 2087 return -1; 2088 } 2089 2090 /* Detailed events get appended to the event list: */ 2091 2092 if (detailed_run < 1) 2093 return 0; 2094 2095 /* Append detailed run extra attributes: */ 2096 if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) 2097 return -1; 2098 2099 if (detailed_run < 2) 2100 return 0; 2101 2102 /* Append very detailed run extra attributes: */ 2103 if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) 2104 return -1; 2105 2106 if (detailed_run < 3) 2107 return 0; 2108 2109 /* Append very, very detailed run extra attributes: */ 2110 return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); 2111 } 2112 2113 static const char * const stat_record_usage[] = { 2114 "perf stat record [<options>]", 2115 NULL, 2116 }; 2117 2118 static void init_features(struct perf_session *session) 2119 { 2120 int feat; 2121 2122 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 2123 perf_header__set_feat(&session->header, feat); 2124 2125 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 2126 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 2127 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 2128 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 2129 } 2130 2131 static int __cmd_record(int argc, const char **argv) 2132 { 2133 struct perf_session *session; 2134 struct perf_data_file *file = &perf_stat.file; 2135 2136 argc = parse_options(argc, argv, stat_options, stat_record_usage, 2137 PARSE_OPT_STOP_AT_NON_OPTION); 2138 2139 if (output_name) 2140 file->path = output_name; 2141 2142 if (run_count != 1 || forever) { 2143 pr_err("Cannot use -r option with perf stat record.\n"); 2144 return -1; 2145 } 2146 2147 session = perf_session__new(file, false, NULL); 2148 if (session == NULL) { 2149 pr_err("Perf session creation failed.\n"); 2150 return -1; 2151 } 2152 2153 init_features(session); 2154 2155 session->evlist = evsel_list; 2156 perf_stat.session = session; 2157 perf_stat.record = true; 2158 return argc; 2159 } 2160 2161 static int process_stat_round_event(struct perf_tool *tool __maybe_unused, 2162 union perf_event *event, 2163 struct perf_session *session) 2164 { 2165 struct stat_round_event *stat_round = &event->stat_round; 2166 struct perf_evsel *counter; 2167 struct timespec tsh, *ts = NULL; 2168 const char **argv = session->header.env.cmdline_argv; 2169 int argc = session->header.env.nr_cmdline; 2170 2171 evlist__for_each_entry(evsel_list, counter) 2172 perf_stat_process_counter(&stat_config, counter); 2173 2174 if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL) 2175 update_stats(&walltime_nsecs_stats, stat_round->time); 2176 2177 if (stat_config.interval && stat_round->time) { 2178 tsh.tv_sec = stat_round->time / NSECS_PER_SEC; 2179 tsh.tv_nsec = stat_round->time % NSECS_PER_SEC; 2180 ts = &tsh; 2181 } 2182 2183 print_counters(ts, argc, argv); 2184 return 0; 2185 } 2186 2187 static 2188 int process_stat_config_event(struct perf_tool *tool __maybe_unused, 2189 union perf_event *event, 2190 struct perf_session *session __maybe_unused) 2191 { 2192 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2193 2194 perf_event__read_stat_config(&stat_config, &event->stat_config); 2195 2196 if (cpu_map__empty(st->cpus)) { 2197 if (st->aggr_mode != AGGR_UNSET) 2198 pr_warning("warning: processing task data, aggregation mode not set\n"); 2199 return 0; 2200 } 2201 2202 if (st->aggr_mode != AGGR_UNSET) 2203 stat_config.aggr_mode = st->aggr_mode; 2204 2205 if (perf_stat.file.is_pipe) 2206 perf_stat_init_aggr_mode(); 2207 else 2208 perf_stat_init_aggr_mode_file(st); 2209 2210 return 0; 2211 } 2212 2213 static int set_maps(struct perf_stat *st) 2214 { 2215 if (!st->cpus || !st->threads) 2216 return 0; 2217 2218 if (WARN_ONCE(st->maps_allocated, "stats double allocation\n")) 2219 return -EINVAL; 2220 2221 perf_evlist__set_maps(evsel_list, st->cpus, st->threads); 2222 2223 if (perf_evlist__alloc_stats(evsel_list, true)) 2224 return -ENOMEM; 2225 2226 st->maps_allocated = true; 2227 return 0; 2228 } 2229 2230 static 2231 int process_thread_map_event(struct perf_tool *tool __maybe_unused, 2232 union perf_event *event, 2233 struct perf_session *session __maybe_unused) 2234 { 2235 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2236 2237 if (st->threads) { 2238 pr_warning("Extra thread map event, ignoring.\n"); 2239 return 0; 2240 } 2241 2242 st->threads = thread_map__new_event(&event->thread_map); 2243 if (!st->threads) 2244 return -ENOMEM; 2245 2246 return set_maps(st); 2247 } 2248 2249 static 2250 int process_cpu_map_event(struct perf_tool *tool __maybe_unused, 2251 union perf_event *event, 2252 struct perf_session *session __maybe_unused) 2253 { 2254 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2255 struct cpu_map *cpus; 2256 2257 if (st->cpus) { 2258 pr_warning("Extra cpu map event, ignoring.\n"); 2259 return 0; 2260 } 2261 2262 cpus = cpu_map__new_data(&event->cpu_map.data); 2263 if (!cpus) 2264 return -ENOMEM; 2265 2266 st->cpus = cpus; 2267 return set_maps(st); 2268 } 2269 2270 static const char * const stat_report_usage[] = { 2271 "perf stat report [<options>]", 2272 NULL, 2273 }; 2274 2275 static struct perf_stat perf_stat = { 2276 .tool = { 2277 .attr = perf_event__process_attr, 2278 .event_update = perf_event__process_event_update, 2279 .thread_map = process_thread_map_event, 2280 .cpu_map = process_cpu_map_event, 2281 .stat_config = process_stat_config_event, 2282 .stat = perf_event__process_stat_event, 2283 .stat_round = process_stat_round_event, 2284 }, 2285 .aggr_mode = AGGR_UNSET, 2286 }; 2287 2288 static int __cmd_report(int argc, const char **argv) 2289 { 2290 struct perf_session *session; 2291 const struct option options[] = { 2292 OPT_STRING('i', "input", &input_name, "file", "input file name"), 2293 OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, 2294 "aggregate counts per processor socket", AGGR_SOCKET), 2295 OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, 2296 "aggregate counts per physical processor core", AGGR_CORE), 2297 OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, 2298 "disable CPU count aggregation", AGGR_NONE), 2299 OPT_END() 2300 }; 2301 struct stat st; 2302 int ret; 2303 2304 argc = parse_options(argc, argv, options, stat_report_usage, 0); 2305 2306 if (!input_name || !strlen(input_name)) { 2307 if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) 2308 input_name = "-"; 2309 else 2310 input_name = "perf.data"; 2311 } 2312 2313 perf_stat.file.path = input_name; 2314 perf_stat.file.mode = PERF_DATA_MODE_READ; 2315 2316 session = perf_session__new(&perf_stat.file, false, &perf_stat.tool); 2317 if (session == NULL) 2318 return -1; 2319 2320 perf_stat.session = session; 2321 stat_config.output = stderr; 2322 evsel_list = session->evlist; 2323 2324 ret = perf_session__process_events(session); 2325 if (ret) 2326 return ret; 2327 2328 perf_session__delete(session); 2329 return 0; 2330 } 2331 2332 int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) 2333 { 2334 const char * const stat_usage[] = { 2335 "perf stat [<options>] [<command>]", 2336 NULL 2337 }; 2338 int status = -EINVAL, run_idx; 2339 const char *mode; 2340 FILE *output = stderr; 2341 unsigned int interval; 2342 const char * const stat_subcommands[] = { "record", "report" }; 2343 2344 setlocale(LC_ALL, ""); 2345 2346 evsel_list = perf_evlist__new(); 2347 if (evsel_list == NULL) 2348 return -ENOMEM; 2349 2350 parse_events__shrink_config_terms(); 2351 argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, 2352 (const char **) stat_usage, 2353 PARSE_OPT_STOP_AT_NON_OPTION); 2354 perf_stat__init_shadow_stats(); 2355 2356 if (csv_sep) { 2357 csv_output = true; 2358 if (!strcmp(csv_sep, "\\t")) 2359 csv_sep = "\t"; 2360 } else 2361 csv_sep = DEFAULT_SEPARATOR; 2362 2363 if (argc && !strncmp(argv[0], "rec", 3)) { 2364 argc = __cmd_record(argc, argv); 2365 if (argc < 0) 2366 return -1; 2367 } else if (argc && !strncmp(argv[0], "rep", 3)) 2368 return __cmd_report(argc, argv); 2369 2370 interval = stat_config.interval; 2371 2372 /* 2373 * For record command the -o is already taken care of. 2374 */ 2375 if (!STAT_RECORD && output_name && strcmp(output_name, "-")) 2376 output = NULL; 2377 2378 if (output_name && output_fd) { 2379 fprintf(stderr, "cannot use both --output and --log-fd\n"); 2380 parse_options_usage(stat_usage, stat_options, "o", 1); 2381 parse_options_usage(NULL, stat_options, "log-fd", 0); 2382 goto out; 2383 } 2384 2385 if (metric_only && stat_config.aggr_mode == AGGR_THREAD) { 2386 fprintf(stderr, "--metric-only is not supported with --per-thread\n"); 2387 goto out; 2388 } 2389 2390 if (metric_only && run_count > 1) { 2391 fprintf(stderr, "--metric-only is not supported with -r\n"); 2392 goto out; 2393 } 2394 2395 if (output_fd < 0) { 2396 fprintf(stderr, "argument to --log-fd must be a > 0\n"); 2397 parse_options_usage(stat_usage, stat_options, "log-fd", 0); 2398 goto out; 2399 } 2400 2401 if (!output) { 2402 struct timespec tm; 2403 mode = append_file ? "a" : "w"; 2404 2405 output = fopen(output_name, mode); 2406 if (!output) { 2407 perror("failed to create output file"); 2408 return -1; 2409 } 2410 clock_gettime(CLOCK_REALTIME, &tm); 2411 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); 2412 } else if (output_fd > 0) { 2413 mode = append_file ? "a" : "w"; 2414 output = fdopen(output_fd, mode); 2415 if (!output) { 2416 perror("Failed opening logfd"); 2417 return -errno; 2418 } 2419 } 2420 2421 stat_config.output = output; 2422 2423 /* 2424 * let the spreadsheet do the pretty-printing 2425 */ 2426 if (csv_output) { 2427 /* User explicitly passed -B? */ 2428 if (big_num_opt == 1) { 2429 fprintf(stderr, "-B option not supported with -x\n"); 2430 parse_options_usage(stat_usage, stat_options, "B", 1); 2431 parse_options_usage(NULL, stat_options, "x", 1); 2432 goto out; 2433 } else /* Nope, so disable big number formatting */ 2434 big_num = false; 2435 } else if (big_num_opt == 0) /* User passed --no-big-num */ 2436 big_num = false; 2437 2438 if (!argc && target__none(&target)) 2439 usage_with_options(stat_usage, stat_options); 2440 2441 if (run_count < 0) { 2442 pr_err("Run count must be a positive number\n"); 2443 parse_options_usage(stat_usage, stat_options, "r", 1); 2444 goto out; 2445 } else if (run_count == 0) { 2446 forever = true; 2447 run_count = 1; 2448 } 2449 2450 if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { 2451 fprintf(stderr, "The --per-thread option is only available " 2452 "when monitoring via -p -t options.\n"); 2453 parse_options_usage(NULL, stat_options, "p", 1); 2454 parse_options_usage(NULL, stat_options, "t", 1); 2455 goto out; 2456 } 2457 2458 /* 2459 * no_aggr, cgroup are for system-wide only 2460 * --per-thread is aggregated per thread, we dont mix it with cpu mode 2461 */ 2462 if (((stat_config.aggr_mode != AGGR_GLOBAL && 2463 stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) && 2464 !target__has_cpu(&target)) { 2465 fprintf(stderr, "both cgroup and no-aggregation " 2466 "modes only available in system-wide mode\n"); 2467 2468 parse_options_usage(stat_usage, stat_options, "G", 1); 2469 parse_options_usage(NULL, stat_options, "A", 1); 2470 parse_options_usage(NULL, stat_options, "a", 1); 2471 goto out; 2472 } 2473 2474 if (add_default_attributes()) 2475 goto out; 2476 2477 target__validate(&target); 2478 2479 if (perf_evlist__create_maps(evsel_list, &target) < 0) { 2480 if (target__has_task(&target)) { 2481 pr_err("Problems finding threads of monitor\n"); 2482 parse_options_usage(stat_usage, stat_options, "p", 1); 2483 parse_options_usage(NULL, stat_options, "t", 1); 2484 } else if (target__has_cpu(&target)) { 2485 perror("failed to parse CPUs map"); 2486 parse_options_usage(stat_usage, stat_options, "C", 1); 2487 parse_options_usage(NULL, stat_options, "a", 1); 2488 } 2489 goto out; 2490 } 2491 2492 /* 2493 * Initialize thread_map with comm names, 2494 * so we could print it out on output. 2495 */ 2496 if (stat_config.aggr_mode == AGGR_THREAD) 2497 thread_map__read_comms(evsel_list->threads); 2498 2499 if (interval && interval < 100) { 2500 if (interval < 10) { 2501 pr_err("print interval must be >= 10ms\n"); 2502 parse_options_usage(stat_usage, stat_options, "I", 1); 2503 goto out; 2504 } else 2505 pr_warning("print interval < 100ms. " 2506 "The overhead percentage could be high in some cases. " 2507 "Please proceed with caution.\n"); 2508 } 2509 2510 if (perf_evlist__alloc_stats(evsel_list, interval)) 2511 goto out; 2512 2513 if (perf_stat_init_aggr_mode()) 2514 goto out; 2515 2516 /* 2517 * We dont want to block the signals - that would cause 2518 * child tasks to inherit that and Ctrl-C would not work. 2519 * What we want is for Ctrl-C to work in the exec()-ed 2520 * task, but being ignored by perf stat itself: 2521 */ 2522 atexit(sig_atexit); 2523 if (!forever) 2524 signal(SIGINT, skip_signal); 2525 signal(SIGCHLD, skip_signal); 2526 signal(SIGALRM, skip_signal); 2527 signal(SIGABRT, skip_signal); 2528 2529 status = 0; 2530 for (run_idx = 0; forever || run_idx < run_count; run_idx++) { 2531 if (run_count != 1 && verbose) 2532 fprintf(output, "[ perf stat: executing run #%d ... ]\n", 2533 run_idx + 1); 2534 2535 status = run_perf_stat(argc, argv); 2536 if (forever && status != -1) { 2537 print_counters(NULL, argc, argv); 2538 perf_stat__reset_stats(); 2539 } 2540 } 2541 2542 if (!forever && status != -1 && !interval) 2543 print_counters(NULL, argc, argv); 2544 2545 if (STAT_RECORD) { 2546 /* 2547 * We synthesize the kernel mmap record just so that older tools 2548 * don't emit warnings about not being able to resolve symbols 2549 * due to /proc/sys/kernel/kptr_restrict settings and instear provide 2550 * a saner message about no samples being in the perf.data file. 2551 * 2552 * This also serves to suppress a warning about f_header.data.size == 0 2553 * in header.c at the moment 'perf stat record' gets introduced, which 2554 * is not really needed once we start adding the stat specific PERF_RECORD_ 2555 * records, but the need to suppress the kptr_restrict messages in older 2556 * tools remain -acme 2557 */ 2558 int fd = perf_data_file__fd(&perf_stat.file); 2559 int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, 2560 process_synthesized_event, 2561 &perf_stat.session->machines.host); 2562 if (err) { 2563 pr_warning("Couldn't synthesize the kernel mmap record, harmless, " 2564 "older tools may produce warnings about this file\n."); 2565 } 2566 2567 if (!interval) { 2568 if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL)) 2569 pr_err("failed to write stat round event\n"); 2570 } 2571 2572 if (!perf_stat.file.is_pipe) { 2573 perf_stat.session->header.data_size += perf_stat.bytes_written; 2574 perf_session__write_header(perf_stat.session, evsel_list, fd, true); 2575 } 2576 2577 perf_session__delete(perf_stat.session); 2578 } 2579 2580 perf_stat__exit_aggr_mode(); 2581 perf_evlist__free_stats(evsel_list); 2582 out: 2583 perf_evlist__delete(evsel_list); 2584 return status; 2585 } 2586