1 /* 2 * builtin-stat.c 3 * 4 * Builtin stat command: Give a precise performance counters summary 5 * overview about any workload, CPU or specific PID. 6 * 7 * Sample output: 8 9 $ perf stat ./hackbench 10 10 11 Time: 0.118 12 13 Performance counter stats for './hackbench 10': 14 15 1708.761321 task-clock # 11.037 CPUs utilized 16 41,190 context-switches # 0.024 M/sec 17 6,735 CPU-migrations # 0.004 M/sec 18 17,318 page-faults # 0.010 M/sec 19 5,205,202,243 cycles # 3.046 GHz 20 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle 21 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle 22 2,603,501,247 instructions # 0.50 insns per cycle 23 # 1.48 stalled cycles per insn 24 484,357,498 branches # 283.455 M/sec 25 6,388,934 branch-misses # 1.32% of all branches 26 27 0.154822978 seconds time elapsed 28 29 * 30 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 31 * 32 * Improvements and fixes by: 33 * 34 * Arjan van de Ven <arjan@linux.intel.com> 35 * Yanmin Zhang <yanmin.zhang@intel.com> 36 * Wu Fengguang <fengguang.wu@intel.com> 37 * Mike Galbraith <efault@gmx.de> 38 * Paul Mackerras <paulus@samba.org> 39 * Jaswinder Singh Rajput <jaswinder@kernel.org> 40 * 41 * Released under the GPL v2. (and only v2, not any later version) 42 */ 43 44 #include "perf.h" 45 #include "builtin.h" 46 #include "util/cgroup.h" 47 #include "util/util.h" 48 #include <subcmd/parse-options.h> 49 #include "util/parse-events.h" 50 #include "util/pmu.h" 51 #include "util/event.h" 52 #include "util/evlist.h" 53 #include "util/evsel.h" 54 #include "util/debug.h" 55 #include "util/drv_configs.h" 56 #include "util/color.h" 57 #include "util/stat.h" 58 #include "util/header.h" 59 #include "util/cpumap.h" 60 #include "util/thread.h" 61 #include "util/thread_map.h" 62 #include "util/counts.h" 63 #include "util/group.h" 64 #include "util/session.h" 65 #include "util/tool.h" 66 #include "util/group.h" 67 #include "asm/bug.h" 68 69 #include <linux/time64.h> 70 #include <api/fs/fs.h> 71 #include <stdlib.h> 72 #include <sys/prctl.h> 73 #include <locale.h> 74 #include <math.h> 75 76 #define DEFAULT_SEPARATOR " " 77 #define CNTR_NOT_SUPPORTED "<not supported>" 78 #define CNTR_NOT_COUNTED "<not counted>" 79 80 static void print_counters(struct timespec *ts, int argc, const char **argv); 81 82 /* Default events used for perf stat -T */ 83 static const char *transaction_attrs = { 84 "task-clock," 85 "{" 86 "instructions," 87 "cycles," 88 "cpu/cycles-t/," 89 "cpu/tx-start/," 90 "cpu/el-start/," 91 "cpu/cycles-ct/" 92 "}" 93 }; 94 95 /* More limited version when the CPU does not have all events. */ 96 static const char * transaction_limited_attrs = { 97 "task-clock," 98 "{" 99 "instructions," 100 "cycles," 101 "cpu/cycles-t/," 102 "cpu/tx-start/" 103 "}" 104 }; 105 106 static const char * topdown_attrs[] = { 107 "topdown-total-slots", 108 "topdown-slots-retired", 109 "topdown-recovery-bubbles", 110 "topdown-fetch-bubbles", 111 "topdown-slots-issued", 112 NULL, 113 }; 114 115 static struct perf_evlist *evsel_list; 116 117 static struct target target = { 118 .uid = UINT_MAX, 119 }; 120 121 typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu); 122 123 static int run_count = 1; 124 static bool no_inherit = false; 125 static volatile pid_t child_pid = -1; 126 static bool null_run = false; 127 static int detailed_run = 0; 128 static bool transaction_run; 129 static bool topdown_run = false; 130 static bool big_num = true; 131 static int big_num_opt = -1; 132 static const char *csv_sep = NULL; 133 static bool csv_output = false; 134 static bool group = false; 135 static const char *pre_cmd = NULL; 136 static const char *post_cmd = NULL; 137 static bool sync_run = false; 138 static unsigned int initial_delay = 0; 139 static unsigned int unit_width = 4; /* strlen("unit") */ 140 static bool forever = false; 141 static bool metric_only = false; 142 static bool force_metric_only = false; 143 static struct timespec ref_time; 144 static struct cpu_map *aggr_map; 145 static aggr_get_id_t aggr_get_id; 146 static bool append_file; 147 static const char *output_name; 148 static int output_fd; 149 150 struct perf_stat { 151 bool record; 152 struct perf_data_file file; 153 struct perf_session *session; 154 u64 bytes_written; 155 struct perf_tool tool; 156 bool maps_allocated; 157 struct cpu_map *cpus; 158 struct thread_map *threads; 159 enum aggr_mode aggr_mode; 160 }; 161 162 static struct perf_stat perf_stat; 163 #define STAT_RECORD perf_stat.record 164 165 static volatile int done = 0; 166 167 static struct perf_stat_config stat_config = { 168 .aggr_mode = AGGR_GLOBAL, 169 .scale = true, 170 }; 171 172 static inline void diff_timespec(struct timespec *r, struct timespec *a, 173 struct timespec *b) 174 { 175 r->tv_sec = a->tv_sec - b->tv_sec; 176 if (a->tv_nsec < b->tv_nsec) { 177 r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec; 178 r->tv_sec--; 179 } else { 180 r->tv_nsec = a->tv_nsec - b->tv_nsec ; 181 } 182 } 183 184 static void perf_stat__reset_stats(void) 185 { 186 perf_evlist__reset_stats(evsel_list); 187 perf_stat__reset_shadow_stats(); 188 } 189 190 static int create_perf_stat_counter(struct perf_evsel *evsel) 191 { 192 struct perf_event_attr *attr = &evsel->attr; 193 194 if (stat_config.scale) 195 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 196 PERF_FORMAT_TOTAL_TIME_RUNNING; 197 198 attr->inherit = !no_inherit; 199 200 /* 201 * Some events get initialized with sample_(period/type) set, 202 * like tracepoints. Clear it up for counting. 203 */ 204 attr->sample_period = 0; 205 206 /* 207 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless 208 * while avoiding that older tools show confusing messages. 209 * 210 * However for pipe sessions we need to keep it zero, 211 * because script's perf_evsel__check_attr is triggered 212 * by attr->sample_type != 0, and we can't run it on 213 * stat sessions. 214 */ 215 if (!(STAT_RECORD && perf_stat.file.is_pipe)) 216 attr->sample_type = PERF_SAMPLE_IDENTIFIER; 217 218 /* 219 * Disabling all counters initially, they will be enabled 220 * either manually by us or by kernel via enable_on_exec 221 * set later. 222 */ 223 if (perf_evsel__is_group_leader(evsel)) { 224 attr->disabled = 1; 225 226 /* 227 * In case of initial_delay we enable tracee 228 * events manually. 229 */ 230 if (target__none(&target) && !initial_delay) 231 attr->enable_on_exec = 1; 232 } 233 234 if (target__has_cpu(&target)) 235 return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); 236 237 return perf_evsel__open_per_thread(evsel, evsel_list->threads); 238 } 239 240 /* 241 * Does the counter have nsecs as a unit? 242 */ 243 static inline int nsec_counter(struct perf_evsel *evsel) 244 { 245 if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || 246 perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 247 return 1; 248 249 return 0; 250 } 251 252 static int process_synthesized_event(struct perf_tool *tool __maybe_unused, 253 union perf_event *event, 254 struct perf_sample *sample __maybe_unused, 255 struct machine *machine __maybe_unused) 256 { 257 if (perf_data_file__write(&perf_stat.file, event, event->header.size) < 0) { 258 pr_err("failed to write perf data, error: %m\n"); 259 return -1; 260 } 261 262 perf_stat.bytes_written += event->header.size; 263 return 0; 264 } 265 266 static int write_stat_round_event(u64 tm, u64 type) 267 { 268 return perf_event__synthesize_stat_round(NULL, tm, type, 269 process_synthesized_event, 270 NULL); 271 } 272 273 #define WRITE_STAT_ROUND_EVENT(time, interval) \ 274 write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval) 275 276 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) 277 278 static int 279 perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread, 280 struct perf_counts_values *count) 281 { 282 struct perf_sample_id *sid = SID(counter, cpu, thread); 283 284 return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count, 285 process_synthesized_event, NULL); 286 } 287 288 /* 289 * Read out the results of a single counter: 290 * do not aggregate counts across CPUs in system-wide mode 291 */ 292 static int read_counter(struct perf_evsel *counter) 293 { 294 int nthreads = thread_map__nr(evsel_list->threads); 295 int ncpus, cpu, thread; 296 297 if (target__has_cpu(&target)) 298 ncpus = perf_evsel__nr_cpus(counter); 299 else 300 ncpus = 1; 301 302 if (!counter->supported) 303 return -ENOENT; 304 305 if (counter->system_wide) 306 nthreads = 1; 307 308 for (thread = 0; thread < nthreads; thread++) { 309 for (cpu = 0; cpu < ncpus; cpu++) { 310 struct perf_counts_values *count; 311 312 count = perf_counts(counter->counts, cpu, thread); 313 if (perf_evsel__read(counter, cpu, thread, count)) 314 return -1; 315 316 if (STAT_RECORD) { 317 if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { 318 pr_err("failed to write stat event\n"); 319 return -1; 320 } 321 } 322 323 if (verbose > 1) { 324 fprintf(stat_config.output, 325 "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", 326 perf_evsel__name(counter), 327 cpu, 328 count->val, count->ena, count->run); 329 } 330 } 331 } 332 333 return 0; 334 } 335 336 static void read_counters(void) 337 { 338 struct perf_evsel *counter; 339 340 evlist__for_each_entry(evsel_list, counter) { 341 if (read_counter(counter)) 342 pr_debug("failed to read counter %s\n", counter->name); 343 344 if (perf_stat_process_counter(&stat_config, counter)) 345 pr_warning("failed to process counter %s\n", counter->name); 346 } 347 } 348 349 static void process_interval(void) 350 { 351 struct timespec ts, rs; 352 353 read_counters(); 354 355 clock_gettime(CLOCK_MONOTONIC, &ts); 356 diff_timespec(&rs, &ts, &ref_time); 357 358 if (STAT_RECORD) { 359 if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL)) 360 pr_err("failed to write stat round event\n"); 361 } 362 363 print_counters(&rs, 0, NULL); 364 } 365 366 static void enable_counters(void) 367 { 368 if (initial_delay) 369 usleep(initial_delay * USEC_PER_MSEC); 370 371 /* 372 * We need to enable counters only if: 373 * - we don't have tracee (attaching to task or cpu) 374 * - we have initial delay configured 375 */ 376 if (!target__none(&target) || initial_delay) 377 perf_evlist__enable(evsel_list); 378 } 379 380 static void disable_counters(void) 381 { 382 /* 383 * If we don't have tracee (attaching to task or cpu), counters may 384 * still be running. To get accurate group ratios, we must stop groups 385 * from counting before reading their constituent counters. 386 */ 387 if (!target__none(&target)) 388 perf_evlist__disable(evsel_list); 389 } 390 391 static volatile int workload_exec_errno; 392 393 /* 394 * perf_evlist__prepare_workload will send a SIGUSR1 395 * if the fork fails, since we asked by setting its 396 * want_signal to true. 397 */ 398 static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info, 399 void *ucontext __maybe_unused) 400 { 401 workload_exec_errno = info->si_value.sival_int; 402 } 403 404 static bool has_unit(struct perf_evsel *counter) 405 { 406 return counter->unit && *counter->unit; 407 } 408 409 static bool has_scale(struct perf_evsel *counter) 410 { 411 return counter->scale != 1; 412 } 413 414 static int perf_stat_synthesize_config(bool is_pipe) 415 { 416 struct perf_evsel *counter; 417 int err; 418 419 if (is_pipe) { 420 err = perf_event__synthesize_attrs(NULL, perf_stat.session, 421 process_synthesized_event); 422 if (err < 0) { 423 pr_err("Couldn't synthesize attrs.\n"); 424 return err; 425 } 426 } 427 428 /* 429 * Synthesize other events stuff not carried within 430 * attr event - unit, scale, name 431 */ 432 evlist__for_each_entry(evsel_list, counter) { 433 if (!counter->supported) 434 continue; 435 436 /* 437 * Synthesize unit and scale only if it's defined. 438 */ 439 if (has_unit(counter)) { 440 err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event); 441 if (err < 0) { 442 pr_err("Couldn't synthesize evsel unit.\n"); 443 return err; 444 } 445 } 446 447 if (has_scale(counter)) { 448 err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event); 449 if (err < 0) { 450 pr_err("Couldn't synthesize evsel scale.\n"); 451 return err; 452 } 453 } 454 455 if (counter->own_cpus) { 456 err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event); 457 if (err < 0) { 458 pr_err("Couldn't synthesize evsel scale.\n"); 459 return err; 460 } 461 } 462 463 /* 464 * Name is needed only for pipe output, 465 * perf.data carries event names. 466 */ 467 if (is_pipe) { 468 err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event); 469 if (err < 0) { 470 pr_err("Couldn't synthesize evsel name.\n"); 471 return err; 472 } 473 } 474 } 475 476 err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads, 477 process_synthesized_event, 478 NULL); 479 if (err < 0) { 480 pr_err("Couldn't synthesize thread map.\n"); 481 return err; 482 } 483 484 err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus, 485 process_synthesized_event, NULL); 486 if (err < 0) { 487 pr_err("Couldn't synthesize thread map.\n"); 488 return err; 489 } 490 491 err = perf_event__synthesize_stat_config(NULL, &stat_config, 492 process_synthesized_event, NULL); 493 if (err < 0) { 494 pr_err("Couldn't synthesize config.\n"); 495 return err; 496 } 497 498 return 0; 499 } 500 501 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 502 503 static int __store_counter_ids(struct perf_evsel *counter, 504 struct cpu_map *cpus, 505 struct thread_map *threads) 506 { 507 int cpu, thread; 508 509 for (cpu = 0; cpu < cpus->nr; cpu++) { 510 for (thread = 0; thread < threads->nr; thread++) { 511 int fd = FD(counter, cpu, thread); 512 513 if (perf_evlist__id_add_fd(evsel_list, counter, 514 cpu, thread, fd) < 0) 515 return -1; 516 } 517 } 518 519 return 0; 520 } 521 522 static int store_counter_ids(struct perf_evsel *counter) 523 { 524 struct cpu_map *cpus = counter->cpus; 525 struct thread_map *threads = counter->threads; 526 527 if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr)) 528 return -ENOMEM; 529 530 return __store_counter_ids(counter, cpus, threads); 531 } 532 533 static int __run_perf_stat(int argc, const char **argv) 534 { 535 int interval = stat_config.interval; 536 char msg[512]; 537 unsigned long long t0, t1; 538 struct perf_evsel *counter; 539 struct timespec ts; 540 size_t l; 541 int status = 0; 542 const bool forks = (argc > 0); 543 bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false; 544 struct perf_evsel_config_term *err_term; 545 546 if (interval) { 547 ts.tv_sec = interval / USEC_PER_MSEC; 548 ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC; 549 } else { 550 ts.tv_sec = 1; 551 ts.tv_nsec = 0; 552 } 553 554 if (forks) { 555 if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe, 556 workload_exec_failed_signal) < 0) { 557 perror("failed to prepare workload"); 558 return -1; 559 } 560 child_pid = evsel_list->workload.pid; 561 } 562 563 if (group) 564 perf_evlist__set_leader(evsel_list); 565 566 evlist__for_each_entry(evsel_list, counter) { 567 try_again: 568 if (create_perf_stat_counter(counter) < 0) { 569 /* 570 * PPC returns ENXIO for HW counters until 2.6.37 571 * (behavior changed with commit b0a873e). 572 */ 573 if (errno == EINVAL || errno == ENOSYS || 574 errno == ENOENT || errno == EOPNOTSUPP || 575 errno == ENXIO) { 576 if (verbose) 577 ui__warning("%s event is not supported by the kernel.\n", 578 perf_evsel__name(counter)); 579 counter->supported = false; 580 581 if ((counter->leader != counter) || 582 !(counter->leader->nr_members > 1)) 583 continue; 584 } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { 585 if (verbose) 586 ui__warning("%s\n", msg); 587 goto try_again; 588 } 589 590 perf_evsel__open_strerror(counter, &target, 591 errno, msg, sizeof(msg)); 592 ui__error("%s\n", msg); 593 594 if (child_pid != -1) 595 kill(child_pid, SIGTERM); 596 597 return -1; 598 } 599 counter->supported = true; 600 601 l = strlen(counter->unit); 602 if (l > unit_width) 603 unit_width = l; 604 605 if (STAT_RECORD && store_counter_ids(counter)) 606 return -1; 607 } 608 609 if (perf_evlist__apply_filters(evsel_list, &counter)) { 610 error("failed to set filter \"%s\" on event %s with %d (%s)\n", 611 counter->filter, perf_evsel__name(counter), errno, 612 str_error_r(errno, msg, sizeof(msg))); 613 return -1; 614 } 615 616 if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) { 617 error("failed to set config \"%s\" on event %s with %d (%s)\n", 618 err_term->val.drv_cfg, perf_evsel__name(counter), errno, 619 str_error_r(errno, msg, sizeof(msg))); 620 return -1; 621 } 622 623 if (STAT_RECORD) { 624 int err, fd = perf_data_file__fd(&perf_stat.file); 625 626 if (is_pipe) { 627 err = perf_header__write_pipe(perf_data_file__fd(&perf_stat.file)); 628 } else { 629 err = perf_session__write_header(perf_stat.session, evsel_list, 630 fd, false); 631 } 632 633 if (err < 0) 634 return err; 635 636 err = perf_stat_synthesize_config(is_pipe); 637 if (err < 0) 638 return err; 639 } 640 641 /* 642 * Enable counters and exec the command: 643 */ 644 t0 = rdclock(); 645 clock_gettime(CLOCK_MONOTONIC, &ref_time); 646 647 if (forks) { 648 perf_evlist__start_workload(evsel_list); 649 enable_counters(); 650 651 if (interval) { 652 while (!waitpid(child_pid, &status, WNOHANG)) { 653 nanosleep(&ts, NULL); 654 process_interval(); 655 } 656 } 657 wait(&status); 658 659 if (workload_exec_errno) { 660 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 661 pr_err("Workload failed: %s\n", emsg); 662 return -1; 663 } 664 665 if (WIFSIGNALED(status)) 666 psignal(WTERMSIG(status), argv[0]); 667 } else { 668 enable_counters(); 669 while (!done) { 670 nanosleep(&ts, NULL); 671 if (interval) 672 process_interval(); 673 } 674 } 675 676 disable_counters(); 677 678 t1 = rdclock(); 679 680 update_stats(&walltime_nsecs_stats, t1 - t0); 681 682 /* 683 * Closing a group leader splits the group, and as we only disable 684 * group leaders, results in remaining events becoming enabled. To 685 * avoid arbitrary skew, we must read all counters before closing any 686 * group leaders. 687 */ 688 read_counters(); 689 perf_evlist__close(evsel_list); 690 691 return WEXITSTATUS(status); 692 } 693 694 static int run_perf_stat(int argc, const char **argv) 695 { 696 int ret; 697 698 if (pre_cmd) { 699 ret = system(pre_cmd); 700 if (ret) 701 return ret; 702 } 703 704 if (sync_run) 705 sync(); 706 707 ret = __run_perf_stat(argc, argv); 708 if (ret) 709 return ret; 710 711 if (post_cmd) { 712 ret = system(post_cmd); 713 if (ret) 714 return ret; 715 } 716 717 return ret; 718 } 719 720 static void print_running(u64 run, u64 ena) 721 { 722 if (csv_output) { 723 fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", 724 csv_sep, 725 run, 726 csv_sep, 727 ena ? 100.0 * run / ena : 100.0); 728 } else if (run != ena) { 729 fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); 730 } 731 } 732 733 static void print_noise_pct(double total, double avg) 734 { 735 double pct = rel_stddev_stats(total, avg); 736 737 if (csv_output) 738 fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct); 739 else if (pct) 740 fprintf(stat_config.output, " ( +-%6.2f%% )", pct); 741 } 742 743 static void print_noise(struct perf_evsel *evsel, double avg) 744 { 745 struct perf_stat_evsel *ps; 746 747 if (run_count == 1) 748 return; 749 750 ps = evsel->priv; 751 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 752 } 753 754 static void aggr_printout(struct perf_evsel *evsel, int id, int nr) 755 { 756 switch (stat_config.aggr_mode) { 757 case AGGR_CORE: 758 fprintf(stat_config.output, "S%d-C%*d%s%*d%s", 759 cpu_map__id_to_socket(id), 760 csv_output ? 0 : -8, 761 cpu_map__id_to_cpu(id), 762 csv_sep, 763 csv_output ? 0 : 4, 764 nr, 765 csv_sep); 766 break; 767 case AGGR_SOCKET: 768 fprintf(stat_config.output, "S%*d%s%*d%s", 769 csv_output ? 0 : -5, 770 id, 771 csv_sep, 772 csv_output ? 0 : 4, 773 nr, 774 csv_sep); 775 break; 776 case AGGR_NONE: 777 fprintf(stat_config.output, "CPU%*d%s", 778 csv_output ? 0 : -4, 779 perf_evsel__cpus(evsel)->map[id], csv_sep); 780 break; 781 case AGGR_THREAD: 782 fprintf(stat_config.output, "%*s-%*d%s", 783 csv_output ? 0 : 16, 784 thread_map__comm(evsel->threads, id), 785 csv_output ? 0 : -8, 786 thread_map__pid(evsel->threads, id), 787 csv_sep); 788 break; 789 case AGGR_GLOBAL: 790 case AGGR_UNSET: 791 default: 792 break; 793 } 794 } 795 796 struct outstate { 797 FILE *fh; 798 bool newline; 799 const char *prefix; 800 int nfields; 801 int id, nr; 802 struct perf_evsel *evsel; 803 }; 804 805 #define METRIC_LEN 35 806 807 static void new_line_std(void *ctx) 808 { 809 struct outstate *os = ctx; 810 811 os->newline = true; 812 } 813 814 static void do_new_line_std(struct outstate *os) 815 { 816 fputc('\n', os->fh); 817 fputs(os->prefix, os->fh); 818 aggr_printout(os->evsel, os->id, os->nr); 819 if (stat_config.aggr_mode == AGGR_NONE) 820 fprintf(os->fh, " "); 821 fprintf(os->fh, " "); 822 } 823 824 static void print_metric_std(void *ctx, const char *color, const char *fmt, 825 const char *unit, double val) 826 { 827 struct outstate *os = ctx; 828 FILE *out = os->fh; 829 int n; 830 bool newline = os->newline; 831 832 os->newline = false; 833 834 if (unit == NULL || fmt == NULL) { 835 fprintf(out, "%-*s", METRIC_LEN, ""); 836 return; 837 } 838 839 if (newline) 840 do_new_line_std(os); 841 842 n = fprintf(out, " # "); 843 if (color) 844 n += color_fprintf(out, color, fmt, val); 845 else 846 n += fprintf(out, fmt, val); 847 fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); 848 } 849 850 static void new_line_csv(void *ctx) 851 { 852 struct outstate *os = ctx; 853 int i; 854 855 fputc('\n', os->fh); 856 if (os->prefix) 857 fprintf(os->fh, "%s%s", os->prefix, csv_sep); 858 aggr_printout(os->evsel, os->id, os->nr); 859 for (i = 0; i < os->nfields; i++) 860 fputs(csv_sep, os->fh); 861 } 862 863 static void print_metric_csv(void *ctx, 864 const char *color __maybe_unused, 865 const char *fmt, const char *unit, double val) 866 { 867 struct outstate *os = ctx; 868 FILE *out = os->fh; 869 char buf[64], *vals, *ends; 870 871 if (unit == NULL || fmt == NULL) { 872 fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep); 873 return; 874 } 875 snprintf(buf, sizeof(buf), fmt, val); 876 vals = buf; 877 while (isspace(*vals)) 878 vals++; 879 ends = vals; 880 while (isdigit(*ends) || *ends == '.') 881 ends++; 882 *ends = 0; 883 while (isspace(*unit)) 884 unit++; 885 fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); 886 } 887 888 #define METRIC_ONLY_LEN 20 889 890 /* Filter out some columns that don't work well in metrics only mode */ 891 892 static bool valid_only_metric(const char *unit) 893 { 894 if (!unit) 895 return false; 896 if (strstr(unit, "/sec") || 897 strstr(unit, "hz") || 898 strstr(unit, "Hz") || 899 strstr(unit, "CPUs utilized")) 900 return false; 901 return true; 902 } 903 904 static const char *fixunit(char *buf, struct perf_evsel *evsel, 905 const char *unit) 906 { 907 if (!strncmp(unit, "of all", 6)) { 908 snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), 909 unit); 910 return buf; 911 } 912 return unit; 913 } 914 915 static void print_metric_only(void *ctx, const char *color, const char *fmt, 916 const char *unit, double val) 917 { 918 struct outstate *os = ctx; 919 FILE *out = os->fh; 920 int n; 921 char buf[1024]; 922 unsigned mlen = METRIC_ONLY_LEN; 923 924 if (!valid_only_metric(unit)) 925 return; 926 unit = fixunit(buf, os->evsel, unit); 927 if (color) 928 n = color_fprintf(out, color, fmt, val); 929 else 930 n = fprintf(out, fmt, val); 931 if (n > METRIC_ONLY_LEN) 932 n = METRIC_ONLY_LEN; 933 if (mlen < strlen(unit)) 934 mlen = strlen(unit) + 1; 935 fprintf(out, "%*s", mlen - n, ""); 936 } 937 938 static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, 939 const char *fmt, 940 const char *unit, double val) 941 { 942 struct outstate *os = ctx; 943 FILE *out = os->fh; 944 char buf[64], *vals, *ends; 945 char tbuf[1024]; 946 947 if (!valid_only_metric(unit)) 948 return; 949 unit = fixunit(tbuf, os->evsel, unit); 950 snprintf(buf, sizeof buf, fmt, val); 951 vals = buf; 952 while (isspace(*vals)) 953 vals++; 954 ends = vals; 955 while (isdigit(*ends) || *ends == '.') 956 ends++; 957 *ends = 0; 958 fprintf(out, "%s%s", vals, csv_sep); 959 } 960 961 static void new_line_metric(void *ctx __maybe_unused) 962 { 963 } 964 965 static void print_metric_header(void *ctx, const char *color __maybe_unused, 966 const char *fmt __maybe_unused, 967 const char *unit, double val __maybe_unused) 968 { 969 struct outstate *os = ctx; 970 char tbuf[1024]; 971 972 if (!valid_only_metric(unit)) 973 return; 974 unit = fixunit(tbuf, os->evsel, unit); 975 if (csv_output) 976 fprintf(os->fh, "%s%s", unit, csv_sep); 977 else 978 fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit); 979 } 980 981 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) 982 { 983 FILE *output = stat_config.output; 984 double msecs = avg / NSEC_PER_MSEC; 985 const char *fmt_v, *fmt_n; 986 char name[25]; 987 988 fmt_v = csv_output ? "%.6f%s" : "%18.6f%s"; 989 fmt_n = csv_output ? "%s" : "%-25s"; 990 991 aggr_printout(evsel, id, nr); 992 993 scnprintf(name, sizeof(name), "%s%s", 994 perf_evsel__name(evsel), csv_output ? "" : " (msec)"); 995 996 fprintf(output, fmt_v, msecs, csv_sep); 997 998 if (csv_output) 999 fprintf(output, "%s%s", evsel->unit, csv_sep); 1000 else 1001 fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep); 1002 1003 fprintf(output, fmt_n, name); 1004 1005 if (evsel->cgrp) 1006 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1007 } 1008 1009 static int first_shadow_cpu(struct perf_evsel *evsel, int id) 1010 { 1011 int i; 1012 1013 if (!aggr_get_id) 1014 return 0; 1015 1016 if (stat_config.aggr_mode == AGGR_NONE) 1017 return id; 1018 1019 if (stat_config.aggr_mode == AGGR_GLOBAL) 1020 return 0; 1021 1022 for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { 1023 int cpu2 = perf_evsel__cpus(evsel)->map[i]; 1024 1025 if (aggr_get_id(evsel_list->cpus, cpu2) == id) 1026 return cpu2; 1027 } 1028 return 0; 1029 } 1030 1031 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 1032 { 1033 FILE *output = stat_config.output; 1034 double sc = evsel->scale; 1035 const char *fmt; 1036 1037 if (csv_output) { 1038 fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; 1039 } else { 1040 if (big_num) 1041 fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; 1042 else 1043 fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; 1044 } 1045 1046 aggr_printout(evsel, id, nr); 1047 1048 fprintf(output, fmt, avg, csv_sep); 1049 1050 if (evsel->unit) 1051 fprintf(output, "%-*s%s", 1052 csv_output ? 0 : unit_width, 1053 evsel->unit, csv_sep); 1054 1055 fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); 1056 1057 if (evsel->cgrp) 1058 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1059 } 1060 1061 static void printout(int id, int nr, struct perf_evsel *counter, double uval, 1062 char *prefix, u64 run, u64 ena, double noise) 1063 { 1064 struct perf_stat_output_ctx out; 1065 struct outstate os = { 1066 .fh = stat_config.output, 1067 .prefix = prefix ? prefix : "", 1068 .id = id, 1069 .nr = nr, 1070 .evsel = counter, 1071 }; 1072 print_metric_t pm = print_metric_std; 1073 void (*nl)(void *); 1074 1075 if (metric_only) { 1076 nl = new_line_metric; 1077 if (csv_output) 1078 pm = print_metric_only_csv; 1079 else 1080 pm = print_metric_only; 1081 } else 1082 nl = new_line_std; 1083 1084 if (csv_output && !metric_only) { 1085 static int aggr_fields[] = { 1086 [AGGR_GLOBAL] = 0, 1087 [AGGR_THREAD] = 1, 1088 [AGGR_NONE] = 1, 1089 [AGGR_SOCKET] = 2, 1090 [AGGR_CORE] = 2, 1091 }; 1092 1093 pm = print_metric_csv; 1094 nl = new_line_csv; 1095 os.nfields = 3; 1096 os.nfields += aggr_fields[stat_config.aggr_mode]; 1097 if (counter->cgrp) 1098 os.nfields++; 1099 } 1100 if (run == 0 || ena == 0 || counter->counts->scaled == -1) { 1101 if (metric_only) { 1102 pm(&os, NULL, "", "", 0); 1103 return; 1104 } 1105 aggr_printout(counter, id, nr); 1106 1107 fprintf(stat_config.output, "%*s%s", 1108 csv_output ? 0 : 18, 1109 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 1110 csv_sep); 1111 1112 fprintf(stat_config.output, "%-*s%s", 1113 csv_output ? 0 : unit_width, 1114 counter->unit, csv_sep); 1115 1116 fprintf(stat_config.output, "%*s", 1117 csv_output ? 0 : -25, 1118 perf_evsel__name(counter)); 1119 1120 if (counter->cgrp) 1121 fprintf(stat_config.output, "%s%s", 1122 csv_sep, counter->cgrp->name); 1123 1124 if (!csv_output) 1125 pm(&os, NULL, NULL, "", 0); 1126 print_noise(counter, noise); 1127 print_running(run, ena); 1128 if (csv_output) 1129 pm(&os, NULL, NULL, "", 0); 1130 return; 1131 } 1132 1133 if (metric_only) 1134 /* nothing */; 1135 else if (nsec_counter(counter)) 1136 nsec_printout(id, nr, counter, uval); 1137 else 1138 abs_printout(id, nr, counter, uval); 1139 1140 out.print_metric = pm; 1141 out.new_line = nl; 1142 out.ctx = &os; 1143 1144 if (csv_output && !metric_only) { 1145 print_noise(counter, noise); 1146 print_running(run, ena); 1147 } 1148 1149 perf_stat__print_shadow_stats(counter, uval, 1150 first_shadow_cpu(counter, id), 1151 &out); 1152 if (!csv_output && !metric_only) { 1153 print_noise(counter, noise); 1154 print_running(run, ena); 1155 } 1156 } 1157 1158 static void aggr_update_shadow(void) 1159 { 1160 int cpu, s2, id, s; 1161 u64 val; 1162 struct perf_evsel *counter; 1163 1164 for (s = 0; s < aggr_map->nr; s++) { 1165 id = aggr_map->map[s]; 1166 evlist__for_each_entry(evsel_list, counter) { 1167 val = 0; 1168 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1169 s2 = aggr_get_id(evsel_list->cpus, cpu); 1170 if (s2 != id) 1171 continue; 1172 val += perf_counts(counter->counts, cpu, 0)->val; 1173 } 1174 val = val * counter->scale; 1175 perf_stat__update_shadow_stats(counter, &val, 1176 first_shadow_cpu(counter, id)); 1177 } 1178 } 1179 } 1180 1181 static void print_aggr(char *prefix) 1182 { 1183 FILE *output = stat_config.output; 1184 struct perf_evsel *counter; 1185 int cpu, s, s2, id, nr; 1186 double uval; 1187 u64 ena, run, val; 1188 bool first; 1189 1190 if (!(aggr_map || aggr_get_id)) 1191 return; 1192 1193 aggr_update_shadow(); 1194 1195 /* 1196 * With metric_only everything is on a single line. 1197 * Without each counter has its own line. 1198 */ 1199 for (s = 0; s < aggr_map->nr; s++) { 1200 if (prefix && metric_only) 1201 fprintf(output, "%s", prefix); 1202 1203 id = aggr_map->map[s]; 1204 first = true; 1205 evlist__for_each_entry(evsel_list, counter) { 1206 val = ena = run = 0; 1207 nr = 0; 1208 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1209 s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); 1210 if (s2 != id) 1211 continue; 1212 val += perf_counts(counter->counts, cpu, 0)->val; 1213 ena += perf_counts(counter->counts, cpu, 0)->ena; 1214 run += perf_counts(counter->counts, cpu, 0)->run; 1215 nr++; 1216 } 1217 if (first && metric_only) { 1218 first = false; 1219 aggr_printout(counter, id, nr); 1220 } 1221 if (prefix && !metric_only) 1222 fprintf(output, "%s", prefix); 1223 1224 uval = val * counter->scale; 1225 printout(id, nr, counter, uval, prefix, run, ena, 1.0); 1226 if (!metric_only) 1227 fputc('\n', output); 1228 } 1229 if (metric_only) 1230 fputc('\n', output); 1231 } 1232 } 1233 1234 static void print_aggr_thread(struct perf_evsel *counter, char *prefix) 1235 { 1236 FILE *output = stat_config.output; 1237 int nthreads = thread_map__nr(counter->threads); 1238 int ncpus = cpu_map__nr(counter->cpus); 1239 int cpu, thread; 1240 double uval; 1241 1242 for (thread = 0; thread < nthreads; thread++) { 1243 u64 ena = 0, run = 0, val = 0; 1244 1245 for (cpu = 0; cpu < ncpus; cpu++) { 1246 val += perf_counts(counter->counts, cpu, thread)->val; 1247 ena += perf_counts(counter->counts, cpu, thread)->ena; 1248 run += perf_counts(counter->counts, cpu, thread)->run; 1249 } 1250 1251 if (prefix) 1252 fprintf(output, "%s", prefix); 1253 1254 uval = val * counter->scale; 1255 printout(thread, 0, counter, uval, prefix, run, ena, 1.0); 1256 fputc('\n', output); 1257 } 1258 } 1259 1260 /* 1261 * Print out the results of a single counter: 1262 * aggregated counts in system-wide mode 1263 */ 1264 static void print_counter_aggr(struct perf_evsel *counter, char *prefix) 1265 { 1266 FILE *output = stat_config.output; 1267 struct perf_stat_evsel *ps = counter->priv; 1268 double avg = avg_stats(&ps->res_stats[0]); 1269 double uval; 1270 double avg_enabled, avg_running; 1271 1272 avg_enabled = avg_stats(&ps->res_stats[1]); 1273 avg_running = avg_stats(&ps->res_stats[2]); 1274 1275 if (prefix && !metric_only) 1276 fprintf(output, "%s", prefix); 1277 1278 uval = avg * counter->scale; 1279 printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg); 1280 if (!metric_only) 1281 fprintf(output, "\n"); 1282 } 1283 1284 /* 1285 * Print out the results of a single counter: 1286 * does not use aggregated count in system-wide 1287 */ 1288 static void print_counter(struct perf_evsel *counter, char *prefix) 1289 { 1290 FILE *output = stat_config.output; 1291 u64 ena, run, val; 1292 double uval; 1293 int cpu; 1294 1295 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1296 val = perf_counts(counter->counts, cpu, 0)->val; 1297 ena = perf_counts(counter->counts, cpu, 0)->ena; 1298 run = perf_counts(counter->counts, cpu, 0)->run; 1299 1300 if (prefix) 1301 fprintf(output, "%s", prefix); 1302 1303 uval = val * counter->scale; 1304 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); 1305 1306 fputc('\n', output); 1307 } 1308 } 1309 1310 static void print_no_aggr_metric(char *prefix) 1311 { 1312 int cpu; 1313 int nrcpus = 0; 1314 struct perf_evsel *counter; 1315 u64 ena, run, val; 1316 double uval; 1317 1318 nrcpus = evsel_list->cpus->nr; 1319 for (cpu = 0; cpu < nrcpus; cpu++) { 1320 bool first = true; 1321 1322 if (prefix) 1323 fputs(prefix, stat_config.output); 1324 evlist__for_each_entry(evsel_list, counter) { 1325 if (first) { 1326 aggr_printout(counter, cpu, 0); 1327 first = false; 1328 } 1329 val = perf_counts(counter->counts, cpu, 0)->val; 1330 ena = perf_counts(counter->counts, cpu, 0)->ena; 1331 run = perf_counts(counter->counts, cpu, 0)->run; 1332 1333 uval = val * counter->scale; 1334 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); 1335 } 1336 fputc('\n', stat_config.output); 1337 } 1338 } 1339 1340 static int aggr_header_lens[] = { 1341 [AGGR_CORE] = 18, 1342 [AGGR_SOCKET] = 12, 1343 [AGGR_NONE] = 6, 1344 [AGGR_THREAD] = 24, 1345 [AGGR_GLOBAL] = 0, 1346 }; 1347 1348 static const char *aggr_header_csv[] = { 1349 [AGGR_CORE] = "core,cpus,", 1350 [AGGR_SOCKET] = "socket,cpus", 1351 [AGGR_NONE] = "cpu,", 1352 [AGGR_THREAD] = "comm-pid,", 1353 [AGGR_GLOBAL] = "" 1354 }; 1355 1356 static void print_metric_headers(const char *prefix, bool no_indent) 1357 { 1358 struct perf_stat_output_ctx out; 1359 struct perf_evsel *counter; 1360 struct outstate os = { 1361 .fh = stat_config.output 1362 }; 1363 1364 if (prefix) 1365 fprintf(stat_config.output, "%s", prefix); 1366 1367 if (!csv_output && !no_indent) 1368 fprintf(stat_config.output, "%*s", 1369 aggr_header_lens[stat_config.aggr_mode], ""); 1370 if (csv_output) { 1371 if (stat_config.interval) 1372 fputs("time,", stat_config.output); 1373 fputs(aggr_header_csv[stat_config.aggr_mode], 1374 stat_config.output); 1375 } 1376 1377 /* Print metrics headers only */ 1378 evlist__for_each_entry(evsel_list, counter) { 1379 os.evsel = counter; 1380 out.ctx = &os; 1381 out.print_metric = print_metric_header; 1382 out.new_line = new_line_metric; 1383 os.evsel = counter; 1384 perf_stat__print_shadow_stats(counter, 0, 1385 0, 1386 &out); 1387 } 1388 fputc('\n', stat_config.output); 1389 } 1390 1391 static void print_interval(char *prefix, struct timespec *ts) 1392 { 1393 FILE *output = stat_config.output; 1394 static int num_print_interval; 1395 1396 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); 1397 1398 if (num_print_interval == 0 && !csv_output) { 1399 switch (stat_config.aggr_mode) { 1400 case AGGR_SOCKET: 1401 fprintf(output, "# time socket cpus"); 1402 if (!metric_only) 1403 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1404 break; 1405 case AGGR_CORE: 1406 fprintf(output, "# time core cpus"); 1407 if (!metric_only) 1408 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1409 break; 1410 case AGGR_NONE: 1411 fprintf(output, "# time CPU"); 1412 if (!metric_only) 1413 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1414 break; 1415 case AGGR_THREAD: 1416 fprintf(output, "# time comm-pid"); 1417 if (!metric_only) 1418 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1419 break; 1420 case AGGR_GLOBAL: 1421 default: 1422 fprintf(output, "# time"); 1423 if (!metric_only) 1424 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1425 case AGGR_UNSET: 1426 break; 1427 } 1428 } 1429 1430 if (num_print_interval == 0 && metric_only) 1431 print_metric_headers(" ", true); 1432 if (++num_print_interval == 25) 1433 num_print_interval = 0; 1434 } 1435 1436 static void print_header(int argc, const char **argv) 1437 { 1438 FILE *output = stat_config.output; 1439 int i; 1440 1441 fflush(stdout); 1442 1443 if (!csv_output) { 1444 fprintf(output, "\n"); 1445 fprintf(output, " Performance counter stats for "); 1446 if (target.system_wide) 1447 fprintf(output, "\'system wide"); 1448 else if (target.cpu_list) 1449 fprintf(output, "\'CPU(s) %s", target.cpu_list); 1450 else if (!target__has_task(&target)) { 1451 fprintf(output, "\'%s", argv ? argv[0] : "pipe"); 1452 for (i = 1; argv && (i < argc); i++) 1453 fprintf(output, " %s", argv[i]); 1454 } else if (target.pid) 1455 fprintf(output, "process id \'%s", target.pid); 1456 else 1457 fprintf(output, "thread id \'%s", target.tid); 1458 1459 fprintf(output, "\'"); 1460 if (run_count > 1) 1461 fprintf(output, " (%d runs)", run_count); 1462 fprintf(output, ":\n\n"); 1463 } 1464 } 1465 1466 static void print_footer(void) 1467 { 1468 FILE *output = stat_config.output; 1469 1470 if (!null_run) 1471 fprintf(output, "\n"); 1472 fprintf(output, " %17.9f seconds time elapsed", 1473 avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC); 1474 if (run_count > 1) { 1475 fprintf(output, " "); 1476 print_noise_pct(stddev_stats(&walltime_nsecs_stats), 1477 avg_stats(&walltime_nsecs_stats)); 1478 } 1479 fprintf(output, "\n\n"); 1480 } 1481 1482 static void print_counters(struct timespec *ts, int argc, const char **argv) 1483 { 1484 int interval = stat_config.interval; 1485 struct perf_evsel *counter; 1486 char buf[64], *prefix = NULL; 1487 1488 /* Do not print anything if we record to the pipe. */ 1489 if (STAT_RECORD && perf_stat.file.is_pipe) 1490 return; 1491 1492 if (interval) 1493 print_interval(prefix = buf, ts); 1494 else 1495 print_header(argc, argv); 1496 1497 if (metric_only) { 1498 static int num_print_iv; 1499 1500 if (num_print_iv == 0 && !interval) 1501 print_metric_headers(prefix, false); 1502 if (num_print_iv++ == 25) 1503 num_print_iv = 0; 1504 if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) 1505 fprintf(stat_config.output, "%s", prefix); 1506 } 1507 1508 switch (stat_config.aggr_mode) { 1509 case AGGR_CORE: 1510 case AGGR_SOCKET: 1511 print_aggr(prefix); 1512 break; 1513 case AGGR_THREAD: 1514 evlist__for_each_entry(evsel_list, counter) 1515 print_aggr_thread(counter, prefix); 1516 break; 1517 case AGGR_GLOBAL: 1518 evlist__for_each_entry(evsel_list, counter) 1519 print_counter_aggr(counter, prefix); 1520 if (metric_only) 1521 fputc('\n', stat_config.output); 1522 break; 1523 case AGGR_NONE: 1524 if (metric_only) 1525 print_no_aggr_metric(prefix); 1526 else { 1527 evlist__for_each_entry(evsel_list, counter) 1528 print_counter(counter, prefix); 1529 } 1530 break; 1531 case AGGR_UNSET: 1532 default: 1533 break; 1534 } 1535 1536 if (!interval && !csv_output) 1537 print_footer(); 1538 1539 fflush(stat_config.output); 1540 } 1541 1542 static volatile int signr = -1; 1543 1544 static void skip_signal(int signo) 1545 { 1546 if ((child_pid == -1) || stat_config.interval) 1547 done = 1; 1548 1549 signr = signo; 1550 /* 1551 * render child_pid harmless 1552 * won't send SIGTERM to a random 1553 * process in case of race condition 1554 * and fast PID recycling 1555 */ 1556 child_pid = -1; 1557 } 1558 1559 static void sig_atexit(void) 1560 { 1561 sigset_t set, oset; 1562 1563 /* 1564 * avoid race condition with SIGCHLD handler 1565 * in skip_signal() which is modifying child_pid 1566 * goal is to avoid send SIGTERM to a random 1567 * process 1568 */ 1569 sigemptyset(&set); 1570 sigaddset(&set, SIGCHLD); 1571 sigprocmask(SIG_BLOCK, &set, &oset); 1572 1573 if (child_pid != -1) 1574 kill(child_pid, SIGTERM); 1575 1576 sigprocmask(SIG_SETMASK, &oset, NULL); 1577 1578 if (signr == -1) 1579 return; 1580 1581 signal(signr, SIG_DFL); 1582 kill(getpid(), signr); 1583 } 1584 1585 static int stat__set_big_num(const struct option *opt __maybe_unused, 1586 const char *s __maybe_unused, int unset) 1587 { 1588 big_num_opt = unset ? 0 : 1; 1589 return 0; 1590 } 1591 1592 static int enable_metric_only(const struct option *opt __maybe_unused, 1593 const char *s __maybe_unused, int unset) 1594 { 1595 force_metric_only = true; 1596 metric_only = !unset; 1597 return 0; 1598 } 1599 1600 static const struct option stat_options[] = { 1601 OPT_BOOLEAN('T', "transaction", &transaction_run, 1602 "hardware transaction statistics"), 1603 OPT_CALLBACK('e', "event", &evsel_list, "event", 1604 "event selector. use 'perf list' to list available events", 1605 parse_events_option), 1606 OPT_CALLBACK(0, "filter", &evsel_list, "filter", 1607 "event filter", parse_filter), 1608 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 1609 "child tasks do not inherit counters"), 1610 OPT_STRING('p', "pid", &target.pid, "pid", 1611 "stat events on existing process id"), 1612 OPT_STRING('t', "tid", &target.tid, "tid", 1613 "stat events on existing thread id"), 1614 OPT_BOOLEAN('a', "all-cpus", &target.system_wide, 1615 "system-wide collection from all CPUs"), 1616 OPT_BOOLEAN('g', "group", &group, 1617 "put the counters into a counter group"), 1618 OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), 1619 OPT_INCR('v', "verbose", &verbose, 1620 "be more verbose (show counter open errors, etc)"), 1621 OPT_INTEGER('r', "repeat", &run_count, 1622 "repeat command and print average + stddev (max: 100, forever: 0)"), 1623 OPT_BOOLEAN('n', "null", &null_run, 1624 "null run - dont start any counters"), 1625 OPT_INCR('d', "detailed", &detailed_run, 1626 "detailed run - start a lot of events"), 1627 OPT_BOOLEAN('S', "sync", &sync_run, 1628 "call sync() before starting a run"), 1629 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 1630 "print large numbers with thousands\' separators", 1631 stat__set_big_num), 1632 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 1633 "list of cpus to monitor in system-wide"), 1634 OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, 1635 "disable CPU count aggregation", AGGR_NONE), 1636 OPT_STRING('x', "field-separator", &csv_sep, "separator", 1637 "print counts with custom separator"), 1638 OPT_CALLBACK('G', "cgroup", &evsel_list, "name", 1639 "monitor event in cgroup name only", parse_cgroups), 1640 OPT_STRING('o', "output", &output_name, "file", "output file name"), 1641 OPT_BOOLEAN(0, "append", &append_file, "append to the output file"), 1642 OPT_INTEGER(0, "log-fd", &output_fd, 1643 "log output to fd, instead of stderr"), 1644 OPT_STRING(0, "pre", &pre_cmd, "command", 1645 "command to run prior to the measured command"), 1646 OPT_STRING(0, "post", &post_cmd, "command", 1647 "command to run after to the measured command"), 1648 OPT_UINTEGER('I', "interval-print", &stat_config.interval, 1649 "print counts at regular interval in ms (>= 10)"), 1650 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, 1651 "aggregate counts per processor socket", AGGR_SOCKET), 1652 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, 1653 "aggregate counts per physical processor core", AGGR_CORE), 1654 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, 1655 "aggregate counts per thread", AGGR_THREAD), 1656 OPT_UINTEGER('D', "delay", &initial_delay, 1657 "ms to wait before starting measurement after program start"), 1658 OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL, 1659 "Only print computed metrics. No raw values", enable_metric_only), 1660 OPT_BOOLEAN(0, "topdown", &topdown_run, 1661 "measure topdown level 1 statistics"), 1662 OPT_END() 1663 }; 1664 1665 static int perf_stat__get_socket(struct cpu_map *map, int cpu) 1666 { 1667 return cpu_map__get_socket(map, cpu, NULL); 1668 } 1669 1670 static int perf_stat__get_core(struct cpu_map *map, int cpu) 1671 { 1672 return cpu_map__get_core(map, cpu, NULL); 1673 } 1674 1675 static int cpu_map__get_max(struct cpu_map *map) 1676 { 1677 int i, max = -1; 1678 1679 for (i = 0; i < map->nr; i++) { 1680 if (map->map[i] > max) 1681 max = map->map[i]; 1682 } 1683 1684 return max; 1685 } 1686 1687 static struct cpu_map *cpus_aggr_map; 1688 1689 static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx) 1690 { 1691 int cpu; 1692 1693 if (idx >= map->nr) 1694 return -1; 1695 1696 cpu = map->map[idx]; 1697 1698 if (cpus_aggr_map->map[cpu] == -1) 1699 cpus_aggr_map->map[cpu] = get_id(map, idx); 1700 1701 return cpus_aggr_map->map[cpu]; 1702 } 1703 1704 static int perf_stat__get_socket_cached(struct cpu_map *map, int idx) 1705 { 1706 return perf_stat__get_aggr(perf_stat__get_socket, map, idx); 1707 } 1708 1709 static int perf_stat__get_core_cached(struct cpu_map *map, int idx) 1710 { 1711 return perf_stat__get_aggr(perf_stat__get_core, map, idx); 1712 } 1713 1714 static int perf_stat_init_aggr_mode(void) 1715 { 1716 int nr; 1717 1718 switch (stat_config.aggr_mode) { 1719 case AGGR_SOCKET: 1720 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { 1721 perror("cannot build socket map"); 1722 return -1; 1723 } 1724 aggr_get_id = perf_stat__get_socket_cached; 1725 break; 1726 case AGGR_CORE: 1727 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { 1728 perror("cannot build core map"); 1729 return -1; 1730 } 1731 aggr_get_id = perf_stat__get_core_cached; 1732 break; 1733 case AGGR_NONE: 1734 case AGGR_GLOBAL: 1735 case AGGR_THREAD: 1736 case AGGR_UNSET: 1737 default: 1738 break; 1739 } 1740 1741 /* 1742 * The evsel_list->cpus is the base we operate on, 1743 * taking the highest cpu number to be the size of 1744 * the aggregation translate cpumap. 1745 */ 1746 nr = cpu_map__get_max(evsel_list->cpus); 1747 cpus_aggr_map = cpu_map__empty_new(nr + 1); 1748 return cpus_aggr_map ? 0 : -ENOMEM; 1749 } 1750 1751 static void perf_stat__exit_aggr_mode(void) 1752 { 1753 cpu_map__put(aggr_map); 1754 cpu_map__put(cpus_aggr_map); 1755 aggr_map = NULL; 1756 cpus_aggr_map = NULL; 1757 } 1758 1759 static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx) 1760 { 1761 int cpu; 1762 1763 if (idx > map->nr) 1764 return -1; 1765 1766 cpu = map->map[idx]; 1767 1768 if (cpu >= env->nr_cpus_online) 1769 return -1; 1770 1771 return cpu; 1772 } 1773 1774 static int perf_env__get_socket(struct cpu_map *map, int idx, void *data) 1775 { 1776 struct perf_env *env = data; 1777 int cpu = perf_env__get_cpu(env, map, idx); 1778 1779 return cpu == -1 ? -1 : env->cpu[cpu].socket_id; 1780 } 1781 1782 static int perf_env__get_core(struct cpu_map *map, int idx, void *data) 1783 { 1784 struct perf_env *env = data; 1785 int core = -1, cpu = perf_env__get_cpu(env, map, idx); 1786 1787 if (cpu != -1) { 1788 int socket_id = env->cpu[cpu].socket_id; 1789 1790 /* 1791 * Encode socket in upper 16 bits 1792 * core_id is relative to socket, and 1793 * we need a global id. So we combine 1794 * socket + core id. 1795 */ 1796 core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff); 1797 } 1798 1799 return core; 1800 } 1801 1802 static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus, 1803 struct cpu_map **sockp) 1804 { 1805 return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); 1806 } 1807 1808 static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, 1809 struct cpu_map **corep) 1810 { 1811 return cpu_map__build_map(cpus, corep, perf_env__get_core, env); 1812 } 1813 1814 static int perf_stat__get_socket_file(struct cpu_map *map, int idx) 1815 { 1816 return perf_env__get_socket(map, idx, &perf_stat.session->header.env); 1817 } 1818 1819 static int perf_stat__get_core_file(struct cpu_map *map, int idx) 1820 { 1821 return perf_env__get_core(map, idx, &perf_stat.session->header.env); 1822 } 1823 1824 static int perf_stat_init_aggr_mode_file(struct perf_stat *st) 1825 { 1826 struct perf_env *env = &st->session->header.env; 1827 1828 switch (stat_config.aggr_mode) { 1829 case AGGR_SOCKET: 1830 if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) { 1831 perror("cannot build socket map"); 1832 return -1; 1833 } 1834 aggr_get_id = perf_stat__get_socket_file; 1835 break; 1836 case AGGR_CORE: 1837 if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) { 1838 perror("cannot build core map"); 1839 return -1; 1840 } 1841 aggr_get_id = perf_stat__get_core_file; 1842 break; 1843 case AGGR_NONE: 1844 case AGGR_GLOBAL: 1845 case AGGR_THREAD: 1846 case AGGR_UNSET: 1847 default: 1848 break; 1849 } 1850 1851 return 0; 1852 } 1853 1854 static int topdown_filter_events(const char **attr, char **str, bool use_group) 1855 { 1856 int off = 0; 1857 int i; 1858 int len = 0; 1859 char *s; 1860 1861 for (i = 0; attr[i]; i++) { 1862 if (pmu_have_event("cpu", attr[i])) { 1863 len += strlen(attr[i]) + 1; 1864 attr[i - off] = attr[i]; 1865 } else 1866 off++; 1867 } 1868 attr[i - off] = NULL; 1869 1870 *str = malloc(len + 1 + 2); 1871 if (!*str) 1872 return -1; 1873 s = *str; 1874 if (i - off == 0) { 1875 *s = 0; 1876 return 0; 1877 } 1878 if (use_group) 1879 *s++ = '{'; 1880 for (i = 0; attr[i]; i++) { 1881 strcpy(s, attr[i]); 1882 s += strlen(s); 1883 *s++ = ','; 1884 } 1885 if (use_group) { 1886 s[-1] = '}'; 1887 *s = 0; 1888 } else 1889 s[-1] = 0; 1890 return 0; 1891 } 1892 1893 __weak bool arch_topdown_check_group(bool *warn) 1894 { 1895 *warn = false; 1896 return false; 1897 } 1898 1899 __weak void arch_topdown_group_warn(void) 1900 { 1901 } 1902 1903 /* 1904 * Add default attributes, if there were no attributes specified or 1905 * if -d/--detailed, -d -d or -d -d -d is used: 1906 */ 1907 static int add_default_attributes(void) 1908 { 1909 int err; 1910 struct perf_event_attr default_attrs0[] = { 1911 1912 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 1913 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, 1914 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, 1915 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 1916 1917 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, 1918 }; 1919 struct perf_event_attr frontend_attrs[] = { 1920 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, 1921 }; 1922 struct perf_event_attr backend_attrs[] = { 1923 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, 1924 }; 1925 struct perf_event_attr default_attrs1[] = { 1926 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, 1927 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 1928 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, 1929 1930 }; 1931 1932 /* 1933 * Detailed stats (-d), covering the L1 and last level data caches: 1934 */ 1935 struct perf_event_attr detailed_attrs[] = { 1936 1937 { .type = PERF_TYPE_HW_CACHE, 1938 .config = 1939 PERF_COUNT_HW_CACHE_L1D << 0 | 1940 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1941 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1942 1943 { .type = PERF_TYPE_HW_CACHE, 1944 .config = 1945 PERF_COUNT_HW_CACHE_L1D << 0 | 1946 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1947 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1948 1949 { .type = PERF_TYPE_HW_CACHE, 1950 .config = 1951 PERF_COUNT_HW_CACHE_LL << 0 | 1952 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1953 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1954 1955 { .type = PERF_TYPE_HW_CACHE, 1956 .config = 1957 PERF_COUNT_HW_CACHE_LL << 0 | 1958 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1959 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1960 }; 1961 1962 /* 1963 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: 1964 */ 1965 struct perf_event_attr very_detailed_attrs[] = { 1966 1967 { .type = PERF_TYPE_HW_CACHE, 1968 .config = 1969 PERF_COUNT_HW_CACHE_L1I << 0 | 1970 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1971 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1972 1973 { .type = PERF_TYPE_HW_CACHE, 1974 .config = 1975 PERF_COUNT_HW_CACHE_L1I << 0 | 1976 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1977 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1978 1979 { .type = PERF_TYPE_HW_CACHE, 1980 .config = 1981 PERF_COUNT_HW_CACHE_DTLB << 0 | 1982 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1983 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1984 1985 { .type = PERF_TYPE_HW_CACHE, 1986 .config = 1987 PERF_COUNT_HW_CACHE_DTLB << 0 | 1988 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1989 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1990 1991 { .type = PERF_TYPE_HW_CACHE, 1992 .config = 1993 PERF_COUNT_HW_CACHE_ITLB << 0 | 1994 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1995 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1996 1997 { .type = PERF_TYPE_HW_CACHE, 1998 .config = 1999 PERF_COUNT_HW_CACHE_ITLB << 0 | 2000 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2001 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2002 2003 }; 2004 2005 /* 2006 * Very, very detailed stats (-d -d -d), adding prefetch events: 2007 */ 2008 struct perf_event_attr very_very_detailed_attrs[] = { 2009 2010 { .type = PERF_TYPE_HW_CACHE, 2011 .config = 2012 PERF_COUNT_HW_CACHE_L1D << 0 | 2013 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 2014 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2015 2016 { .type = PERF_TYPE_HW_CACHE, 2017 .config = 2018 PERF_COUNT_HW_CACHE_L1D << 0 | 2019 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 2020 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2021 }; 2022 2023 /* Set attrs if no event is selected and !null_run: */ 2024 if (null_run) 2025 return 0; 2026 2027 if (transaction_run) { 2028 if (pmu_have_event("cpu", "cycles-ct") && 2029 pmu_have_event("cpu", "el-start")) 2030 err = parse_events(evsel_list, transaction_attrs, NULL); 2031 else 2032 err = parse_events(evsel_list, transaction_limited_attrs, NULL); 2033 if (err) { 2034 fprintf(stderr, "Cannot set up transaction events\n"); 2035 return -1; 2036 } 2037 return 0; 2038 } 2039 2040 if (topdown_run) { 2041 char *str = NULL; 2042 bool warn = false; 2043 2044 if (stat_config.aggr_mode != AGGR_GLOBAL && 2045 stat_config.aggr_mode != AGGR_CORE) { 2046 pr_err("top down event configuration requires --per-core mode\n"); 2047 return -1; 2048 } 2049 stat_config.aggr_mode = AGGR_CORE; 2050 if (nr_cgroups || !target__has_cpu(&target)) { 2051 pr_err("top down event configuration requires system-wide mode (-a)\n"); 2052 return -1; 2053 } 2054 2055 if (!force_metric_only) 2056 metric_only = true; 2057 if (topdown_filter_events(topdown_attrs, &str, 2058 arch_topdown_check_group(&warn)) < 0) { 2059 pr_err("Out of memory\n"); 2060 return -1; 2061 } 2062 if (topdown_attrs[0] && str) { 2063 if (warn) 2064 arch_topdown_group_warn(); 2065 err = parse_events(evsel_list, str, NULL); 2066 if (err) { 2067 fprintf(stderr, 2068 "Cannot set up top down events %s: %d\n", 2069 str, err); 2070 free(str); 2071 return -1; 2072 } 2073 } else { 2074 fprintf(stderr, "System does not support topdown\n"); 2075 return -1; 2076 } 2077 free(str); 2078 } 2079 2080 if (!evsel_list->nr_entries) { 2081 if (target__has_cpu(&target)) 2082 default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; 2083 2084 if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) 2085 return -1; 2086 if (pmu_have_event("cpu", "stalled-cycles-frontend")) { 2087 if (perf_evlist__add_default_attrs(evsel_list, 2088 frontend_attrs) < 0) 2089 return -1; 2090 } 2091 if (pmu_have_event("cpu", "stalled-cycles-backend")) { 2092 if (perf_evlist__add_default_attrs(evsel_list, 2093 backend_attrs) < 0) 2094 return -1; 2095 } 2096 if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0) 2097 return -1; 2098 } 2099 2100 /* Detailed events get appended to the event list: */ 2101 2102 if (detailed_run < 1) 2103 return 0; 2104 2105 /* Append detailed run extra attributes: */ 2106 if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) 2107 return -1; 2108 2109 if (detailed_run < 2) 2110 return 0; 2111 2112 /* Append very detailed run extra attributes: */ 2113 if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) 2114 return -1; 2115 2116 if (detailed_run < 3) 2117 return 0; 2118 2119 /* Append very, very detailed run extra attributes: */ 2120 return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); 2121 } 2122 2123 static const char * const stat_record_usage[] = { 2124 "perf stat record [<options>]", 2125 NULL, 2126 }; 2127 2128 static void init_features(struct perf_session *session) 2129 { 2130 int feat; 2131 2132 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 2133 perf_header__set_feat(&session->header, feat); 2134 2135 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 2136 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 2137 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 2138 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 2139 } 2140 2141 static int __cmd_record(int argc, const char **argv) 2142 { 2143 struct perf_session *session; 2144 struct perf_data_file *file = &perf_stat.file; 2145 2146 argc = parse_options(argc, argv, stat_options, stat_record_usage, 2147 PARSE_OPT_STOP_AT_NON_OPTION); 2148 2149 if (output_name) 2150 file->path = output_name; 2151 2152 if (run_count != 1 || forever) { 2153 pr_err("Cannot use -r option with perf stat record.\n"); 2154 return -1; 2155 } 2156 2157 session = perf_session__new(file, false, NULL); 2158 if (session == NULL) { 2159 pr_err("Perf session creation failed.\n"); 2160 return -1; 2161 } 2162 2163 init_features(session); 2164 2165 session->evlist = evsel_list; 2166 perf_stat.session = session; 2167 perf_stat.record = true; 2168 return argc; 2169 } 2170 2171 static int process_stat_round_event(struct perf_tool *tool __maybe_unused, 2172 union perf_event *event, 2173 struct perf_session *session) 2174 { 2175 struct stat_round_event *stat_round = &event->stat_round; 2176 struct perf_evsel *counter; 2177 struct timespec tsh, *ts = NULL; 2178 const char **argv = session->header.env.cmdline_argv; 2179 int argc = session->header.env.nr_cmdline; 2180 2181 evlist__for_each_entry(evsel_list, counter) 2182 perf_stat_process_counter(&stat_config, counter); 2183 2184 if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL) 2185 update_stats(&walltime_nsecs_stats, stat_round->time); 2186 2187 if (stat_config.interval && stat_round->time) { 2188 tsh.tv_sec = stat_round->time / NSEC_PER_SEC; 2189 tsh.tv_nsec = stat_round->time % NSEC_PER_SEC; 2190 ts = &tsh; 2191 } 2192 2193 print_counters(ts, argc, argv); 2194 return 0; 2195 } 2196 2197 static 2198 int process_stat_config_event(struct perf_tool *tool __maybe_unused, 2199 union perf_event *event, 2200 struct perf_session *session __maybe_unused) 2201 { 2202 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2203 2204 perf_event__read_stat_config(&stat_config, &event->stat_config); 2205 2206 if (cpu_map__empty(st->cpus)) { 2207 if (st->aggr_mode != AGGR_UNSET) 2208 pr_warning("warning: processing task data, aggregation mode not set\n"); 2209 return 0; 2210 } 2211 2212 if (st->aggr_mode != AGGR_UNSET) 2213 stat_config.aggr_mode = st->aggr_mode; 2214 2215 if (perf_stat.file.is_pipe) 2216 perf_stat_init_aggr_mode(); 2217 else 2218 perf_stat_init_aggr_mode_file(st); 2219 2220 return 0; 2221 } 2222 2223 static int set_maps(struct perf_stat *st) 2224 { 2225 if (!st->cpus || !st->threads) 2226 return 0; 2227 2228 if (WARN_ONCE(st->maps_allocated, "stats double allocation\n")) 2229 return -EINVAL; 2230 2231 perf_evlist__set_maps(evsel_list, st->cpus, st->threads); 2232 2233 if (perf_evlist__alloc_stats(evsel_list, true)) 2234 return -ENOMEM; 2235 2236 st->maps_allocated = true; 2237 return 0; 2238 } 2239 2240 static 2241 int process_thread_map_event(struct perf_tool *tool __maybe_unused, 2242 union perf_event *event, 2243 struct perf_session *session __maybe_unused) 2244 { 2245 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2246 2247 if (st->threads) { 2248 pr_warning("Extra thread map event, ignoring.\n"); 2249 return 0; 2250 } 2251 2252 st->threads = thread_map__new_event(&event->thread_map); 2253 if (!st->threads) 2254 return -ENOMEM; 2255 2256 return set_maps(st); 2257 } 2258 2259 static 2260 int process_cpu_map_event(struct perf_tool *tool __maybe_unused, 2261 union perf_event *event, 2262 struct perf_session *session __maybe_unused) 2263 { 2264 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2265 struct cpu_map *cpus; 2266 2267 if (st->cpus) { 2268 pr_warning("Extra cpu map event, ignoring.\n"); 2269 return 0; 2270 } 2271 2272 cpus = cpu_map__new_data(&event->cpu_map.data); 2273 if (!cpus) 2274 return -ENOMEM; 2275 2276 st->cpus = cpus; 2277 return set_maps(st); 2278 } 2279 2280 static const char * const stat_report_usage[] = { 2281 "perf stat report [<options>]", 2282 NULL, 2283 }; 2284 2285 static struct perf_stat perf_stat = { 2286 .tool = { 2287 .attr = perf_event__process_attr, 2288 .event_update = perf_event__process_event_update, 2289 .thread_map = process_thread_map_event, 2290 .cpu_map = process_cpu_map_event, 2291 .stat_config = process_stat_config_event, 2292 .stat = perf_event__process_stat_event, 2293 .stat_round = process_stat_round_event, 2294 }, 2295 .aggr_mode = AGGR_UNSET, 2296 }; 2297 2298 static int __cmd_report(int argc, const char **argv) 2299 { 2300 struct perf_session *session; 2301 const struct option options[] = { 2302 OPT_STRING('i', "input", &input_name, "file", "input file name"), 2303 OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, 2304 "aggregate counts per processor socket", AGGR_SOCKET), 2305 OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, 2306 "aggregate counts per physical processor core", AGGR_CORE), 2307 OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, 2308 "disable CPU count aggregation", AGGR_NONE), 2309 OPT_END() 2310 }; 2311 struct stat st; 2312 int ret; 2313 2314 argc = parse_options(argc, argv, options, stat_report_usage, 0); 2315 2316 if (!input_name || !strlen(input_name)) { 2317 if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) 2318 input_name = "-"; 2319 else 2320 input_name = "perf.data"; 2321 } 2322 2323 perf_stat.file.path = input_name; 2324 perf_stat.file.mode = PERF_DATA_MODE_READ; 2325 2326 session = perf_session__new(&perf_stat.file, false, &perf_stat.tool); 2327 if (session == NULL) 2328 return -1; 2329 2330 perf_stat.session = session; 2331 stat_config.output = stderr; 2332 evsel_list = session->evlist; 2333 2334 ret = perf_session__process_events(session); 2335 if (ret) 2336 return ret; 2337 2338 perf_session__delete(session); 2339 return 0; 2340 } 2341 2342 int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) 2343 { 2344 const char * const stat_usage[] = { 2345 "perf stat [<options>] [<command>]", 2346 NULL 2347 }; 2348 int status = -EINVAL, run_idx; 2349 const char *mode; 2350 FILE *output = stderr; 2351 unsigned int interval; 2352 const char * const stat_subcommands[] = { "record", "report" }; 2353 2354 setlocale(LC_ALL, ""); 2355 2356 evsel_list = perf_evlist__new(); 2357 if (evsel_list == NULL) 2358 return -ENOMEM; 2359 2360 parse_events__shrink_config_terms(); 2361 argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, 2362 (const char **) stat_usage, 2363 PARSE_OPT_STOP_AT_NON_OPTION); 2364 perf_stat__init_shadow_stats(); 2365 2366 if (csv_sep) { 2367 csv_output = true; 2368 if (!strcmp(csv_sep, "\\t")) 2369 csv_sep = "\t"; 2370 } else 2371 csv_sep = DEFAULT_SEPARATOR; 2372 2373 if (argc && !strncmp(argv[0], "rec", 3)) { 2374 argc = __cmd_record(argc, argv); 2375 if (argc < 0) 2376 return -1; 2377 } else if (argc && !strncmp(argv[0], "rep", 3)) 2378 return __cmd_report(argc, argv); 2379 2380 interval = stat_config.interval; 2381 2382 /* 2383 * For record command the -o is already taken care of. 2384 */ 2385 if (!STAT_RECORD && output_name && strcmp(output_name, "-")) 2386 output = NULL; 2387 2388 if (output_name && output_fd) { 2389 fprintf(stderr, "cannot use both --output and --log-fd\n"); 2390 parse_options_usage(stat_usage, stat_options, "o", 1); 2391 parse_options_usage(NULL, stat_options, "log-fd", 0); 2392 goto out; 2393 } 2394 2395 if (metric_only && stat_config.aggr_mode == AGGR_THREAD) { 2396 fprintf(stderr, "--metric-only is not supported with --per-thread\n"); 2397 goto out; 2398 } 2399 2400 if (metric_only && run_count > 1) { 2401 fprintf(stderr, "--metric-only is not supported with -r\n"); 2402 goto out; 2403 } 2404 2405 if (output_fd < 0) { 2406 fprintf(stderr, "argument to --log-fd must be a > 0\n"); 2407 parse_options_usage(stat_usage, stat_options, "log-fd", 0); 2408 goto out; 2409 } 2410 2411 if (!output) { 2412 struct timespec tm; 2413 mode = append_file ? "a" : "w"; 2414 2415 output = fopen(output_name, mode); 2416 if (!output) { 2417 perror("failed to create output file"); 2418 return -1; 2419 } 2420 clock_gettime(CLOCK_REALTIME, &tm); 2421 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); 2422 } else if (output_fd > 0) { 2423 mode = append_file ? "a" : "w"; 2424 output = fdopen(output_fd, mode); 2425 if (!output) { 2426 perror("Failed opening logfd"); 2427 return -errno; 2428 } 2429 } 2430 2431 stat_config.output = output; 2432 2433 /* 2434 * let the spreadsheet do the pretty-printing 2435 */ 2436 if (csv_output) { 2437 /* User explicitly passed -B? */ 2438 if (big_num_opt == 1) { 2439 fprintf(stderr, "-B option not supported with -x\n"); 2440 parse_options_usage(stat_usage, stat_options, "B", 1); 2441 parse_options_usage(NULL, stat_options, "x", 1); 2442 goto out; 2443 } else /* Nope, so disable big number formatting */ 2444 big_num = false; 2445 } else if (big_num_opt == 0) /* User passed --no-big-num */ 2446 big_num = false; 2447 2448 if (!argc && target__none(&target)) 2449 usage_with_options(stat_usage, stat_options); 2450 2451 if (run_count < 0) { 2452 pr_err("Run count must be a positive number\n"); 2453 parse_options_usage(stat_usage, stat_options, "r", 1); 2454 goto out; 2455 } else if (run_count == 0) { 2456 forever = true; 2457 run_count = 1; 2458 } 2459 2460 if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { 2461 fprintf(stderr, "The --per-thread option is only available " 2462 "when monitoring via -p -t options.\n"); 2463 parse_options_usage(NULL, stat_options, "p", 1); 2464 parse_options_usage(NULL, stat_options, "t", 1); 2465 goto out; 2466 } 2467 2468 /* 2469 * no_aggr, cgroup are for system-wide only 2470 * --per-thread is aggregated per thread, we dont mix it with cpu mode 2471 */ 2472 if (((stat_config.aggr_mode != AGGR_GLOBAL && 2473 stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) && 2474 !target__has_cpu(&target)) { 2475 fprintf(stderr, "both cgroup and no-aggregation " 2476 "modes only available in system-wide mode\n"); 2477 2478 parse_options_usage(stat_usage, stat_options, "G", 1); 2479 parse_options_usage(NULL, stat_options, "A", 1); 2480 parse_options_usage(NULL, stat_options, "a", 1); 2481 goto out; 2482 } 2483 2484 if (add_default_attributes()) 2485 goto out; 2486 2487 target__validate(&target); 2488 2489 if (perf_evlist__create_maps(evsel_list, &target) < 0) { 2490 if (target__has_task(&target)) { 2491 pr_err("Problems finding threads of monitor\n"); 2492 parse_options_usage(stat_usage, stat_options, "p", 1); 2493 parse_options_usage(NULL, stat_options, "t", 1); 2494 } else if (target__has_cpu(&target)) { 2495 perror("failed to parse CPUs map"); 2496 parse_options_usage(stat_usage, stat_options, "C", 1); 2497 parse_options_usage(NULL, stat_options, "a", 1); 2498 } 2499 goto out; 2500 } 2501 2502 /* 2503 * Initialize thread_map with comm names, 2504 * so we could print it out on output. 2505 */ 2506 if (stat_config.aggr_mode == AGGR_THREAD) 2507 thread_map__read_comms(evsel_list->threads); 2508 2509 if (interval && interval < 100) { 2510 if (interval < 10) { 2511 pr_err("print interval must be >= 10ms\n"); 2512 parse_options_usage(stat_usage, stat_options, "I", 1); 2513 goto out; 2514 } else 2515 pr_warning("print interval < 100ms. " 2516 "The overhead percentage could be high in some cases. " 2517 "Please proceed with caution.\n"); 2518 } 2519 2520 if (perf_evlist__alloc_stats(evsel_list, interval)) 2521 goto out; 2522 2523 if (perf_stat_init_aggr_mode()) 2524 goto out; 2525 2526 /* 2527 * We dont want to block the signals - that would cause 2528 * child tasks to inherit that and Ctrl-C would not work. 2529 * What we want is for Ctrl-C to work in the exec()-ed 2530 * task, but being ignored by perf stat itself: 2531 */ 2532 atexit(sig_atexit); 2533 if (!forever) 2534 signal(SIGINT, skip_signal); 2535 signal(SIGCHLD, skip_signal); 2536 signal(SIGALRM, skip_signal); 2537 signal(SIGABRT, skip_signal); 2538 2539 status = 0; 2540 for (run_idx = 0; forever || run_idx < run_count; run_idx++) { 2541 if (run_count != 1 && verbose) 2542 fprintf(output, "[ perf stat: executing run #%d ... ]\n", 2543 run_idx + 1); 2544 2545 status = run_perf_stat(argc, argv); 2546 if (forever && status != -1) { 2547 print_counters(NULL, argc, argv); 2548 perf_stat__reset_stats(); 2549 } 2550 } 2551 2552 if (!forever && status != -1 && !interval) 2553 print_counters(NULL, argc, argv); 2554 2555 if (STAT_RECORD) { 2556 /* 2557 * We synthesize the kernel mmap record just so that older tools 2558 * don't emit warnings about not being able to resolve symbols 2559 * due to /proc/sys/kernel/kptr_restrict settings and instear provide 2560 * a saner message about no samples being in the perf.data file. 2561 * 2562 * This also serves to suppress a warning about f_header.data.size == 0 2563 * in header.c at the moment 'perf stat record' gets introduced, which 2564 * is not really needed once we start adding the stat specific PERF_RECORD_ 2565 * records, but the need to suppress the kptr_restrict messages in older 2566 * tools remain -acme 2567 */ 2568 int fd = perf_data_file__fd(&perf_stat.file); 2569 int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, 2570 process_synthesized_event, 2571 &perf_stat.session->machines.host); 2572 if (err) { 2573 pr_warning("Couldn't synthesize the kernel mmap record, harmless, " 2574 "older tools may produce warnings about this file\n."); 2575 } 2576 2577 if (!interval) { 2578 if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL)) 2579 pr_err("failed to write stat round event\n"); 2580 } 2581 2582 if (!perf_stat.file.is_pipe) { 2583 perf_stat.session->header.data_size += perf_stat.bytes_written; 2584 perf_session__write_header(perf_stat.session, evsel_list, fd, true); 2585 } 2586 2587 perf_session__delete(perf_stat.session); 2588 } 2589 2590 perf_stat__exit_aggr_mode(); 2591 perf_evlist__free_stats(evsel_list); 2592 out: 2593 perf_evlist__delete(evsel_list); 2594 return status; 2595 } 2596