/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ./hackbench 10

  Time: 0.118

  Performance counter stats for './hackbench 10':

       1708.761321 task-clock                #   11.037 CPUs utilized
            41,190 context-switches          #    0.024 M/sec
             6,735 CPU-migrations            #    0.004 M/sec
            17,318 page-faults               #    0.010 M/sec
     5,205,202,243 cycles                    #    3.046 GHz
     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     1,600,790,871 stalled-cycles-backend    #   30.75% backend cycles idle
     2,603,501,247 instructions              #    0.50  insns per cycle
                                             #    1.48  stalled cycles per insn
       484,357,498 branches                  #  283.455 M/sec
         6,388,934 branch-misses             #    1.32% of all branches

        0.154822978  seconds time elapsed

 *
 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include "perf.h"
#include "builtin.h"
#include "util/cgroup.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/pmu.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/color.h"
#include "util/stat.h"
#include "util/header.h"
#include "util/cpumap.h"
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/counts.h"
#include "util/group.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/string2.h"
#include "asm/bug.h"

#include <linux/time64.h>
#include <api/fs/fs.h>
#include <errno.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/prctl.h>
#include <inttypes.h>
#include <locale.h>
#include <math.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <unistd.h>

#include "sane_ctype.h"

#define DEFAULT_SEPARATOR	" "
#define CNTR_NOT_SUPPORTED	"<not supported>"
#define CNTR_NOT_COUNTED	"<not counted>"
#define FREEZE_ON_SMI_PATH	"devices/cpu/freeze_on_smi"

static void print_counters(struct timespec *ts, int argc, const char **argv);

/* Default events used for perf stat -T */
static const char *transaction_attrs = {
	"task-clock,"
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/,"
	"cpu/el-start/,"
	"cpu/cycles-ct/"
	"}"
};

/* More limited version when the CPU does not have all events. */
static const char * transaction_limited_attrs = {
	"task-clock,"
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/"
	"}"
};
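
/*
 * Example: "perf stat -T ./workload" uses transaction_attrs when the cpu
 * PMU exposes both cycles-ct and el-start (see add_default_attributes()),
 * and falls back to the reduced transaction_limited_attrs set above
 * otherwise.  The "{ ... }" braces make the enclosed events a single
 * group, so they are scheduled onto the PMU together.
 */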

static const char * topdown_attrs[] = {
	"topdown-total-slots",
	"topdown-slots-retired",
	"topdown-recovery-bubbles",
	"topdown-fetch-bubbles",
	"topdown-slots-issued",
	NULL,
};

static const char *smi_cost_attrs = {
	"{"
	"msr/aperf/,"
	"msr/smi/,"
	"cycles"
	"}"
};

static struct perf_evlist	*evsel_list;

static struct target target = {
	.uid	= UINT_MAX,
};

typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu);

static int			run_count			=  1;
static bool			no_inherit			= false;
static volatile pid_t		child_pid			= -1;
static bool			null_run			=  false;
static int			detailed_run			=  0;
static bool			transaction_run;
static bool			topdown_run			= false;
static bool			smi_cost			= false;
static bool			smi_reset			= false;
static bool			big_num				=  true;
static int			big_num_opt			=  -1;
static const char		*csv_sep			= NULL;
static bool			csv_output			= false;
static bool			group				= false;
static const char		*pre_cmd			= NULL;
static const char		*post_cmd			= NULL;
static bool			sync_run			= false;
static unsigned int		initial_delay			= 0;
static unsigned int		unit_width			= 4; /* strlen("unit") */
static bool			forever				= false;
static bool			metric_only			= false;
static bool			force_metric_only		= false;
static bool			no_merge			= false;
static struct timespec		ref_time;
static struct cpu_map		*aggr_map;
static aggr_get_id_t		aggr_get_id;
static bool			append_file;
static const char		*output_name;
static int			output_fd;
static int			print_free_counters_hint;

struct perf_stat {
	bool			 record;
	struct perf_data_file	 file;
	struct perf_session	*session;
	u64			 bytes_written;
	struct perf_tool	 tool;
	bool			 maps_allocated;
	struct cpu_map		*cpus;
	struct thread_map	*threads;
	enum aggr_mode		 aggr_mode;
};

static struct perf_stat		perf_stat;
#define STAT_RECORD		perf_stat.record

static volatile int done = 0;

static struct perf_stat_config stat_config = {
	.aggr_mode	= AGGR_GLOBAL,
	.scale		= true,
};

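/*
 * r = a - b, assuming a >= b.  For example a = { 2, 100000001 } and
 * b = { 1, 900000002 } gives r->tv_nsec = 100000001 + NSEC_PER_SEC -
 * 900000002 = 199999999 and r->tv_sec = (2 - 1) - 1 = 0; the borrow
 * keeps tv_nsec within [0, NSEC_PER_SEC).
 */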
static inline void diff_timespec(struct timespec *r, struct timespec *a,
				 struct timespec *b)
{
	r->tv_sec = a->tv_sec - b->tv_sec;
	if (a->tv_nsec < b->tv_nsec) {
		r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec;
		r->tv_sec--;
	} else {
		r->tv_nsec = a->tv_nsec - b->tv_nsec;
	}
}

static void perf_stat__reset_stats(void)
{
	perf_evlist__reset_stats(evsel_list);
	perf_stat__reset_shadow_stats();
}

static int create_perf_stat_counter(struct perf_evsel *evsel)
{
	struct perf_event_attr *attr = &evsel->attr;

	if (stat_config.scale)
		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
				    PERF_FORMAT_TOTAL_TIME_RUNNING;

	attr->inherit = !no_inherit;

	/*
	 * Some events get initialized with sample_(period/type) set,
	 * like tracepoints. Clear it up for counting.
	 */
	attr->sample_period = 0;

	/*
	 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be
	 * harmless while avoiding that older tools show confusing messages.
	 *
	 * However for pipe sessions we need to keep it zero,
	 * because script's perf_evsel__check_attr is triggered
	 * by attr->sample_type != 0, and we can't run it on
	 * stat sessions.
	 */
	if (!(STAT_RECORD && perf_stat.file.is_pipe))
		attr->sample_type = PERF_SAMPLE_IDENTIFIER;

	/*
	 * Disabling all counters initially, they will be enabled
	 * either manually by us or by kernel via enable_on_exec
	 * set later.
	 */
	if (perf_evsel__is_group_leader(evsel)) {
		attr->disabled = 1;

		/*
		 * In case of initial_delay we enable tracee
		 * events manually.
		 */
		if (target__none(&target) && !initial_delay)
			attr->enable_on_exec = 1;
	}

	if (target__has_cpu(&target))
		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));

	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
}

/*
 * Does the counter have nsecs as a unit?
 */
static inline int nsec_counter(struct perf_evsel *evsel)
{
	if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
	    perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
		return 1;

	return 0;
}

static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	if (perf_data_file__write(&perf_stat.file, event, event->header.size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	perf_stat.bytes_written += event->header.size;
	return 0;
}

static int write_stat_round_event(u64 tm, u64 type)
{
	return perf_event__synthesize_stat_round(NULL, tm, type,
						 process_synthesized_event,
						 NULL);
}

#define WRITE_STAT_ROUND_EVENT(time, interval) \
	write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
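
/*
 * e.g. WRITE_STAT_ROUND_EVENT(ns, INTERVAL) expands to
 * write_stat_round_event(ns, PERF_STAT_ROUND_TYPE__INTERVAL), marking the
 * synthesized round as an interval snapshot as opposed to
 * PERF_STAT_ROUND_TYPE__FINAL, which is used for the closing round.
 */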

#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

static int
perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread,
			     struct perf_counts_values *count)
{
	struct perf_sample_id *sid = SID(counter, cpu, thread);

	return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
					   process_synthesized_event, NULL);
}

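/*
 * Note: each perf_counts_values carries val together with the ena/run
 * times requested via PERF_FORMAT_TOTAL_TIME_ENABLED/RUNNING in
 * create_perf_stat_counter(); the run/ena ratio is what print_running()
 * reports and what scaling of multiplexed counters is based on.
 */
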
/*
 * Read out the results of a single counter:
 * do not aggregate counts across CPUs in system-wide mode
 */
static int read_counter(struct perf_evsel *counter)
{
	int nthreads = thread_map__nr(evsel_list->threads);
	int ncpus, cpu, thread;

	if (target__has_cpu(&target))
		ncpus = perf_evsel__nr_cpus(counter);
	else
		ncpus = 1;

	if (!counter->supported)
		return -ENOENT;

	if (counter->system_wide)
		nthreads = 1;

	for (thread = 0; thread < nthreads; thread++) {
		for (cpu = 0; cpu < ncpus; cpu++) {
			struct perf_counts_values *count;

			count = perf_counts(counter->counts, cpu, thread);
			if (perf_evsel__read(counter, cpu, thread, count)) {
				counter->counts->scaled = -1;
				perf_counts(counter->counts, cpu, thread)->ena = 0;
				perf_counts(counter->counts, cpu, thread)->run = 0;
				return -1;
			}

			if (STAT_RECORD) {
				if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
					pr_err("failed to write stat event\n");
					return -1;
				}
			}

			if (verbose > 1) {
				fprintf(stat_config.output,
					"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
					perf_evsel__name(counter),
					cpu,
					count->val, count->ena, count->run);
			}
		}
	}

	return 0;
}

static void read_counters(void)
{
	struct perf_evsel *counter;
	int ret;

	evlist__for_each_entry(evsel_list, counter) {
		ret = read_counter(counter);
		if (ret)
			pr_debug("failed to read counter %s\n", counter->name);

		if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
			pr_warning("failed to process counter %s\n", counter->name);
	}
}

static void process_interval(void)
{
	struct timespec ts, rs;

	read_counters();

	clock_gettime(CLOCK_MONOTONIC, &ts);
	diff_timespec(&rs, &ts, &ref_time);

	if (STAT_RECORD) {
		if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
			pr_err("failed to write stat round event\n");
	}

	print_counters(&rs, 0, NULL);
}

static void enable_counters(void)
{
	if (initial_delay)
		usleep(initial_delay * USEC_PER_MSEC);

	/*
	 * We need to enable counters only if:
	 * - we don't have tracee (attaching to task or cpu)
	 * - we have initial delay configured
	 */
	if (!target__none(&target) || initial_delay)
		perf_evlist__enable(evsel_list);
}

static void disable_counters(void)
{
	/*
	 * If we don't have tracee (attaching to task or cpu), counters may
	 * still be running. To get accurate group ratios, we must stop groups
	 * from counting before reading their constituent counters.
	 */
	if (!target__none(&target))
		perf_evlist__disable(evsel_list);
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
}

static bool has_unit(struct perf_evsel *counter)
{
	return counter->unit && *counter->unit;
}

static bool has_scale(struct perf_evsel *counter)
{
	return counter->scale != 1;
}

static int perf_stat_synthesize_config(bool is_pipe)
{
	struct perf_evsel *counter;
	int err;

	if (is_pipe) {
		err = perf_event__synthesize_attrs(NULL, perf_stat.session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}
	}

	/*
	 * Synthesize other events stuff not carried within
	 * attr event - unit, scale, name
	 */
	evlist__for_each_entry(evsel_list, counter) {
		if (!counter->supported)
			continue;

		/*
		 * Synthesize unit and scale only if it's defined.
		 */
		if (has_unit(counter)) {
			err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event);
			if (err < 0) {
				pr_err("Couldn't synthesize evsel unit.\n");
				return err;
			}
		}

		if (has_scale(counter)) {
			err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event);
			if (err < 0) {
				pr_err("Couldn't synthesize evsel scale.\n");
				return err;
			}
		}

		if (counter->own_cpus) {
			err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event);
			if (err < 0) {
				pr_err("Couldn't synthesize evsel cpus.\n");
				return err;
			}
		}

		/*
		 * Name is needed only for pipe output,
		 * perf.data carries event names.
		 */
		if (is_pipe) {
			err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event);
			if (err < 0) {
				pr_err("Couldn't synthesize evsel name.\n");
				return err;
			}
		}
	}

	err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads,
						 process_synthesized_event,
						 NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = perf_event__synthesize_stat_config(NULL, &stat_config,
						 process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize config.\n");
		return err;
	}

	return 0;
}
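
/*
 * Together with the file header this makes the 'perf stat record' stream
 * self-describing: attrs (pipe mode only), per-event unit/scale/cpus/name
 * updates, the thread and cpu maps and the stat config are all written
 * out, so a later report pass can re-create the counting setup without
 * re-opening any events.
 */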

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))

static int __store_counter_ids(struct perf_evsel *counter,
			       struct cpu_map *cpus,
			       struct thread_map *threads)
{
	int cpu, thread;

	for (cpu = 0; cpu < cpus->nr; cpu++) {
		for (thread = 0; thread < threads->nr; thread++) {
			int fd = FD(counter, cpu, thread);

			if (perf_evlist__id_add_fd(evsel_list, counter,
						   cpu, thread, fd) < 0)
				return -1;
		}
	}

	return 0;
}

static int store_counter_ids(struct perf_evsel *counter)
{
	struct cpu_map *cpus = counter->cpus;
	struct thread_map *threads = counter->threads;

	if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr))
		return -ENOMEM;

	return __store_counter_ids(counter, cpus, threads);
}

static int __run_perf_stat(int argc, const char **argv)
{
	int interval = stat_config.interval;
	char msg[BUFSIZ];
	unsigned long long t0, t1;
	struct perf_evsel *counter;
	struct timespec ts;
	size_t l;
	int status = 0;
	const bool forks = (argc > 0);
	bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false;
	struct perf_evsel_config_term *err_term;

	if (interval) {
		ts.tv_sec  = interval / USEC_PER_MSEC;
		ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
	} else {
		ts.tv_sec  = 1;
		ts.tv_nsec = 0;
	}

	if (forks) {
		if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
						  workload_exec_failed_signal) < 0) {
			perror("failed to prepare workload");
			return -1;
		}
		child_pid = evsel_list->workload.pid;
	}

	if (group)
		perf_evlist__set_leader(evsel_list);

	evlist__for_each_entry(evsel_list, counter) {
try_again:
		if (create_perf_stat_counter(counter) < 0) {
			/*
			 * PPC returns ENXIO for HW counters until 2.6.37
			 * (behavior changed with commit b0a873e).
			 */
			if (errno == EINVAL || errno == ENOSYS ||
			    errno == ENOENT || errno == EOPNOTSUPP ||
			    errno == ENXIO) {
				if (verbose > 0)
					ui__warning("%s event is not supported by the kernel.\n",
						    perf_evsel__name(counter));
				counter->supported = false;

				if ((counter->leader != counter) ||
				    !(counter->leader->nr_members > 1))
					continue;
			} else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			perf_evsel__open_strerror(counter, &target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);

			if (child_pid != -1)
				kill(child_pid, SIGTERM);

			return -1;
		}
		counter->supported = true;

		l = strlen(counter->unit);
		if (l > unit_width)
			unit_width = l;

		if (STAT_RECORD && store_counter_ids(counter))
			return -1;
	}

	if (perf_evlist__apply_filters(evsel_list, &counter)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			counter->filter, perf_evsel__name(counter), errno,
			str_error_r(errno, msg, sizeof(msg)));
		return -1;
	}

	if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) {
		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
		      err_term->val.drv_cfg, perf_evsel__name(counter), errno,
		      str_error_r(errno, msg, sizeof(msg)));
		return -1;
	}

	if (STAT_RECORD) {
		int err, fd = perf_data_file__fd(&perf_stat.file);

		if (is_pipe) {
			err = perf_header__write_pipe(perf_data_file__fd(&perf_stat.file));
		} else {
			err = perf_session__write_header(perf_stat.session, evsel_list,
							 fd, false);
		}

		if (err < 0)
			return err;

		err = perf_stat_synthesize_config(is_pipe);
		if (err < 0)
			return err;
	}

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();
	clock_gettime(CLOCK_MONOTONIC, &ref_time);

	if (forks) {
		perf_evlist__start_workload(evsel_list);
		enable_counters();

		if (interval) {
			while (!waitpid(child_pid, &status, WNOHANG)) {
				nanosleep(&ts, NULL);
				process_interval();
			}
		}
		wait(&status);

		if (workload_exec_errno) {
			const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
			pr_err("Workload failed: %s\n", emsg);
			return -1;
		}

		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), argv[0]);
	} else {
		enable_counters();
		while (!done) {
			nanosleep(&ts, NULL);
			if (interval)
				process_interval();
		}
	}

	disable_counters();

	t1 = rdclock();

	update_stats(&walltime_nsecs_stats, t1 - t0);

	/*
	 * Closing a group leader splits the group, and as we only disable
	 * group leaders, results in remaining events becoming enabled. To
	 * avoid arbitrary skew, we must read all counters before closing any
	 * group leaders.
716 */ 717 read_counters(); 718 perf_evlist__close(evsel_list); 719 720 return WEXITSTATUS(status); 721 } 722 723 static int run_perf_stat(int argc, const char **argv) 724 { 725 int ret; 726 727 if (pre_cmd) { 728 ret = system(pre_cmd); 729 if (ret) 730 return ret; 731 } 732 733 if (sync_run) 734 sync(); 735 736 ret = __run_perf_stat(argc, argv); 737 if (ret) 738 return ret; 739 740 if (post_cmd) { 741 ret = system(post_cmd); 742 if (ret) 743 return ret; 744 } 745 746 return ret; 747 } 748 749 static void print_running(u64 run, u64 ena) 750 { 751 if (csv_output) { 752 fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", 753 csv_sep, 754 run, 755 csv_sep, 756 ena ? 100.0 * run / ena : 100.0); 757 } else if (run != ena) { 758 fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); 759 } 760 } 761 762 static void print_noise_pct(double total, double avg) 763 { 764 double pct = rel_stddev_stats(total, avg); 765 766 if (csv_output) 767 fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct); 768 else if (pct) 769 fprintf(stat_config.output, " ( +-%6.2f%% )", pct); 770 } 771 772 static void print_noise(struct perf_evsel *evsel, double avg) 773 { 774 struct perf_stat_evsel *ps; 775 776 if (run_count == 1) 777 return; 778 779 ps = evsel->priv; 780 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 781 } 782 783 static void aggr_printout(struct perf_evsel *evsel, int id, int nr) 784 { 785 switch (stat_config.aggr_mode) { 786 case AGGR_CORE: 787 fprintf(stat_config.output, "S%d-C%*d%s%*d%s", 788 cpu_map__id_to_socket(id), 789 csv_output ? 0 : -8, 790 cpu_map__id_to_cpu(id), 791 csv_sep, 792 csv_output ? 0 : 4, 793 nr, 794 csv_sep); 795 break; 796 case AGGR_SOCKET: 797 fprintf(stat_config.output, "S%*d%s%*d%s", 798 csv_output ? 0 : -5, 799 id, 800 csv_sep, 801 csv_output ? 0 : 4, 802 nr, 803 csv_sep); 804 break; 805 case AGGR_NONE: 806 fprintf(stat_config.output, "CPU%*d%s", 807 csv_output ? 0 : -4, 808 perf_evsel__cpus(evsel)->map[id], csv_sep); 809 break; 810 case AGGR_THREAD: 811 fprintf(stat_config.output, "%*s-%*d%s", 812 csv_output ? 0 : 16, 813 thread_map__comm(evsel->threads, id), 814 csv_output ? 
0 : -8, 815 thread_map__pid(evsel->threads, id), 816 csv_sep); 817 break; 818 case AGGR_GLOBAL: 819 case AGGR_UNSET: 820 default: 821 break; 822 } 823 } 824 825 struct outstate { 826 FILE *fh; 827 bool newline; 828 const char *prefix; 829 int nfields; 830 int id, nr; 831 struct perf_evsel *evsel; 832 }; 833 834 #define METRIC_LEN 35 835 836 static void new_line_std(void *ctx) 837 { 838 struct outstate *os = ctx; 839 840 os->newline = true; 841 } 842 843 static void do_new_line_std(struct outstate *os) 844 { 845 fputc('\n', os->fh); 846 fputs(os->prefix, os->fh); 847 aggr_printout(os->evsel, os->id, os->nr); 848 if (stat_config.aggr_mode == AGGR_NONE) 849 fprintf(os->fh, " "); 850 fprintf(os->fh, " "); 851 } 852 853 static void print_metric_std(void *ctx, const char *color, const char *fmt, 854 const char *unit, double val) 855 { 856 struct outstate *os = ctx; 857 FILE *out = os->fh; 858 int n; 859 bool newline = os->newline; 860 861 os->newline = false; 862 863 if (unit == NULL || fmt == NULL) { 864 fprintf(out, "%-*s", METRIC_LEN, ""); 865 return; 866 } 867 868 if (newline) 869 do_new_line_std(os); 870 871 n = fprintf(out, " # "); 872 if (color) 873 n += color_fprintf(out, color, fmt, val); 874 else 875 n += fprintf(out, fmt, val); 876 fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); 877 } 878 879 static void new_line_csv(void *ctx) 880 { 881 struct outstate *os = ctx; 882 int i; 883 884 fputc('\n', os->fh); 885 if (os->prefix) 886 fprintf(os->fh, "%s%s", os->prefix, csv_sep); 887 aggr_printout(os->evsel, os->id, os->nr); 888 for (i = 0; i < os->nfields; i++) 889 fputs(csv_sep, os->fh); 890 } 891 892 static void print_metric_csv(void *ctx, 893 const char *color __maybe_unused, 894 const char *fmt, const char *unit, double val) 895 { 896 struct outstate *os = ctx; 897 FILE *out = os->fh; 898 char buf[64], *vals, *ends; 899 900 if (unit == NULL || fmt == NULL) { 901 fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep); 902 return; 903 } 904 snprintf(buf, sizeof(buf), fmt, val); 905 ends = vals = ltrim(buf); 906 while (isdigit(*ends) || *ends == '.') 907 ends++; 908 *ends = 0; 909 while (isspace(*unit)) 910 unit++; 911 fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); 912 } 913 914 #define METRIC_ONLY_LEN 20 915 916 /* Filter out some columns that don't work well in metrics only mode */ 917 918 static bool valid_only_metric(const char *unit) 919 { 920 if (!unit) 921 return false; 922 if (strstr(unit, "/sec") || 923 strstr(unit, "hz") || 924 strstr(unit, "Hz") || 925 strstr(unit, "CPUs utilized")) 926 return false; 927 return true; 928 } 929 930 static const char *fixunit(char *buf, struct perf_evsel *evsel, 931 const char *unit) 932 { 933 if (!strncmp(unit, "of all", 6)) { 934 snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), 935 unit); 936 return buf; 937 } 938 return unit; 939 } 940 941 static void print_metric_only(void *ctx, const char *color, const char *fmt, 942 const char *unit, double val) 943 { 944 struct outstate *os = ctx; 945 FILE *out = os->fh; 946 int n; 947 char buf[1024]; 948 unsigned mlen = METRIC_ONLY_LEN; 949 950 if (!valid_only_metric(unit)) 951 return; 952 unit = fixunit(buf, os->evsel, unit); 953 if (color) 954 n = color_fprintf(out, color, fmt, val); 955 else 956 n = fprintf(out, fmt, val); 957 if (n > METRIC_ONLY_LEN) 958 n = METRIC_ONLY_LEN; 959 if (mlen < strlen(unit)) 960 mlen = strlen(unit) + 1; 961 fprintf(out, "%*s", mlen - n, ""); 962 } 963 964 static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, 965 const char *fmt, 
966 const char *unit, double val) 967 { 968 struct outstate *os = ctx; 969 FILE *out = os->fh; 970 char buf[64], *vals, *ends; 971 char tbuf[1024]; 972 973 if (!valid_only_metric(unit)) 974 return; 975 unit = fixunit(tbuf, os->evsel, unit); 976 snprintf(buf, sizeof buf, fmt, val); 977 ends = vals = ltrim(buf); 978 while (isdigit(*ends) || *ends == '.') 979 ends++; 980 *ends = 0; 981 fprintf(out, "%s%s", vals, csv_sep); 982 } 983 984 static void new_line_metric(void *ctx __maybe_unused) 985 { 986 } 987 988 static void print_metric_header(void *ctx, const char *color __maybe_unused, 989 const char *fmt __maybe_unused, 990 const char *unit, double val __maybe_unused) 991 { 992 struct outstate *os = ctx; 993 char tbuf[1024]; 994 995 if (!valid_only_metric(unit)) 996 return; 997 unit = fixunit(tbuf, os->evsel, unit); 998 if (csv_output) 999 fprintf(os->fh, "%s%s", unit, csv_sep); 1000 else 1001 fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit); 1002 } 1003 1004 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) 1005 { 1006 FILE *output = stat_config.output; 1007 double msecs = avg / NSEC_PER_MSEC; 1008 const char *fmt_v, *fmt_n; 1009 char name[25]; 1010 1011 fmt_v = csv_output ? "%.6f%s" : "%18.6f%s"; 1012 fmt_n = csv_output ? "%s" : "%-25s"; 1013 1014 aggr_printout(evsel, id, nr); 1015 1016 scnprintf(name, sizeof(name), "%s%s", 1017 perf_evsel__name(evsel), csv_output ? "" : " (msec)"); 1018 1019 fprintf(output, fmt_v, msecs, csv_sep); 1020 1021 if (csv_output) 1022 fprintf(output, "%s%s", evsel->unit, csv_sep); 1023 else 1024 fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep); 1025 1026 fprintf(output, fmt_n, name); 1027 1028 if (evsel->cgrp) 1029 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1030 } 1031 1032 static int first_shadow_cpu(struct perf_evsel *evsel, int id) 1033 { 1034 int i; 1035 1036 if (!aggr_get_id) 1037 return 0; 1038 1039 if (stat_config.aggr_mode == AGGR_NONE) 1040 return id; 1041 1042 if (stat_config.aggr_mode == AGGR_GLOBAL) 1043 return 0; 1044 1045 for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { 1046 int cpu2 = perf_evsel__cpus(evsel)->map[i]; 1047 1048 if (aggr_get_id(evsel_list->cpus, cpu2) == id) 1049 return cpu2; 1050 } 1051 return 0; 1052 } 1053 1054 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 1055 { 1056 FILE *output = stat_config.output; 1057 double sc = evsel->scale; 1058 const char *fmt; 1059 1060 if (csv_output) { 1061 fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; 1062 } else { 1063 if (big_num) 1064 fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; 1065 else 1066 fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; 1067 } 1068 1069 aggr_printout(evsel, id, nr); 1070 1071 fprintf(output, fmt, avg, csv_sep); 1072 1073 if (evsel->unit) 1074 fprintf(output, "%-*s%s", 1075 csv_output ? 0 : unit_width, 1076 evsel->unit, csv_sep); 1077 1078 fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); 1079 1080 if (evsel->cgrp) 1081 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1082 } 1083 1084 static void printout(int id, int nr, struct perf_evsel *counter, double uval, 1085 char *prefix, u64 run, u64 ena, double noise) 1086 { 1087 struct perf_stat_output_ctx out; 1088 struct outstate os = { 1089 .fh = stat_config.output, 1090 .prefix = prefix ? 
prefix : "", 1091 .id = id, 1092 .nr = nr, 1093 .evsel = counter, 1094 }; 1095 print_metric_t pm = print_metric_std; 1096 void (*nl)(void *); 1097 1098 if (metric_only) { 1099 nl = new_line_metric; 1100 if (csv_output) 1101 pm = print_metric_only_csv; 1102 else 1103 pm = print_metric_only; 1104 } else 1105 nl = new_line_std; 1106 1107 if (csv_output && !metric_only) { 1108 static int aggr_fields[] = { 1109 [AGGR_GLOBAL] = 0, 1110 [AGGR_THREAD] = 1, 1111 [AGGR_NONE] = 1, 1112 [AGGR_SOCKET] = 2, 1113 [AGGR_CORE] = 2, 1114 }; 1115 1116 pm = print_metric_csv; 1117 nl = new_line_csv; 1118 os.nfields = 3; 1119 os.nfields += aggr_fields[stat_config.aggr_mode]; 1120 if (counter->cgrp) 1121 os.nfields++; 1122 } 1123 if (run == 0 || ena == 0 || counter->counts->scaled == -1) { 1124 if (metric_only) { 1125 pm(&os, NULL, "", "", 0); 1126 return; 1127 } 1128 aggr_printout(counter, id, nr); 1129 1130 fprintf(stat_config.output, "%*s%s", 1131 csv_output ? 0 : 18, 1132 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 1133 csv_sep); 1134 1135 if (counter->supported) 1136 print_free_counters_hint = 1; 1137 1138 fprintf(stat_config.output, "%-*s%s", 1139 csv_output ? 0 : unit_width, 1140 counter->unit, csv_sep); 1141 1142 fprintf(stat_config.output, "%*s", 1143 csv_output ? 0 : -25, 1144 perf_evsel__name(counter)); 1145 1146 if (counter->cgrp) 1147 fprintf(stat_config.output, "%s%s", 1148 csv_sep, counter->cgrp->name); 1149 1150 if (!csv_output) 1151 pm(&os, NULL, NULL, "", 0); 1152 print_noise(counter, noise); 1153 print_running(run, ena); 1154 if (csv_output) 1155 pm(&os, NULL, NULL, "", 0); 1156 return; 1157 } 1158 1159 if (metric_only) 1160 /* nothing */; 1161 else if (nsec_counter(counter)) 1162 nsec_printout(id, nr, counter, uval); 1163 else 1164 abs_printout(id, nr, counter, uval); 1165 1166 out.print_metric = pm; 1167 out.new_line = nl; 1168 out.ctx = &os; 1169 out.force_header = false; 1170 1171 if (csv_output && !metric_only) { 1172 print_noise(counter, noise); 1173 print_running(run, ena); 1174 } 1175 1176 perf_stat__print_shadow_stats(counter, uval, 1177 first_shadow_cpu(counter, id), 1178 &out); 1179 if (!csv_output && !metric_only) { 1180 print_noise(counter, noise); 1181 print_running(run, ena); 1182 } 1183 } 1184 1185 static void aggr_update_shadow(void) 1186 { 1187 int cpu, s2, id, s; 1188 u64 val; 1189 struct perf_evsel *counter; 1190 1191 for (s = 0; s < aggr_map->nr; s++) { 1192 id = aggr_map->map[s]; 1193 evlist__for_each_entry(evsel_list, counter) { 1194 val = 0; 1195 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1196 s2 = aggr_get_id(evsel_list->cpus, cpu); 1197 if (s2 != id) 1198 continue; 1199 val += perf_counts(counter->counts, cpu, 0)->val; 1200 } 1201 val = val * counter->scale; 1202 perf_stat__update_shadow_stats(counter, &val, 1203 first_shadow_cpu(counter, id)); 1204 } 1205 } 1206 } 1207 1208 static void collect_all_aliases(struct perf_evsel *counter, 1209 void (*cb)(struct perf_evsel *counter, void *data, 1210 bool first), 1211 void *data) 1212 { 1213 struct perf_evsel *alias; 1214 1215 alias = list_prepare_entry(counter, &(evsel_list->entries), node); 1216 list_for_each_entry_continue (alias, &evsel_list->entries, node) { 1217 if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) || 1218 alias->scale != counter->scale || 1219 alias->cgrp != counter->cgrp || 1220 strcmp(alias->unit, counter->unit) || 1221 nsec_counter(alias) != nsec_counter(counter)) 1222 break; 1223 alias->merged_stat = true; 1224 cb(alias, data, false); 1225 } 1226 } 1227 
1228 static bool collect_data(struct perf_evsel *counter, 1229 void (*cb)(struct perf_evsel *counter, void *data, 1230 bool first), 1231 void *data) 1232 { 1233 if (counter->merged_stat) 1234 return false; 1235 cb(counter, data, true); 1236 if (!no_merge) 1237 collect_all_aliases(counter, cb, data); 1238 return true; 1239 } 1240 1241 struct aggr_data { 1242 u64 ena, run, val; 1243 int id; 1244 int nr; 1245 int cpu; 1246 }; 1247 1248 static void aggr_cb(struct perf_evsel *counter, void *data, bool first) 1249 { 1250 struct aggr_data *ad = data; 1251 int cpu, s2; 1252 1253 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1254 struct perf_counts_values *counts; 1255 1256 s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); 1257 if (s2 != ad->id) 1258 continue; 1259 if (first) 1260 ad->nr++; 1261 counts = perf_counts(counter->counts, cpu, 0); 1262 /* 1263 * When any result is bad, make them all to give 1264 * consistent output in interval mode. 1265 */ 1266 if (counts->ena == 0 || counts->run == 0 || 1267 counter->counts->scaled == -1) { 1268 ad->ena = 0; 1269 ad->run = 0; 1270 break; 1271 } 1272 ad->val += counts->val; 1273 ad->ena += counts->ena; 1274 ad->run += counts->run; 1275 } 1276 } 1277 1278 static void print_aggr(char *prefix) 1279 { 1280 FILE *output = stat_config.output; 1281 struct perf_evsel *counter; 1282 int s, id, nr; 1283 double uval; 1284 u64 ena, run, val; 1285 bool first; 1286 1287 if (!(aggr_map || aggr_get_id)) 1288 return; 1289 1290 aggr_update_shadow(); 1291 1292 /* 1293 * With metric_only everything is on a single line. 1294 * Without each counter has its own line. 1295 */ 1296 for (s = 0; s < aggr_map->nr; s++) { 1297 struct aggr_data ad; 1298 if (prefix && metric_only) 1299 fprintf(output, "%s", prefix); 1300 1301 ad.id = id = aggr_map->map[s]; 1302 first = true; 1303 evlist__for_each_entry(evsel_list, counter) { 1304 ad.val = ad.ena = ad.run = 0; 1305 ad.nr = 0; 1306 if (!collect_data(counter, aggr_cb, &ad)) 1307 continue; 1308 nr = ad.nr; 1309 ena = ad.ena; 1310 run = ad.run; 1311 val = ad.val; 1312 if (first && metric_only) { 1313 first = false; 1314 aggr_printout(counter, id, nr); 1315 } 1316 if (prefix && !metric_only) 1317 fprintf(output, "%s", prefix); 1318 1319 uval = val * counter->scale; 1320 printout(id, nr, counter, uval, prefix, run, ena, 1.0); 1321 if (!metric_only) 1322 fputc('\n', output); 1323 } 1324 if (metric_only) 1325 fputc('\n', output); 1326 } 1327 } 1328 1329 static void print_aggr_thread(struct perf_evsel *counter, char *prefix) 1330 { 1331 FILE *output = stat_config.output; 1332 int nthreads = thread_map__nr(counter->threads); 1333 int ncpus = cpu_map__nr(counter->cpus); 1334 int cpu, thread; 1335 double uval; 1336 1337 for (thread = 0; thread < nthreads; thread++) { 1338 u64 ena = 0, run = 0, val = 0; 1339 1340 for (cpu = 0; cpu < ncpus; cpu++) { 1341 val += perf_counts(counter->counts, cpu, thread)->val; 1342 ena += perf_counts(counter->counts, cpu, thread)->ena; 1343 run += perf_counts(counter->counts, cpu, thread)->run; 1344 } 1345 1346 if (prefix) 1347 fprintf(output, "%s", prefix); 1348 1349 uval = val * counter->scale; 1350 printout(thread, 0, counter, uval, prefix, run, ena, 1.0); 1351 fputc('\n', output); 1352 } 1353 } 1354 1355 struct caggr_data { 1356 double avg, avg_enabled, avg_running; 1357 }; 1358 1359 static void counter_aggr_cb(struct perf_evsel *counter, void *data, 1360 bool first __maybe_unused) 1361 { 1362 struct caggr_data *cd = data; 1363 struct perf_stat_evsel *ps = counter->priv; 1364 1365 cd->avg += 
avg_stats(&ps->res_stats[0]); 1366 cd->avg_enabled += avg_stats(&ps->res_stats[1]); 1367 cd->avg_running += avg_stats(&ps->res_stats[2]); 1368 } 1369 1370 /* 1371 * Print out the results of a single counter: 1372 * aggregated counts in system-wide mode 1373 */ 1374 static void print_counter_aggr(struct perf_evsel *counter, char *prefix) 1375 { 1376 FILE *output = stat_config.output; 1377 double uval; 1378 struct caggr_data cd = { .avg = 0.0 }; 1379 1380 if (!collect_data(counter, counter_aggr_cb, &cd)) 1381 return; 1382 1383 if (prefix && !metric_only) 1384 fprintf(output, "%s", prefix); 1385 1386 uval = cd.avg * counter->scale; 1387 printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, cd.avg); 1388 if (!metric_only) 1389 fprintf(output, "\n"); 1390 } 1391 1392 static void counter_cb(struct perf_evsel *counter, void *data, 1393 bool first __maybe_unused) 1394 { 1395 struct aggr_data *ad = data; 1396 1397 ad->val += perf_counts(counter->counts, ad->cpu, 0)->val; 1398 ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena; 1399 ad->run += perf_counts(counter->counts, ad->cpu, 0)->run; 1400 } 1401 1402 /* 1403 * Print out the results of a single counter: 1404 * does not use aggregated count in system-wide 1405 */ 1406 static void print_counter(struct perf_evsel *counter, char *prefix) 1407 { 1408 FILE *output = stat_config.output; 1409 u64 ena, run, val; 1410 double uval; 1411 int cpu; 1412 1413 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1414 struct aggr_data ad = { .cpu = cpu }; 1415 1416 if (!collect_data(counter, counter_cb, &ad)) 1417 return; 1418 val = ad.val; 1419 ena = ad.ena; 1420 run = ad.run; 1421 1422 if (prefix) 1423 fprintf(output, "%s", prefix); 1424 1425 uval = val * counter->scale; 1426 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); 1427 1428 fputc('\n', output); 1429 } 1430 } 1431 1432 static void print_no_aggr_metric(char *prefix) 1433 { 1434 int cpu; 1435 int nrcpus = 0; 1436 struct perf_evsel *counter; 1437 u64 ena, run, val; 1438 double uval; 1439 1440 nrcpus = evsel_list->cpus->nr; 1441 for (cpu = 0; cpu < nrcpus; cpu++) { 1442 bool first = true; 1443 1444 if (prefix) 1445 fputs(prefix, stat_config.output); 1446 evlist__for_each_entry(evsel_list, counter) { 1447 if (first) { 1448 aggr_printout(counter, cpu, 0); 1449 first = false; 1450 } 1451 val = perf_counts(counter->counts, cpu, 0)->val; 1452 ena = perf_counts(counter->counts, cpu, 0)->ena; 1453 run = perf_counts(counter->counts, cpu, 0)->run; 1454 1455 uval = val * counter->scale; 1456 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); 1457 } 1458 fputc('\n', stat_config.output); 1459 } 1460 } 1461 1462 static int aggr_header_lens[] = { 1463 [AGGR_CORE] = 18, 1464 [AGGR_SOCKET] = 12, 1465 [AGGR_NONE] = 6, 1466 [AGGR_THREAD] = 24, 1467 [AGGR_GLOBAL] = 0, 1468 }; 1469 1470 static const char *aggr_header_csv[] = { 1471 [AGGR_CORE] = "core,cpus,", 1472 [AGGR_SOCKET] = "socket,cpus", 1473 [AGGR_NONE] = "cpu,", 1474 [AGGR_THREAD] = "comm-pid,", 1475 [AGGR_GLOBAL] = "" 1476 }; 1477 1478 static void print_metric_headers(const char *prefix, bool no_indent) 1479 { 1480 struct perf_stat_output_ctx out; 1481 struct perf_evsel *counter; 1482 struct outstate os = { 1483 .fh = stat_config.output 1484 }; 1485 1486 if (prefix) 1487 fprintf(stat_config.output, "%s", prefix); 1488 1489 if (!csv_output && !no_indent) 1490 fprintf(stat_config.output, "%*s", 1491 aggr_header_lens[stat_config.aggr_mode], ""); 1492 if (csv_output) { 1493 if (stat_config.interval) 1494 fputs("time,", 
stat_config.output); 1495 fputs(aggr_header_csv[stat_config.aggr_mode], 1496 stat_config.output); 1497 } 1498 1499 /* Print metrics headers only */ 1500 evlist__for_each_entry(evsel_list, counter) { 1501 os.evsel = counter; 1502 out.ctx = &os; 1503 out.print_metric = print_metric_header; 1504 out.new_line = new_line_metric; 1505 out.force_header = true; 1506 os.evsel = counter; 1507 perf_stat__print_shadow_stats(counter, 0, 1508 0, 1509 &out); 1510 } 1511 fputc('\n', stat_config.output); 1512 } 1513 1514 static void print_interval(char *prefix, struct timespec *ts) 1515 { 1516 FILE *output = stat_config.output; 1517 static int num_print_interval; 1518 1519 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); 1520 1521 if (num_print_interval == 0 && !csv_output) { 1522 switch (stat_config.aggr_mode) { 1523 case AGGR_SOCKET: 1524 fprintf(output, "# time socket cpus"); 1525 if (!metric_only) 1526 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1527 break; 1528 case AGGR_CORE: 1529 fprintf(output, "# time core cpus"); 1530 if (!metric_only) 1531 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1532 break; 1533 case AGGR_NONE: 1534 fprintf(output, "# time CPU"); 1535 if (!metric_only) 1536 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1537 break; 1538 case AGGR_THREAD: 1539 fprintf(output, "# time comm-pid"); 1540 if (!metric_only) 1541 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1542 break; 1543 case AGGR_GLOBAL: 1544 default: 1545 fprintf(output, "# time"); 1546 if (!metric_only) 1547 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1548 case AGGR_UNSET: 1549 break; 1550 } 1551 } 1552 1553 if (num_print_interval == 0 && metric_only) 1554 print_metric_headers(" ", true); 1555 if (++num_print_interval == 25) 1556 num_print_interval = 0; 1557 } 1558 1559 static void print_header(int argc, const char **argv) 1560 { 1561 FILE *output = stat_config.output; 1562 int i; 1563 1564 fflush(stdout); 1565 1566 if (!csv_output) { 1567 fprintf(output, "\n"); 1568 fprintf(output, " Performance counter stats for "); 1569 if (target.system_wide) 1570 fprintf(output, "\'system wide"); 1571 else if (target.cpu_list) 1572 fprintf(output, "\'CPU(s) %s", target.cpu_list); 1573 else if (!target__has_task(&target)) { 1574 fprintf(output, "\'%s", argv ? argv[0] : "pipe"); 1575 for (i = 1; argv && (i < argc); i++) 1576 fprintf(output, " %s", argv[i]); 1577 } else if (target.pid) 1578 fprintf(output, "process id \'%s", target.pid); 1579 else 1580 fprintf(output, "thread id \'%s", target.tid); 1581 1582 fprintf(output, "\'"); 1583 if (run_count > 1) 1584 fprintf(output, " (%d runs)", run_count); 1585 fprintf(output, ":\n\n"); 1586 } 1587 } 1588 1589 static void print_footer(void) 1590 { 1591 FILE *output = stat_config.output; 1592 int n; 1593 1594 if (!null_run) 1595 fprintf(output, "\n"); 1596 fprintf(output, " %17.9f seconds time elapsed", 1597 avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC); 1598 if (run_count > 1) { 1599 fprintf(output, " "); 1600 print_noise_pct(stddev_stats(&walltime_nsecs_stats), 1601 avg_stats(&walltime_nsecs_stats)); 1602 } 1603 fprintf(output, "\n\n"); 1604 1605 if (print_free_counters_hint && 1606 sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 && 1607 n > 0) 1608 fprintf(output, 1609 "Some events weren't counted. 
Try disabling the NMI watchdog:\n" 1610 " echo 0 > /proc/sys/kernel/nmi_watchdog\n" 1611 " perf stat ...\n" 1612 " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); 1613 } 1614 1615 static void print_counters(struct timespec *ts, int argc, const char **argv) 1616 { 1617 int interval = stat_config.interval; 1618 struct perf_evsel *counter; 1619 char buf[64], *prefix = NULL; 1620 1621 /* Do not print anything if we record to the pipe. */ 1622 if (STAT_RECORD && perf_stat.file.is_pipe) 1623 return; 1624 1625 if (interval) 1626 print_interval(prefix = buf, ts); 1627 else 1628 print_header(argc, argv); 1629 1630 if (metric_only) { 1631 static int num_print_iv; 1632 1633 if (num_print_iv == 0 && !interval) 1634 print_metric_headers(prefix, false); 1635 if (num_print_iv++ == 25) 1636 num_print_iv = 0; 1637 if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) 1638 fprintf(stat_config.output, "%s", prefix); 1639 } 1640 1641 switch (stat_config.aggr_mode) { 1642 case AGGR_CORE: 1643 case AGGR_SOCKET: 1644 print_aggr(prefix); 1645 break; 1646 case AGGR_THREAD: 1647 evlist__for_each_entry(evsel_list, counter) 1648 print_aggr_thread(counter, prefix); 1649 break; 1650 case AGGR_GLOBAL: 1651 evlist__for_each_entry(evsel_list, counter) 1652 print_counter_aggr(counter, prefix); 1653 if (metric_only) 1654 fputc('\n', stat_config.output); 1655 break; 1656 case AGGR_NONE: 1657 if (metric_only) 1658 print_no_aggr_metric(prefix); 1659 else { 1660 evlist__for_each_entry(evsel_list, counter) 1661 print_counter(counter, prefix); 1662 } 1663 break; 1664 case AGGR_UNSET: 1665 default: 1666 break; 1667 } 1668 1669 if (!interval && !csv_output) 1670 print_footer(); 1671 1672 fflush(stat_config.output); 1673 } 1674 1675 static volatile int signr = -1; 1676 1677 static void skip_signal(int signo) 1678 { 1679 if ((child_pid == -1) || stat_config.interval) 1680 done = 1; 1681 1682 signr = signo; 1683 /* 1684 * render child_pid harmless 1685 * won't send SIGTERM to a random 1686 * process in case of race condition 1687 * and fast PID recycling 1688 */ 1689 child_pid = -1; 1690 } 1691 1692 static void sig_atexit(void) 1693 { 1694 sigset_t set, oset; 1695 1696 /* 1697 * avoid race condition with SIGCHLD handler 1698 * in skip_signal() which is modifying child_pid 1699 * goal is to avoid send SIGTERM to a random 1700 * process 1701 */ 1702 sigemptyset(&set); 1703 sigaddset(&set, SIGCHLD); 1704 sigprocmask(SIG_BLOCK, &set, &oset); 1705 1706 if (child_pid != -1) 1707 kill(child_pid, SIGTERM); 1708 1709 sigprocmask(SIG_SETMASK, &oset, NULL); 1710 1711 if (signr == -1) 1712 return; 1713 1714 signal(signr, SIG_DFL); 1715 kill(getpid(), signr); 1716 } 1717 1718 static int stat__set_big_num(const struct option *opt __maybe_unused, 1719 const char *s __maybe_unused, int unset) 1720 { 1721 big_num_opt = unset ? 0 : 1; 1722 return 0; 1723 } 1724 1725 static int enable_metric_only(const struct option *opt __maybe_unused, 1726 const char *s __maybe_unused, int unset) 1727 { 1728 force_metric_only = true; 1729 metric_only = !unset; 1730 return 0; 1731 } 1732 1733 static const struct option stat_options[] = { 1734 OPT_BOOLEAN('T', "transaction", &transaction_run, 1735 "hardware transaction statistics"), 1736 OPT_CALLBACK('e', "event", &evsel_list, "event", 1737 "event selector. 
use 'perf list' to list available events", 1738 parse_events_option), 1739 OPT_CALLBACK(0, "filter", &evsel_list, "filter", 1740 "event filter", parse_filter), 1741 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 1742 "child tasks do not inherit counters"), 1743 OPT_STRING('p', "pid", &target.pid, "pid", 1744 "stat events on existing process id"), 1745 OPT_STRING('t', "tid", &target.tid, "tid", 1746 "stat events on existing thread id"), 1747 OPT_BOOLEAN('a', "all-cpus", &target.system_wide, 1748 "system-wide collection from all CPUs"), 1749 OPT_BOOLEAN('g', "group", &group, 1750 "put the counters into a counter group"), 1751 OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), 1752 OPT_INCR('v', "verbose", &verbose, 1753 "be more verbose (show counter open errors, etc)"), 1754 OPT_INTEGER('r', "repeat", &run_count, 1755 "repeat command and print average + stddev (max: 100, forever: 0)"), 1756 OPT_BOOLEAN('n', "null", &null_run, 1757 "null run - dont start any counters"), 1758 OPT_INCR('d', "detailed", &detailed_run, 1759 "detailed run - start a lot of events"), 1760 OPT_BOOLEAN('S', "sync", &sync_run, 1761 "call sync() before starting a run"), 1762 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 1763 "print large numbers with thousands\' separators", 1764 stat__set_big_num), 1765 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 1766 "list of cpus to monitor in system-wide"), 1767 OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, 1768 "disable CPU count aggregation", AGGR_NONE), 1769 OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"), 1770 OPT_STRING('x', "field-separator", &csv_sep, "separator", 1771 "print counts with custom separator"), 1772 OPT_CALLBACK('G', "cgroup", &evsel_list, "name", 1773 "monitor event in cgroup name only", parse_cgroups), 1774 OPT_STRING('o', "output", &output_name, "file", "output file name"), 1775 OPT_BOOLEAN(0, "append", &append_file, "append to the output file"), 1776 OPT_INTEGER(0, "log-fd", &output_fd, 1777 "log output to fd, instead of stderr"), 1778 OPT_STRING(0, "pre", &pre_cmd, "command", 1779 "command to run prior to the measured command"), 1780 OPT_STRING(0, "post", &post_cmd, "command", 1781 "command to run after to the measured command"), 1782 OPT_UINTEGER('I', "interval-print", &stat_config.interval, 1783 "print counts at regular interval in ms (>= 10)"), 1784 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, 1785 "aggregate counts per processor socket", AGGR_SOCKET), 1786 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, 1787 "aggregate counts per physical processor core", AGGR_CORE), 1788 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, 1789 "aggregate counts per thread", AGGR_THREAD), 1790 OPT_UINTEGER('D', "delay", &initial_delay, 1791 "ms to wait before starting measurement after program start"), 1792 OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL, 1793 "Only print computed metrics. 
No raw values", enable_metric_only), 1794 OPT_BOOLEAN(0, "topdown", &topdown_run, 1795 "measure topdown level 1 statistics"), 1796 OPT_BOOLEAN(0, "smi-cost", &smi_cost, 1797 "measure SMI cost"), 1798 OPT_END() 1799 }; 1800 1801 static int perf_stat__get_socket(struct cpu_map *map, int cpu) 1802 { 1803 return cpu_map__get_socket(map, cpu, NULL); 1804 } 1805 1806 static int perf_stat__get_core(struct cpu_map *map, int cpu) 1807 { 1808 return cpu_map__get_core(map, cpu, NULL); 1809 } 1810 1811 static int cpu_map__get_max(struct cpu_map *map) 1812 { 1813 int i, max = -1; 1814 1815 for (i = 0; i < map->nr; i++) { 1816 if (map->map[i] > max) 1817 max = map->map[i]; 1818 } 1819 1820 return max; 1821 } 1822 1823 static struct cpu_map *cpus_aggr_map; 1824 1825 static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx) 1826 { 1827 int cpu; 1828 1829 if (idx >= map->nr) 1830 return -1; 1831 1832 cpu = map->map[idx]; 1833 1834 if (cpus_aggr_map->map[cpu] == -1) 1835 cpus_aggr_map->map[cpu] = get_id(map, idx); 1836 1837 return cpus_aggr_map->map[cpu]; 1838 } 1839 1840 static int perf_stat__get_socket_cached(struct cpu_map *map, int idx) 1841 { 1842 return perf_stat__get_aggr(perf_stat__get_socket, map, idx); 1843 } 1844 1845 static int perf_stat__get_core_cached(struct cpu_map *map, int idx) 1846 { 1847 return perf_stat__get_aggr(perf_stat__get_core, map, idx); 1848 } 1849 1850 static int perf_stat_init_aggr_mode(void) 1851 { 1852 int nr; 1853 1854 switch (stat_config.aggr_mode) { 1855 case AGGR_SOCKET: 1856 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { 1857 perror("cannot build socket map"); 1858 return -1; 1859 } 1860 aggr_get_id = perf_stat__get_socket_cached; 1861 break; 1862 case AGGR_CORE: 1863 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { 1864 perror("cannot build core map"); 1865 return -1; 1866 } 1867 aggr_get_id = perf_stat__get_core_cached; 1868 break; 1869 case AGGR_NONE: 1870 case AGGR_GLOBAL: 1871 case AGGR_THREAD: 1872 case AGGR_UNSET: 1873 default: 1874 break; 1875 } 1876 1877 /* 1878 * The evsel_list->cpus is the base we operate on, 1879 * taking the highest cpu number to be the size of 1880 * the aggregation translate cpumap. 1881 */ 1882 nr = cpu_map__get_max(evsel_list->cpus); 1883 cpus_aggr_map = cpu_map__empty_new(nr + 1); 1884 return cpus_aggr_map ? 0 : -ENOMEM; 1885 } 1886 1887 static void perf_stat__exit_aggr_mode(void) 1888 { 1889 cpu_map__put(aggr_map); 1890 cpu_map__put(cpus_aggr_map); 1891 aggr_map = NULL; 1892 cpus_aggr_map = NULL; 1893 } 1894 1895 static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx) 1896 { 1897 int cpu; 1898 1899 if (idx > map->nr) 1900 return -1; 1901 1902 cpu = map->map[idx]; 1903 1904 if (cpu >= env->nr_cpus_avail) 1905 return -1; 1906 1907 return cpu; 1908 } 1909 1910 static int perf_env__get_socket(struct cpu_map *map, int idx, void *data) 1911 { 1912 struct perf_env *env = data; 1913 int cpu = perf_env__get_cpu(env, map, idx); 1914 1915 return cpu == -1 ? -1 : env->cpu[cpu].socket_id; 1916 } 1917 1918 static int perf_env__get_core(struct cpu_map *map, int idx, void *data) 1919 { 1920 struct perf_env *env = data; 1921 int core = -1, cpu = perf_env__get_cpu(env, map, idx); 1922 1923 if (cpu != -1) { 1924 int socket_id = env->cpu[cpu].socket_id; 1925 1926 /* 1927 * Encode socket in upper 16 bits 1928 * core_id is relative to socket, and 1929 * we need a global id. So we combine 1930 * socket + core id. 
1931 */ 1932 core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff); 1933 } 1934 1935 return core; 1936 } 1937 1938 static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus, 1939 struct cpu_map **sockp) 1940 { 1941 return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); 1942 } 1943 1944 static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, 1945 struct cpu_map **corep) 1946 { 1947 return cpu_map__build_map(cpus, corep, perf_env__get_core, env); 1948 } 1949 1950 static int perf_stat__get_socket_file(struct cpu_map *map, int idx) 1951 { 1952 return perf_env__get_socket(map, idx, &perf_stat.session->header.env); 1953 } 1954 1955 static int perf_stat__get_core_file(struct cpu_map *map, int idx) 1956 { 1957 return perf_env__get_core(map, idx, &perf_stat.session->header.env); 1958 } 1959 1960 static int perf_stat_init_aggr_mode_file(struct perf_stat *st) 1961 { 1962 struct perf_env *env = &st->session->header.env; 1963 1964 switch (stat_config.aggr_mode) { 1965 case AGGR_SOCKET: 1966 if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) { 1967 perror("cannot build socket map"); 1968 return -1; 1969 } 1970 aggr_get_id = perf_stat__get_socket_file; 1971 break; 1972 case AGGR_CORE: 1973 if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) { 1974 perror("cannot build core map"); 1975 return -1; 1976 } 1977 aggr_get_id = perf_stat__get_core_file; 1978 break; 1979 case AGGR_NONE: 1980 case AGGR_GLOBAL: 1981 case AGGR_THREAD: 1982 case AGGR_UNSET: 1983 default: 1984 break; 1985 } 1986 1987 return 0; 1988 } 1989 1990 static int topdown_filter_events(const char **attr, char **str, bool use_group) 1991 { 1992 int off = 0; 1993 int i; 1994 int len = 0; 1995 char *s; 1996 1997 for (i = 0; attr[i]; i++) { 1998 if (pmu_have_event("cpu", attr[i])) { 1999 len += strlen(attr[i]) + 1; 2000 attr[i - off] = attr[i]; 2001 } else 2002 off++; 2003 } 2004 attr[i - off] = NULL; 2005 2006 *str = malloc(len + 1 + 2); 2007 if (!*str) 2008 return -1; 2009 s = *str; 2010 if (i - off == 0) { 2011 *s = 0; 2012 return 0; 2013 } 2014 if (use_group) 2015 *s++ = '{'; 2016 for (i = 0; attr[i]; i++) { 2017 strcpy(s, attr[i]); 2018 s += strlen(s); 2019 *s++ = ','; 2020 } 2021 if (use_group) { 2022 s[-1] = '}'; 2023 *s = 0; 2024 } else 2025 s[-1] = 0; 2026 return 0; 2027 } 2028 2029 __weak bool arch_topdown_check_group(bool *warn) 2030 { 2031 *warn = false; 2032 return false; 2033 } 2034 2035 __weak void arch_topdown_group_warn(void) 2036 { 2037 } 2038 2039 /* 2040 * Add default attributes, if there were no attributes specified or 2041 * if -d/--detailed, -d -d or -d -d -d is used: 2042 */ 2043 static int add_default_attributes(void) 2044 { 2045 int err; 2046 struct perf_event_attr default_attrs0[] = { 2047 2048 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 2049 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, 2050 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, 2051 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 2052 2053 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, 2054 }; 2055 struct perf_event_attr frontend_attrs[] = { 2056 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, 2057 }; 2058 struct perf_event_attr backend_attrs[] = { 2059 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, 2060 }; 2061 struct perf_event_attr default_attrs1[] = { 2062 { .type = 
PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, 2063 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 2064 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, 2065 2066 }; 2067 2068 /* 2069 * Detailed stats (-d), covering the L1 and last level data caches: 2070 */ 2071 struct perf_event_attr detailed_attrs[] = { 2072 2073 { .type = PERF_TYPE_HW_CACHE, 2074 .config = 2075 PERF_COUNT_HW_CACHE_L1D << 0 | 2076 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2077 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2078 2079 { .type = PERF_TYPE_HW_CACHE, 2080 .config = 2081 PERF_COUNT_HW_CACHE_L1D << 0 | 2082 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2083 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2084 2085 { .type = PERF_TYPE_HW_CACHE, 2086 .config = 2087 PERF_COUNT_HW_CACHE_LL << 0 | 2088 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2089 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2090 2091 { .type = PERF_TYPE_HW_CACHE, 2092 .config = 2093 PERF_COUNT_HW_CACHE_LL << 0 | 2094 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2095 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2096 }; 2097 2098 /* 2099 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: 2100 */ 2101 struct perf_event_attr very_detailed_attrs[] = { 2102 2103 { .type = PERF_TYPE_HW_CACHE, 2104 .config = 2105 PERF_COUNT_HW_CACHE_L1I << 0 | 2106 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2107 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2108 2109 { .type = PERF_TYPE_HW_CACHE, 2110 .config = 2111 PERF_COUNT_HW_CACHE_L1I << 0 | 2112 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2113 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2114 2115 { .type = PERF_TYPE_HW_CACHE, 2116 .config = 2117 PERF_COUNT_HW_CACHE_DTLB << 0 | 2118 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2119 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2120 2121 { .type = PERF_TYPE_HW_CACHE, 2122 .config = 2123 PERF_COUNT_HW_CACHE_DTLB << 0 | 2124 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2125 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2126 2127 { .type = PERF_TYPE_HW_CACHE, 2128 .config = 2129 PERF_COUNT_HW_CACHE_ITLB << 0 | 2130 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2131 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2132 2133 { .type = PERF_TYPE_HW_CACHE, 2134 .config = 2135 PERF_COUNT_HW_CACHE_ITLB << 0 | 2136 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2137 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2138 2139 }; 2140 2141 /* 2142 * Very, very detailed stats (-d -d -d), adding prefetch events: 2143 */ 2144 struct perf_event_attr very_very_detailed_attrs[] = { 2145 2146 { .type = PERF_TYPE_HW_CACHE, 2147 .config = 2148 PERF_COUNT_HW_CACHE_L1D << 0 | 2149 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 2150 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2151 2152 { .type = PERF_TYPE_HW_CACHE, 2153 .config = 2154 PERF_COUNT_HW_CACHE_L1D << 0 | 2155 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 2156 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2157 }; 2158 2159 /* Set attrs if no event is selected and !null_run: */ 2160 if (null_run) 2161 return 0; 2162 2163 if (transaction_run) { 2164 if (pmu_have_event("cpu", "cycles-ct") && 2165 pmu_have_event("cpu", "el-start")) 2166 err = parse_events(evsel_list, transaction_attrs, NULL); 2167 else 2168 err = parse_events(evsel_list, transaction_limited_attrs, NULL); 2169 if (err) { 2170 fprintf(stderr, "Cannot set up transaction events\n"); 2171 return -1; 2172 } 2173 return 0; 2174 } 2175 2176 if (smi_cost) { 2177 int smi; 2178 2179 if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) { 2180 fprintf(stderr, "freeze_on_smi is not supported.\n"); 
	/* Set attrs if no event is selected and !null_run: */
	if (null_run)
		return 0;

	if (transaction_run) {
		if (pmu_have_event("cpu", "cycles-ct") &&
		    pmu_have_event("cpu", "el-start"))
			err = parse_events(evsel_list, transaction_attrs, NULL);
		else
			err = parse_events(evsel_list, transaction_limited_attrs, NULL);
		if (err) {
			fprintf(stderr, "Cannot set up transaction events\n");
			return -1;
		}
		return 0;
	}

	if (smi_cost) {
		int smi;

		if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
			fprintf(stderr, "freeze_on_smi is not supported.\n");
			return -1;
		}

		if (!smi) {
			if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
				fprintf(stderr, "Failed to set freeze_on_smi.\n");
				return -1;
			}
			smi_reset = true;
		}

		if (pmu_have_event("msr", "aperf") &&
		    pmu_have_event("msr", "smi")) {
			if (!force_metric_only)
				metric_only = true;
			err = parse_events(evsel_list, smi_cost_attrs, NULL);
		} else {
			fprintf(stderr, "To measure SMI cost, it needs "
				"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
			return -1;
		}
		if (err) {
			fprintf(stderr, "Cannot set up SMI cost events\n");
			return -1;
		}
		return 0;
	}

	if (topdown_run) {
		char *str = NULL;
		bool warn = false;

		if (stat_config.aggr_mode != AGGR_GLOBAL &&
		    stat_config.aggr_mode != AGGR_CORE) {
			pr_err("top down event configuration requires --per-core mode\n");
			return -1;
		}
		stat_config.aggr_mode = AGGR_CORE;
		if (nr_cgroups || !target__has_cpu(&target)) {
			pr_err("top down event configuration requires system-wide mode (-a)\n");
			return -1;
		}

		if (!force_metric_only)
			metric_only = true;
		if (topdown_filter_events(topdown_attrs, &str,
					  arch_topdown_check_group(&warn)) < 0) {
			pr_err("Out of memory\n");
			return -1;
		}
		if (topdown_attrs[0] && str) {
			if (warn)
				arch_topdown_group_warn();
			err = parse_events(evsel_list, str, NULL);
			if (err) {
				fprintf(stderr,
					"Cannot set up top down events %s: %d\n",
					str, err);
				free(str);
				return -1;
			}
		} else {
			fprintf(stderr, "System does not support topdown\n");
			return -1;
		}
		free(str);
	}

	if (!evsel_list->nr_entries) {
		if (target__has_cpu(&target))
			default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;

		if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
			return -1;
		if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
			if (perf_evlist__add_default_attrs(evsel_list,
							   frontend_attrs) < 0)
				return -1;
		}
		if (pmu_have_event("cpu", "stalled-cycles-backend")) {
			if (perf_evlist__add_default_attrs(evsel_list,
							   backend_attrs) < 0)
				return -1;
		}
		if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
			return -1;
	}

	/* Detailed events get appended to the event list: */

	if (detailed_run < 1)
		return 0;

	/* Append detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
		return -1;

	if (detailed_run < 2)
		return 0;

	/* Append very detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
		return -1;

	if (detailed_run < 3)
		return 0;

	/* Append very, very detailed run extra attributes: */
	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
}
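
/*
 * For reference (illustrative, mirroring the tables above): a plain
 * "perf stat ./workload" with no events selected counts task-clock,
 * context-switches, cpu-migrations, page-faults, cycles,
 * stalled-cycles-frontend/backend (when the cpu PMU exposes them),
 * instructions, branches and branch-misses. Each extra -d appends one of
 * the detailed tables:
 *
 *	perf stat -d ./workload        # + L1d and LLC load/miss events
 *	perf stat -d -d ./workload     # + L1i, dTLB and iTLB events
 *	perf stat -d -d -d ./workload  # + L1d prefetch events
 */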
static const char * const stat_record_usage[] = {
	"perf stat record [<options>]",
	NULL,
};

static void init_features(struct perf_session *session)
{
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
	perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
	perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
	perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
}

static int __cmd_record(int argc, const char **argv)
{
	struct perf_session *session;
	struct perf_data_file *file = &perf_stat.file;

	argc = parse_options(argc, argv, stat_options, stat_record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);

	if (output_name)
		file->path = output_name;

	if (run_count != 1 || forever) {
		pr_err("Cannot use -r option with perf stat record.\n");
		return -1;
	}

	session = perf_session__new(file, false, NULL);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	init_features(session);

	session->evlist = evsel_list;
	perf_stat.session = session;
	perf_stat.record = true;
	return argc;
}

static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
				    union perf_event *event,
				    struct perf_session *session)
{
	struct stat_round_event *stat_round = &event->stat_round;
	struct perf_evsel *counter;
	struct timespec tsh, *ts = NULL;
	const char **argv = session->header.env.cmdline_argv;
	int argc = session->header.env.nr_cmdline;

	evlist__for_each_entry(evsel_list, counter)
		perf_stat_process_counter(&stat_config, counter);

	if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
		update_stats(&walltime_nsecs_stats, stat_round->time);

	if (stat_config.interval && stat_round->time) {
		tsh.tv_sec = stat_round->time / NSEC_PER_SEC;
		tsh.tv_nsec = stat_round->time % NSEC_PER_SEC;
		ts = &tsh;
	}

	print_counters(ts, argc, argv);
	return 0;
}
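
/*
 * Worked example for the conversion above (illustrative): a stat_round time
 * of 1234567890 ns splits into tv_sec = 1234567890 / NSEC_PER_SEC = 1 and
 * tv_nsec = 1234567890 % NSEC_PER_SEC = 234567890.
 */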
static
int process_stat_config_event(struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_session *session __maybe_unused)
{
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);

	perf_event__read_stat_config(&stat_config, &event->stat_config);

	if (cpu_map__empty(st->cpus)) {
		if (st->aggr_mode != AGGR_UNSET)
			pr_warning("warning: processing task data, aggregation mode not set\n");
		return 0;
	}

	if (st->aggr_mode != AGGR_UNSET)
		stat_config.aggr_mode = st->aggr_mode;

	if (perf_stat.file.is_pipe)
		perf_stat_init_aggr_mode();
	else
		perf_stat_init_aggr_mode_file(st);

	return 0;
}

static int set_maps(struct perf_stat *st)
{
	if (!st->cpus || !st->threads)
		return 0;

	if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
		return -EINVAL;

	perf_evlist__set_maps(evsel_list, st->cpus, st->threads);

	if (perf_evlist__alloc_stats(evsel_list, true))
		return -ENOMEM;

	st->maps_allocated = true;
	return 0;
}

static
int process_thread_map_event(struct perf_tool *tool,
			     union perf_event *event,
			     struct perf_session *session __maybe_unused)
{
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);

	if (st->threads) {
		pr_warning("Extra thread map event, ignoring.\n");
		return 0;
	}

	st->threads = thread_map__new_event(&event->thread_map);
	if (!st->threads)
		return -ENOMEM;

	return set_maps(st);
}

static
int process_cpu_map_event(struct perf_tool *tool,
			  union perf_event *event,
			  struct perf_session *session __maybe_unused)
{
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
	struct cpu_map *cpus;

	if (st->cpus) {
		pr_warning("Extra cpu map event, ignoring.\n");
		return 0;
	}

	cpus = cpu_map__new_data(&event->cpu_map.data);
	if (!cpus)
		return -ENOMEM;

	st->cpus = cpus;
	return set_maps(st);
}

static const char * const stat_report_usage[] = {
	"perf stat report [<options>]",
	NULL,
};

static struct perf_stat perf_stat = {
	.tool = {
		.attr		= perf_event__process_attr,
		.event_update	= perf_event__process_event_update,
		.thread_map	= process_thread_map_event,
		.cpu_map	= process_cpu_map_event,
		.stat_config	= process_stat_config_event,
		.stat		= perf_event__process_stat_event,
		.stat_round	= process_stat_round_event,
	},
	.aggr_mode = AGGR_UNSET,
};

static int __cmd_report(int argc, const char **argv)
{
	struct perf_session *session;
	const struct option options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
		     "aggregate counts per processor socket", AGGR_SOCKET),
	OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
		     "aggregate counts per physical processor core", AGGR_CORE),
	OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
		     "disable CPU count aggregation", AGGR_NONE),
	OPT_END()
	};
	struct stat st;
	int ret;

	argc = parse_options(argc, argv, options, stat_report_usage, 0);

	if (!input_name || !strlen(input_name)) {
		if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
			input_name = "-";
		else
			input_name = "perf.data";
	}

	perf_stat.file.path = input_name;
	perf_stat.file.mode = PERF_DATA_MODE_READ;

	session = perf_session__new(&perf_stat.file, false, &perf_stat.tool);
	if (session == NULL)
		return -1;

	perf_stat.session = session;
	stat_config.output = stderr;
	evsel_list = session->evlist;

	ret = perf_session__process_events(session);
	if (ret)
		return ret;

	perf_session__delete(session);
	return 0;
}

static void setup_system_wide(int forks)
{
	/*
	 * Make system wide (-a) the default target if
	 * no target was specified and one of the following
	 * conditions is met:
	 *
	 * - there's no workload specified
	 * - there is a workload specified but all requested
	 *   events are system wide events
	 */
	if (!target__none(&target))
		return;

	if (!forks)
		target.system_wide = true;
	else {
		struct perf_evsel *counter;

		evlist__for_each_entry(evsel_list, counter) {
			if (!counter->system_wide)
				return;
		}

		if (evsel_list->nr_entries)
			target.system_wide = true;
	}
}
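
/*
 * Illustrative examples of the rule above (not compiled in):
 *
 *	perf stat sleep 1	# workload given, per-task counting
 *	perf stat		# no workload, falls back to -a
 *	perf stat -e <uncore-event> sleep 1
 *				# workload given, but assuming the only
 *				# requested event is one the PMU layer marks
 *				# as system-wide, -a is implied as well
 */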
int cmd_stat(int argc, const char **argv)
{
	const char * const stat_usage[] = {
		"perf stat [<options>] [<command>]",
		NULL
	};
	int status = -EINVAL, run_idx;
	const char *mode;
	FILE *output = stderr;
	unsigned int interval;
	const char * const stat_subcommands[] = { "record", "report" };

	setlocale(LC_ALL, "");

	evsel_list = perf_evlist__new();
	if (evsel_list == NULL)
		return -ENOMEM;

	parse_events__shrink_config_terms();
	argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
					(const char **) stat_usage,
					PARSE_OPT_STOP_AT_NON_OPTION);
	perf_stat__collect_metric_expr(evsel_list);
	perf_stat__init_shadow_stats();

	if (csv_sep) {
		csv_output = true;
		if (!strcmp(csv_sep, "\\t"))
			csv_sep = "\t";
	} else
		csv_sep = DEFAULT_SEPARATOR;
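
	/*
	 * Illustrative usage of the separator handling above: "-x ," produces
	 * comma-separated output, and the literal two-character argument "\t"
	 * (e.g. perf stat -x '\t' ...) is translated into a real tab so the
	 * result can be pasted straight into a spreadsheet.
	 */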
	if (argc && !strncmp(argv[0], "rec", 3)) {
		argc = __cmd_record(argc, argv);
		if (argc < 0)
			return -1;
	} else if (argc && !strncmp(argv[0], "rep", 3))
		return __cmd_report(argc, argv);

	interval = stat_config.interval;

	/*
	 * For the record command the -o option is already taken care of.
	 */
	if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
		output = NULL;

	if (output_name && output_fd) {
		fprintf(stderr, "cannot use both --output and --log-fd\n");
		parse_options_usage(stat_usage, stat_options, "o", 1);
		parse_options_usage(NULL, stat_options, "log-fd", 0);
		goto out;
	}

	if (metric_only && stat_config.aggr_mode == AGGR_THREAD) {
		fprintf(stderr, "--metric-only is not supported with --per-thread\n");
		goto out;
	}

	if (metric_only && run_count > 1) {
		fprintf(stderr, "--metric-only is not supported with -r\n");
		goto out;
	}

	if (output_fd < 0) {
		fprintf(stderr, "argument to --log-fd must be > 0\n");
		parse_options_usage(stat_usage, stat_options, "log-fd", 0);
		goto out;
	}

	if (!output) {
		struct timespec tm;
		mode = append_file ? "a" : "w";

		output = fopen(output_name, mode);
		if (!output) {
			perror("failed to create output file");
			return -1;
		}
		clock_gettime(CLOCK_REALTIME, &tm);
		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
	} else if (output_fd > 0) {
		mode = append_file ? "a" : "w";
		output = fdopen(output_fd, mode);
		if (!output) {
			perror("Failed opening logfd");
			return -errno;
		}
	}

	stat_config.output = output;

	/*
	 * let the spreadsheet do the pretty-printing
	 */
	if (csv_output) {
		/* User explicitly passed -B? */
		if (big_num_opt == 1) {
			fprintf(stderr, "-B option not supported with -x\n");
			parse_options_usage(stat_usage, stat_options, "B", 1);
			parse_options_usage(NULL, stat_options, "x", 1);
			goto out;
		} else /* Nope, so disable big number formatting */
			big_num = false;
	} else if (big_num_opt == 0) /* User passed --no-big-num */
		big_num = false;

	setup_system_wide(argc);

	if (run_count < 0) {
		pr_err("Run count must be a positive number\n");
		parse_options_usage(stat_usage, stat_options, "r", 1);
		goto out;
	} else if (run_count == 0) {
		forever = true;
		run_count = 1;
	}

	if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
		fprintf(stderr, "The --per-thread option is only available "
			"when monitoring via -p -t options.\n");
		parse_options_usage(NULL, stat_options, "p", 1);
		parse_options_usage(NULL, stat_options, "t", 1);
		goto out;
	}

	/*
	 * no_aggr, cgroup are for system-wide only;
	 * --per-thread is aggregated per thread, we don't mix it with cpu mode.
	 */
	if (((stat_config.aggr_mode != AGGR_GLOBAL &&
	      stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
	    !target__has_cpu(&target)) {
		fprintf(stderr, "both cgroup and no-aggregation "
			"modes only available in system-wide mode\n");

		parse_options_usage(stat_usage, stat_options, "G", 1);
		parse_options_usage(NULL, stat_options, "A", 1);
		parse_options_usage(NULL, stat_options, "a", 1);
		goto out;
	}

	if (add_default_attributes())
		goto out;

	target__validate(&target);

	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
		if (target__has_task(&target)) {
			pr_err("Problems finding threads of monitor\n");
			parse_options_usage(stat_usage, stat_options, "p", 1);
			parse_options_usage(NULL, stat_options, "t", 1);
		} else if (target__has_cpu(&target)) {
			perror("failed to parse CPUs map");
			parse_options_usage(stat_usage, stat_options, "C", 1);
			parse_options_usage(NULL, stat_options, "a", 1);
		}
		goto out;
	}

	/*
	 * Initialize thread_map with comm names,
	 * so we can print them out on output.
	 */
	if (stat_config.aggr_mode == AGGR_THREAD)
		thread_map__read_comms(evsel_list->threads);

	if (interval && interval < 100) {
		if (interval < 10) {
			pr_err("print interval must be >= 10ms\n");
			parse_options_usage(stat_usage, stat_options, "I", 1);
			goto out;
		} else
			pr_warning("print interval < 100ms. "
				   "The overhead percentage could be high in some cases. "
				   "Please proceed with caution.\n");
	}

	if (perf_evlist__alloc_stats(evsel_list, interval))
		goto out;

	if (perf_stat_init_aggr_mode())
		goto out;

	/*
	 * We don't want to block the signals - that would cause
	 * child tasks to inherit that and Ctrl-C would not work.
	 * What we want is for Ctrl-C to work in the exec()-ed
	 * task, but being ignored by perf stat itself:
	 */
	atexit(sig_atexit);
	if (!forever)
		signal(SIGINT, skip_signal);
	signal(SIGCHLD, skip_signal);
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

	status = 0;
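
	/*
	 * Illustrative usage of the run loop below: "perf stat -r 5 ./cmd"
	 * executes the workload five times and reports averaged counts, while
	 * "-r 0" sets forever above, repeating (and printing) runs until
	 * interrupted.
	 */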
]\n", 2741 run_idx + 1); 2742 2743 status = run_perf_stat(argc, argv); 2744 if (forever && status != -1) { 2745 print_counters(NULL, argc, argv); 2746 perf_stat__reset_stats(); 2747 } 2748 } 2749 2750 if (!forever && status != -1 && !interval) 2751 print_counters(NULL, argc, argv); 2752 2753 if (STAT_RECORD) { 2754 /* 2755 * We synthesize the kernel mmap record just so that older tools 2756 * don't emit warnings about not being able to resolve symbols 2757 * due to /proc/sys/kernel/kptr_restrict settings and instear provide 2758 * a saner message about no samples being in the perf.data file. 2759 * 2760 * This also serves to suppress a warning about f_header.data.size == 0 2761 * in header.c at the moment 'perf stat record' gets introduced, which 2762 * is not really needed once we start adding the stat specific PERF_RECORD_ 2763 * records, but the need to suppress the kptr_restrict messages in older 2764 * tools remain -acme 2765 */ 2766 int fd = perf_data_file__fd(&perf_stat.file); 2767 int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, 2768 process_synthesized_event, 2769 &perf_stat.session->machines.host); 2770 if (err) { 2771 pr_warning("Couldn't synthesize the kernel mmap record, harmless, " 2772 "older tools may produce warnings about this file\n."); 2773 } 2774 2775 if (!interval) { 2776 if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL)) 2777 pr_err("failed to write stat round event\n"); 2778 } 2779 2780 if (!perf_stat.file.is_pipe) { 2781 perf_stat.session->header.data_size += perf_stat.bytes_written; 2782 perf_session__write_header(perf_stat.session, evsel_list, fd, true); 2783 } 2784 2785 perf_session__delete(perf_stat.session); 2786 } 2787 2788 perf_stat__exit_aggr_mode(); 2789 perf_evlist__free_stats(evsel_list); 2790 out: 2791 if (smi_cost && smi_reset) 2792 sysfs__write_int(FREEZE_ON_SMI_PATH, 0); 2793 2794 perf_evlist__delete(evsel_list); 2795 return status; 2796 } 2797