1 /* 2 * builtin-stat.c 3 * 4 * Builtin stat command: Give a precise performance counters summary 5 * overview about any workload, CPU or specific PID. 6 * 7 * Sample output: 8 9 $ perf stat ./hackbench 10 10 11 Time: 0.118 12 13 Performance counter stats for './hackbench 10': 14 15 1708.761321 task-clock # 11.037 CPUs utilized 16 41,190 context-switches # 0.024 M/sec 17 6,735 CPU-migrations # 0.004 M/sec 18 17,318 page-faults # 0.010 M/sec 19 5,205,202,243 cycles # 3.046 GHz 20 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle 21 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle 22 2,603,501,247 instructions # 0.50 insns per cycle 23 # 1.48 stalled cycles per insn 24 484,357,498 branches # 283.455 M/sec 25 6,388,934 branch-misses # 1.32% of all branches 26 27 0.154822978 seconds time elapsed 28 29 * 30 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 31 * 32 * Improvements and fixes by: 33 * 34 * Arjan van de Ven <arjan@linux.intel.com> 35 * Yanmin Zhang <yanmin.zhang@intel.com> 36 * Wu Fengguang <fengguang.wu@intel.com> 37 * Mike Galbraith <efault@gmx.de> 38 * Paul Mackerras <paulus@samba.org> 39 * Jaswinder Singh Rajput <jaswinder@kernel.org> 40 * 41 * Released under the GPL v2. (and only v2, not any later version) 42 */ 43 44 #include "perf.h" 45 #include "builtin.h" 46 #include "util/cgroup.h" 47 #include "util/util.h" 48 #include <subcmd/parse-options.h> 49 #include "util/parse-events.h" 50 #include "util/pmu.h" 51 #include "util/event.h" 52 #include "util/evlist.h" 53 #include "util/evsel.h" 54 #include "util/debug.h" 55 #include "util/drv_configs.h" 56 #include "util/color.h" 57 #include "util/stat.h" 58 #include "util/header.h" 59 #include "util/cpumap.h" 60 #include "util/thread.h" 61 #include "util/thread_map.h" 62 #include "util/counts.h" 63 #include "util/group.h" 64 #include "util/session.h" 65 #include "util/tool.h" 66 #include "util/group.h" 67 #include "util/string2.h" 68 #include "asm/bug.h" 69 70 #include <linux/time64.h> 71 #include <api/fs/fs.h> 72 #include <errno.h> 73 #include <signal.h> 74 #include <stdlib.h> 75 #include <sys/prctl.h> 76 #include <inttypes.h> 77 #include <locale.h> 78 #include <math.h> 79 #include <sys/types.h> 80 #include <sys/stat.h> 81 #include <sys/wait.h> 82 #include <unistd.h> 83 84 #include "sane_ctype.h" 85 86 #define DEFAULT_SEPARATOR " " 87 #define CNTR_NOT_SUPPORTED "<not supported>" 88 #define CNTR_NOT_COUNTED "<not counted>" 89 #define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi" 90 91 static void print_counters(struct timespec *ts, int argc, const char **argv); 92 93 /* Default events used for perf stat -T */ 94 static const char *transaction_attrs = { 95 "task-clock," 96 "{" 97 "instructions," 98 "cycles," 99 "cpu/cycles-t/," 100 "cpu/tx-start/," 101 "cpu/el-start/," 102 "cpu/cycles-ct/" 103 "}" 104 }; 105 106 /* More limited version when the CPU does not have all events. */ 107 static const char * transaction_limited_attrs = { 108 "task-clock," 109 "{" 110 "instructions," 111 "cycles," 112 "cpu/cycles-t/," 113 "cpu/tx-start/" 114 "}" 115 }; 116 117 static const char * topdown_attrs[] = { 118 "topdown-total-slots", 119 "topdown-slots-retired", 120 "topdown-recovery-bubbles", 121 "topdown-fetch-bubbles", 122 "topdown-slots-issued", 123 NULL, 124 }; 125 126 static const char *smi_cost_attrs = { 127 "{" 128 "msr/aperf/," 129 "msr/smi/," 130 "cycles" 131 "}" 132 }; 133 134 static struct perf_evlist *evsel_list; 135 136 static struct target target = { 137 .uid = UINT_MAX, 138 }; 139 140 typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu); 141 142 static int run_count = 1; 143 static bool no_inherit = false; 144 static volatile pid_t child_pid = -1; 145 static bool null_run = false; 146 static int detailed_run = 0; 147 static bool transaction_run; 148 static bool topdown_run = false; 149 static bool smi_cost = false; 150 static bool smi_reset = false; 151 static bool big_num = true; 152 static int big_num_opt = -1; 153 static const char *csv_sep = NULL; 154 static bool csv_output = false; 155 static bool group = false; 156 static const char *pre_cmd = NULL; 157 static const char *post_cmd = NULL; 158 static bool sync_run = false; 159 static unsigned int initial_delay = 0; 160 static unsigned int unit_width = 4; /* strlen("unit") */ 161 static bool forever = false; 162 static bool metric_only = false; 163 static bool force_metric_only = false; 164 static bool no_merge = false; 165 static struct timespec ref_time; 166 static struct cpu_map *aggr_map; 167 static aggr_get_id_t aggr_get_id; 168 static bool append_file; 169 static const char *output_name; 170 static int output_fd; 171 static int print_free_counters_hint; 172 173 struct perf_stat { 174 bool record; 175 struct perf_data_file file; 176 struct perf_session *session; 177 u64 bytes_written; 178 struct perf_tool tool; 179 bool maps_allocated; 180 struct cpu_map *cpus; 181 struct thread_map *threads; 182 enum aggr_mode aggr_mode; 183 }; 184 185 static struct perf_stat perf_stat; 186 #define STAT_RECORD perf_stat.record 187 188 static volatile int done = 0; 189 190 static struct perf_stat_config stat_config = { 191 .aggr_mode = AGGR_GLOBAL, 192 .scale = true, 193 }; 194 195 static inline void diff_timespec(struct timespec *r, struct timespec *a, 196 struct timespec *b) 197 { 198 r->tv_sec = a->tv_sec - b->tv_sec; 199 if (a->tv_nsec < b->tv_nsec) { 200 r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec; 201 r->tv_sec--; 202 } else { 203 r->tv_nsec = a->tv_nsec - b->tv_nsec ; 204 } 205 } 206 207 static void perf_stat__reset_stats(void) 208 { 209 perf_evlist__reset_stats(evsel_list); 210 perf_stat__reset_shadow_stats(); 211 } 212 213 static int create_perf_stat_counter(struct perf_evsel *evsel) 214 { 215 struct perf_event_attr *attr = &evsel->attr; 216 struct perf_evsel *leader = evsel->leader; 217 218 if (stat_config.scale) { 219 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 220 PERF_FORMAT_TOTAL_TIME_RUNNING; 221 } 222 223 /* 224 * The event is part of non trivial group, let's enable 225 * the group read (for leader) and ID retrieval for all 226 * members. 227 */ 228 if (leader->nr_members > 1) 229 attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; 230 231 attr->inherit = !no_inherit; 232 233 /* 234 * Some events get initialized with sample_(period/type) set, 235 * like tracepoints. Clear it up for counting. 236 */ 237 attr->sample_period = 0; 238 239 /* 240 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless 241 * while avoiding that older tools show confusing messages. 242 * 243 * However for pipe sessions we need to keep it zero, 244 * because script's perf_evsel__check_attr is triggered 245 * by attr->sample_type != 0, and we can't run it on 246 * stat sessions. 247 */ 248 if (!(STAT_RECORD && perf_stat.file.is_pipe)) 249 attr->sample_type = PERF_SAMPLE_IDENTIFIER; 250 251 /* 252 * Disabling all counters initially, they will be enabled 253 * either manually by us or by kernel via enable_on_exec 254 * set later. 255 */ 256 if (perf_evsel__is_group_leader(evsel)) { 257 attr->disabled = 1; 258 259 /* 260 * In case of initial_delay we enable tracee 261 * events manually. 262 */ 263 if (target__none(&target) && !initial_delay) 264 attr->enable_on_exec = 1; 265 } 266 267 if (target__has_cpu(&target)) 268 return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); 269 270 return perf_evsel__open_per_thread(evsel, evsel_list->threads); 271 } 272 273 /* 274 * Does the counter have nsecs as a unit? 275 */ 276 static inline int nsec_counter(struct perf_evsel *evsel) 277 { 278 if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || 279 perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 280 return 1; 281 282 return 0; 283 } 284 285 static int process_synthesized_event(struct perf_tool *tool __maybe_unused, 286 union perf_event *event, 287 struct perf_sample *sample __maybe_unused, 288 struct machine *machine __maybe_unused) 289 { 290 if (perf_data_file__write(&perf_stat.file, event, event->header.size) < 0) { 291 pr_err("failed to write perf data, error: %m\n"); 292 return -1; 293 } 294 295 perf_stat.bytes_written += event->header.size; 296 return 0; 297 } 298 299 static int write_stat_round_event(u64 tm, u64 type) 300 { 301 return perf_event__synthesize_stat_round(NULL, tm, type, 302 process_synthesized_event, 303 NULL); 304 } 305 306 #define WRITE_STAT_ROUND_EVENT(time, interval) \ 307 write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval) 308 309 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) 310 311 static int 312 perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread, 313 struct perf_counts_values *count) 314 { 315 struct perf_sample_id *sid = SID(counter, cpu, thread); 316 317 return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count, 318 process_synthesized_event, NULL); 319 } 320 321 /* 322 * Read out the results of a single counter: 323 * do not aggregate counts across CPUs in system-wide mode 324 */ 325 static int read_counter(struct perf_evsel *counter) 326 { 327 int nthreads = thread_map__nr(evsel_list->threads); 328 int ncpus, cpu, thread; 329 330 if (target__has_cpu(&target)) 331 ncpus = perf_evsel__nr_cpus(counter); 332 else 333 ncpus = 1; 334 335 if (!counter->supported) 336 return -ENOENT; 337 338 if (counter->system_wide) 339 nthreads = 1; 340 341 for (thread = 0; thread < nthreads; thread++) { 342 for (cpu = 0; cpu < ncpus; cpu++) { 343 struct perf_counts_values *count; 344 345 count = perf_counts(counter->counts, cpu, thread); 346 347 /* 348 * The leader's group read loads data into its group members 349 * (via perf_evsel__read_counter) and sets threir count->loaded. 350 */ 351 if (!count->loaded && 352 perf_evsel__read_counter(counter, cpu, thread)) { 353 counter->counts->scaled = -1; 354 perf_counts(counter->counts, cpu, thread)->ena = 0; 355 perf_counts(counter->counts, cpu, thread)->run = 0; 356 return -1; 357 } 358 359 count->loaded = false; 360 361 if (STAT_RECORD) { 362 if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { 363 pr_err("failed to write stat event\n"); 364 return -1; 365 } 366 } 367 368 if (verbose > 1) { 369 fprintf(stat_config.output, 370 "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", 371 perf_evsel__name(counter), 372 cpu, 373 count->val, count->ena, count->run); 374 } 375 } 376 } 377 378 return 0; 379 } 380 381 static void read_counters(void) 382 { 383 struct perf_evsel *counter; 384 int ret; 385 386 evlist__for_each_entry(evsel_list, counter) { 387 ret = read_counter(counter); 388 if (ret) 389 pr_debug("failed to read counter %s\n", counter->name); 390 391 if (ret == 0 && perf_stat_process_counter(&stat_config, counter)) 392 pr_warning("failed to process counter %s\n", counter->name); 393 } 394 } 395 396 static void process_interval(void) 397 { 398 struct timespec ts, rs; 399 400 read_counters(); 401 402 clock_gettime(CLOCK_MONOTONIC, &ts); 403 diff_timespec(&rs, &ts, &ref_time); 404 405 if (STAT_RECORD) { 406 if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL)) 407 pr_err("failed to write stat round event\n"); 408 } 409 410 print_counters(&rs, 0, NULL); 411 } 412 413 static void enable_counters(void) 414 { 415 if (initial_delay) 416 usleep(initial_delay * USEC_PER_MSEC); 417 418 /* 419 * We need to enable counters only if: 420 * - we don't have tracee (attaching to task or cpu) 421 * - we have initial delay configured 422 */ 423 if (!target__none(&target) || initial_delay) 424 perf_evlist__enable(evsel_list); 425 } 426 427 static void disable_counters(void) 428 { 429 /* 430 * If we don't have tracee (attaching to task or cpu), counters may 431 * still be running. To get accurate group ratios, we must stop groups 432 * from counting before reading their constituent counters. 433 */ 434 if (!target__none(&target)) 435 perf_evlist__disable(evsel_list); 436 } 437 438 static volatile int workload_exec_errno; 439 440 /* 441 * perf_evlist__prepare_workload will send a SIGUSR1 442 * if the fork fails, since we asked by setting its 443 * want_signal to true. 444 */ 445 static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info, 446 void *ucontext __maybe_unused) 447 { 448 workload_exec_errno = info->si_value.sival_int; 449 } 450 451 static bool has_unit(struct perf_evsel *counter) 452 { 453 return counter->unit && *counter->unit; 454 } 455 456 static bool has_scale(struct perf_evsel *counter) 457 { 458 return counter->scale != 1; 459 } 460 461 static int perf_stat_synthesize_config(bool is_pipe) 462 { 463 struct perf_evsel *counter; 464 int err; 465 466 if (is_pipe) { 467 err = perf_event__synthesize_attrs(NULL, perf_stat.session, 468 process_synthesized_event); 469 if (err < 0) { 470 pr_err("Couldn't synthesize attrs.\n"); 471 return err; 472 } 473 } 474 475 /* 476 * Synthesize other events stuff not carried within 477 * attr event - unit, scale, name 478 */ 479 evlist__for_each_entry(evsel_list, counter) { 480 if (!counter->supported) 481 continue; 482 483 /* 484 * Synthesize unit and scale only if it's defined. 485 */ 486 if (has_unit(counter)) { 487 err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event); 488 if (err < 0) { 489 pr_err("Couldn't synthesize evsel unit.\n"); 490 return err; 491 } 492 } 493 494 if (has_scale(counter)) { 495 err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event); 496 if (err < 0) { 497 pr_err("Couldn't synthesize evsel scale.\n"); 498 return err; 499 } 500 } 501 502 if (counter->own_cpus) { 503 err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event); 504 if (err < 0) { 505 pr_err("Couldn't synthesize evsel scale.\n"); 506 return err; 507 } 508 } 509 510 /* 511 * Name is needed only for pipe output, 512 * perf.data carries event names. 513 */ 514 if (is_pipe) { 515 err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event); 516 if (err < 0) { 517 pr_err("Couldn't synthesize evsel name.\n"); 518 return err; 519 } 520 } 521 } 522 523 err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads, 524 process_synthesized_event, 525 NULL); 526 if (err < 0) { 527 pr_err("Couldn't synthesize thread map.\n"); 528 return err; 529 } 530 531 err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus, 532 process_synthesized_event, NULL); 533 if (err < 0) { 534 pr_err("Couldn't synthesize thread map.\n"); 535 return err; 536 } 537 538 err = perf_event__synthesize_stat_config(NULL, &stat_config, 539 process_synthesized_event, NULL); 540 if (err < 0) { 541 pr_err("Couldn't synthesize config.\n"); 542 return err; 543 } 544 545 return 0; 546 } 547 548 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 549 550 static int __store_counter_ids(struct perf_evsel *counter, 551 struct cpu_map *cpus, 552 struct thread_map *threads) 553 { 554 int cpu, thread; 555 556 for (cpu = 0; cpu < cpus->nr; cpu++) { 557 for (thread = 0; thread < threads->nr; thread++) { 558 int fd = FD(counter, cpu, thread); 559 560 if (perf_evlist__id_add_fd(evsel_list, counter, 561 cpu, thread, fd) < 0) 562 return -1; 563 } 564 } 565 566 return 0; 567 } 568 569 static int store_counter_ids(struct perf_evsel *counter) 570 { 571 struct cpu_map *cpus = counter->cpus; 572 struct thread_map *threads = counter->threads; 573 574 if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr)) 575 return -ENOMEM; 576 577 return __store_counter_ids(counter, cpus, threads); 578 } 579 580 static bool perf_evsel__should_store_id(struct perf_evsel *counter) 581 { 582 return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID; 583 } 584 585 static int __run_perf_stat(int argc, const char **argv) 586 { 587 int interval = stat_config.interval; 588 char msg[BUFSIZ]; 589 unsigned long long t0, t1; 590 struct perf_evsel *counter; 591 struct timespec ts; 592 size_t l; 593 int status = 0; 594 const bool forks = (argc > 0); 595 bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false; 596 struct perf_evsel_config_term *err_term; 597 598 if (interval) { 599 ts.tv_sec = interval / USEC_PER_MSEC; 600 ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC; 601 } else { 602 ts.tv_sec = 1; 603 ts.tv_nsec = 0; 604 } 605 606 if (forks) { 607 if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe, 608 workload_exec_failed_signal) < 0) { 609 perror("failed to prepare workload"); 610 return -1; 611 } 612 child_pid = evsel_list->workload.pid; 613 } 614 615 if (group) 616 perf_evlist__set_leader(evsel_list); 617 618 evlist__for_each_entry(evsel_list, counter) { 619 try_again: 620 if (create_perf_stat_counter(counter) < 0) { 621 /* 622 * PPC returns ENXIO for HW counters until 2.6.37 623 * (behavior changed with commit b0a873e). 624 */ 625 if (errno == EINVAL || errno == ENOSYS || 626 errno == ENOENT || errno == EOPNOTSUPP || 627 errno == ENXIO) { 628 if (verbose > 0) 629 ui__warning("%s event is not supported by the kernel.\n", 630 perf_evsel__name(counter)); 631 counter->supported = false; 632 633 if ((counter->leader != counter) || 634 !(counter->leader->nr_members > 1)) 635 continue; 636 } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { 637 if (verbose > 0) 638 ui__warning("%s\n", msg); 639 goto try_again; 640 } 641 642 perf_evsel__open_strerror(counter, &target, 643 errno, msg, sizeof(msg)); 644 ui__error("%s\n", msg); 645 646 if (child_pid != -1) 647 kill(child_pid, SIGTERM); 648 649 return -1; 650 } 651 counter->supported = true; 652 653 l = strlen(counter->unit); 654 if (l > unit_width) 655 unit_width = l; 656 657 if (perf_evsel__should_store_id(counter) && 658 store_counter_ids(counter)) 659 return -1; 660 } 661 662 if (perf_evlist__apply_filters(evsel_list, &counter)) { 663 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 664 counter->filter, perf_evsel__name(counter), errno, 665 str_error_r(errno, msg, sizeof(msg))); 666 return -1; 667 } 668 669 if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) { 670 pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", 671 err_term->val.drv_cfg, perf_evsel__name(counter), errno, 672 str_error_r(errno, msg, sizeof(msg))); 673 return -1; 674 } 675 676 if (STAT_RECORD) { 677 int err, fd = perf_data_file__fd(&perf_stat.file); 678 679 if (is_pipe) { 680 err = perf_header__write_pipe(perf_data_file__fd(&perf_stat.file)); 681 } else { 682 err = perf_session__write_header(perf_stat.session, evsel_list, 683 fd, false); 684 } 685 686 if (err < 0) 687 return err; 688 689 err = perf_stat_synthesize_config(is_pipe); 690 if (err < 0) 691 return err; 692 } 693 694 /* 695 * Enable counters and exec the command: 696 */ 697 t0 = rdclock(); 698 clock_gettime(CLOCK_MONOTONIC, &ref_time); 699 700 if (forks) { 701 perf_evlist__start_workload(evsel_list); 702 enable_counters(); 703 704 if (interval) { 705 while (!waitpid(child_pid, &status, WNOHANG)) { 706 nanosleep(&ts, NULL); 707 process_interval(); 708 } 709 } 710 waitpid(child_pid, &status, 0); 711 712 if (workload_exec_errno) { 713 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 714 pr_err("Workload failed: %s\n", emsg); 715 return -1; 716 } 717 718 if (WIFSIGNALED(status)) 719 psignal(WTERMSIG(status), argv[0]); 720 } else { 721 enable_counters(); 722 while (!done) { 723 nanosleep(&ts, NULL); 724 if (interval) 725 process_interval(); 726 } 727 } 728 729 disable_counters(); 730 731 t1 = rdclock(); 732 733 update_stats(&walltime_nsecs_stats, t1 - t0); 734 735 /* 736 * Closing a group leader splits the group, and as we only disable 737 * group leaders, results in remaining events becoming enabled. To 738 * avoid arbitrary skew, we must read all counters before closing any 739 * group leaders. 740 */ 741 read_counters(); 742 perf_evlist__close(evsel_list); 743 744 return WEXITSTATUS(status); 745 } 746 747 static int run_perf_stat(int argc, const char **argv) 748 { 749 int ret; 750 751 if (pre_cmd) { 752 ret = system(pre_cmd); 753 if (ret) 754 return ret; 755 } 756 757 if (sync_run) 758 sync(); 759 760 ret = __run_perf_stat(argc, argv); 761 if (ret) 762 return ret; 763 764 if (post_cmd) { 765 ret = system(post_cmd); 766 if (ret) 767 return ret; 768 } 769 770 return ret; 771 } 772 773 static void print_running(u64 run, u64 ena) 774 { 775 if (csv_output) { 776 fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", 777 csv_sep, 778 run, 779 csv_sep, 780 ena ? 100.0 * run / ena : 100.0); 781 } else if (run != ena) { 782 fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); 783 } 784 } 785 786 static void print_noise_pct(double total, double avg) 787 { 788 double pct = rel_stddev_stats(total, avg); 789 790 if (csv_output) 791 fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct); 792 else if (pct) 793 fprintf(stat_config.output, " ( +-%6.2f%% )", pct); 794 } 795 796 static void print_noise(struct perf_evsel *evsel, double avg) 797 { 798 struct perf_stat_evsel *ps; 799 800 if (run_count == 1) 801 return; 802 803 ps = evsel->priv; 804 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 805 } 806 807 static void aggr_printout(struct perf_evsel *evsel, int id, int nr) 808 { 809 switch (stat_config.aggr_mode) { 810 case AGGR_CORE: 811 fprintf(stat_config.output, "S%d-C%*d%s%*d%s", 812 cpu_map__id_to_socket(id), 813 csv_output ? 0 : -8, 814 cpu_map__id_to_cpu(id), 815 csv_sep, 816 csv_output ? 0 : 4, 817 nr, 818 csv_sep); 819 break; 820 case AGGR_SOCKET: 821 fprintf(stat_config.output, "S%*d%s%*d%s", 822 csv_output ? 0 : -5, 823 id, 824 csv_sep, 825 csv_output ? 0 : 4, 826 nr, 827 csv_sep); 828 break; 829 case AGGR_NONE: 830 fprintf(stat_config.output, "CPU%*d%s", 831 csv_output ? 0 : -4, 832 perf_evsel__cpus(evsel)->map[id], csv_sep); 833 break; 834 case AGGR_THREAD: 835 fprintf(stat_config.output, "%*s-%*d%s", 836 csv_output ? 0 : 16, 837 thread_map__comm(evsel->threads, id), 838 csv_output ? 0 : -8, 839 thread_map__pid(evsel->threads, id), 840 csv_sep); 841 break; 842 case AGGR_GLOBAL: 843 case AGGR_UNSET: 844 default: 845 break; 846 } 847 } 848 849 struct outstate { 850 FILE *fh; 851 bool newline; 852 const char *prefix; 853 int nfields; 854 int id, nr; 855 struct perf_evsel *evsel; 856 }; 857 858 #define METRIC_LEN 35 859 860 static void new_line_std(void *ctx) 861 { 862 struct outstate *os = ctx; 863 864 os->newline = true; 865 } 866 867 static void do_new_line_std(struct outstate *os) 868 { 869 fputc('\n', os->fh); 870 fputs(os->prefix, os->fh); 871 aggr_printout(os->evsel, os->id, os->nr); 872 if (stat_config.aggr_mode == AGGR_NONE) 873 fprintf(os->fh, " "); 874 fprintf(os->fh, " "); 875 } 876 877 static void print_metric_std(void *ctx, const char *color, const char *fmt, 878 const char *unit, double val) 879 { 880 struct outstate *os = ctx; 881 FILE *out = os->fh; 882 int n; 883 bool newline = os->newline; 884 885 os->newline = false; 886 887 if (unit == NULL || fmt == NULL) { 888 fprintf(out, "%-*s", METRIC_LEN, ""); 889 return; 890 } 891 892 if (newline) 893 do_new_line_std(os); 894 895 n = fprintf(out, " # "); 896 if (color) 897 n += color_fprintf(out, color, fmt, val); 898 else 899 n += fprintf(out, fmt, val); 900 fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); 901 } 902 903 static void new_line_csv(void *ctx) 904 { 905 struct outstate *os = ctx; 906 int i; 907 908 fputc('\n', os->fh); 909 if (os->prefix) 910 fprintf(os->fh, "%s%s", os->prefix, csv_sep); 911 aggr_printout(os->evsel, os->id, os->nr); 912 for (i = 0; i < os->nfields; i++) 913 fputs(csv_sep, os->fh); 914 } 915 916 static void print_metric_csv(void *ctx, 917 const char *color __maybe_unused, 918 const char *fmt, const char *unit, double val) 919 { 920 struct outstate *os = ctx; 921 FILE *out = os->fh; 922 char buf[64], *vals, *ends; 923 924 if (unit == NULL || fmt == NULL) { 925 fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep); 926 return; 927 } 928 snprintf(buf, sizeof(buf), fmt, val); 929 ends = vals = ltrim(buf); 930 while (isdigit(*ends) || *ends == '.') 931 ends++; 932 *ends = 0; 933 while (isspace(*unit)) 934 unit++; 935 fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); 936 } 937 938 #define METRIC_ONLY_LEN 20 939 940 /* Filter out some columns that don't work well in metrics only mode */ 941 942 static bool valid_only_metric(const char *unit) 943 { 944 if (!unit) 945 return false; 946 if (strstr(unit, "/sec") || 947 strstr(unit, "hz") || 948 strstr(unit, "Hz") || 949 strstr(unit, "CPUs utilized")) 950 return false; 951 return true; 952 } 953 954 static const char *fixunit(char *buf, struct perf_evsel *evsel, 955 const char *unit) 956 { 957 if (!strncmp(unit, "of all", 6)) { 958 snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), 959 unit); 960 return buf; 961 } 962 return unit; 963 } 964 965 static void print_metric_only(void *ctx, const char *color, const char *fmt, 966 const char *unit, double val) 967 { 968 struct outstate *os = ctx; 969 FILE *out = os->fh; 970 int n; 971 char buf[1024]; 972 unsigned mlen = METRIC_ONLY_LEN; 973 974 if (!valid_only_metric(unit)) 975 return; 976 unit = fixunit(buf, os->evsel, unit); 977 if (color) 978 n = color_fprintf(out, color, fmt, val); 979 else 980 n = fprintf(out, fmt, val); 981 if (n > METRIC_ONLY_LEN) 982 n = METRIC_ONLY_LEN; 983 if (mlen < strlen(unit)) 984 mlen = strlen(unit) + 1; 985 fprintf(out, "%*s", mlen - n, ""); 986 } 987 988 static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, 989 const char *fmt, 990 const char *unit, double val) 991 { 992 struct outstate *os = ctx; 993 FILE *out = os->fh; 994 char buf[64], *vals, *ends; 995 char tbuf[1024]; 996 997 if (!valid_only_metric(unit)) 998 return; 999 unit = fixunit(tbuf, os->evsel, unit); 1000 snprintf(buf, sizeof buf, fmt, val); 1001 ends = vals = ltrim(buf); 1002 while (isdigit(*ends) || *ends == '.') 1003 ends++; 1004 *ends = 0; 1005 fprintf(out, "%s%s", vals, csv_sep); 1006 } 1007 1008 static void new_line_metric(void *ctx __maybe_unused) 1009 { 1010 } 1011 1012 static void print_metric_header(void *ctx, const char *color __maybe_unused, 1013 const char *fmt __maybe_unused, 1014 const char *unit, double val __maybe_unused) 1015 { 1016 struct outstate *os = ctx; 1017 char tbuf[1024]; 1018 1019 if (!valid_only_metric(unit)) 1020 return; 1021 unit = fixunit(tbuf, os->evsel, unit); 1022 if (csv_output) 1023 fprintf(os->fh, "%s%s", unit, csv_sep); 1024 else 1025 fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit); 1026 } 1027 1028 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) 1029 { 1030 FILE *output = stat_config.output; 1031 double msecs = avg / NSEC_PER_MSEC; 1032 const char *fmt_v, *fmt_n; 1033 char name[25]; 1034 1035 fmt_v = csv_output ? "%.6f%s" : "%18.6f%s"; 1036 fmt_n = csv_output ? "%s" : "%-25s"; 1037 1038 aggr_printout(evsel, id, nr); 1039 1040 scnprintf(name, sizeof(name), "%s%s", 1041 perf_evsel__name(evsel), csv_output ? "" : " (msec)"); 1042 1043 fprintf(output, fmt_v, msecs, csv_sep); 1044 1045 if (csv_output) 1046 fprintf(output, "%s%s", evsel->unit, csv_sep); 1047 else 1048 fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep); 1049 1050 fprintf(output, fmt_n, name); 1051 1052 if (evsel->cgrp) 1053 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1054 } 1055 1056 static int first_shadow_cpu(struct perf_evsel *evsel, int id) 1057 { 1058 int i; 1059 1060 if (!aggr_get_id) 1061 return 0; 1062 1063 if (stat_config.aggr_mode == AGGR_NONE) 1064 return id; 1065 1066 if (stat_config.aggr_mode == AGGR_GLOBAL) 1067 return 0; 1068 1069 for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { 1070 int cpu2 = perf_evsel__cpus(evsel)->map[i]; 1071 1072 if (aggr_get_id(evsel_list->cpus, cpu2) == id) 1073 return cpu2; 1074 } 1075 return 0; 1076 } 1077 1078 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 1079 { 1080 FILE *output = stat_config.output; 1081 double sc = evsel->scale; 1082 const char *fmt; 1083 1084 if (csv_output) { 1085 fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; 1086 } else { 1087 if (big_num) 1088 fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; 1089 else 1090 fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; 1091 } 1092 1093 aggr_printout(evsel, id, nr); 1094 1095 fprintf(output, fmt, avg, csv_sep); 1096 1097 if (evsel->unit) 1098 fprintf(output, "%-*s%s", 1099 csv_output ? 0 : unit_width, 1100 evsel->unit, csv_sep); 1101 1102 fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); 1103 1104 if (evsel->cgrp) 1105 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1106 } 1107 1108 static void printout(int id, int nr, struct perf_evsel *counter, double uval, 1109 char *prefix, u64 run, u64 ena, double noise) 1110 { 1111 struct perf_stat_output_ctx out; 1112 struct outstate os = { 1113 .fh = stat_config.output, 1114 .prefix = prefix ? prefix : "", 1115 .id = id, 1116 .nr = nr, 1117 .evsel = counter, 1118 }; 1119 print_metric_t pm = print_metric_std; 1120 void (*nl)(void *); 1121 1122 if (metric_only) { 1123 nl = new_line_metric; 1124 if (csv_output) 1125 pm = print_metric_only_csv; 1126 else 1127 pm = print_metric_only; 1128 } else 1129 nl = new_line_std; 1130 1131 if (csv_output && !metric_only) { 1132 static int aggr_fields[] = { 1133 [AGGR_GLOBAL] = 0, 1134 [AGGR_THREAD] = 1, 1135 [AGGR_NONE] = 1, 1136 [AGGR_SOCKET] = 2, 1137 [AGGR_CORE] = 2, 1138 }; 1139 1140 pm = print_metric_csv; 1141 nl = new_line_csv; 1142 os.nfields = 3; 1143 os.nfields += aggr_fields[stat_config.aggr_mode]; 1144 if (counter->cgrp) 1145 os.nfields++; 1146 } 1147 if (run == 0 || ena == 0 || counter->counts->scaled == -1) { 1148 if (metric_only) { 1149 pm(&os, NULL, "", "", 0); 1150 return; 1151 } 1152 aggr_printout(counter, id, nr); 1153 1154 fprintf(stat_config.output, "%*s%s", 1155 csv_output ? 0 : 18, 1156 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 1157 csv_sep); 1158 1159 if (counter->supported) 1160 print_free_counters_hint = 1; 1161 1162 fprintf(stat_config.output, "%-*s%s", 1163 csv_output ? 0 : unit_width, 1164 counter->unit, csv_sep); 1165 1166 fprintf(stat_config.output, "%*s", 1167 csv_output ? 0 : -25, 1168 perf_evsel__name(counter)); 1169 1170 if (counter->cgrp) 1171 fprintf(stat_config.output, "%s%s", 1172 csv_sep, counter->cgrp->name); 1173 1174 if (!csv_output) 1175 pm(&os, NULL, NULL, "", 0); 1176 print_noise(counter, noise); 1177 print_running(run, ena); 1178 if (csv_output) 1179 pm(&os, NULL, NULL, "", 0); 1180 return; 1181 } 1182 1183 if (metric_only) 1184 /* nothing */; 1185 else if (nsec_counter(counter)) 1186 nsec_printout(id, nr, counter, uval); 1187 else 1188 abs_printout(id, nr, counter, uval); 1189 1190 out.print_metric = pm; 1191 out.new_line = nl; 1192 out.ctx = &os; 1193 out.force_header = false; 1194 1195 if (csv_output && !metric_only) { 1196 print_noise(counter, noise); 1197 print_running(run, ena); 1198 } 1199 1200 perf_stat__print_shadow_stats(counter, uval, 1201 first_shadow_cpu(counter, id), 1202 &out); 1203 if (!csv_output && !metric_only) { 1204 print_noise(counter, noise); 1205 print_running(run, ena); 1206 } 1207 } 1208 1209 static void aggr_update_shadow(void) 1210 { 1211 int cpu, s2, id, s; 1212 u64 val; 1213 struct perf_evsel *counter; 1214 1215 for (s = 0; s < aggr_map->nr; s++) { 1216 id = aggr_map->map[s]; 1217 evlist__for_each_entry(evsel_list, counter) { 1218 val = 0; 1219 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1220 s2 = aggr_get_id(evsel_list->cpus, cpu); 1221 if (s2 != id) 1222 continue; 1223 val += perf_counts(counter->counts, cpu, 0)->val; 1224 } 1225 val = val * counter->scale; 1226 perf_stat__update_shadow_stats(counter, &val, 1227 first_shadow_cpu(counter, id)); 1228 } 1229 } 1230 } 1231 1232 static void collect_all_aliases(struct perf_evsel *counter, 1233 void (*cb)(struct perf_evsel *counter, void *data, 1234 bool first), 1235 void *data) 1236 { 1237 struct perf_evsel *alias; 1238 1239 alias = list_prepare_entry(counter, &(evsel_list->entries), node); 1240 list_for_each_entry_continue (alias, &evsel_list->entries, node) { 1241 if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) || 1242 alias->scale != counter->scale || 1243 alias->cgrp != counter->cgrp || 1244 strcmp(alias->unit, counter->unit) || 1245 nsec_counter(alias) != nsec_counter(counter)) 1246 break; 1247 alias->merged_stat = true; 1248 cb(alias, data, false); 1249 } 1250 } 1251 1252 static bool collect_data(struct perf_evsel *counter, 1253 void (*cb)(struct perf_evsel *counter, void *data, 1254 bool first), 1255 void *data) 1256 { 1257 if (counter->merged_stat) 1258 return false; 1259 cb(counter, data, true); 1260 if (!no_merge && counter->auto_merge_stats) 1261 collect_all_aliases(counter, cb, data); 1262 return true; 1263 } 1264 1265 struct aggr_data { 1266 u64 ena, run, val; 1267 int id; 1268 int nr; 1269 int cpu; 1270 }; 1271 1272 static void aggr_cb(struct perf_evsel *counter, void *data, bool first) 1273 { 1274 struct aggr_data *ad = data; 1275 int cpu, s2; 1276 1277 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1278 struct perf_counts_values *counts; 1279 1280 s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); 1281 if (s2 != ad->id) 1282 continue; 1283 if (first) 1284 ad->nr++; 1285 counts = perf_counts(counter->counts, cpu, 0); 1286 /* 1287 * When any result is bad, make them all to give 1288 * consistent output in interval mode. 1289 */ 1290 if (counts->ena == 0 || counts->run == 0 || 1291 counter->counts->scaled == -1) { 1292 ad->ena = 0; 1293 ad->run = 0; 1294 break; 1295 } 1296 ad->val += counts->val; 1297 ad->ena += counts->ena; 1298 ad->run += counts->run; 1299 } 1300 } 1301 1302 static void print_aggr(char *prefix) 1303 { 1304 FILE *output = stat_config.output; 1305 struct perf_evsel *counter; 1306 int s, id, nr; 1307 double uval; 1308 u64 ena, run, val; 1309 bool first; 1310 1311 if (!(aggr_map || aggr_get_id)) 1312 return; 1313 1314 aggr_update_shadow(); 1315 1316 /* 1317 * With metric_only everything is on a single line. 1318 * Without each counter has its own line. 1319 */ 1320 for (s = 0; s < aggr_map->nr; s++) { 1321 struct aggr_data ad; 1322 if (prefix && metric_only) 1323 fprintf(output, "%s", prefix); 1324 1325 ad.id = id = aggr_map->map[s]; 1326 first = true; 1327 evlist__for_each_entry(evsel_list, counter) { 1328 ad.val = ad.ena = ad.run = 0; 1329 ad.nr = 0; 1330 if (!collect_data(counter, aggr_cb, &ad)) 1331 continue; 1332 nr = ad.nr; 1333 ena = ad.ena; 1334 run = ad.run; 1335 val = ad.val; 1336 if (first && metric_only) { 1337 first = false; 1338 aggr_printout(counter, id, nr); 1339 } 1340 if (prefix && !metric_only) 1341 fprintf(output, "%s", prefix); 1342 1343 uval = val * counter->scale; 1344 printout(id, nr, counter, uval, prefix, run, ena, 1.0); 1345 if (!metric_only) 1346 fputc('\n', output); 1347 } 1348 if (metric_only) 1349 fputc('\n', output); 1350 } 1351 } 1352 1353 static void print_aggr_thread(struct perf_evsel *counter, char *prefix) 1354 { 1355 FILE *output = stat_config.output; 1356 int nthreads = thread_map__nr(counter->threads); 1357 int ncpus = cpu_map__nr(counter->cpus); 1358 int cpu, thread; 1359 double uval; 1360 1361 for (thread = 0; thread < nthreads; thread++) { 1362 u64 ena = 0, run = 0, val = 0; 1363 1364 for (cpu = 0; cpu < ncpus; cpu++) { 1365 val += perf_counts(counter->counts, cpu, thread)->val; 1366 ena += perf_counts(counter->counts, cpu, thread)->ena; 1367 run += perf_counts(counter->counts, cpu, thread)->run; 1368 } 1369 1370 if (prefix) 1371 fprintf(output, "%s", prefix); 1372 1373 uval = val * counter->scale; 1374 printout(thread, 0, counter, uval, prefix, run, ena, 1.0); 1375 fputc('\n', output); 1376 } 1377 } 1378 1379 struct caggr_data { 1380 double avg, avg_enabled, avg_running; 1381 }; 1382 1383 static void counter_aggr_cb(struct perf_evsel *counter, void *data, 1384 bool first __maybe_unused) 1385 { 1386 struct caggr_data *cd = data; 1387 struct perf_stat_evsel *ps = counter->priv; 1388 1389 cd->avg += avg_stats(&ps->res_stats[0]); 1390 cd->avg_enabled += avg_stats(&ps->res_stats[1]); 1391 cd->avg_running += avg_stats(&ps->res_stats[2]); 1392 } 1393 1394 /* 1395 * Print out the results of a single counter: 1396 * aggregated counts in system-wide mode 1397 */ 1398 static void print_counter_aggr(struct perf_evsel *counter, char *prefix) 1399 { 1400 FILE *output = stat_config.output; 1401 double uval; 1402 struct caggr_data cd = { .avg = 0.0 }; 1403 1404 if (!collect_data(counter, counter_aggr_cb, &cd)) 1405 return; 1406 1407 if (prefix && !metric_only) 1408 fprintf(output, "%s", prefix); 1409 1410 uval = cd.avg * counter->scale; 1411 printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, cd.avg); 1412 if (!metric_only) 1413 fprintf(output, "\n"); 1414 } 1415 1416 static void counter_cb(struct perf_evsel *counter, void *data, 1417 bool first __maybe_unused) 1418 { 1419 struct aggr_data *ad = data; 1420 1421 ad->val += perf_counts(counter->counts, ad->cpu, 0)->val; 1422 ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena; 1423 ad->run += perf_counts(counter->counts, ad->cpu, 0)->run; 1424 } 1425 1426 /* 1427 * Print out the results of a single counter: 1428 * does not use aggregated count in system-wide 1429 */ 1430 static void print_counter(struct perf_evsel *counter, char *prefix) 1431 { 1432 FILE *output = stat_config.output; 1433 u64 ena, run, val; 1434 double uval; 1435 int cpu; 1436 1437 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1438 struct aggr_data ad = { .cpu = cpu }; 1439 1440 if (!collect_data(counter, counter_cb, &ad)) 1441 return; 1442 val = ad.val; 1443 ena = ad.ena; 1444 run = ad.run; 1445 1446 if (prefix) 1447 fprintf(output, "%s", prefix); 1448 1449 uval = val * counter->scale; 1450 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); 1451 1452 fputc('\n', output); 1453 } 1454 } 1455 1456 static void print_no_aggr_metric(char *prefix) 1457 { 1458 int cpu; 1459 int nrcpus = 0; 1460 struct perf_evsel *counter; 1461 u64 ena, run, val; 1462 double uval; 1463 1464 nrcpus = evsel_list->cpus->nr; 1465 for (cpu = 0; cpu < nrcpus; cpu++) { 1466 bool first = true; 1467 1468 if (prefix) 1469 fputs(prefix, stat_config.output); 1470 evlist__for_each_entry(evsel_list, counter) { 1471 if (first) { 1472 aggr_printout(counter, cpu, 0); 1473 first = false; 1474 } 1475 val = perf_counts(counter->counts, cpu, 0)->val; 1476 ena = perf_counts(counter->counts, cpu, 0)->ena; 1477 run = perf_counts(counter->counts, cpu, 0)->run; 1478 1479 uval = val * counter->scale; 1480 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); 1481 } 1482 fputc('\n', stat_config.output); 1483 } 1484 } 1485 1486 static int aggr_header_lens[] = { 1487 [AGGR_CORE] = 18, 1488 [AGGR_SOCKET] = 12, 1489 [AGGR_NONE] = 6, 1490 [AGGR_THREAD] = 24, 1491 [AGGR_GLOBAL] = 0, 1492 }; 1493 1494 static const char *aggr_header_csv[] = { 1495 [AGGR_CORE] = "core,cpus,", 1496 [AGGR_SOCKET] = "socket,cpus", 1497 [AGGR_NONE] = "cpu,", 1498 [AGGR_THREAD] = "comm-pid,", 1499 [AGGR_GLOBAL] = "" 1500 }; 1501 1502 static void print_metric_headers(const char *prefix, bool no_indent) 1503 { 1504 struct perf_stat_output_ctx out; 1505 struct perf_evsel *counter; 1506 struct outstate os = { 1507 .fh = stat_config.output 1508 }; 1509 1510 if (prefix) 1511 fprintf(stat_config.output, "%s", prefix); 1512 1513 if (!csv_output && !no_indent) 1514 fprintf(stat_config.output, "%*s", 1515 aggr_header_lens[stat_config.aggr_mode], ""); 1516 if (csv_output) { 1517 if (stat_config.interval) 1518 fputs("time,", stat_config.output); 1519 fputs(aggr_header_csv[stat_config.aggr_mode], 1520 stat_config.output); 1521 } 1522 1523 /* Print metrics headers only */ 1524 evlist__for_each_entry(evsel_list, counter) { 1525 os.evsel = counter; 1526 out.ctx = &os; 1527 out.print_metric = print_metric_header; 1528 out.new_line = new_line_metric; 1529 out.force_header = true; 1530 os.evsel = counter; 1531 perf_stat__print_shadow_stats(counter, 0, 1532 0, 1533 &out); 1534 } 1535 fputc('\n', stat_config.output); 1536 } 1537 1538 static void print_interval(char *prefix, struct timespec *ts) 1539 { 1540 FILE *output = stat_config.output; 1541 static int num_print_interval; 1542 1543 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); 1544 1545 if (num_print_interval == 0 && !csv_output) { 1546 switch (stat_config.aggr_mode) { 1547 case AGGR_SOCKET: 1548 fprintf(output, "# time socket cpus"); 1549 if (!metric_only) 1550 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1551 break; 1552 case AGGR_CORE: 1553 fprintf(output, "# time core cpus"); 1554 if (!metric_only) 1555 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1556 break; 1557 case AGGR_NONE: 1558 fprintf(output, "# time CPU"); 1559 if (!metric_only) 1560 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1561 break; 1562 case AGGR_THREAD: 1563 fprintf(output, "# time comm-pid"); 1564 if (!metric_only) 1565 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1566 break; 1567 case AGGR_GLOBAL: 1568 default: 1569 fprintf(output, "# time"); 1570 if (!metric_only) 1571 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1572 case AGGR_UNSET: 1573 break; 1574 } 1575 } 1576 1577 if (num_print_interval == 0 && metric_only) 1578 print_metric_headers(" ", true); 1579 if (++num_print_interval == 25) 1580 num_print_interval = 0; 1581 } 1582 1583 static void print_header(int argc, const char **argv) 1584 { 1585 FILE *output = stat_config.output; 1586 int i; 1587 1588 fflush(stdout); 1589 1590 if (!csv_output) { 1591 fprintf(output, "\n"); 1592 fprintf(output, " Performance counter stats for "); 1593 if (target.system_wide) 1594 fprintf(output, "\'system wide"); 1595 else if (target.cpu_list) 1596 fprintf(output, "\'CPU(s) %s", target.cpu_list); 1597 else if (!target__has_task(&target)) { 1598 fprintf(output, "\'%s", argv ? argv[0] : "pipe"); 1599 for (i = 1; argv && (i < argc); i++) 1600 fprintf(output, " %s", argv[i]); 1601 } else if (target.pid) 1602 fprintf(output, "process id \'%s", target.pid); 1603 else 1604 fprintf(output, "thread id \'%s", target.tid); 1605 1606 fprintf(output, "\'"); 1607 if (run_count > 1) 1608 fprintf(output, " (%d runs)", run_count); 1609 fprintf(output, ":\n\n"); 1610 } 1611 } 1612 1613 static void print_footer(void) 1614 { 1615 FILE *output = stat_config.output; 1616 int n; 1617 1618 if (!null_run) 1619 fprintf(output, "\n"); 1620 fprintf(output, " %17.9f seconds time elapsed", 1621 avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC); 1622 if (run_count > 1) { 1623 fprintf(output, " "); 1624 print_noise_pct(stddev_stats(&walltime_nsecs_stats), 1625 avg_stats(&walltime_nsecs_stats)); 1626 } 1627 fprintf(output, "\n\n"); 1628 1629 if (print_free_counters_hint && 1630 sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 && 1631 n > 0) 1632 fprintf(output, 1633 "Some events weren't counted. Try disabling the NMI watchdog:\n" 1634 " echo 0 > /proc/sys/kernel/nmi_watchdog\n" 1635 " perf stat ...\n" 1636 " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); 1637 } 1638 1639 static void print_counters(struct timespec *ts, int argc, const char **argv) 1640 { 1641 int interval = stat_config.interval; 1642 struct perf_evsel *counter; 1643 char buf[64], *prefix = NULL; 1644 1645 /* Do not print anything if we record to the pipe. */ 1646 if (STAT_RECORD && perf_stat.file.is_pipe) 1647 return; 1648 1649 if (interval) 1650 print_interval(prefix = buf, ts); 1651 else 1652 print_header(argc, argv); 1653 1654 if (metric_only) { 1655 static int num_print_iv; 1656 1657 if (num_print_iv == 0 && !interval) 1658 print_metric_headers(prefix, false); 1659 if (num_print_iv++ == 25) 1660 num_print_iv = 0; 1661 if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) 1662 fprintf(stat_config.output, "%s", prefix); 1663 } 1664 1665 switch (stat_config.aggr_mode) { 1666 case AGGR_CORE: 1667 case AGGR_SOCKET: 1668 print_aggr(prefix); 1669 break; 1670 case AGGR_THREAD: 1671 evlist__for_each_entry(evsel_list, counter) 1672 print_aggr_thread(counter, prefix); 1673 break; 1674 case AGGR_GLOBAL: 1675 evlist__for_each_entry(evsel_list, counter) 1676 print_counter_aggr(counter, prefix); 1677 if (metric_only) 1678 fputc('\n', stat_config.output); 1679 break; 1680 case AGGR_NONE: 1681 if (metric_only) 1682 print_no_aggr_metric(prefix); 1683 else { 1684 evlist__for_each_entry(evsel_list, counter) 1685 print_counter(counter, prefix); 1686 } 1687 break; 1688 case AGGR_UNSET: 1689 default: 1690 break; 1691 } 1692 1693 if (!interval && !csv_output) 1694 print_footer(); 1695 1696 fflush(stat_config.output); 1697 } 1698 1699 static volatile int signr = -1; 1700 1701 static void skip_signal(int signo) 1702 { 1703 if ((child_pid == -1) || stat_config.interval) 1704 done = 1; 1705 1706 signr = signo; 1707 /* 1708 * render child_pid harmless 1709 * won't send SIGTERM to a random 1710 * process in case of race condition 1711 * and fast PID recycling 1712 */ 1713 child_pid = -1; 1714 } 1715 1716 static void sig_atexit(void) 1717 { 1718 sigset_t set, oset; 1719 1720 /* 1721 * avoid race condition with SIGCHLD handler 1722 * in skip_signal() which is modifying child_pid 1723 * goal is to avoid send SIGTERM to a random 1724 * process 1725 */ 1726 sigemptyset(&set); 1727 sigaddset(&set, SIGCHLD); 1728 sigprocmask(SIG_BLOCK, &set, &oset); 1729 1730 if (child_pid != -1) 1731 kill(child_pid, SIGTERM); 1732 1733 sigprocmask(SIG_SETMASK, &oset, NULL); 1734 1735 if (signr == -1) 1736 return; 1737 1738 signal(signr, SIG_DFL); 1739 kill(getpid(), signr); 1740 } 1741 1742 static int stat__set_big_num(const struct option *opt __maybe_unused, 1743 const char *s __maybe_unused, int unset) 1744 { 1745 big_num_opt = unset ? 0 : 1; 1746 return 0; 1747 } 1748 1749 static int enable_metric_only(const struct option *opt __maybe_unused, 1750 const char *s __maybe_unused, int unset) 1751 { 1752 force_metric_only = true; 1753 metric_only = !unset; 1754 return 0; 1755 } 1756 1757 static const struct option stat_options[] = { 1758 OPT_BOOLEAN('T', "transaction", &transaction_run, 1759 "hardware transaction statistics"), 1760 OPT_CALLBACK('e', "event", &evsel_list, "event", 1761 "event selector. use 'perf list' to list available events", 1762 parse_events_option), 1763 OPT_CALLBACK(0, "filter", &evsel_list, "filter", 1764 "event filter", parse_filter), 1765 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 1766 "child tasks do not inherit counters"), 1767 OPT_STRING('p', "pid", &target.pid, "pid", 1768 "stat events on existing process id"), 1769 OPT_STRING('t', "tid", &target.tid, "tid", 1770 "stat events on existing thread id"), 1771 OPT_BOOLEAN('a', "all-cpus", &target.system_wide, 1772 "system-wide collection from all CPUs"), 1773 OPT_BOOLEAN('g', "group", &group, 1774 "put the counters into a counter group"), 1775 OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), 1776 OPT_INCR('v', "verbose", &verbose, 1777 "be more verbose (show counter open errors, etc)"), 1778 OPT_INTEGER('r', "repeat", &run_count, 1779 "repeat command and print average + stddev (max: 100, forever: 0)"), 1780 OPT_BOOLEAN('n', "null", &null_run, 1781 "null run - dont start any counters"), 1782 OPT_INCR('d', "detailed", &detailed_run, 1783 "detailed run - start a lot of events"), 1784 OPT_BOOLEAN('S', "sync", &sync_run, 1785 "call sync() before starting a run"), 1786 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 1787 "print large numbers with thousands\' separators", 1788 stat__set_big_num), 1789 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 1790 "list of cpus to monitor in system-wide"), 1791 OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, 1792 "disable CPU count aggregation", AGGR_NONE), 1793 OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"), 1794 OPT_STRING('x', "field-separator", &csv_sep, "separator", 1795 "print counts with custom separator"), 1796 OPT_CALLBACK('G', "cgroup", &evsel_list, "name", 1797 "monitor event in cgroup name only", parse_cgroups), 1798 OPT_STRING('o', "output", &output_name, "file", "output file name"), 1799 OPT_BOOLEAN(0, "append", &append_file, "append to the output file"), 1800 OPT_INTEGER(0, "log-fd", &output_fd, 1801 "log output to fd, instead of stderr"), 1802 OPT_STRING(0, "pre", &pre_cmd, "command", 1803 "command to run prior to the measured command"), 1804 OPT_STRING(0, "post", &post_cmd, "command", 1805 "command to run after to the measured command"), 1806 OPT_UINTEGER('I', "interval-print", &stat_config.interval, 1807 "print counts at regular interval in ms (>= 10)"), 1808 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, 1809 "aggregate counts per processor socket", AGGR_SOCKET), 1810 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, 1811 "aggregate counts per physical processor core", AGGR_CORE), 1812 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, 1813 "aggregate counts per thread", AGGR_THREAD), 1814 OPT_UINTEGER('D', "delay", &initial_delay, 1815 "ms to wait before starting measurement after program start"), 1816 OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL, 1817 "Only print computed metrics. No raw values", enable_metric_only), 1818 OPT_BOOLEAN(0, "topdown", &topdown_run, 1819 "measure topdown level 1 statistics"), 1820 OPT_BOOLEAN(0, "smi-cost", &smi_cost, 1821 "measure SMI cost"), 1822 OPT_END() 1823 }; 1824 1825 static int perf_stat__get_socket(struct cpu_map *map, int cpu) 1826 { 1827 return cpu_map__get_socket(map, cpu, NULL); 1828 } 1829 1830 static int perf_stat__get_core(struct cpu_map *map, int cpu) 1831 { 1832 return cpu_map__get_core(map, cpu, NULL); 1833 } 1834 1835 static int cpu_map__get_max(struct cpu_map *map) 1836 { 1837 int i, max = -1; 1838 1839 for (i = 0; i < map->nr; i++) { 1840 if (map->map[i] > max) 1841 max = map->map[i]; 1842 } 1843 1844 return max; 1845 } 1846 1847 static struct cpu_map *cpus_aggr_map; 1848 1849 static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx) 1850 { 1851 int cpu; 1852 1853 if (idx >= map->nr) 1854 return -1; 1855 1856 cpu = map->map[idx]; 1857 1858 if (cpus_aggr_map->map[cpu] == -1) 1859 cpus_aggr_map->map[cpu] = get_id(map, idx); 1860 1861 return cpus_aggr_map->map[cpu]; 1862 } 1863 1864 static int perf_stat__get_socket_cached(struct cpu_map *map, int idx) 1865 { 1866 return perf_stat__get_aggr(perf_stat__get_socket, map, idx); 1867 } 1868 1869 static int perf_stat__get_core_cached(struct cpu_map *map, int idx) 1870 { 1871 return perf_stat__get_aggr(perf_stat__get_core, map, idx); 1872 } 1873 1874 static int perf_stat_init_aggr_mode(void) 1875 { 1876 int nr; 1877 1878 switch (stat_config.aggr_mode) { 1879 case AGGR_SOCKET: 1880 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { 1881 perror("cannot build socket map"); 1882 return -1; 1883 } 1884 aggr_get_id = perf_stat__get_socket_cached; 1885 break; 1886 case AGGR_CORE: 1887 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { 1888 perror("cannot build core map"); 1889 return -1; 1890 } 1891 aggr_get_id = perf_stat__get_core_cached; 1892 break; 1893 case AGGR_NONE: 1894 case AGGR_GLOBAL: 1895 case AGGR_THREAD: 1896 case AGGR_UNSET: 1897 default: 1898 break; 1899 } 1900 1901 /* 1902 * The evsel_list->cpus is the base we operate on, 1903 * taking the highest cpu number to be the size of 1904 * the aggregation translate cpumap. 1905 */ 1906 nr = cpu_map__get_max(evsel_list->cpus); 1907 cpus_aggr_map = cpu_map__empty_new(nr + 1); 1908 return cpus_aggr_map ? 0 : -ENOMEM; 1909 } 1910 1911 static void perf_stat__exit_aggr_mode(void) 1912 { 1913 cpu_map__put(aggr_map); 1914 cpu_map__put(cpus_aggr_map); 1915 aggr_map = NULL; 1916 cpus_aggr_map = NULL; 1917 } 1918 1919 static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx) 1920 { 1921 int cpu; 1922 1923 if (idx > map->nr) 1924 return -1; 1925 1926 cpu = map->map[idx]; 1927 1928 if (cpu >= env->nr_cpus_avail) 1929 return -1; 1930 1931 return cpu; 1932 } 1933 1934 static int perf_env__get_socket(struct cpu_map *map, int idx, void *data) 1935 { 1936 struct perf_env *env = data; 1937 int cpu = perf_env__get_cpu(env, map, idx); 1938 1939 return cpu == -1 ? -1 : env->cpu[cpu].socket_id; 1940 } 1941 1942 static int perf_env__get_core(struct cpu_map *map, int idx, void *data) 1943 { 1944 struct perf_env *env = data; 1945 int core = -1, cpu = perf_env__get_cpu(env, map, idx); 1946 1947 if (cpu != -1) { 1948 int socket_id = env->cpu[cpu].socket_id; 1949 1950 /* 1951 * Encode socket in upper 16 bits 1952 * core_id is relative to socket, and 1953 * we need a global id. So we combine 1954 * socket + core id. 1955 */ 1956 core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff); 1957 } 1958 1959 return core; 1960 } 1961 1962 static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus, 1963 struct cpu_map **sockp) 1964 { 1965 return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); 1966 } 1967 1968 static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, 1969 struct cpu_map **corep) 1970 { 1971 return cpu_map__build_map(cpus, corep, perf_env__get_core, env); 1972 } 1973 1974 static int perf_stat__get_socket_file(struct cpu_map *map, int idx) 1975 { 1976 return perf_env__get_socket(map, idx, &perf_stat.session->header.env); 1977 } 1978 1979 static int perf_stat__get_core_file(struct cpu_map *map, int idx) 1980 { 1981 return perf_env__get_core(map, idx, &perf_stat.session->header.env); 1982 } 1983 1984 static int perf_stat_init_aggr_mode_file(struct perf_stat *st) 1985 { 1986 struct perf_env *env = &st->session->header.env; 1987 1988 switch (stat_config.aggr_mode) { 1989 case AGGR_SOCKET: 1990 if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) { 1991 perror("cannot build socket map"); 1992 return -1; 1993 } 1994 aggr_get_id = perf_stat__get_socket_file; 1995 break; 1996 case AGGR_CORE: 1997 if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) { 1998 perror("cannot build core map"); 1999 return -1; 2000 } 2001 aggr_get_id = perf_stat__get_core_file; 2002 break; 2003 case AGGR_NONE: 2004 case AGGR_GLOBAL: 2005 case AGGR_THREAD: 2006 case AGGR_UNSET: 2007 default: 2008 break; 2009 } 2010 2011 return 0; 2012 } 2013 2014 static int topdown_filter_events(const char **attr, char **str, bool use_group) 2015 { 2016 int off = 0; 2017 int i; 2018 int len = 0; 2019 char *s; 2020 2021 for (i = 0; attr[i]; i++) { 2022 if (pmu_have_event("cpu", attr[i])) { 2023 len += strlen(attr[i]) + 1; 2024 attr[i - off] = attr[i]; 2025 } else 2026 off++; 2027 } 2028 attr[i - off] = NULL; 2029 2030 *str = malloc(len + 1 + 2); 2031 if (!*str) 2032 return -1; 2033 s = *str; 2034 if (i - off == 0) { 2035 *s = 0; 2036 return 0; 2037 } 2038 if (use_group) 2039 *s++ = '{'; 2040 for (i = 0; attr[i]; i++) { 2041 strcpy(s, attr[i]); 2042 s += strlen(s); 2043 *s++ = ','; 2044 } 2045 if (use_group) { 2046 s[-1] = '}'; 2047 *s = 0; 2048 } else 2049 s[-1] = 0; 2050 return 0; 2051 } 2052 2053 __weak bool arch_topdown_check_group(bool *warn) 2054 { 2055 *warn = false; 2056 return false; 2057 } 2058 2059 __weak void arch_topdown_group_warn(void) 2060 { 2061 } 2062 2063 /* 2064 * Add default attributes, if there were no attributes specified or 2065 * if -d/--detailed, -d -d or -d -d -d is used: 2066 */ 2067 static int add_default_attributes(void) 2068 { 2069 int err; 2070 struct perf_event_attr default_attrs0[] = { 2071 2072 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 2073 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, 2074 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, 2075 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 2076 2077 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, 2078 }; 2079 struct perf_event_attr frontend_attrs[] = { 2080 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, 2081 }; 2082 struct perf_event_attr backend_attrs[] = { 2083 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, 2084 }; 2085 struct perf_event_attr default_attrs1[] = { 2086 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, 2087 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 2088 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, 2089 2090 }; 2091 2092 /* 2093 * Detailed stats (-d), covering the L1 and last level data caches: 2094 */ 2095 struct perf_event_attr detailed_attrs[] = { 2096 2097 { .type = PERF_TYPE_HW_CACHE, 2098 .config = 2099 PERF_COUNT_HW_CACHE_L1D << 0 | 2100 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2101 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2102 2103 { .type = PERF_TYPE_HW_CACHE, 2104 .config = 2105 PERF_COUNT_HW_CACHE_L1D << 0 | 2106 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2107 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2108 2109 { .type = PERF_TYPE_HW_CACHE, 2110 .config = 2111 PERF_COUNT_HW_CACHE_LL << 0 | 2112 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2113 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2114 2115 { .type = PERF_TYPE_HW_CACHE, 2116 .config = 2117 PERF_COUNT_HW_CACHE_LL << 0 | 2118 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2119 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2120 }; 2121 2122 /* 2123 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: 2124 */ 2125 struct perf_event_attr very_detailed_attrs[] = { 2126 2127 { .type = PERF_TYPE_HW_CACHE, 2128 .config = 2129 PERF_COUNT_HW_CACHE_L1I << 0 | 2130 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2131 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2132 2133 { .type = PERF_TYPE_HW_CACHE, 2134 .config = 2135 PERF_COUNT_HW_CACHE_L1I << 0 | 2136 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2137 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2138 2139 { .type = PERF_TYPE_HW_CACHE, 2140 .config = 2141 PERF_COUNT_HW_CACHE_DTLB << 0 | 2142 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2143 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2144 2145 { .type = PERF_TYPE_HW_CACHE, 2146 .config = 2147 PERF_COUNT_HW_CACHE_DTLB << 0 | 2148 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2149 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2150 2151 { .type = PERF_TYPE_HW_CACHE, 2152 .config = 2153 PERF_COUNT_HW_CACHE_ITLB << 0 | 2154 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2155 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2156 2157 { .type = PERF_TYPE_HW_CACHE, 2158 .config = 2159 PERF_COUNT_HW_CACHE_ITLB << 0 | 2160 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2161 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2162 2163 }; 2164 2165 /* 2166 * Very, very detailed stats (-d -d -d), adding prefetch events: 2167 */ 2168 struct perf_event_attr very_very_detailed_attrs[] = { 2169 2170 { .type = PERF_TYPE_HW_CACHE, 2171 .config = 2172 PERF_COUNT_HW_CACHE_L1D << 0 | 2173 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 2174 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2175 2176 { .type = PERF_TYPE_HW_CACHE, 2177 .config = 2178 PERF_COUNT_HW_CACHE_L1D << 0 | 2179 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 2180 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2181 }; 2182 2183 /* Set attrs if no event is selected and !null_run: */ 2184 if (null_run) 2185 return 0; 2186 2187 if (transaction_run) { 2188 if (pmu_have_event("cpu", "cycles-ct") && 2189 pmu_have_event("cpu", "el-start")) 2190 err = parse_events(evsel_list, transaction_attrs, NULL); 2191 else 2192 err = parse_events(evsel_list, transaction_limited_attrs, NULL); 2193 if (err) { 2194 fprintf(stderr, "Cannot set up transaction events\n"); 2195 return -1; 2196 } 2197 return 0; 2198 } 2199 2200 if (smi_cost) { 2201 int smi; 2202 2203 if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) { 2204 fprintf(stderr, "freeze_on_smi is not supported.\n"); 2205 return -1; 2206 } 2207 2208 if (!smi) { 2209 if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) { 2210 fprintf(stderr, "Failed to set freeze_on_smi.\n"); 2211 return -1; 2212 } 2213 smi_reset = true; 2214 } 2215 2216 if (pmu_have_event("msr", "aperf") && 2217 pmu_have_event("msr", "smi")) { 2218 if (!force_metric_only) 2219 metric_only = true; 2220 err = parse_events(evsel_list, smi_cost_attrs, NULL); 2221 } else { 2222 fprintf(stderr, "To measure SMI cost, it needs " 2223 "msr/aperf/, msr/smi/ and cpu/cycles/ support\n"); 2224 return -1; 2225 } 2226 if (err) { 2227 fprintf(stderr, "Cannot set up SMI cost events\n"); 2228 return -1; 2229 } 2230 return 0; 2231 } 2232 2233 if (topdown_run) { 2234 char *str = NULL; 2235 bool warn = false; 2236 2237 if (stat_config.aggr_mode != AGGR_GLOBAL && 2238 stat_config.aggr_mode != AGGR_CORE) { 2239 pr_err("top down event configuration requires --per-core mode\n"); 2240 return -1; 2241 } 2242 stat_config.aggr_mode = AGGR_CORE; 2243 if (nr_cgroups || !target__has_cpu(&target)) { 2244 pr_err("top down event configuration requires system-wide mode (-a)\n"); 2245 return -1; 2246 } 2247 2248 if (!force_metric_only) 2249 metric_only = true; 2250 if (topdown_filter_events(topdown_attrs, &str, 2251 arch_topdown_check_group(&warn)) < 0) { 2252 pr_err("Out of memory\n"); 2253 return -1; 2254 } 2255 if (topdown_attrs[0] && str) { 2256 if (warn) 2257 arch_topdown_group_warn(); 2258 err = parse_events(evsel_list, str, NULL); 2259 if (err) { 2260 fprintf(stderr, 2261 "Cannot set up top down events %s: %d\n", 2262 str, err); 2263 free(str); 2264 return -1; 2265 } 2266 } else { 2267 fprintf(stderr, "System does not support topdown\n"); 2268 return -1; 2269 } 2270 free(str); 2271 } 2272 2273 if (!evsel_list->nr_entries) { 2274 if (target__has_cpu(&target)) 2275 default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; 2276 2277 if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) 2278 return -1; 2279 if (pmu_have_event("cpu", "stalled-cycles-frontend")) { 2280 if (perf_evlist__add_default_attrs(evsel_list, 2281 frontend_attrs) < 0) 2282 return -1; 2283 } 2284 if (pmu_have_event("cpu", "stalled-cycles-backend")) { 2285 if (perf_evlist__add_default_attrs(evsel_list, 2286 backend_attrs) < 0) 2287 return -1; 2288 } 2289 if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0) 2290 return -1; 2291 } 2292 2293 /* Detailed events get appended to the event list: */ 2294 2295 if (detailed_run < 1) 2296 return 0; 2297 2298 /* Append detailed run extra attributes: */ 2299 if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) 2300 return -1; 2301 2302 if (detailed_run < 2) 2303 return 0; 2304 2305 /* Append very detailed run extra attributes: */ 2306 if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) 2307 return -1; 2308 2309 if (detailed_run < 3) 2310 return 0; 2311 2312 /* Append very, very detailed run extra attributes: */ 2313 return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); 2314 } 2315 2316 static const char * const stat_record_usage[] = { 2317 "perf stat record [<options>]", 2318 NULL, 2319 }; 2320 2321 static void init_features(struct perf_session *session) 2322 { 2323 int feat; 2324 2325 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 2326 perf_header__set_feat(&session->header, feat); 2327 2328 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 2329 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 2330 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 2331 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 2332 } 2333 2334 static int __cmd_record(int argc, const char **argv) 2335 { 2336 struct perf_session *session; 2337 struct perf_data_file *file = &perf_stat.file; 2338 2339 argc = parse_options(argc, argv, stat_options, stat_record_usage, 2340 PARSE_OPT_STOP_AT_NON_OPTION); 2341 2342 if (output_name) 2343 file->path = output_name; 2344 2345 if (run_count != 1 || forever) { 2346 pr_err("Cannot use -r option with perf stat record.\n"); 2347 return -1; 2348 } 2349 2350 session = perf_session__new(file, false, NULL); 2351 if (session == NULL) { 2352 pr_err("Perf session creation failed.\n"); 2353 return -1; 2354 } 2355 2356 init_features(session); 2357 2358 session->evlist = evsel_list; 2359 perf_stat.session = session; 2360 perf_stat.record = true; 2361 return argc; 2362 } 2363 2364 static int process_stat_round_event(struct perf_tool *tool __maybe_unused, 2365 union perf_event *event, 2366 struct perf_session *session) 2367 { 2368 struct stat_round_event *stat_round = &event->stat_round; 2369 struct perf_evsel *counter; 2370 struct timespec tsh, *ts = NULL; 2371 const char **argv = session->header.env.cmdline_argv; 2372 int argc = session->header.env.nr_cmdline; 2373 2374 evlist__for_each_entry(evsel_list, counter) 2375 perf_stat_process_counter(&stat_config, counter); 2376 2377 if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL) 2378 update_stats(&walltime_nsecs_stats, stat_round->time); 2379 2380 if (stat_config.interval && stat_round->time) { 2381 tsh.tv_sec = stat_round->time / NSEC_PER_SEC; 2382 tsh.tv_nsec = stat_round->time % NSEC_PER_SEC; 2383 ts = &tsh; 2384 } 2385 2386 print_counters(ts, argc, argv); 2387 return 0; 2388 } 2389 2390 static 2391 int process_stat_config_event(struct perf_tool *tool, 2392 union perf_event *event, 2393 struct perf_session *session __maybe_unused) 2394 { 2395 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2396 2397 perf_event__read_stat_config(&stat_config, &event->stat_config); 2398 2399 if (cpu_map__empty(st->cpus)) { 2400 if (st->aggr_mode != AGGR_UNSET) 2401 pr_warning("warning: processing task data, aggregation mode not set\n"); 2402 return 0; 2403 } 2404 2405 if (st->aggr_mode != AGGR_UNSET) 2406 stat_config.aggr_mode = st->aggr_mode; 2407 2408 if (perf_stat.file.is_pipe) 2409 perf_stat_init_aggr_mode(); 2410 else 2411 perf_stat_init_aggr_mode_file(st); 2412 2413 return 0; 2414 } 2415 2416 static int set_maps(struct perf_stat *st) 2417 { 2418 if (!st->cpus || !st->threads) 2419 return 0; 2420 2421 if (WARN_ONCE(st->maps_allocated, "stats double allocation\n")) 2422 return -EINVAL; 2423 2424 perf_evlist__set_maps(evsel_list, st->cpus, st->threads); 2425 2426 if (perf_evlist__alloc_stats(evsel_list, true)) 2427 return -ENOMEM; 2428 2429 st->maps_allocated = true; 2430 return 0; 2431 } 2432 2433 static 2434 int process_thread_map_event(struct perf_tool *tool, 2435 union perf_event *event, 2436 struct perf_session *session __maybe_unused) 2437 { 2438 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2439 2440 if (st->threads) { 2441 pr_warning("Extra thread map event, ignoring.\n"); 2442 return 0; 2443 } 2444 2445 st->threads = thread_map__new_event(&event->thread_map); 2446 if (!st->threads) 2447 return -ENOMEM; 2448 2449 return set_maps(st); 2450 } 2451 2452 static 2453 int process_cpu_map_event(struct perf_tool *tool, 2454 union perf_event *event, 2455 struct perf_session *session __maybe_unused) 2456 { 2457 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2458 struct cpu_map *cpus; 2459 2460 if (st->cpus) { 2461 pr_warning("Extra cpu map event, ignoring.\n"); 2462 return 0; 2463 } 2464 2465 cpus = cpu_map__new_data(&event->cpu_map.data); 2466 if (!cpus) 2467 return -ENOMEM; 2468 2469 st->cpus = cpus; 2470 return set_maps(st); 2471 } 2472 2473 static const char * const stat_report_usage[] = { 2474 "perf stat report [<options>]", 2475 NULL, 2476 }; 2477 2478 static struct perf_stat perf_stat = { 2479 .tool = { 2480 .attr = perf_event__process_attr, 2481 .event_update = perf_event__process_event_update, 2482 .thread_map = process_thread_map_event, 2483 .cpu_map = process_cpu_map_event, 2484 .stat_config = process_stat_config_event, 2485 .stat = perf_event__process_stat_event, 2486 .stat_round = process_stat_round_event, 2487 }, 2488 .aggr_mode = AGGR_UNSET, 2489 }; 2490 2491 static int __cmd_report(int argc, const char **argv) 2492 { 2493 struct perf_session *session; 2494 const struct option options[] = { 2495 OPT_STRING('i', "input", &input_name, "file", "input file name"), 2496 OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, 2497 "aggregate counts per processor socket", AGGR_SOCKET), 2498 OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, 2499 "aggregate counts per physical processor core", AGGR_CORE), 2500 OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, 2501 "disable CPU count aggregation", AGGR_NONE), 2502 OPT_END() 2503 }; 2504 struct stat st; 2505 int ret; 2506 2507 argc = parse_options(argc, argv, options, stat_report_usage, 0); 2508 2509 if (!input_name || !strlen(input_name)) { 2510 if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) 2511 input_name = "-"; 2512 else 2513 input_name = "perf.data"; 2514 } 2515 2516 perf_stat.file.path = input_name; 2517 perf_stat.file.mode = PERF_DATA_MODE_READ; 2518 2519 session = perf_session__new(&perf_stat.file, false, &perf_stat.tool); 2520 if (session == NULL) 2521 return -1; 2522 2523 perf_stat.session = session; 2524 stat_config.output = stderr; 2525 evsel_list = session->evlist; 2526 2527 ret = perf_session__process_events(session); 2528 if (ret) 2529 return ret; 2530 2531 perf_session__delete(session); 2532 return 0; 2533 } 2534 2535 static void setup_system_wide(int forks) 2536 { 2537 /* 2538 * Make system wide (-a) the default target if 2539 * no target was specified and one of following 2540 * conditions is met: 2541 * 2542 * - there's no workload specified 2543 * - there is workload specified but all requested 2544 * events are system wide events 2545 */ 2546 if (!target__none(&target)) 2547 return; 2548 2549 if (!forks) 2550 target.system_wide = true; 2551 else { 2552 struct perf_evsel *counter; 2553 2554 evlist__for_each_entry(evsel_list, counter) { 2555 if (!counter->system_wide) 2556 return; 2557 } 2558 2559 if (evsel_list->nr_entries) 2560 target.system_wide = true; 2561 } 2562 } 2563 2564 int cmd_stat(int argc, const char **argv) 2565 { 2566 const char * const stat_usage[] = { 2567 "perf stat [<options>] [<command>]", 2568 NULL 2569 }; 2570 int status = -EINVAL, run_idx; 2571 const char *mode; 2572 FILE *output = stderr; 2573 unsigned int interval; 2574 const char * const stat_subcommands[] = { "record", "report" }; 2575 2576 setlocale(LC_ALL, ""); 2577 2578 evsel_list = perf_evlist__new(); 2579 if (evsel_list == NULL) 2580 return -ENOMEM; 2581 2582 parse_events__shrink_config_terms(); 2583 argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, 2584 (const char **) stat_usage, 2585 PARSE_OPT_STOP_AT_NON_OPTION); 2586 perf_stat__collect_metric_expr(evsel_list); 2587 perf_stat__init_shadow_stats(); 2588 2589 if (csv_sep) { 2590 csv_output = true; 2591 if (!strcmp(csv_sep, "\\t")) 2592 csv_sep = "\t"; 2593 } else 2594 csv_sep = DEFAULT_SEPARATOR; 2595 2596 if (argc && !strncmp(argv[0], "rec", 3)) { 2597 argc = __cmd_record(argc, argv); 2598 if (argc < 0) 2599 return -1; 2600 } else if (argc && !strncmp(argv[0], "rep", 3)) 2601 return __cmd_report(argc, argv); 2602 2603 interval = stat_config.interval; 2604 2605 /* 2606 * For record command the -o is already taken care of. 2607 */ 2608 if (!STAT_RECORD && output_name && strcmp(output_name, "-")) 2609 output = NULL; 2610 2611 if (output_name && output_fd) { 2612 fprintf(stderr, "cannot use both --output and --log-fd\n"); 2613 parse_options_usage(stat_usage, stat_options, "o", 1); 2614 parse_options_usage(NULL, stat_options, "log-fd", 0); 2615 goto out; 2616 } 2617 2618 if (metric_only && stat_config.aggr_mode == AGGR_THREAD) { 2619 fprintf(stderr, "--metric-only is not supported with --per-thread\n"); 2620 goto out; 2621 } 2622 2623 if (metric_only && run_count > 1) { 2624 fprintf(stderr, "--metric-only is not supported with -r\n"); 2625 goto out; 2626 } 2627 2628 if (output_fd < 0) { 2629 fprintf(stderr, "argument to --log-fd must be a > 0\n"); 2630 parse_options_usage(stat_usage, stat_options, "log-fd", 0); 2631 goto out; 2632 } 2633 2634 if (!output) { 2635 struct timespec tm; 2636 mode = append_file ? "a" : "w"; 2637 2638 output = fopen(output_name, mode); 2639 if (!output) { 2640 perror("failed to create output file"); 2641 return -1; 2642 } 2643 clock_gettime(CLOCK_REALTIME, &tm); 2644 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); 2645 } else if (output_fd > 0) { 2646 mode = append_file ? "a" : "w"; 2647 output = fdopen(output_fd, mode); 2648 if (!output) { 2649 perror("Failed opening logfd"); 2650 return -errno; 2651 } 2652 } 2653 2654 stat_config.output = output; 2655 2656 /* 2657 * let the spreadsheet do the pretty-printing 2658 */ 2659 if (csv_output) { 2660 /* User explicitly passed -B? */ 2661 if (big_num_opt == 1) { 2662 fprintf(stderr, "-B option not supported with -x\n"); 2663 parse_options_usage(stat_usage, stat_options, "B", 1); 2664 parse_options_usage(NULL, stat_options, "x", 1); 2665 goto out; 2666 } else /* Nope, so disable big number formatting */ 2667 big_num = false; 2668 } else if (big_num_opt == 0) /* User passed --no-big-num */ 2669 big_num = false; 2670 2671 setup_system_wide(argc); 2672 2673 if (run_count < 0) { 2674 pr_err("Run count must be a positive number\n"); 2675 parse_options_usage(stat_usage, stat_options, "r", 1); 2676 goto out; 2677 } else if (run_count == 0) { 2678 forever = true; 2679 run_count = 1; 2680 } 2681 2682 if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { 2683 fprintf(stderr, "The --per-thread option is only available " 2684 "when monitoring via -p -t options.\n"); 2685 parse_options_usage(NULL, stat_options, "p", 1); 2686 parse_options_usage(NULL, stat_options, "t", 1); 2687 goto out; 2688 } 2689 2690 /* 2691 * no_aggr, cgroup are for system-wide only 2692 * --per-thread is aggregated per thread, we dont mix it with cpu mode 2693 */ 2694 if (((stat_config.aggr_mode != AGGR_GLOBAL && 2695 stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) && 2696 !target__has_cpu(&target)) { 2697 fprintf(stderr, "both cgroup and no-aggregation " 2698 "modes only available in system-wide mode\n"); 2699 2700 parse_options_usage(stat_usage, stat_options, "G", 1); 2701 parse_options_usage(NULL, stat_options, "A", 1); 2702 parse_options_usage(NULL, stat_options, "a", 1); 2703 goto out; 2704 } 2705 2706 if (add_default_attributes()) 2707 goto out; 2708 2709 target__validate(&target); 2710 2711 if (perf_evlist__create_maps(evsel_list, &target) < 0) { 2712 if (target__has_task(&target)) { 2713 pr_err("Problems finding threads of monitor\n"); 2714 parse_options_usage(stat_usage, stat_options, "p", 1); 2715 parse_options_usage(NULL, stat_options, "t", 1); 2716 } else if (target__has_cpu(&target)) { 2717 perror("failed to parse CPUs map"); 2718 parse_options_usage(stat_usage, stat_options, "C", 1); 2719 parse_options_usage(NULL, stat_options, "a", 1); 2720 } 2721 goto out; 2722 } 2723 2724 /* 2725 * Initialize thread_map with comm names, 2726 * so we could print it out on output. 2727 */ 2728 if (stat_config.aggr_mode == AGGR_THREAD) 2729 thread_map__read_comms(evsel_list->threads); 2730 2731 if (interval && interval < 100) { 2732 if (interval < 10) { 2733 pr_err("print interval must be >= 10ms\n"); 2734 parse_options_usage(stat_usage, stat_options, "I", 1); 2735 goto out; 2736 } else 2737 pr_warning("print interval < 100ms. " 2738 "The overhead percentage could be high in some cases. " 2739 "Please proceed with caution.\n"); 2740 } 2741 2742 if (perf_evlist__alloc_stats(evsel_list, interval)) 2743 goto out; 2744 2745 if (perf_stat_init_aggr_mode()) 2746 goto out; 2747 2748 /* 2749 * We dont want to block the signals - that would cause 2750 * child tasks to inherit that and Ctrl-C would not work. 2751 * What we want is for Ctrl-C to work in the exec()-ed 2752 * task, but being ignored by perf stat itself: 2753 */ 2754 atexit(sig_atexit); 2755 if (!forever) 2756 signal(SIGINT, skip_signal); 2757 signal(SIGCHLD, skip_signal); 2758 signal(SIGALRM, skip_signal); 2759 signal(SIGABRT, skip_signal); 2760 2761 status = 0; 2762 for (run_idx = 0; forever || run_idx < run_count; run_idx++) { 2763 if (run_count != 1 && verbose > 0) 2764 fprintf(output, "[ perf stat: executing run #%d ... ]\n", 2765 run_idx + 1); 2766 2767 status = run_perf_stat(argc, argv); 2768 if (forever && status != -1) { 2769 print_counters(NULL, argc, argv); 2770 perf_stat__reset_stats(); 2771 } 2772 } 2773 2774 if (!forever && status != -1 && !interval) 2775 print_counters(NULL, argc, argv); 2776 2777 if (STAT_RECORD) { 2778 /* 2779 * We synthesize the kernel mmap record just so that older tools 2780 * don't emit warnings about not being able to resolve symbols 2781 * due to /proc/sys/kernel/kptr_restrict settings and instear provide 2782 * a saner message about no samples being in the perf.data file. 2783 * 2784 * This also serves to suppress a warning about f_header.data.size == 0 2785 * in header.c at the moment 'perf stat record' gets introduced, which 2786 * is not really needed once we start adding the stat specific PERF_RECORD_ 2787 * records, but the need to suppress the kptr_restrict messages in older 2788 * tools remain -acme 2789 */ 2790 int fd = perf_data_file__fd(&perf_stat.file); 2791 int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, 2792 process_synthesized_event, 2793 &perf_stat.session->machines.host); 2794 if (err) { 2795 pr_warning("Couldn't synthesize the kernel mmap record, harmless, " 2796 "older tools may produce warnings about this file\n."); 2797 } 2798 2799 if (!interval) { 2800 if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL)) 2801 pr_err("failed to write stat round event\n"); 2802 } 2803 2804 if (!perf_stat.file.is_pipe) { 2805 perf_stat.session->header.data_size += perf_stat.bytes_written; 2806 perf_session__write_header(perf_stat.session, evsel_list, fd, true); 2807 } 2808 2809 perf_session__delete(perf_stat.session); 2810 } 2811 2812 perf_stat__exit_aggr_mode(); 2813 perf_evlist__free_stats(evsel_list); 2814 out: 2815 if (smi_cost && smi_reset) 2816 sysfs__write_int(FREEZE_ON_SMI_PATH, 0); 2817 2818 perf_evlist__delete(evsel_list); 2819 return status; 2820 } 2821