1 /* 2 * builtin-stat.c 3 * 4 * Builtin stat command: Give a precise performance counters summary 5 * overview about any workload, CPU or specific PID. 6 * 7 * Sample output: 8 9 $ perf stat ./hackbench 10 10 11 Time: 0.118 12 13 Performance counter stats for './hackbench 10': 14 15 1708.761321 task-clock # 11.037 CPUs utilized 16 41,190 context-switches # 0.024 M/sec 17 6,735 CPU-migrations # 0.004 M/sec 18 17,318 page-faults # 0.010 M/sec 19 5,205,202,243 cycles # 3.046 GHz 20 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle 21 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle 22 2,603,501,247 instructions # 0.50 insns per cycle 23 # 1.48 stalled cycles per insn 24 484,357,498 branches # 283.455 M/sec 25 6,388,934 branch-misses # 1.32% of all branches 26 27 0.154822978 seconds time elapsed 28 29 * 30 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 31 * 32 * Improvements and fixes by: 33 * 34 * Arjan van de Ven <arjan@linux.intel.com> 35 * Yanmin Zhang <yanmin.zhang@intel.com> 36 * Wu Fengguang <fengguang.wu@intel.com> 37 * Mike Galbraith <efault@gmx.de> 38 * Paul Mackerras <paulus@samba.org> 39 * Jaswinder Singh Rajput <jaswinder@kernel.org> 40 * 41 * Released under the GPL v2. 
(and only v2, not any later version) 42 */ 43 44 #include "perf.h" 45 #include "builtin.h" 46 #include "util/cgroup.h" 47 #include "util/util.h" 48 #include <subcmd/parse-options.h> 49 #include "util/parse-events.h" 50 #include "util/pmu.h" 51 #include "util/event.h" 52 #include "util/evlist.h" 53 #include "util/evsel.h" 54 #include "util/debug.h" 55 #include "util/drv_configs.h" 56 #include "util/color.h" 57 #include "util/stat.h" 58 #include "util/header.h" 59 #include "util/cpumap.h" 60 #include "util/thread.h" 61 #include "util/thread_map.h" 62 #include "util/counts.h" 63 #include "util/group.h" 64 #include "util/session.h" 65 #include "util/tool.h" 66 #include "util/string2.h" 67 #include "util/metricgroup.h" 68 #include "asm/bug.h" 69 70 #include <linux/time64.h> 71 #include <api/fs/fs.h> 72 #include <errno.h> 73 #include <signal.h> 74 #include <stdlib.h> 75 #include <sys/prctl.h> 76 #include <inttypes.h> 77 #include <locale.h> 78 #include <math.h> 79 #include <sys/types.h> 80 #include <sys/stat.h> 81 #include <sys/wait.h> 82 #include <unistd.h> 83 84 #include "sane_ctype.h" 85 86 #define DEFAULT_SEPARATOR " " 87 #define CNTR_NOT_SUPPORTED "<not supported>" 88 #define CNTR_NOT_COUNTED "<not counted>" 89 #define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi" 90 91 static void print_counters(struct timespec *ts, int argc, const char **argv); 92 93 /* Default events used for perf stat -T */ 94 static const char *transaction_attrs = { 95 "task-clock," 96 "{" 97 "instructions," 98 "cycles," 99 "cpu/cycles-t/," 100 "cpu/tx-start/," 101 "cpu/el-start/," 102 "cpu/cycles-ct/" 103 "}" 104 }; 105 106 /* More limited version when the CPU does not have all events. 
*/ 107 static const char * transaction_limited_attrs = { 108 "task-clock," 109 "{" 110 "instructions," 111 "cycles," 112 "cpu/cycles-t/," 113 "cpu/tx-start/" 114 "}" 115 }; 116 117 static const char * topdown_attrs[] = { 118 "topdown-total-slots", 119 "topdown-slots-retired", 120 "topdown-recovery-bubbles", 121 "topdown-fetch-bubbles", 122 "topdown-slots-issued", 123 NULL, 124 }; 125 126 static const char *smi_cost_attrs = { 127 "{" 128 "msr/aperf/," 129 "msr/smi/," 130 "cycles" 131 "}" 132 }; 133 134 static struct perf_evlist *evsel_list; 135 136 static struct rblist metric_events; 137 138 static struct target target = { 139 .uid = UINT_MAX, 140 }; 141 142 typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu); 143 144 static int run_count = 1; 145 static bool no_inherit = false; 146 static volatile pid_t child_pid = -1; 147 static bool null_run = false; 148 static int detailed_run = 0; 149 static bool transaction_run; 150 static bool topdown_run = false; 151 static bool smi_cost = false; 152 static bool smi_reset = false; 153 static bool big_num = true; 154 static int big_num_opt = -1; 155 static const char *csv_sep = NULL; 156 static bool csv_output = false; 157 static bool group = false; 158 static const char *pre_cmd = NULL; 159 static const char *post_cmd = NULL; 160 static bool sync_run = false; 161 static unsigned int initial_delay = 0; 162 static unsigned int unit_width = 4; /* strlen("unit") */ 163 static bool forever = false; 164 static bool metric_only = false; 165 static bool force_metric_only = false; 166 static bool no_merge = false; 167 static struct timespec ref_time; 168 static struct cpu_map *aggr_map; 169 static aggr_get_id_t aggr_get_id; 170 static bool append_file; 171 static bool interval_count; 172 static const char *output_name; 173 static int output_fd; 174 static int print_free_counters_hint; 175 176 struct perf_stat { 177 bool record; 178 struct perf_data data; 179 struct perf_session *session; 180 u64 bytes_written; 181 struct 
perf_tool tool; 182 bool maps_allocated; 183 struct cpu_map *cpus; 184 struct thread_map *threads; 185 enum aggr_mode aggr_mode; 186 }; 187 188 static struct perf_stat perf_stat; 189 #define STAT_RECORD perf_stat.record 190 191 static volatile int done = 0; 192 193 static struct perf_stat_config stat_config = { 194 .aggr_mode = AGGR_GLOBAL, 195 .scale = true, 196 }; 197 198 static bool is_duration_time(struct perf_evsel *evsel) 199 { 200 return !strcmp(evsel->name, "duration_time"); 201 } 202 203 static inline void diff_timespec(struct timespec *r, struct timespec *a, 204 struct timespec *b) 205 { 206 r->tv_sec = a->tv_sec - b->tv_sec; 207 if (a->tv_nsec < b->tv_nsec) { 208 r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec; 209 r->tv_sec--; 210 } else { 211 r->tv_nsec = a->tv_nsec - b->tv_nsec ; 212 } 213 } 214 215 static void perf_stat__reset_stats(void) 216 { 217 int i; 218 219 perf_evlist__reset_stats(evsel_list); 220 perf_stat__reset_shadow_stats(); 221 222 for (i = 0; i < stat_config.stats_num; i++) 223 perf_stat__reset_shadow_per_stat(&stat_config.stats[i]); 224 } 225 226 static int create_perf_stat_counter(struct perf_evsel *evsel) 227 { 228 struct perf_event_attr *attr = &evsel->attr; 229 struct perf_evsel *leader = evsel->leader; 230 231 if (stat_config.scale) { 232 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 233 PERF_FORMAT_TOTAL_TIME_RUNNING; 234 } 235 236 /* 237 * The event is part of non trivial group, let's enable 238 * the group read (for leader) and ID retrieval for all 239 * members. 240 */ 241 if (leader->nr_members > 1) 242 attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; 243 244 attr->inherit = !no_inherit; 245 246 /* 247 * Some events get initialized with sample_(period/type) set, 248 * like tracepoints. Clear it up for counting. 249 */ 250 attr->sample_period = 0; 251 252 /* 253 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless 254 * while avoiding that older tools show confusing messages. 
255 * 256 * However for pipe sessions we need to keep it zero, 257 * because script's perf_evsel__check_attr is triggered 258 * by attr->sample_type != 0, and we can't run it on 259 * stat sessions. 260 */ 261 if (!(STAT_RECORD && perf_stat.data.is_pipe)) 262 attr->sample_type = PERF_SAMPLE_IDENTIFIER; 263 264 /* 265 * Disabling all counters initially, they will be enabled 266 * either manually by us or by kernel via enable_on_exec 267 * set later. 268 */ 269 if (perf_evsel__is_group_leader(evsel)) { 270 attr->disabled = 1; 271 272 /* 273 * In case of initial_delay we enable tracee 274 * events manually. 275 */ 276 if (target__none(&target) && !initial_delay) 277 attr->enable_on_exec = 1; 278 } 279 280 if (target__has_cpu(&target) && !target__has_per_thread(&target)) 281 return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); 282 283 return perf_evsel__open_per_thread(evsel, evsel_list->threads); 284 } 285 286 /* 287 * Does the counter have nsecs as a unit? 288 */ 289 static inline int nsec_counter(struct perf_evsel *evsel) 290 { 291 if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || 292 perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 293 return 1; 294 295 return 0; 296 } 297 298 static int process_synthesized_event(struct perf_tool *tool __maybe_unused, 299 union perf_event *event, 300 struct perf_sample *sample __maybe_unused, 301 struct machine *machine __maybe_unused) 302 { 303 if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) { 304 pr_err("failed to write perf data, error: %m\n"); 305 return -1; 306 } 307 308 perf_stat.bytes_written += event->header.size; 309 return 0; 310 } 311 312 static int write_stat_round_event(u64 tm, u64 type) 313 { 314 return perf_event__synthesize_stat_round(NULL, tm, type, 315 process_synthesized_event, 316 NULL); 317 } 318 319 #define WRITE_STAT_ROUND_EVENT(time, interval) \ 320 write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval) 321 322 #define SID(e, x, y) 
xyarray__entry(e->sample_id, x, y) 323 324 static int 325 perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread, 326 struct perf_counts_values *count) 327 { 328 struct perf_sample_id *sid = SID(counter, cpu, thread); 329 330 return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count, 331 process_synthesized_event, NULL); 332 } 333 334 /* 335 * Read out the results of a single counter: 336 * do not aggregate counts across CPUs in system-wide mode 337 */ 338 static int read_counter(struct perf_evsel *counter) 339 { 340 int nthreads = thread_map__nr(evsel_list->threads); 341 int ncpus, cpu, thread; 342 343 if (target__has_cpu(&target) && !target__has_per_thread(&target)) 344 ncpus = perf_evsel__nr_cpus(counter); 345 else 346 ncpus = 1; 347 348 if (!counter->supported) 349 return -ENOENT; 350 351 if (counter->system_wide) 352 nthreads = 1; 353 354 for (thread = 0; thread < nthreads; thread++) { 355 for (cpu = 0; cpu < ncpus; cpu++) { 356 struct perf_counts_values *count; 357 358 count = perf_counts(counter->counts, cpu, thread); 359 360 /* 361 * The leader's group read loads data into its group members 362 * (via perf_evsel__read_counter) and sets threir count->loaded. 
363 */ 364 if (!count->loaded && 365 perf_evsel__read_counter(counter, cpu, thread)) { 366 counter->counts->scaled = -1; 367 perf_counts(counter->counts, cpu, thread)->ena = 0; 368 perf_counts(counter->counts, cpu, thread)->run = 0; 369 return -1; 370 } 371 372 count->loaded = false; 373 374 if (STAT_RECORD) { 375 if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { 376 pr_err("failed to write stat event\n"); 377 return -1; 378 } 379 } 380 381 if (verbose > 1) { 382 fprintf(stat_config.output, 383 "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", 384 perf_evsel__name(counter), 385 cpu, 386 count->val, count->ena, count->run); 387 } 388 } 389 } 390 391 return 0; 392 } 393 394 static void read_counters(void) 395 { 396 struct perf_evsel *counter; 397 int ret; 398 399 evlist__for_each_entry(evsel_list, counter) { 400 ret = read_counter(counter); 401 if (ret) 402 pr_debug("failed to read counter %s\n", counter->name); 403 404 if (ret == 0 && perf_stat_process_counter(&stat_config, counter)) 405 pr_warning("failed to process counter %s\n", counter->name); 406 } 407 } 408 409 static void process_interval(void) 410 { 411 struct timespec ts, rs; 412 413 read_counters(); 414 415 clock_gettime(CLOCK_MONOTONIC, &ts); 416 diff_timespec(&rs, &ts, &ref_time); 417 418 if (STAT_RECORD) { 419 if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL)) 420 pr_err("failed to write stat round event\n"); 421 } 422 423 init_stats(&walltime_nsecs_stats); 424 update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000); 425 print_counters(&rs, 0, NULL); 426 } 427 428 static void enable_counters(void) 429 { 430 if (initial_delay) 431 usleep(initial_delay * USEC_PER_MSEC); 432 433 /* 434 * We need to enable counters only if: 435 * - we don't have tracee (attaching to task or cpu) 436 * - we have initial delay configured 437 */ 438 if (!target__none(&target) || initial_delay) 439 perf_evlist__enable(evsel_list); 440 } 441 442 static void 
disable_counters(void) 443 { 444 /* 445 * If we don't have tracee (attaching to task or cpu), counters may 446 * still be running. To get accurate group ratios, we must stop groups 447 * from counting before reading their constituent counters. 448 */ 449 if (!target__none(&target)) 450 perf_evlist__disable(evsel_list); 451 } 452 453 static volatile int workload_exec_errno; 454 455 /* 456 * perf_evlist__prepare_workload will send a SIGUSR1 457 * if the fork fails, since we asked by setting its 458 * want_signal to true. 459 */ 460 static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info, 461 void *ucontext __maybe_unused) 462 { 463 workload_exec_errno = info->si_value.sival_int; 464 } 465 466 static int perf_stat_synthesize_config(bool is_pipe) 467 { 468 int err; 469 470 if (is_pipe) { 471 err = perf_event__synthesize_attrs(NULL, perf_stat.session, 472 process_synthesized_event); 473 if (err < 0) { 474 pr_err("Couldn't synthesize attrs.\n"); 475 return err; 476 } 477 } 478 479 err = perf_event__synthesize_extra_attr(NULL, 480 evsel_list, 481 process_synthesized_event, 482 is_pipe); 483 484 err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads, 485 process_synthesized_event, 486 NULL); 487 if (err < 0) { 488 pr_err("Couldn't synthesize thread map.\n"); 489 return err; 490 } 491 492 err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus, 493 process_synthesized_event, NULL); 494 if (err < 0) { 495 pr_err("Couldn't synthesize thread map.\n"); 496 return err; 497 } 498 499 err = perf_event__synthesize_stat_config(NULL, &stat_config, 500 process_synthesized_event, NULL); 501 if (err < 0) { 502 pr_err("Couldn't synthesize config.\n"); 503 return err; 504 } 505 506 return 0; 507 } 508 509 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 510 511 static int __store_counter_ids(struct perf_evsel *counter) 512 { 513 int cpu, thread; 514 515 for (cpu = 0; cpu < xyarray__max_x(counter->fd); cpu++) { 516 for (thread = 0; thread 
< xyarray__max_y(counter->fd); 517 thread++) { 518 int fd = FD(counter, cpu, thread); 519 520 if (perf_evlist__id_add_fd(evsel_list, counter, 521 cpu, thread, fd) < 0) 522 return -1; 523 } 524 } 525 526 return 0; 527 } 528 529 static int store_counter_ids(struct perf_evsel *counter) 530 { 531 struct cpu_map *cpus = counter->cpus; 532 struct thread_map *threads = counter->threads; 533 534 if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr)) 535 return -ENOMEM; 536 537 return __store_counter_ids(counter); 538 } 539 540 static bool perf_evsel__should_store_id(struct perf_evsel *counter) 541 { 542 return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID; 543 } 544 545 static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel) 546 { 547 struct perf_evsel *c2, *leader; 548 bool is_open = true; 549 550 leader = evsel->leader; 551 pr_debug("Weak group for %s/%d failed\n", 552 leader->name, leader->nr_members); 553 554 /* 555 * for_each_group_member doesn't work here because it doesn't 556 * include the first entry. 557 */ 558 evlist__for_each_entry(evsel_list, c2) { 559 if (c2 == evsel) 560 is_open = false; 561 if (c2->leader == leader) { 562 if (is_open) 563 perf_evsel__close(c2); 564 c2->leader = c2; 565 c2->nr_members = 0; 566 } 567 } 568 return leader; 569 } 570 571 static int __run_perf_stat(int argc, const char **argv) 572 { 573 int interval = stat_config.interval; 574 int times = stat_config.times; 575 int timeout = stat_config.timeout; 576 char msg[BUFSIZ]; 577 unsigned long long t0, t1; 578 struct perf_evsel *counter; 579 struct timespec ts; 580 size_t l; 581 int status = 0; 582 const bool forks = (argc > 0); 583 bool is_pipe = STAT_RECORD ? 
perf_stat.data.is_pipe : false; 584 struct perf_evsel_config_term *err_term; 585 586 if (interval) { 587 ts.tv_sec = interval / USEC_PER_MSEC; 588 ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC; 589 } else if (timeout) { 590 ts.tv_sec = timeout / USEC_PER_MSEC; 591 ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC; 592 } else { 593 ts.tv_sec = 1; 594 ts.tv_nsec = 0; 595 } 596 597 if (forks) { 598 if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe, 599 workload_exec_failed_signal) < 0) { 600 perror("failed to prepare workload"); 601 return -1; 602 } 603 child_pid = evsel_list->workload.pid; 604 } 605 606 if (group) 607 perf_evlist__set_leader(evsel_list); 608 609 evlist__for_each_entry(evsel_list, counter) { 610 try_again: 611 if (create_perf_stat_counter(counter) < 0) { 612 613 /* Weak group failed. Reset the group. */ 614 if ((errno == EINVAL || errno == EBADF) && 615 counter->leader != counter && 616 counter->weak_group) { 617 counter = perf_evsel__reset_weak_group(counter); 618 goto try_again; 619 } 620 621 /* 622 * PPC returns ENXIO for HW counters until 2.6.37 623 * (behavior changed with commit b0a873e). 624 */ 625 if (errno == EINVAL || errno == ENOSYS || 626 errno == ENOENT || errno == EOPNOTSUPP || 627 errno == ENXIO) { 628 if (verbose > 0) 629 ui__warning("%s event is not supported by the kernel.\n", 630 perf_evsel__name(counter)); 631 counter->supported = false; 632 633 if ((counter->leader != counter) || 634 !(counter->leader->nr_members > 1)) 635 continue; 636 } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { 637 if (verbose > 0) 638 ui__warning("%s\n", msg); 639 goto try_again; 640 } else if (target__has_per_thread(&target) && 641 evsel_list->threads && 642 evsel_list->threads->err_thread != -1) { 643 /* 644 * For global --per-thread case, skip current 645 * error thread. 
646 */ 647 if (!thread_map__remove(evsel_list->threads, 648 evsel_list->threads->err_thread)) { 649 evsel_list->threads->err_thread = -1; 650 goto try_again; 651 } 652 } 653 654 perf_evsel__open_strerror(counter, &target, 655 errno, msg, sizeof(msg)); 656 ui__error("%s\n", msg); 657 658 if (child_pid != -1) 659 kill(child_pid, SIGTERM); 660 661 return -1; 662 } 663 counter->supported = true; 664 665 l = strlen(counter->unit); 666 if (l > unit_width) 667 unit_width = l; 668 669 if (perf_evsel__should_store_id(counter) && 670 store_counter_ids(counter)) 671 return -1; 672 } 673 674 if (perf_evlist__apply_filters(evsel_list, &counter)) { 675 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 676 counter->filter, perf_evsel__name(counter), errno, 677 str_error_r(errno, msg, sizeof(msg))); 678 return -1; 679 } 680 681 if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) { 682 pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", 683 err_term->val.drv_cfg, perf_evsel__name(counter), errno, 684 str_error_r(errno, msg, sizeof(msg))); 685 return -1; 686 } 687 688 if (STAT_RECORD) { 689 int err, fd = perf_data__fd(&perf_stat.data); 690 691 if (is_pipe) { 692 err = perf_header__write_pipe(perf_data__fd(&perf_stat.data)); 693 } else { 694 err = perf_session__write_header(perf_stat.session, evsel_list, 695 fd, false); 696 } 697 698 if (err < 0) 699 return err; 700 701 err = perf_stat_synthesize_config(is_pipe); 702 if (err < 0) 703 return err; 704 } 705 706 /* 707 * Enable counters and exec the command: 708 */ 709 t0 = rdclock(); 710 clock_gettime(CLOCK_MONOTONIC, &ref_time); 711 712 if (forks) { 713 perf_evlist__start_workload(evsel_list); 714 enable_counters(); 715 716 if (interval || timeout) { 717 while (!waitpid(child_pid, &status, WNOHANG)) { 718 nanosleep(&ts, NULL); 719 if (timeout) 720 break; 721 process_interval(); 722 if (interval_count && !(--times)) 723 break; 724 } 725 } 726 waitpid(child_pid, &status, 0); 727 728 if 
(workload_exec_errno) { 729 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 730 pr_err("Workload failed: %s\n", emsg); 731 return -1; 732 } 733 734 if (WIFSIGNALED(status)) 735 psignal(WTERMSIG(status), argv[0]); 736 } else { 737 enable_counters(); 738 while (!done) { 739 nanosleep(&ts, NULL); 740 if (timeout) 741 break; 742 if (interval) { 743 process_interval(); 744 if (interval_count && !(--times)) 745 break; 746 } 747 } 748 } 749 750 disable_counters(); 751 752 t1 = rdclock(); 753 754 update_stats(&walltime_nsecs_stats, t1 - t0); 755 756 /* 757 * Closing a group leader splits the group, and as we only disable 758 * group leaders, results in remaining events becoming enabled. To 759 * avoid arbitrary skew, we must read all counters before closing any 760 * group leaders. 761 */ 762 read_counters(); 763 perf_evlist__close(evsel_list); 764 765 return WEXITSTATUS(status); 766 } 767 768 static int run_perf_stat(int argc, const char **argv) 769 { 770 int ret; 771 772 if (pre_cmd) { 773 ret = system(pre_cmd); 774 if (ret) 775 return ret; 776 } 777 778 if (sync_run) 779 sync(); 780 781 ret = __run_perf_stat(argc, argv); 782 if (ret) 783 return ret; 784 785 if (post_cmd) { 786 ret = system(post_cmd); 787 if (ret) 788 return ret; 789 } 790 791 return ret; 792 } 793 794 static void print_running(u64 run, u64 ena) 795 { 796 if (csv_output) { 797 fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", 798 csv_sep, 799 run, 800 csv_sep, 801 ena ? 
100.0 * run / ena : 100.0); 802 } else if (run != ena) { 803 fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); 804 } 805 } 806 807 static void print_noise_pct(double total, double avg) 808 { 809 double pct = rel_stddev_stats(total, avg); 810 811 if (csv_output) 812 fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct); 813 else if (pct) 814 fprintf(stat_config.output, " ( +-%6.2f%% )", pct); 815 } 816 817 static void print_noise(struct perf_evsel *evsel, double avg) 818 { 819 struct perf_stat_evsel *ps; 820 821 if (run_count == 1) 822 return; 823 824 ps = evsel->stats; 825 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 826 } 827 828 static void aggr_printout(struct perf_evsel *evsel, int id, int nr) 829 { 830 switch (stat_config.aggr_mode) { 831 case AGGR_CORE: 832 fprintf(stat_config.output, "S%d-C%*d%s%*d%s", 833 cpu_map__id_to_socket(id), 834 csv_output ? 0 : -8, 835 cpu_map__id_to_cpu(id), 836 csv_sep, 837 csv_output ? 0 : 4, 838 nr, 839 csv_sep); 840 break; 841 case AGGR_SOCKET: 842 fprintf(stat_config.output, "S%*d%s%*d%s", 843 csv_output ? 0 : -5, 844 id, 845 csv_sep, 846 csv_output ? 0 : 4, 847 nr, 848 csv_sep); 849 break; 850 case AGGR_NONE: 851 fprintf(stat_config.output, "CPU%*d%s", 852 csv_output ? 0 : -4, 853 perf_evsel__cpus(evsel)->map[id], csv_sep); 854 break; 855 case AGGR_THREAD: 856 fprintf(stat_config.output, "%*s-%*d%s", 857 csv_output ? 0 : 16, 858 thread_map__comm(evsel->threads, id), 859 csv_output ? 
0 : -8, 860 thread_map__pid(evsel->threads, id), 861 csv_sep); 862 break; 863 case AGGR_GLOBAL: 864 case AGGR_UNSET: 865 default: 866 break; 867 } 868 } 869 870 struct outstate { 871 FILE *fh; 872 bool newline; 873 const char *prefix; 874 int nfields; 875 int id, nr; 876 struct perf_evsel *evsel; 877 }; 878 879 #define METRIC_LEN 35 880 881 static void new_line_std(void *ctx) 882 { 883 struct outstate *os = ctx; 884 885 os->newline = true; 886 } 887 888 static void do_new_line_std(struct outstate *os) 889 { 890 fputc('\n', os->fh); 891 fputs(os->prefix, os->fh); 892 aggr_printout(os->evsel, os->id, os->nr); 893 if (stat_config.aggr_mode == AGGR_NONE) 894 fprintf(os->fh, " "); 895 fprintf(os->fh, " "); 896 } 897 898 static void print_metric_std(void *ctx, const char *color, const char *fmt, 899 const char *unit, double val) 900 { 901 struct outstate *os = ctx; 902 FILE *out = os->fh; 903 int n; 904 bool newline = os->newline; 905 906 os->newline = false; 907 908 if (unit == NULL || fmt == NULL) { 909 fprintf(out, "%-*s", METRIC_LEN, ""); 910 return; 911 } 912 913 if (newline) 914 do_new_line_std(os); 915 916 n = fprintf(out, " # "); 917 if (color) 918 n += color_fprintf(out, color, fmt, val); 919 else 920 n += fprintf(out, fmt, val); 921 fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); 922 } 923 924 static void new_line_csv(void *ctx) 925 { 926 struct outstate *os = ctx; 927 int i; 928 929 fputc('\n', os->fh); 930 if (os->prefix) 931 fprintf(os->fh, "%s%s", os->prefix, csv_sep); 932 aggr_printout(os->evsel, os->id, os->nr); 933 for (i = 0; i < os->nfields; i++) 934 fputs(csv_sep, os->fh); 935 } 936 937 static void print_metric_csv(void *ctx, 938 const char *color __maybe_unused, 939 const char *fmt, const char *unit, double val) 940 { 941 struct outstate *os = ctx; 942 FILE *out = os->fh; 943 char buf[64], *vals, *ends; 944 945 if (unit == NULL || fmt == NULL) { 946 fprintf(out, "%s%s", csv_sep, csv_sep); 947 return; 948 } 949 snprintf(buf, sizeof(buf), fmt, val); 
950 ends = vals = ltrim(buf); 951 while (isdigit(*ends) || *ends == '.') 952 ends++; 953 *ends = 0; 954 while (isspace(*unit)) 955 unit++; 956 fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); 957 } 958 959 #define METRIC_ONLY_LEN 20 960 961 /* Filter out some columns that don't work well in metrics only mode */ 962 963 static bool valid_only_metric(const char *unit) 964 { 965 if (!unit) 966 return false; 967 if (strstr(unit, "/sec") || 968 strstr(unit, "hz") || 969 strstr(unit, "Hz") || 970 strstr(unit, "CPUs utilized")) 971 return false; 972 return true; 973 } 974 975 static const char *fixunit(char *buf, struct perf_evsel *evsel, 976 const char *unit) 977 { 978 if (!strncmp(unit, "of all", 6)) { 979 snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), 980 unit); 981 return buf; 982 } 983 return unit; 984 } 985 986 static void print_metric_only(void *ctx, const char *color, const char *fmt, 987 const char *unit, double val) 988 { 989 struct outstate *os = ctx; 990 FILE *out = os->fh; 991 int n; 992 char buf[1024]; 993 unsigned mlen = METRIC_ONLY_LEN; 994 995 if (!valid_only_metric(unit)) 996 return; 997 unit = fixunit(buf, os->evsel, unit); 998 if (color) 999 n = color_fprintf(out, color, fmt, val); 1000 else 1001 n = fprintf(out, fmt, val); 1002 if (n > METRIC_ONLY_LEN) 1003 n = METRIC_ONLY_LEN; 1004 if (mlen < strlen(unit)) 1005 mlen = strlen(unit) + 1; 1006 fprintf(out, "%*s", mlen - n, ""); 1007 } 1008 1009 static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, 1010 const char *fmt, 1011 const char *unit, double val) 1012 { 1013 struct outstate *os = ctx; 1014 FILE *out = os->fh; 1015 char buf[64], *vals, *ends; 1016 char tbuf[1024]; 1017 1018 if (!valid_only_metric(unit)) 1019 return; 1020 unit = fixunit(tbuf, os->evsel, unit); 1021 snprintf(buf, sizeof buf, fmt, val); 1022 ends = vals = ltrim(buf); 1023 while (isdigit(*ends) || *ends == '.') 1024 ends++; 1025 *ends = 0; 1026 fprintf(out, "%s%s", vals, csv_sep); 1027 } 1028 
1029 static void new_line_metric(void *ctx __maybe_unused) 1030 { 1031 } 1032 1033 static void print_metric_header(void *ctx, const char *color __maybe_unused, 1034 const char *fmt __maybe_unused, 1035 const char *unit, double val __maybe_unused) 1036 { 1037 struct outstate *os = ctx; 1038 char tbuf[1024]; 1039 1040 if (!valid_only_metric(unit)) 1041 return; 1042 unit = fixunit(tbuf, os->evsel, unit); 1043 if (csv_output) 1044 fprintf(os->fh, "%s%s", unit, csv_sep); 1045 else 1046 fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit); 1047 } 1048 1049 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) 1050 { 1051 FILE *output = stat_config.output; 1052 double msecs = avg / NSEC_PER_MSEC; 1053 const char *fmt_v, *fmt_n; 1054 char name[25]; 1055 1056 fmt_v = csv_output ? "%.6f%s" : "%18.6f%s"; 1057 fmt_n = csv_output ? "%s" : "%-25s"; 1058 1059 aggr_printout(evsel, id, nr); 1060 1061 scnprintf(name, sizeof(name), "%s%s", 1062 perf_evsel__name(evsel), csv_output ? "" : " (msec)"); 1063 1064 fprintf(output, fmt_v, msecs, csv_sep); 1065 1066 if (csv_output) 1067 fprintf(output, "%s%s", evsel->unit, csv_sep); 1068 else 1069 fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep); 1070 1071 fprintf(output, fmt_n, name); 1072 1073 if (evsel->cgrp) 1074 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1075 } 1076 1077 static int first_shadow_cpu(struct perf_evsel *evsel, int id) 1078 { 1079 int i; 1080 1081 if (!aggr_get_id) 1082 return 0; 1083 1084 if (stat_config.aggr_mode == AGGR_NONE) 1085 return id; 1086 1087 if (stat_config.aggr_mode == AGGR_GLOBAL) 1088 return 0; 1089 1090 for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { 1091 int cpu2 = perf_evsel__cpus(evsel)->map[i]; 1092 1093 if (aggr_get_id(evsel_list->cpus, cpu2) == id) 1094 return cpu2; 1095 } 1096 return 0; 1097 } 1098 1099 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 1100 { 1101 FILE *output = stat_config.output; 1102 double sc = evsel->scale; 
1103 const char *fmt; 1104 1105 if (csv_output) { 1106 fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; 1107 } else { 1108 if (big_num) 1109 fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; 1110 else 1111 fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; 1112 } 1113 1114 aggr_printout(evsel, id, nr); 1115 1116 fprintf(output, fmt, avg, csv_sep); 1117 1118 if (evsel->unit) 1119 fprintf(output, "%-*s%s", 1120 csv_output ? 0 : unit_width, 1121 evsel->unit, csv_sep); 1122 1123 fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); 1124 1125 if (evsel->cgrp) 1126 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1127 } 1128 1129 static void printout(int id, int nr, struct perf_evsel *counter, double uval, 1130 char *prefix, u64 run, u64 ena, double noise, 1131 struct runtime_stat *st) 1132 { 1133 struct perf_stat_output_ctx out; 1134 struct outstate os = { 1135 .fh = stat_config.output, 1136 .prefix = prefix ? prefix : "", 1137 .id = id, 1138 .nr = nr, 1139 .evsel = counter, 1140 }; 1141 print_metric_t pm = print_metric_std; 1142 void (*nl)(void *); 1143 1144 if (metric_only) { 1145 nl = new_line_metric; 1146 if (csv_output) 1147 pm = print_metric_only_csv; 1148 else 1149 pm = print_metric_only; 1150 } else 1151 nl = new_line_std; 1152 1153 if (csv_output && !metric_only) { 1154 static int aggr_fields[] = { 1155 [AGGR_GLOBAL] = 0, 1156 [AGGR_THREAD] = 1, 1157 [AGGR_NONE] = 1, 1158 [AGGR_SOCKET] = 2, 1159 [AGGR_CORE] = 2, 1160 }; 1161 1162 pm = print_metric_csv; 1163 nl = new_line_csv; 1164 os.nfields = 3; 1165 os.nfields += aggr_fields[stat_config.aggr_mode]; 1166 if (counter->cgrp) 1167 os.nfields++; 1168 } 1169 if (run == 0 || ena == 0 || counter->counts->scaled == -1) { 1170 if (metric_only) { 1171 pm(&os, NULL, "", "", 0); 1172 return; 1173 } 1174 aggr_printout(counter, id, nr); 1175 1176 fprintf(stat_config.output, "%*s%s", 1177 csv_output ? 0 : 18, 1178 counter->supported ? 
CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 1179 csv_sep); 1180 1181 if (counter->supported) 1182 print_free_counters_hint = 1; 1183 1184 fprintf(stat_config.output, "%-*s%s", 1185 csv_output ? 0 : unit_width, 1186 counter->unit, csv_sep); 1187 1188 fprintf(stat_config.output, "%*s", 1189 csv_output ? 0 : -25, 1190 perf_evsel__name(counter)); 1191 1192 if (counter->cgrp) 1193 fprintf(stat_config.output, "%s%s", 1194 csv_sep, counter->cgrp->name); 1195 1196 if (!csv_output) 1197 pm(&os, NULL, NULL, "", 0); 1198 print_noise(counter, noise); 1199 print_running(run, ena); 1200 if (csv_output) 1201 pm(&os, NULL, NULL, "", 0); 1202 return; 1203 } 1204 1205 if (metric_only) 1206 /* nothing */; 1207 else if (nsec_counter(counter)) 1208 nsec_printout(id, nr, counter, uval); 1209 else 1210 abs_printout(id, nr, counter, uval); 1211 1212 out.print_metric = pm; 1213 out.new_line = nl; 1214 out.ctx = &os; 1215 out.force_header = false; 1216 1217 if (csv_output && !metric_only) { 1218 print_noise(counter, noise); 1219 print_running(run, ena); 1220 } 1221 1222 perf_stat__print_shadow_stats(counter, uval, 1223 first_shadow_cpu(counter, id), 1224 &out, &metric_events, st); 1225 if (!csv_output && !metric_only) { 1226 print_noise(counter, noise); 1227 print_running(run, ena); 1228 } 1229 } 1230 1231 static void aggr_update_shadow(void) 1232 { 1233 int cpu, s2, id, s; 1234 u64 val; 1235 struct perf_evsel *counter; 1236 1237 for (s = 0; s < aggr_map->nr; s++) { 1238 id = aggr_map->map[s]; 1239 evlist__for_each_entry(evsel_list, counter) { 1240 val = 0; 1241 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1242 s2 = aggr_get_id(evsel_list->cpus, cpu); 1243 if (s2 != id) 1244 continue; 1245 val += perf_counts(counter->counts, cpu, 0)->val; 1246 } 1247 perf_stat__update_shadow_stats(counter, val, 1248 first_shadow_cpu(counter, id), 1249 &rt_stat); 1250 } 1251 } 1252 } 1253 1254 static void uniquify_event_name(struct perf_evsel *counter) 1255 { 1256 char *new_name; 1257 char *config; 
1258 1259 if (!counter->pmu_name || !strncmp(counter->name, counter->pmu_name, 1260 strlen(counter->pmu_name))) 1261 return; 1262 1263 config = strchr(counter->name, '/'); 1264 if (config) { 1265 if (asprintf(&new_name, 1266 "%s%s", counter->pmu_name, config) > 0) { 1267 free(counter->name); 1268 counter->name = new_name; 1269 } 1270 } else { 1271 if (asprintf(&new_name, 1272 "%s [%s]", counter->name, counter->pmu_name) > 0) { 1273 free(counter->name); 1274 counter->name = new_name; 1275 } 1276 } 1277 } 1278 1279 static void collect_all_aliases(struct perf_evsel *counter, 1280 void (*cb)(struct perf_evsel *counter, void *data, 1281 bool first), 1282 void *data) 1283 { 1284 struct perf_evsel *alias; 1285 1286 alias = list_prepare_entry(counter, &(evsel_list->entries), node); 1287 list_for_each_entry_continue (alias, &evsel_list->entries, node) { 1288 if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) || 1289 alias->scale != counter->scale || 1290 alias->cgrp != counter->cgrp || 1291 strcmp(alias->unit, counter->unit) || 1292 nsec_counter(alias) != nsec_counter(counter)) 1293 break; 1294 alias->merged_stat = true; 1295 cb(alias, data, false); 1296 } 1297 } 1298 1299 static bool collect_data(struct perf_evsel *counter, 1300 void (*cb)(struct perf_evsel *counter, void *data, 1301 bool first), 1302 void *data) 1303 { 1304 if (counter->merged_stat) 1305 return false; 1306 cb(counter, data, true); 1307 if (no_merge) 1308 uniquify_event_name(counter); 1309 else if (counter->auto_merge_stats) 1310 collect_all_aliases(counter, cb, data); 1311 return true; 1312 } 1313 1314 struct aggr_data { 1315 u64 ena, run, val; 1316 int id; 1317 int nr; 1318 int cpu; 1319 }; 1320 1321 static void aggr_cb(struct perf_evsel *counter, void *data, bool first) 1322 { 1323 struct aggr_data *ad = data; 1324 int cpu, s2; 1325 1326 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1327 struct perf_counts_values *counts; 1328 1329 s2 = aggr_get_id(perf_evsel__cpus(counter), 
cpu); 1330 if (s2 != ad->id) 1331 continue; 1332 if (first) 1333 ad->nr++; 1334 counts = perf_counts(counter->counts, cpu, 0); 1335 /* 1336 * When any result is bad, make them all to give 1337 * consistent output in interval mode. 1338 */ 1339 if (counts->ena == 0 || counts->run == 0 || 1340 counter->counts->scaled == -1) { 1341 ad->ena = 0; 1342 ad->run = 0; 1343 break; 1344 } 1345 ad->val += counts->val; 1346 ad->ena += counts->ena; 1347 ad->run += counts->run; 1348 } 1349 } 1350 1351 static void print_aggr(char *prefix) 1352 { 1353 FILE *output = stat_config.output; 1354 struct perf_evsel *counter; 1355 int s, id, nr; 1356 double uval; 1357 u64 ena, run, val; 1358 bool first; 1359 1360 if (!(aggr_map || aggr_get_id)) 1361 return; 1362 1363 aggr_update_shadow(); 1364 1365 /* 1366 * With metric_only everything is on a single line. 1367 * Without each counter has its own line. 1368 */ 1369 for (s = 0; s < aggr_map->nr; s++) { 1370 struct aggr_data ad; 1371 if (prefix && metric_only) 1372 fprintf(output, "%s", prefix); 1373 1374 ad.id = id = aggr_map->map[s]; 1375 first = true; 1376 evlist__for_each_entry(evsel_list, counter) { 1377 if (is_duration_time(counter)) 1378 continue; 1379 1380 ad.val = ad.ena = ad.run = 0; 1381 ad.nr = 0; 1382 if (!collect_data(counter, aggr_cb, &ad)) 1383 continue; 1384 nr = ad.nr; 1385 ena = ad.ena; 1386 run = ad.run; 1387 val = ad.val; 1388 if (first && metric_only) { 1389 first = false; 1390 aggr_printout(counter, id, nr); 1391 } 1392 if (prefix && !metric_only) 1393 fprintf(output, "%s", prefix); 1394 1395 uval = val * counter->scale; 1396 printout(id, nr, counter, uval, prefix, run, ena, 1.0, 1397 &rt_stat); 1398 if (!metric_only) 1399 fputc('\n', output); 1400 } 1401 if (metric_only) 1402 fputc('\n', output); 1403 } 1404 } 1405 1406 static int cmp_val(const void *a, const void *b) 1407 { 1408 return ((struct perf_aggr_thread_value *)b)->val - 1409 ((struct perf_aggr_thread_value *)a)->val; 1410 } 1411 1412 static struct 
perf_aggr_thread_value *sort_aggr_thread( 1413 struct perf_evsel *counter, 1414 int nthreads, int ncpus, 1415 int *ret) 1416 { 1417 int cpu, thread, i = 0; 1418 double uval; 1419 struct perf_aggr_thread_value *buf; 1420 1421 buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value)); 1422 if (!buf) 1423 return NULL; 1424 1425 for (thread = 0; thread < nthreads; thread++) { 1426 u64 ena = 0, run = 0, val = 0; 1427 1428 for (cpu = 0; cpu < ncpus; cpu++) { 1429 val += perf_counts(counter->counts, cpu, thread)->val; 1430 ena += perf_counts(counter->counts, cpu, thread)->ena; 1431 run += perf_counts(counter->counts, cpu, thread)->run; 1432 } 1433 1434 uval = val * counter->scale; 1435 1436 /* 1437 * Skip value 0 when enabling --per-thread globally, 1438 * otherwise too many 0 output. 1439 */ 1440 if (uval == 0.0 && target__has_per_thread(&target)) 1441 continue; 1442 1443 buf[i].counter = counter; 1444 buf[i].id = thread; 1445 buf[i].uval = uval; 1446 buf[i].val = val; 1447 buf[i].run = run; 1448 buf[i].ena = ena; 1449 i++; 1450 } 1451 1452 qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val); 1453 1454 if (ret) 1455 *ret = i; 1456 1457 return buf; 1458 } 1459 1460 static void print_aggr_thread(struct perf_evsel *counter, char *prefix) 1461 { 1462 FILE *output = stat_config.output; 1463 int nthreads = thread_map__nr(counter->threads); 1464 int ncpus = cpu_map__nr(counter->cpus); 1465 int thread, sorted_threads, id; 1466 struct perf_aggr_thread_value *buf; 1467 1468 buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads); 1469 if (!buf) { 1470 perror("cannot sort aggr thread"); 1471 return; 1472 } 1473 1474 for (thread = 0; thread < sorted_threads; thread++) { 1475 if (prefix) 1476 fprintf(output, "%s", prefix); 1477 1478 id = buf[thread].id; 1479 if (stat_config.stats) 1480 printout(id, 0, buf[thread].counter, buf[thread].uval, 1481 prefix, buf[thread].run, buf[thread].ena, 1.0, 1482 &stat_config.stats[id]); 1483 else 1484 printout(id, 0, 
buf[thread].counter, buf[thread].uval, 1485 prefix, buf[thread].run, buf[thread].ena, 1.0, 1486 &rt_stat); 1487 fputc('\n', output); 1488 } 1489 1490 free(buf); 1491 } 1492 1493 struct caggr_data { 1494 double avg, avg_enabled, avg_running; 1495 }; 1496 1497 static void counter_aggr_cb(struct perf_evsel *counter, void *data, 1498 bool first __maybe_unused) 1499 { 1500 struct caggr_data *cd = data; 1501 struct perf_stat_evsel *ps = counter->stats; 1502 1503 cd->avg += avg_stats(&ps->res_stats[0]); 1504 cd->avg_enabled += avg_stats(&ps->res_stats[1]); 1505 cd->avg_running += avg_stats(&ps->res_stats[2]); 1506 } 1507 1508 /* 1509 * Print out the results of a single counter: 1510 * aggregated counts in system-wide mode 1511 */ 1512 static void print_counter_aggr(struct perf_evsel *counter, char *prefix) 1513 { 1514 FILE *output = stat_config.output; 1515 double uval; 1516 struct caggr_data cd = { .avg = 0.0 }; 1517 1518 if (!collect_data(counter, counter_aggr_cb, &cd)) 1519 return; 1520 1521 if (prefix && !metric_only) 1522 fprintf(output, "%s", prefix); 1523 1524 uval = cd.avg * counter->scale; 1525 printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, 1526 cd.avg, &rt_stat); 1527 if (!metric_only) 1528 fprintf(output, "\n"); 1529 } 1530 1531 static void counter_cb(struct perf_evsel *counter, void *data, 1532 bool first __maybe_unused) 1533 { 1534 struct aggr_data *ad = data; 1535 1536 ad->val += perf_counts(counter->counts, ad->cpu, 0)->val; 1537 ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena; 1538 ad->run += perf_counts(counter->counts, ad->cpu, 0)->run; 1539 } 1540 1541 /* 1542 * Print out the results of a single counter: 1543 * does not use aggregated count in system-wide 1544 */ 1545 static void print_counter(struct perf_evsel *counter, char *prefix) 1546 { 1547 FILE *output = stat_config.output; 1548 u64 ena, run, val; 1549 double uval; 1550 int cpu; 1551 1552 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1553 struct 
aggr_data ad = { .cpu = cpu }; 1554 1555 if (!collect_data(counter, counter_cb, &ad)) 1556 return; 1557 val = ad.val; 1558 ena = ad.ena; 1559 run = ad.run; 1560 1561 if (prefix) 1562 fprintf(output, "%s", prefix); 1563 1564 uval = val * counter->scale; 1565 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, 1566 &rt_stat); 1567 1568 fputc('\n', output); 1569 } 1570 } 1571 1572 static void print_no_aggr_metric(char *prefix) 1573 { 1574 int cpu; 1575 int nrcpus = 0; 1576 struct perf_evsel *counter; 1577 u64 ena, run, val; 1578 double uval; 1579 1580 nrcpus = evsel_list->cpus->nr; 1581 for (cpu = 0; cpu < nrcpus; cpu++) { 1582 bool first = true; 1583 1584 if (prefix) 1585 fputs(prefix, stat_config.output); 1586 evlist__for_each_entry(evsel_list, counter) { 1587 if (is_duration_time(counter)) 1588 continue; 1589 if (first) { 1590 aggr_printout(counter, cpu, 0); 1591 first = false; 1592 } 1593 val = perf_counts(counter->counts, cpu, 0)->val; 1594 ena = perf_counts(counter->counts, cpu, 0)->ena; 1595 run = perf_counts(counter->counts, cpu, 0)->run; 1596 1597 uval = val * counter->scale; 1598 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, 1599 &rt_stat); 1600 } 1601 fputc('\n', stat_config.output); 1602 } 1603 } 1604 1605 static int aggr_header_lens[] = { 1606 [AGGR_CORE] = 18, 1607 [AGGR_SOCKET] = 12, 1608 [AGGR_NONE] = 6, 1609 [AGGR_THREAD] = 24, 1610 [AGGR_GLOBAL] = 0, 1611 }; 1612 1613 static const char *aggr_header_csv[] = { 1614 [AGGR_CORE] = "core,cpus,", 1615 [AGGR_SOCKET] = "socket,cpus", 1616 [AGGR_NONE] = "cpu,", 1617 [AGGR_THREAD] = "comm-pid,", 1618 [AGGR_GLOBAL] = "" 1619 }; 1620 1621 static void print_metric_headers(const char *prefix, bool no_indent) 1622 { 1623 struct perf_stat_output_ctx out; 1624 struct perf_evsel *counter; 1625 struct outstate os = { 1626 .fh = stat_config.output 1627 }; 1628 1629 if (prefix) 1630 fprintf(stat_config.output, "%s", prefix); 1631 1632 if (!csv_output && !no_indent) 1633 fprintf(stat_config.output, "%*s", 1634 
aggr_header_lens[stat_config.aggr_mode], ""); 1635 if (csv_output) { 1636 if (stat_config.interval) 1637 fputs("time,", stat_config.output); 1638 fputs(aggr_header_csv[stat_config.aggr_mode], 1639 stat_config.output); 1640 } 1641 1642 /* Print metrics headers only */ 1643 evlist__for_each_entry(evsel_list, counter) { 1644 if (is_duration_time(counter)) 1645 continue; 1646 os.evsel = counter; 1647 out.ctx = &os; 1648 out.print_metric = print_metric_header; 1649 out.new_line = new_line_metric; 1650 out.force_header = true; 1651 os.evsel = counter; 1652 perf_stat__print_shadow_stats(counter, 0, 1653 0, 1654 &out, 1655 &metric_events, 1656 &rt_stat); 1657 } 1658 fputc('\n', stat_config.output); 1659 } 1660 1661 static void print_interval(char *prefix, struct timespec *ts) 1662 { 1663 FILE *output = stat_config.output; 1664 static int num_print_interval; 1665 1666 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); 1667 1668 if (num_print_interval == 0 && !csv_output) { 1669 switch (stat_config.aggr_mode) { 1670 case AGGR_SOCKET: 1671 fprintf(output, "# time socket cpus"); 1672 if (!metric_only) 1673 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1674 break; 1675 case AGGR_CORE: 1676 fprintf(output, "# time core cpus"); 1677 if (!metric_only) 1678 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1679 break; 1680 case AGGR_NONE: 1681 fprintf(output, "# time CPU"); 1682 if (!metric_only) 1683 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1684 break; 1685 case AGGR_THREAD: 1686 fprintf(output, "# time comm-pid"); 1687 if (!metric_only) 1688 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1689 break; 1690 case AGGR_GLOBAL: 1691 default: 1692 fprintf(output, "# time"); 1693 if (!metric_only) 1694 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1695 case AGGR_UNSET: 1696 break; 1697 } 1698 } 1699 1700 if (num_print_interval == 0 && metric_only) 1701 print_metric_headers(" ", true); 1702 if 
(++num_print_interval == 25) 1703 num_print_interval = 0; 1704 } 1705 1706 static void print_header(int argc, const char **argv) 1707 { 1708 FILE *output = stat_config.output; 1709 int i; 1710 1711 fflush(stdout); 1712 1713 if (!csv_output) { 1714 fprintf(output, "\n"); 1715 fprintf(output, " Performance counter stats for "); 1716 if (target.system_wide) 1717 fprintf(output, "\'system wide"); 1718 else if (target.cpu_list) 1719 fprintf(output, "\'CPU(s) %s", target.cpu_list); 1720 else if (!target__has_task(&target)) { 1721 fprintf(output, "\'%s", argv ? argv[0] : "pipe"); 1722 for (i = 1; argv && (i < argc); i++) 1723 fprintf(output, " %s", argv[i]); 1724 } else if (target.pid) 1725 fprintf(output, "process id \'%s", target.pid); 1726 else 1727 fprintf(output, "thread id \'%s", target.tid); 1728 1729 fprintf(output, "\'"); 1730 if (run_count > 1) 1731 fprintf(output, " (%d runs)", run_count); 1732 fprintf(output, ":\n\n"); 1733 } 1734 } 1735 1736 static void print_footer(void) 1737 { 1738 FILE *output = stat_config.output; 1739 int n; 1740 1741 if (!null_run) 1742 fprintf(output, "\n"); 1743 fprintf(output, " %17.9f seconds time elapsed", 1744 avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC); 1745 if (run_count > 1) { 1746 fprintf(output, " "); 1747 print_noise_pct(stddev_stats(&walltime_nsecs_stats), 1748 avg_stats(&walltime_nsecs_stats)); 1749 } 1750 fprintf(output, "\n\n"); 1751 1752 if (print_free_counters_hint && 1753 sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 && 1754 n > 0) 1755 fprintf(output, 1756 "Some events weren't counted. Try disabling the NMI watchdog:\n" 1757 " echo 0 > /proc/sys/kernel/nmi_watchdog\n" 1758 " perf stat ...\n" 1759 " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); 1760 } 1761 1762 static void print_counters(struct timespec *ts, int argc, const char **argv) 1763 { 1764 int interval = stat_config.interval; 1765 struct perf_evsel *counter; 1766 char buf[64], *prefix = NULL; 1767 1768 /* Do not print anything if we record to the pipe. 
*/ 1769 if (STAT_RECORD && perf_stat.data.is_pipe) 1770 return; 1771 1772 if (interval) 1773 print_interval(prefix = buf, ts); 1774 else 1775 print_header(argc, argv); 1776 1777 if (metric_only) { 1778 static int num_print_iv; 1779 1780 if (num_print_iv == 0 && !interval) 1781 print_metric_headers(prefix, false); 1782 if (num_print_iv++ == 25) 1783 num_print_iv = 0; 1784 if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) 1785 fprintf(stat_config.output, "%s", prefix); 1786 } 1787 1788 switch (stat_config.aggr_mode) { 1789 case AGGR_CORE: 1790 case AGGR_SOCKET: 1791 print_aggr(prefix); 1792 break; 1793 case AGGR_THREAD: 1794 evlist__for_each_entry(evsel_list, counter) { 1795 if (is_duration_time(counter)) 1796 continue; 1797 print_aggr_thread(counter, prefix); 1798 } 1799 break; 1800 case AGGR_GLOBAL: 1801 evlist__for_each_entry(evsel_list, counter) { 1802 if (is_duration_time(counter)) 1803 continue; 1804 print_counter_aggr(counter, prefix); 1805 } 1806 if (metric_only) 1807 fputc('\n', stat_config.output); 1808 break; 1809 case AGGR_NONE: 1810 if (metric_only) 1811 print_no_aggr_metric(prefix); 1812 else { 1813 evlist__for_each_entry(evsel_list, counter) { 1814 if (is_duration_time(counter)) 1815 continue; 1816 print_counter(counter, prefix); 1817 } 1818 } 1819 break; 1820 case AGGR_UNSET: 1821 default: 1822 break; 1823 } 1824 1825 if (!interval && !csv_output) 1826 print_footer(); 1827 1828 fflush(stat_config.output); 1829 } 1830 1831 static volatile int signr = -1; 1832 1833 static void skip_signal(int signo) 1834 { 1835 if ((child_pid == -1) || stat_config.interval) 1836 done = 1; 1837 1838 signr = signo; 1839 /* 1840 * render child_pid harmless 1841 * won't send SIGTERM to a random 1842 * process in case of race condition 1843 * and fast PID recycling 1844 */ 1845 child_pid = -1; 1846 } 1847 1848 static void sig_atexit(void) 1849 { 1850 sigset_t set, oset; 1851 1852 /* 1853 * avoid race condition with SIGCHLD handler 1854 * in skip_signal() which is modifying 
child_pid 1855 * goal is to avoid send SIGTERM to a random 1856 * process 1857 */ 1858 sigemptyset(&set); 1859 sigaddset(&set, SIGCHLD); 1860 sigprocmask(SIG_BLOCK, &set, &oset); 1861 1862 if (child_pid != -1) 1863 kill(child_pid, SIGTERM); 1864 1865 sigprocmask(SIG_SETMASK, &oset, NULL); 1866 1867 if (signr == -1) 1868 return; 1869 1870 signal(signr, SIG_DFL); 1871 kill(getpid(), signr); 1872 } 1873 1874 static int stat__set_big_num(const struct option *opt __maybe_unused, 1875 const char *s __maybe_unused, int unset) 1876 { 1877 big_num_opt = unset ? 0 : 1; 1878 return 0; 1879 } 1880 1881 static int enable_metric_only(const struct option *opt __maybe_unused, 1882 const char *s __maybe_unused, int unset) 1883 { 1884 force_metric_only = true; 1885 metric_only = !unset; 1886 return 0; 1887 } 1888 1889 static int parse_metric_groups(const struct option *opt, 1890 const char *str, 1891 int unset __maybe_unused) 1892 { 1893 return metricgroup__parse_groups(opt, str, &metric_events); 1894 } 1895 1896 static const struct option stat_options[] = { 1897 OPT_BOOLEAN('T', "transaction", &transaction_run, 1898 "hardware transaction statistics"), 1899 OPT_CALLBACK('e', "event", &evsel_list, "event", 1900 "event selector. 
use 'perf list' to list available events", 1901 parse_events_option), 1902 OPT_CALLBACK(0, "filter", &evsel_list, "filter", 1903 "event filter", parse_filter), 1904 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 1905 "child tasks do not inherit counters"), 1906 OPT_STRING('p', "pid", &target.pid, "pid", 1907 "stat events on existing process id"), 1908 OPT_STRING('t', "tid", &target.tid, "tid", 1909 "stat events on existing thread id"), 1910 OPT_BOOLEAN('a', "all-cpus", &target.system_wide, 1911 "system-wide collection from all CPUs"), 1912 OPT_BOOLEAN('g', "group", &group, 1913 "put the counters into a counter group"), 1914 OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), 1915 OPT_INCR('v', "verbose", &verbose, 1916 "be more verbose (show counter open errors, etc)"), 1917 OPT_INTEGER('r', "repeat", &run_count, 1918 "repeat command and print average + stddev (max: 100, forever: 0)"), 1919 OPT_BOOLEAN('n', "null", &null_run, 1920 "null run - dont start any counters"), 1921 OPT_INCR('d', "detailed", &detailed_run, 1922 "detailed run - start a lot of events"), 1923 OPT_BOOLEAN('S', "sync", &sync_run, 1924 "call sync() before starting a run"), 1925 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 1926 "print large numbers with thousands\' separators", 1927 stat__set_big_num), 1928 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 1929 "list of cpus to monitor in system-wide"), 1930 OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, 1931 "disable CPU count aggregation", AGGR_NONE), 1932 OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"), 1933 OPT_STRING('x', "field-separator", &csv_sep, "separator", 1934 "print counts with custom separator"), 1935 OPT_CALLBACK('G', "cgroup", &evsel_list, "name", 1936 "monitor event in cgroup name only", parse_cgroups), 1937 OPT_STRING('o', "output", &output_name, "file", "output file name"), 1938 OPT_BOOLEAN(0, "append", &append_file, "append to the output file"), 1939 OPT_INTEGER(0, 
"log-fd", &output_fd, 1940 "log output to fd, instead of stderr"), 1941 OPT_STRING(0, "pre", &pre_cmd, "command", 1942 "command to run prior to the measured command"), 1943 OPT_STRING(0, "post", &post_cmd, "command", 1944 "command to run after to the measured command"), 1945 OPT_UINTEGER('I', "interval-print", &stat_config.interval, 1946 "print counts at regular interval in ms " 1947 "(overhead is possible for values <= 100ms)"), 1948 OPT_INTEGER(0, "interval-count", &stat_config.times, 1949 "print counts for fixed number of times"), 1950 OPT_UINTEGER(0, "timeout", &stat_config.timeout, 1951 "stop workload and print counts after a timeout period in ms (>= 10ms)"), 1952 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, 1953 "aggregate counts per processor socket", AGGR_SOCKET), 1954 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, 1955 "aggregate counts per physical processor core", AGGR_CORE), 1956 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, 1957 "aggregate counts per thread", AGGR_THREAD), 1958 OPT_UINTEGER('D', "delay", &initial_delay, 1959 "ms to wait before starting measurement after program start"), 1960 OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL, 1961 "Only print computed metrics. 
No raw values", enable_metric_only), 1962 OPT_BOOLEAN(0, "topdown", &topdown_run, 1963 "measure topdown level 1 statistics"), 1964 OPT_BOOLEAN(0, "smi-cost", &smi_cost, 1965 "measure SMI cost"), 1966 OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list", 1967 "monitor specified metrics or metric groups (separated by ,)", 1968 parse_metric_groups), 1969 OPT_END() 1970 }; 1971 1972 static int perf_stat__get_socket(struct cpu_map *map, int cpu) 1973 { 1974 return cpu_map__get_socket(map, cpu, NULL); 1975 } 1976 1977 static int perf_stat__get_core(struct cpu_map *map, int cpu) 1978 { 1979 return cpu_map__get_core(map, cpu, NULL); 1980 } 1981 1982 static int cpu_map__get_max(struct cpu_map *map) 1983 { 1984 int i, max = -1; 1985 1986 for (i = 0; i < map->nr; i++) { 1987 if (map->map[i] > max) 1988 max = map->map[i]; 1989 } 1990 1991 return max; 1992 } 1993 1994 static struct cpu_map *cpus_aggr_map; 1995 1996 static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx) 1997 { 1998 int cpu; 1999 2000 if (idx >= map->nr) 2001 return -1; 2002 2003 cpu = map->map[idx]; 2004 2005 if (cpus_aggr_map->map[cpu] == -1) 2006 cpus_aggr_map->map[cpu] = get_id(map, idx); 2007 2008 return cpus_aggr_map->map[cpu]; 2009 } 2010 2011 static int perf_stat__get_socket_cached(struct cpu_map *map, int idx) 2012 { 2013 return perf_stat__get_aggr(perf_stat__get_socket, map, idx); 2014 } 2015 2016 static int perf_stat__get_core_cached(struct cpu_map *map, int idx) 2017 { 2018 return perf_stat__get_aggr(perf_stat__get_core, map, idx); 2019 } 2020 2021 static int perf_stat_init_aggr_mode(void) 2022 { 2023 int nr; 2024 2025 switch (stat_config.aggr_mode) { 2026 case AGGR_SOCKET: 2027 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { 2028 perror("cannot build socket map"); 2029 return -1; 2030 } 2031 aggr_get_id = perf_stat__get_socket_cached; 2032 break; 2033 case AGGR_CORE: 2034 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { 2035 
perror("cannot build core map"); 2036 return -1; 2037 } 2038 aggr_get_id = perf_stat__get_core_cached; 2039 break; 2040 case AGGR_NONE: 2041 case AGGR_GLOBAL: 2042 case AGGR_THREAD: 2043 case AGGR_UNSET: 2044 default: 2045 break; 2046 } 2047 2048 /* 2049 * The evsel_list->cpus is the base we operate on, 2050 * taking the highest cpu number to be the size of 2051 * the aggregation translate cpumap. 2052 */ 2053 nr = cpu_map__get_max(evsel_list->cpus); 2054 cpus_aggr_map = cpu_map__empty_new(nr + 1); 2055 return cpus_aggr_map ? 0 : -ENOMEM; 2056 } 2057 2058 static void perf_stat__exit_aggr_mode(void) 2059 { 2060 cpu_map__put(aggr_map); 2061 cpu_map__put(cpus_aggr_map); 2062 aggr_map = NULL; 2063 cpus_aggr_map = NULL; 2064 } 2065 2066 static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx) 2067 { 2068 int cpu; 2069 2070 if (idx > map->nr) 2071 return -1; 2072 2073 cpu = map->map[idx]; 2074 2075 if (cpu >= env->nr_cpus_avail) 2076 return -1; 2077 2078 return cpu; 2079 } 2080 2081 static int perf_env__get_socket(struct cpu_map *map, int idx, void *data) 2082 { 2083 struct perf_env *env = data; 2084 int cpu = perf_env__get_cpu(env, map, idx); 2085 2086 return cpu == -1 ? -1 : env->cpu[cpu].socket_id; 2087 } 2088 2089 static int perf_env__get_core(struct cpu_map *map, int idx, void *data) 2090 { 2091 struct perf_env *env = data; 2092 int core = -1, cpu = perf_env__get_cpu(env, map, idx); 2093 2094 if (cpu != -1) { 2095 int socket_id = env->cpu[cpu].socket_id; 2096 2097 /* 2098 * Encode socket in upper 16 bits 2099 * core_id is relative to socket, and 2100 * we need a global id. So we combine 2101 * socket + core id. 
/*
 * Drop the events the "cpu" PMU does not provide from @attr and build the
 * matching event string.
 *
 * @attr:      NULL-terminated array of event names. It is compacted in place
 *             so that only the supported names remain (still NULL-terminated).
 * @str:       receives a malloc()ed string with the remaining names joined by
 *             ',', or an empty string when nothing is left. The caller owns
 *             it.
 * @use_group: wrap the joined names in '{' ... '}'.
 *
 * Returns 0 on success, -1 when the allocation fails (*str is NULL then).
 */
static int topdown_filter_events(const char **attr, char **str, bool use_group)
{
	int kept = 0;
	int len = 0;
	int i;
	char *out;

	for (i = 0; attr[i]; i++) {
		if (!pmu_have_event("cpu", attr[i]))
			continue;
		/* name plus one separator (or the final terminator) */
		len += strlen(attr[i]) + 1;
		attr[kept++] = attr[i];
	}
	attr[kept] = NULL;

	out = malloc(len + 1 + 2);
	*str = out;
	if (!out)
		return -1;

	if (kept == 0) {
		*out = 0;
		return 0;
	}

	if (use_group)
		*out++ = '{';

	for (i = 0; i < kept; i++) {
		size_t n = strlen(attr[i]);

		memcpy(out, attr[i], n);
		out += n;
		*out++ = ',';
	}

	/* the last ',' is replaced by the closing brace or the terminator */
	if (use_group) {
		out[-1] = '}';
		*out = 0;
	} else {
		out[-1] = 0;
	}

	return 0;
}
2247 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2248 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2249 2250 { .type = PERF_TYPE_HW_CACHE, 2251 .config = 2252 PERF_COUNT_HW_CACHE_L1D << 0 | 2253 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2254 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2255 2256 { .type = PERF_TYPE_HW_CACHE, 2257 .config = 2258 PERF_COUNT_HW_CACHE_LL << 0 | 2259 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2260 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2261 2262 { .type = PERF_TYPE_HW_CACHE, 2263 .config = 2264 PERF_COUNT_HW_CACHE_LL << 0 | 2265 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2266 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2267 }; 2268 2269 /* 2270 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: 2271 */ 2272 struct perf_event_attr very_detailed_attrs[] = { 2273 2274 { .type = PERF_TYPE_HW_CACHE, 2275 .config = 2276 PERF_COUNT_HW_CACHE_L1I << 0 | 2277 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2278 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2279 2280 { .type = PERF_TYPE_HW_CACHE, 2281 .config = 2282 PERF_COUNT_HW_CACHE_L1I << 0 | 2283 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2284 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2285 2286 { .type = PERF_TYPE_HW_CACHE, 2287 .config = 2288 PERF_COUNT_HW_CACHE_DTLB << 0 | 2289 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2290 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2291 2292 { .type = PERF_TYPE_HW_CACHE, 2293 .config = 2294 PERF_COUNT_HW_CACHE_DTLB << 0 | 2295 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2296 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2297 2298 { .type = PERF_TYPE_HW_CACHE, 2299 .config = 2300 PERF_COUNT_HW_CACHE_ITLB << 0 | 2301 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2302 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2303 2304 { .type = PERF_TYPE_HW_CACHE, 2305 .config = 2306 PERF_COUNT_HW_CACHE_ITLB << 0 | 2307 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2308 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2309 2310 }; 2311 2312 /* 2313 * Very, very detailed stats (-d -d -d), adding prefetch events: 
2314 */ 2315 struct perf_event_attr very_very_detailed_attrs[] = { 2316 2317 { .type = PERF_TYPE_HW_CACHE, 2318 .config = 2319 PERF_COUNT_HW_CACHE_L1D << 0 | 2320 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 2321 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2322 2323 { .type = PERF_TYPE_HW_CACHE, 2324 .config = 2325 PERF_COUNT_HW_CACHE_L1D << 0 | 2326 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 2327 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2328 }; 2329 2330 /* Set attrs if no event is selected and !null_run: */ 2331 if (null_run) 2332 return 0; 2333 2334 if (transaction_run) { 2335 struct parse_events_error errinfo; 2336 2337 if (pmu_have_event("cpu", "cycles-ct") && 2338 pmu_have_event("cpu", "el-start")) 2339 err = parse_events(evsel_list, transaction_attrs, 2340 &errinfo); 2341 else 2342 err = parse_events(evsel_list, 2343 transaction_limited_attrs, 2344 &errinfo); 2345 if (err) { 2346 fprintf(stderr, "Cannot set up transaction events\n"); 2347 return -1; 2348 } 2349 return 0; 2350 } 2351 2352 if (smi_cost) { 2353 int smi; 2354 2355 if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) { 2356 fprintf(stderr, "freeze_on_smi is not supported.\n"); 2357 return -1; 2358 } 2359 2360 if (!smi) { 2361 if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) { 2362 fprintf(stderr, "Failed to set freeze_on_smi.\n"); 2363 return -1; 2364 } 2365 smi_reset = true; 2366 } 2367 2368 if (pmu_have_event("msr", "aperf") && 2369 pmu_have_event("msr", "smi")) { 2370 if (!force_metric_only) 2371 metric_only = true; 2372 err = parse_events(evsel_list, smi_cost_attrs, NULL); 2373 } else { 2374 fprintf(stderr, "To measure SMI cost, it needs " 2375 "msr/aperf/, msr/smi/ and cpu/cycles/ support\n"); 2376 return -1; 2377 } 2378 if (err) { 2379 fprintf(stderr, "Cannot set up SMI cost events\n"); 2380 return -1; 2381 } 2382 return 0; 2383 } 2384 2385 if (topdown_run) { 2386 char *str = NULL; 2387 bool warn = false; 2388 2389 if (stat_config.aggr_mode != AGGR_GLOBAL && 2390 stat_config.aggr_mode != 
AGGR_CORE) { 2391 pr_err("top down event configuration requires --per-core mode\n"); 2392 return -1; 2393 } 2394 stat_config.aggr_mode = AGGR_CORE; 2395 if (nr_cgroups || !target__has_cpu(&target)) { 2396 pr_err("top down event configuration requires system-wide mode (-a)\n"); 2397 return -1; 2398 } 2399 2400 if (!force_metric_only) 2401 metric_only = true; 2402 if (topdown_filter_events(topdown_attrs, &str, 2403 arch_topdown_check_group(&warn)) < 0) { 2404 pr_err("Out of memory\n"); 2405 return -1; 2406 } 2407 if (topdown_attrs[0] && str) { 2408 if (warn) 2409 arch_topdown_group_warn(); 2410 err = parse_events(evsel_list, str, NULL); 2411 if (err) { 2412 fprintf(stderr, 2413 "Cannot set up top down events %s: %d\n", 2414 str, err); 2415 free(str); 2416 return -1; 2417 } 2418 } else { 2419 fprintf(stderr, "System does not support topdown\n"); 2420 return -1; 2421 } 2422 free(str); 2423 } 2424 2425 if (!evsel_list->nr_entries) { 2426 if (target__has_cpu(&target)) 2427 default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; 2428 2429 if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) 2430 return -1; 2431 if (pmu_have_event("cpu", "stalled-cycles-frontend")) { 2432 if (perf_evlist__add_default_attrs(evsel_list, 2433 frontend_attrs) < 0) 2434 return -1; 2435 } 2436 if (pmu_have_event("cpu", "stalled-cycles-backend")) { 2437 if (perf_evlist__add_default_attrs(evsel_list, 2438 backend_attrs) < 0) 2439 return -1; 2440 } 2441 if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0) 2442 return -1; 2443 } 2444 2445 /* Detailed events get appended to the event list: */ 2446 2447 if (detailed_run < 1) 2448 return 0; 2449 2450 /* Append detailed run extra attributes: */ 2451 if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) 2452 return -1; 2453 2454 if (detailed_run < 2) 2455 return 0; 2456 2457 /* Append very detailed run extra attributes: */ 2458 if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) 2459 return -1; 
2460 2461 if (detailed_run < 3) 2462 return 0; 2463 2464 /* Append very, very detailed run extra attributes: */ 2465 return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); 2466 } 2467 2468 static const char * const stat_record_usage[] = { 2469 "perf stat record [<options>]", 2470 NULL, 2471 }; 2472 2473 static void init_features(struct perf_session *session) 2474 { 2475 int feat; 2476 2477 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 2478 perf_header__set_feat(&session->header, feat); 2479 2480 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 2481 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 2482 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 2483 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 2484 } 2485 2486 static int __cmd_record(int argc, const char **argv) 2487 { 2488 struct perf_session *session; 2489 struct perf_data *data = &perf_stat.data; 2490 2491 argc = parse_options(argc, argv, stat_options, stat_record_usage, 2492 PARSE_OPT_STOP_AT_NON_OPTION); 2493 2494 if (output_name) 2495 data->file.path = output_name; 2496 2497 if (run_count != 1 || forever) { 2498 pr_err("Cannot use -r option with perf stat record.\n"); 2499 return -1; 2500 } 2501 2502 session = perf_session__new(data, false, NULL); 2503 if (session == NULL) { 2504 pr_err("Perf session creation failed.\n"); 2505 return -1; 2506 } 2507 2508 init_features(session); 2509 2510 session->evlist = evsel_list; 2511 perf_stat.session = session; 2512 perf_stat.record = true; 2513 return argc; 2514 } 2515 2516 static int process_stat_round_event(struct perf_tool *tool __maybe_unused, 2517 union perf_event *event, 2518 struct perf_session *session) 2519 { 2520 struct stat_round_event *stat_round = &event->stat_round; 2521 struct perf_evsel *counter; 2522 struct timespec tsh, *ts = NULL; 2523 const char **argv = session->header.env.cmdline_argv; 2524 int argc = session->header.env.nr_cmdline; 2525 2526 
evlist__for_each_entry(evsel_list, counter) 2527 perf_stat_process_counter(&stat_config, counter); 2528 2529 if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL) 2530 update_stats(&walltime_nsecs_stats, stat_round->time); 2531 2532 if (stat_config.interval && stat_round->time) { 2533 tsh.tv_sec = stat_round->time / NSEC_PER_SEC; 2534 tsh.tv_nsec = stat_round->time % NSEC_PER_SEC; 2535 ts = &tsh; 2536 } 2537 2538 print_counters(ts, argc, argv); 2539 return 0; 2540 } 2541 2542 static 2543 int process_stat_config_event(struct perf_tool *tool, 2544 union perf_event *event, 2545 struct perf_session *session __maybe_unused) 2546 { 2547 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2548 2549 perf_event__read_stat_config(&stat_config, &event->stat_config); 2550 2551 if (cpu_map__empty(st->cpus)) { 2552 if (st->aggr_mode != AGGR_UNSET) 2553 pr_warning("warning: processing task data, aggregation mode not set\n"); 2554 return 0; 2555 } 2556 2557 if (st->aggr_mode != AGGR_UNSET) 2558 stat_config.aggr_mode = st->aggr_mode; 2559 2560 if (perf_stat.data.is_pipe) 2561 perf_stat_init_aggr_mode(); 2562 else 2563 perf_stat_init_aggr_mode_file(st); 2564 2565 return 0; 2566 } 2567 2568 static int set_maps(struct perf_stat *st) 2569 { 2570 if (!st->cpus || !st->threads) 2571 return 0; 2572 2573 if (WARN_ONCE(st->maps_allocated, "stats double allocation\n")) 2574 return -EINVAL; 2575 2576 perf_evlist__set_maps(evsel_list, st->cpus, st->threads); 2577 2578 if (perf_evlist__alloc_stats(evsel_list, true)) 2579 return -ENOMEM; 2580 2581 st->maps_allocated = true; 2582 return 0; 2583 } 2584 2585 static 2586 int process_thread_map_event(struct perf_tool *tool, 2587 union perf_event *event, 2588 struct perf_session *session __maybe_unused) 2589 { 2590 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2591 2592 if (st->threads) { 2593 pr_warning("Extra thread map event, ignoring.\n"); 2594 return 0; 2595 } 2596 2597 st->threads = 
thread_map__new_event(&event->thread_map); 2598 if (!st->threads) 2599 return -ENOMEM; 2600 2601 return set_maps(st); 2602 } 2603 2604 static 2605 int process_cpu_map_event(struct perf_tool *tool, 2606 union perf_event *event, 2607 struct perf_session *session __maybe_unused) 2608 { 2609 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2610 struct cpu_map *cpus; 2611 2612 if (st->cpus) { 2613 pr_warning("Extra cpu map event, ignoring.\n"); 2614 return 0; 2615 } 2616 2617 cpus = cpu_map__new_data(&event->cpu_map.data); 2618 if (!cpus) 2619 return -ENOMEM; 2620 2621 st->cpus = cpus; 2622 return set_maps(st); 2623 } 2624 2625 static int runtime_stat_new(struct perf_stat_config *config, int nthreads) 2626 { 2627 int i; 2628 2629 config->stats = calloc(nthreads, sizeof(struct runtime_stat)); 2630 if (!config->stats) 2631 return -1; 2632 2633 config->stats_num = nthreads; 2634 2635 for (i = 0; i < nthreads; i++) 2636 runtime_stat__init(&config->stats[i]); 2637 2638 return 0; 2639 } 2640 2641 static void runtime_stat_delete(struct perf_stat_config *config) 2642 { 2643 int i; 2644 2645 if (!config->stats) 2646 return; 2647 2648 for (i = 0; i < config->stats_num; i++) 2649 runtime_stat__exit(&config->stats[i]); 2650 2651 free(config->stats); 2652 } 2653 2654 static const char * const stat_report_usage[] = { 2655 "perf stat report [<options>]", 2656 NULL, 2657 }; 2658 2659 static struct perf_stat perf_stat = { 2660 .tool = { 2661 .attr = perf_event__process_attr, 2662 .event_update = perf_event__process_event_update, 2663 .thread_map = process_thread_map_event, 2664 .cpu_map = process_cpu_map_event, 2665 .stat_config = process_stat_config_event, 2666 .stat = perf_event__process_stat_event, 2667 .stat_round = process_stat_round_event, 2668 }, 2669 .aggr_mode = AGGR_UNSET, 2670 }; 2671 2672 static int __cmd_report(int argc, const char **argv) 2673 { 2674 struct perf_session *session; 2675 const struct option options[] = { 2676 OPT_STRING('i', "input", 
&input_name, "file", "input file name"), 2677 OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, 2678 "aggregate counts per processor socket", AGGR_SOCKET), 2679 OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, 2680 "aggregate counts per physical processor core", AGGR_CORE), 2681 OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, 2682 "disable CPU count aggregation", AGGR_NONE), 2683 OPT_END() 2684 }; 2685 struct stat st; 2686 int ret; 2687 2688 argc = parse_options(argc, argv, options, stat_report_usage, 0); 2689 2690 if (!input_name || !strlen(input_name)) { 2691 if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) 2692 input_name = "-"; 2693 else 2694 input_name = "perf.data"; 2695 } 2696 2697 perf_stat.data.file.path = input_name; 2698 perf_stat.data.mode = PERF_DATA_MODE_READ; 2699 2700 session = perf_session__new(&perf_stat.data, false, &perf_stat.tool); 2701 if (session == NULL) 2702 return -1; 2703 2704 perf_stat.session = session; 2705 stat_config.output = stderr; 2706 evsel_list = session->evlist; 2707 2708 ret = perf_session__process_events(session); 2709 if (ret) 2710 return ret; 2711 2712 perf_session__delete(session); 2713 return 0; 2714 } 2715 2716 static void setup_system_wide(int forks) 2717 { 2718 /* 2719 * Make system wide (-a) the default target if 2720 * no target was specified and one of following 2721 * conditions is met: 2722 * 2723 * - there's no workload specified 2724 * - there is workload specified but all requested 2725 * events are system wide events 2726 */ 2727 if (!target__none(&target)) 2728 return; 2729 2730 if (!forks) 2731 target.system_wide = true; 2732 else { 2733 struct perf_evsel *counter; 2734 2735 evlist__for_each_entry(evsel_list, counter) { 2736 if (!counter->system_wide) 2737 return; 2738 } 2739 2740 if (evsel_list->nr_entries) 2741 target.system_wide = true; 2742 } 2743 } 2744 2745 int cmd_stat(int argc, const char **argv) 2746 { 2747 const char * const stat_usage[] = { 2748 "perf stat [<options>] [<command>]", 
2749 NULL 2750 }; 2751 int status = -EINVAL, run_idx; 2752 const char *mode; 2753 FILE *output = stderr; 2754 unsigned int interval, timeout; 2755 const char * const stat_subcommands[] = { "record", "report" }; 2756 2757 setlocale(LC_ALL, ""); 2758 2759 evsel_list = perf_evlist__new(); 2760 if (evsel_list == NULL) 2761 return -ENOMEM; 2762 2763 parse_events__shrink_config_terms(); 2764 argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, 2765 (const char **) stat_usage, 2766 PARSE_OPT_STOP_AT_NON_OPTION); 2767 perf_stat__collect_metric_expr(evsel_list); 2768 perf_stat__init_shadow_stats(); 2769 2770 if (csv_sep) { 2771 csv_output = true; 2772 if (!strcmp(csv_sep, "\\t")) 2773 csv_sep = "\t"; 2774 } else 2775 csv_sep = DEFAULT_SEPARATOR; 2776 2777 if (argc && !strncmp(argv[0], "rec", 3)) { 2778 argc = __cmd_record(argc, argv); 2779 if (argc < 0) 2780 return -1; 2781 } else if (argc && !strncmp(argv[0], "rep", 3)) 2782 return __cmd_report(argc, argv); 2783 2784 interval = stat_config.interval; 2785 timeout = stat_config.timeout; 2786 2787 /* 2788 * For record command the -o is already taken care of. 
2789 */ 2790 if (!STAT_RECORD && output_name && strcmp(output_name, "-")) 2791 output = NULL; 2792 2793 if (output_name && output_fd) { 2794 fprintf(stderr, "cannot use both --output and --log-fd\n"); 2795 parse_options_usage(stat_usage, stat_options, "o", 1); 2796 parse_options_usage(NULL, stat_options, "log-fd", 0); 2797 goto out; 2798 } 2799 2800 if (metric_only && stat_config.aggr_mode == AGGR_THREAD) { 2801 fprintf(stderr, "--metric-only is not supported with --per-thread\n"); 2802 goto out; 2803 } 2804 2805 if (metric_only && run_count > 1) { 2806 fprintf(stderr, "--metric-only is not supported with -r\n"); 2807 goto out; 2808 } 2809 2810 if (output_fd < 0) { 2811 fprintf(stderr, "argument to --log-fd must be a > 0\n"); 2812 parse_options_usage(stat_usage, stat_options, "log-fd", 0); 2813 goto out; 2814 } 2815 2816 if (!output) { 2817 struct timespec tm; 2818 mode = append_file ? "a" : "w"; 2819 2820 output = fopen(output_name, mode); 2821 if (!output) { 2822 perror("failed to create output file"); 2823 return -1; 2824 } 2825 clock_gettime(CLOCK_REALTIME, &tm); 2826 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); 2827 } else if (output_fd > 0) { 2828 mode = append_file ? "a" : "w"; 2829 output = fdopen(output_fd, mode); 2830 if (!output) { 2831 perror("Failed opening logfd"); 2832 return -errno; 2833 } 2834 } 2835 2836 stat_config.output = output; 2837 2838 /* 2839 * let the spreadsheet do the pretty-printing 2840 */ 2841 if (csv_output) { 2842 /* User explicitly passed -B? 
*/ 2843 if (big_num_opt == 1) { 2844 fprintf(stderr, "-B option not supported with -x\n"); 2845 parse_options_usage(stat_usage, stat_options, "B", 1); 2846 parse_options_usage(NULL, stat_options, "x", 1); 2847 goto out; 2848 } else /* Nope, so disable big number formatting */ 2849 big_num = false; 2850 } else if (big_num_opt == 0) /* User passed --no-big-num */ 2851 big_num = false; 2852 2853 setup_system_wide(argc); 2854 2855 if (run_count < 0) { 2856 pr_err("Run count must be a positive number\n"); 2857 parse_options_usage(stat_usage, stat_options, "r", 1); 2858 goto out; 2859 } else if (run_count == 0) { 2860 forever = true; 2861 run_count = 1; 2862 } 2863 2864 if ((stat_config.aggr_mode == AGGR_THREAD) && 2865 !target__has_task(&target)) { 2866 if (!target.system_wide || target.cpu_list) { 2867 fprintf(stderr, "The --per-thread option is only " 2868 "available when monitoring via -p -t -a " 2869 "options or only --per-thread.\n"); 2870 parse_options_usage(NULL, stat_options, "p", 1); 2871 parse_options_usage(NULL, stat_options, "t", 1); 2872 goto out; 2873 } 2874 } 2875 2876 /* 2877 * no_aggr, cgroup are for system-wide only 2878 * --per-thread is aggregated per thread, we dont mix it with cpu mode 2879 */ 2880 if (((stat_config.aggr_mode != AGGR_GLOBAL && 2881 stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) && 2882 !target__has_cpu(&target)) { 2883 fprintf(stderr, "both cgroup and no-aggregation " 2884 "modes only available in system-wide mode\n"); 2885 2886 parse_options_usage(stat_usage, stat_options, "G", 1); 2887 parse_options_usage(NULL, stat_options, "A", 1); 2888 parse_options_usage(NULL, stat_options, "a", 1); 2889 goto out; 2890 } 2891 2892 if (add_default_attributes()) 2893 goto out; 2894 2895 target__validate(&target); 2896 2897 if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide)) 2898 target.per_thread = true; 2899 2900 if (perf_evlist__create_maps(evsel_list, &target) < 0) { 2901 if (target__has_task(&target)) { 2902 
pr_err("Problems finding threads of monitor\n"); 2903 parse_options_usage(stat_usage, stat_options, "p", 1); 2904 parse_options_usage(NULL, stat_options, "t", 1); 2905 } else if (target__has_cpu(&target)) { 2906 perror("failed to parse CPUs map"); 2907 parse_options_usage(stat_usage, stat_options, "C", 1); 2908 parse_options_usage(NULL, stat_options, "a", 1); 2909 } 2910 goto out; 2911 } 2912 2913 /* 2914 * Initialize thread_map with comm names, 2915 * so we could print it out on output. 2916 */ 2917 if (stat_config.aggr_mode == AGGR_THREAD) { 2918 thread_map__read_comms(evsel_list->threads); 2919 if (target.system_wide) { 2920 if (runtime_stat_new(&stat_config, 2921 thread_map__nr(evsel_list->threads))) { 2922 goto out; 2923 } 2924 } 2925 } 2926 2927 if (stat_config.times && interval) 2928 interval_count = true; 2929 else if (stat_config.times && !interval) { 2930 pr_err("interval-count option should be used together with " 2931 "interval-print.\n"); 2932 parse_options_usage(stat_usage, stat_options, "interval-count", 0); 2933 parse_options_usage(stat_usage, stat_options, "I", 1); 2934 goto out; 2935 } 2936 2937 if (timeout && timeout < 100) { 2938 if (timeout < 10) { 2939 pr_err("timeout must be >= 10ms.\n"); 2940 parse_options_usage(stat_usage, stat_options, "timeout", 0); 2941 goto out; 2942 } else 2943 pr_warning("timeout < 100ms. " 2944 "The overhead percentage could be high in some cases. " 2945 "Please proceed with caution.\n"); 2946 } 2947 if (timeout && interval) { 2948 pr_err("timeout option is not supported with interval-print.\n"); 2949 parse_options_usage(stat_usage, stat_options, "timeout", 0); 2950 parse_options_usage(stat_usage, stat_options, "I", 1); 2951 goto out; 2952 } 2953 2954 if (perf_evlist__alloc_stats(evsel_list, interval)) 2955 goto out; 2956 2957 if (perf_stat_init_aggr_mode()) 2958 goto out; 2959 2960 /* 2961 * We dont want to block the signals - that would cause 2962 * child tasks to inherit that and Ctrl-C would not work. 
2963 * What we want is for Ctrl-C to work in the exec()-ed 2964 * task, but being ignored by perf stat itself: 2965 */ 2966 atexit(sig_atexit); 2967 if (!forever) 2968 signal(SIGINT, skip_signal); 2969 signal(SIGCHLD, skip_signal); 2970 signal(SIGALRM, skip_signal); 2971 signal(SIGABRT, skip_signal); 2972 2973 status = 0; 2974 for (run_idx = 0; forever || run_idx < run_count; run_idx++) { 2975 if (run_count != 1 && verbose > 0) 2976 fprintf(output, "[ perf stat: executing run #%d ... ]\n", 2977 run_idx + 1); 2978 2979 status = run_perf_stat(argc, argv); 2980 if (forever && status != -1) { 2981 print_counters(NULL, argc, argv); 2982 perf_stat__reset_stats(); 2983 } 2984 } 2985 2986 if (!forever && status != -1 && !interval) 2987 print_counters(NULL, argc, argv); 2988 2989 if (STAT_RECORD) { 2990 /* 2991 * We synthesize the kernel mmap record just so that older tools 2992 * don't emit warnings about not being able to resolve symbols 2993 * due to /proc/sys/kernel/kptr_restrict settings and instear provide 2994 * a saner message about no samples being in the perf.data file. 
2995 * 2996 * This also serves to suppress a warning about f_header.data.size == 0 2997 * in header.c at the moment 'perf stat record' gets introduced, which 2998 * is not really needed once we start adding the stat specific PERF_RECORD_ 2999 * records, but the need to suppress the kptr_restrict messages in older 3000 * tools remain -acme 3001 */ 3002 int fd = perf_data__fd(&perf_stat.data); 3003 int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, 3004 process_synthesized_event, 3005 &perf_stat.session->machines.host); 3006 if (err) { 3007 pr_warning("Couldn't synthesize the kernel mmap record, harmless, " 3008 "older tools may produce warnings about this file\n."); 3009 } 3010 3011 if (!interval) { 3012 if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL)) 3013 pr_err("failed to write stat round event\n"); 3014 } 3015 3016 if (!perf_stat.data.is_pipe) { 3017 perf_stat.session->header.data_size += perf_stat.bytes_written; 3018 perf_session__write_header(perf_stat.session, evsel_list, fd, true); 3019 } 3020 3021 perf_session__delete(perf_stat.session); 3022 } 3023 3024 perf_stat__exit_aggr_mode(); 3025 perf_evlist__free_stats(evsel_list); 3026 out: 3027 if (smi_cost && smi_reset) 3028 sysfs__write_int(FREEZE_ON_SMI_PATH, 0); 3029 3030 perf_evlist__delete(evsel_list); 3031 3032 runtime_stat_delete(&stat_config); 3033 3034 return status; 3035 } 3036