/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ./hackbench 10

  Time: 0.118

  Performance counter stats for './hackbench 10':

       1708.761321 task-clock                #   11.037 CPUs utilized
            41,190 context-switches          #    0.024 M/sec
             6,735 CPU-migrations            #    0.004 M/sec
            17,318 page-faults               #    0.010 M/sec
     5,205,202,243 cycles                    #    3.046 GHz
     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     1,600,790,871 stalled-cycles-backend    #   30.75% backend cycles idle
     2,603,501,247 instructions              #    0.50  insns per cycle
                                             #    1.48  stalled cycles per insn
       484,357,498 branches                  #  283.455 M/sec
         6,388,934 branch-misses             #    1.32% of all branches

        0.154822978  seconds time elapsed

 *
 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include "perf.h"
#include "builtin.h"
#include "util/cgroup.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/pmu.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/color.h"
#include "util/stat.h"
#include "util/header.h"
#include "util/cpumap.h"
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/counts.h"
#include "util/group.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/string2.h"
#include "util/metricgroup.h"
#include "util/top.h"
#include "asm/bug.h"

#include <linux/time64.h>
#include <api/fs/fs.h>
#include <errno.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/prctl.h>
#include <inttypes.h>
#include <locale.h>
#include <math.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <unistd.h>
#include <sys/time.h>
#include <sys/resource.h>

#include "sane_ctype.h"

#define DEFAULT_SEPARATOR	" "
#define CNTR_NOT_SUPPORTED	"<not supported>"
#define CNTR_NOT_COUNTED	"<not counted>"
#define FREEZE_ON_SMI_PATH	"devices/cpu/freeze_on_smi"

static void print_counters(struct timespec *ts, int argc, const char **argv);

/* Default events used for perf stat -T */
static const char *transaction_attrs = {
	"task-clock,"
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/,"
	"cpu/el-start/,"
	"cpu/cycles-ct/"
	"}"
};

/* More limited version when the CPU does not have all events.
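 *
 * (Illustrative note, not from the original source.)  On CPUs that lack
 * the elision events (cpu/el-start/, cpu/cycles-ct/), this shorter list
 * is substituted so that e.g.:
 *
 *	perf stat -T -- ./workload
 *
 * still works using only the cycles-t and tx-start transaction events.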
*/ 111 static const char * transaction_limited_attrs = { 112 "task-clock," 113 "{" 114 "instructions," 115 "cycles," 116 "cpu/cycles-t/," 117 "cpu/tx-start/" 118 "}" 119 }; 120 121 static const char * topdown_attrs[] = { 122 "topdown-total-slots", 123 "topdown-slots-retired", 124 "topdown-recovery-bubbles", 125 "topdown-fetch-bubbles", 126 "topdown-slots-issued", 127 NULL, 128 }; 129 130 static const char *smi_cost_attrs = { 131 "{" 132 "msr/aperf/," 133 "msr/smi/," 134 "cycles" 135 "}" 136 }; 137 138 static struct perf_evlist *evsel_list; 139 140 static struct rblist metric_events; 141 142 static struct target target = { 143 .uid = UINT_MAX, 144 }; 145 146 typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu); 147 148 #define METRIC_ONLY_LEN 20 149 150 static int run_count = 1; 151 static bool no_inherit = false; 152 static volatile pid_t child_pid = -1; 153 static bool null_run = false; 154 static int detailed_run = 0; 155 static bool transaction_run; 156 static bool topdown_run = false; 157 static bool smi_cost = false; 158 static bool smi_reset = false; 159 static bool big_num = true; 160 static int big_num_opt = -1; 161 static const char *csv_sep = NULL; 162 static bool csv_output = false; 163 static bool group = false; 164 static const char *pre_cmd = NULL; 165 static const char *post_cmd = NULL; 166 static bool sync_run = false; 167 static unsigned int initial_delay = 0; 168 static unsigned int unit_width = 4; /* strlen("unit") */ 169 static bool forever = false; 170 static bool metric_only = false; 171 static bool force_metric_only = false; 172 static bool no_merge = false; 173 static bool walltime_run_table = false; 174 static struct timespec ref_time; 175 static struct cpu_map *aggr_map; 176 static aggr_get_id_t aggr_get_id; 177 static bool append_file; 178 static bool interval_count; 179 static bool interval_clear; 180 static const char *output_name; 181 static int output_fd; 182 static int print_free_counters_hint; 183 static int print_mixed_hw_group_error; 184 static u64 *walltime_run; 185 static bool ru_display = false; 186 static struct rusage ru_data; 187 static unsigned int metric_only_len = METRIC_ONLY_LEN; 188 189 struct perf_stat { 190 bool record; 191 struct perf_data data; 192 struct perf_session *session; 193 u64 bytes_written; 194 struct perf_tool tool; 195 bool maps_allocated; 196 struct cpu_map *cpus; 197 struct thread_map *threads; 198 enum aggr_mode aggr_mode; 199 }; 200 201 static struct perf_stat perf_stat; 202 #define STAT_RECORD perf_stat.record 203 204 static volatile int done = 0; 205 206 static struct perf_stat_config stat_config = { 207 .aggr_mode = AGGR_GLOBAL, 208 .scale = true, 209 }; 210 211 static bool is_duration_time(struct perf_evsel *evsel) 212 { 213 return !strcmp(evsel->name, "duration_time"); 214 } 215 216 static inline void diff_timespec(struct timespec *r, struct timespec *a, 217 struct timespec *b) 218 { 219 r->tv_sec = a->tv_sec - b->tv_sec; 220 if (a->tv_nsec < b->tv_nsec) { 221 r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec; 222 r->tv_sec--; 223 } else { 224 r->tv_nsec = a->tv_nsec - b->tv_nsec ; 225 } 226 } 227 228 static void perf_stat__reset_stats(void) 229 { 230 int i; 231 232 perf_evlist__reset_stats(evsel_list); 233 perf_stat__reset_shadow_stats(); 234 235 for (i = 0; i < stat_config.stats_num; i++) 236 perf_stat__reset_shadow_per_stat(&stat_config.stats[i]); 237 } 238 239 static int create_perf_stat_counter(struct perf_evsel *evsel) 240 { 241 struct perf_event_attr *attr = &evsel->attr; 242 struct perf_evsel *leader = 
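/*
 * (Sketch, not part of the original file.)  With the read_format bits
 * set below -- TOTAL_TIME_ENABLED/RUNNING always, plus
 * PERF_FORMAT_GROUP|PERF_FORMAT_ID for non-trivial groups -- a single
 * read() on the group leader returns every member at once, laid out as
 * documented in perf_event_open(2):
 *
 *	struct read_format {
 *		u64 nr;			// number of events in the group
 *		u64 time_enabled;	// PERF_FORMAT_TOTAL_TIME_ENABLED
 *		u64 time_running;	// PERF_FORMAT_TOTAL_TIME_RUNNING
 *		struct {
 *			u64 value;
 *			u64 id;		// PERF_FORMAT_ID
 *		} values[nr];
 *	};
 */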
evsel->leader; 243 244 if (stat_config.scale) { 245 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 246 PERF_FORMAT_TOTAL_TIME_RUNNING; 247 } 248 249 /* 250 * The event is part of non trivial group, let's enable 251 * the group read (for leader) and ID retrieval for all 252 * members. 253 */ 254 if (leader->nr_members > 1) 255 attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; 256 257 attr->inherit = !no_inherit; 258 259 /* 260 * Some events get initialized with sample_(period/type) set, 261 * like tracepoints. Clear it up for counting. 262 */ 263 attr->sample_period = 0; 264 265 /* 266 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless 267 * while avoiding that older tools show confusing messages. 268 * 269 * However for pipe sessions we need to keep it zero, 270 * because script's perf_evsel__check_attr is triggered 271 * by attr->sample_type != 0, and we can't run it on 272 * stat sessions. 273 */ 274 if (!(STAT_RECORD && perf_stat.data.is_pipe)) 275 attr->sample_type = PERF_SAMPLE_IDENTIFIER; 276 277 /* 278 * Disabling all counters initially, they will be enabled 279 * either manually by us or by kernel via enable_on_exec 280 * set later. 281 */ 282 if (perf_evsel__is_group_leader(evsel)) { 283 attr->disabled = 1; 284 285 /* 286 * In case of initial_delay we enable tracee 287 * events manually. 288 */ 289 if (target__none(&target) && !initial_delay) 290 attr->enable_on_exec = 1; 291 } 292 293 if (target__has_cpu(&target) && !target__has_per_thread(&target)) 294 return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); 295 296 return perf_evsel__open_per_thread(evsel, evsel_list->threads); 297 } 298 299 /* 300 * Does the counter have nsecs as a unit? 301 */ 302 static inline int nsec_counter(struct perf_evsel *evsel) 303 { 304 if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || 305 perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 306 return 1; 307 308 return 0; 309 } 310 311 static int process_synthesized_event(struct perf_tool *tool __maybe_unused, 312 union perf_event *event, 313 struct perf_sample *sample __maybe_unused, 314 struct machine *machine __maybe_unused) 315 { 316 if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) { 317 pr_err("failed to write perf data, error: %m\n"); 318 return -1; 319 } 320 321 perf_stat.bytes_written += event->header.size; 322 return 0; 323 } 324 325 static int write_stat_round_event(u64 tm, u64 type) 326 { 327 return perf_event__synthesize_stat_round(NULL, tm, type, 328 process_synthesized_event, 329 NULL); 330 } 331 332 #define WRITE_STAT_ROUND_EVENT(time, interval) \ 333 write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval) 334 335 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) 336 337 static int 338 perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread, 339 struct perf_counts_values *count) 340 { 341 struct perf_sample_id *sid = SID(counter, cpu, thread); 342 343 return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count, 344 process_synthesized_event, NULL); 345 } 346 347 /* 348 * Read out the results of a single counter: 349 * do not aggregate counts across CPUs in system-wide mode 350 */ 351 static int read_counter(struct perf_evsel *counter) 352 { 353 int nthreads = thread_map__nr(evsel_list->threads); 354 int ncpus, cpu, thread; 355 356 if (target__has_cpu(&target) && !target__has_per_thread(&target)) 357 ncpus = perf_evsel__nr_cpus(counter); 358 else 359 ncpus = 1; 360 361 if (!counter->supported) 362 return -ENOENT; 363 364 if 
(counter->system_wide)
		nthreads = 1;

	for (thread = 0; thread < nthreads; thread++) {
		for (cpu = 0; cpu < ncpus; cpu++) {
			struct perf_counts_values *count;

			count = perf_counts(counter->counts, cpu, thread);

			/*
			 * The leader's group read loads data into its group members
			 * (via perf_evsel__read_counter) and sets their count->loaded.
			 */
			if (!count->loaded &&
			    perf_evsel__read_counter(counter, cpu, thread)) {
				counter->counts->scaled = -1;
				perf_counts(counter->counts, cpu, thread)->ena = 0;
				perf_counts(counter->counts, cpu, thread)->run = 0;
				return -1;
			}

			count->loaded = false;

			if (STAT_RECORD) {
				if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
					pr_err("failed to write stat event\n");
					return -1;
				}
			}

			if (verbose > 1) {
				fprintf(stat_config.output,
					"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
						perf_evsel__name(counter),
						cpu,
						count->val, count->ena, count->run);
			}
		}
	}

	return 0;
}

static void read_counters(void)
{
	struct perf_evsel *counter;
	int ret;

	evlist__for_each_entry(evsel_list, counter) {
		ret = read_counter(counter);
		if (ret)
			pr_debug("failed to read counter %s\n", counter->name);

		if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
			pr_warning("failed to process counter %s\n", counter->name);
	}
}

static void process_interval(void)
{
	struct timespec ts, rs;

	read_counters();

	clock_gettime(CLOCK_MONOTONIC, &ts);
	diff_timespec(&rs, &ts, &ref_time);

	if (STAT_RECORD) {
		if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
			pr_err("failed to write stat round event\n");
	}

	init_stats(&walltime_nsecs_stats);
	update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000);
	print_counters(&rs, 0, NULL);
}

static void enable_counters(void)
{
	if (initial_delay)
		usleep(initial_delay * USEC_PER_MSEC);

	/*
	 * We need to enable counters only if:
	 * - we don't have tracee (attaching to task or cpu)
	 * - we have initial delay configured
	 */
	if (!target__none(&target) || initial_delay)
		perf_evlist__enable(evsel_list);
}

static void disable_counters(void)
{
	/*
	 * If we don't have tracee (attaching to task or cpu), counters may
	 * still be running. To get accurate group ratios, we must stop groups
	 * from counting before reading their constituent counters.
	 */
	if (!target__none(&target))
		perf_evlist__disable(evsel_list);
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
}

static int perf_stat_synthesize_config(bool is_pipe)
{
	int err;

	if (is_pipe) {
		err = perf_event__synthesize_attrs(NULL, perf_stat.session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}
	}

	err = perf_event__synthesize_extra_attr(NULL,
						evsel_list,
						process_synthesized_event,
						is_pipe);
	if (err < 0) {
		pr_err("Couldn't synthesize extra attrs.\n");
		return err;
	}

	err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads,
						 process_synthesized_event,
						 NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = perf_event__synthesize_stat_config(NULL, &stat_config,
						 process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize config.\n");
		return err;
	}

	return 0;
}

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))

static int __store_counter_ids(struct perf_evsel *counter)
{
	int cpu, thread;

	for (cpu = 0; cpu < xyarray__max_x(counter->fd); cpu++) {
		for (thread = 0; thread < xyarray__max_y(counter->fd);
		     thread++) {
			int fd = FD(counter, cpu, thread);

			if (perf_evlist__id_add_fd(evsel_list, counter,
						   cpu, thread, fd) < 0)
				return -1;
		}
	}

	return 0;
}

static int store_counter_ids(struct perf_evsel *counter)
{
	struct cpu_map *cpus = counter->cpus;
	struct thread_map *threads = counter->threads;

	if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr))
		return -ENOMEM;

	return __store_counter_ids(counter);
}

static bool perf_evsel__should_store_id(struct perf_evsel *counter)
{
	return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
}

static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
{
	struct perf_evsel *c2, *leader;
	bool is_open = true;

	leader = evsel->leader;
	pr_debug("Weak group for %s/%d failed\n",
			leader->name, leader->nr_members);

	/*
	 * for_each_group_member doesn't work here because it doesn't
	 * include the first entry.
	 */
	evlist__for_each_entry(evsel_list, c2) {
		if (c2 == evsel)
			is_open = false;
		if (c2->leader == leader) {
			if (is_open)
				perf_evsel__close(c2);
			c2->leader = c2;
			c2->nr_members = 0;
		}
	}
	return leader;
}

static int __run_perf_stat(int argc, const char **argv, int run_idx)
{
	int interval = stat_config.interval;
	int times = stat_config.times;
	int timeout = stat_config.timeout;
	char msg[BUFSIZ];
	unsigned long long t0, t1;
	struct perf_evsel *counter;
	struct timespec ts;
	size_t l;
	int status = 0;
	const bool forks = (argc > 0);
	bool is_pipe = STAT_RECORD ?
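/*
 * (Hedged usage note, not in the original.)  The weak-group fallback
 * handled by perf_evsel__reset_weak_group() above is reached via the
 * ':W' group modifier, e.g.:
 *
 *	perf stat -e '{cycles,instructions,branches,branch-misses}:W' -- cmd
 *
 * If the PMU cannot co-schedule the whole group, it is split apart and
 * each event is reopened standalone in the try_again loop below.
 */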
perf_stat.data.is_pipe : false; 597 struct perf_evsel_config_term *err_term; 598 599 if (interval) { 600 ts.tv_sec = interval / USEC_PER_MSEC; 601 ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC; 602 } else if (timeout) { 603 ts.tv_sec = timeout / USEC_PER_MSEC; 604 ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC; 605 } else { 606 ts.tv_sec = 1; 607 ts.tv_nsec = 0; 608 } 609 610 if (forks) { 611 if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe, 612 workload_exec_failed_signal) < 0) { 613 perror("failed to prepare workload"); 614 return -1; 615 } 616 child_pid = evsel_list->workload.pid; 617 } 618 619 if (group) 620 perf_evlist__set_leader(evsel_list); 621 622 evlist__for_each_entry(evsel_list, counter) { 623 try_again: 624 if (create_perf_stat_counter(counter) < 0) { 625 626 /* Weak group failed. Reset the group. */ 627 if ((errno == EINVAL || errno == EBADF) && 628 counter->leader != counter && 629 counter->weak_group) { 630 counter = perf_evsel__reset_weak_group(counter); 631 goto try_again; 632 } 633 634 /* 635 * PPC returns ENXIO for HW counters until 2.6.37 636 * (behavior changed with commit b0a873e). 637 */ 638 if (errno == EINVAL || errno == ENOSYS || 639 errno == ENOENT || errno == EOPNOTSUPP || 640 errno == ENXIO) { 641 if (verbose > 0) 642 ui__warning("%s event is not supported by the kernel.\n", 643 perf_evsel__name(counter)); 644 counter->supported = false; 645 646 if ((counter->leader != counter) || 647 !(counter->leader->nr_members > 1)) 648 continue; 649 } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { 650 if (verbose > 0) 651 ui__warning("%s\n", msg); 652 goto try_again; 653 } else if (target__has_per_thread(&target) && 654 evsel_list->threads && 655 evsel_list->threads->err_thread != -1) { 656 /* 657 * For global --per-thread case, skip current 658 * error thread. 
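 *
 * (Illustrative, an assumption about the typical trigger.)  E.g. if a
 * monitored thread exits between thread-map construction and
 * perf_event_open(), the open fails and err_thread names the stale
 * entry; removing it and retrying lets the surviving threads count.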
659 */ 660 if (!thread_map__remove(evsel_list->threads, 661 evsel_list->threads->err_thread)) { 662 evsel_list->threads->err_thread = -1; 663 goto try_again; 664 } 665 } 666 667 perf_evsel__open_strerror(counter, &target, 668 errno, msg, sizeof(msg)); 669 ui__error("%s\n", msg); 670 671 if (child_pid != -1) 672 kill(child_pid, SIGTERM); 673 674 return -1; 675 } 676 counter->supported = true; 677 678 l = strlen(counter->unit); 679 if (l > unit_width) 680 unit_width = l; 681 682 if (perf_evsel__should_store_id(counter) && 683 store_counter_ids(counter)) 684 return -1; 685 } 686 687 if (perf_evlist__apply_filters(evsel_list, &counter)) { 688 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 689 counter->filter, perf_evsel__name(counter), errno, 690 str_error_r(errno, msg, sizeof(msg))); 691 return -1; 692 } 693 694 if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) { 695 pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", 696 err_term->val.drv_cfg, perf_evsel__name(counter), errno, 697 str_error_r(errno, msg, sizeof(msg))); 698 return -1; 699 } 700 701 if (STAT_RECORD) { 702 int err, fd = perf_data__fd(&perf_stat.data); 703 704 if (is_pipe) { 705 err = perf_header__write_pipe(perf_data__fd(&perf_stat.data)); 706 } else { 707 err = perf_session__write_header(perf_stat.session, evsel_list, 708 fd, false); 709 } 710 711 if (err < 0) 712 return err; 713 714 err = perf_stat_synthesize_config(is_pipe); 715 if (err < 0) 716 return err; 717 } 718 719 /* 720 * Enable counters and exec the command: 721 */ 722 t0 = rdclock(); 723 clock_gettime(CLOCK_MONOTONIC, &ref_time); 724 725 if (forks) { 726 perf_evlist__start_workload(evsel_list); 727 enable_counters(); 728 729 if (interval || timeout) { 730 while (!waitpid(child_pid, &status, WNOHANG)) { 731 nanosleep(&ts, NULL); 732 if (timeout) 733 break; 734 process_interval(); 735 if (interval_count && !(--times)) 736 break; 737 } 738 } 739 wait4(child_pid, &status, 0, &ru_data); 740 741 if (workload_exec_errno) { 742 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 743 pr_err("Workload failed: %s\n", emsg); 744 return -1; 745 } 746 747 if (WIFSIGNALED(status)) 748 psignal(WTERMSIG(status), argv[0]); 749 } else { 750 enable_counters(); 751 while (!done) { 752 nanosleep(&ts, NULL); 753 if (timeout) 754 break; 755 if (interval) { 756 process_interval(); 757 if (interval_count && !(--times)) 758 break; 759 } 760 } 761 } 762 763 disable_counters(); 764 765 t1 = rdclock(); 766 767 if (walltime_run_table) 768 walltime_run[run_idx] = t1 - t0; 769 770 update_stats(&walltime_nsecs_stats, t1 - t0); 771 772 /* 773 * Closing a group leader splits the group, and as we only disable 774 * group leaders, results in remaining events becoming enabled. To 775 * avoid arbitrary skew, we must read all counters before closing any 776 * group leaders. 
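 *
 * (Worked example, not in the original comment.)  With a group
 * {cycles,instructions}, closing the leader first would split the
 * group and leave 'instructions' enabled and counting alone, so an
 * insns-per-cycle ratio computed from the final read would mix pre-
 * and post-split intervals.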
777 */ 778 read_counters(); 779 perf_evlist__close(evsel_list); 780 781 return WEXITSTATUS(status); 782 } 783 784 static int run_perf_stat(int argc, const char **argv, int run_idx) 785 { 786 int ret; 787 788 if (pre_cmd) { 789 ret = system(pre_cmd); 790 if (ret) 791 return ret; 792 } 793 794 if (sync_run) 795 sync(); 796 797 ret = __run_perf_stat(argc, argv, run_idx); 798 if (ret) 799 return ret; 800 801 if (post_cmd) { 802 ret = system(post_cmd); 803 if (ret) 804 return ret; 805 } 806 807 return ret; 808 } 809 810 static void print_running(u64 run, u64 ena) 811 { 812 if (csv_output) { 813 fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", 814 csv_sep, 815 run, 816 csv_sep, 817 ena ? 100.0 * run / ena : 100.0); 818 } else if (run != ena) { 819 fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); 820 } 821 } 822 823 static void print_noise_pct(double total, double avg) 824 { 825 double pct = rel_stddev_stats(total, avg); 826 827 if (csv_output) 828 fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct); 829 else if (pct) 830 fprintf(stat_config.output, " ( +-%6.2f%% )", pct); 831 } 832 833 static void print_noise(struct perf_evsel *evsel, double avg) 834 { 835 struct perf_stat_evsel *ps; 836 837 if (run_count == 1) 838 return; 839 840 ps = evsel->stats; 841 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 842 } 843 844 static void aggr_printout(struct perf_evsel *evsel, int id, int nr) 845 { 846 switch (stat_config.aggr_mode) { 847 case AGGR_CORE: 848 fprintf(stat_config.output, "S%d-C%*d%s%*d%s", 849 cpu_map__id_to_socket(id), 850 csv_output ? 0 : -8, 851 cpu_map__id_to_cpu(id), 852 csv_sep, 853 csv_output ? 0 : 4, 854 nr, 855 csv_sep); 856 break; 857 case AGGR_SOCKET: 858 fprintf(stat_config.output, "S%*d%s%*d%s", 859 csv_output ? 0 : -5, 860 id, 861 csv_sep, 862 csv_output ? 0 : 4, 863 nr, 864 csv_sep); 865 break; 866 case AGGR_NONE: 867 fprintf(stat_config.output, "CPU%*d%s", 868 csv_output ? 0 : -4, 869 perf_evsel__cpus(evsel)->map[id], csv_sep); 870 break; 871 case AGGR_THREAD: 872 fprintf(stat_config.output, "%*s-%*d%s", 873 csv_output ? 0 : 16, 874 thread_map__comm(evsel->threads, id), 875 csv_output ? 
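/*
 * (Illustrative output, not from the original.)  The prefixes produced
 * by this function look roughly like, per mode:
 *
 *	AGGR_CORE:   "S0-C1            4  "   socket 0, core 1, 4 CPUs
 *	AGGR_SOCKET: "S0       8  "
 *	AGGR_NONE:   "CPU0  "
 *	AGGR_THREAD: "            make-2145  "
 *
 * and collapse to bare comma-separated fields with -x ','.
 */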
0 : -8, 876 thread_map__pid(evsel->threads, id), 877 csv_sep); 878 break; 879 case AGGR_GLOBAL: 880 case AGGR_UNSET: 881 default: 882 break; 883 } 884 } 885 886 struct outstate { 887 FILE *fh; 888 bool newline; 889 const char *prefix; 890 int nfields; 891 int id, nr; 892 struct perf_evsel *evsel; 893 }; 894 895 #define METRIC_LEN 35 896 897 static void new_line_std(void *ctx) 898 { 899 struct outstate *os = ctx; 900 901 os->newline = true; 902 } 903 904 static void do_new_line_std(struct outstate *os) 905 { 906 fputc('\n', os->fh); 907 fputs(os->prefix, os->fh); 908 aggr_printout(os->evsel, os->id, os->nr); 909 if (stat_config.aggr_mode == AGGR_NONE) 910 fprintf(os->fh, " "); 911 fprintf(os->fh, " "); 912 } 913 914 static void print_metric_std(void *ctx, const char *color, const char *fmt, 915 const char *unit, double val) 916 { 917 struct outstate *os = ctx; 918 FILE *out = os->fh; 919 int n; 920 bool newline = os->newline; 921 922 os->newline = false; 923 924 if (unit == NULL || fmt == NULL) { 925 fprintf(out, "%-*s", METRIC_LEN, ""); 926 return; 927 } 928 929 if (newline) 930 do_new_line_std(os); 931 932 n = fprintf(out, " # "); 933 if (color) 934 n += color_fprintf(out, color, fmt, val); 935 else 936 n += fprintf(out, fmt, val); 937 fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); 938 } 939 940 static void new_line_csv(void *ctx) 941 { 942 struct outstate *os = ctx; 943 int i; 944 945 fputc('\n', os->fh); 946 if (os->prefix) 947 fprintf(os->fh, "%s%s", os->prefix, csv_sep); 948 aggr_printout(os->evsel, os->id, os->nr); 949 for (i = 0; i < os->nfields; i++) 950 fputs(csv_sep, os->fh); 951 } 952 953 static void print_metric_csv(void *ctx, 954 const char *color __maybe_unused, 955 const char *fmt, const char *unit, double val) 956 { 957 struct outstate *os = ctx; 958 FILE *out = os->fh; 959 char buf[64], *vals, *ends; 960 961 if (unit == NULL || fmt == NULL) { 962 fprintf(out, "%s%s", csv_sep, csv_sep); 963 return; 964 } 965 snprintf(buf, sizeof(buf), fmt, val); 966 ends = vals = ltrim(buf); 967 while (isdigit(*ends) || *ends == '.') 968 ends++; 969 *ends = 0; 970 while (isspace(*unit)) 971 unit++; 972 fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); 973 } 974 975 /* Filter out some columns that don't work well in metrics only mode */ 976 977 static bool valid_only_metric(const char *unit) 978 { 979 if (!unit) 980 return false; 981 if (strstr(unit, "/sec") || 982 strstr(unit, "hz") || 983 strstr(unit, "Hz") || 984 strstr(unit, "CPUs utilized")) 985 return false; 986 return true; 987 } 988 989 static const char *fixunit(char *buf, struct perf_evsel *evsel, 990 const char *unit) 991 { 992 if (!strncmp(unit, "of all", 6)) { 993 snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), 994 unit); 995 return buf; 996 } 997 return unit; 998 } 999 1000 static void print_metric_only(void *ctx, const char *color, const char *fmt, 1001 const char *unit, double val) 1002 { 1003 struct outstate *os = ctx; 1004 FILE *out = os->fh; 1005 char buf[1024], str[1024]; 1006 unsigned mlen = metric_only_len; 1007 1008 if (!valid_only_metric(unit)) 1009 return; 1010 unit = fixunit(buf, os->evsel, unit); 1011 if (mlen < strlen(unit)) 1012 mlen = strlen(unit) + 1; 1013 1014 if (color) 1015 mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1; 1016 1017 color_snprintf(str, sizeof(str), color ?: "", fmt, val); 1018 fprintf(out, "%*s ", mlen, str); 1019 } 1020 1021 static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, 1022 const char *fmt, 1023 const char *unit, double val) 1024 { 1025 
struct outstate *os = ctx; 1026 FILE *out = os->fh; 1027 char buf[64], *vals, *ends; 1028 char tbuf[1024]; 1029 1030 if (!valid_only_metric(unit)) 1031 return; 1032 unit = fixunit(tbuf, os->evsel, unit); 1033 snprintf(buf, sizeof buf, fmt, val); 1034 ends = vals = ltrim(buf); 1035 while (isdigit(*ends) || *ends == '.') 1036 ends++; 1037 *ends = 0; 1038 fprintf(out, "%s%s", vals, csv_sep); 1039 } 1040 1041 static void new_line_metric(void *ctx __maybe_unused) 1042 { 1043 } 1044 1045 static void print_metric_header(void *ctx, const char *color __maybe_unused, 1046 const char *fmt __maybe_unused, 1047 const char *unit, double val __maybe_unused) 1048 { 1049 struct outstate *os = ctx; 1050 char tbuf[1024]; 1051 1052 if (!valid_only_metric(unit)) 1053 return; 1054 unit = fixunit(tbuf, os->evsel, unit); 1055 if (csv_output) 1056 fprintf(os->fh, "%s%s", unit, csv_sep); 1057 else 1058 fprintf(os->fh, "%*s ", metric_only_len, unit); 1059 } 1060 1061 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) 1062 { 1063 FILE *output = stat_config.output; 1064 double msecs = avg / NSEC_PER_MSEC; 1065 const char *fmt_v, *fmt_n; 1066 char name[25]; 1067 1068 fmt_v = csv_output ? "%.6f%s" : "%18.6f%s"; 1069 fmt_n = csv_output ? "%s" : "%-25s"; 1070 1071 aggr_printout(evsel, id, nr); 1072 1073 scnprintf(name, sizeof(name), "%s%s", 1074 perf_evsel__name(evsel), csv_output ? "" : " (msec)"); 1075 1076 fprintf(output, fmt_v, msecs, csv_sep); 1077 1078 if (csv_output) 1079 fprintf(output, "%s%s", evsel->unit, csv_sep); 1080 else 1081 fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep); 1082 1083 fprintf(output, fmt_n, name); 1084 1085 if (evsel->cgrp) 1086 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1087 } 1088 1089 static int first_shadow_cpu(struct perf_evsel *evsel, int id) 1090 { 1091 int i; 1092 1093 if (!aggr_get_id) 1094 return 0; 1095 1096 if (stat_config.aggr_mode == AGGR_NONE) 1097 return id; 1098 1099 if (stat_config.aggr_mode == AGGR_GLOBAL) 1100 return 0; 1101 1102 for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { 1103 int cpu2 = perf_evsel__cpus(evsel)->map[i]; 1104 1105 if (aggr_get_id(evsel_list->cpus, cpu2) == id) 1106 return cpu2; 1107 } 1108 return 0; 1109 } 1110 1111 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 1112 { 1113 FILE *output = stat_config.output; 1114 double sc = evsel->scale; 1115 const char *fmt; 1116 1117 if (csv_output) { 1118 fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; 1119 } else { 1120 if (big_num) 1121 fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; 1122 else 1123 fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; 1124 } 1125 1126 aggr_printout(evsel, id, nr); 1127 1128 fprintf(output, fmt, avg, csv_sep); 1129 1130 if (evsel->unit) 1131 fprintf(output, "%-*s%s", 1132 csv_output ? 0 : unit_width, 1133 evsel->unit, csv_sep); 1134 1135 fprintf(output, "%-*s", csv_output ? 
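/*
 * (Aside; relies on behavior outside this section.)  The %'18.0f /
 * %'18.2f formats chosen above use the apostrophe flag for thousands
 * grouping, which only takes effect under a locale set up elsewhere
 * (e.g. setlocale(LC_NUMERIC, "")); in the plain "C" locale the
 * separators simply disappear.
 */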
0 : 25, perf_evsel__name(evsel)); 1136 1137 if (evsel->cgrp) 1138 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1139 } 1140 1141 static bool is_mixed_hw_group(struct perf_evsel *counter) 1142 { 1143 struct perf_evlist *evlist = counter->evlist; 1144 u32 pmu_type = counter->attr.type; 1145 struct perf_evsel *pos; 1146 1147 if (counter->nr_members < 2) 1148 return false; 1149 1150 evlist__for_each_entry(evlist, pos) { 1151 /* software events can be part of any hardware group */ 1152 if (pos->attr.type == PERF_TYPE_SOFTWARE) 1153 continue; 1154 if (pmu_type == PERF_TYPE_SOFTWARE) { 1155 pmu_type = pos->attr.type; 1156 continue; 1157 } 1158 if (pmu_type != pos->attr.type) 1159 return true; 1160 } 1161 1162 return false; 1163 } 1164 1165 static void printout(int id, int nr, struct perf_evsel *counter, double uval, 1166 char *prefix, u64 run, u64 ena, double noise, 1167 struct runtime_stat *st) 1168 { 1169 struct perf_stat_output_ctx out; 1170 struct outstate os = { 1171 .fh = stat_config.output, 1172 .prefix = prefix ? prefix : "", 1173 .id = id, 1174 .nr = nr, 1175 .evsel = counter, 1176 }; 1177 print_metric_t pm = print_metric_std; 1178 void (*nl)(void *); 1179 1180 if (metric_only) { 1181 nl = new_line_metric; 1182 if (csv_output) 1183 pm = print_metric_only_csv; 1184 else 1185 pm = print_metric_only; 1186 } else 1187 nl = new_line_std; 1188 1189 if (csv_output && !metric_only) { 1190 static int aggr_fields[] = { 1191 [AGGR_GLOBAL] = 0, 1192 [AGGR_THREAD] = 1, 1193 [AGGR_NONE] = 1, 1194 [AGGR_SOCKET] = 2, 1195 [AGGR_CORE] = 2, 1196 }; 1197 1198 pm = print_metric_csv; 1199 nl = new_line_csv; 1200 os.nfields = 3; 1201 os.nfields += aggr_fields[stat_config.aggr_mode]; 1202 if (counter->cgrp) 1203 os.nfields++; 1204 } 1205 if (run == 0 || ena == 0 || counter->counts->scaled == -1) { 1206 if (metric_only) { 1207 pm(&os, NULL, "", "", 0); 1208 return; 1209 } 1210 aggr_printout(counter, id, nr); 1211 1212 fprintf(stat_config.output, "%*s%s", 1213 csv_output ? 0 : 18, 1214 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 1215 csv_sep); 1216 1217 if (counter->supported) { 1218 print_free_counters_hint = 1; 1219 if (is_mixed_hw_group(counter)) 1220 print_mixed_hw_group_error = 1; 1221 } 1222 1223 fprintf(stat_config.output, "%-*s%s", 1224 csv_output ? 0 : unit_width, 1225 counter->unit, csv_sep); 1226 1227 fprintf(stat_config.output, "%*s", 1228 csv_output ? 
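/*
 * (Illustrative, not in the original.)  This branch renders rows for
 * events that never ran, e.g.:
 *
 *	<not counted>      branches
 *	<not supported>    cpu/el-start/
 *
 * The print_free_counters_hint / print_mixed_hw_group_error flags set
 * above make print_footer() explain the likely cause afterwards.
 */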
0 : -25, 1229 perf_evsel__name(counter)); 1230 1231 if (counter->cgrp) 1232 fprintf(stat_config.output, "%s%s", 1233 csv_sep, counter->cgrp->name); 1234 1235 if (!csv_output) 1236 pm(&os, NULL, NULL, "", 0); 1237 print_noise(counter, noise); 1238 print_running(run, ena); 1239 if (csv_output) 1240 pm(&os, NULL, NULL, "", 0); 1241 return; 1242 } 1243 1244 if (metric_only) 1245 /* nothing */; 1246 else if (nsec_counter(counter)) 1247 nsec_printout(id, nr, counter, uval); 1248 else 1249 abs_printout(id, nr, counter, uval); 1250 1251 out.print_metric = pm; 1252 out.new_line = nl; 1253 out.ctx = &os; 1254 out.force_header = false; 1255 1256 if (csv_output && !metric_only) { 1257 print_noise(counter, noise); 1258 print_running(run, ena); 1259 } 1260 1261 perf_stat__print_shadow_stats(counter, uval, 1262 first_shadow_cpu(counter, id), 1263 &out, &metric_events, st); 1264 if (!csv_output && !metric_only) { 1265 print_noise(counter, noise); 1266 print_running(run, ena); 1267 } 1268 } 1269 1270 static void aggr_update_shadow(void) 1271 { 1272 int cpu, s2, id, s; 1273 u64 val; 1274 struct perf_evsel *counter; 1275 1276 for (s = 0; s < aggr_map->nr; s++) { 1277 id = aggr_map->map[s]; 1278 evlist__for_each_entry(evsel_list, counter) { 1279 val = 0; 1280 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1281 s2 = aggr_get_id(evsel_list->cpus, cpu); 1282 if (s2 != id) 1283 continue; 1284 val += perf_counts(counter->counts, cpu, 0)->val; 1285 } 1286 perf_stat__update_shadow_stats(counter, val, 1287 first_shadow_cpu(counter, id), 1288 &rt_stat); 1289 } 1290 } 1291 } 1292 1293 static void uniquify_event_name(struct perf_evsel *counter) 1294 { 1295 char *new_name; 1296 char *config; 1297 1298 if (counter->uniquified_name || 1299 !counter->pmu_name || !strncmp(counter->name, counter->pmu_name, 1300 strlen(counter->pmu_name))) 1301 return; 1302 1303 config = strchr(counter->name, '/'); 1304 if (config) { 1305 if (asprintf(&new_name, 1306 "%s%s", counter->pmu_name, config) > 0) { 1307 free(counter->name); 1308 counter->name = new_name; 1309 } 1310 } else { 1311 if (asprintf(&new_name, 1312 "%s [%s]", counter->name, counter->pmu_name) > 0) { 1313 free(counter->name); 1314 counter->name = new_name; 1315 } 1316 } 1317 1318 counter->uniquified_name = true; 1319 } 1320 1321 static void collect_all_aliases(struct perf_evsel *counter, 1322 void (*cb)(struct perf_evsel *counter, void *data, 1323 bool first), 1324 void *data) 1325 { 1326 struct perf_evsel *alias; 1327 1328 alias = list_prepare_entry(counter, &(evsel_list->entries), node); 1329 list_for_each_entry_continue (alias, &evsel_list->entries, node) { 1330 if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) || 1331 alias->scale != counter->scale || 1332 alias->cgrp != counter->cgrp || 1333 strcmp(alias->unit, counter->unit) || 1334 nsec_counter(alias) != nsec_counter(counter)) 1335 break; 1336 alias->merged_stat = true; 1337 cb(alias, data, false); 1338 } 1339 } 1340 1341 static bool collect_data(struct perf_evsel *counter, 1342 void (*cb)(struct perf_evsel *counter, void *data, 1343 bool first), 1344 void *data) 1345 { 1346 if (counter->merged_stat) 1347 return false; 1348 cb(counter, data, true); 1349 if (no_merge) 1350 uniquify_event_name(counter); 1351 else if (counter->auto_merge_stats) 1352 collect_all_aliases(counter, cb, data); 1353 return true; 1354 } 1355 1356 struct aggr_data { 1357 u64 ena, run, val; 1358 int id; 1359 int nr; 1360 int cpu; 1361 }; 1362 1363 static void aggr_cb(struct perf_evsel *counter, void *data, bool first) 
{
	struct aggr_data *ad = data;
	int cpu, s2;

	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
		struct perf_counts_values *counts;

		s2 = aggr_get_id(perf_evsel__cpus(counter), cpu);
		if (s2 != ad->id)
			continue;
		if (first)
			ad->nr++;
		counts = perf_counts(counter->counts, cpu, 0);
		/*
		 * When any result is bad, make them all bad, to give
		 * consistent output in interval mode.
		 */
		if (counts->ena == 0 || counts->run == 0 ||
		    counter->counts->scaled == -1) {
			ad->ena = 0;
			ad->run = 0;
			break;
		}
		ad->val += counts->val;
		ad->ena += counts->ena;
		ad->run += counts->run;
	}
}

static void print_aggr(char *prefix)
{
	FILE *output = stat_config.output;
	struct perf_evsel *counter;
	int s, id, nr;
	double uval;
	u64 ena, run, val;
	bool first;

	if (!(aggr_map || aggr_get_id))
		return;

	aggr_update_shadow();

	/*
	 * With metric_only everything is on a single line.
	 * Without it, each counter has its own line.
	 */
	for (s = 0; s < aggr_map->nr; s++) {
		struct aggr_data ad;
		if (prefix && metric_only)
			fprintf(output, "%s", prefix);

		ad.id = id = aggr_map->map[s];
		first = true;
		evlist__for_each_entry(evsel_list, counter) {
			if (is_duration_time(counter))
				continue;

			ad.val = ad.ena = ad.run = 0;
			ad.nr = 0;
			if (!collect_data(counter, aggr_cb, &ad))
				continue;
			nr = ad.nr;
			ena = ad.ena;
			run = ad.run;
			val = ad.val;
			if (first && metric_only) {
				first = false;
				aggr_printout(counter, id, nr);
			}
			if (prefix && !metric_only)
				fprintf(output, "%s", prefix);

			uval = val * counter->scale;
			printout(id, nr, counter, uval, prefix, run, ena, 1.0,
				 &rt_stat);
			if (!metric_only)
				fputc('\n', output);
		}
		if (metric_only)
			fputc('\n', output);
	}
}

static int cmp_val(const void *a, const void *b)
{
	return ((struct perf_aggr_thread_value *)b)->val -
		((struct perf_aggr_thread_value *)a)->val;
}

static struct perf_aggr_thread_value *sort_aggr_thread(
					struct perf_evsel *counter,
					int nthreads, int ncpus,
					int *ret)
{
	int cpu, thread, i = 0;
	double uval;
	struct perf_aggr_thread_value *buf;

	buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value));
	if (!buf)
		return NULL;

	for (thread = 0; thread < nthreads; thread++) {
		u64 ena = 0, run = 0, val = 0;

		for (cpu = 0; cpu < ncpus; cpu++) {
			val += perf_counts(counter->counts, cpu, thread)->val;
			ena += perf_counts(counter->counts, cpu, thread)->ena;
			run += perf_counts(counter->counts, cpu, thread)->run;
		}

		uval = val * counter->scale;

		/*
		 * Skip zero-valued counters when --per-thread is enabled
		 * globally, to avoid flooding the output with zero lines.
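		 * (Example scenario, not from the original comment.)  E.g.
		 * attaching --per-thread to a process with many mostly idle
		 * threads would otherwise print one all-zero row per
		 * sleeping thread.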
1481 */ 1482 if (uval == 0.0 && target__has_per_thread(&target)) 1483 continue; 1484 1485 buf[i].counter = counter; 1486 buf[i].id = thread; 1487 buf[i].uval = uval; 1488 buf[i].val = val; 1489 buf[i].run = run; 1490 buf[i].ena = ena; 1491 i++; 1492 } 1493 1494 qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val); 1495 1496 if (ret) 1497 *ret = i; 1498 1499 return buf; 1500 } 1501 1502 static void print_aggr_thread(struct perf_evsel *counter, char *prefix) 1503 { 1504 FILE *output = stat_config.output; 1505 int nthreads = thread_map__nr(counter->threads); 1506 int ncpus = cpu_map__nr(counter->cpus); 1507 int thread, sorted_threads, id; 1508 struct perf_aggr_thread_value *buf; 1509 1510 buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads); 1511 if (!buf) { 1512 perror("cannot sort aggr thread"); 1513 return; 1514 } 1515 1516 for (thread = 0; thread < sorted_threads; thread++) { 1517 if (prefix) 1518 fprintf(output, "%s", prefix); 1519 1520 id = buf[thread].id; 1521 if (stat_config.stats) 1522 printout(id, 0, buf[thread].counter, buf[thread].uval, 1523 prefix, buf[thread].run, buf[thread].ena, 1.0, 1524 &stat_config.stats[id]); 1525 else 1526 printout(id, 0, buf[thread].counter, buf[thread].uval, 1527 prefix, buf[thread].run, buf[thread].ena, 1.0, 1528 &rt_stat); 1529 fputc('\n', output); 1530 } 1531 1532 free(buf); 1533 } 1534 1535 struct caggr_data { 1536 double avg, avg_enabled, avg_running; 1537 }; 1538 1539 static void counter_aggr_cb(struct perf_evsel *counter, void *data, 1540 bool first __maybe_unused) 1541 { 1542 struct caggr_data *cd = data; 1543 struct perf_stat_evsel *ps = counter->stats; 1544 1545 cd->avg += avg_stats(&ps->res_stats[0]); 1546 cd->avg_enabled += avg_stats(&ps->res_stats[1]); 1547 cd->avg_running += avg_stats(&ps->res_stats[2]); 1548 } 1549 1550 /* 1551 * Print out the results of a single counter: 1552 * aggregated counts in system-wide mode 1553 */ 1554 static void print_counter_aggr(struct perf_evsel *counter, char *prefix) 1555 { 1556 FILE *output = stat_config.output; 1557 double uval; 1558 struct caggr_data cd = { .avg = 0.0 }; 1559 1560 if (!collect_data(counter, counter_aggr_cb, &cd)) 1561 return; 1562 1563 if (prefix && !metric_only) 1564 fprintf(output, "%s", prefix); 1565 1566 uval = cd.avg * counter->scale; 1567 printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, 1568 cd.avg, &rt_stat); 1569 if (!metric_only) 1570 fprintf(output, "\n"); 1571 } 1572 1573 static void counter_cb(struct perf_evsel *counter, void *data, 1574 bool first __maybe_unused) 1575 { 1576 struct aggr_data *ad = data; 1577 1578 ad->val += perf_counts(counter->counts, ad->cpu, 0)->val; 1579 ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena; 1580 ad->run += perf_counts(counter->counts, ad->cpu, 0)->run; 1581 } 1582 1583 /* 1584 * Print out the results of a single counter: 1585 * does not use aggregated count in system-wide 1586 */ 1587 static void print_counter(struct perf_evsel *counter, char *prefix) 1588 { 1589 FILE *output = stat_config.output; 1590 u64 ena, run, val; 1591 double uval; 1592 int cpu; 1593 1594 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1595 struct aggr_data ad = { .cpu = cpu }; 1596 1597 if (!collect_data(counter, counter_cb, &ad)) 1598 return; 1599 val = ad.val; 1600 ena = ad.ena; 1601 run = ad.run; 1602 1603 if (prefix) 1604 fprintf(output, "%s", prefix); 1605 1606 uval = val * counter->scale; 1607 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, 1608 &rt_stat); 1609 1610 fputc('\n', output); 
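/*
 * (Example output, illustrative.)  In AGGR_NONE mode (-A) each pass of
 * the loop above emits one row per CPU, e.g.:
 *
 *	CPU0         123,456,789      cycles
 *	CPU1          98,765,432      cycles
 */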
1611 } 1612 } 1613 1614 static void print_no_aggr_metric(char *prefix) 1615 { 1616 int cpu; 1617 int nrcpus = 0; 1618 struct perf_evsel *counter; 1619 u64 ena, run, val; 1620 double uval; 1621 1622 nrcpus = evsel_list->cpus->nr; 1623 for (cpu = 0; cpu < nrcpus; cpu++) { 1624 bool first = true; 1625 1626 if (prefix) 1627 fputs(prefix, stat_config.output); 1628 evlist__for_each_entry(evsel_list, counter) { 1629 if (is_duration_time(counter)) 1630 continue; 1631 if (first) { 1632 aggr_printout(counter, cpu, 0); 1633 first = false; 1634 } 1635 val = perf_counts(counter->counts, cpu, 0)->val; 1636 ena = perf_counts(counter->counts, cpu, 0)->ena; 1637 run = perf_counts(counter->counts, cpu, 0)->run; 1638 1639 uval = val * counter->scale; 1640 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, 1641 &rt_stat); 1642 } 1643 fputc('\n', stat_config.output); 1644 } 1645 } 1646 1647 static int aggr_header_lens[] = { 1648 [AGGR_CORE] = 18, 1649 [AGGR_SOCKET] = 12, 1650 [AGGR_NONE] = 6, 1651 [AGGR_THREAD] = 24, 1652 [AGGR_GLOBAL] = 0, 1653 }; 1654 1655 static const char *aggr_header_csv[] = { 1656 [AGGR_CORE] = "core,cpus,", 1657 [AGGR_SOCKET] = "socket,cpus", 1658 [AGGR_NONE] = "cpu,", 1659 [AGGR_THREAD] = "comm-pid,", 1660 [AGGR_GLOBAL] = "" 1661 }; 1662 1663 static void print_metric_headers(const char *prefix, bool no_indent) 1664 { 1665 struct perf_stat_output_ctx out; 1666 struct perf_evsel *counter; 1667 struct outstate os = { 1668 .fh = stat_config.output 1669 }; 1670 1671 if (prefix) 1672 fprintf(stat_config.output, "%s", prefix); 1673 1674 if (!csv_output && !no_indent) 1675 fprintf(stat_config.output, "%*s", 1676 aggr_header_lens[stat_config.aggr_mode], ""); 1677 if (csv_output) { 1678 if (stat_config.interval) 1679 fputs("time,", stat_config.output); 1680 fputs(aggr_header_csv[stat_config.aggr_mode], 1681 stat_config.output); 1682 } 1683 1684 /* Print metrics headers only */ 1685 evlist__for_each_entry(evsel_list, counter) { 1686 if (is_duration_time(counter)) 1687 continue; 1688 os.evsel = counter; 1689 out.ctx = &os; 1690 out.print_metric = print_metric_header; 1691 out.new_line = new_line_metric; 1692 out.force_header = true; 1693 os.evsel = counter; 1694 perf_stat__print_shadow_stats(counter, 0, 1695 0, 1696 &out, 1697 &metric_events, 1698 &rt_stat); 1699 } 1700 fputc('\n', stat_config.output); 1701 } 1702 1703 static void print_interval(char *prefix, struct timespec *ts) 1704 { 1705 FILE *output = stat_config.output; 1706 static int num_print_interval; 1707 1708 if (interval_clear) 1709 puts(CONSOLE_CLEAR); 1710 1711 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); 1712 1713 if ((num_print_interval == 0 && !csv_output) || interval_clear) { 1714 switch (stat_config.aggr_mode) { 1715 case AGGR_SOCKET: 1716 fprintf(output, "# time socket cpus"); 1717 if (!metric_only) 1718 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1719 break; 1720 case AGGR_CORE: 1721 fprintf(output, "# time core cpus"); 1722 if (!metric_only) 1723 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1724 break; 1725 case AGGR_NONE: 1726 fprintf(output, "# time CPU "); 1727 if (!metric_only) 1728 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1729 break; 1730 case AGGR_THREAD: 1731 fprintf(output, "# time comm-pid"); 1732 if (!metric_only) 1733 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1734 break; 1735 case AGGR_GLOBAL: 1736 default: 1737 fprintf(output, "# time"); 1738 if (!metric_only) 1739 fprintf(output, " counts %*s events\n", unit_width, 
"unit"); 1740 case AGGR_UNSET: 1741 break; 1742 } 1743 } 1744 1745 if ((num_print_interval == 0 && metric_only) || interval_clear) 1746 print_metric_headers(" ", true); 1747 if (++num_print_interval == 25) 1748 num_print_interval = 0; 1749 } 1750 1751 static void print_header(int argc, const char **argv) 1752 { 1753 FILE *output = stat_config.output; 1754 int i; 1755 1756 fflush(stdout); 1757 1758 if (!csv_output) { 1759 fprintf(output, "\n"); 1760 fprintf(output, " Performance counter stats for "); 1761 if (target.system_wide) 1762 fprintf(output, "\'system wide"); 1763 else if (target.cpu_list) 1764 fprintf(output, "\'CPU(s) %s", target.cpu_list); 1765 else if (!target__has_task(&target)) { 1766 fprintf(output, "\'%s", argv ? argv[0] : "pipe"); 1767 for (i = 1; argv && (i < argc); i++) 1768 fprintf(output, " %s", argv[i]); 1769 } else if (target.pid) 1770 fprintf(output, "process id \'%s", target.pid); 1771 else 1772 fprintf(output, "thread id \'%s", target.tid); 1773 1774 fprintf(output, "\'"); 1775 if (run_count > 1) 1776 fprintf(output, " (%d runs)", run_count); 1777 fprintf(output, ":\n\n"); 1778 } 1779 } 1780 1781 static int get_precision(double num) 1782 { 1783 if (num > 1) 1784 return 0; 1785 1786 return lround(ceil(-log10(num))); 1787 } 1788 1789 static void print_table(FILE *output, int precision, double avg) 1790 { 1791 char tmp[64]; 1792 int idx, indent = 0; 1793 1794 scnprintf(tmp, 64, " %17.*f", precision, avg); 1795 while (tmp[indent] == ' ') 1796 indent++; 1797 1798 fprintf(output, "%*s# Table of individual measurements:\n", indent, ""); 1799 1800 for (idx = 0; idx < run_count; idx++) { 1801 double run = (double) walltime_run[idx] / NSEC_PER_SEC; 1802 int h, n = 1 + abs((int) (100.0 * (run - avg)/run) / 5); 1803 1804 fprintf(output, " %17.*f (%+.*f) ", 1805 precision, run, precision, run - avg); 1806 1807 for (h = 0; h < n; h++) 1808 fprintf(output, "#"); 1809 1810 fprintf(output, "\n"); 1811 } 1812 1813 fprintf(output, "\n%*s# Final result:\n", indent, ""); 1814 } 1815 1816 static double timeval2double(struct timeval *t) 1817 { 1818 return t->tv_sec + (double) t->tv_usec/USEC_PER_SEC; 1819 } 1820 1821 static void print_footer(void) 1822 { 1823 double avg = avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC; 1824 FILE *output = stat_config.output; 1825 int n; 1826 1827 if (!null_run) 1828 fprintf(output, "\n"); 1829 1830 if (run_count == 1) { 1831 fprintf(output, " %17.9f seconds time elapsed", avg); 1832 1833 if (ru_display) { 1834 double ru_utime = timeval2double(&ru_data.ru_utime); 1835 double ru_stime = timeval2double(&ru_data.ru_stime); 1836 1837 fprintf(output, "\n\n"); 1838 fprintf(output, " %17.9f seconds user\n", ru_utime); 1839 fprintf(output, " %17.9f seconds sys\n", ru_stime); 1840 } 1841 } else { 1842 double sd = stddev_stats(&walltime_nsecs_stats) / NSEC_PER_SEC; 1843 /* 1844 * Display at most 2 more significant 1845 * digits than the stddev inaccuracy. 1846 */ 1847 int precision = get_precision(sd) + 2; 1848 1849 if (walltime_run_table) 1850 print_table(output, precision, avg); 1851 1852 fprintf(output, " %17.*f +- %.*f seconds time elapsed", 1853 precision, avg, precision, sd); 1854 1855 print_noise_pct(sd, avg); 1856 } 1857 fprintf(output, "\n\n"); 1858 1859 if (print_free_counters_hint && 1860 sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 && 1861 n > 0) 1862 fprintf(output, 1863 "Some events weren't counted. 
Try disabling the NMI watchdog:\n" 1864 " echo 0 > /proc/sys/kernel/nmi_watchdog\n" 1865 " perf stat ...\n" 1866 " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); 1867 1868 if (print_mixed_hw_group_error) 1869 fprintf(output, 1870 "The events in group usually have to be from " 1871 "the same PMU. Try reorganizing the group.\n"); 1872 } 1873 1874 static void print_counters(struct timespec *ts, int argc, const char **argv) 1875 { 1876 int interval = stat_config.interval; 1877 struct perf_evsel *counter; 1878 char buf[64], *prefix = NULL; 1879 1880 /* Do not print anything if we record to the pipe. */ 1881 if (STAT_RECORD && perf_stat.data.is_pipe) 1882 return; 1883 1884 if (interval) 1885 print_interval(prefix = buf, ts); 1886 else 1887 print_header(argc, argv); 1888 1889 if (metric_only) { 1890 static int num_print_iv; 1891 1892 if (num_print_iv == 0 && !interval) 1893 print_metric_headers(prefix, false); 1894 if (num_print_iv++ == 25) 1895 num_print_iv = 0; 1896 if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) 1897 fprintf(stat_config.output, "%s", prefix); 1898 } 1899 1900 switch (stat_config.aggr_mode) { 1901 case AGGR_CORE: 1902 case AGGR_SOCKET: 1903 print_aggr(prefix); 1904 break; 1905 case AGGR_THREAD: 1906 evlist__for_each_entry(evsel_list, counter) { 1907 if (is_duration_time(counter)) 1908 continue; 1909 print_aggr_thread(counter, prefix); 1910 } 1911 break; 1912 case AGGR_GLOBAL: 1913 evlist__for_each_entry(evsel_list, counter) { 1914 if (is_duration_time(counter)) 1915 continue; 1916 print_counter_aggr(counter, prefix); 1917 } 1918 if (metric_only) 1919 fputc('\n', stat_config.output); 1920 break; 1921 case AGGR_NONE: 1922 if (metric_only) 1923 print_no_aggr_metric(prefix); 1924 else { 1925 evlist__for_each_entry(evsel_list, counter) { 1926 if (is_duration_time(counter)) 1927 continue; 1928 print_counter(counter, prefix); 1929 } 1930 } 1931 break; 1932 case AGGR_UNSET: 1933 default: 1934 break; 1935 } 1936 1937 if (!interval && !csv_output) 1938 print_footer(); 1939 1940 fflush(stat_config.output); 1941 } 1942 1943 static volatile int signr = -1; 1944 1945 static void skip_signal(int signo) 1946 { 1947 if ((child_pid == -1) || stat_config.interval) 1948 done = 1; 1949 1950 signr = signo; 1951 /* 1952 * render child_pid harmless 1953 * won't send SIGTERM to a random 1954 * process in case of race condition 1955 * and fast PID recycling 1956 */ 1957 child_pid = -1; 1958 } 1959 1960 static void sig_atexit(void) 1961 { 1962 sigset_t set, oset; 1963 1964 /* 1965 * avoid race condition with SIGCHLD handler 1966 * in skip_signal() which is modifying child_pid 1967 * goal is to avoid send SIGTERM to a random 1968 * process 1969 */ 1970 sigemptyset(&set); 1971 sigaddset(&set, SIGCHLD); 1972 sigprocmask(SIG_BLOCK, &set, &oset); 1973 1974 if (child_pid != -1) 1975 kill(child_pid, SIGTERM); 1976 1977 sigprocmask(SIG_SETMASK, &oset, NULL); 1978 1979 if (signr == -1) 1980 return; 1981 1982 signal(signr, SIG_DFL); 1983 kill(getpid(), signr); 1984 } 1985 1986 static int stat__set_big_num(const struct option *opt __maybe_unused, 1987 const char *s __maybe_unused, int unset) 1988 { 1989 big_num_opt = unset ? 
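/*
 * (Note, not in the original.)  big_num_opt is a tristate: it stays -1
 * when the user never passed -B/--no-big-num, letting the big_num
 * default stand, while the 0/1 recorded below captures an explicit
 * choice.
 */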
0 : 1;
	return 0;
}

static int enable_metric_only(const struct option *opt __maybe_unused,
			      const char *s __maybe_unused, int unset)
{
	force_metric_only = true;
	metric_only = !unset;
	return 0;
}

static int parse_metric_groups(const struct option *opt,
			       const char *str,
			       int unset __maybe_unused)
{
	return metricgroup__parse_groups(opt, str, &metric_events);
}

static const struct option stat_options[] = {
	OPT_BOOLEAN('T', "transaction", &transaction_run,
		    "hardware transaction statistics"),
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "stat events on existing process id"),
	OPT_STRING('t', "tid", &target.tid, "tid",
		   "stat events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('g', "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	OPT_INTEGER('r', "repeat", &run_count,
		    "repeat command and print average + stddev (max: 100, forever: 0)"),
	OPT_BOOLEAN(0, "table", &walltime_run_table,
		    "display details about each run (only with -r option)"),
	OPT_BOOLEAN('n', "null", &null_run,
		    "null run - don't start any counters"),
	OPT_INCR('d', "detailed", &detailed_run,
		 "detailed run - start a lot of events"),
	OPT_BOOLEAN('S', "sync", &sync_run,
		    "call sync() before starting a run"),
	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
			   "print large numbers with thousands\' separators",
			   stat__set_big_num),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		   "list of cpus to monitor in system-wide"),
	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
		     "disable CPU count aggregation", AGGR_NONE),
	OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identically named events"),
	OPT_STRING('x', "field-separator", &csv_sep, "separator",
		   "print counts with custom separator"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only", parse_cgroups),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
	OPT_INTEGER(0, "log-fd", &output_fd,
		    "log output to fd, instead of stderr"),
	OPT_STRING(0, "pre", &pre_cmd, "command",
		   "command to run prior to the measured command"),
	OPT_STRING(0, "post", &post_cmd, "command",
		   "command to run after the measured command"),
	OPT_UINTEGER('I', "interval-print", &stat_config.interval,
		     "print counts at regular interval in ms "
		     "(overhead is possible for values <= 100ms)"),
	OPT_INTEGER(0, "interval-count", &stat_config.times,
		    "print counts for fixed number of times"),
	OPT_BOOLEAN(0, "interval-clear", &interval_clear,
		    "clear screen in between new interval"),
	OPT_UINTEGER(0, "timeout", &stat_config.timeout,
		     "stop workload and print counts after a timeout period in ms (>= 10ms)"),
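	/*
	 * (Usage sketch, not in the original.)  Typical combinations of
	 * the options declared in this table:
	 *
	 *	perf stat -I 1000 --interval-count 5 -a
	 *	perf stat -r 10 --table -- ./workload
	 *	perf stat --timeout 2000 -p <pid>
	 */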
OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, 2069 "aggregate counts per processor socket", AGGR_SOCKET), 2070 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, 2071 "aggregate counts per physical processor core", AGGR_CORE), 2072 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, 2073 "aggregate counts per thread", AGGR_THREAD), 2074 OPT_UINTEGER('D', "delay", &initial_delay, 2075 "ms to wait before starting measurement after program start"), 2076 OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL, 2077 "Only print computed metrics. No raw values", enable_metric_only), 2078 OPT_BOOLEAN(0, "topdown", &topdown_run, 2079 "measure topdown level 1 statistics"), 2080 OPT_BOOLEAN(0, "smi-cost", &smi_cost, 2081 "measure SMI cost"), 2082 OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list", 2083 "monitor specified metrics or metric groups (separated by ,)", 2084 parse_metric_groups), 2085 OPT_END() 2086 }; 2087 2088 static int perf_stat__get_socket(struct cpu_map *map, int cpu) 2089 { 2090 return cpu_map__get_socket(map, cpu, NULL); 2091 } 2092 2093 static int perf_stat__get_core(struct cpu_map *map, int cpu) 2094 { 2095 return cpu_map__get_core(map, cpu, NULL); 2096 } 2097 2098 static int cpu_map__get_max(struct cpu_map *map) 2099 { 2100 int i, max = -1; 2101 2102 for (i = 0; i < map->nr; i++) { 2103 if (map->map[i] > max) 2104 max = map->map[i]; 2105 } 2106 2107 return max; 2108 } 2109 2110 static struct cpu_map *cpus_aggr_map; 2111 2112 static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx) 2113 { 2114 int cpu; 2115 2116 if (idx >= map->nr) 2117 return -1; 2118 2119 cpu = map->map[idx]; 2120 2121 if (cpus_aggr_map->map[cpu] == -1) 2122 cpus_aggr_map->map[cpu] = get_id(map, idx); 2123 2124 return cpus_aggr_map->map[cpu]; 2125 } 2126 2127 static int perf_stat__get_socket_cached(struct cpu_map *map, int idx) 2128 { 2129 return perf_stat__get_aggr(perf_stat__get_socket, map, idx); 2130 } 2131 2132 static int perf_stat__get_core_cached(struct cpu_map *map, int idx) 2133 { 2134 return perf_stat__get_aggr(perf_stat__get_core, map, idx); 2135 } 2136 2137 static int perf_stat_init_aggr_mode(void) 2138 { 2139 int nr; 2140 2141 switch (stat_config.aggr_mode) { 2142 case AGGR_SOCKET: 2143 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { 2144 perror("cannot build socket map"); 2145 return -1; 2146 } 2147 aggr_get_id = perf_stat__get_socket_cached; 2148 break; 2149 case AGGR_CORE: 2150 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { 2151 perror("cannot build core map"); 2152 return -1; 2153 } 2154 aggr_get_id = perf_stat__get_core_cached; 2155 break; 2156 case AGGR_NONE: 2157 case AGGR_GLOBAL: 2158 case AGGR_THREAD: 2159 case AGGR_UNSET: 2160 default: 2161 break; 2162 } 2163 2164 /* 2165 * The evsel_list->cpus is the base we operate on, 2166 * taking the highest cpu number to be the size of 2167 * the aggregation translate cpumap. 2168 */ 2169 nr = cpu_map__get_max(evsel_list->cpus); 2170 cpus_aggr_map = cpu_map__empty_new(nr + 1); 2171 return cpus_aggr_map ? 
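/*
 * (Aside, illustrative.)  cpus_aggr_map allocated just above is sized
 * to the highest CPU id + 1 and starts out all -1, so
 * perf_stat__get_aggr() can lazily memoize get_id() per CPU and
 * repeated socket/core lookups become a simple array read.
 */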

static void perf_stat__exit_aggr_mode(void)
{
	cpu_map__put(aggr_map);
	cpu_map__put(cpus_aggr_map);
	aggr_map = NULL;
	cpus_aggr_map = NULL;
}

static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx)
{
	int cpu;

	if (idx >= map->nr)
		return -1;

	cpu = map->map[idx];

	if (cpu >= env->nr_cpus_avail)
		return -1;

	return cpu;
}

static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
{
	struct perf_env *env = data;
	int cpu = perf_env__get_cpu(env, map, idx);

	return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
}

static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
{
	struct perf_env *env = data;
	int core = -1, cpu = perf_env__get_cpu(env, map, idx);

	if (cpu != -1) {
		int socket_id = env->cpu[cpu].socket_id;

		/*
		 * Encode the socket in the upper 16 bits.
		 * core_id is relative to the socket and we
		 * need a global id, so we combine
		 * socket + core id.
		 */
		core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff);
	}

	return core;
}

static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus,
				      struct cpu_map **sockp)
{
	return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
}

static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
				    struct cpu_map **corep)
{
	return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
}

static int perf_stat__get_socket_file(struct cpu_map *map, int idx)
{
	return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
}

static int perf_stat__get_core_file(struct cpu_map *map, int idx)
{
	return perf_env__get_core(map, idx, &perf_stat.session->header.env);
}

static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
{
	struct perf_env *env = &st->session->header.env;

	switch (stat_config.aggr_mode) {
	case AGGR_SOCKET:
		if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) {
			perror("cannot build socket map");
			return -1;
		}
		aggr_get_id = perf_stat__get_socket_file;
		break;
	case AGGR_CORE:
		if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) {
			perror("cannot build core map");
			return -1;
		}
		aggr_get_id = perf_stat__get_core_file;
		break;
	case AGGR_NONE:
	case AGGR_GLOBAL:
	case AGGR_THREAD:
	case AGGR_UNSET:
	default:
		break;
	}

	return 0;
}

static int topdown_filter_events(const char **attr, char **str, bool use_group)
{
	int off = 0;
	int i;
	int len = 0;
	char *s;

	for (i = 0; attr[i]; i++) {
		if (pmu_have_event("cpu", attr[i])) {
			len += strlen(attr[i]) + 1;
			attr[i - off] = attr[i];
		} else
			off++;
	}
	attr[i - off] = NULL;

	*str = malloc(len + 1 + 2);
	if (!*str)
		return -1;
	s = *str;
	if (i - off == 0) {
		*s = 0;
		return 0;
	}
	if (use_group)
		*s++ = '{';
	for (i = 0; attr[i]; i++) {
		strcpy(s, attr[i]);
		s += strlen(s);
		*s++ = ',';
	}
	if (use_group) {
		s[-1] = '}';
		*s = 0;
	} else
		s[-1] = 0;
	return 0;
}
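
/*
 * Example of the string topdown_filter_events() builds, assuming the "cpu"
 * PMU advertises all five topdown_attrs and use_group is true:
 *
 *   "{topdown-total-slots,topdown-slots-retired,topdown-recovery-bubbles,topdown-fetch-bubbles,topdown-slots-issued}"
 *
 * With use_group false, the braces are omitted and the trailing comma is
 * overwritten with the NUL terminator instead.
 */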

__weak bool arch_topdown_check_group(bool *warn)
{
	*warn = false;
	return false;
}

__weak void arch_topdown_group_warn(void)
{
}

/*
 * Add default attributes, if there were no attributes specified or
 * if -d/--detailed, -d -d or -d -d -d is used:
 */
static int add_default_attributes(void)
{
	int err;
	struct perf_event_attr default_attrs0[] = {

		{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
		{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
		{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
		{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },

		{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
	};
	struct perf_event_attr frontend_attrs[] = {
		{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
	};
	struct perf_event_attr backend_attrs[] = {
		{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
	};
	struct perf_event_attr default_attrs1[] = {
		{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
		{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
		{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },

	};

	/*
	 * Detailed stats (-d), covering the L1 and last level data caches:
	 */
	struct perf_event_attr detailed_attrs[] = {

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_L1D << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_L1D << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_LL << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_LL << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	};

	/*
	 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
	 */
	struct perf_event_attr very_detailed_attrs[] = {

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_L1I << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_L1I << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_DTLB << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_DTLB << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_ITLB << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_ITLB << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },

	};
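
	/*
	 * A worked example of the PERF_TYPE_HW_CACHE .config encoding used
	 * above: the cache id occupies bits 0-7, the operation bits 8-15
	 * and the result bits 16-23, so an L1D read miss is
	 *
	 *   PERF_COUNT_HW_CACHE_L1D (0) |
	 *   (PERF_COUNT_HW_CACHE_OP_READ (0) << 8) |
	 *   (PERF_COUNT_HW_CACHE_RESULT_MISS (1) << 16) == 0x10000
	 */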

	/*
	 * Very, very detailed stats (-d -d -d), adding prefetch events:
	 */
	struct perf_event_attr very_very_detailed_attrs[] = {

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_L1D << 0 |
			(PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			PERF_COUNT_HW_CACHE_L1D << 0 |
			(PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	};
	struct parse_events_error errinfo;

	/* Set attrs if no event is selected and !null_run: */
	if (null_run)
		return 0;

	if (transaction_run) {
		if (pmu_have_event("cpu", "cycles-ct") &&
		    pmu_have_event("cpu", "el-start"))
			err = parse_events(evsel_list, transaction_attrs,
					   &errinfo);
		else
			err = parse_events(evsel_list,
					   transaction_limited_attrs,
					   &errinfo);
		if (err) {
			fprintf(stderr, "Cannot set up transaction events\n");
			parse_events_print_error(&errinfo, transaction_attrs);
			return -1;
		}
		return 0;
	}

	if (smi_cost) {
		int smi;

		if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
			fprintf(stderr, "freeze_on_smi is not supported.\n");
			return -1;
		}

		if (!smi) {
			if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
				fprintf(stderr, "Failed to set freeze_on_smi.\n");
				return -1;
			}
			smi_reset = true;
		}

		if (pmu_have_event("msr", "aperf") &&
		    pmu_have_event("msr", "smi")) {
			if (!force_metric_only)
				metric_only = true;
			err = parse_events(evsel_list, smi_cost_attrs, &errinfo);
		} else {
			fprintf(stderr, "To measure SMI cost, "
				"msr/aperf/, msr/smi/ and cpu/cycles/ support is needed\n");
			parse_events_print_error(&errinfo, smi_cost_attrs);
			return -1;
		}
		if (err) {
			fprintf(stderr, "Cannot set up SMI cost events\n");
			return -1;
		}
		return 0;
	}

	if (topdown_run) {
		char *str = NULL;
		bool warn = false;

		if (stat_config.aggr_mode != AGGR_GLOBAL &&
		    stat_config.aggr_mode != AGGR_CORE) {
			pr_err("top down event configuration requires --per-core mode\n");
			return -1;
		}
		stat_config.aggr_mode = AGGR_CORE;
		if (nr_cgroups || !target__has_cpu(&target)) {
			pr_err("top down event configuration requires system-wide mode (-a)\n");
			return -1;
		}

		if (!force_metric_only)
			metric_only = true;
		if (topdown_filter_events(topdown_attrs, &str,
					  arch_topdown_check_group(&warn)) < 0) {
			pr_err("Out of memory\n");
			return -1;
		}
		if (topdown_attrs[0] && str) {
			if (warn)
				arch_topdown_group_warn();
			err = parse_events(evsel_list, str, &errinfo);
			if (err) {
				fprintf(stderr,
					"Cannot set up top down events %s: %d\n",
					str, err);
				free(str);
				parse_events_print_error(&errinfo, str);
				return -1;
			}
		} else {
			fprintf(stderr, "System does not support topdown\n");
			return -1;
		}
		free(str);
	}
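
	/*
	 * Illustrative use of the topdown path above, assuming a CPU whose
	 * "cpu" PMU exposes the topdown-* events:
	 *
	 *   $ perf stat --topdown -a -- sleep 1
	 *
	 * which, as coded above, forces per-core aggregation and
	 * metric-only output.
	 */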

	if (!evsel_list->nr_entries) {
		if (target__has_cpu(&target))
			default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;

		if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
			return -1;
		if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
			if (perf_evlist__add_default_attrs(evsel_list,
							   frontend_attrs) < 0)
				return -1;
		}
		if (pmu_have_event("cpu", "stalled-cycles-backend")) {
			if (perf_evlist__add_default_attrs(evsel_list,
							   backend_attrs) < 0)
				return -1;
		}
		if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
			return -1;
	}

	/* Detailed events get appended to the event list: */

	if (detailed_run < 1)
		return 0;

	/* Append detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
		return -1;

	if (detailed_run < 2)
		return 0;

	/* Append very detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
		return -1;

	if (detailed_run < 3)
		return 0;

	/* Append very, very detailed run extra attributes: */
	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
}

static const char * const stat_record_usage[] = {
	"perf stat record [<options>]",
	NULL,
};

static void init_features(struct perf_session *session)
{
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
	perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
	perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
	perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
}

static int __cmd_record(int argc, const char **argv)
{
	struct perf_session *session;
	struct perf_data *data = &perf_stat.data;

	argc = parse_options(argc, argv, stat_options, stat_record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);

	if (output_name)
		data->file.path = output_name;

	if (run_count != 1 || forever) {
		pr_err("Cannot use -r option with perf stat record.\n");
		return -1;
	}

	session = perf_session__new(data, false, NULL);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	init_features(session);

	session->evlist = evsel_list;
	perf_stat.session = session;
	perf_stat.record = true;
	return argc;
}
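
/*
 * Sketch of the record/report round trip set up above; counter data goes
 * to perf.data (or the --output file) and is replayed later:
 *
 *   $ perf stat record -e cycles -- sleep 1
 *   $ perf stat report
 */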

static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
				    union perf_event *event,
				    struct perf_session *session)
{
	struct stat_round_event *stat_round = &event->stat_round;
	struct perf_evsel *counter;
	struct timespec tsh, *ts = NULL;
	const char **argv = session->header.env.cmdline_argv;
	int argc = session->header.env.nr_cmdline;

	evlist__for_each_entry(evsel_list, counter)
		perf_stat_process_counter(&stat_config, counter);

	if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
		update_stats(&walltime_nsecs_stats, stat_round->time);

	if (stat_config.interval && stat_round->time) {
		tsh.tv_sec = stat_round->time / NSEC_PER_SEC;
		tsh.tv_nsec = stat_round->time % NSEC_PER_SEC;
		ts = &tsh;
	}

	print_counters(ts, argc, argv);
	return 0;
}

static
int process_stat_config_event(struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_session *session __maybe_unused)
{
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);

	perf_event__read_stat_config(&stat_config, &event->stat_config);

	if (cpu_map__empty(st->cpus)) {
		if (st->aggr_mode != AGGR_UNSET)
			pr_warning("warning: processing task data, aggregation mode not set\n");
		return 0;
	}

	if (st->aggr_mode != AGGR_UNSET)
		stat_config.aggr_mode = st->aggr_mode;

	if (perf_stat.data.is_pipe)
		perf_stat_init_aggr_mode();
	else
		perf_stat_init_aggr_mode_file(st);

	return 0;
}

static int set_maps(struct perf_stat *st)
{
	if (!st->cpus || !st->threads)
		return 0;

	if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
		return -EINVAL;

	perf_evlist__set_maps(evsel_list, st->cpus, st->threads);

	if (perf_evlist__alloc_stats(evsel_list, true))
		return -ENOMEM;

	st->maps_allocated = true;
	return 0;
}

static
int process_thread_map_event(struct perf_tool *tool,
			     union perf_event *event,
			     struct perf_session *session __maybe_unused)
{
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);

	if (st->threads) {
		pr_warning("Extra thread map event, ignoring.\n");
		return 0;
	}

	st->threads = thread_map__new_event(&event->thread_map);
	if (!st->threads)
		return -ENOMEM;

	return set_maps(st);
}

static
int process_cpu_map_event(struct perf_tool *tool,
			  union perf_event *event,
			  struct perf_session *session __maybe_unused)
{
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
	struct cpu_map *cpus;

	if (st->cpus) {
		pr_warning("Extra cpu map event, ignoring.\n");
		return 0;
	}

	cpus = cpu_map__new_data(&event->cpu_map.data);
	if (!cpus)
		return -ENOMEM;

	st->cpus = cpus;
	return set_maps(st);
}

static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
{
	int i;

	config->stats = calloc(nthreads, sizeof(struct runtime_stat));
	if (!config->stats)
		return -1;

	config->stats_num = nthreads;

	for (i = 0; i < nthreads; i++)
		runtime_stat__init(&config->stats[i]);

	return 0;
}

static void runtime_stat_delete(struct perf_stat_config *config)
{
	int i;

	if (!config->stats)
		return;

	for (i = 0; i < config->stats_num; i++)
		runtime_stat__exit(&config->stats[i]);

	free(config->stats);
}

static const char * const stat_report_usage[] = {
	"perf stat report [<options>]",
	NULL,
};

static struct perf_stat perf_stat = {
	.tool = {
		.attr		= perf_event__process_attr,
		.event_update	= perf_event__process_event_update,
		.thread_map	= process_thread_map_event,
		.cpu_map	= process_cpu_map_event,
		.stat_config	= process_stat_config_event,
		.stat		= perf_event__process_stat_event,
		.stat_round	= process_stat_round_event,
	},
	.aggr_mode = AGGR_UNSET,
};
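
/*
 * The perf_stat.tool callbacks above are what make 'perf stat report'
 * work: thread/cpu map events rebuild the maps via set_maps(), the
 * stat_config event restores the recorded aggregation mode, and each
 * stat_round event triggers printing, so replayed output mirrors a
 * live run.
 */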

static int __cmd_report(int argc, const char **argv)
{
	struct perf_session *session;
	const struct option options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
		     "aggregate counts per processor socket", AGGR_SOCKET),
	OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
		     "aggregate counts per physical processor core", AGGR_CORE),
	OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
		     "disable CPU count aggregation", AGGR_NONE),
	OPT_END()
	};
	struct stat st;
	int ret;

	argc = parse_options(argc, argv, options, stat_report_usage, 0);

	if (!input_name || !strlen(input_name)) {
		if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
			input_name = "-";
		else
			input_name = "perf.data";
	}

	perf_stat.data.file.path = input_name;
	perf_stat.data.mode = PERF_DATA_MODE_READ;

	session = perf_session__new(&perf_stat.data, false, &perf_stat.tool);
	if (session == NULL)
		return -1;

	perf_stat.session = session;
	stat_config.output = stderr;
	evsel_list = session->evlist;

	ret = perf_session__process_events(session);
	if (ret)
		return ret;

	perf_session__delete(session);
	return 0;
}

static void setup_system_wide(int forks)
{
	/*
	 * Make system wide (-a) the default target if
	 * no target was specified and one of the following
	 * conditions is met:
	 *
	 * - there's no workload specified
	 * - there is a workload specified but all requested
	 *   events are system wide events
	 */
	if (!target__none(&target))
		return;

	if (!forks)
		target.system_wide = true;
	else {
		struct perf_evsel *counter;

		evlist__for_each_entry(evsel_list, counter) {
			if (!counter->system_wide)
				return;
		}

		if (evsel_list->nr_entries)
			target.system_wide = true;
	}
}
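
/*
 * For example, a bare 'perf stat sleep 1' keeps the per-task target here,
 * while a run with no workload at all defaults to -a; an event list made
 * up entirely of system-wide events (typically uncore PMU events) would
 * also flip target.system_wide on.
 */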

int cmd_stat(int argc, const char **argv)
{
	const char * const stat_usage[] = {
		"perf stat [<options>] [<command>]",
		NULL
	};
	int status = -EINVAL, run_idx;
	const char *mode;
	FILE *output = stderr;
	unsigned int interval, timeout;
	const char * const stat_subcommands[] = { "record", "report" };

	setlocale(LC_ALL, "");

	evsel_list = perf_evlist__new();
	if (evsel_list == NULL)
		return -ENOMEM;

	parse_events__shrink_config_terms();
	argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
					(const char **) stat_usage,
					PARSE_OPT_STOP_AT_NON_OPTION);
	perf_stat__collect_metric_expr(evsel_list);
	perf_stat__init_shadow_stats();

	if (csv_sep) {
		csv_output = true;
		if (!strcmp(csv_sep, "\\t"))
			csv_sep = "\t";
	} else
		csv_sep = DEFAULT_SEPARATOR;

	if (argc && !strncmp(argv[0], "rec", 3)) {
		argc = __cmd_record(argc, argv);
		if (argc < 0)
			return -1;
	} else if (argc && !strncmp(argv[0], "rep", 3))
		return __cmd_report(argc, argv);

	interval = stat_config.interval;
	timeout = stat_config.timeout;

	/*
	 * For the record command, -o is already taken care of.
	 */
	if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
		output = NULL;

	if (output_name && output_fd) {
		fprintf(stderr, "cannot use both --output and --log-fd\n");
		parse_options_usage(stat_usage, stat_options, "o", 1);
		parse_options_usage(NULL, stat_options, "log-fd", 0);
		goto out;
	}

	if (metric_only && stat_config.aggr_mode == AGGR_THREAD) {
		fprintf(stderr, "--metric-only is not supported with --per-thread\n");
		goto out;
	}

	if (metric_only && run_count > 1) {
		fprintf(stderr, "--metric-only is not supported with -r\n");
		goto out;
	}

	if (walltime_run_table && run_count <= 1) {
		fprintf(stderr, "--table is only supported with -r\n");
		parse_options_usage(stat_usage, stat_options, "r", 1);
		parse_options_usage(NULL, stat_options, "table", 0);
		goto out;
	}

	if (output_fd < 0) {
		fprintf(stderr, "argument to --log-fd must be > 0\n");
		parse_options_usage(stat_usage, stat_options, "log-fd", 0);
		goto out;
	}

	if (!output) {
		struct timespec tm;
		mode = append_file ? "a" : "w";

		output = fopen(output_name, mode);
		if (!output) {
			perror("failed to create output file");
			return -1;
		}
		clock_gettime(CLOCK_REALTIME, &tm);
		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
	} else if (output_fd > 0) {
		mode = append_file ? "a" : "w";
		output = fdopen(output_fd, mode);
		if (!output) {
			perror("Failed opening logfd");
			return -errno;
		}
	}

	stat_config.output = output;

	/*
	 * let the spreadsheet do the pretty-printing
	 */
	if (csv_output) {
		/* User explicitly passed -B? */
		if (big_num_opt == 1) {
			fprintf(stderr, "-B option not supported with -x\n");
			parse_options_usage(stat_usage, stat_options, "B", 1);
			parse_options_usage(NULL, stat_options, "x", 1);
			goto out;
		} else /* Nope, so disable big number formatting */
			big_num = false;
	} else if (big_num_opt == 0) /* User passed --no-big-num */
		big_num = false;
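
	/*
	 * CSV sketch: '-x,' sets the separator to a comma (a literal
	 * two-character "\t" on the command line is translated to a tab
	 * earlier in this function), so each counter is emitted as one
	 * machine-parseable line, roughly:
	 *
	 *   1708.76,msec,task-clock,...
	 *
	 * The exact field layout is determined by the printing code
	 * elsewhere in this file.
	 */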

	setup_system_wide(argc);

	/*
	 * Display user/system times only for a single
	 * run and when there's a specified tracee.
	 */
	if ((run_count == 1) && target__none(&target))
		ru_display = true;

	if (run_count < 0) {
		pr_err("Run count must be a positive number\n");
		parse_options_usage(stat_usage, stat_options, "r", 1);
		goto out;
	} else if (run_count == 0) {
		forever = true;
		run_count = 1;
	}

	if (walltime_run_table) {
		walltime_run = zalloc(run_count * sizeof(walltime_run[0]));
		if (!walltime_run) {
			pr_err("failed to set up -r option");
			goto out;
		}
	}

	if ((stat_config.aggr_mode == AGGR_THREAD) &&
	    !target__has_task(&target)) {
		if (!target.system_wide || target.cpu_list) {
			fprintf(stderr, "The --per-thread option is only "
				"available when monitoring via -p -t -a "
				"options or only --per-thread.\n");
			parse_options_usage(NULL, stat_options, "p", 1);
			parse_options_usage(NULL, stat_options, "t", 1);
			goto out;
		}
	}

	/*
	 * no_aggr, cgroup are for system-wide only
	 * --per-thread is aggregated per thread, we don't mix it with cpu mode
	 */
	if (((stat_config.aggr_mode != AGGR_GLOBAL &&
	      stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
	    !target__has_cpu(&target)) {
		fprintf(stderr, "both cgroup and no-aggregation "
			"modes are only available in system-wide mode\n");

		parse_options_usage(stat_usage, stat_options, "G", 1);
		parse_options_usage(NULL, stat_options, "A", 1);
		parse_options_usage(NULL, stat_options, "a", 1);
		goto out;
	}

	if (add_default_attributes())
		goto out;

	target__validate(&target);

	if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
		target.per_thread = true;

	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
		if (target__has_task(&target)) {
			pr_err("Problems finding threads to monitor\n");
			parse_options_usage(stat_usage, stat_options, "p", 1);
			parse_options_usage(NULL, stat_options, "t", 1);
		} else if (target__has_cpu(&target)) {
			perror("failed to parse CPUs map");
			parse_options_usage(stat_usage, stat_options, "C", 1);
			parse_options_usage(NULL, stat_options, "a", 1);
		}
		goto out;
	}

	/*
	 * Initialize thread_map with comm names,
	 * so we can print them out on output.
	 */
	if (stat_config.aggr_mode == AGGR_THREAD) {
		thread_map__read_comms(evsel_list->threads);
		if (target.system_wide) {
			if (runtime_stat_new(&stat_config,
					     thread_map__nr(evsel_list->threads))) {
				goto out;
			}
		}
	}

	if (stat_config.times && interval)
		interval_count = true;
	else if (stat_config.times && !interval) {
		pr_err("interval-count option should be used together with "
		       "interval-print.\n");
		parse_options_usage(stat_usage, stat_options, "interval-count", 0);
		parse_options_usage(stat_usage, stat_options, "I", 1);
		goto out;
	}

	if (timeout && timeout < 100) {
		if (timeout < 10) {
			pr_err("timeout must be >= 10ms.\n");
			parse_options_usage(stat_usage, stat_options, "timeout", 0);
			goto out;
		} else
			pr_warning("timeout < 100ms. "
				   "The overhead percentage could be high in some cases. "
				   "Please proceed with caution.\n");
	}
	if (timeout && interval) {
		pr_err("timeout option is not supported with interval-print.\n");
		parse_options_usage(stat_usage, stat_options, "timeout", 0);
		parse_options_usage(stat_usage, stat_options, "I", 1);
		goto out;
	}

	if (perf_evlist__alloc_stats(evsel_list, interval))
		goto out;

	if (perf_stat_init_aggr_mode())
		goto out;

	/*
	 * We don't want to block the signals - that would cause
	 * child tasks to inherit that and Ctrl-C would not work.
	 * What we want is for Ctrl-C to work in the exec()-ed
	 * task, but being ignored by perf stat itself:
	 */
	atexit(sig_atexit);
	if (!forever)
		signal(SIGINT, skip_signal);
	signal(SIGCHLD, skip_signal);
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

	status = 0;
	for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
		if (run_count != 1 && verbose > 0)
			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
				run_idx + 1);

		status = run_perf_stat(argc, argv, run_idx);
		if (forever && status != -1) {
			print_counters(NULL, argc, argv);
			perf_stat__reset_stats();
		}
	}

	if (!forever && status != -1 && !interval)
		print_counters(NULL, argc, argv);

	if (STAT_RECORD) {
		/*
		 * We synthesize the kernel mmap record just so that older tools
		 * don't emit warnings about not being able to resolve symbols
		 * due to /proc/sys/kernel/kptr_restrict settings and instead provide
		 * a saner message about no samples being in the perf.data file.
		 *
		 * This also serves to suppress a warning about f_header.data.size == 0
		 * in header.c at the moment 'perf stat record' gets introduced, which
		 * is not really needed once we start adding the stat specific PERF_RECORD_
		 * records, but the need to suppress the kptr_restrict messages in older
		 * tools remains  -acme
		 */
		int fd = perf_data__fd(&perf_stat.data);
		int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
							     process_synthesized_event,
							     &perf_stat.session->machines.host);
		if (err) {
			pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
				   "older tools may produce warnings about this file.\n");
		}

		if (!interval) {
			if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
				pr_err("failed to write stat round event\n");
		}

		if (!perf_stat.data.is_pipe) {
			perf_stat.session->header.data_size += perf_stat.bytes_written;
			perf_session__write_header(perf_stat.session, evsel_list, fd, true);
		}

		perf_session__delete(perf_stat.session);
	}

	perf_stat__exit_aggr_mode();
	perf_evlist__free_stats(evsel_list);
out:
	free(walltime_run);

	if (smi_cost && smi_reset)
		sysfs__write_int(FREEZE_ON_SMI_PATH, 0);

	perf_evlist__delete(evsel_list);

	runtime_stat_delete(&stat_config);

	return status;
}