1 /* 2 * builtin-stat.c 3 * 4 * Builtin stat command: Give a precise performance counters summary 5 * overview about any workload, CPU or specific PID. 6 * 7 * Sample output: 8 9 $ perf stat ./hackbench 10 10 11 Time: 0.118 12 13 Performance counter stats for './hackbench 10': 14 15 1708.761321 task-clock # 11.037 CPUs utilized 16 41,190 context-switches # 0.024 M/sec 17 6,735 CPU-migrations # 0.004 M/sec 18 17,318 page-faults # 0.010 M/sec 19 5,205,202,243 cycles # 3.046 GHz 20 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle 21 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle 22 2,603,501,247 instructions # 0.50 insns per cycle 23 # 1.48 stalled cycles per insn 24 484,357,498 branches # 283.455 M/sec 25 6,388,934 branch-misses # 1.32% of all branches 26 27 0.154822978 seconds time elapsed 28 29 * 30 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 31 * 32 * Improvements and fixes by: 33 * 34 * Arjan van de Ven <arjan@linux.intel.com> 35 * Yanmin Zhang <yanmin.zhang@intel.com> 36 * Wu Fengguang <fengguang.wu@intel.com> 37 * Mike Galbraith <efault@gmx.de> 38 * Paul Mackerras <paulus@samba.org> 39 * Jaswinder Singh Rajput <jaswinder@kernel.org> 40 * 41 * Released under the GPL v2. (and only v2, not any later version) 42 */ 43 44 #include "perf.h" 45 #include "builtin.h" 46 #include "util/cgroup.h" 47 #include "util/util.h" 48 #include <subcmd/parse-options.h> 49 #include "util/parse-events.h" 50 #include "util/pmu.h" 51 #include "util/event.h" 52 #include "util/evlist.h" 53 #include "util/evsel.h" 54 #include "util/debug.h" 55 #include "util/drv_configs.h" 56 #include "util/color.h" 57 #include "util/stat.h" 58 #include "util/header.h" 59 #include "util/cpumap.h" 60 #include "util/thread.h" 61 #include "util/thread_map.h" 62 #include "util/counts.h" 63 #include "util/group.h" 64 #include "util/session.h" 65 #include "util/tool.h" 66 #include "util/string2.h" 67 #include "util/metricgroup.h" 68 #include "asm/bug.h" 69 70 #include <linux/time64.h> 71 #include <api/fs/fs.h> 72 #include <errno.h> 73 #include <signal.h> 74 #include <stdlib.h> 75 #include <sys/prctl.h> 76 #include <inttypes.h> 77 #include <locale.h> 78 #include <math.h> 79 #include <sys/types.h> 80 #include <sys/stat.h> 81 #include <sys/wait.h> 82 #include <unistd.h> 83 84 #include "sane_ctype.h" 85 86 #define DEFAULT_SEPARATOR " " 87 #define CNTR_NOT_SUPPORTED "<not supported>" 88 #define CNTR_NOT_COUNTED "<not counted>" 89 #define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi" 90 91 static void print_counters(struct timespec *ts, int argc, const char **argv); 92 93 /* Default events used for perf stat -T */ 94 static const char *transaction_attrs = { 95 "task-clock," 96 "{" 97 "instructions," 98 "cycles," 99 "cpu/cycles-t/," 100 "cpu/tx-start/," 101 "cpu/el-start/," 102 "cpu/cycles-ct/" 103 "}" 104 }; 105 106 /* More limited version when the CPU does not have all events. */ 107 static const char * transaction_limited_attrs = { 108 "task-clock," 109 "{" 110 "instructions," 111 "cycles," 112 "cpu/cycles-t/," 113 "cpu/tx-start/" 114 "}" 115 }; 116 117 static const char * topdown_attrs[] = { 118 "topdown-total-slots", 119 "topdown-slots-retired", 120 "topdown-recovery-bubbles", 121 "topdown-fetch-bubbles", 122 "topdown-slots-issued", 123 NULL, 124 }; 125 126 static const char *smi_cost_attrs = { 127 "{" 128 "msr/aperf/," 129 "msr/smi/," 130 "cycles" 131 "}" 132 }; 133 134 static struct perf_evlist *evsel_list; 135 136 static struct rblist metric_events; 137 138 static struct target target = { 139 .uid = UINT_MAX, 140 }; 141 142 typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu); 143 144 static int run_count = 1; 145 static bool no_inherit = false; 146 static volatile pid_t child_pid = -1; 147 static bool null_run = false; 148 static int detailed_run = 0; 149 static bool transaction_run; 150 static bool topdown_run = false; 151 static bool smi_cost = false; 152 static bool smi_reset = false; 153 static bool big_num = true; 154 static int big_num_opt = -1; 155 static const char *csv_sep = NULL; 156 static bool csv_output = false; 157 static bool group = false; 158 static const char *pre_cmd = NULL; 159 static const char *post_cmd = NULL; 160 static bool sync_run = false; 161 static unsigned int initial_delay = 0; 162 static unsigned int unit_width = 4; /* strlen("unit") */ 163 static bool forever = false; 164 static bool metric_only = false; 165 static bool force_metric_only = false; 166 static bool no_merge = false; 167 static struct timespec ref_time; 168 static struct cpu_map *aggr_map; 169 static aggr_get_id_t aggr_get_id; 170 static bool append_file; 171 static bool interval_count; 172 static const char *output_name; 173 static int output_fd; 174 static int print_free_counters_hint; 175 176 struct perf_stat { 177 bool record; 178 struct perf_data data; 179 struct perf_session *session; 180 u64 bytes_written; 181 struct perf_tool tool; 182 bool maps_allocated; 183 struct cpu_map *cpus; 184 struct thread_map *threads; 185 enum aggr_mode aggr_mode; 186 }; 187 188 static struct perf_stat perf_stat; 189 #define STAT_RECORD perf_stat.record 190 191 static volatile int done = 0; 192 193 static struct perf_stat_config stat_config = { 194 .aggr_mode = AGGR_GLOBAL, 195 .scale = true, 196 }; 197 198 static bool is_duration_time(struct perf_evsel *evsel) 199 { 200 return !strcmp(evsel->name, "duration_time"); 201 } 202 203 static inline void diff_timespec(struct timespec *r, struct timespec *a, 204 struct timespec *b) 205 { 206 r->tv_sec = a->tv_sec - b->tv_sec; 207 if (a->tv_nsec < b->tv_nsec) { 208 r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec; 209 r->tv_sec--; 210 } else { 211 r->tv_nsec = a->tv_nsec - b->tv_nsec ; 212 } 213 } 214 215 static void perf_stat__reset_stats(void) 216 { 217 int i; 218 219 perf_evlist__reset_stats(evsel_list); 220 perf_stat__reset_shadow_stats(); 221 222 for (i = 0; i < stat_config.stats_num; i++) 223 perf_stat__reset_shadow_per_stat(&stat_config.stats[i]); 224 } 225 226 static int create_perf_stat_counter(struct perf_evsel *evsel) 227 { 228 struct perf_event_attr *attr = &evsel->attr; 229 struct perf_evsel *leader = evsel->leader; 230 231 if (stat_config.scale) { 232 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 233 PERF_FORMAT_TOTAL_TIME_RUNNING; 234 } 235 236 /* 237 * The event is part of non trivial group, let's enable 238 * the group read (for leader) and ID retrieval for all 239 * members. 240 */ 241 if (leader->nr_members > 1) 242 attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; 243 244 attr->inherit = !no_inherit; 245 246 /* 247 * Some events get initialized with sample_(period/type) set, 248 * like tracepoints. Clear it up for counting. 249 */ 250 attr->sample_period = 0; 251 252 /* 253 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless 254 * while avoiding that older tools show confusing messages. 255 * 256 * However for pipe sessions we need to keep it zero, 257 * because script's perf_evsel__check_attr is triggered 258 * by attr->sample_type != 0, and we can't run it on 259 * stat sessions. 260 */ 261 if (!(STAT_RECORD && perf_stat.data.is_pipe)) 262 attr->sample_type = PERF_SAMPLE_IDENTIFIER; 263 264 /* 265 * Disabling all counters initially, they will be enabled 266 * either manually by us or by kernel via enable_on_exec 267 * set later. 268 */ 269 if (perf_evsel__is_group_leader(evsel)) { 270 attr->disabled = 1; 271 272 /* 273 * In case of initial_delay we enable tracee 274 * events manually. 275 */ 276 if (target__none(&target) && !initial_delay) 277 attr->enable_on_exec = 1; 278 } 279 280 if (target__has_cpu(&target) && !target__has_per_thread(&target)) 281 return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); 282 283 return perf_evsel__open_per_thread(evsel, evsel_list->threads); 284 } 285 286 /* 287 * Does the counter have nsecs as a unit? 288 */ 289 static inline int nsec_counter(struct perf_evsel *evsel) 290 { 291 if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || 292 perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 293 return 1; 294 295 return 0; 296 } 297 298 static int process_synthesized_event(struct perf_tool *tool __maybe_unused, 299 union perf_event *event, 300 struct perf_sample *sample __maybe_unused, 301 struct machine *machine __maybe_unused) 302 { 303 if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) { 304 pr_err("failed to write perf data, error: %m\n"); 305 return -1; 306 } 307 308 perf_stat.bytes_written += event->header.size; 309 return 0; 310 } 311 312 static int write_stat_round_event(u64 tm, u64 type) 313 { 314 return perf_event__synthesize_stat_round(NULL, tm, type, 315 process_synthesized_event, 316 NULL); 317 } 318 319 #define WRITE_STAT_ROUND_EVENT(time, interval) \ 320 write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval) 321 322 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) 323 324 static int 325 perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread, 326 struct perf_counts_values *count) 327 { 328 struct perf_sample_id *sid = SID(counter, cpu, thread); 329 330 return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count, 331 process_synthesized_event, NULL); 332 } 333 334 /* 335 * Read out the results of a single counter: 336 * do not aggregate counts across CPUs in system-wide mode 337 */ 338 static int read_counter(struct perf_evsel *counter) 339 { 340 int nthreads = thread_map__nr(evsel_list->threads); 341 int ncpus, cpu, thread; 342 343 if (target__has_cpu(&target) && !target__has_per_thread(&target)) 344 ncpus = perf_evsel__nr_cpus(counter); 345 else 346 ncpus = 1; 347 348 if (!counter->supported) 349 return -ENOENT; 350 351 if (counter->system_wide) 352 nthreads = 1; 353 354 for (thread = 0; thread < nthreads; thread++) { 355 for (cpu = 0; cpu < ncpus; cpu++) { 356 struct perf_counts_values *count; 357 358 count = perf_counts(counter->counts, cpu, thread); 359 360 /* 361 * The leader's group read loads data into its group members 362 * (via perf_evsel__read_counter) and sets threir count->loaded. 363 */ 364 if (!count->loaded && 365 perf_evsel__read_counter(counter, cpu, thread)) { 366 counter->counts->scaled = -1; 367 perf_counts(counter->counts, cpu, thread)->ena = 0; 368 perf_counts(counter->counts, cpu, thread)->run = 0; 369 return -1; 370 } 371 372 count->loaded = false; 373 374 if (STAT_RECORD) { 375 if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { 376 pr_err("failed to write stat event\n"); 377 return -1; 378 } 379 } 380 381 if (verbose > 1) { 382 fprintf(stat_config.output, 383 "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", 384 perf_evsel__name(counter), 385 cpu, 386 count->val, count->ena, count->run); 387 } 388 } 389 } 390 391 return 0; 392 } 393 394 static void read_counters(void) 395 { 396 struct perf_evsel *counter; 397 int ret; 398 399 evlist__for_each_entry(evsel_list, counter) { 400 ret = read_counter(counter); 401 if (ret) 402 pr_debug("failed to read counter %s\n", counter->name); 403 404 if (ret == 0 && perf_stat_process_counter(&stat_config, counter)) 405 pr_warning("failed to process counter %s\n", counter->name); 406 } 407 } 408 409 static void process_interval(void) 410 { 411 struct timespec ts, rs; 412 413 read_counters(); 414 415 clock_gettime(CLOCK_MONOTONIC, &ts); 416 diff_timespec(&rs, &ts, &ref_time); 417 418 if (STAT_RECORD) { 419 if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL)) 420 pr_err("failed to write stat round event\n"); 421 } 422 423 init_stats(&walltime_nsecs_stats); 424 update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000); 425 print_counters(&rs, 0, NULL); 426 } 427 428 static void enable_counters(void) 429 { 430 if (initial_delay) 431 usleep(initial_delay * USEC_PER_MSEC); 432 433 /* 434 * We need to enable counters only if: 435 * - we don't have tracee (attaching to task or cpu) 436 * - we have initial delay configured 437 */ 438 if (!target__none(&target) || initial_delay) 439 perf_evlist__enable(evsel_list); 440 } 441 442 static void disable_counters(void) 443 { 444 /* 445 * If we don't have tracee (attaching to task or cpu), counters may 446 * still be running. To get accurate group ratios, we must stop groups 447 * from counting before reading their constituent counters. 448 */ 449 if (!target__none(&target)) 450 perf_evlist__disable(evsel_list); 451 } 452 453 static volatile int workload_exec_errno; 454 455 /* 456 * perf_evlist__prepare_workload will send a SIGUSR1 457 * if the fork fails, since we asked by setting its 458 * want_signal to true. 459 */ 460 static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info, 461 void *ucontext __maybe_unused) 462 { 463 workload_exec_errno = info->si_value.sival_int; 464 } 465 466 static int perf_stat_synthesize_config(bool is_pipe) 467 { 468 int err; 469 470 if (is_pipe) { 471 err = perf_event__synthesize_attrs(NULL, perf_stat.session, 472 process_synthesized_event); 473 if (err < 0) { 474 pr_err("Couldn't synthesize attrs.\n"); 475 return err; 476 } 477 } 478 479 err = perf_event__synthesize_extra_attr(NULL, 480 evsel_list, 481 process_synthesized_event, 482 is_pipe); 483 484 err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads, 485 process_synthesized_event, 486 NULL); 487 if (err < 0) { 488 pr_err("Couldn't synthesize thread map.\n"); 489 return err; 490 } 491 492 err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus, 493 process_synthesized_event, NULL); 494 if (err < 0) { 495 pr_err("Couldn't synthesize thread map.\n"); 496 return err; 497 } 498 499 err = perf_event__synthesize_stat_config(NULL, &stat_config, 500 process_synthesized_event, NULL); 501 if (err < 0) { 502 pr_err("Couldn't synthesize config.\n"); 503 return err; 504 } 505 506 return 0; 507 } 508 509 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 510 511 static int __store_counter_ids(struct perf_evsel *counter) 512 { 513 int cpu, thread; 514 515 for (cpu = 0; cpu < xyarray__max_x(counter->fd); cpu++) { 516 for (thread = 0; thread < xyarray__max_y(counter->fd); 517 thread++) { 518 int fd = FD(counter, cpu, thread); 519 520 if (perf_evlist__id_add_fd(evsel_list, counter, 521 cpu, thread, fd) < 0) 522 return -1; 523 } 524 } 525 526 return 0; 527 } 528 529 static int store_counter_ids(struct perf_evsel *counter) 530 { 531 struct cpu_map *cpus = counter->cpus; 532 struct thread_map *threads = counter->threads; 533 534 if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr)) 535 return -ENOMEM; 536 537 return __store_counter_ids(counter); 538 } 539 540 static bool perf_evsel__should_store_id(struct perf_evsel *counter) 541 { 542 return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID; 543 } 544 545 static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel) 546 { 547 struct perf_evsel *c2, *leader; 548 bool is_open = true; 549 550 leader = evsel->leader; 551 pr_debug("Weak group for %s/%d failed\n", 552 leader->name, leader->nr_members); 553 554 /* 555 * for_each_group_member doesn't work here because it doesn't 556 * include the first entry. 557 */ 558 evlist__for_each_entry(evsel_list, c2) { 559 if (c2 == evsel) 560 is_open = false; 561 if (c2->leader == leader) { 562 if (is_open) 563 perf_evsel__close(c2); 564 c2->leader = c2; 565 c2->nr_members = 0; 566 } 567 } 568 return leader; 569 } 570 571 static int __run_perf_stat(int argc, const char **argv) 572 { 573 int interval = stat_config.interval; 574 int times = stat_config.times; 575 int timeout = stat_config.timeout; 576 char msg[BUFSIZ]; 577 unsigned long long t0, t1; 578 struct perf_evsel *counter; 579 struct timespec ts; 580 size_t l; 581 int status = 0; 582 const bool forks = (argc > 0); 583 bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false; 584 struct perf_evsel_config_term *err_term; 585 586 if (interval) { 587 ts.tv_sec = interval / USEC_PER_MSEC; 588 ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC; 589 } else if (timeout) { 590 ts.tv_sec = timeout / USEC_PER_MSEC; 591 ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC; 592 } else { 593 ts.tv_sec = 1; 594 ts.tv_nsec = 0; 595 } 596 597 if (forks) { 598 if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe, 599 workload_exec_failed_signal) < 0) { 600 perror("failed to prepare workload"); 601 return -1; 602 } 603 child_pid = evsel_list->workload.pid; 604 } 605 606 if (group) 607 perf_evlist__set_leader(evsel_list); 608 609 evlist__for_each_entry(evsel_list, counter) { 610 try_again: 611 if (create_perf_stat_counter(counter) < 0) { 612 613 /* Weak group failed. Reset the group. */ 614 if ((errno == EINVAL || errno == EBADF) && 615 counter->leader != counter && 616 counter->weak_group) { 617 counter = perf_evsel__reset_weak_group(counter); 618 goto try_again; 619 } 620 621 /* 622 * PPC returns ENXIO for HW counters until 2.6.37 623 * (behavior changed with commit b0a873e). 624 */ 625 if (errno == EINVAL || errno == ENOSYS || 626 errno == ENOENT || errno == EOPNOTSUPP || 627 errno == ENXIO) { 628 if (verbose > 0) 629 ui__warning("%s event is not supported by the kernel.\n", 630 perf_evsel__name(counter)); 631 counter->supported = false; 632 633 if ((counter->leader != counter) || 634 !(counter->leader->nr_members > 1)) 635 continue; 636 } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { 637 if (verbose > 0) 638 ui__warning("%s\n", msg); 639 goto try_again; 640 } else if (target__has_per_thread(&target) && 641 evsel_list->threads && 642 evsel_list->threads->err_thread != -1) { 643 /* 644 * For global --per-thread case, skip current 645 * error thread. 646 */ 647 if (!thread_map__remove(evsel_list->threads, 648 evsel_list->threads->err_thread)) { 649 evsel_list->threads->err_thread = -1; 650 goto try_again; 651 } 652 } 653 654 perf_evsel__open_strerror(counter, &target, 655 errno, msg, sizeof(msg)); 656 ui__error("%s\n", msg); 657 658 if (child_pid != -1) 659 kill(child_pid, SIGTERM); 660 661 return -1; 662 } 663 counter->supported = true; 664 665 l = strlen(counter->unit); 666 if (l > unit_width) 667 unit_width = l; 668 669 if (perf_evsel__should_store_id(counter) && 670 store_counter_ids(counter)) 671 return -1; 672 } 673 674 if (perf_evlist__apply_filters(evsel_list, &counter)) { 675 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 676 counter->filter, perf_evsel__name(counter), errno, 677 str_error_r(errno, msg, sizeof(msg))); 678 return -1; 679 } 680 681 if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) { 682 pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", 683 err_term->val.drv_cfg, perf_evsel__name(counter), errno, 684 str_error_r(errno, msg, sizeof(msg))); 685 return -1; 686 } 687 688 if (STAT_RECORD) { 689 int err, fd = perf_data__fd(&perf_stat.data); 690 691 if (is_pipe) { 692 err = perf_header__write_pipe(perf_data__fd(&perf_stat.data)); 693 } else { 694 err = perf_session__write_header(perf_stat.session, evsel_list, 695 fd, false); 696 } 697 698 if (err < 0) 699 return err; 700 701 err = perf_stat_synthesize_config(is_pipe); 702 if (err < 0) 703 return err; 704 } 705 706 /* 707 * Enable counters and exec the command: 708 */ 709 t0 = rdclock(); 710 clock_gettime(CLOCK_MONOTONIC, &ref_time); 711 712 if (forks) { 713 perf_evlist__start_workload(evsel_list); 714 enable_counters(); 715 716 if (interval || timeout) { 717 while (!waitpid(child_pid, &status, WNOHANG)) { 718 nanosleep(&ts, NULL); 719 if (timeout) 720 break; 721 process_interval(); 722 if (interval_count && !(--times)) 723 break; 724 } 725 } 726 waitpid(child_pid, &status, 0); 727 728 if (workload_exec_errno) { 729 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 730 pr_err("Workload failed: %s\n", emsg); 731 return -1; 732 } 733 734 if (WIFSIGNALED(status)) 735 psignal(WTERMSIG(status), argv[0]); 736 } else { 737 enable_counters(); 738 while (!done) { 739 nanosleep(&ts, NULL); 740 if (timeout) 741 break; 742 if (interval) { 743 process_interval(); 744 if (interval_count && !(--times)) 745 break; 746 } 747 } 748 } 749 750 disable_counters(); 751 752 t1 = rdclock(); 753 754 update_stats(&walltime_nsecs_stats, t1 - t0); 755 756 /* 757 * Closing a group leader splits the group, and as we only disable 758 * group leaders, results in remaining events becoming enabled. To 759 * avoid arbitrary skew, we must read all counters before closing any 760 * group leaders. 761 */ 762 read_counters(); 763 perf_evlist__close(evsel_list); 764 765 return WEXITSTATUS(status); 766 } 767 768 static int run_perf_stat(int argc, const char **argv) 769 { 770 int ret; 771 772 if (pre_cmd) { 773 ret = system(pre_cmd); 774 if (ret) 775 return ret; 776 } 777 778 if (sync_run) 779 sync(); 780 781 ret = __run_perf_stat(argc, argv); 782 if (ret) 783 return ret; 784 785 if (post_cmd) { 786 ret = system(post_cmd); 787 if (ret) 788 return ret; 789 } 790 791 return ret; 792 } 793 794 static void print_running(u64 run, u64 ena) 795 { 796 if (csv_output) { 797 fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", 798 csv_sep, 799 run, 800 csv_sep, 801 ena ? 100.0 * run / ena : 100.0); 802 } else if (run != ena) { 803 fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); 804 } 805 } 806 807 static void print_noise_pct(double total, double avg) 808 { 809 double pct = rel_stddev_stats(total, avg); 810 811 if (csv_output) 812 fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct); 813 else if (pct) 814 fprintf(stat_config.output, " ( +-%6.2f%% )", pct); 815 } 816 817 static void print_noise(struct perf_evsel *evsel, double avg) 818 { 819 struct perf_stat_evsel *ps; 820 821 if (run_count == 1) 822 return; 823 824 ps = evsel->stats; 825 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 826 } 827 828 static void aggr_printout(struct perf_evsel *evsel, int id, int nr) 829 { 830 switch (stat_config.aggr_mode) { 831 case AGGR_CORE: 832 fprintf(stat_config.output, "S%d-C%*d%s%*d%s", 833 cpu_map__id_to_socket(id), 834 csv_output ? 0 : -8, 835 cpu_map__id_to_cpu(id), 836 csv_sep, 837 csv_output ? 0 : 4, 838 nr, 839 csv_sep); 840 break; 841 case AGGR_SOCKET: 842 fprintf(stat_config.output, "S%*d%s%*d%s", 843 csv_output ? 0 : -5, 844 id, 845 csv_sep, 846 csv_output ? 0 : 4, 847 nr, 848 csv_sep); 849 break; 850 case AGGR_NONE: 851 fprintf(stat_config.output, "CPU%*d%s", 852 csv_output ? 0 : -4, 853 perf_evsel__cpus(evsel)->map[id], csv_sep); 854 break; 855 case AGGR_THREAD: 856 fprintf(stat_config.output, "%*s-%*d%s", 857 csv_output ? 0 : 16, 858 thread_map__comm(evsel->threads, id), 859 csv_output ? 0 : -8, 860 thread_map__pid(evsel->threads, id), 861 csv_sep); 862 break; 863 case AGGR_GLOBAL: 864 case AGGR_UNSET: 865 default: 866 break; 867 } 868 } 869 870 struct outstate { 871 FILE *fh; 872 bool newline; 873 const char *prefix; 874 int nfields; 875 int id, nr; 876 struct perf_evsel *evsel; 877 }; 878 879 #define METRIC_LEN 35 880 881 static void new_line_std(void *ctx) 882 { 883 struct outstate *os = ctx; 884 885 os->newline = true; 886 } 887 888 static void do_new_line_std(struct outstate *os) 889 { 890 fputc('\n', os->fh); 891 fputs(os->prefix, os->fh); 892 aggr_printout(os->evsel, os->id, os->nr); 893 if (stat_config.aggr_mode == AGGR_NONE) 894 fprintf(os->fh, " "); 895 fprintf(os->fh, " "); 896 } 897 898 static void print_metric_std(void *ctx, const char *color, const char *fmt, 899 const char *unit, double val) 900 { 901 struct outstate *os = ctx; 902 FILE *out = os->fh; 903 int n; 904 bool newline = os->newline; 905 906 os->newline = false; 907 908 if (unit == NULL || fmt == NULL) { 909 fprintf(out, "%-*s", METRIC_LEN, ""); 910 return; 911 } 912 913 if (newline) 914 do_new_line_std(os); 915 916 n = fprintf(out, " # "); 917 if (color) 918 n += color_fprintf(out, color, fmt, val); 919 else 920 n += fprintf(out, fmt, val); 921 fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); 922 } 923 924 static void new_line_csv(void *ctx) 925 { 926 struct outstate *os = ctx; 927 int i; 928 929 fputc('\n', os->fh); 930 if (os->prefix) 931 fprintf(os->fh, "%s%s", os->prefix, csv_sep); 932 aggr_printout(os->evsel, os->id, os->nr); 933 for (i = 0; i < os->nfields; i++) 934 fputs(csv_sep, os->fh); 935 } 936 937 static void print_metric_csv(void *ctx, 938 const char *color __maybe_unused, 939 const char *fmt, const char *unit, double val) 940 { 941 struct outstate *os = ctx; 942 FILE *out = os->fh; 943 char buf[64], *vals, *ends; 944 945 if (unit == NULL || fmt == NULL) { 946 fprintf(out, "%s%s", csv_sep, csv_sep); 947 return; 948 } 949 snprintf(buf, sizeof(buf), fmt, val); 950 ends = vals = ltrim(buf); 951 while (isdigit(*ends) || *ends == '.') 952 ends++; 953 *ends = 0; 954 while (isspace(*unit)) 955 unit++; 956 fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); 957 } 958 959 #define METRIC_ONLY_LEN 20 960 961 /* Filter out some columns that don't work well in metrics only mode */ 962 963 static bool valid_only_metric(const char *unit) 964 { 965 if (!unit) 966 return false; 967 if (strstr(unit, "/sec") || 968 strstr(unit, "hz") || 969 strstr(unit, "Hz") || 970 strstr(unit, "CPUs utilized")) 971 return false; 972 return true; 973 } 974 975 static const char *fixunit(char *buf, struct perf_evsel *evsel, 976 const char *unit) 977 { 978 if (!strncmp(unit, "of all", 6)) { 979 snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), 980 unit); 981 return buf; 982 } 983 return unit; 984 } 985 986 static void print_metric_only(void *ctx, const char *color, const char *fmt, 987 const char *unit, double val) 988 { 989 struct outstate *os = ctx; 990 FILE *out = os->fh; 991 int n; 992 char buf[1024]; 993 unsigned mlen = METRIC_ONLY_LEN; 994 995 if (!valid_only_metric(unit)) 996 return; 997 unit = fixunit(buf, os->evsel, unit); 998 if (color) 999 n = color_fprintf(out, color, fmt, val); 1000 else 1001 n = fprintf(out, fmt, val); 1002 if (n > METRIC_ONLY_LEN) 1003 n = METRIC_ONLY_LEN; 1004 if (mlen < strlen(unit)) 1005 mlen = strlen(unit) + 1; 1006 fprintf(out, "%*s", mlen - n, ""); 1007 } 1008 1009 static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, 1010 const char *fmt, 1011 const char *unit, double val) 1012 { 1013 struct outstate *os = ctx; 1014 FILE *out = os->fh; 1015 char buf[64], *vals, *ends; 1016 char tbuf[1024]; 1017 1018 if (!valid_only_metric(unit)) 1019 return; 1020 unit = fixunit(tbuf, os->evsel, unit); 1021 snprintf(buf, sizeof buf, fmt, val); 1022 ends = vals = ltrim(buf); 1023 while (isdigit(*ends) || *ends == '.') 1024 ends++; 1025 *ends = 0; 1026 fprintf(out, "%s%s", vals, csv_sep); 1027 } 1028 1029 static void new_line_metric(void *ctx __maybe_unused) 1030 { 1031 } 1032 1033 static void print_metric_header(void *ctx, const char *color __maybe_unused, 1034 const char *fmt __maybe_unused, 1035 const char *unit, double val __maybe_unused) 1036 { 1037 struct outstate *os = ctx; 1038 char tbuf[1024]; 1039 1040 if (!valid_only_metric(unit)) 1041 return; 1042 unit = fixunit(tbuf, os->evsel, unit); 1043 if (csv_output) 1044 fprintf(os->fh, "%s%s", unit, csv_sep); 1045 else 1046 fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit); 1047 } 1048 1049 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) 1050 { 1051 FILE *output = stat_config.output; 1052 double msecs = avg / NSEC_PER_MSEC; 1053 const char *fmt_v, *fmt_n; 1054 char name[25]; 1055 1056 fmt_v = csv_output ? "%.6f%s" : "%18.6f%s"; 1057 fmt_n = csv_output ? "%s" : "%-25s"; 1058 1059 aggr_printout(evsel, id, nr); 1060 1061 scnprintf(name, sizeof(name), "%s%s", 1062 perf_evsel__name(evsel), csv_output ? "" : " (msec)"); 1063 1064 fprintf(output, fmt_v, msecs, csv_sep); 1065 1066 if (csv_output) 1067 fprintf(output, "%s%s", evsel->unit, csv_sep); 1068 else 1069 fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep); 1070 1071 fprintf(output, fmt_n, name); 1072 1073 if (evsel->cgrp) 1074 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1075 } 1076 1077 static int first_shadow_cpu(struct perf_evsel *evsel, int id) 1078 { 1079 int i; 1080 1081 if (!aggr_get_id) 1082 return 0; 1083 1084 if (stat_config.aggr_mode == AGGR_NONE) 1085 return id; 1086 1087 if (stat_config.aggr_mode == AGGR_GLOBAL) 1088 return 0; 1089 1090 for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { 1091 int cpu2 = perf_evsel__cpus(evsel)->map[i]; 1092 1093 if (aggr_get_id(evsel_list->cpus, cpu2) == id) 1094 return cpu2; 1095 } 1096 return 0; 1097 } 1098 1099 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 1100 { 1101 FILE *output = stat_config.output; 1102 double sc = evsel->scale; 1103 const char *fmt; 1104 1105 if (csv_output) { 1106 fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; 1107 } else { 1108 if (big_num) 1109 fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; 1110 else 1111 fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; 1112 } 1113 1114 aggr_printout(evsel, id, nr); 1115 1116 fprintf(output, fmt, avg, csv_sep); 1117 1118 if (evsel->unit) 1119 fprintf(output, "%-*s%s", 1120 csv_output ? 0 : unit_width, 1121 evsel->unit, csv_sep); 1122 1123 fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); 1124 1125 if (evsel->cgrp) 1126 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1127 } 1128 1129 static void printout(int id, int nr, struct perf_evsel *counter, double uval, 1130 char *prefix, u64 run, u64 ena, double noise, 1131 struct runtime_stat *st) 1132 { 1133 struct perf_stat_output_ctx out; 1134 struct outstate os = { 1135 .fh = stat_config.output, 1136 .prefix = prefix ? prefix : "", 1137 .id = id, 1138 .nr = nr, 1139 .evsel = counter, 1140 }; 1141 print_metric_t pm = print_metric_std; 1142 void (*nl)(void *); 1143 1144 if (metric_only) { 1145 nl = new_line_metric; 1146 if (csv_output) 1147 pm = print_metric_only_csv; 1148 else 1149 pm = print_metric_only; 1150 } else 1151 nl = new_line_std; 1152 1153 if (csv_output && !metric_only) { 1154 static int aggr_fields[] = { 1155 [AGGR_GLOBAL] = 0, 1156 [AGGR_THREAD] = 1, 1157 [AGGR_NONE] = 1, 1158 [AGGR_SOCKET] = 2, 1159 [AGGR_CORE] = 2, 1160 }; 1161 1162 pm = print_metric_csv; 1163 nl = new_line_csv; 1164 os.nfields = 3; 1165 os.nfields += aggr_fields[stat_config.aggr_mode]; 1166 if (counter->cgrp) 1167 os.nfields++; 1168 } 1169 if (run == 0 || ena == 0 || counter->counts->scaled == -1) { 1170 if (metric_only) { 1171 pm(&os, NULL, "", "", 0); 1172 return; 1173 } 1174 aggr_printout(counter, id, nr); 1175 1176 fprintf(stat_config.output, "%*s%s", 1177 csv_output ? 0 : 18, 1178 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 1179 csv_sep); 1180 1181 if (counter->supported) 1182 print_free_counters_hint = 1; 1183 1184 fprintf(stat_config.output, "%-*s%s", 1185 csv_output ? 0 : unit_width, 1186 counter->unit, csv_sep); 1187 1188 fprintf(stat_config.output, "%*s", 1189 csv_output ? 0 : -25, 1190 perf_evsel__name(counter)); 1191 1192 if (counter->cgrp) 1193 fprintf(stat_config.output, "%s%s", 1194 csv_sep, counter->cgrp->name); 1195 1196 if (!csv_output) 1197 pm(&os, NULL, NULL, "", 0); 1198 print_noise(counter, noise); 1199 print_running(run, ena); 1200 if (csv_output) 1201 pm(&os, NULL, NULL, "", 0); 1202 return; 1203 } 1204 1205 if (metric_only) 1206 /* nothing */; 1207 else if (nsec_counter(counter)) 1208 nsec_printout(id, nr, counter, uval); 1209 else 1210 abs_printout(id, nr, counter, uval); 1211 1212 out.print_metric = pm; 1213 out.new_line = nl; 1214 out.ctx = &os; 1215 out.force_header = false; 1216 1217 if (csv_output && !metric_only) { 1218 print_noise(counter, noise); 1219 print_running(run, ena); 1220 } 1221 1222 perf_stat__print_shadow_stats(counter, uval, 1223 first_shadow_cpu(counter, id), 1224 &out, &metric_events, st); 1225 if (!csv_output && !metric_only) { 1226 print_noise(counter, noise); 1227 print_running(run, ena); 1228 } 1229 } 1230 1231 static void aggr_update_shadow(void) 1232 { 1233 int cpu, s2, id, s; 1234 u64 val; 1235 struct perf_evsel *counter; 1236 1237 for (s = 0; s < aggr_map->nr; s++) { 1238 id = aggr_map->map[s]; 1239 evlist__for_each_entry(evsel_list, counter) { 1240 val = 0; 1241 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1242 s2 = aggr_get_id(evsel_list->cpus, cpu); 1243 if (s2 != id) 1244 continue; 1245 val += perf_counts(counter->counts, cpu, 0)->val; 1246 } 1247 perf_stat__update_shadow_stats(counter, val, 1248 first_shadow_cpu(counter, id), 1249 &rt_stat); 1250 } 1251 } 1252 } 1253 1254 static void uniquify_event_name(struct perf_evsel *counter) 1255 { 1256 char *new_name; 1257 char *config; 1258 1259 if (!counter->pmu_name || !strncmp(counter->name, counter->pmu_name, 1260 strlen(counter->pmu_name))) 1261 return; 1262 1263 config = strchr(counter->name, '/'); 1264 if (config) { 1265 if (asprintf(&new_name, 1266 "%s%s", counter->pmu_name, config) > 0) { 1267 free(counter->name); 1268 counter->name = new_name; 1269 } 1270 } else { 1271 if (asprintf(&new_name, 1272 "%s [%s]", counter->name, counter->pmu_name) > 0) { 1273 free(counter->name); 1274 counter->name = new_name; 1275 } 1276 } 1277 } 1278 1279 static void collect_all_aliases(struct perf_evsel *counter, 1280 void (*cb)(struct perf_evsel *counter, void *data, 1281 bool first), 1282 void *data) 1283 { 1284 struct perf_evsel *alias; 1285 1286 alias = list_prepare_entry(counter, &(evsel_list->entries), node); 1287 list_for_each_entry_continue (alias, &evsel_list->entries, node) { 1288 if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) || 1289 alias->scale != counter->scale || 1290 alias->cgrp != counter->cgrp || 1291 strcmp(alias->unit, counter->unit) || 1292 nsec_counter(alias) != nsec_counter(counter)) 1293 break; 1294 alias->merged_stat = true; 1295 cb(alias, data, false); 1296 } 1297 } 1298 1299 static bool collect_data(struct perf_evsel *counter, 1300 void (*cb)(struct perf_evsel *counter, void *data, 1301 bool first), 1302 void *data) 1303 { 1304 if (counter->merged_stat) 1305 return false; 1306 cb(counter, data, true); 1307 if (no_merge) 1308 uniquify_event_name(counter); 1309 else if (counter->auto_merge_stats) 1310 collect_all_aliases(counter, cb, data); 1311 return true; 1312 } 1313 1314 struct aggr_data { 1315 u64 ena, run, val; 1316 int id; 1317 int nr; 1318 int cpu; 1319 }; 1320 1321 static void aggr_cb(struct perf_evsel *counter, void *data, bool first) 1322 { 1323 struct aggr_data *ad = data; 1324 int cpu, s2; 1325 1326 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1327 struct perf_counts_values *counts; 1328 1329 s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); 1330 if (s2 != ad->id) 1331 continue; 1332 if (first) 1333 ad->nr++; 1334 counts = perf_counts(counter->counts, cpu, 0); 1335 /* 1336 * When any result is bad, make them all to give 1337 * consistent output in interval mode. 1338 */ 1339 if (counts->ena == 0 || counts->run == 0 || 1340 counter->counts->scaled == -1) { 1341 ad->ena = 0; 1342 ad->run = 0; 1343 break; 1344 } 1345 ad->val += counts->val; 1346 ad->ena += counts->ena; 1347 ad->run += counts->run; 1348 } 1349 } 1350 1351 static void print_aggr(char *prefix) 1352 { 1353 FILE *output = stat_config.output; 1354 struct perf_evsel *counter; 1355 int s, id, nr; 1356 double uval; 1357 u64 ena, run, val; 1358 bool first; 1359 1360 if (!(aggr_map || aggr_get_id)) 1361 return; 1362 1363 aggr_update_shadow(); 1364 1365 /* 1366 * With metric_only everything is on a single line. 1367 * Without each counter has its own line. 1368 */ 1369 for (s = 0; s < aggr_map->nr; s++) { 1370 struct aggr_data ad; 1371 if (prefix && metric_only) 1372 fprintf(output, "%s", prefix); 1373 1374 ad.id = id = aggr_map->map[s]; 1375 first = true; 1376 evlist__for_each_entry(evsel_list, counter) { 1377 if (is_duration_time(counter)) 1378 continue; 1379 1380 ad.val = ad.ena = ad.run = 0; 1381 ad.nr = 0; 1382 if (!collect_data(counter, aggr_cb, &ad)) 1383 continue; 1384 nr = ad.nr; 1385 ena = ad.ena; 1386 run = ad.run; 1387 val = ad.val; 1388 if (first && metric_only) { 1389 first = false; 1390 aggr_printout(counter, id, nr); 1391 } 1392 if (prefix && !metric_only) 1393 fprintf(output, "%s", prefix); 1394 1395 uval = val * counter->scale; 1396 printout(id, nr, counter, uval, prefix, run, ena, 1.0, 1397 &rt_stat); 1398 if (!metric_only) 1399 fputc('\n', output); 1400 } 1401 if (metric_only) 1402 fputc('\n', output); 1403 } 1404 } 1405 1406 static int cmp_val(const void *a, const void *b) 1407 { 1408 return ((struct perf_aggr_thread_value *)b)->val - 1409 ((struct perf_aggr_thread_value *)a)->val; 1410 } 1411 1412 static struct perf_aggr_thread_value *sort_aggr_thread( 1413 struct perf_evsel *counter, 1414 int nthreads, int ncpus, 1415 int *ret) 1416 { 1417 int cpu, thread, i = 0; 1418 double uval; 1419 struct perf_aggr_thread_value *buf; 1420 1421 buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value)); 1422 if (!buf) 1423 return NULL; 1424 1425 for (thread = 0; thread < nthreads; thread++) { 1426 u64 ena = 0, run = 0, val = 0; 1427 1428 for (cpu = 0; cpu < ncpus; cpu++) { 1429 val += perf_counts(counter->counts, cpu, thread)->val; 1430 ena += perf_counts(counter->counts, cpu, thread)->ena; 1431 run += perf_counts(counter->counts, cpu, thread)->run; 1432 } 1433 1434 uval = val * counter->scale; 1435 1436 /* 1437 * Skip value 0 when enabling --per-thread globally, 1438 * otherwise too many 0 output. 1439 */ 1440 if (uval == 0.0 && target__has_per_thread(&target)) 1441 continue; 1442 1443 buf[i].counter = counter; 1444 buf[i].id = thread; 1445 buf[i].uval = uval; 1446 buf[i].val = val; 1447 buf[i].run = run; 1448 buf[i].ena = ena; 1449 i++; 1450 } 1451 1452 qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val); 1453 1454 if (ret) 1455 *ret = i; 1456 1457 return buf; 1458 } 1459 1460 static void print_aggr_thread(struct perf_evsel *counter, char *prefix) 1461 { 1462 FILE *output = stat_config.output; 1463 int nthreads = thread_map__nr(counter->threads); 1464 int ncpus = cpu_map__nr(counter->cpus); 1465 int thread, sorted_threads, id; 1466 struct perf_aggr_thread_value *buf; 1467 1468 buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads); 1469 if (!buf) { 1470 perror("cannot sort aggr thread"); 1471 return; 1472 } 1473 1474 for (thread = 0; thread < sorted_threads; thread++) { 1475 if (prefix) 1476 fprintf(output, "%s", prefix); 1477 1478 id = buf[thread].id; 1479 if (stat_config.stats) 1480 printout(id, 0, buf[thread].counter, buf[thread].uval, 1481 prefix, buf[thread].run, buf[thread].ena, 1.0, 1482 &stat_config.stats[id]); 1483 else 1484 printout(id, 0, buf[thread].counter, buf[thread].uval, 1485 prefix, buf[thread].run, buf[thread].ena, 1.0, 1486 &rt_stat); 1487 fputc('\n', output); 1488 } 1489 1490 free(buf); 1491 } 1492 1493 struct caggr_data { 1494 double avg, avg_enabled, avg_running; 1495 }; 1496 1497 static void counter_aggr_cb(struct perf_evsel *counter, void *data, 1498 bool first __maybe_unused) 1499 { 1500 struct caggr_data *cd = data; 1501 struct perf_stat_evsel *ps = counter->stats; 1502 1503 cd->avg += avg_stats(&ps->res_stats[0]); 1504 cd->avg_enabled += avg_stats(&ps->res_stats[1]); 1505 cd->avg_running += avg_stats(&ps->res_stats[2]); 1506 } 1507 1508 /* 1509 * Print out the results of a single counter: 1510 * aggregated counts in system-wide mode 1511 */ 1512 static void print_counter_aggr(struct perf_evsel *counter, char *prefix) 1513 { 1514 FILE *output = stat_config.output; 1515 double uval; 1516 struct caggr_data cd = { .avg = 0.0 }; 1517 1518 if (!collect_data(counter, counter_aggr_cb, &cd)) 1519 return; 1520 1521 if (prefix && !metric_only) 1522 fprintf(output, "%s", prefix); 1523 1524 uval = cd.avg * counter->scale; 1525 printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, 1526 cd.avg, &rt_stat); 1527 if (!metric_only) 1528 fprintf(output, "\n"); 1529 } 1530 1531 static void counter_cb(struct perf_evsel *counter, void *data, 1532 bool first __maybe_unused) 1533 { 1534 struct aggr_data *ad = data; 1535 1536 ad->val += perf_counts(counter->counts, ad->cpu, 0)->val; 1537 ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena; 1538 ad->run += perf_counts(counter->counts, ad->cpu, 0)->run; 1539 } 1540 1541 /* 1542 * Print out the results of a single counter: 1543 * does not use aggregated count in system-wide 1544 */ 1545 static void print_counter(struct perf_evsel *counter, char *prefix) 1546 { 1547 FILE *output = stat_config.output; 1548 u64 ena, run, val; 1549 double uval; 1550 int cpu; 1551 1552 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1553 struct aggr_data ad = { .cpu = cpu }; 1554 1555 if (!collect_data(counter, counter_cb, &ad)) 1556 return; 1557 val = ad.val; 1558 ena = ad.ena; 1559 run = ad.run; 1560 1561 if (prefix) 1562 fprintf(output, "%s", prefix); 1563 1564 uval = val * counter->scale; 1565 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, 1566 &rt_stat); 1567 1568 fputc('\n', output); 1569 } 1570 } 1571 1572 static void print_no_aggr_metric(char *prefix) 1573 { 1574 int cpu; 1575 int nrcpus = 0; 1576 struct perf_evsel *counter; 1577 u64 ena, run, val; 1578 double uval; 1579 1580 nrcpus = evsel_list->cpus->nr; 1581 for (cpu = 0; cpu < nrcpus; cpu++) { 1582 bool first = true; 1583 1584 if (prefix) 1585 fputs(prefix, stat_config.output); 1586 evlist__for_each_entry(evsel_list, counter) { 1587 if (is_duration_time(counter)) 1588 continue; 1589 if (first) { 1590 aggr_printout(counter, cpu, 0); 1591 first = false; 1592 } 1593 val = perf_counts(counter->counts, cpu, 0)->val; 1594 ena = perf_counts(counter->counts, cpu, 0)->ena; 1595 run = perf_counts(counter->counts, cpu, 0)->run; 1596 1597 uval = val * counter->scale; 1598 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, 1599 &rt_stat); 1600 } 1601 fputc('\n', stat_config.output); 1602 } 1603 } 1604 1605 static int aggr_header_lens[] = { 1606 [AGGR_CORE] = 18, 1607 [AGGR_SOCKET] = 12, 1608 [AGGR_NONE] = 6, 1609 [AGGR_THREAD] = 24, 1610 [AGGR_GLOBAL] = 0, 1611 }; 1612 1613 static const char *aggr_header_csv[] = { 1614 [AGGR_CORE] = "core,cpus,", 1615 [AGGR_SOCKET] = "socket,cpus", 1616 [AGGR_NONE] = "cpu,", 1617 [AGGR_THREAD] = "comm-pid,", 1618 [AGGR_GLOBAL] = "" 1619 }; 1620 1621 static void print_metric_headers(const char *prefix, bool no_indent) 1622 { 1623 struct perf_stat_output_ctx out; 1624 struct perf_evsel *counter; 1625 struct outstate os = { 1626 .fh = stat_config.output 1627 }; 1628 1629 if (prefix) 1630 fprintf(stat_config.output, "%s", prefix); 1631 1632 if (!csv_output && !no_indent) 1633 fprintf(stat_config.output, "%*s", 1634 aggr_header_lens[stat_config.aggr_mode], ""); 1635 if (csv_output) { 1636 if (stat_config.interval) 1637 fputs("time,", stat_config.output); 1638 fputs(aggr_header_csv[stat_config.aggr_mode], 1639 stat_config.output); 1640 } 1641 1642 /* Print metrics headers only */ 1643 evlist__for_each_entry(evsel_list, counter) { 1644 if (is_duration_time(counter)) 1645 continue; 1646 os.evsel = counter; 1647 out.ctx = &os; 1648 out.print_metric = print_metric_header; 1649 out.new_line = new_line_metric; 1650 out.force_header = true; 1651 os.evsel = counter; 1652 perf_stat__print_shadow_stats(counter, 0, 1653 0, 1654 &out, 1655 &metric_events, 1656 &rt_stat); 1657 } 1658 fputc('\n', stat_config.output); 1659 } 1660 1661 static void print_interval(char *prefix, struct timespec *ts) 1662 { 1663 FILE *output = stat_config.output; 1664 static int num_print_interval; 1665 1666 sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); 1667 1668 if (num_print_interval == 0 && !csv_output) { 1669 switch (stat_config.aggr_mode) { 1670 case AGGR_SOCKET: 1671 fprintf(output, "# time socket cpus"); 1672 if (!metric_only) 1673 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1674 break; 1675 case AGGR_CORE: 1676 fprintf(output, "# time core cpus"); 1677 if (!metric_only) 1678 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1679 break; 1680 case AGGR_NONE: 1681 fprintf(output, "# time CPU"); 1682 if (!metric_only) 1683 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1684 break; 1685 case AGGR_THREAD: 1686 fprintf(output, "# time comm-pid"); 1687 if (!metric_only) 1688 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1689 break; 1690 case AGGR_GLOBAL: 1691 default: 1692 fprintf(output, "# time"); 1693 if (!metric_only) 1694 fprintf(output, " counts %*s events\n", unit_width, "unit"); 1695 case AGGR_UNSET: 1696 break; 1697 } 1698 } 1699 1700 if (num_print_interval == 0 && metric_only) 1701 print_metric_headers(" ", true); 1702 if (++num_print_interval == 25) 1703 num_print_interval = 0; 1704 } 1705 1706 static void print_header(int argc, const char **argv) 1707 { 1708 FILE *output = stat_config.output; 1709 int i; 1710 1711 fflush(stdout); 1712 1713 if (!csv_output) { 1714 fprintf(output, "\n"); 1715 fprintf(output, " Performance counter stats for "); 1716 if (target.system_wide) 1717 fprintf(output, "\'system wide"); 1718 else if (target.cpu_list) 1719 fprintf(output, "\'CPU(s) %s", target.cpu_list); 1720 else if (!target__has_task(&target)) { 1721 fprintf(output, "\'%s", argv ? argv[0] : "pipe"); 1722 for (i = 1; argv && (i < argc); i++) 1723 fprintf(output, " %s", argv[i]); 1724 } else if (target.pid) 1725 fprintf(output, "process id \'%s", target.pid); 1726 else 1727 fprintf(output, "thread id \'%s", target.tid); 1728 1729 fprintf(output, "\'"); 1730 if (run_count > 1) 1731 fprintf(output, " (%d runs)", run_count); 1732 fprintf(output, ":\n\n"); 1733 } 1734 } 1735 1736 static void print_footer(void) 1737 { 1738 FILE *output = stat_config.output; 1739 int n; 1740 1741 if (!null_run) 1742 fprintf(output, "\n"); 1743 fprintf(output, " %17.9f seconds time elapsed", 1744 avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC); 1745 if (run_count > 1) { 1746 fprintf(output, " "); 1747 print_noise_pct(stddev_stats(&walltime_nsecs_stats), 1748 avg_stats(&walltime_nsecs_stats)); 1749 } 1750 fprintf(output, "\n\n"); 1751 1752 if (print_free_counters_hint && 1753 sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 && 1754 n > 0) 1755 fprintf(output, 1756 "Some events weren't counted. Try disabling the NMI watchdog:\n" 1757 " echo 0 > /proc/sys/kernel/nmi_watchdog\n" 1758 " perf stat ...\n" 1759 " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); 1760 } 1761 1762 static void print_counters(struct timespec *ts, int argc, const char **argv) 1763 { 1764 int interval = stat_config.interval; 1765 struct perf_evsel *counter; 1766 char buf[64], *prefix = NULL; 1767 1768 /* Do not print anything if we record to the pipe. */ 1769 if (STAT_RECORD && perf_stat.data.is_pipe) 1770 return; 1771 1772 if (interval) 1773 print_interval(prefix = buf, ts); 1774 else 1775 print_header(argc, argv); 1776 1777 if (metric_only) { 1778 static int num_print_iv; 1779 1780 if (num_print_iv == 0 && !interval) 1781 print_metric_headers(prefix, false); 1782 if (num_print_iv++ == 25) 1783 num_print_iv = 0; 1784 if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) 1785 fprintf(stat_config.output, "%s", prefix); 1786 } 1787 1788 switch (stat_config.aggr_mode) { 1789 case AGGR_CORE: 1790 case AGGR_SOCKET: 1791 print_aggr(prefix); 1792 break; 1793 case AGGR_THREAD: 1794 evlist__for_each_entry(evsel_list, counter) { 1795 if (is_duration_time(counter)) 1796 continue; 1797 print_aggr_thread(counter, prefix); 1798 } 1799 break; 1800 case AGGR_GLOBAL: 1801 evlist__for_each_entry(evsel_list, counter) { 1802 if (is_duration_time(counter)) 1803 continue; 1804 print_counter_aggr(counter, prefix); 1805 } 1806 if (metric_only) 1807 fputc('\n', stat_config.output); 1808 break; 1809 case AGGR_NONE: 1810 if (metric_only) 1811 print_no_aggr_metric(prefix); 1812 else { 1813 evlist__for_each_entry(evsel_list, counter) { 1814 if (is_duration_time(counter)) 1815 continue; 1816 print_counter(counter, prefix); 1817 } 1818 } 1819 break; 1820 case AGGR_UNSET: 1821 default: 1822 break; 1823 } 1824 1825 if (!interval && !csv_output) 1826 print_footer(); 1827 1828 fflush(stat_config.output); 1829 } 1830 1831 static volatile int signr = -1; 1832 1833 static void skip_signal(int signo) 1834 { 1835 if ((child_pid == -1) || stat_config.interval) 1836 done = 1; 1837 1838 signr = signo; 1839 /* 1840 * render child_pid harmless 1841 * won't send SIGTERM to a random 1842 * process in case of race condition 1843 * and fast PID recycling 1844 */ 1845 child_pid = -1; 1846 } 1847 1848 static void sig_atexit(void) 1849 { 1850 sigset_t set, oset; 1851 1852 /* 1853 * avoid race condition with SIGCHLD handler 1854 * in skip_signal() which is modifying child_pid 1855 * goal is to avoid send SIGTERM to a random 1856 * process 1857 */ 1858 sigemptyset(&set); 1859 sigaddset(&set, SIGCHLD); 1860 sigprocmask(SIG_BLOCK, &set, &oset); 1861 1862 if (child_pid != -1) 1863 kill(child_pid, SIGTERM); 1864 1865 sigprocmask(SIG_SETMASK, &oset, NULL); 1866 1867 if (signr == -1) 1868 return; 1869 1870 signal(signr, SIG_DFL); 1871 kill(getpid(), signr); 1872 } 1873 1874 static int stat__set_big_num(const struct option *opt __maybe_unused, 1875 const char *s __maybe_unused, int unset) 1876 { 1877 big_num_opt = unset ? 0 : 1; 1878 return 0; 1879 } 1880 1881 static int enable_metric_only(const struct option *opt __maybe_unused, 1882 const char *s __maybe_unused, int unset) 1883 { 1884 force_metric_only = true; 1885 metric_only = !unset; 1886 return 0; 1887 } 1888 1889 static int parse_metric_groups(const struct option *opt, 1890 const char *str, 1891 int unset __maybe_unused) 1892 { 1893 return metricgroup__parse_groups(opt, str, &metric_events); 1894 } 1895 1896 static const struct option stat_options[] = { 1897 OPT_BOOLEAN('T', "transaction", &transaction_run, 1898 "hardware transaction statistics"), 1899 OPT_CALLBACK('e', "event", &evsel_list, "event", 1900 "event selector. use 'perf list' to list available events", 1901 parse_events_option), 1902 OPT_CALLBACK(0, "filter", &evsel_list, "filter", 1903 "event filter", parse_filter), 1904 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 1905 "child tasks do not inherit counters"), 1906 OPT_STRING('p', "pid", &target.pid, "pid", 1907 "stat events on existing process id"), 1908 OPT_STRING('t', "tid", &target.tid, "tid", 1909 "stat events on existing thread id"), 1910 OPT_BOOLEAN('a', "all-cpus", &target.system_wide, 1911 "system-wide collection from all CPUs"), 1912 OPT_BOOLEAN('g', "group", &group, 1913 "put the counters into a counter group"), 1914 OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), 1915 OPT_INCR('v', "verbose", &verbose, 1916 "be more verbose (show counter open errors, etc)"), 1917 OPT_INTEGER('r', "repeat", &run_count, 1918 "repeat command and print average + stddev (max: 100, forever: 0)"), 1919 OPT_BOOLEAN('n', "null", &null_run, 1920 "null run - dont start any counters"), 1921 OPT_INCR('d', "detailed", &detailed_run, 1922 "detailed run - start a lot of events"), 1923 OPT_BOOLEAN('S', "sync", &sync_run, 1924 "call sync() before starting a run"), 1925 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 1926 "print large numbers with thousands\' separators", 1927 stat__set_big_num), 1928 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 1929 "list of cpus to monitor in system-wide"), 1930 OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, 1931 "disable CPU count aggregation", AGGR_NONE), 1932 OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"), 1933 OPT_STRING('x', "field-separator", &csv_sep, "separator", 1934 "print counts with custom separator"), 1935 OPT_CALLBACK('G', "cgroup", &evsel_list, "name", 1936 "monitor event in cgroup name only", parse_cgroups), 1937 OPT_STRING('o', "output", &output_name, "file", "output file name"), 1938 OPT_BOOLEAN(0, "append", &append_file, "append to the output file"), 1939 OPT_INTEGER(0, "log-fd", &output_fd, 1940 "log output to fd, instead of stderr"), 1941 OPT_STRING(0, "pre", &pre_cmd, "command", 1942 "command to run prior to the measured command"), 1943 OPT_STRING(0, "post", &post_cmd, "command", 1944 "command to run after to the measured command"), 1945 OPT_UINTEGER('I', "interval-print", &stat_config.interval, 1946 "print counts at regular interval in ms (>= 10)"), 1947 OPT_INTEGER(0, "interval-count", &stat_config.times, 1948 "print counts for fixed number of times"), 1949 OPT_UINTEGER(0, "timeout", &stat_config.timeout, 1950 "stop workload and print counts after a timeout period in ms (>= 10ms)"), 1951 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, 1952 "aggregate counts per processor socket", AGGR_SOCKET), 1953 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, 1954 "aggregate counts per physical processor core", AGGR_CORE), 1955 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, 1956 "aggregate counts per thread", AGGR_THREAD), 1957 OPT_UINTEGER('D', "delay", &initial_delay, 1958 "ms to wait before starting measurement after program start"), 1959 OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL, 1960 "Only print computed metrics. No raw values", enable_metric_only), 1961 OPT_BOOLEAN(0, "topdown", &topdown_run, 1962 "measure topdown level 1 statistics"), 1963 OPT_BOOLEAN(0, "smi-cost", &smi_cost, 1964 "measure SMI cost"), 1965 OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list", 1966 "monitor specified metrics or metric groups (separated by ,)", 1967 parse_metric_groups), 1968 OPT_END() 1969 }; 1970 1971 static int perf_stat__get_socket(struct cpu_map *map, int cpu) 1972 { 1973 return cpu_map__get_socket(map, cpu, NULL); 1974 } 1975 1976 static int perf_stat__get_core(struct cpu_map *map, int cpu) 1977 { 1978 return cpu_map__get_core(map, cpu, NULL); 1979 } 1980 1981 static int cpu_map__get_max(struct cpu_map *map) 1982 { 1983 int i, max = -1; 1984 1985 for (i = 0; i < map->nr; i++) { 1986 if (map->map[i] > max) 1987 max = map->map[i]; 1988 } 1989 1990 return max; 1991 } 1992 1993 static struct cpu_map *cpus_aggr_map; 1994 1995 static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx) 1996 { 1997 int cpu; 1998 1999 if (idx >= map->nr) 2000 return -1; 2001 2002 cpu = map->map[idx]; 2003 2004 if (cpus_aggr_map->map[cpu] == -1) 2005 cpus_aggr_map->map[cpu] = get_id(map, idx); 2006 2007 return cpus_aggr_map->map[cpu]; 2008 } 2009 2010 static int perf_stat__get_socket_cached(struct cpu_map *map, int idx) 2011 { 2012 return perf_stat__get_aggr(perf_stat__get_socket, map, idx); 2013 } 2014 2015 static int perf_stat__get_core_cached(struct cpu_map *map, int idx) 2016 { 2017 return perf_stat__get_aggr(perf_stat__get_core, map, idx); 2018 } 2019 2020 static int perf_stat_init_aggr_mode(void) 2021 { 2022 int nr; 2023 2024 switch (stat_config.aggr_mode) { 2025 case AGGR_SOCKET: 2026 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { 2027 perror("cannot build socket map"); 2028 return -1; 2029 } 2030 aggr_get_id = perf_stat__get_socket_cached; 2031 break; 2032 case AGGR_CORE: 2033 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { 2034 perror("cannot build core map"); 2035 return -1; 2036 } 2037 aggr_get_id = perf_stat__get_core_cached; 2038 break; 2039 case AGGR_NONE: 2040 case AGGR_GLOBAL: 2041 case AGGR_THREAD: 2042 case AGGR_UNSET: 2043 default: 2044 break; 2045 } 2046 2047 /* 2048 * The evsel_list->cpus is the base we operate on, 2049 * taking the highest cpu number to be the size of 2050 * the aggregation translate cpumap. 2051 */ 2052 nr = cpu_map__get_max(evsel_list->cpus); 2053 cpus_aggr_map = cpu_map__empty_new(nr + 1); 2054 return cpus_aggr_map ? 0 : -ENOMEM; 2055 } 2056 2057 static void perf_stat__exit_aggr_mode(void) 2058 { 2059 cpu_map__put(aggr_map); 2060 cpu_map__put(cpus_aggr_map); 2061 aggr_map = NULL; 2062 cpus_aggr_map = NULL; 2063 } 2064 2065 static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx) 2066 { 2067 int cpu; 2068 2069 if (idx > map->nr) 2070 return -1; 2071 2072 cpu = map->map[idx]; 2073 2074 if (cpu >= env->nr_cpus_avail) 2075 return -1; 2076 2077 return cpu; 2078 } 2079 2080 static int perf_env__get_socket(struct cpu_map *map, int idx, void *data) 2081 { 2082 struct perf_env *env = data; 2083 int cpu = perf_env__get_cpu(env, map, idx); 2084 2085 return cpu == -1 ? -1 : env->cpu[cpu].socket_id; 2086 } 2087 2088 static int perf_env__get_core(struct cpu_map *map, int idx, void *data) 2089 { 2090 struct perf_env *env = data; 2091 int core = -1, cpu = perf_env__get_cpu(env, map, idx); 2092 2093 if (cpu != -1) { 2094 int socket_id = env->cpu[cpu].socket_id; 2095 2096 /* 2097 * Encode socket in upper 16 bits 2098 * core_id is relative to socket, and 2099 * we need a global id. So we combine 2100 * socket + core id. 2101 */ 2102 core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff); 2103 } 2104 2105 return core; 2106 } 2107 2108 static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus, 2109 struct cpu_map **sockp) 2110 { 2111 return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); 2112 } 2113 2114 static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, 2115 struct cpu_map **corep) 2116 { 2117 return cpu_map__build_map(cpus, corep, perf_env__get_core, env); 2118 } 2119 2120 static int perf_stat__get_socket_file(struct cpu_map *map, int idx) 2121 { 2122 return perf_env__get_socket(map, idx, &perf_stat.session->header.env); 2123 } 2124 2125 static int perf_stat__get_core_file(struct cpu_map *map, int idx) 2126 { 2127 return perf_env__get_core(map, idx, &perf_stat.session->header.env); 2128 } 2129 2130 static int perf_stat_init_aggr_mode_file(struct perf_stat *st) 2131 { 2132 struct perf_env *env = &st->session->header.env; 2133 2134 switch (stat_config.aggr_mode) { 2135 case AGGR_SOCKET: 2136 if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) { 2137 perror("cannot build socket map"); 2138 return -1; 2139 } 2140 aggr_get_id = perf_stat__get_socket_file; 2141 break; 2142 case AGGR_CORE: 2143 if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) { 2144 perror("cannot build core map"); 2145 return -1; 2146 } 2147 aggr_get_id = perf_stat__get_core_file; 2148 break; 2149 case AGGR_NONE: 2150 case AGGR_GLOBAL: 2151 case AGGR_THREAD: 2152 case AGGR_UNSET: 2153 default: 2154 break; 2155 } 2156 2157 return 0; 2158 } 2159 2160 static int topdown_filter_events(const char **attr, char **str, bool use_group) 2161 { 2162 int off = 0; 2163 int i; 2164 int len = 0; 2165 char *s; 2166 2167 for (i = 0; attr[i]; i++) { 2168 if (pmu_have_event("cpu", attr[i])) { 2169 len += strlen(attr[i]) + 1; 2170 attr[i - off] = attr[i]; 2171 } else 2172 off++; 2173 } 2174 attr[i - off] = NULL; 2175 2176 *str = malloc(len + 1 + 2); 2177 if (!*str) 2178 return -1; 2179 s = *str; 2180 if (i - off == 0) { 2181 *s = 0; 2182 return 0; 2183 } 2184 if (use_group) 2185 *s++ = '{'; 2186 for (i = 0; attr[i]; i++) { 2187 strcpy(s, attr[i]); 2188 s += strlen(s); 2189 *s++ = ','; 2190 } 2191 if (use_group) { 2192 s[-1] = '}'; 2193 *s = 0; 2194 } else 2195 s[-1] = 0; 2196 return 0; 2197 } 2198 2199 __weak bool arch_topdown_check_group(bool *warn) 2200 { 2201 *warn = false; 2202 return false; 2203 } 2204 2205 __weak void arch_topdown_group_warn(void) 2206 { 2207 } 2208 2209 /* 2210 * Add default attributes, if there were no attributes specified or 2211 * if -d/--detailed, -d -d or -d -d -d is used: 2212 */ 2213 static int add_default_attributes(void) 2214 { 2215 int err; 2216 struct perf_event_attr default_attrs0[] = { 2217 2218 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 2219 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, 2220 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, 2221 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 2222 2223 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, 2224 }; 2225 struct perf_event_attr frontend_attrs[] = { 2226 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, 2227 }; 2228 struct perf_event_attr backend_attrs[] = { 2229 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, 2230 }; 2231 struct perf_event_attr default_attrs1[] = { 2232 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, 2233 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 2234 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, 2235 2236 }; 2237 2238 /* 2239 * Detailed stats (-d), covering the L1 and last level data caches: 2240 */ 2241 struct perf_event_attr detailed_attrs[] = { 2242 2243 { .type = PERF_TYPE_HW_CACHE, 2244 .config = 2245 PERF_COUNT_HW_CACHE_L1D << 0 | 2246 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2247 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2248 2249 { .type = PERF_TYPE_HW_CACHE, 2250 .config = 2251 PERF_COUNT_HW_CACHE_L1D << 0 | 2252 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2253 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2254 2255 { .type = PERF_TYPE_HW_CACHE, 2256 .config = 2257 PERF_COUNT_HW_CACHE_LL << 0 | 2258 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2259 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2260 2261 { .type = PERF_TYPE_HW_CACHE, 2262 .config = 2263 PERF_COUNT_HW_CACHE_LL << 0 | 2264 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2265 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2266 }; 2267 2268 /* 2269 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: 2270 */ 2271 struct perf_event_attr very_detailed_attrs[] = { 2272 2273 { .type = PERF_TYPE_HW_CACHE, 2274 .config = 2275 PERF_COUNT_HW_CACHE_L1I << 0 | 2276 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2277 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2278 2279 { .type = PERF_TYPE_HW_CACHE, 2280 .config = 2281 PERF_COUNT_HW_CACHE_L1I << 0 | 2282 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2283 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2284 2285 { .type = PERF_TYPE_HW_CACHE, 2286 .config = 2287 PERF_COUNT_HW_CACHE_DTLB << 0 | 2288 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2289 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2290 2291 { .type = PERF_TYPE_HW_CACHE, 2292 .config = 2293 PERF_COUNT_HW_CACHE_DTLB << 0 | 2294 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2295 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2296 2297 { .type = PERF_TYPE_HW_CACHE, 2298 .config = 2299 PERF_COUNT_HW_CACHE_ITLB << 0 | 2300 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2301 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2302 2303 { .type = PERF_TYPE_HW_CACHE, 2304 .config = 2305 PERF_COUNT_HW_CACHE_ITLB << 0 | 2306 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 2307 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2308 2309 }; 2310 2311 /* 2312 * Very, very detailed stats (-d -d -d), adding prefetch events: 2313 */ 2314 struct perf_event_attr very_very_detailed_attrs[] = { 2315 2316 { .type = PERF_TYPE_HW_CACHE, 2317 .config = 2318 PERF_COUNT_HW_CACHE_L1D << 0 | 2319 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 2320 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 2321 2322 { .type = PERF_TYPE_HW_CACHE, 2323 .config = 2324 PERF_COUNT_HW_CACHE_L1D << 0 | 2325 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 2326 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 2327 }; 2328 2329 /* Set attrs if no event is selected and !null_run: */ 2330 if (null_run) 2331 return 0; 2332 2333 if (transaction_run) { 2334 struct parse_events_error errinfo; 2335 2336 if (pmu_have_event("cpu", "cycles-ct") && 2337 pmu_have_event("cpu", "el-start")) 2338 err = parse_events(evsel_list, transaction_attrs, 2339 &errinfo); 2340 else 2341 err = parse_events(evsel_list, 2342 transaction_limited_attrs, 2343 &errinfo); 2344 if (err) { 2345 fprintf(stderr, "Cannot set up transaction events\n"); 2346 return -1; 2347 } 2348 return 0; 2349 } 2350 2351 if (smi_cost) { 2352 int smi; 2353 2354 if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) { 2355 fprintf(stderr, "freeze_on_smi is not supported.\n"); 2356 return -1; 2357 } 2358 2359 if (!smi) { 2360 if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) { 2361 fprintf(stderr, "Failed to set freeze_on_smi.\n"); 2362 return -1; 2363 } 2364 smi_reset = true; 2365 } 2366 2367 if (pmu_have_event("msr", "aperf") && 2368 pmu_have_event("msr", "smi")) { 2369 if (!force_metric_only) 2370 metric_only = true; 2371 err = parse_events(evsel_list, smi_cost_attrs, NULL); 2372 } else { 2373 fprintf(stderr, "To measure SMI cost, it needs " 2374 "msr/aperf/, msr/smi/ and cpu/cycles/ support\n"); 2375 return -1; 2376 } 2377 if (err) { 2378 fprintf(stderr, "Cannot set up SMI cost events\n"); 2379 return -1; 2380 } 2381 return 0; 2382 } 2383 2384 if (topdown_run) { 2385 char *str = NULL; 2386 bool warn = false; 2387 2388 if (stat_config.aggr_mode != AGGR_GLOBAL && 2389 stat_config.aggr_mode != AGGR_CORE) { 2390 pr_err("top down event configuration requires --per-core mode\n"); 2391 return -1; 2392 } 2393 stat_config.aggr_mode = AGGR_CORE; 2394 if (nr_cgroups || !target__has_cpu(&target)) { 2395 pr_err("top down event configuration requires system-wide mode (-a)\n"); 2396 return -1; 2397 } 2398 2399 if (!force_metric_only) 2400 metric_only = true; 2401 if (topdown_filter_events(topdown_attrs, &str, 2402 arch_topdown_check_group(&warn)) < 0) { 2403 pr_err("Out of memory\n"); 2404 return -1; 2405 } 2406 if (topdown_attrs[0] && str) { 2407 if (warn) 2408 arch_topdown_group_warn(); 2409 err = parse_events(evsel_list, str, NULL); 2410 if (err) { 2411 fprintf(stderr, 2412 "Cannot set up top down events %s: %d\n", 2413 str, err); 2414 free(str); 2415 return -1; 2416 } 2417 } else { 2418 fprintf(stderr, "System does not support topdown\n"); 2419 return -1; 2420 } 2421 free(str); 2422 } 2423 2424 if (!evsel_list->nr_entries) { 2425 if (target__has_cpu(&target)) 2426 default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; 2427 2428 if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) 2429 return -1; 2430 if (pmu_have_event("cpu", "stalled-cycles-frontend")) { 2431 if (perf_evlist__add_default_attrs(evsel_list, 2432 frontend_attrs) < 0) 2433 return -1; 2434 } 2435 if (pmu_have_event("cpu", "stalled-cycles-backend")) { 2436 if (perf_evlist__add_default_attrs(evsel_list, 2437 backend_attrs) < 0) 2438 return -1; 2439 } 2440 if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0) 2441 return -1; 2442 } 2443 2444 /* Detailed events get appended to the event list: */ 2445 2446 if (detailed_run < 1) 2447 return 0; 2448 2449 /* Append detailed run extra attributes: */ 2450 if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) 2451 return -1; 2452 2453 if (detailed_run < 2) 2454 return 0; 2455 2456 /* Append very detailed run extra attributes: */ 2457 if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) 2458 return -1; 2459 2460 if (detailed_run < 3) 2461 return 0; 2462 2463 /* Append very, very detailed run extra attributes: */ 2464 return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); 2465 } 2466 2467 static const char * const stat_record_usage[] = { 2468 "perf stat record [<options>]", 2469 NULL, 2470 }; 2471 2472 static void init_features(struct perf_session *session) 2473 { 2474 int feat; 2475 2476 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 2477 perf_header__set_feat(&session->header, feat); 2478 2479 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 2480 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 2481 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 2482 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 2483 } 2484 2485 static int __cmd_record(int argc, const char **argv) 2486 { 2487 struct perf_session *session; 2488 struct perf_data *data = &perf_stat.data; 2489 2490 argc = parse_options(argc, argv, stat_options, stat_record_usage, 2491 PARSE_OPT_STOP_AT_NON_OPTION); 2492 2493 if (output_name) 2494 data->file.path = output_name; 2495 2496 if (run_count != 1 || forever) { 2497 pr_err("Cannot use -r option with perf stat record.\n"); 2498 return -1; 2499 } 2500 2501 session = perf_session__new(data, false, NULL); 2502 if (session == NULL) { 2503 pr_err("Perf session creation failed.\n"); 2504 return -1; 2505 } 2506 2507 init_features(session); 2508 2509 session->evlist = evsel_list; 2510 perf_stat.session = session; 2511 perf_stat.record = true; 2512 return argc; 2513 } 2514 2515 static int process_stat_round_event(struct perf_tool *tool __maybe_unused, 2516 union perf_event *event, 2517 struct perf_session *session) 2518 { 2519 struct stat_round_event *stat_round = &event->stat_round; 2520 struct perf_evsel *counter; 2521 struct timespec tsh, *ts = NULL; 2522 const char **argv = session->header.env.cmdline_argv; 2523 int argc = session->header.env.nr_cmdline; 2524 2525 evlist__for_each_entry(evsel_list, counter) 2526 perf_stat_process_counter(&stat_config, counter); 2527 2528 if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL) 2529 update_stats(&walltime_nsecs_stats, stat_round->time); 2530 2531 if (stat_config.interval && stat_round->time) { 2532 tsh.tv_sec = stat_round->time / NSEC_PER_SEC; 2533 tsh.tv_nsec = stat_round->time % NSEC_PER_SEC; 2534 ts = &tsh; 2535 } 2536 2537 print_counters(ts, argc, argv); 2538 return 0; 2539 } 2540 2541 static 2542 int process_stat_config_event(struct perf_tool *tool, 2543 union perf_event *event, 2544 struct perf_session *session __maybe_unused) 2545 { 2546 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2547 2548 perf_event__read_stat_config(&stat_config, &event->stat_config); 2549 2550 if (cpu_map__empty(st->cpus)) { 2551 if (st->aggr_mode != AGGR_UNSET) 2552 pr_warning("warning: processing task data, aggregation mode not set\n"); 2553 return 0; 2554 } 2555 2556 if (st->aggr_mode != AGGR_UNSET) 2557 stat_config.aggr_mode = st->aggr_mode; 2558 2559 if (perf_stat.data.is_pipe) 2560 perf_stat_init_aggr_mode(); 2561 else 2562 perf_stat_init_aggr_mode_file(st); 2563 2564 return 0; 2565 } 2566 2567 static int set_maps(struct perf_stat *st) 2568 { 2569 if (!st->cpus || !st->threads) 2570 return 0; 2571 2572 if (WARN_ONCE(st->maps_allocated, "stats double allocation\n")) 2573 return -EINVAL; 2574 2575 perf_evlist__set_maps(evsel_list, st->cpus, st->threads); 2576 2577 if (perf_evlist__alloc_stats(evsel_list, true)) 2578 return -ENOMEM; 2579 2580 st->maps_allocated = true; 2581 return 0; 2582 } 2583 2584 static 2585 int process_thread_map_event(struct perf_tool *tool, 2586 union perf_event *event, 2587 struct perf_session *session __maybe_unused) 2588 { 2589 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2590 2591 if (st->threads) { 2592 pr_warning("Extra thread map event, ignoring.\n"); 2593 return 0; 2594 } 2595 2596 st->threads = thread_map__new_event(&event->thread_map); 2597 if (!st->threads) 2598 return -ENOMEM; 2599 2600 return set_maps(st); 2601 } 2602 2603 static 2604 int process_cpu_map_event(struct perf_tool *tool, 2605 union perf_event *event, 2606 struct perf_session *session __maybe_unused) 2607 { 2608 struct perf_stat *st = container_of(tool, struct perf_stat, tool); 2609 struct cpu_map *cpus; 2610 2611 if (st->cpus) { 2612 pr_warning("Extra cpu map event, ignoring.\n"); 2613 return 0; 2614 } 2615 2616 cpus = cpu_map__new_data(&event->cpu_map.data); 2617 if (!cpus) 2618 return -ENOMEM; 2619 2620 st->cpus = cpus; 2621 return set_maps(st); 2622 } 2623 2624 static int runtime_stat_new(struct perf_stat_config *config, int nthreads) 2625 { 2626 int i; 2627 2628 config->stats = calloc(nthreads, sizeof(struct runtime_stat)); 2629 if (!config->stats) 2630 return -1; 2631 2632 config->stats_num = nthreads; 2633 2634 for (i = 0; i < nthreads; i++) 2635 runtime_stat__init(&config->stats[i]); 2636 2637 return 0; 2638 } 2639 2640 static void runtime_stat_delete(struct perf_stat_config *config) 2641 { 2642 int i; 2643 2644 if (!config->stats) 2645 return; 2646 2647 for (i = 0; i < config->stats_num; i++) 2648 runtime_stat__exit(&config->stats[i]); 2649 2650 free(config->stats); 2651 } 2652 2653 static const char * const stat_report_usage[] = { 2654 "perf stat report [<options>]", 2655 NULL, 2656 }; 2657 2658 static struct perf_stat perf_stat = { 2659 .tool = { 2660 .attr = perf_event__process_attr, 2661 .event_update = perf_event__process_event_update, 2662 .thread_map = process_thread_map_event, 2663 .cpu_map = process_cpu_map_event, 2664 .stat_config = process_stat_config_event, 2665 .stat = perf_event__process_stat_event, 2666 .stat_round = process_stat_round_event, 2667 }, 2668 .aggr_mode = AGGR_UNSET, 2669 }; 2670 2671 static int __cmd_report(int argc, const char **argv) 2672 { 2673 struct perf_session *session; 2674 const struct option options[] = { 2675 OPT_STRING('i', "input", &input_name, "file", "input file name"), 2676 OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, 2677 "aggregate counts per processor socket", AGGR_SOCKET), 2678 OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, 2679 "aggregate counts per physical processor core", AGGR_CORE), 2680 OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, 2681 "disable CPU count aggregation", AGGR_NONE), 2682 OPT_END() 2683 }; 2684 struct stat st; 2685 int ret; 2686 2687 argc = parse_options(argc, argv, options, stat_report_usage, 0); 2688 2689 if (!input_name || !strlen(input_name)) { 2690 if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) 2691 input_name = "-"; 2692 else 2693 input_name = "perf.data"; 2694 } 2695 2696 perf_stat.data.file.path = input_name; 2697 perf_stat.data.mode = PERF_DATA_MODE_READ; 2698 2699 session = perf_session__new(&perf_stat.data, false, &perf_stat.tool); 2700 if (session == NULL) 2701 return -1; 2702 2703 perf_stat.session = session; 2704 stat_config.output = stderr; 2705 evsel_list = session->evlist; 2706 2707 ret = perf_session__process_events(session); 2708 if (ret) 2709 return ret; 2710 2711 perf_session__delete(session); 2712 return 0; 2713 } 2714 2715 static void setup_system_wide(int forks) 2716 { 2717 /* 2718 * Make system wide (-a) the default target if 2719 * no target was specified and one of following 2720 * conditions is met: 2721 * 2722 * - there's no workload specified 2723 * - there is workload specified but all requested 2724 * events are system wide events 2725 */ 2726 if (!target__none(&target)) 2727 return; 2728 2729 if (!forks) 2730 target.system_wide = true; 2731 else { 2732 struct perf_evsel *counter; 2733 2734 evlist__for_each_entry(evsel_list, counter) { 2735 if (!counter->system_wide) 2736 return; 2737 } 2738 2739 if (evsel_list->nr_entries) 2740 target.system_wide = true; 2741 } 2742 } 2743 2744 int cmd_stat(int argc, const char **argv) 2745 { 2746 const char * const stat_usage[] = { 2747 "perf stat [<options>] [<command>]", 2748 NULL 2749 }; 2750 int status = -EINVAL, run_idx; 2751 const char *mode; 2752 FILE *output = stderr; 2753 unsigned int interval, timeout; 2754 const char * const stat_subcommands[] = { "record", "report" }; 2755 2756 setlocale(LC_ALL, ""); 2757 2758 evsel_list = perf_evlist__new(); 2759 if (evsel_list == NULL) 2760 return -ENOMEM; 2761 2762 parse_events__shrink_config_terms(); 2763 argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, 2764 (const char **) stat_usage, 2765 PARSE_OPT_STOP_AT_NON_OPTION); 2766 perf_stat__collect_metric_expr(evsel_list); 2767 perf_stat__init_shadow_stats(); 2768 2769 if (csv_sep) { 2770 csv_output = true; 2771 if (!strcmp(csv_sep, "\\t")) 2772 csv_sep = "\t"; 2773 } else 2774 csv_sep = DEFAULT_SEPARATOR; 2775 2776 if (argc && !strncmp(argv[0], "rec", 3)) { 2777 argc = __cmd_record(argc, argv); 2778 if (argc < 0) 2779 return -1; 2780 } else if (argc && !strncmp(argv[0], "rep", 3)) 2781 return __cmd_report(argc, argv); 2782 2783 interval = stat_config.interval; 2784 timeout = stat_config.timeout; 2785 2786 /* 2787 * For record command the -o is already taken care of. 2788 */ 2789 if (!STAT_RECORD && output_name && strcmp(output_name, "-")) 2790 output = NULL; 2791 2792 if (output_name && output_fd) { 2793 fprintf(stderr, "cannot use both --output and --log-fd\n"); 2794 parse_options_usage(stat_usage, stat_options, "o", 1); 2795 parse_options_usage(NULL, stat_options, "log-fd", 0); 2796 goto out; 2797 } 2798 2799 if (metric_only && stat_config.aggr_mode == AGGR_THREAD) { 2800 fprintf(stderr, "--metric-only is not supported with --per-thread\n"); 2801 goto out; 2802 } 2803 2804 if (metric_only && run_count > 1) { 2805 fprintf(stderr, "--metric-only is not supported with -r\n"); 2806 goto out; 2807 } 2808 2809 if (output_fd < 0) { 2810 fprintf(stderr, "argument to --log-fd must be a > 0\n"); 2811 parse_options_usage(stat_usage, stat_options, "log-fd", 0); 2812 goto out; 2813 } 2814 2815 if (!output) { 2816 struct timespec tm; 2817 mode = append_file ? "a" : "w"; 2818 2819 output = fopen(output_name, mode); 2820 if (!output) { 2821 perror("failed to create output file"); 2822 return -1; 2823 } 2824 clock_gettime(CLOCK_REALTIME, &tm); 2825 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); 2826 } else if (output_fd > 0) { 2827 mode = append_file ? "a" : "w"; 2828 output = fdopen(output_fd, mode); 2829 if (!output) { 2830 perror("Failed opening logfd"); 2831 return -errno; 2832 } 2833 } 2834 2835 stat_config.output = output; 2836 2837 /* 2838 * let the spreadsheet do the pretty-printing 2839 */ 2840 if (csv_output) { 2841 /* User explicitly passed -B? */ 2842 if (big_num_opt == 1) { 2843 fprintf(stderr, "-B option not supported with -x\n"); 2844 parse_options_usage(stat_usage, stat_options, "B", 1); 2845 parse_options_usage(NULL, stat_options, "x", 1); 2846 goto out; 2847 } else /* Nope, so disable big number formatting */ 2848 big_num = false; 2849 } else if (big_num_opt == 0) /* User passed --no-big-num */ 2850 big_num = false; 2851 2852 setup_system_wide(argc); 2853 2854 if (run_count < 0) { 2855 pr_err("Run count must be a positive number\n"); 2856 parse_options_usage(stat_usage, stat_options, "r", 1); 2857 goto out; 2858 } else if (run_count == 0) { 2859 forever = true; 2860 run_count = 1; 2861 } 2862 2863 if ((stat_config.aggr_mode == AGGR_THREAD) && 2864 !target__has_task(&target)) { 2865 if (!target.system_wide || target.cpu_list) { 2866 fprintf(stderr, "The --per-thread option is only " 2867 "available when monitoring via -p -t -a " 2868 "options or only --per-thread.\n"); 2869 parse_options_usage(NULL, stat_options, "p", 1); 2870 parse_options_usage(NULL, stat_options, "t", 1); 2871 goto out; 2872 } 2873 } 2874 2875 /* 2876 * no_aggr, cgroup are for system-wide only 2877 * --per-thread is aggregated per thread, we dont mix it with cpu mode 2878 */ 2879 if (((stat_config.aggr_mode != AGGR_GLOBAL && 2880 stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) && 2881 !target__has_cpu(&target)) { 2882 fprintf(stderr, "both cgroup and no-aggregation " 2883 "modes only available in system-wide mode\n"); 2884 2885 parse_options_usage(stat_usage, stat_options, "G", 1); 2886 parse_options_usage(NULL, stat_options, "A", 1); 2887 parse_options_usage(NULL, stat_options, "a", 1); 2888 goto out; 2889 } 2890 2891 if (add_default_attributes()) 2892 goto out; 2893 2894 target__validate(&target); 2895 2896 if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide)) 2897 target.per_thread = true; 2898 2899 if (perf_evlist__create_maps(evsel_list, &target) < 0) { 2900 if (target__has_task(&target)) { 2901 pr_err("Problems finding threads of monitor\n"); 2902 parse_options_usage(stat_usage, stat_options, "p", 1); 2903 parse_options_usage(NULL, stat_options, "t", 1); 2904 } else if (target__has_cpu(&target)) { 2905 perror("failed to parse CPUs map"); 2906 parse_options_usage(stat_usage, stat_options, "C", 1); 2907 parse_options_usage(NULL, stat_options, "a", 1); 2908 } 2909 goto out; 2910 } 2911 2912 /* 2913 * Initialize thread_map with comm names, 2914 * so we could print it out on output. 2915 */ 2916 if (stat_config.aggr_mode == AGGR_THREAD) { 2917 thread_map__read_comms(evsel_list->threads); 2918 if (target.system_wide) { 2919 if (runtime_stat_new(&stat_config, 2920 thread_map__nr(evsel_list->threads))) { 2921 goto out; 2922 } 2923 } 2924 } 2925 2926 if (interval && interval < 100) { 2927 if (interval < 10) { 2928 pr_err("print interval must be >= 10ms\n"); 2929 parse_options_usage(stat_usage, stat_options, "I", 1); 2930 goto out; 2931 } else 2932 pr_warning("print interval < 100ms. " 2933 "The overhead percentage could be high in some cases. " 2934 "Please proceed with caution.\n"); 2935 } 2936 2937 if (stat_config.times && interval) 2938 interval_count = true; 2939 else if (stat_config.times && !interval) { 2940 pr_err("interval-count option should be used together with " 2941 "interval-print.\n"); 2942 parse_options_usage(stat_usage, stat_options, "interval-count", 0); 2943 parse_options_usage(stat_usage, stat_options, "I", 1); 2944 goto out; 2945 } 2946 2947 if (timeout && timeout < 100) { 2948 if (timeout < 10) { 2949 pr_err("timeout must be >= 10ms.\n"); 2950 parse_options_usage(stat_usage, stat_options, "timeout", 0); 2951 goto out; 2952 } else 2953 pr_warning("timeout < 100ms. " 2954 "The overhead percentage could be high in some cases. " 2955 "Please proceed with caution.\n"); 2956 } 2957 if (timeout && interval) { 2958 pr_err("timeout option is not supported with interval-print.\n"); 2959 parse_options_usage(stat_usage, stat_options, "timeout", 0); 2960 parse_options_usage(stat_usage, stat_options, "I", 1); 2961 goto out; 2962 } 2963 2964 if (perf_evlist__alloc_stats(evsel_list, interval)) 2965 goto out; 2966 2967 if (perf_stat_init_aggr_mode()) 2968 goto out; 2969 2970 /* 2971 * We dont want to block the signals - that would cause 2972 * child tasks to inherit that and Ctrl-C would not work. 2973 * What we want is for Ctrl-C to work in the exec()-ed 2974 * task, but being ignored by perf stat itself: 2975 */ 2976 atexit(sig_atexit); 2977 if (!forever) 2978 signal(SIGINT, skip_signal); 2979 signal(SIGCHLD, skip_signal); 2980 signal(SIGALRM, skip_signal); 2981 signal(SIGABRT, skip_signal); 2982 2983 status = 0; 2984 for (run_idx = 0; forever || run_idx < run_count; run_idx++) { 2985 if (run_count != 1 && verbose > 0) 2986 fprintf(output, "[ perf stat: executing run #%d ... ]\n", 2987 run_idx + 1); 2988 2989 status = run_perf_stat(argc, argv); 2990 if (forever && status != -1) { 2991 print_counters(NULL, argc, argv); 2992 perf_stat__reset_stats(); 2993 } 2994 } 2995 2996 if (!forever && status != -1 && !interval) 2997 print_counters(NULL, argc, argv); 2998 2999 if (STAT_RECORD) { 3000 /* 3001 * We synthesize the kernel mmap record just so that older tools 3002 * don't emit warnings about not being able to resolve symbols 3003 * due to /proc/sys/kernel/kptr_restrict settings and instear provide 3004 * a saner message about no samples being in the perf.data file. 3005 * 3006 * This also serves to suppress a warning about f_header.data.size == 0 3007 * in header.c at the moment 'perf stat record' gets introduced, which 3008 * is not really needed once we start adding the stat specific PERF_RECORD_ 3009 * records, but the need to suppress the kptr_restrict messages in older 3010 * tools remain -acme 3011 */ 3012 int fd = perf_data__fd(&perf_stat.data); 3013 int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, 3014 process_synthesized_event, 3015 &perf_stat.session->machines.host); 3016 if (err) { 3017 pr_warning("Couldn't synthesize the kernel mmap record, harmless, " 3018 "older tools may produce warnings about this file\n."); 3019 } 3020 3021 if (!interval) { 3022 if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL)) 3023 pr_err("failed to write stat round event\n"); 3024 } 3025 3026 if (!perf_stat.data.is_pipe) { 3027 perf_stat.session->header.data_size += perf_stat.bytes_written; 3028 perf_session__write_header(perf_stat.session, evsel_list, fd, true); 3029 } 3030 3031 perf_session__delete(perf_stat.session); 3032 } 3033 3034 perf_stat__exit_aggr_mode(); 3035 perf_evlist__free_stats(evsel_list); 3036 out: 3037 if (smi_cost && smi_reset) 3038 sysfs__write_int(FREEZE_ON_SMI_PATH, 0); 3039 3040 perf_evlist__delete(evsel_list); 3041 3042 runtime_stat_delete(&stat_config); 3043 3044 return status; 3045 } 3046