/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ./hackbench 10

  Time: 0.118

  Performance counter stats for './hackbench 10':

       1708.761321 task-clock                #   11.037 CPUs utilized
            41,190 context-switches          #    0.024 M/sec
             6,735 CPU-migrations            #    0.004 M/sec
            17,318 page-faults               #    0.010 M/sec
     5,205,202,243 cycles                    #    3.046 GHz
     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
     2,603,501,247 instructions              #    0.50  insns per cycle
                                             #    1.48  stalled cycles per insn
       484,357,498 branches                  #  283.455 M/sec
         6,388,934 branch-misses             #    1.32% of all branches

        0.154822978  seconds time elapsed

 *
 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include "perf.h"
#include "builtin.h"
#include "util/cgroup.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/pmu.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/color.h"
#include "util/stat.h"
#include "util/header.h"
#include "util/cpumap.h"
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/counts.h"
#include "util/group.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/string2.h"
#include "util/metricgroup.h"
#include "asm/bug.h"

#include <linux/time64.h>
#include <api/fs/fs.h>
#include <errno.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/prctl.h>
#include <inttypes.h>
#include <locale.h>
#include <math.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <unistd.h>

#include "sane_ctype.h"

#define DEFAULT_SEPARATOR	" "
#define CNTR_NOT_SUPPORTED	"<not supported>"
#define CNTR_NOT_COUNTED	"<not counted>"
#define FREEZE_ON_SMI_PATH	"devices/cpu/freeze_on_smi"

static void print_counters(struct timespec *ts, int argc, const char **argv);

/* Default events used for perf stat -T */
static const char *transaction_attrs = {
	"task-clock,"
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/,"
	"cpu/el-start/,"
	"cpu/cycles-ct/"
	"}"
};
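/*
 * A note on the string above: it is handed to the regular event parser,
 * so "{...}" declares an event group that gets scheduled onto the PMU as
 * a unit, and tokens like "cpu/cycles-t/" name raw PMU events by their
 * sysfs alias (an illustration of the syntax, not an exhaustive grammar).
 */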
/* More limited version when the CPU does not have all events. */
static const char *transaction_limited_attrs = {
	"task-clock,"
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/"
	"}"
};

static const char *topdown_attrs[] = {
	"topdown-total-slots",
	"topdown-slots-retired",
	"topdown-recovery-bubbles",
	"topdown-fetch-bubbles",
	"topdown-slots-issued",
	NULL,
};

static const char *smi_cost_attrs = {
	"{"
	"msr/aperf/,"
	"msr/smi/,"
	"cycles"
	"}"
};

static struct perf_evlist *evsel_list;

static struct rblist metric_events;

static struct target target = {
	.uid = UINT_MAX,
};

typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu);

static int		run_count		= 1;
static bool		no_inherit		= false;
static volatile pid_t	child_pid		= -1;
static bool		null_run		= false;
static int		detailed_run		= 0;
static bool		transaction_run;
static bool		topdown_run		= false;
static bool		smi_cost		= false;
static bool		smi_reset		= false;
static bool		big_num			= true;
static int		big_num_opt		= -1;
static const char	*csv_sep		= NULL;
static bool		csv_output		= false;
static bool		group			= false;
static const char	*pre_cmd		= NULL;
static const char	*post_cmd		= NULL;
static bool		sync_run		= false;
static unsigned int	initial_delay		= 0;
static unsigned int	unit_width		= 4; /* strlen("unit") */
static bool		forever			= false;
static bool		metric_only		= false;
static bool		force_metric_only	= false;
static bool		no_merge		= false;
static struct timespec	ref_time;
static struct cpu_map	*aggr_map;
static aggr_get_id_t	aggr_get_id;
static bool		append_file;
static bool		interval_count;
static const char	*output_name;
static int		output_fd;
static int		print_free_counters_hint;
static int		print_mixed_hw_group_error;

struct perf_stat {
	bool			 record;
	struct perf_data	 data;
	struct perf_session	*session;
	u64			 bytes_written;
	struct perf_tool	 tool;
	bool			 maps_allocated;
	struct cpu_map		*cpus;
	struct thread_map	*threads;
	enum aggr_mode		 aggr_mode;
};

static struct perf_stat		perf_stat;
#define STAT_RECORD		perf_stat.record

static volatile int done = 0;

static struct perf_stat_config stat_config = {
	.aggr_mode	= AGGR_GLOBAL,
	.scale		= true,
};

static bool is_duration_time(struct perf_evsel *evsel)
{
	return !strcmp(evsel->name, "duration_time");
}

static inline void diff_timespec(struct timespec *r, struct timespec *a,
				 struct timespec *b)
{
	r->tv_sec = a->tv_sec - b->tv_sec;
	if (a->tv_nsec < b->tv_nsec) {
		r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec;
		r->tv_sec--;
	} else {
		r->tv_nsec = a->tv_nsec - b->tv_nsec;
	}
}

static void perf_stat__reset_stats(void)
{
	int i;

	perf_evlist__reset_stats(evsel_list);
	perf_stat__reset_shadow_stats();

	for (i = 0; i < stat_config.stats_num; i++)
		perf_stat__reset_shadow_per_stat(&stat_config.stats[i]);
}

static int create_perf_stat_counter(struct perf_evsel *evsel)
{
	struct perf_event_attr *attr = &evsel->attr;
	struct perf_evsel *leader = evsel->leader;

	if (stat_config.scale) {
		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
				    PERF_FORMAT_TOTAL_TIME_RUNNING;
	}
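	/*
	 * For reference (a sketch of the perf_event_open() uapi semantics,
	 * not something defined in this file): with the
	 * PERF_FORMAT_GROUP|PERF_FORMAT_ID bits set below, one read() on
	 * the group leader returns every member at once, roughly:
	 *
	 *	struct read_format {
	 *		u64 nr;			// events in the group
	 *		u64 time_enabled;	// PERF_FORMAT_TOTAL_TIME_ENABLED
	 *		u64 time_running;	// PERF_FORMAT_TOTAL_TIME_RUNNING
	 *		struct {
	 *			u64 value;	// the count itself
	 *			u64 id;		// PERF_FORMAT_ID
	 *		} values[nr];
	 *	};
	 */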
	/*
	 * The event is part of a non-trivial group. Enable the group read
	 * (for the leader) and ID retrieval for all members.
	 */
	if (leader->nr_members > 1)
		attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;

	attr->inherit = !no_inherit;

	/*
	 * Some events get initialized with sample_(period/type) set,
	 * like tracepoints. Clear it up for counting.
	 */
	attr->sample_period = 0;

	/*
	 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be
	 * harmless while avoiding confusing messages from older tools.
	 *
	 * However for pipe sessions we need to keep it zero, because
	 * script's perf_evsel__check_attr is triggered by
	 * attr->sample_type != 0, and we can't run it on stat sessions.
	 */
	if (!(STAT_RECORD && perf_stat.data.is_pipe))
		attr->sample_type = PERF_SAMPLE_IDENTIFIER;

	/*
	 * Disable all counters initially; they will be enabled either
	 * manually by us or by the kernel via enable_on_exec, set later.
	 */
	if (perf_evsel__is_group_leader(evsel)) {
		attr->disabled = 1;

		/*
		 * In case of initial_delay we enable tracee
		 * events manually.
		 */
		if (target__none(&target) && !initial_delay)
			attr->enable_on_exec = 1;
	}

	if (target__has_cpu(&target) && !target__has_per_thread(&target))
		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));

	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
}

/*
 * Does the counter have nsecs as a unit?
 */
static inline int nsec_counter(struct perf_evsel *evsel)
{
	if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
	    perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
		return 1;

	return 0;
}

static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	perf_stat.bytes_written += event->header.size;
	return 0;
}

static int write_stat_round_event(u64 tm, u64 type)
{
	return perf_event__synthesize_stat_round(NULL, tm, type,
						 process_synthesized_event,
						 NULL);
}

#define WRITE_STAT_ROUND_EVENT(time, interval) \
	write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)

#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

static int
perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread,
			     struct perf_counts_values *count)
{
	struct perf_sample_id *sid = SID(counter, cpu, thread);

	return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
					   process_synthesized_event, NULL);
}

/*
 * Read out the results of a single counter:
 * do not aggregate counts across CPUs in system-wide mode
 */
static int read_counter(struct perf_evsel *counter)
{
	int nthreads = thread_map__nr(evsel_list->threads);
	int ncpus, cpu, thread;

	if (target__has_cpu(&target) && !target__has_per_thread(&target))
		ncpus = perf_evsel__nr_cpus(counter);
	else
		ncpus = 1;

	if (!counter->supported)
		return -ENOENT;

	if (counter->system_wide)
		nthreads = 1;

	for (thread = 0; thread < nthreads; thread++) {
		for (cpu = 0; cpu < ncpus; cpu++) {
			struct perf_counts_values *count;

			count = perf_counts(counter->counts, cpu, thread);
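			/*
			 * A reading aid (not new behavior): counter->counts
			 * is a 2-D cpu x thread xyarray, and perf_counts()
			 * above fetched one cell of it. count->loaded,
			 * checked below, records whether a group read has
			 * already filled that cell.
			 */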
			/*
			 * The leader's group read loads data into its group
			 * members (via perf_evsel__read_counter) and sets
			 * their count->loaded.
			 */
			if (!count->loaded &&
			    perf_evsel__read_counter(counter, cpu, thread)) {
				counter->counts->scaled = -1;
				perf_counts(counter->counts, cpu, thread)->ena = 0;
				perf_counts(counter->counts, cpu, thread)->run = 0;
				return -1;
			}

			count->loaded = false;

			if (STAT_RECORD) {
				if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
					pr_err("failed to write stat event\n");
					return -1;
				}
			}

			if (verbose > 1) {
				fprintf(stat_config.output,
					"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
					perf_evsel__name(counter),
					cpu,
					count->val, count->ena, count->run);
			}
		}
	}

	return 0;
}

static void read_counters(void)
{
	struct perf_evsel *counter;
	int ret;

	evlist__for_each_entry(evsel_list, counter) {
		ret = read_counter(counter);
		if (ret)
			pr_debug("failed to read counter %s\n", counter->name);

		if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
			pr_warning("failed to process counter %s\n", counter->name);
	}
}

static void process_interval(void)
{
	struct timespec ts, rs;

	read_counters();

	clock_gettime(CLOCK_MONOTONIC, &ts);
	diff_timespec(&rs, &ts, &ref_time);

	if (STAT_RECORD) {
		if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
			pr_err("failed to write stat round event\n");
	}

	init_stats(&walltime_nsecs_stats);
	update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000);
	print_counters(&rs, 0, NULL);
}

static void enable_counters(void)
{
	if (initial_delay)
		usleep(initial_delay * USEC_PER_MSEC);

	/*
	 * We need to enable counters only if:
	 * - we don't have tracee (attaching to task or cpu)
	 * - we have initial delay configured
	 */
	if (!target__none(&target) || initial_delay)
		perf_evlist__enable(evsel_list);
}

static void disable_counters(void)
{
	/*
	 * If we don't have tracee (attaching to task or cpu), counters may
	 * still be running. To get accurate group ratios, we must stop groups
	 * from counting before reading their constituent counters.
	 */
	if (!target__none(&target))
		perf_evlist__disable(evsel_list);
}

static volatile int workload_exec_errno;
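/*
 * For context, a handler like workload_exec_failed_signal() below receives
 * the errno through sigqueue()-style siginfo, which requires SA_SIGINFO.
 * A minimal sketch of the standard POSIX setup (an assumption about how
 * the evlist code arms it, not copied from this tree):
 *
 *	struct sigaction act = {
 *		.sa_flags     = SA_SIGINFO,
 *		.sa_sigaction = workload_exec_failed_signal,
 *	};
 *	sigaction(SIGUSR1, &act, NULL);
 */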
/*
 * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails,
 * since we asked for that by setting its want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
}

static int perf_stat_synthesize_config(bool is_pipe)
{
	int err;

	if (is_pipe) {
		err = perf_event__synthesize_attrs(NULL, perf_stat.session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}
	}

	err = perf_event__synthesize_extra_attr(NULL,
						evsel_list,
						process_synthesized_event,
						is_pipe);
	if (err < 0)
		return err;

	err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads,
						 process_synthesized_event,
						 NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = perf_event__synthesize_stat_config(NULL, &stat_config,
						 process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize config.\n");
		return err;
	}

	return 0;
}

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))

static int __store_counter_ids(struct perf_evsel *counter)
{
	int cpu, thread;

	for (cpu = 0; cpu < xyarray__max_x(counter->fd); cpu++) {
		for (thread = 0; thread < xyarray__max_y(counter->fd);
		     thread++) {
			int fd = FD(counter, cpu, thread);

			if (perf_evlist__id_add_fd(evsel_list, counter,
						   cpu, thread, fd) < 0)
				return -1;
		}
	}

	return 0;
}

static int store_counter_ids(struct perf_evsel *counter)
{
	struct cpu_map *cpus = counter->cpus;
	struct thread_map *threads = counter->threads;

	if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr))
		return -ENOMEM;

	return __store_counter_ids(counter);
}

static bool perf_evsel__should_store_id(struct perf_evsel *counter)
{
	return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
}

static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
{
	struct perf_evsel *c2, *leader;
	bool is_open = true;

	leader = evsel->leader;
	pr_debug("Weak group for %s/%d failed\n",
		 leader->name, leader->nr_members);

	/*
	 * for_each_group_member doesn't work here because it doesn't
	 * include the first entry.
	 */
	evlist__for_each_entry(evsel_list, c2) {
		if (c2 == evsel)
			is_open = false;
		if (c2->leader == leader) {
			if (is_open)
				perf_evsel__close(c2);
			c2->leader = c2;
			c2->nr_members = 0;
		}
	}
	return leader;
}

static int __run_perf_stat(int argc, const char **argv)
{
	int interval = stat_config.interval;
	int times = stat_config.times;
	int timeout = stat_config.timeout;
	char msg[BUFSIZ];
	unsigned long long t0, t1;
	struct perf_evsel *counter;
	struct timespec ts;
	size_t l;
	int status = 0;
	const bool forks = (argc > 0);
	bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
	struct perf_evsel_config_term *err_term;

	if (interval) {
		ts.tv_sec = interval / USEC_PER_MSEC;
		ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
	} else if (timeout) {
		ts.tv_sec = timeout / USEC_PER_MSEC;
		ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC;
	} else {
		ts.tv_sec = 1;
		ts.tv_nsec = 0;
	}

	if (forks) {
		if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
						  workload_exec_failed_signal) < 0) {
			perror("failed to prepare workload");
			return -1;
		}
		child_pid = evsel_list->workload.pid;
	}

	if (group)
		perf_evlist__set_leader(evsel_list);

	evlist__for_each_entry(evsel_list, counter) {
try_again:
		if (create_perf_stat_counter(counter) < 0) {

			/* Weak group failed. Reset the group. */
			if ((errno == EINVAL || errno == EBADF) &&
			    counter->leader != counter &&
			    counter->weak_group) {
				counter = perf_evsel__reset_weak_group(counter);
				goto try_again;
			}

			/*
			 * PPC returns ENXIO for HW counters until 2.6.37
			 * (behavior changed with commit b0a873e).
			 */
			if (errno == EINVAL || errno == ENOSYS ||
			    errno == ENOENT || errno == EOPNOTSUPP ||
			    errno == ENXIO) {
				if (verbose > 0)
					ui__warning("%s event is not supported by the kernel.\n",
						    perf_evsel__name(counter));
				counter->supported = false;

				if ((counter->leader != counter) ||
				    !(counter->leader->nr_members > 1))
					continue;
			} else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			} else if (target__has_per_thread(&target) &&
				   evsel_list->threads &&
				   evsel_list->threads->err_thread != -1) {
				/*
				 * For global --per-thread case, skip current
				 * error thread.
				 */
647 */ 648 if (!thread_map__remove(evsel_list->threads, 649 evsel_list->threads->err_thread)) { 650 evsel_list->threads->err_thread = -1; 651 goto try_again; 652 } 653 } 654 655 perf_evsel__open_strerror(counter, &target, 656 errno, msg, sizeof(msg)); 657 ui__error("%s\n", msg); 658 659 if (child_pid != -1) 660 kill(child_pid, SIGTERM); 661 662 return -1; 663 } 664 counter->supported = true; 665 666 l = strlen(counter->unit); 667 if (l > unit_width) 668 unit_width = l; 669 670 if (perf_evsel__should_store_id(counter) && 671 store_counter_ids(counter)) 672 return -1; 673 } 674 675 if (perf_evlist__apply_filters(evsel_list, &counter)) { 676 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 677 counter->filter, perf_evsel__name(counter), errno, 678 str_error_r(errno, msg, sizeof(msg))); 679 return -1; 680 } 681 682 if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) { 683 pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", 684 err_term->val.drv_cfg, perf_evsel__name(counter), errno, 685 str_error_r(errno, msg, sizeof(msg))); 686 return -1; 687 } 688 689 if (STAT_RECORD) { 690 int err, fd = perf_data__fd(&perf_stat.data); 691 692 if (is_pipe) { 693 err = perf_header__write_pipe(perf_data__fd(&perf_stat.data)); 694 } else { 695 err = perf_session__write_header(perf_stat.session, evsel_list, 696 fd, false); 697 } 698 699 if (err < 0) 700 return err; 701 702 err = perf_stat_synthesize_config(is_pipe); 703 if (err < 0) 704 return err; 705 } 706 707 /* 708 * Enable counters and exec the command: 709 */ 710 t0 = rdclock(); 711 clock_gettime(CLOCK_MONOTONIC, &ref_time); 712 713 if (forks) { 714 perf_evlist__start_workload(evsel_list); 715 enable_counters(); 716 717 if (interval || timeout) { 718 while (!waitpid(child_pid, &status, WNOHANG)) { 719 nanosleep(&ts, NULL); 720 if (timeout) 721 break; 722 process_interval(); 723 if (interval_count && !(--times)) 724 break; 725 } 726 } 727 waitpid(child_pid, &status, 0); 728 729 if (workload_exec_errno) { 730 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 731 pr_err("Workload failed: %s\n", emsg); 732 return -1; 733 } 734 735 if (WIFSIGNALED(status)) 736 psignal(WTERMSIG(status), argv[0]); 737 } else { 738 enable_counters(); 739 while (!done) { 740 nanosleep(&ts, NULL); 741 if (timeout) 742 break; 743 if (interval) { 744 process_interval(); 745 if (interval_count && !(--times)) 746 break; 747 } 748 } 749 } 750 751 disable_counters(); 752 753 t1 = rdclock(); 754 755 update_stats(&walltime_nsecs_stats, t1 - t0); 756 757 /* 758 * Closing a group leader splits the group, and as we only disable 759 * group leaders, results in remaining events becoming enabled. To 760 * avoid arbitrary skew, we must read all counters before closing any 761 * group leaders. 762 */ 763 read_counters(); 764 perf_evlist__close(evsel_list); 765 766 return WEXITSTATUS(status); 767 } 768 769 static int run_perf_stat(int argc, const char **argv) 770 { 771 int ret; 772 773 if (pre_cmd) { 774 ret = system(pre_cmd); 775 if (ret) 776 return ret; 777 } 778 779 if (sync_run) 780 sync(); 781 782 ret = __run_perf_stat(argc, argv); 783 if (ret) 784 return ret; 785 786 if (post_cmd) { 787 ret = system(post_cmd); 788 if (ret) 789 return ret; 790 } 791 792 return ret; 793 } 794 795 static void print_running(u64 run, u64 ena) 796 { 797 if (csv_output) { 798 fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", 799 csv_sep, 800 run, 801 csv_sep, 802 ena ? 
static void print_noise_pct(double total, double avg)
{
	double pct = rel_stddev_stats(total, avg);

	if (csv_output)
		fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
	else if (pct)
		fprintf(stat_config.output, "  ( +-%6.2f%% )", pct);
}

static void print_noise(struct perf_evsel *evsel, double avg)
{
	struct perf_stat_evsel *ps;

	if (run_count == 1)
		return;

	ps = evsel->stats;
	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
}

static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
{
	switch (stat_config.aggr_mode) {
	case AGGR_CORE:
		fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
			cpu_map__id_to_socket(id),
			csv_output ? 0 : -8,
			cpu_map__id_to_cpu(id),
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
		break;
	case AGGR_SOCKET:
		fprintf(stat_config.output, "S%*d%s%*d%s",
			csv_output ? 0 : -5,
			id,
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
		break;
	case AGGR_NONE:
		fprintf(stat_config.output, "CPU%*d%s",
			csv_output ? 0 : -4,
			perf_evsel__cpus(evsel)->map[id], csv_sep);
		break;
	case AGGR_THREAD:
		fprintf(stat_config.output, "%*s-%*d%s",
			csv_output ? 0 : 16,
			thread_map__comm(evsel->threads, id),
			csv_output ? 0 : -8,
			thread_map__pid(evsel->threads, id),
			csv_sep);
		break;
	case AGGR_GLOBAL:
	case AGGR_UNSET:
	default:
		break;
	}
}

struct outstate {
	FILE *fh;
	bool newline;
	const char *prefix;
	int  nfields;
	int  id, nr;
	struct perf_evsel *evsel;
};

#define METRIC_LEN  35

static void new_line_std(void *ctx)
{
	struct outstate *os = ctx;

	os->newline = true;
}

static void do_new_line_std(struct outstate *os)
{
	fputc('\n', os->fh);
	fputs(os->prefix, os->fh);
	aggr_printout(os->evsel, os->id, os->nr);
	if (stat_config.aggr_mode == AGGR_NONE)
		fprintf(os->fh, " ");
	fprintf(os->fh, " ");
}

static void print_metric_std(void *ctx, const char *color, const char *fmt,
			     const char *unit, double val)
{
	struct outstate *os = ctx;
	FILE *out = os->fh;
	int n;
	bool newline = os->newline;

	os->newline = false;

	if (unit == NULL || fmt == NULL) {
		fprintf(out, "%-*s", METRIC_LEN, "");
		return;
	}

	if (newline)
		do_new_line_std(os);

	n = fprintf(out, " # ");
	if (color)
		n += color_fprintf(out, color, fmt, val);
	else
		n += fprintf(out, fmt, val);
	fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
}

static void new_line_csv(void *ctx)
{
	struct outstate *os = ctx;
	int i;

	fputc('\n', os->fh);
	if (os->prefix)
		fprintf(os->fh, "%s%s", os->prefix, csv_sep);
	aggr_printout(os->evsel, os->id, os->nr);
	for (i = 0; i < os->nfields; i++)
		fputs(csv_sep, os->fh);
}

static void print_metric_csv(void *ctx,
			     const char *color __maybe_unused,
			     const char *fmt, const char *unit, double val)
{
	struct outstate *os = ctx;
	FILE *out = os->fh;
	char buf[64], *vals, *ends;

	if (unit == NULL || fmt == NULL) {
		fprintf(out, "%s%s", csv_sep, csv_sep);
		return;
	}
	snprintf(buf, sizeof(buf), fmt, val);
	ends = vals = ltrim(buf);
	while (isdigit(*ends) || *ends == '.')
		ends++;
	*ends = 0;
	while (isspace(*unit))
		unit++;
	fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit);
}
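/*
 * Illustration of the CSV layout the helpers above produce (made-up
 * numbers, with -x, so csv_sep is "," and -I supplying the time prefix):
 *
 *	1.001012818,4000.47,msec,task-clock,4000472167,100.00,4.000,CPUs utilized
 *
 * i.e. count, unit and event name first, then the running time and
 * run/ena percentage, then the metric value and unit emitted by
 * print_metric_csv().
 */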
#define METRIC_ONLY_LEN 20

/* Filter out some columns that don't work well in metrics only mode */

static bool valid_only_metric(const char *unit)
{
	if (!unit)
		return false;
	if (strstr(unit, "/sec") ||
	    strstr(unit, "hz") ||
	    strstr(unit, "Hz") ||
	    strstr(unit, "CPUs utilized"))
		return false;
	return true;
}

static const char *fixunit(char *buf, struct perf_evsel *evsel,
			   const char *unit)
{
	if (!strncmp(unit, "of all", 6)) {
		snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel),
			 unit);
		return buf;
	}
	return unit;
}

static void print_metric_only(void *ctx, const char *color, const char *fmt,
			      const char *unit, double val)
{
	struct outstate *os = ctx;
	FILE *out = os->fh;
	int n;
	char buf[1024];
	unsigned mlen = METRIC_ONLY_LEN;

	if (!valid_only_metric(unit))
		return;
	unit = fixunit(buf, os->evsel, unit);
	if (color)
		n = color_fprintf(out, color, fmt, val);
	else
		n = fprintf(out, fmt, val);
	if (n > METRIC_ONLY_LEN)
		n = METRIC_ONLY_LEN;
	if (mlen < strlen(unit))
		mlen = strlen(unit) + 1;
	fprintf(out, "%*s", mlen - n, "");
}

static void print_metric_only_csv(void *ctx, const char *color __maybe_unused,
				  const char *fmt,
				  const char *unit, double val)
{
	struct outstate *os = ctx;
	FILE *out = os->fh;
	char buf[64], *vals, *ends;
	char tbuf[1024];

	if (!valid_only_metric(unit))
		return;
	unit = fixunit(tbuf, os->evsel, unit);
	snprintf(buf, sizeof buf, fmt, val);
	ends = vals = ltrim(buf);
	while (isdigit(*ends) || *ends == '.')
		ends++;
	*ends = 0;
	fprintf(out, "%s%s", vals, csv_sep);
}

static void new_line_metric(void *ctx __maybe_unused)
{
}

static void print_metric_header(void *ctx, const char *color __maybe_unused,
				const char *fmt __maybe_unused,
				const char *unit, double val __maybe_unused)
{
	struct outstate *os = ctx;
	char tbuf[1024];

	if (!valid_only_metric(unit))
		return;
	unit = fixunit(tbuf, os->evsel, unit);
	if (csv_output)
		fprintf(os->fh, "%s%s", unit, csv_sep);
	else
		fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit);
}

static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
{
	FILE *output = stat_config.output;
	double msecs = avg / NSEC_PER_MSEC;
	const char *fmt_v, *fmt_n;
	char name[25];

	fmt_v = csv_output ? "%.6f%s" : "%18.6f%s";
	fmt_n = csv_output ? "%s" : "%-25s";

	aggr_printout(evsel, id, nr);

	scnprintf(name, sizeof(name), "%s%s",
		  perf_evsel__name(evsel), csv_output ? "" : " (msec)");
"" : " (msec)"); 1064 1065 fprintf(output, fmt_v, msecs, csv_sep); 1066 1067 if (csv_output) 1068 fprintf(output, "%s%s", evsel->unit, csv_sep); 1069 else 1070 fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep); 1071 1072 fprintf(output, fmt_n, name); 1073 1074 if (evsel->cgrp) 1075 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1076 } 1077 1078 static int first_shadow_cpu(struct perf_evsel *evsel, int id) 1079 { 1080 int i; 1081 1082 if (!aggr_get_id) 1083 return 0; 1084 1085 if (stat_config.aggr_mode == AGGR_NONE) 1086 return id; 1087 1088 if (stat_config.aggr_mode == AGGR_GLOBAL) 1089 return 0; 1090 1091 for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { 1092 int cpu2 = perf_evsel__cpus(evsel)->map[i]; 1093 1094 if (aggr_get_id(evsel_list->cpus, cpu2) == id) 1095 return cpu2; 1096 } 1097 return 0; 1098 } 1099 1100 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 1101 { 1102 FILE *output = stat_config.output; 1103 double sc = evsel->scale; 1104 const char *fmt; 1105 1106 if (csv_output) { 1107 fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; 1108 } else { 1109 if (big_num) 1110 fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; 1111 else 1112 fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; 1113 } 1114 1115 aggr_printout(evsel, id, nr); 1116 1117 fprintf(output, fmt, avg, csv_sep); 1118 1119 if (evsel->unit) 1120 fprintf(output, "%-*s%s", 1121 csv_output ? 0 : unit_width, 1122 evsel->unit, csv_sep); 1123 1124 fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); 1125 1126 if (evsel->cgrp) 1127 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 1128 } 1129 1130 static bool is_mixed_hw_group(struct perf_evsel *counter) 1131 { 1132 struct perf_evlist *evlist = counter->evlist; 1133 u32 pmu_type = counter->attr.type; 1134 struct perf_evsel *pos; 1135 1136 if (counter->nr_members < 2) 1137 return false; 1138 1139 evlist__for_each_entry(evlist, pos) { 1140 /* software events can be part of any hardware group */ 1141 if (pos->attr.type == PERF_TYPE_SOFTWARE) 1142 continue; 1143 if (pmu_type == PERF_TYPE_SOFTWARE) { 1144 pmu_type = pos->attr.type; 1145 continue; 1146 } 1147 if (pmu_type != pos->attr.type) 1148 return true; 1149 } 1150 1151 return false; 1152 } 1153 1154 static void printout(int id, int nr, struct perf_evsel *counter, double uval, 1155 char *prefix, u64 run, u64 ena, double noise, 1156 struct runtime_stat *st) 1157 { 1158 struct perf_stat_output_ctx out; 1159 struct outstate os = { 1160 .fh = stat_config.output, 1161 .prefix = prefix ? prefix : "", 1162 .id = id, 1163 .nr = nr, 1164 .evsel = counter, 1165 }; 1166 print_metric_t pm = print_metric_std; 1167 void (*nl)(void *); 1168 1169 if (metric_only) { 1170 nl = new_line_metric; 1171 if (csv_output) 1172 pm = print_metric_only_csv; 1173 else 1174 pm = print_metric_only; 1175 } else 1176 nl = new_line_std; 1177 1178 if (csv_output && !metric_only) { 1179 static int aggr_fields[] = { 1180 [AGGR_GLOBAL] = 0, 1181 [AGGR_THREAD] = 1, 1182 [AGGR_NONE] = 1, 1183 [AGGR_SOCKET] = 2, 1184 [AGGR_CORE] = 2, 1185 }; 1186 1187 pm = print_metric_csv; 1188 nl = new_line_csv; 1189 os.nfields = 3; 1190 os.nfields += aggr_fields[stat_config.aggr_mode]; 1191 if (counter->cgrp) 1192 os.nfields++; 1193 } 1194 if (run == 0 || ena == 0 || counter->counts->scaled == -1) { 1195 if (metric_only) { 1196 pm(&os, NULL, "", "", 0); 1197 return; 1198 } 1199 aggr_printout(counter, id, nr); 1200 1201 fprintf(stat_config.output, "%*s%s", 1202 csv_output ? 0 : 18, 1203 counter->supported ? 
		if (counter->supported) {
			print_free_counters_hint = 1;
			if (is_mixed_hw_group(counter))
				print_mixed_hw_group_error = 1;
		}

		fprintf(stat_config.output, "%-*s%s",
			csv_output ? 0 : unit_width,
			counter->unit, csv_sep);

		fprintf(stat_config.output, "%*s",
			csv_output ? 0 : -25,
			perf_evsel__name(counter));

		if (counter->cgrp)
			fprintf(stat_config.output, "%s%s",
				csv_sep, counter->cgrp->name);

		if (!csv_output)
			pm(&os, NULL, NULL, "", 0);
		print_noise(counter, noise);
		print_running(run, ena);
		if (csv_output)
			pm(&os, NULL, NULL, "", 0);
		return;
	}

	if (metric_only)
		/* nothing */;
	else if (nsec_counter(counter))
		nsec_printout(id, nr, counter, uval);
	else
		abs_printout(id, nr, counter, uval);

	out.print_metric = pm;
	out.new_line = nl;
	out.ctx = &os;
	out.force_header = false;

	if (csv_output && !metric_only) {
		print_noise(counter, noise);
		print_running(run, ena);
	}

	perf_stat__print_shadow_stats(counter, uval,
				      first_shadow_cpu(counter, id),
				      &out, &metric_events, st);
	if (!csv_output && !metric_only) {
		print_noise(counter, noise);
		print_running(run, ena);
	}
}

static void aggr_update_shadow(void)
{
	int cpu, s2, id, s;
	u64 val;
	struct perf_evsel *counter;

	for (s = 0; s < aggr_map->nr; s++) {
		id = aggr_map->map[s];
		evlist__for_each_entry(evsel_list, counter) {
			val = 0;
			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
				s2 = aggr_get_id(evsel_list->cpus, cpu);
				if (s2 != id)
					continue;
				val += perf_counts(counter->counts, cpu, 0)->val;
			}
			perf_stat__update_shadow_stats(counter, val,
					first_shadow_cpu(counter, id),
					&rt_stat);
		}
	}
}

static void uniquify_event_name(struct perf_evsel *counter)
{
	char *new_name;
	char *config;

	if (counter->uniquified_name ||
	    !counter->pmu_name || !strncmp(counter->name, counter->pmu_name,
					   strlen(counter->pmu_name)))
		return;

	config = strchr(counter->name, '/');
	if (config) {
		if (asprintf(&new_name,
			     "%s%s", counter->pmu_name, config) > 0) {
			free(counter->name);
			counter->name = new_name;
		}
	} else {
		if (asprintf(&new_name,
			     "%s [%s]", counter->name, counter->pmu_name) > 0) {
			free(counter->name);
			counter->name = new_name;
		}
	}

	counter->uniquified_name = true;
}

static void collect_all_aliases(struct perf_evsel *counter,
				void (*cb)(struct perf_evsel *counter, void *data,
					   bool first),
				void *data)
{
	struct perf_evsel *alias;

	alias = list_prepare_entry(counter, &(evsel_list->entries), node);
	list_for_each_entry_continue(alias, &evsel_list->entries, node) {
		if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) ||
		    alias->scale != counter->scale ||
		    alias->cgrp != counter->cgrp ||
		    strcmp(alias->unit, counter->unit) ||
		    nsec_counter(alias) != nsec_counter(counter))
			break;
		alias->merged_stat = true;
		cb(alias, data, false);
	}
}
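/*
 * Why aliases get merged (a reading aid): a single logical PMU can be
 * exposed by the kernel as several instances, e.g. uncore_imc_0,
 * uncore_imc_1, ..., each counting its own slice. collect_all_aliases()
 * above walks the evlist, where such duplicates sit adjacent, and folds
 * every compatible instance into the first one so that by default a
 * single summed line is printed per logical event; --no-merge keeps them
 * separate and uniquify_event_name() makes the names distinguishable.
 */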
static bool collect_data(struct perf_evsel *counter,
			 void (*cb)(struct perf_evsel *counter, void *data,
				    bool first),
			 void *data)
{
	if (counter->merged_stat)
		return false;
	cb(counter, data, true);
	if (no_merge)
		uniquify_event_name(counter);
	else if (counter->auto_merge_stats)
		collect_all_aliases(counter, cb, data);
	return true;
}

struct aggr_data {
	u64 ena, run, val;
	int id;
	int nr;
	int cpu;
};

static void aggr_cb(struct perf_evsel *counter, void *data, bool first)
{
	struct aggr_data *ad = data;
	int cpu, s2;

	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
		struct perf_counts_values *counts;

		s2 = aggr_get_id(perf_evsel__cpus(counter), cpu);
		if (s2 != ad->id)
			continue;
		if (first)
			ad->nr++;
		counts = perf_counts(counter->counts, cpu, 0);
		/*
		 * When any result is bad, make them all bad to give
		 * consistent output in interval mode.
		 */
		if (counts->ena == 0 || counts->run == 0 ||
		    counter->counts->scaled == -1) {
			ad->ena = 0;
			ad->run = 0;
			break;
		}
		ad->val += counts->val;
		ad->ena += counts->ena;
		ad->run += counts->run;
	}
}

static void print_aggr(char *prefix)
{
	FILE *output = stat_config.output;
	struct perf_evsel *counter;
	int s, id, nr;
	double uval;
	u64 ena, run, val;
	bool first;

	if (!(aggr_map || aggr_get_id))
		return;

	aggr_update_shadow();

	/*
	 * With metric_only everything is on a single line.
	 * Without it, each counter has its own line.
	 */
	for (s = 0; s < aggr_map->nr; s++) {
		struct aggr_data ad;
		if (prefix && metric_only)
			fprintf(output, "%s", prefix);

		ad.id = id = aggr_map->map[s];
		first = true;
		evlist__for_each_entry(evsel_list, counter) {
			if (is_duration_time(counter))
				continue;

			ad.val = ad.ena = ad.run = 0;
			ad.nr = 0;
			if (!collect_data(counter, aggr_cb, &ad))
				continue;
			nr = ad.nr;
			ena = ad.ena;
			run = ad.run;
			val = ad.val;
			if (first && metric_only) {
				first = false;
				aggr_printout(counter, id, nr);
			}
			if (prefix && !metric_only)
				fprintf(output, "%s", prefix);

			uval = val * counter->scale;
			printout(id, nr, counter, uval, prefix, run, ena, 1.0,
				 &rt_stat);
			if (!metric_only)
				fputc('\n', output);
		}
		if (metric_only)
			fputc('\n', output);
	}
}

static int cmp_val(const void *a, const void *b)
{
	return ((struct perf_aggr_thread_value *)b)->val -
	       ((struct perf_aggr_thread_value *)a)->val;
}

static struct perf_aggr_thread_value *sort_aggr_thread(
					struct perf_evsel *counter,
					int nthreads, int ncpus,
					int *ret)
{
	int cpu, thread, i = 0;
	double uval;
	struct perf_aggr_thread_value *buf;

	buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value));
	if (!buf)
		return NULL;

	for (thread = 0; thread < nthreads; thread++) {
		u64 ena = 0, run = 0, val = 0;

		for (cpu = 0; cpu < ncpus; cpu++) {
			val += perf_counts(counter->counts, cpu, thread)->val;
			ena += perf_counts(counter->counts, cpu, thread)->ena;
			run += perf_counts(counter->counts, cpu, thread)->run;
		}

		uval = val * counter->scale;

		/*
		 * Skip value 0 when enabling --per-thread globally,
		 * otherwise it would produce too much zero output.
		 */
1470 */ 1471 if (uval == 0.0 && target__has_per_thread(&target)) 1472 continue; 1473 1474 buf[i].counter = counter; 1475 buf[i].id = thread; 1476 buf[i].uval = uval; 1477 buf[i].val = val; 1478 buf[i].run = run; 1479 buf[i].ena = ena; 1480 i++; 1481 } 1482 1483 qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val); 1484 1485 if (ret) 1486 *ret = i; 1487 1488 return buf; 1489 } 1490 1491 static void print_aggr_thread(struct perf_evsel *counter, char *prefix) 1492 { 1493 FILE *output = stat_config.output; 1494 int nthreads = thread_map__nr(counter->threads); 1495 int ncpus = cpu_map__nr(counter->cpus); 1496 int thread, sorted_threads, id; 1497 struct perf_aggr_thread_value *buf; 1498 1499 buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads); 1500 if (!buf) { 1501 perror("cannot sort aggr thread"); 1502 return; 1503 } 1504 1505 for (thread = 0; thread < sorted_threads; thread++) { 1506 if (prefix) 1507 fprintf(output, "%s", prefix); 1508 1509 id = buf[thread].id; 1510 if (stat_config.stats) 1511 printout(id, 0, buf[thread].counter, buf[thread].uval, 1512 prefix, buf[thread].run, buf[thread].ena, 1.0, 1513 &stat_config.stats[id]); 1514 else 1515 printout(id, 0, buf[thread].counter, buf[thread].uval, 1516 prefix, buf[thread].run, buf[thread].ena, 1.0, 1517 &rt_stat); 1518 fputc('\n', output); 1519 } 1520 1521 free(buf); 1522 } 1523 1524 struct caggr_data { 1525 double avg, avg_enabled, avg_running; 1526 }; 1527 1528 static void counter_aggr_cb(struct perf_evsel *counter, void *data, 1529 bool first __maybe_unused) 1530 { 1531 struct caggr_data *cd = data; 1532 struct perf_stat_evsel *ps = counter->stats; 1533 1534 cd->avg += avg_stats(&ps->res_stats[0]); 1535 cd->avg_enabled += avg_stats(&ps->res_stats[1]); 1536 cd->avg_running += avg_stats(&ps->res_stats[2]); 1537 } 1538 1539 /* 1540 * Print out the results of a single counter: 1541 * aggregated counts in system-wide mode 1542 */ 1543 static void print_counter_aggr(struct perf_evsel *counter, char *prefix) 1544 { 1545 FILE *output = stat_config.output; 1546 double uval; 1547 struct caggr_data cd = { .avg = 0.0 }; 1548 1549 if (!collect_data(counter, counter_aggr_cb, &cd)) 1550 return; 1551 1552 if (prefix && !metric_only) 1553 fprintf(output, "%s", prefix); 1554 1555 uval = cd.avg * counter->scale; 1556 printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, 1557 cd.avg, &rt_stat); 1558 if (!metric_only) 1559 fprintf(output, "\n"); 1560 } 1561 1562 static void counter_cb(struct perf_evsel *counter, void *data, 1563 bool first __maybe_unused) 1564 { 1565 struct aggr_data *ad = data; 1566 1567 ad->val += perf_counts(counter->counts, ad->cpu, 0)->val; 1568 ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena; 1569 ad->run += perf_counts(counter->counts, ad->cpu, 0)->run; 1570 } 1571 1572 /* 1573 * Print out the results of a single counter: 1574 * does not use aggregated count in system-wide 1575 */ 1576 static void print_counter(struct perf_evsel *counter, char *prefix) 1577 { 1578 FILE *output = stat_config.output; 1579 u64 ena, run, val; 1580 double uval; 1581 int cpu; 1582 1583 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1584 struct aggr_data ad = { .cpu = cpu }; 1585 1586 if (!collect_data(counter, counter_cb, &ad)) 1587 return; 1588 val = ad.val; 1589 ena = ad.ena; 1590 run = ad.run; 1591 1592 if (prefix) 1593 fprintf(output, "%s", prefix); 1594 1595 uval = val * counter->scale; 1596 printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, 1597 &rt_stat); 1598 1599 fputc('\n', output); 
static void print_no_aggr_metric(char *prefix)
{
	int cpu;
	int nrcpus = 0;
	struct perf_evsel *counter;
	u64 ena, run, val;
	double uval;

	nrcpus = evsel_list->cpus->nr;
	for (cpu = 0; cpu < nrcpus; cpu++) {
		bool first = true;

		if (prefix)
			fputs(prefix, stat_config.output);
		evlist__for_each_entry(evsel_list, counter) {
			if (is_duration_time(counter))
				continue;
			if (first) {
				aggr_printout(counter, cpu, 0);
				first = false;
			}
			val = perf_counts(counter->counts, cpu, 0)->val;
			ena = perf_counts(counter->counts, cpu, 0)->ena;
			run = perf_counts(counter->counts, cpu, 0)->run;

			uval = val * counter->scale;
			printout(cpu, 0, counter, uval, prefix, run, ena, 1.0,
				 &rt_stat);
		}
		fputc('\n', stat_config.output);
	}
}

static int aggr_header_lens[] = {
	[AGGR_CORE] = 18,
	[AGGR_SOCKET] = 12,
	[AGGR_NONE] = 6,
	[AGGR_THREAD] = 24,
	[AGGR_GLOBAL] = 0,
};

static const char *aggr_header_csv[] = {
	[AGGR_CORE]	=	"core,cpus,",
	[AGGR_SOCKET]	=	"socket,cpus",
	[AGGR_NONE]	=	"cpu,",
	[AGGR_THREAD]	=	"comm-pid,",
	[AGGR_GLOBAL]	=	""
};

static void print_metric_headers(const char *prefix, bool no_indent)
{
	struct perf_stat_output_ctx out;
	struct perf_evsel *counter;
	struct outstate os = {
		.fh = stat_config.output
	};

	if (prefix)
		fprintf(stat_config.output, "%s", prefix);

	if (!csv_output && !no_indent)
		fprintf(stat_config.output, "%*s",
			aggr_header_lens[stat_config.aggr_mode], "");
	if (csv_output) {
		if (stat_config.interval)
			fputs("time,", stat_config.output);
		fputs(aggr_header_csv[stat_config.aggr_mode],
		      stat_config.output);
	}

	/* Print metrics headers only */
	evlist__for_each_entry(evsel_list, counter) {
		if (is_duration_time(counter))
			continue;
		os.evsel = counter;
		out.ctx = &os;
		out.print_metric = print_metric_header;
		out.new_line = new_line_metric;
		out.force_header = true;
		perf_stat__print_shadow_stats(counter, 0,
					      0,
					      &out,
					      &metric_events,
					      &rt_stat);
	}
	fputc('\n', stat_config.output);
}

static void print_interval(char *prefix, struct timespec *ts)
{
	FILE *output = stat_config.output;
	static int num_print_interval;

	sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);

	if (num_print_interval == 0 && !csv_output) {
		switch (stat_config.aggr_mode) {
		case AGGR_SOCKET:
			fprintf(output, "#           time socket cpus");
			if (!metric_only)
				fprintf(output, "             counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_CORE:
			fprintf(output, "#           time core         cpus");
			if (!metric_only)
				fprintf(output, "             counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_NONE:
			fprintf(output, "#           time CPU");
			if (!metric_only)
				fprintf(output, "                counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_THREAD:
			fprintf(output, "#           time             comm-pid");
			if (!metric_only)
				fprintf(output, "                  counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_GLOBAL:
		default:
			fprintf(output, "#           time");
			if (!metric_only)
				fprintf(output, "             counts %*s events\n", unit_width, "unit");
		case AGGR_UNSET:
			break;
		}
	}
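	/*
	 * A note on the cadence below (read off this code, not documented
	 * elsewhere): num_print_interval wraps at 25, so with -I the column
	 * headers are re-emitted every 25 rows to stay visible on a
	 * scrolling terminal.
	 */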
	if (num_print_interval == 0 && metric_only)
		print_metric_headers(" ", true);
	if (++num_print_interval == 25)
		num_print_interval = 0;
}

static void print_header(int argc, const char **argv)
{
	FILE *output = stat_config.output;
	int i;

	fflush(stdout);

	if (!csv_output) {
		fprintf(output, "\n");
		fprintf(output, " Performance counter stats for ");
		if (target.system_wide)
			fprintf(output, "\'system wide");
		else if (target.cpu_list)
			fprintf(output, "\'CPU(s) %s", target.cpu_list);
		else if (!target__has_task(&target)) {
			fprintf(output, "\'%s", argv ? argv[0] : "pipe");
			for (i = 1; argv && (i < argc); i++)
				fprintf(output, " %s", argv[i]);
		} else if (target.pid)
			fprintf(output, "process id \'%s", target.pid);
		else
			fprintf(output, "thread id \'%s", target.tid);

		fprintf(output, "\'");
		if (run_count > 1)
			fprintf(output, " (%d runs)", run_count);
		fprintf(output, ":\n\n");
	}
}

static void print_footer(void)
{
	FILE *output = stat_config.output;
	int n;

	if (!null_run)
		fprintf(output, "\n");
	fprintf(output, " %17.9f seconds time elapsed",
		avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC);
	if (run_count > 1) {
		fprintf(output, " ");
		print_noise_pct(stddev_stats(&walltime_nsecs_stats),
				avg_stats(&walltime_nsecs_stats));
	}
	fprintf(output, "\n\n");

	if (print_free_counters_hint &&
	    sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 &&
	    n > 0)
		fprintf(output,
"Some events weren't counted. Try disabling the NMI watchdog:\n"
"	echo 0 > /proc/sys/kernel/nmi_watchdog\n"
"	perf stat ...\n"
"	echo 1 > /proc/sys/kernel/nmi_watchdog\n");

	if (print_mixed_hw_group_error)
		fprintf(output,
			"The events in a group usually have to be from "
			"the same PMU. Try reorganizing the group.\n");
}
static void print_counters(struct timespec *ts, int argc, const char **argv)
{
	int interval = stat_config.interval;
	struct perf_evsel *counter;
	char buf[64], *prefix = NULL;

	/* Do not print anything if we record to the pipe. */
	if (STAT_RECORD && perf_stat.data.is_pipe)
		return;

	if (interval)
		print_interval(prefix = buf, ts);
	else
		print_header(argc, argv);

	if (metric_only) {
		static int num_print_iv;

		if (num_print_iv == 0 && !interval)
			print_metric_headers(prefix, false);
		if (num_print_iv++ == 25)
			num_print_iv = 0;
		if (stat_config.aggr_mode == AGGR_GLOBAL && prefix)
			fprintf(stat_config.output, "%s", prefix);
	}

	switch (stat_config.aggr_mode) {
	case AGGR_CORE:
	case AGGR_SOCKET:
		print_aggr(prefix);
		break;
	case AGGR_THREAD:
		evlist__for_each_entry(evsel_list, counter) {
			if (is_duration_time(counter))
				continue;
			print_aggr_thread(counter, prefix);
		}
		break;
	case AGGR_GLOBAL:
		evlist__for_each_entry(evsel_list, counter) {
			if (is_duration_time(counter))
				continue;
			print_counter_aggr(counter, prefix);
		}
		if (metric_only)
			fputc('\n', stat_config.output);
		break;
	case AGGR_NONE:
		if (metric_only)
			print_no_aggr_metric(prefix);
		else {
			evlist__for_each_entry(evsel_list, counter) {
				if (is_duration_time(counter))
					continue;
				print_counter(counter, prefix);
			}
		}
		break;
	case AGGR_UNSET:
	default:
		break;
	}

	if (!interval && !csv_output)
		print_footer();

	fflush(stat_config.output);
}

static volatile int signr = -1;

static void skip_signal(int signo)
{
	if ((child_pid == -1) || stat_config.interval)
		done = 1;

	signr = signo;
	/*
	 * Render child_pid harmless: make sure we won't send a SIGTERM
	 * to a random process in case of a race condition and fast PID
	 * recycling.
	 */
	child_pid = -1;
}

static void sig_atexit(void)
{
	sigset_t set, oset;

	/*
	 * Avoid a race condition with the SIGCHLD handler in skip_signal(),
	 * which modifies child_pid. The goal is to avoid sending SIGTERM
	 * to a random process.
	 */
	sigemptyset(&set);
	sigaddset(&set, SIGCHLD);
	sigprocmask(SIG_BLOCK, &set, &oset);

	if (child_pid != -1)
		kill(child_pid, SIGTERM);

	sigprocmask(SIG_SETMASK, &oset, NULL);

	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

static int stat__set_big_num(const struct option *opt __maybe_unused,
			     const char *s __maybe_unused, int unset)
{
	big_num_opt = unset ? 0 : 1;
	return 0;
}

static int enable_metric_only(const struct option *opt __maybe_unused,
			      const char *s __maybe_unused, int unset)
{
	force_metric_only = true;
	metric_only = !unset;
	return 0;
}

static int parse_metric_groups(const struct option *opt,
			       const char *str,
			       int unset __maybe_unused)
{
	return metricgroup__parse_groups(opt, str, &metric_events);
}

static const struct option stat_options[] = {
	OPT_BOOLEAN('T', "transaction", &transaction_run,
		    "hardware transaction statistics"),
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
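	/*
	 * For illustration, the kinds of strings parse_events_option above
	 * accepts (examples, not an exhaustive grammar):
	 *
	 *	perf stat -e cycles,instructions ./workload
	 *	perf stat -e '{cycles,instructions}:W' ./workload  // weak group
	 *	perf stat -e cpu/event=0x3c,umask=0x0/ ./workload  // raw PMU event
	 */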
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "stat events on existing process id"),
	OPT_STRING('t', "tid", &target.tid, "tid",
		   "stat events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('g', "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	OPT_INTEGER('r', "repeat", &run_count,
		    "repeat command and print average + stddev (max: 100, forever: 0)"),
	OPT_BOOLEAN('n', "null", &null_run,
		    "null run - don't start any counters"),
	OPT_INCR('d', "detailed", &detailed_run,
		 "detailed run - start a lot of events"),
	OPT_BOOLEAN('S', "sync", &sync_run,
		    "call sync() before starting a run"),
	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
			   "print large numbers with thousands' separators",
			   stat__set_big_num),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		   "list of cpus to monitor in system-wide"),
	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
		     "disable CPU count aggregation", AGGR_NONE),
	OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identically named events"),
	OPT_STRING('x', "field-separator", &csv_sep, "separator",
		   "print counts with custom separator"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only", parse_cgroups),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
	OPT_INTEGER(0, "log-fd", &output_fd,
		    "log output to fd, instead of stderr"),
	OPT_STRING(0, "pre", &pre_cmd, "command",
		   "command to run prior to the measured command"),
	OPT_STRING(0, "post", &post_cmd, "command",
		   "command to run after the measured command"),
	OPT_UINTEGER('I', "interval-print", &stat_config.interval,
		     "print counts at regular interval in ms "
		     "(overhead is possible for values <= 100ms)"),
	OPT_INTEGER(0, "interval-count", &stat_config.times,
		    "print counts for fixed number of times"),
	OPT_UINTEGER(0, "timeout", &stat_config.timeout,
		     "stop workload and print counts after a timeout period in ms (>= 10ms)"),
	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
		     "aggregate counts per processor socket", AGGR_SOCKET),
	OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
		     "aggregate counts per physical processor core", AGGR_CORE),
	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
		     "aggregate counts per thread", AGGR_THREAD),
	OPT_UINTEGER('D', "delay", &initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL,
			   "Only print computed metrics. No raw values",
			   enable_metric_only),
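	/*
	 * Example use of the option below (assuming a CPU whose PMU exports
	 * the topdown-* events listed in topdown_attrs near the top of this
	 * file):
	 *
	 *	perf stat --topdown -a -- sleep 1
	 *
	 * Level-1 topdown splits pipeline slots into retiring, bad
	 * speculation, frontend bound and backend bound.
	 */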
	OPT_BOOLEAN(0, "topdown", &topdown_run,
		    "measure topdown level 1 statistics"),
	OPT_BOOLEAN(0, "smi-cost", &smi_cost,
		    "measure SMI cost"),
	OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
		     "monitor specified metrics or metric groups (separated by ,)",
		     parse_metric_groups),
	OPT_END()
};

static int perf_stat__get_socket(struct cpu_map *map, int cpu)
{
	return cpu_map__get_socket(map, cpu, NULL);
}

static int perf_stat__get_core(struct cpu_map *map, int cpu)
{
	return cpu_map__get_core(map, cpu, NULL);
}

static int cpu_map__get_max(struct cpu_map *map)
{
	int i, max = -1;

	for (i = 0; i < map->nr; i++) {
		if (map->map[i] > max)
			max = map->map[i];
	}

	return max;
}

static struct cpu_map *cpus_aggr_map;

static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx)
{
	int cpu;

	if (idx >= map->nr)
		return -1;

	cpu = map->map[idx];

	if (cpus_aggr_map->map[cpu] == -1)
		cpus_aggr_map->map[cpu] = get_id(map, idx);

	return cpus_aggr_map->map[cpu];
}

static int perf_stat__get_socket_cached(struct cpu_map *map, int idx)
{
	return perf_stat__get_aggr(perf_stat__get_socket, map, idx);
}

static int perf_stat__get_core_cached(struct cpu_map *map, int idx)
{
	return perf_stat__get_aggr(perf_stat__get_core, map, idx);
}

static int perf_stat_init_aggr_mode(void)
{
	int nr;

	switch (stat_config.aggr_mode) {
	case AGGR_SOCKET:
		if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
			perror("cannot build socket map");
			return -1;
		}
		aggr_get_id = perf_stat__get_socket_cached;
		break;
	case AGGR_CORE:
		if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
			perror("cannot build core map");
			return -1;
		}
		aggr_get_id = perf_stat__get_core_cached;
		break;
	case AGGR_NONE:
	case AGGR_GLOBAL:
	case AGGR_THREAD:
	case AGGR_UNSET:
	default:
		break;
	}

	/*
	 * The evsel_list->cpus is the base we operate on,
	 * taking the highest cpu number to be the size of
	 * the aggregation translate cpumap.
	 */
	nr = cpu_map__get_max(evsel_list->cpus);
	cpus_aggr_map = cpu_map__empty_new(nr + 1);
	return cpus_aggr_map ? 0 : -ENOMEM;
}

static void perf_stat__exit_aggr_mode(void)
{
	cpu_map__put(aggr_map);
	cpu_map__put(cpus_aggr_map);
	aggr_map = NULL;
	cpus_aggr_map = NULL;
}

static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx)
{
	int cpu;

	if (idx >= map->nr)
		return -1;

	cpu = map->map[idx];

	if (cpu >= env->nr_cpus_avail)
		return -1;

	return cpu;
}

static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
{
	struct perf_env *env = data;
	int cpu = perf_env__get_cpu(env, map, idx);

	return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
}
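/*
 * Worked example of the encoding done by perf_env__get_core() below:
 * core_id 2 on socket 1 becomes (1 << 16) | 2 = 0x10002, a system-wide
 * unique core identifier even though raw core_id values repeat across
 * sockets.
 */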
static int perf_stat_init_aggr_mode(void)
{
	int nr;

	switch (stat_config.aggr_mode) {
	case AGGR_SOCKET:
		if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
			perror("cannot build socket map");
			return -1;
		}
		aggr_get_id = perf_stat__get_socket_cached;
		break;
	case AGGR_CORE:
		if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
			perror("cannot build core map");
			return -1;
		}
		aggr_get_id = perf_stat__get_core_cached;
		break;
	case AGGR_NONE:
	case AGGR_GLOBAL:
	case AGGR_THREAD:
	case AGGR_UNSET:
	default:
		break;
	}

	/*
	 * The evsel_list->cpus is the base we operate on,
	 * taking the highest cpu number to be the size of
	 * the aggregation translate cpumap.
	 */
	nr = cpu_map__get_max(evsel_list->cpus);
	cpus_aggr_map = cpu_map__empty_new(nr + 1);
	return cpus_aggr_map ? 0 : -ENOMEM;
}

static void perf_stat__exit_aggr_mode(void)
{
	cpu_map__put(aggr_map);
	cpu_map__put(cpus_aggr_map);
	aggr_map = NULL;
	cpus_aggr_map = NULL;
}

static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx)
{
	int cpu;

	if (idx >= map->nr)	/* was 'idx > map->nr': off-by-one bounds check */
		return -1;

	cpu = map->map[idx];

	if (cpu >= env->nr_cpus_avail)
		return -1;

	return cpu;
}

static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
{
	struct perf_env *env = data;
	int cpu = perf_env__get_cpu(env, map, idx);

	return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
}

static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
{
	struct perf_env *env = data;
	int core = -1, cpu = perf_env__get_cpu(env, map, idx);

	if (cpu != -1) {
		int socket_id = env->cpu[cpu].socket_id;

		/*
		 * Encode the socket in the upper 16 bits:
		 * core_id is relative to its socket, and
		 * we need a global id, so we combine
		 * socket and core id.
		 */
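		/*
		 * Worked example: socket_id 1, core_id 2 encodes to
		 * (1 << 16) | 2 = 0x10002.
		 */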
		core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff);
	}

	return core;
}

static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus,
				      struct cpu_map **sockp)
{
	return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
}

static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
				    struct cpu_map **corep)
{
	return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
}

static int perf_stat__get_socket_file(struct cpu_map *map, int idx)
{
	return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
}

static int perf_stat__get_core_file(struct cpu_map *map, int idx)
{
	return perf_env__get_core(map, idx, &perf_stat.session->header.env);
}

static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
{
	struct perf_env *env = &st->session->header.env;

	switch (stat_config.aggr_mode) {
	case AGGR_SOCKET:
		if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) {
			perror("cannot build socket map");
			return -1;
		}
		aggr_get_id = perf_stat__get_socket_file;
		break;
	case AGGR_CORE:
		if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) {
			perror("cannot build core map");
			return -1;
		}
		aggr_get_id = perf_stat__get_core_file;
		break;
	case AGGR_NONE:
	case AGGR_GLOBAL:
	case AGGR_THREAD:
	case AGGR_UNSET:
	default:
		break;
	}

	return 0;
}

static int topdown_filter_events(const char **attr, char **str, bool use_group)
{
	int off = 0;
	int i;
	int len = 0;
	char *s;

	for (i = 0; attr[i]; i++) {
		if (pmu_have_event("cpu", attr[i])) {
			len += strlen(attr[i]) + 1;
			attr[i - off] = attr[i];
		} else
			off++;
	}
	attr[i - off] = NULL;

	*str = malloc(len + 1 + 2);
	if (!*str)
		return -1;
	s = *str;
	if (i - off == 0) {
		*s = 0;
		return 0;
	}
	if (use_group)
		*s++ = '{';
	for (i = 0; attr[i]; i++) {
		strcpy(s, attr[i]);
		s += strlen(s);
		*s++ = ',';
	}
	if (use_group) {
		s[-1] = '}';
		*s = 0;
	} else
		s[-1] = 0;
	return 0;
}
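/*
 * The resulting string follows from the assembly above: the attr array is
 * compacted in place to the events the "cpu" PMU advertises, and with
 * use_group set the trailing ',' is overwritten by '}', so e.g. two
 * surviving events yield "{topdown-total-slots,topdown-slots-retired}".
 */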
"cycles-ct") && 2374 pmu_have_event("cpu", "el-start")) 2375 err = parse_events(evsel_list, transaction_attrs, 2376 &errinfo); 2377 else 2378 err = parse_events(evsel_list, 2379 transaction_limited_attrs, 2380 &errinfo); 2381 if (err) { 2382 fprintf(stderr, "Cannot set up transaction events\n"); 2383 return -1; 2384 } 2385 return 0; 2386 } 2387 2388 if (smi_cost) { 2389 int smi; 2390 2391 if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) { 2392 fprintf(stderr, "freeze_on_smi is not supported.\n"); 2393 return -1; 2394 } 2395 2396 if (!smi) { 2397 if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) { 2398 fprintf(stderr, "Failed to set freeze_on_smi.\n"); 2399 return -1; 2400 } 2401 smi_reset = true; 2402 } 2403 2404 if (pmu_have_event("msr", "aperf") && 2405 pmu_have_event("msr", "smi")) { 2406 if (!force_metric_only) 2407 metric_only = true; 2408 err = parse_events(evsel_list, smi_cost_attrs, NULL); 2409 } else { 2410 fprintf(stderr, "To measure SMI cost, it needs " 2411 "msr/aperf/, msr/smi/ and cpu/cycles/ support\n"); 2412 return -1; 2413 } 2414 if (err) { 2415 fprintf(stderr, "Cannot set up SMI cost events\n"); 2416 return -1; 2417 } 2418 return 0; 2419 } 2420 2421 if (topdown_run) { 2422 char *str = NULL; 2423 bool warn = false; 2424 2425 if (stat_config.aggr_mode != AGGR_GLOBAL && 2426 stat_config.aggr_mode != AGGR_CORE) { 2427 pr_err("top down event configuration requires --per-core mode\n"); 2428 return -1; 2429 } 2430 stat_config.aggr_mode = AGGR_CORE; 2431 if (nr_cgroups || !target__has_cpu(&target)) { 2432 pr_err("top down event configuration requires system-wide mode (-a)\n"); 2433 return -1; 2434 } 2435 2436 if (!force_metric_only) 2437 metric_only = true; 2438 if (topdown_filter_events(topdown_attrs, &str, 2439 arch_topdown_check_group(&warn)) < 0) { 2440 pr_err("Out of memory\n"); 2441 return -1; 2442 } 2443 if (topdown_attrs[0] && str) { 2444 if (warn) 2445 arch_topdown_group_warn(); 2446 err = parse_events(evsel_list, str, NULL); 2447 if (err) { 2448 fprintf(stderr, 2449 "Cannot set up top down events %s: %d\n", 2450 str, err); 2451 free(str); 2452 return -1; 2453 } 2454 } else { 2455 fprintf(stderr, "System does not support topdown\n"); 2456 return -1; 2457 } 2458 free(str); 2459 } 2460 2461 if (!evsel_list->nr_entries) { 2462 if (target__has_cpu(&target)) 2463 default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; 2464 2465 if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) 2466 return -1; 2467 if (pmu_have_event("cpu", "stalled-cycles-frontend")) { 2468 if (perf_evlist__add_default_attrs(evsel_list, 2469 frontend_attrs) < 0) 2470 return -1; 2471 } 2472 if (pmu_have_event("cpu", "stalled-cycles-backend")) { 2473 if (perf_evlist__add_default_attrs(evsel_list, 2474 backend_attrs) < 0) 2475 return -1; 2476 } 2477 if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0) 2478 return -1; 2479 } 2480 2481 /* Detailed events get appended to the event list: */ 2482 2483 if (detailed_run < 1) 2484 return 0; 2485 2486 /* Append detailed run extra attributes: */ 2487 if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) 2488 return -1; 2489 2490 if (detailed_run < 2) 2491 return 0; 2492 2493 /* Append very detailed run extra attributes: */ 2494 if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) 2495 return -1; 2496 2497 if (detailed_run < 3) 2498 return 0; 2499 2500 /* Append very, very detailed run extra attributes: */ 2501 return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); 2502 } 2503 2504 static 
	struct perf_event_attr detailed_attrs[] = {

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			 PERF_COUNT_HW_CACHE_L1D << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			 PERF_COUNT_HW_CACHE_L1D << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			 PERF_COUNT_HW_CACHE_LL << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			 PERF_COUNT_HW_CACHE_LL << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	};

	/*
	 * Very detailed stats (-d -d), covering the instruction cache and the
	 * TLB caches:
	 */
	struct perf_event_attr very_detailed_attrs[] = {

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			 PERF_COUNT_HW_CACHE_L1I << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			 PERF_COUNT_HW_CACHE_L1I << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			 PERF_COUNT_HW_CACHE_DTLB << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			 PERF_COUNT_HW_CACHE_DTLB << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			 PERF_COUNT_HW_CACHE_ITLB << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			 PERF_COUNT_HW_CACHE_ITLB << 0 |
			(PERF_COUNT_HW_CACHE_OP_READ << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },

	};

	/*
	 * Very, very detailed stats (-d -d -d), adding prefetch events:
	 */
	struct perf_event_attr very_very_detailed_attrs[] = {

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			 PERF_COUNT_HW_CACHE_L1D << 0 |
			(PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

		{ .type = PERF_TYPE_HW_CACHE,
		  .config =
			 PERF_COUNT_HW_CACHE_L1D << 0 |
			(PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
			(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	};

	/* Set attrs if no event is selected and !null_run: */
	if (null_run)
		return 0;

	if (transaction_run) {
		struct parse_events_error errinfo;

		if (pmu_have_event("cpu", "cycles-ct") &&
		    pmu_have_event("cpu", "el-start"))
			err = parse_events(evsel_list, transaction_attrs,
					   &errinfo);
		else
			err = parse_events(evsel_list,
					   transaction_limited_attrs,
					   &errinfo);
		if (err) {
			fprintf(stderr, "Cannot set up transaction events\n");
			return -1;
		}
		return 0;
	}

	if (smi_cost) {
		int smi;

		if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
			fprintf(stderr, "freeze_on_smi is not supported.\n");
			return -1;
		}

		if (!smi) {
			if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
				fprintf(stderr, "Failed to set freeze_on_smi.\n");
				return -1;
			}
			smi_reset = true;
		}

		if (pmu_have_event("msr", "aperf") &&
		    pmu_have_event("msr", "smi")) {
			if (!force_metric_only)
				metric_only = true;
			err = parse_events(evsel_list, smi_cost_attrs, NULL);
		} else {
			fprintf(stderr, "Measuring SMI cost requires "
				"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
			return -1;
		}
		if (err) {
			fprintf(stderr, "Cannot set up SMI cost events\n");
			return -1;
		}
		return 0;
	}

	if (topdown_run) {
		char *str = NULL;
		bool warn = false;

		if (stat_config.aggr_mode != AGGR_GLOBAL &&
		    stat_config.aggr_mode != AGGR_CORE) {
			pr_err("top down event configuration requires --per-core mode\n");
			return -1;
		}
		stat_config.aggr_mode = AGGR_CORE;
		if (nr_cgroups || !target__has_cpu(&target)) {
			pr_err("top down event configuration requires system-wide mode (-a)\n");
			return -1;
		}

		if (!force_metric_only)
			metric_only = true;
		if (topdown_filter_events(topdown_attrs, &str,
				arch_topdown_check_group(&warn)) < 0) {
			pr_err("Out of memory\n");
			return -1;
		}
		if (topdown_attrs[0] && str) {
			if (warn)
				arch_topdown_group_warn();
			err = parse_events(evsel_list, str, NULL);
			if (err) {
				fprintf(stderr,
					"Cannot set up top down events %s: %d\n",
					str, err);
				free(str);
				return -1;
			}
		} else {
			fprintf(stderr, "System does not support topdown\n");
			return -1;
		}
		free(str);
	}

	if (!evsel_list->nr_entries) {
		if (target__has_cpu(&target))
			default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;

		if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
			return -1;
		if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
			if (perf_evlist__add_default_attrs(evsel_list,
							   frontend_attrs) < 0)
				return -1;
		}
		if (pmu_have_event("cpu", "stalled-cycles-backend")) {
			if (perf_evlist__add_default_attrs(evsel_list,
							   backend_attrs) < 0)
				return -1;
		}
		if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
			return -1;
	}

	/* Detailed events get appended to the event list: */

	if (detailed_run < 1)
		return 0;

	/* Append detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
		return -1;

	if (detailed_run < 2)
		return 0;

	/* Append very detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
		return -1;

	if (detailed_run < 3)
		return 0;

	/* Append very, very detailed run extra attributes: */
	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
}

static const char * const stat_record_usage[] = {
	"perf stat record [<options>]",
	NULL,
};

static void init_features(struct perf_session *session)
{
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
	perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
	perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
	perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
}
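/*
 * Counter data carries no samples, so the sample-oriented header features
 * cleared above (build ids, tracing data, branch stacks, auxtrace)
 * presumably do not apply to a stat file; all other features are kept.
 */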
static int __cmd_record(int argc, const char **argv)
{
	struct perf_session *session;
	struct perf_data *data = &perf_stat.data;

	argc = parse_options(argc, argv, stat_options, stat_record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);

	if (output_name)
		data->file.path = output_name;

	if (run_count != 1 || forever) {
		pr_err("Cannot use -r option with perf stat record.\n");
		return -1;
	}

	session = perf_session__new(data, false, NULL);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	init_features(session);

	session->evlist = evsel_list;
	perf_stat.session = session;
	perf_stat.record = true;
	return argc;
}

static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
				    union perf_event *event,
				    struct perf_session *session)
{
	struct stat_round_event *stat_round = &event->stat_round;
	struct perf_evsel *counter;
	struct timespec tsh, *ts = NULL;
	const char **argv = session->header.env.cmdline_argv;
	int argc = session->header.env.nr_cmdline;

	evlist__for_each_entry(evsel_list, counter)
		perf_stat_process_counter(&stat_config, counter);

	if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
		update_stats(&walltime_nsecs_stats, stat_round->time);

	if (stat_config.interval && stat_round->time) {
		tsh.tv_sec = stat_round->time / NSEC_PER_SEC;
		tsh.tv_nsec = stat_round->time % NSEC_PER_SEC;
		ts = &tsh;
	}

	print_counters(ts, argc, argv);
	return 0;
}

static
int process_stat_config_event(struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_session *session __maybe_unused)
{
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);

	perf_event__read_stat_config(&stat_config, &event->stat_config);

	if (cpu_map__empty(st->cpus)) {
		if (st->aggr_mode != AGGR_UNSET)
			pr_warning("warning: processing task data, aggregation mode not set\n");
		return 0;
	}

	if (st->aggr_mode != AGGR_UNSET)
		stat_config.aggr_mode = st->aggr_mode;

	if (perf_stat.data.is_pipe)
		perf_stat_init_aggr_mode();
	else
		perf_stat_init_aggr_mode_file(st);

	return 0;
}

static int set_maps(struct perf_stat *st)
{
	if (!st->cpus || !st->threads)
		return 0;

	if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
		return -EINVAL;

	perf_evlist__set_maps(evsel_list, st->cpus, st->threads);

	if (perf_evlist__alloc_stats(evsel_list, true))
		return -ENOMEM;

	st->maps_allocated = true;
	return 0;
}

static
int process_thread_map_event(struct perf_tool *tool,
			     union perf_event *event,
			     struct perf_session *session __maybe_unused)
{
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);

	if (st->threads) {
		pr_warning("Extra thread map event, ignoring.\n");
		return 0;
	}

	st->threads = thread_map__new_event(&event->thread_map);
	if (!st->threads)
		return -ENOMEM;

	return set_maps(st);
}

static
int process_cpu_map_event(struct perf_tool *tool,
			  union perf_event *event,
			  struct perf_session *session __maybe_unused)
{
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
	struct cpu_map *cpus;

	if (st->cpus) {
		pr_warning("Extra cpu map event, ignoring.\n");
		return 0;
	}

	cpus = cpu_map__new_data(&event->cpu_map.data);
	if (!cpus)
		return -ENOMEM;

	st->cpus = cpus;
	return set_maps(st);
}
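/*
 * Illustrative flow (file name is an example): "perf stat record -o
 * stat.data -- <workload>" writes the counts into a perf.data-style file,
 * and "perf stat report -i stat.data" replays it through the
 * perf_stat.tool callbacks defined below.
 */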
static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
{
	int i;

	config->stats = calloc(nthreads, sizeof(struct runtime_stat));
	if (!config->stats)
		return -1;

	config->stats_num = nthreads;

	for (i = 0; i < nthreads; i++)
		runtime_stat__init(&config->stats[i]);

	return 0;
}

static void runtime_stat_delete(struct perf_stat_config *config)
{
	int i;

	if (!config->stats)
		return;

	for (i = 0; i < config->stats_num; i++)
		runtime_stat__exit(&config->stats[i]);

	free(config->stats);
}

static const char * const stat_report_usage[] = {
	"perf stat report [<options>]",
	NULL,
};

static struct perf_stat perf_stat = {
	.tool = {
		.attr		= perf_event__process_attr,
		.event_update	= perf_event__process_event_update,
		.thread_map	= process_thread_map_event,
		.cpu_map	= process_cpu_map_event,
		.stat_config	= process_stat_config_event,
		.stat		= perf_event__process_stat_event,
		.stat_round	= process_stat_round_event,
	},
	.aggr_mode = AGGR_UNSET,
};

static int __cmd_report(int argc, const char **argv)
{
	struct perf_session *session;
	const struct option options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
		     "aggregate counts per processor socket", AGGR_SOCKET),
	OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
		     "aggregate counts per physical processor core", AGGR_CORE),
	OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
		     "disable CPU count aggregation", AGGR_NONE),
	OPT_END()
	};
	struct stat st;
	int ret;

	argc = parse_options(argc, argv, options, stat_report_usage, 0);

	if (!input_name || !strlen(input_name)) {
		if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
			input_name = "-";
		else
			input_name = "perf.data";
	}

	perf_stat.data.file.path = input_name;
	perf_stat.data.mode = PERF_DATA_MODE_READ;

	session = perf_session__new(&perf_stat.data, false, &perf_stat.tool);
	if (session == NULL)
		return -1;

	perf_stat.session = session;
	stat_config.output = stderr;
	evsel_list = session->evlist;

	ret = perf_session__process_events(session);
	if (ret)
		return ret;

	perf_session__delete(session);
	return 0;
}

static void setup_system_wide(int forks)
{
	/*
	 * Make system wide (-a) the default target if
	 * no target was specified and one of the following
	 * conditions is met:
	 *
	 * - there's no workload specified
	 * - there is a workload specified but all requested
	 *   events are system wide events
	 */
	if (!target__none(&target))
		return;

	if (!forks)
		target.system_wide = true;
	else {
		struct perf_evsel *counter;

		evlist__for_each_entry(evsel_list, counter) {
			if (!counter->system_wide)
				return;
		}

		if (evsel_list->nr_entries)
			target.system_wide = true;
	}
}
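/*
 * E.g. a bare "perf stat" with no workload defaults to -a, and so does a
 * forked workload whose requested events are all marked ->system_wide by
 * the event parser (uncore PMU events, for instance).
 */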
int cmd_stat(int argc, const char **argv)
{
	const char * const stat_usage[] = {
		"perf stat [<options>] [<command>]",
		NULL
	};
	int status = -EINVAL, run_idx;
	const char *mode;
	FILE *output = stderr;
	unsigned int interval, timeout;
	const char * const stat_subcommands[] = { "record", "report" };

	setlocale(LC_ALL, "");

	evsel_list = perf_evlist__new();
	if (evsel_list == NULL)
		return -ENOMEM;

	parse_events__shrink_config_terms();
	argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
					(const char **) stat_usage,
					PARSE_OPT_STOP_AT_NON_OPTION);
	perf_stat__collect_metric_expr(evsel_list);
	perf_stat__init_shadow_stats();

	if (csv_sep) {
		csv_output = true;
		if (!strcmp(csv_sep, "\\t"))
			csv_sep = "\t";
	} else
		csv_sep = DEFAULT_SEPARATOR;

	if (argc && !strncmp(argv[0], "rec", 3)) {
		argc = __cmd_record(argc, argv);
		if (argc < 0)
			return -1;
	} else if (argc && !strncmp(argv[0], "rep", 3))
		return __cmd_report(argc, argv);

	interval = stat_config.interval;
	timeout = stat_config.timeout;

	/*
	 * For the record command the -o option is already taken care of.
	 */
	if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
		output = NULL;

	if (output_name && output_fd) {
		fprintf(stderr, "cannot use both --output and --log-fd\n");
		parse_options_usage(stat_usage, stat_options, "o", 1);
		parse_options_usage(NULL, stat_options, "log-fd", 0);
		goto out;
	}

	if (metric_only && stat_config.aggr_mode == AGGR_THREAD) {
		fprintf(stderr, "--metric-only is not supported with --per-thread\n");
		goto out;
	}

	if (metric_only && run_count > 1) {
		fprintf(stderr, "--metric-only is not supported with -r\n");
		goto out;
	}

	if (output_fd < 0) {
		fprintf(stderr, "argument to --log-fd must be > 0\n");
		parse_options_usage(stat_usage, stat_options, "log-fd", 0);
		goto out;
	}

	if (!output) {
		struct timespec tm;
		mode = append_file ? "a" : "w";

		output = fopen(output_name, mode);
		if (!output) {
			perror("failed to create output file");
			return -1;
		}
		clock_gettime(CLOCK_REALTIME, &tm);
		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
	} else if (output_fd > 0) {
		mode = append_file ? "a" : "w";
		output = fdopen(output_fd, mode);
		if (!output) {
			perror("Failed opening logfd");
			return -errno;
		}
	}

	stat_config.output = output;

	/*
	 * let the spreadsheet do the pretty-printing
	 */
	if (csv_output) {
		/* User explicitly passed -B? */
		if (big_num_opt == 1) {
			fprintf(stderr, "-B option not supported with -x\n");
			parse_options_usage(stat_usage, stat_options, "B", 1);
			parse_options_usage(NULL, stat_options, "x", 1);
			goto out;
		} else /* Nope, so disable big number formatting */
			big_num = false;
	} else if (big_num_opt == 0) /* User passed --no-big-num */
		big_num = false;

	setup_system_wide(argc);

	if (run_count < 0) {
		pr_err("Run count must be a non-negative number\n");
		parse_options_usage(stat_usage, stat_options, "r", 1);
		goto out;
	} else if (run_count == 0) {
		forever = true;
		run_count = 1;
	}
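	/*
	 * E.g. "perf stat -r 0 <cmd>" re-runs the workload and prints counts
	 * forever, while "-r 5" prints the average and stddev over five runs.
	 */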
" 2981 "Please proceed with caution.\n"); 2982 } 2983 if (timeout && interval) { 2984 pr_err("timeout option is not supported with interval-print.\n"); 2985 parse_options_usage(stat_usage, stat_options, "timeout", 0); 2986 parse_options_usage(stat_usage, stat_options, "I", 1); 2987 goto out; 2988 } 2989 2990 if (perf_evlist__alloc_stats(evsel_list, interval)) 2991 goto out; 2992 2993 if (perf_stat_init_aggr_mode()) 2994 goto out; 2995 2996 /* 2997 * We dont want to block the signals - that would cause 2998 * child tasks to inherit that and Ctrl-C would not work. 2999 * What we want is for Ctrl-C to work in the exec()-ed 3000 * task, but being ignored by perf stat itself: 3001 */ 3002 atexit(sig_atexit); 3003 if (!forever) 3004 signal(SIGINT, skip_signal); 3005 signal(SIGCHLD, skip_signal); 3006 signal(SIGALRM, skip_signal); 3007 signal(SIGABRT, skip_signal); 3008 3009 status = 0; 3010 for (run_idx = 0; forever || run_idx < run_count; run_idx++) { 3011 if (run_count != 1 && verbose > 0) 3012 fprintf(output, "[ perf stat: executing run #%d ... ]\n", 3013 run_idx + 1); 3014 3015 status = run_perf_stat(argc, argv); 3016 if (forever && status != -1) { 3017 print_counters(NULL, argc, argv); 3018 perf_stat__reset_stats(); 3019 } 3020 } 3021 3022 if (!forever && status != -1 && !interval) 3023 print_counters(NULL, argc, argv); 3024 3025 if (STAT_RECORD) { 3026 /* 3027 * We synthesize the kernel mmap record just so that older tools 3028 * don't emit warnings about not being able to resolve symbols 3029 * due to /proc/sys/kernel/kptr_restrict settings and instear provide 3030 * a saner message about no samples being in the perf.data file. 3031 * 3032 * This also serves to suppress a warning about f_header.data.size == 0 3033 * in header.c at the moment 'perf stat record' gets introduced, which 3034 * is not really needed once we start adding the stat specific PERF_RECORD_ 3035 * records, but the need to suppress the kptr_restrict messages in older 3036 * tools remain -acme 3037 */ 3038 int fd = perf_data__fd(&perf_stat.data); 3039 int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, 3040 process_synthesized_event, 3041 &perf_stat.session->machines.host); 3042 if (err) { 3043 pr_warning("Couldn't synthesize the kernel mmap record, harmless, " 3044 "older tools may produce warnings about this file\n."); 3045 } 3046 3047 if (!interval) { 3048 if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL)) 3049 pr_err("failed to write stat round event\n"); 3050 } 3051 3052 if (!perf_stat.data.is_pipe) { 3053 perf_stat.session->header.data_size += perf_stat.bytes_written; 3054 perf_session__write_header(perf_stat.session, evsel_list, fd, true); 3055 } 3056 3057 perf_session__delete(perf_stat.session); 3058 } 3059 3060 perf_stat__exit_aggr_mode(); 3061 perf_evlist__free_stats(evsel_list); 3062 out: 3063 if (smi_cost && smi_reset) 3064 sysfs__write_int(FREEZE_ON_SMI_PATH, 0); 3065 3066 perf_evlist__delete(evsel_list); 3067 3068 runtime_stat_delete(&stat_config); 3069 3070 return status; 3071 } 3072