/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ./hackbench 10

  Time: 0.118

  Performance counter stats for './hackbench 10':

       1708.761321 task-clock                #   11.037 CPUs utilized
            41,190 context-switches          #    0.024 M/sec
             6,735 CPU-migrations            #    0.004 M/sec
            17,318 page-faults               #    0.010 M/sec
     5,205,202,243 cycles                    #    3.046 GHz
     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     1,600,790,871 stalled-cycles-backend    #   30.75% backend cycles idle
     2,603,501,247 instructions              #    0.50  insns per cycle
                                             #    1.48  stalled cycles per insn
       484,357,498 branches                  #  283.455 M/sec
         6,388,934 branch-misses             #    1.32% of all branches

        0.154822978  seconds time elapsed

 *
 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include "perf.h"
#include "builtin.h"
#include "util/cgroup.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/pmu.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/color.h"
#include "util/stat.h"
#include "util/header.h"
#include "util/cpumap.h"
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/counts.h"
#include "util/session.h"
#include "util/tool.h"
#include "asm/bug.h"

#include <stdlib.h>
#include <sys/prctl.h>
#include <locale.h>

#define DEFAULT_SEPARATOR	" "
#define CNTR_NOT_SUPPORTED	"<not supported>"
#define CNTR_NOT_COUNTED	"<not counted>"

static void print_counters(struct timespec *ts, int argc, const char **argv);

/* Default events used for perf stat -T */
static const char *transaction_attrs = {
	"task-clock,"
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/,"
	"cpu/el-start/,"
	"cpu/cycles-ct/"
	"}"
};
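/*
 * Note: the braces in the event strings above and below use the
 * parse-events group syntax - the bracketed events form one perf event
 * group that is scheduled onto the PMU together, while task-clock is
 * opened outside the group.
 */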
/* More limited version when the CPU does not have all events. */
static const char *transaction_limited_attrs = {
	"task-clock,"
	"{"
	"instructions,"
	"cycles,"
	"cpu/cycles-t/,"
	"cpu/tx-start/"
	"}"
};

static struct perf_evlist	*evsel_list;

static struct target target = {
	.uid	= UINT_MAX,
};

typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu);

static int			run_count	= 1;
static bool			no_inherit	= false;
static volatile pid_t		child_pid	= -1;
static bool			null_run	= false;
static int			detailed_run	= 0;
static bool			transaction_run;
static bool			big_num		= true;
static int			big_num_opt	= -1;
static const char		*csv_sep	= NULL;
static bool			csv_output	= false;
static bool			group		= false;
static const char		*pre_cmd	= NULL;
static const char		*post_cmd	= NULL;
static bool			sync_run	= false;
static unsigned int		initial_delay	= 0;
static unsigned int		unit_width	= 4; /* strlen("unit") */
static bool			forever		= false;
static struct timespec		ref_time;
static struct cpu_map		*aggr_map;
static aggr_get_id_t		aggr_get_id;
static bool			append_file;
static const char		*output_name;
static int			output_fd;

struct perf_stat {
	bool			 record;
	struct perf_data_file	 file;
	struct perf_session	*session;
	u64			 bytes_written;
	struct perf_tool	 tool;
	bool			 maps_allocated;
	struct cpu_map		*cpus;
	struct thread_map	*threads;
	enum aggr_mode		 aggr_mode;
};

static struct perf_stat		perf_stat;
#define STAT_RECORD		perf_stat.record

static volatile int done = 0;

static struct perf_stat_config stat_config = {
	.aggr_mode	= AGGR_GLOBAL,
	.scale		= true,
};
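/*
 * r = a - b, keeping tv_nsec normalized to [0, 1e9).  For example
 * (values purely illustrative): a = {2, 100000000} minus
 * b = {1, 900000000} takes the borrow branch below and yields
 * r = {0, 200000000}, i.e. 0.2s.
 */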
static inline void diff_timespec(struct timespec *r, struct timespec *a,
				 struct timespec *b)
{
	r->tv_sec = a->tv_sec - b->tv_sec;
	if (a->tv_nsec < b->tv_nsec) {
		r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
		r->tv_sec--;
	} else {
		r->tv_nsec = a->tv_nsec - b->tv_nsec;
	}
}

static void perf_stat__reset_stats(void)
{
	perf_evlist__reset_stats(evsel_list);
	perf_stat__reset_shadow_stats();
}

static int create_perf_stat_counter(struct perf_evsel *evsel)
{
	struct perf_event_attr *attr = &evsel->attr;

	if (stat_config.scale)
		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
				    PERF_FORMAT_TOTAL_TIME_RUNNING;

	attr->inherit = !no_inherit;

	/*
	 * Some events get initialized with sample_(period/type) set,
	 * like tracepoints. Clear it up for counting.
	 */
	attr->sample_period = 0;

	/*
	 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
	 * while avoiding that older tools show confusing messages.
	 *
	 * However for pipe sessions we need to keep it zero,
	 * because script's perf_evsel__check_attr is triggered
	 * by attr->sample_type != 0, and we can't run it on
	 * stat sessions.
	 */
	if (!(STAT_RECORD && perf_stat.file.is_pipe))
		attr->sample_type = PERF_SAMPLE_IDENTIFIER;

	/*
	 * Disabling all counters initially, they will be enabled
	 * either manually by us or by kernel via enable_on_exec
	 * set later.
	 */
	if (perf_evsel__is_group_leader(evsel)) {
		attr->disabled = 1;

		/*
		 * In case of initial_delay we enable tracee
		 * events manually.
		 */
		if (target__none(&target) && !initial_delay)
			attr->enable_on_exec = 1;
	}

	if (target__has_cpu(&target))
		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));

	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
}

/*
 * Does the counter have nsecs as a unit?
 */
static inline int nsec_counter(struct perf_evsel *evsel)
{
	if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
	    perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
		return 1;

	return 0;
}

static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	if (perf_data_file__write(&perf_stat.file, event, event->header.size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	perf_stat.bytes_written += event->header.size;
	return 0;
}

static int write_stat_round_event(u64 tm, u64 type)
{
	return perf_event__synthesize_stat_round(NULL, tm, type,
						 process_synthesized_event,
						 NULL);
}

#define WRITE_STAT_ROUND_EVENT(time, interval) \
	write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)

#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
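/*
 * So, for instance, WRITE_STAT_ROUND_EVENT(rs, INTERVAL) expands to
 * write_stat_round_event(rs, PERF_STAT_ROUND_TYPE__INTERVAL), and SID()
 * indexes the evsel's sample_id xyarray by (cpu, thread).
 */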
static int
perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread,
			     struct perf_counts_values *count)
{
	struct perf_sample_id *sid = SID(counter, cpu, thread);

	return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
					   process_synthesized_event, NULL);
}

/*
 * Read out the results of a single counter:
 * do not aggregate counts across CPUs in system-wide mode
 */
static int read_counter(struct perf_evsel *counter)
{
	int nthreads = thread_map__nr(evsel_list->threads);
	int ncpus = perf_evsel__nr_cpus(counter);
	int cpu, thread;

	if (!counter->supported)
		return -ENOENT;

	if (counter->system_wide)
		nthreads = 1;

	for (thread = 0; thread < nthreads; thread++) {
		for (cpu = 0; cpu < ncpus; cpu++) {
			struct perf_counts_values *count;

			count = perf_counts(counter->counts, cpu, thread);
			if (perf_evsel__read(counter, cpu, thread, count))
				return -1;

			if (STAT_RECORD) {
				if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
					pr_err("failed to write stat event\n");
					return -1;
				}
			}
		}
	}

	return 0;
}

static void read_counters(bool close_counters)
{
	struct perf_evsel *counter;

	evlist__for_each(evsel_list, counter) {
		if (read_counter(counter))
			pr_debug("failed to read counter %s\n", counter->name);

		if (perf_stat_process_counter(&stat_config, counter))
			pr_warning("failed to process counter %s\n", counter->name);

		if (close_counters) {
			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
					     thread_map__nr(evsel_list->threads));
		}
	}
}

static void process_interval(void)
{
	struct timespec ts, rs;

	read_counters(false);

	clock_gettime(CLOCK_MONOTONIC, &ts);
	diff_timespec(&rs, &ts, &ref_time);

	if (STAT_RECORD) {
		if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSECS_PER_SEC + rs.tv_nsec, INTERVAL))
			pr_err("failed to write stat round event\n");
	}

	print_counters(&rs, 0, NULL);
}

static void enable_counters(void)
{
	if (initial_delay)
		usleep(initial_delay * 1000);

	/*
	 * We need to enable counters only if:
	 * - we don't have tracee (attaching to task or cpu)
	 * - we have initial delay configured
	 */
	if (!target__none(&target) || initial_delay)
		perf_evlist__enable(evsel_list);
}
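/*
 * Note on ordering: when a workload is forked and no initial delay is
 * set, create_perf_stat_counter() armed enable_on_exec, so the kernel
 * enables the (initially disabled) group leaders at exec time and the
 * perf_evlist__enable() call above is skipped.
 */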
static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
}

static bool has_unit(struct perf_evsel *counter)
{
	return counter->unit && *counter->unit;
}

static bool has_scale(struct perf_evsel *counter)
{
	return counter->scale != 1;
}

static int perf_stat_synthesize_config(bool is_pipe)
{
	struct perf_evsel *counter;
	int err;

	if (is_pipe) {
		err = perf_event__synthesize_attrs(NULL, perf_stat.session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}
	}

	/*
	 * Synthesize the other event attributes that are not carried
	 * within the attr event - unit, scale, name.
	 */
	evlist__for_each(evsel_list, counter) {
		if (!counter->supported)
			continue;

		/*
		 * Synthesize unit and scale only if it's defined.
		 */
		if (has_unit(counter)) {
			err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event);
			if (err < 0) {
				pr_err("Couldn't synthesize evsel unit.\n");
				return err;
			}
		}

		if (has_scale(counter)) {
			err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event);
			if (err < 0) {
				pr_err("Couldn't synthesize evsel scale.\n");
				return err;
			}
		}

		if (counter->own_cpus) {
			err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event);
			if (err < 0) {
				pr_err("Couldn't synthesize evsel cpus.\n");
				return err;
			}
		}

		/*
		 * Name is needed only for pipe output,
		 * perf.data carries event names.
		 */
		if (is_pipe) {
			err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event);
			if (err < 0) {
				pr_err("Couldn't synthesize evsel name.\n");
				return err;
			}
		}
	}

	err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads,
						 process_synthesized_event,
						 NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = perf_event__synthesize_stat_config(NULL, &stat_config,
						 process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize config.\n");
		return err;
	}

	return 0;
}
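/*
 * FD() below fetches the file descriptor that perf_event_open() returned
 * for (evsel, cpu, thread) from the evsel's fd xyarray; the helpers that
 * follow record the kernel-assigned sample ids for those fds, so that
 * counts read back from a stat perf.data can be matched to their evsel.
 */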
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))

static int __store_counter_ids(struct perf_evsel *counter,
			       struct cpu_map *cpus,
			       struct thread_map *threads)
{
	int cpu, thread;

	for (cpu = 0; cpu < cpus->nr; cpu++) {
		for (thread = 0; thread < threads->nr; thread++) {
			int fd = FD(counter, cpu, thread);

			if (perf_evlist__id_add_fd(evsel_list, counter,
						   cpu, thread, fd) < 0)
				return -1;
		}
	}

	return 0;
}

static int store_counter_ids(struct perf_evsel *counter)
{
	struct cpu_map *cpus = counter->cpus;
	struct thread_map *threads = counter->threads;

	if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr))
		return -ENOMEM;

	return __store_counter_ids(counter, cpus, threads);
}

static int __run_perf_stat(int argc, const char **argv)
{
	int interval = stat_config.interval;
	char msg[512];
	unsigned long long t0, t1;
	struct perf_evsel *counter;
	struct timespec ts;
	size_t l;
	int status = 0;
	const bool forks = (argc > 0);
	bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false;

	if (interval) {
		ts.tv_sec  = interval / 1000;
		ts.tv_nsec = (interval % 1000) * 1000000;
	} else {
		ts.tv_sec  = 1;
		ts.tv_nsec = 0;
	}

	if (forks) {
		if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
						  workload_exec_failed_signal) < 0) {
			perror("failed to prepare workload");
			return -1;
		}
		child_pid = evsel_list->workload.pid;
	}

	if (group)
		perf_evlist__set_leader(evsel_list);

	evlist__for_each(evsel_list, counter) {
		if (create_perf_stat_counter(counter) < 0) {
			/*
			 * PPC returns ENXIO for HW counters until 2.6.37
			 * (behavior changed with commit b0a873e).
			 */
			if (errno == EINVAL || errno == ENOSYS ||
			    errno == ENOENT || errno == EOPNOTSUPP ||
			    errno == ENXIO) {
				if (verbose)
					ui__warning("%s event is not supported by the kernel.\n",
						    perf_evsel__name(counter));
				counter->supported = false;

				if ((counter->leader != counter) ||
				    !(counter->leader->nr_members > 1))
					continue;
			}

			perf_evsel__open_strerror(counter, &target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);

			if (child_pid != -1)
				kill(child_pid, SIGTERM);

			return -1;
		}
		counter->supported = true;

		l = strlen(counter->unit);
		if (l > unit_width)
			unit_width = l;

		if (STAT_RECORD && store_counter_ids(counter))
			return -1;
	}

	if (perf_evlist__apply_filters(evsel_list, &counter)) {
		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
			counter->filter, perf_evsel__name(counter), errno,
			strerror_r(errno, msg, sizeof(msg)));
		return -1;
	}

	if (STAT_RECORD) {
		int err, fd = perf_data_file__fd(&perf_stat.file);

		if (is_pipe) {
			err = perf_header__write_pipe(perf_data_file__fd(&perf_stat.file));
		} else {
			err = perf_session__write_header(perf_stat.session, evsel_list,
							 fd, false);
		}

		if (err < 0)
			return err;

		err = perf_stat_synthesize_config(is_pipe);
		if (err < 0)
			return err;
	}

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();
	clock_gettime(CLOCK_MONOTONIC, &ref_time);

	if (forks) {
		perf_evlist__start_workload(evsel_list);
		enable_counters();

		if (interval) {
			while (!waitpid(child_pid, &status, WNOHANG)) {
				nanosleep(&ts, NULL);
				process_interval();
			}
		}
		wait(&status);

		if (workload_exec_errno) {
			const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
			pr_err("Workload failed: %s\n", emsg);
			return -1;
		}

		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), argv[0]);
	} else {
		enable_counters();
		while (!done) {
			nanosleep(&ts, NULL);
			if (interval)
				process_interval();
		}
	}

	t1 = rdclock();

	update_stats(&walltime_nsecs_stats, t1 - t0);

	read_counters(true);

	return WEXITSTATUS(status);
}

static int run_perf_stat(int argc, const char **argv)
{
	int ret;

	if (pre_cmd) {
		ret = system(pre_cmd);
		if (ret)
			return ret;
	}

	if (sync_run)
		sync();

	ret = __run_perf_stat(argc, argv);
	if (ret)
		return ret;

	if (post_cmd) {
		ret = system(post_cmd);
		if (ret)
			return ret;
	}

	return ret;
}
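/*
 * run/ena come from PERF_FORMAT_TOTAL_TIME_RUNNING/ENABLED (requested in
 * create_perf_stat_counter() when scaling is on).  When the kernel has
 * to multiplex counters, run < ena; e.g. (numbers illustrative)
 * run = 50ms with ena = 100ms is printed as "(50.00%)" next to the count.
 */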
static void print_running(u64 run, u64 ena)
{
	if (csv_output) {
		fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
					csv_sep,
					run,
					csv_sep,
					ena ? 100.0 * run / ena : 100.0);
	} else if (run != ena) {
		fprintf(stat_config.output, "  (%.2f%%)", 100.0 * run / ena);
	}
}

static void print_noise_pct(double total, double avg)
{
	double pct = rel_stddev_stats(total, avg);

	if (csv_output)
		fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
	else if (pct)
		fprintf(stat_config.output, "  ( +-%6.2f%% )", pct);
}

static void print_noise(struct perf_evsel *evsel, double avg)
{
	struct perf_stat_evsel *ps;

	if (run_count == 1)
		return;

	ps = evsel->priv;
	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
}
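/*
 * A recurring idiom in the printout helpers below: the field width
 * passed to "%*d"/"%*s" is 0 in CSV mode, which printf treats as "no
 * padding", while the human-readable path uses fixed (often negative,
 * i.e. left-aligned) widths to keep the columns lined up.
 */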
static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
{
	switch (stat_config.aggr_mode) {
	case AGGR_CORE:
		fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
			cpu_map__id_to_socket(id),
			csv_output ? 0 : -8,
			cpu_map__id_to_cpu(id),
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
		break;
	case AGGR_SOCKET:
		fprintf(stat_config.output, "S%*d%s%*d%s",
			csv_output ? 0 : -5,
			id,
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
		break;
	case AGGR_NONE:
		fprintf(stat_config.output, "CPU%*d%s",
			csv_output ? 0 : -4,
			perf_evsel__cpus(evsel)->map[id], csv_sep);
		break;
	case AGGR_THREAD:
		fprintf(stat_config.output, "%*s-%*d%s",
			csv_output ? 0 : 16,
			thread_map__comm(evsel->threads, id),
			csv_output ? 0 : -8,
			thread_map__pid(evsel->threads, id),
			csv_sep);
		break;
	case AGGR_GLOBAL:
	case AGGR_UNSET:
	default:
		break;
	}
}

static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
{
	FILE *output = stat_config.output;
	double msecs = avg / 1e6;
	const char *fmt_v, *fmt_n;
	char name[25];

	fmt_v = csv_output ? "%.6f%s" : "%18.6f%s";
	fmt_n = csv_output ? "%s" : "%-25s";

	aggr_printout(evsel, id, nr);

	scnprintf(name, sizeof(name), "%s%s",
		  perf_evsel__name(evsel), csv_output ? "" : " (msec)");

	fprintf(output, fmt_v, msecs, csv_sep);

	if (csv_output)
		fprintf(output, "%s%s", evsel->unit, csv_sep);
	else
		fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep);

	fprintf(output, fmt_n, name);

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
}

static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
{
	FILE *output = stat_config.output;
	double sc = evsel->scale;
	const char *fmt;

	if (csv_output) {
		fmt = sc != 1.0 ? "%.2f%s" : "%.0f%s";
	} else {
		if (big_num)
			fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s";
		else
			fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s";
	}

	aggr_printout(evsel, id, nr);

	fprintf(output, fmt, avg, csv_sep);

	if (evsel->unit)
		fprintf(output, "%-*s%s",
			csv_output ? 0 : unit_width,
			evsel->unit, csv_sep);

	fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
}

static void printout(int id, int nr, struct perf_evsel *counter, double uval)
{
	int cpu = cpu_map__id_to_cpu(id);

	if (stat_config.aggr_mode == AGGR_GLOBAL)
		cpu = 0;

	if (nsec_counter(counter))
		nsec_printout(id, nr, counter, uval);
	else
		abs_printout(id, nr, counter, uval);

	if (!csv_output && !stat_config.interval)
		perf_stat__print_shadow_stats(stat_config.output, counter,
					      uval, cpu,
					      stat_config.aggr_mode);
}
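/*
 * print_aggr() folds per-cpu counts into one line per aggregation unit:
 * for every id in aggr_map (a socket or socket+core id) it walks each
 * evsel's cpus, and cpus whose aggr_get_id() matches contribute their
 * val/ena/run to that line.
 */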
static void print_aggr(char *prefix)
{
	FILE *output = stat_config.output;
	struct perf_evsel *counter;
	int cpu, s, s2, id, nr;
	double uval;
	u64 ena, run, val;

	if (!(aggr_map || aggr_get_id))
		return;

	for (s = 0; s < aggr_map->nr; s++) {
		id = aggr_map->map[s];
		evlist__for_each(evsel_list, counter) {
			val = ena = run = 0;
			nr = 0;
			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
				s2 = aggr_get_id(perf_evsel__cpus(counter), cpu);
				if (s2 != id)
					continue;
				val += perf_counts(counter->counts, cpu, 0)->val;
				ena += perf_counts(counter->counts, cpu, 0)->ena;
				run += perf_counts(counter->counts, cpu, 0)->run;
				nr++;
			}
			if (prefix)
				fprintf(output, "%s", prefix);

			if (run == 0 || ena == 0) {
				aggr_printout(counter, id, nr);

				fprintf(output, "%*s%s",
					csv_output ? 0 : 18,
					counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
					csv_sep);

				fprintf(output, "%-*s%s",
					csv_output ? 0 : unit_width,
					counter->unit, csv_sep);

				fprintf(output, "%*s",
					csv_output ? 0 : -25,
					perf_evsel__name(counter));

				if (counter->cgrp)
					fprintf(output, "%s%s",
						csv_sep, counter->cgrp->name);

				print_running(run, ena);
				fputc('\n', output);
				continue;
			}
			uval = val * counter->scale;
			printout(id, nr, counter, uval);
			if (!csv_output)
				print_noise(counter, 1.0);

			print_running(run, ena);
			fputc('\n', output);
		}
	}
}

static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
{
	FILE *output = stat_config.output;
	int nthreads = thread_map__nr(counter->threads);
	int ncpus = cpu_map__nr(counter->cpus);
	int cpu, thread;
	double uval;

	for (thread = 0; thread < nthreads; thread++) {
		u64 ena = 0, run = 0, val = 0;

		for (cpu = 0; cpu < ncpus; cpu++) {
			val += perf_counts(counter->counts, cpu, thread)->val;
			ena += perf_counts(counter->counts, cpu, thread)->ena;
			run += perf_counts(counter->counts, cpu, thread)->run;
		}

		if (prefix)
			fprintf(output, "%s", prefix);

		uval = val * counter->scale;
		printout(thread, 0, counter, uval);

		if (!csv_output)
			print_noise(counter, 1.0);

		print_running(run, ena);
		fputc('\n', output);
	}
}

/*
 * Print out the results of a single counter:
 * aggregated counts in system-wide mode
 */
static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
{
	FILE *output = stat_config.output;
	struct perf_stat_evsel *ps = counter->priv;
	double avg = avg_stats(&ps->res_stats[0]);
	int scaled = counter->counts->scaled;
	double uval;
	double avg_enabled, avg_running;

	avg_enabled = avg_stats(&ps->res_stats[1]);
	avg_running = avg_stats(&ps->res_stats[2]);

	if (prefix)
		fprintf(output, "%s", prefix);

	if (scaled == -1 || !counter->supported) {
		fprintf(output, "%*s%s",
			csv_output ? 0 : 18,
			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
			csv_sep);
		fprintf(output, "%-*s%s",
			csv_output ? 0 : unit_width,
			counter->unit, csv_sep);
		fprintf(output, "%*s",
			csv_output ? 0 : -25,
			perf_evsel__name(counter));

		if (counter->cgrp)
			fprintf(output, "%s%s", csv_sep, counter->cgrp->name);

		print_running(avg_running, avg_enabled);
		fputc('\n', output);
		return;
	}

	uval = avg * counter->scale;
	printout(-1, 0, counter, uval);

	print_noise(counter, avg);

	print_running(avg_running, avg_enabled);
	fprintf(output, "\n");
}

/*
 * Print out the results of a single counter:
 * does not use aggregated count in system-wide
 */
static void print_counter(struct perf_evsel *counter, char *prefix)
{
	FILE *output = stat_config.output;
	u64 ena, run, val;
	double uval;
	int cpu;

	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
		val = perf_counts(counter->counts, cpu, 0)->val;
		ena = perf_counts(counter->counts, cpu, 0)->ena;
		run = perf_counts(counter->counts, cpu, 0)->run;

		if (prefix)
			fprintf(output, "%s", prefix);

		if (run == 0 || ena == 0) {
			fprintf(output, "CPU%*d%s%*s%s",
				csv_output ? 0 : -4,
				perf_evsel__cpus(counter)->map[cpu], csv_sep,
				csv_output ? 0 : 18,
				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
				csv_sep);

			fprintf(output, "%-*s%s",
				csv_output ? 0 : unit_width,
				counter->unit, csv_sep);

			fprintf(output, "%*s",
				csv_output ? 0 : -25,
				perf_evsel__name(counter));

			if (counter->cgrp)
				fprintf(output, "%s%s",
					csv_sep, counter->cgrp->name);

			print_running(run, ena);
			fputc('\n', output);
			continue;
		}

		uval = val * counter->scale;
		printout(cpu, 0, counter, uval);
		if (!csv_output)
			print_noise(counter, 1.0);
		print_running(run, ena);

		fputc('\n', output);
	}
}
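/*
 * Interval mode: prefix each line with "<sec>.<nsec><sep>" relative to
 * ref_time, and re-emit the column header every 25 printed intervals
 * (num_print_interval wraps back to 0 below).
 */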
static void print_interval(char *prefix, struct timespec *ts)
{
	FILE *output = stat_config.output;
	static int num_print_interval;

	sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);

	if (num_print_interval == 0 && !csv_output) {
		switch (stat_config.aggr_mode) {
		case AGGR_SOCKET:
			fprintf(output, "#           time socket cpus             counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_CORE:
			fprintf(output, "#           time core         cpus             counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_NONE:
			fprintf(output, "#           time CPU                counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_THREAD:
			fprintf(output, "#           time             comm-pid                  counts %*s events\n", unit_width, "unit");
			break;
		case AGGR_GLOBAL:
		default:
			fprintf(output, "#           time             counts %*s events\n", unit_width, "unit");
		case AGGR_UNSET:
			break;
		}
	}

	if (++num_print_interval == 25)
		num_print_interval = 0;
}

static void print_header(int argc, const char **argv)
{
	FILE *output = stat_config.output;
	int i;

	fflush(stdout);

	if (!csv_output) {
		fprintf(output, "\n");
		fprintf(output, " Performance counter stats for ");
		if (target.system_wide)
			fprintf(output, "\'system wide");
		else if (target.cpu_list)
			fprintf(output, "\'CPU(s) %s", target.cpu_list);
		else if (!target__has_task(&target)) {
			fprintf(output, "\'%s", argv ? argv[0] : "pipe");
			for (i = 1; argv && (i < argc); i++)
				fprintf(output, " %s", argv[i]);
		} else if (target.pid)
			fprintf(output, "process id \'%s", target.pid);
		else
			fprintf(output, "thread id \'%s", target.tid);

		fprintf(output, "\'");
		if (run_count > 1)
			fprintf(output, " (%d runs)", run_count);
		fprintf(output, ":\n\n");
	}
}

static void print_footer(void)
{
	FILE *output = stat_config.output;

	if (!null_run)
		fprintf(output, "\n");
	fprintf(output, " %17.9f seconds time elapsed",
			avg_stats(&walltime_nsecs_stats)/1e9);
	if (run_count > 1) {
		fprintf(output, "                                        ");
		print_noise_pct(stddev_stats(&walltime_nsecs_stats),
				avg_stats(&walltime_nsecs_stats));
	}
	fprintf(output, "\n\n");
}

static void print_counters(struct timespec *ts, int argc, const char **argv)
{
	int interval = stat_config.interval;
	struct perf_evsel *counter;
	char buf[64], *prefix = NULL;

	/* Do not print anything if we record to the pipe. */
	if (STAT_RECORD && perf_stat.file.is_pipe)
		return;

	if (interval)
		print_interval(prefix = buf, ts);
	else
		print_header(argc, argv);

	switch (stat_config.aggr_mode) {
	case AGGR_CORE:
	case AGGR_SOCKET:
		print_aggr(prefix);
		break;
	case AGGR_THREAD:
		evlist__for_each(evsel_list, counter)
			print_aggr_thread(counter, prefix);
		break;
	case AGGR_GLOBAL:
		evlist__for_each(evsel_list, counter)
			print_counter_aggr(counter, prefix);
		break;
	case AGGR_NONE:
		evlist__for_each(evsel_list, counter)
			print_counter(counter, prefix);
		break;
	case AGGR_UNSET:
	default:
		break;
	}

	if (!interval && !csv_output)
		print_footer();

	fflush(stat_config.output);
}

static volatile int signr = -1;

static void skip_signal(int signo)
{
	if ((child_pid == -1) || stat_config.interval)
		done = 1;

	signr = signo;
	/*
	 * Render child_pid harmless so we won't send SIGTERM
	 * to a random process in case of a race condition
	 * and fast PID recycling.
	 */
	child_pid = -1;
}

static void sig_atexit(void)
{
	sigset_t set, oset;

	/*
	 * Avoid a race condition with the SIGCHLD handler in
	 * skip_signal(), which modifies child_pid; the goal is to
	 * avoid sending SIGTERM to a random process.
	 */
	sigemptyset(&set);
	sigaddset(&set, SIGCHLD);
	sigprocmask(SIG_BLOCK, &set, &oset);

	if (child_pid != -1)
		kill(child_pid, SIGTERM);

	sigprocmask(SIG_SETMASK, &oset, NULL);

	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

static int stat__set_big_num(const struct option *opt __maybe_unused,
			     const char *s __maybe_unused, int unset)
{
	big_num_opt = unset ? 0 : 1;
	return 0;
}
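/*
 * big_num_opt is a tri-state: -1 means the user passed neither -B nor
 * --no-big-num, 1 means -B, 0 means --no-big-num.  cmd_stat() later
 * reconciles it with -x, since thousands separators and CSV output
 * do not mix.
 */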
static const struct option stat_options[] = {
	OPT_BOOLEAN('T', "transaction", &transaction_run,
		    "hardware transaction statistics"),
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "stat events on existing process id"),
	OPT_STRING('t', "tid", &target.tid, "tid",
		   "stat events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('g', "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	OPT_INTEGER('r', "repeat", &run_count,
		    "repeat command and print average + stddev (max: 100, forever: 0)"),
	OPT_BOOLEAN('n', "null", &null_run,
		    "null run - don't start any counters"),
	OPT_INCR('d', "detailed", &detailed_run,
		 "detailed run - start a lot of events"),
	OPT_BOOLEAN('S', "sync", &sync_run,
		    "call sync() before starting a run"),
	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
			   "print large numbers with thousands' separators",
			   stat__set_big_num),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		   "list of cpus to monitor in system-wide"),
	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
		     "disable CPU count aggregation", AGGR_NONE),
	OPT_STRING('x', "field-separator", &csv_sep, "separator",
		   "print counts with custom separator"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only", parse_cgroups),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
	OPT_INTEGER(0, "log-fd", &output_fd,
		    "log output to fd, instead of stderr"),
	OPT_STRING(0, "pre", &pre_cmd, "command",
		   "command to run prior to the measured command"),
	OPT_STRING(0, "post", &post_cmd, "command",
		   "command to run after the measured command"),
	OPT_UINTEGER('I', "interval-print", &stat_config.interval,
		     "print counts at regular interval in ms (>= 10)"),
	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
		     "aggregate counts per processor socket", AGGR_SOCKET),
	OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
		     "aggregate counts per physical processor core", AGGR_CORE),
	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
		     "aggregate counts per thread", AGGR_THREAD),
	OPT_UINTEGER('D', "delay", &initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_END()
};

static int perf_stat__get_socket(struct cpu_map *map, int cpu)
{
	return cpu_map__get_socket(map, cpu, NULL);
}

static int perf_stat__get_core(struct cpu_map *map, int cpu)
{
	return cpu_map__get_core(map, cpu, NULL);
}

static int cpu_map__get_max(struct cpu_map *map)
{
	int i, max = -1;

	for (i = 0; i < map->nr; i++) {
		if (map->map[i] > max)
			max = map->map[i];
	}

	return max;
}

static struct cpu_map *cpus_aggr_map;
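/*
 * cpus_aggr_map caches the aggregation id (socket or core) per cpu:
 * entries start out as -1 (via cpu_map__empty_new()), are filled on
 * first lookup, and spare us re-deriving the topology on every
 * interval print.
 */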
static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx)
{
	int cpu;

	if (idx >= map->nr)
		return -1;

	cpu = map->map[idx];

	if (cpus_aggr_map->map[cpu] == -1)
		cpus_aggr_map->map[cpu] = get_id(map, idx);

	return cpus_aggr_map->map[cpu];
}

static int perf_stat__get_socket_cached(struct cpu_map *map, int idx)
{
	return perf_stat__get_aggr(perf_stat__get_socket, map, idx);
}

static int perf_stat__get_core_cached(struct cpu_map *map, int idx)
{
	return perf_stat__get_aggr(perf_stat__get_core, map, idx);
}

static int perf_stat_init_aggr_mode(void)
{
	int nr;

	switch (stat_config.aggr_mode) {
	case AGGR_SOCKET:
		if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
			perror("cannot build socket map");
			return -1;
		}
		aggr_get_id = perf_stat__get_socket_cached;
		break;
	case AGGR_CORE:
		if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
			perror("cannot build core map");
			return -1;
		}
		aggr_get_id = perf_stat__get_core_cached;
		break;
	case AGGR_NONE:
	case AGGR_GLOBAL:
	case AGGR_THREAD:
	case AGGR_UNSET:
	default:
		break;
	}

	/*
	 * The evsel_list->cpus is the base we operate on,
	 * taking the highest cpu number to be the size of
	 * the aggregation translate cpumap.
	 */
	nr = cpu_map__get_max(evsel_list->cpus);
	cpus_aggr_map = cpu_map__empty_new(nr + 1);
	return cpus_aggr_map ? 0 : -ENOMEM;
}

static void perf_stat__exit_aggr_mode(void)
{
	cpu_map__put(aggr_map);
	cpu_map__put(cpus_aggr_map);
	aggr_map = NULL;
	cpus_aggr_map = NULL;
}

static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx)
{
	int cpu;

	if (idx > map->nr)
		return -1;

	cpu = map->map[idx];

	if (cpu >= env->nr_cpus_online)
		return -1;

	return cpu;
}

static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
{
	struct perf_env *env = data;
	int cpu = perf_env__get_cpu(env, map, idx);

	return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
}

static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
{
	struct perf_env *env = data;
	int core = -1, cpu = perf_env__get_cpu(env, map, idx);

	if (cpu != -1) {
		int socket_id = env->cpu[cpu].socket_id;

		/*
		 * Encode socket in upper 16 bits
		 * core_id is relative to socket, and
		 * we need a global id. So we combine
		 * socket + core id.
		 */
		core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff);
	}

	return core;
}
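/*
 * E.g. socket 1, core 2 encodes as (1 << 16) | 2 = 0x10002;
 * cpu_map__id_to_socket()/cpu_map__id_to_cpu() undo this split when
 * the AGGR_CORE lines are printed by aggr_printout().
 */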
static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus,
				      struct cpu_map **sockp)
{
	return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
}

static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
				    struct cpu_map **corep)
{
	return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
}

static int perf_stat__get_socket_file(struct cpu_map *map, int idx)
{
	return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
}

static int perf_stat__get_core_file(struct cpu_map *map, int idx)
{
	return perf_env__get_core(map, idx, &perf_stat.session->header.env);
}

static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
{
	struct perf_env *env = &st->session->header.env;

	switch (stat_config.aggr_mode) {
	case AGGR_SOCKET:
		if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) {
			perror("cannot build socket map");
			return -1;
		}
		aggr_get_id = perf_stat__get_socket_file;
		break;
	case AGGR_CORE:
		if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) {
			perror("cannot build core map");
			return -1;
		}
		aggr_get_id = perf_stat__get_core_file;
		break;
	case AGGR_NONE:
	case AGGR_GLOBAL:
	case AGGR_THREAD:
	case AGGR_UNSET:
	default:
		break;
	}

	return 0;
}

/*
 * Add default attributes, if there were no attributes specified or
 * if -d/--detailed, -d -d or -d -d -d is used:
 */
static int add_default_attributes(void)
{
	struct perf_event_attr default_attrs[] = {

	{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
	{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
	{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
	{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },

	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },

	};
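	/*
	 * For the PERF_TYPE_HW_CACHE tables below, attr.config packs
	 * three fields a byte apart:
	 *   cache id | (op id << 8) | (result id << 16)
	 * e.g. L1D read misses = PERF_COUNT_HW_CACHE_L1D |
	 * (PERF_COUNT_HW_CACHE_OP_READ << 8) |
	 * (PERF_COUNT_HW_CACHE_RESULT_MISS << 16).
	 */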
	/*
	 * Detailed stats (-d), covering the L1 and last level data caches:
	 */
	struct perf_event_attr detailed_attrs[] = {

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_L1D << 0 |
		(PERF_COUNT_HW_CACHE_OP_READ << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_L1D << 0 |
		(PERF_COUNT_HW_CACHE_OP_READ << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_LL << 0 |
		(PERF_COUNT_HW_CACHE_OP_READ << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_LL << 0 |
		(PERF_COUNT_HW_CACHE_OP_READ << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	};

	/*
	 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
	 */
	struct perf_event_attr very_detailed_attrs[] = {

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_L1I << 0 |
		(PERF_COUNT_HW_CACHE_OP_READ << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_L1I << 0 |
		(PERF_COUNT_HW_CACHE_OP_READ << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_DTLB << 0 |
		(PERF_COUNT_HW_CACHE_OP_READ << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_DTLB << 0 |
		(PERF_COUNT_HW_CACHE_OP_READ << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_ITLB << 0 |
		(PERF_COUNT_HW_CACHE_OP_READ << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_ITLB << 0 |
		(PERF_COUNT_HW_CACHE_OP_READ << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },

	};

	/*
	 * Very, very detailed stats (-d -d -d), adding prefetch events:
	 */
	struct perf_event_attr very_very_detailed_attrs[] = {

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_L1D << 0 |
		(PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_L1D << 0 |
		(PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	};

	/* Set attrs if no event is selected and !null_run: */
	if (null_run)
		return 0;

	if (transaction_run) {
		int err;
		if (pmu_have_event("cpu", "cycles-ct") &&
		    pmu_have_event("cpu", "el-start"))
			err = parse_events(evsel_list, transaction_attrs, NULL);
		else
			err = parse_events(evsel_list, transaction_limited_attrs, NULL);
		if (err) {
			fprintf(stderr, "Cannot set up transaction events\n");
			return -1;
		}
		return 0;
	}

	if (!evsel_list->nr_entries) {
		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
			return -1;
	}

	/* Detailed events get appended to the event list: */

	if (detailed_run < 1)
		return 0;

	/* Append detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
		return -1;

	if (detailed_run < 2)
		return 0;

	/* Append very detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
		return -1;

	if (detailed_run < 3)
		return 0;

	/* Append very, very detailed run extra attributes: */
	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
}

static const char * const stat_record_usage[] = {
	"perf stat record [<options>]",
	NULL,
};
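/*
 * stat record reuses the regular perf.data header, but a stat session
 * has no samples, so features that only make sense for sample data
 * (build ids, tracing data, branch stacks, auxtrace) are cleared below.
 */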
static void init_features(struct perf_session *session)
{
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
	perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
	perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
	perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
}

static int __cmd_record(int argc, const char **argv)
{
	struct perf_session *session;
	struct perf_data_file *file = &perf_stat.file;

	argc = parse_options(argc, argv, stat_options, stat_record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);

	if (output_name)
		file->path = output_name;

	if (run_count != 1 || forever) {
		pr_err("Cannot use -r option with perf stat record.\n");
		return -1;
	}

	session = perf_session__new(file, false, NULL);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	init_features(session);

	session->evlist   = evsel_list;
	perf_stat.session = session;
	perf_stat.record  = true;
	return argc;
}

static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
				    union perf_event *event,
				    struct perf_session *session)
{
	struct stat_round_event *round = &event->stat_round;
	struct perf_evsel *counter;
	struct timespec tsh, *ts = NULL;
	const char **argv = session->header.env.cmdline_argv;
	int argc = session->header.env.nr_cmdline;

	evlist__for_each(evsel_list, counter)
		perf_stat_process_counter(&stat_config, counter);

	if (round->type == PERF_STAT_ROUND_TYPE__FINAL)
		update_stats(&walltime_nsecs_stats, round->time);

	if (stat_config.interval && round->time) {
		tsh.tv_sec  = round->time / NSECS_PER_SEC;
		tsh.tv_nsec = round->time % NSECS_PER_SEC;
		ts = &tsh;
	}

	print_counters(ts, argc, argv);
	return 0;
}

static
int process_stat_config_event(struct perf_tool *tool __maybe_unused,
			      union perf_event *event,
			      struct perf_session *session __maybe_unused)
{
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);

	perf_event__read_stat_config(&stat_config, &event->stat_config);

	if (cpu_map__empty(st->cpus)) {
		if (st->aggr_mode != AGGR_UNSET)
			pr_warning("warning: processing task data, aggregation mode not set\n");
		return 0;
	}

	if (st->aggr_mode != AGGR_UNSET)
		stat_config.aggr_mode = st->aggr_mode;

	if (perf_stat.file.is_pipe)
		perf_stat_init_aggr_mode();
	else
		perf_stat_init_aggr_mode_file(st);

	return 0;
}

static int set_maps(struct perf_stat *st)
{
	if (!st->cpus || !st->threads)
		return 0;

	if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
		return -EINVAL;

	perf_evlist__set_maps(evsel_list, st->cpus, st->threads);

	if (perf_evlist__alloc_stats(evsel_list, true))
		return -ENOMEM;

	st->maps_allocated = true;
	return 0;
}

static
int process_thread_map_event(struct perf_tool *tool __maybe_unused,
			     union perf_event *event,
			     struct perf_session *session __maybe_unused)
{
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);

	if (st->threads) {
		pr_warning("Extra thread map event, ignoring.\n");
		return 0;
	}

	st->threads = thread_map__new_event(&event->thread_map);
	if (!st->threads)
		return -ENOMEM;

	return set_maps(st);
}

static
int process_cpu_map_event(struct perf_tool *tool __maybe_unused,
			  union perf_event *event,
			  struct perf_session *session __maybe_unused)
{
	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
	struct cpu_map *cpus;

	if (st->cpus) {
		pr_warning("Extra cpu map event, ignoring.\n");
		return 0;
	}

	cpus = cpu_map__new_data(&event->cpu_map.data);
	if (!cpus)
		return -ENOMEM;

	st->cpus = cpus;
	return set_maps(st);
}

static const char * const stat_report_usage[] = {
	"perf stat report [<options>]",
	NULL,
};
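/*
 * The tool used by perf stat report: the recorded perf.data is replayed
 * through these callbacks - attr/event_update rebuild the evlist,
 * thread_map/cpu_map restore the maps (see set_maps()), stat fills in
 * the counts and stat_round triggers the actual printing.
 */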
static struct perf_stat perf_stat = {
	.tool = {
		.attr		= perf_event__process_attr,
		.event_update	= perf_event__process_event_update,
		.thread_map	= process_thread_map_event,
		.cpu_map	= process_cpu_map_event,
		.stat_config	= process_stat_config_event,
		.stat		= perf_event__process_stat_event,
		.stat_round	= process_stat_round_event,
	},
	.aggr_mode = AGGR_UNSET,
};

static int __cmd_report(int argc, const char **argv)
{
	struct perf_session *session;
	const struct option options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
		     "aggregate counts per processor socket", AGGR_SOCKET),
	OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
		     "aggregate counts per physical processor core", AGGR_CORE),
	OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
		     "disable CPU count aggregation", AGGR_NONE),
	OPT_END()
	};
	struct stat st;
	int ret;

	argc = parse_options(argc, argv, options, stat_report_usage, 0);

	if (!input_name || !strlen(input_name)) {
		if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
			input_name = "-";
		else
			input_name = "perf.data";
	}

	perf_stat.file.path = input_name;
	perf_stat.file.mode = PERF_DATA_MODE_READ;

	session = perf_session__new(&perf_stat.file, false, &perf_stat.tool);
	if (session == NULL)
		return -1;

	perf_stat.session  = session;
	stat_config.output = stderr;
	evsel_list         = session->evlist;

	ret = perf_session__process_events(session);
	if (ret)
		return ret;

	perf_session__delete(session);
	return 0;
}

int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
{
	const char * const stat_usage[] = {
		"perf stat [<options>] [<command>]",
		NULL
	};
	int status = -EINVAL, run_idx;
	const char *mode;
	FILE *output = stderr;
	unsigned int interval;
	const char * const stat_subcommands[] = { "record", "report" };

	setlocale(LC_ALL, "");

	evsel_list = perf_evlist__new();
	if (evsel_list == NULL)
		return -ENOMEM;

	argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
					(const char **) stat_usage,
					PARSE_OPT_STOP_AT_NON_OPTION);

	if (csv_sep) {
		csv_output = true;
		if (!strcmp(csv_sep, "\\t"))
			csv_sep = "\t";
	} else
		csv_sep = DEFAULT_SEPARATOR;

	if (argc && !strncmp(argv[0], "rec", 3)) {
		argc = __cmd_record(argc, argv);
		if (argc < 0)
			return -1;
	} else if (argc && !strncmp(argv[0], "rep", 3))
		return __cmd_report(argc, argv);

	interval = stat_config.interval;
	/*
	 * For record command the -o is already taken care of.
	 */
	if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
		output = NULL;

	if (output_name && output_fd) {
		fprintf(stderr, "cannot use both --output and --log-fd\n");
		parse_options_usage(stat_usage, stat_options, "o", 1);
		parse_options_usage(NULL, stat_options, "log-fd", 0);
		goto out;
	}

	if (output_fd < 0) {
		fprintf(stderr, "argument to --log-fd must be > 0\n");
		parse_options_usage(stat_usage, stat_options, "log-fd", 0);
		goto out;
	}

	if (!output) {
		struct timespec tm;
		mode = append_file ? "a" : "w";

		output = fopen(output_name, mode);
		if (!output) {
			perror("failed to create output file");
			return -1;
		}
		clock_gettime(CLOCK_REALTIME, &tm);
		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
	} else if (output_fd > 0) {
		mode = append_file ? "a" : "w";
		output = fdopen(output_fd, mode);
		if (!output) {
			perror("Failed opening logfd");
			return -errno;
		}
	}

	stat_config.output = output;

	/*
	 * let the spreadsheet do the pretty-printing
	 */
	if (csv_output) {
		/* User explicitly passed -B? */
		if (big_num_opt == 1) {
			fprintf(stderr, "-B option not supported with -x\n");
			parse_options_usage(stat_usage, stat_options, "B", 1);
			parse_options_usage(NULL, stat_options, "x", 1);
			goto out;
		} else /* Nope, so disable big number formatting */
			big_num = false;
	} else if (big_num_opt == 0) /* User passed --no-big-num */
		big_num = false;

	if (!argc && target__none(&target))
		usage_with_options(stat_usage, stat_options);

	if (run_count < 0) {
		pr_err("Run count must be a positive number\n");
		parse_options_usage(stat_usage, stat_options, "r", 1);
		goto out;
	} else if (run_count == 0) {
		forever = true;
		run_count = 1;
	}
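	/*
	 * At this point run_count is always >= 1: -r 0 turned into
	 * forever = true above, so the run loop at the bottom repeats
	 * single runs until interrupted while keeping per-run stats
	 * meaningful.
	 */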
	if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
		fprintf(stderr, "The --per-thread option is only available "
			"when monitoring via -p -t options.\n");
		parse_options_usage(NULL, stat_options, "p", 1);
		parse_options_usage(NULL, stat_options, "t", 1);
		goto out;
	}

	/*
	 * no_aggr, cgroup are for system-wide only
	 * --per-thread is aggregated per thread, we don't mix it with cpu mode
	 */
	if (((stat_config.aggr_mode != AGGR_GLOBAL &&
	      stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
	    !target__has_cpu(&target)) {
		fprintf(stderr, "both cgroup and no-aggregation "
			"modes only available in system-wide mode\n");

		parse_options_usage(stat_usage, stat_options, "G", 1);
		parse_options_usage(NULL, stat_options, "A", 1);
		parse_options_usage(NULL, stat_options, "a", 1);
		goto out;
	}

	if (add_default_attributes())
		goto out;

	target__validate(&target);

	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
		if (target__has_task(&target)) {
			pr_err("Problems finding threads of monitor\n");
			parse_options_usage(stat_usage, stat_options, "p", 1);
			parse_options_usage(NULL, stat_options, "t", 1);
		} else if (target__has_cpu(&target)) {
			perror("failed to parse CPUs map");
			parse_options_usage(stat_usage, stat_options, "C", 1);
			parse_options_usage(NULL, stat_options, "a", 1);
		}
		goto out;
	}

	/*
	 * Initialize thread_map with comm names,
	 * so we could print it out on output.
	 */
	if (stat_config.aggr_mode == AGGR_THREAD)
		thread_map__read_comms(evsel_list->threads);

	if (interval && interval < 100) {
		if (interval < 10) {
			pr_err("print interval must be >= 10ms\n");
			parse_options_usage(stat_usage, stat_options, "I", 1);
			goto out;
		} else
			pr_warning("print interval < 100ms. "
				   "The overhead percentage could be high in some cases. "
				   "Please proceed with caution.\n");
	}

	if (perf_evlist__alloc_stats(evsel_list, interval))
		goto out;

	if (perf_stat_init_aggr_mode())
		goto out;

	/*
	 * We don't want to block the signals - that would cause
	 * child tasks to inherit that and Ctrl-C would not work.
	 * What we want is for Ctrl-C to work in the exec()-ed
	 * task, but being ignored by perf stat itself:
	 */
	atexit(sig_atexit);
	if (!forever)
		signal(SIGINT, skip_signal);
	signal(SIGCHLD, skip_signal);
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

	status = 0;
	for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
		if (run_count != 1 && verbose)
			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
				run_idx + 1);

		status = run_perf_stat(argc, argv);
		if (forever && status != -1) {
			print_counters(NULL, argc, argv);
			perf_stat__reset_stats();
		}
	}

	if (!forever && status != -1 && !interval)
		print_counters(NULL, argc, argv);

	if (STAT_RECORD) {
		/*
		 * We synthesize the kernel mmap record just so that older tools
		 * don't emit warnings about not being able to resolve symbols
		 * due to /proc/sys/kernel/kptr_restrict settings and instead provide
		 * a saner message about no samples being in the perf.data file.
		 *
		 * This also serves to suppress a warning about f_header.data.size == 0
		 * in header.c at the moment 'perf stat record' gets introduced, which
		 * is not really needed once we start adding the stat specific PERF_RECORD_
		 * records, but the need to suppress the kptr_restrict messages in older
		 * tools remains  -acme
		 */
		int fd = perf_data_file__fd(&perf_stat.file);
		int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
							     process_synthesized_event,
							     &perf_stat.session->machines.host);
		if (err) {
			pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
				   "older tools may produce warnings about this file.\n");
		}

		if (!interval) {
			if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
				pr_err("failed to write stat round event\n");
		}

		if (!perf_stat.file.is_pipe) {
			perf_stat.session->header.data_size += perf_stat.bytes_written;
			perf_session__write_header(perf_stat.session, evsel_list, fd, true);
		}

		perf_session__delete(perf_stat.session);
	}

	perf_stat__exit_aggr_mode();
	perf_evlist__free_stats(evsel_list);
out:
	perf_evlist__delete(evsel_list);
	return status;
}