1 /* 2 * builtin-stat.c 3 * 4 * Builtin stat command: Give a precise performance counters summary 5 * overview about any workload, CPU or specific PID. 6 * 7 * Sample output: 8 9 $ perf stat ./hackbench 10 10 11 Time: 0.118 12 13 Performance counter stats for './hackbench 10': 14 15 1708.761321 task-clock # 11.037 CPUs utilized 16 41,190 context-switches # 0.024 M/sec 17 6,735 CPU-migrations # 0.004 M/sec 18 17,318 page-faults # 0.010 M/sec 19 5,205,202,243 cycles # 3.046 GHz 20 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle 21 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle 22 2,603,501,247 instructions # 0.50 insns per cycle 23 # 1.48 stalled cycles per insn 24 484,357,498 branches # 283.455 M/sec 25 6,388,934 branch-misses # 1.32% of all branches 26 27 0.154822978 seconds time elapsed 28 29 * 30 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 31 * 32 * Improvements and fixes by: 33 * 34 * Arjan van de Ven <arjan@linux.intel.com> 35 * Yanmin Zhang <yanmin.zhang@intel.com> 36 * Wu Fengguang <fengguang.wu@intel.com> 37 * Mike Galbraith <efault@gmx.de> 38 * Paul Mackerras <paulus@samba.org> 39 * Jaswinder Singh Rajput <jaswinder@kernel.org> 40 * 41 * Released under the GPL v2. (and only v2, not any later version) 42 */ 43 44 #include "perf.h" 45 #include "builtin.h" 46 #include "util/util.h" 47 #include "util/parse-options.h" 48 #include "util/parse-events.h" 49 #include "util/event.h" 50 #include "util/evlist.h" 51 #include "util/evsel.h" 52 #include "util/debug.h" 53 #include "util/color.h" 54 #include "util/stat.h" 55 #include "util/header.h" 56 #include "util/cpumap.h" 57 #include "util/thread.h" 58 #include "util/thread_map.h" 59 60 #include <stdlib.h> 61 #include <sys/prctl.h> 62 #include <locale.h> 63 64 #define DEFAULT_SEPARATOR " " 65 #define CNTR_NOT_SUPPORTED "<not supported>" 66 #define CNTR_NOT_COUNTED "<not counted>" 67 68 static void print_stat(int argc, const char **argv); 69 static void print_counter_aggr(struct perf_evsel *counter, char *prefix); 70 static void print_counter(struct perf_evsel *counter, char *prefix); 71 static void print_aggr(char *prefix); 72 73 static struct perf_evlist *evsel_list; 74 75 static struct perf_target target = { 76 .uid = UINT_MAX, 77 }; 78 79 enum aggr_mode { 80 AGGR_NONE, 81 AGGR_GLOBAL, 82 AGGR_SOCKET, 83 AGGR_CORE, 84 }; 85 86 static int run_count = 1; 87 static bool no_inherit = false; 88 static bool scale = true; 89 static enum aggr_mode aggr_mode = AGGR_GLOBAL; 90 static volatile pid_t child_pid = -1; 91 static bool null_run = false; 92 static int detailed_run = 0; 93 static bool big_num = true; 94 static int big_num_opt = -1; 95 static const char *csv_sep = NULL; 96 static bool csv_output = false; 97 static bool group = false; 98 static FILE *output = NULL; 99 static const char *pre_cmd = NULL; 100 static const char *post_cmd = NULL; 101 static bool sync_run = false; 102 static unsigned int interval = 0; 103 static unsigned int initial_delay = 0; 104 static bool forever = false; 105 static struct timespec ref_time; 106 static struct cpu_map *aggr_map; 107 static int (*aggr_get_id)(struct cpu_map *m, int cpu); 108 109 static volatile int done = 0; 110 111 struct perf_stat { 112 struct stats res_stats[3]; 113 }; 114 115 static inline void diff_timespec(struct timespec *r, struct timespec *a, 116 struct timespec *b) 117 { 118 r->tv_sec = a->tv_sec - b->tv_sec; 119 if (a->tv_nsec < b->tv_nsec) { 120 r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec; 121 r->tv_sec--; 122 } else { 123 r->tv_nsec = a->tv_nsec - b->tv_nsec ; 124 } 125 } 126 127 static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel) 128 { 129 return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus; 130 } 131 132 static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel) 133 { 134 return perf_evsel__cpus(evsel)->nr; 135 } 136 137 static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) 138 { 139 memset(evsel->priv, 0, sizeof(struct perf_stat)); 140 } 141 142 static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) 143 { 144 evsel->priv = zalloc(sizeof(struct perf_stat)); 145 return evsel->priv == NULL ? -ENOMEM : 0; 146 } 147 148 static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) 149 { 150 free(evsel->priv); 151 evsel->priv = NULL; 152 } 153 154 static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel) 155 { 156 void *addr; 157 size_t sz; 158 159 sz = sizeof(*evsel->counts) + 160 (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values)); 161 162 addr = zalloc(sz); 163 if (!addr) 164 return -ENOMEM; 165 166 evsel->prev_raw_counts = addr; 167 168 return 0; 169 } 170 171 static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) 172 { 173 free(evsel->prev_raw_counts); 174 evsel->prev_raw_counts = NULL; 175 } 176 177 static void perf_evlist__free_stats(struct perf_evlist *evlist) 178 { 179 struct perf_evsel *evsel; 180 181 list_for_each_entry(evsel, &evlist->entries, node) { 182 perf_evsel__free_stat_priv(evsel); 183 perf_evsel__free_counts(evsel); 184 perf_evsel__free_prev_raw_counts(evsel); 185 } 186 } 187 188 static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw) 189 { 190 struct perf_evsel *evsel; 191 192 list_for_each_entry(evsel, &evlist->entries, node) { 193 if (perf_evsel__alloc_stat_priv(evsel) < 0 || 194 perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 || 195 (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0)) 196 goto out_free; 197 } 198 199 return 0; 200 201 out_free: 202 perf_evlist__free_stats(evlist); 203 return -1; 204 } 205 206 static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; 207 static struct stats runtime_cycles_stats[MAX_NR_CPUS]; 208 static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS]; 209 static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS]; 210 static struct stats runtime_branches_stats[MAX_NR_CPUS]; 211 static struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; 212 static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; 213 static struct stats runtime_l1_icache_stats[MAX_NR_CPUS]; 214 static struct stats runtime_ll_cache_stats[MAX_NR_CPUS]; 215 static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS]; 216 static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; 217 static struct stats walltime_nsecs_stats; 218 219 static void perf_stat__reset_stats(struct perf_evlist *evlist) 220 { 221 struct perf_evsel *evsel; 222 223 list_for_each_entry(evsel, &evlist->entries, node) { 224 perf_evsel__reset_stat_priv(evsel); 225 perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel)); 226 } 227 228 memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); 229 memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); 230 memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); 231 memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); 232 memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); 233 memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); 234 memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats)); 235 memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); 236 memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); 237 memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); 238 memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); 239 memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); 240 } 241 242 static int create_perf_stat_counter(struct perf_evsel *evsel) 243 { 244 struct perf_event_attr *attr = &evsel->attr; 245 246 if (scale) 247 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 248 PERF_FORMAT_TOTAL_TIME_RUNNING; 249 250 attr->inherit = !no_inherit; 251 252 if (perf_target__has_cpu(&target)) 253 return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); 254 255 if (!perf_target__has_task(&target) && 256 perf_evsel__is_group_leader(evsel)) { 257 attr->disabled = 1; 258 if (!initial_delay) 259 attr->enable_on_exec = 1; 260 } 261 262 return perf_evsel__open_per_thread(evsel, evsel_list->threads); 263 } 264 265 /* 266 * Does the counter have nsecs as a unit? 267 */ 268 static inline int nsec_counter(struct perf_evsel *evsel) 269 { 270 if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || 271 perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 272 return 1; 273 274 return 0; 275 } 276 277 /* 278 * Update various tracking values we maintain to print 279 * more semantic information such as miss/hit ratios, 280 * instruction rates, etc: 281 */ 282 static void update_shadow_stats(struct perf_evsel *counter, u64 *count) 283 { 284 if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) 285 update_stats(&runtime_nsecs_stats[0], count[0]); 286 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) 287 update_stats(&runtime_cycles_stats[0], count[0]); 288 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) 289 update_stats(&runtime_stalled_cycles_front_stats[0], count[0]); 290 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) 291 update_stats(&runtime_stalled_cycles_back_stats[0], count[0]); 292 else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) 293 update_stats(&runtime_branches_stats[0], count[0]); 294 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) 295 update_stats(&runtime_cacherefs_stats[0], count[0]); 296 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) 297 update_stats(&runtime_l1_dcache_stats[0], count[0]); 298 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) 299 update_stats(&runtime_l1_icache_stats[0], count[0]); 300 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) 301 update_stats(&runtime_ll_cache_stats[0], count[0]); 302 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) 303 update_stats(&runtime_dtlb_cache_stats[0], count[0]); 304 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) 305 update_stats(&runtime_itlb_cache_stats[0], count[0]); 306 } 307 308 /* 309 * Read out the results of a single counter: 310 * aggregate counts across CPUs in system-wide mode 311 */ 312 static int read_counter_aggr(struct perf_evsel *counter) 313 { 314 struct perf_stat *ps = counter->priv; 315 u64 *count = counter->counts->aggr.values; 316 int i; 317 318 if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter), 319 thread_map__nr(evsel_list->threads), scale) < 0) 320 return -1; 321 322 for (i = 0; i < 3; i++) 323 update_stats(&ps->res_stats[i], count[i]); 324 325 if (verbose) { 326 fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", 327 perf_evsel__name(counter), count[0], count[1], count[2]); 328 } 329 330 /* 331 * Save the full runtime - to allow normalization during printout: 332 */ 333 update_shadow_stats(counter, count); 334 335 return 0; 336 } 337 338 /* 339 * Read out the results of a single counter: 340 * do not aggregate counts across CPUs in system-wide mode 341 */ 342 static int read_counter(struct perf_evsel *counter) 343 { 344 u64 *count; 345 int cpu; 346 347 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 348 if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0) 349 return -1; 350 351 count = counter->counts->cpu[cpu].values; 352 353 update_shadow_stats(counter, count); 354 } 355 356 return 0; 357 } 358 359 static void print_interval(void) 360 { 361 static int num_print_interval; 362 struct perf_evsel *counter; 363 struct perf_stat *ps; 364 struct timespec ts, rs; 365 char prefix[64]; 366 367 if (aggr_mode == AGGR_GLOBAL) { 368 list_for_each_entry(counter, &evsel_list->entries, node) { 369 ps = counter->priv; 370 memset(ps->res_stats, 0, sizeof(ps->res_stats)); 371 read_counter_aggr(counter); 372 } 373 } else { 374 list_for_each_entry(counter, &evsel_list->entries, node) { 375 ps = counter->priv; 376 memset(ps->res_stats, 0, sizeof(ps->res_stats)); 377 read_counter(counter); 378 } 379 } 380 381 clock_gettime(CLOCK_MONOTONIC, &ts); 382 diff_timespec(&rs, &ts, &ref_time); 383 sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep); 384 385 if (num_print_interval == 0 && !csv_output) { 386 switch (aggr_mode) { 387 case AGGR_SOCKET: 388 fprintf(output, "# time socket cpus counts events\n"); 389 break; 390 case AGGR_CORE: 391 fprintf(output, "# time core cpus counts events\n"); 392 break; 393 case AGGR_NONE: 394 fprintf(output, "# time CPU counts events\n"); 395 break; 396 case AGGR_GLOBAL: 397 default: 398 fprintf(output, "# time counts events\n"); 399 } 400 } 401 402 if (++num_print_interval == 25) 403 num_print_interval = 0; 404 405 switch (aggr_mode) { 406 case AGGR_CORE: 407 case AGGR_SOCKET: 408 print_aggr(prefix); 409 break; 410 case AGGR_NONE: 411 list_for_each_entry(counter, &evsel_list->entries, node) 412 print_counter(counter, prefix); 413 break; 414 case AGGR_GLOBAL: 415 default: 416 list_for_each_entry(counter, &evsel_list->entries, node) 417 print_counter_aggr(counter, prefix); 418 } 419 420 fflush(output); 421 } 422 423 static void handle_initial_delay(void) 424 { 425 struct perf_evsel *counter; 426 427 if (initial_delay) { 428 const int ncpus = cpu_map__nr(evsel_list->cpus), 429 nthreads = thread_map__nr(evsel_list->threads); 430 431 usleep(initial_delay * 1000); 432 list_for_each_entry(counter, &evsel_list->entries, node) 433 perf_evsel__enable(counter, ncpus, nthreads); 434 } 435 } 436 437 static int __run_perf_stat(int argc, const char **argv) 438 { 439 char msg[512]; 440 unsigned long long t0, t1; 441 struct perf_evsel *counter; 442 struct timespec ts; 443 int status = 0; 444 const bool forks = (argc > 0); 445 446 if (interval) { 447 ts.tv_sec = interval / 1000; 448 ts.tv_nsec = (interval % 1000) * 1000000; 449 } else { 450 ts.tv_sec = 1; 451 ts.tv_nsec = 0; 452 } 453 454 if (forks) { 455 if (perf_evlist__prepare_workload(evsel_list, &target, argv, 456 false, false) < 0) { 457 perror("failed to prepare workload"); 458 return -1; 459 } 460 child_pid = evsel_list->workload.pid; 461 } 462 463 if (group) 464 perf_evlist__set_leader(evsel_list); 465 466 list_for_each_entry(counter, &evsel_list->entries, node) { 467 if (create_perf_stat_counter(counter) < 0) { 468 /* 469 * PPC returns ENXIO for HW counters until 2.6.37 470 * (behavior changed with commit b0a873e). 471 */ 472 if (errno == EINVAL || errno == ENOSYS || 473 errno == ENOENT || errno == EOPNOTSUPP || 474 errno == ENXIO) { 475 if (verbose) 476 ui__warning("%s event is not supported by the kernel.\n", 477 perf_evsel__name(counter)); 478 counter->supported = false; 479 continue; 480 } 481 482 perf_evsel__open_strerror(counter, &target, 483 errno, msg, sizeof(msg)); 484 ui__error("%s\n", msg); 485 486 if (child_pid != -1) 487 kill(child_pid, SIGTERM); 488 489 return -1; 490 } 491 counter->supported = true; 492 } 493 494 if (perf_evlist__apply_filters(evsel_list)) { 495 error("failed to set filter with %d (%s)\n", errno, 496 strerror(errno)); 497 return -1; 498 } 499 500 /* 501 * Enable counters and exec the command: 502 */ 503 t0 = rdclock(); 504 clock_gettime(CLOCK_MONOTONIC, &ref_time); 505 506 if (forks) { 507 perf_evlist__start_workload(evsel_list); 508 handle_initial_delay(); 509 510 if (interval) { 511 while (!waitpid(child_pid, &status, WNOHANG)) { 512 nanosleep(&ts, NULL); 513 print_interval(); 514 } 515 } 516 wait(&status); 517 if (WIFSIGNALED(status)) 518 psignal(WTERMSIG(status), argv[0]); 519 } else { 520 handle_initial_delay(); 521 while (!done) { 522 nanosleep(&ts, NULL); 523 if (interval) 524 print_interval(); 525 } 526 } 527 528 t1 = rdclock(); 529 530 update_stats(&walltime_nsecs_stats, t1 - t0); 531 532 if (aggr_mode == AGGR_GLOBAL) { 533 list_for_each_entry(counter, &evsel_list->entries, node) { 534 read_counter_aggr(counter); 535 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 536 thread_map__nr(evsel_list->threads)); 537 } 538 } else { 539 list_for_each_entry(counter, &evsel_list->entries, node) { 540 read_counter(counter); 541 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1); 542 } 543 } 544 545 return WEXITSTATUS(status); 546 } 547 548 static int run_perf_stat(int argc __maybe_unused, const char **argv) 549 { 550 int ret; 551 552 if (pre_cmd) { 553 ret = system(pre_cmd); 554 if (ret) 555 return ret; 556 } 557 558 if (sync_run) 559 sync(); 560 561 ret = __run_perf_stat(argc, argv); 562 if (ret) 563 return ret; 564 565 if (post_cmd) { 566 ret = system(post_cmd); 567 if (ret) 568 return ret; 569 } 570 571 return ret; 572 } 573 574 static void print_noise_pct(double total, double avg) 575 { 576 double pct = rel_stddev_stats(total, avg); 577 578 if (csv_output) 579 fprintf(output, "%s%.2f%%", csv_sep, pct); 580 else if (pct) 581 fprintf(output, " ( +-%6.2f%% )", pct); 582 } 583 584 static void print_noise(struct perf_evsel *evsel, double avg) 585 { 586 struct perf_stat *ps; 587 588 if (run_count == 1) 589 return; 590 591 ps = evsel->priv; 592 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 593 } 594 595 static void aggr_printout(struct perf_evsel *evsel, int id, int nr) 596 { 597 switch (aggr_mode) { 598 case AGGR_CORE: 599 fprintf(output, "S%d-C%*d%s%*d%s", 600 cpu_map__id_to_socket(id), 601 csv_output ? 0 : -8, 602 cpu_map__id_to_cpu(id), 603 csv_sep, 604 csv_output ? 0 : 4, 605 nr, 606 csv_sep); 607 break; 608 case AGGR_SOCKET: 609 fprintf(output, "S%*d%s%*d%s", 610 csv_output ? 0 : -5, 611 id, 612 csv_sep, 613 csv_output ? 0 : 4, 614 nr, 615 csv_sep); 616 break; 617 case AGGR_NONE: 618 fprintf(output, "CPU%*d%s", 619 csv_output ? 0 : -4, 620 perf_evsel__cpus(evsel)->map[id], csv_sep); 621 break; 622 case AGGR_GLOBAL: 623 default: 624 break; 625 } 626 } 627 628 static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) 629 { 630 double msecs = avg / 1e6; 631 const char *fmt = csv_output ? "%.6f%s%s" : "%18.6f%s%-25s"; 632 633 aggr_printout(evsel, cpu, nr); 634 635 fprintf(output, fmt, msecs, csv_sep, perf_evsel__name(evsel)); 636 637 if (evsel->cgrp) 638 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 639 640 if (csv_output || interval) 641 return; 642 643 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 644 fprintf(output, " # %8.3f CPUs utilized ", 645 avg / avg_stats(&walltime_nsecs_stats)); 646 else 647 fprintf(output, " "); 648 } 649 650 /* used for get_ratio_color() */ 651 enum grc_type { 652 GRC_STALLED_CYCLES_FE, 653 GRC_STALLED_CYCLES_BE, 654 GRC_CACHE_MISSES, 655 GRC_MAX_NR 656 }; 657 658 static const char *get_ratio_color(enum grc_type type, double ratio) 659 { 660 static const double grc_table[GRC_MAX_NR][3] = { 661 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 }, 662 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 }, 663 [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 }, 664 }; 665 const char *color = PERF_COLOR_NORMAL; 666 667 if (ratio > grc_table[type][0]) 668 color = PERF_COLOR_RED; 669 else if (ratio > grc_table[type][1]) 670 color = PERF_COLOR_MAGENTA; 671 else if (ratio > grc_table[type][2]) 672 color = PERF_COLOR_YELLOW; 673 674 return color; 675 } 676 677 static void print_stalled_cycles_frontend(int cpu, 678 struct perf_evsel *evsel 679 __maybe_unused, double avg) 680 { 681 double total, ratio = 0.0; 682 const char *color; 683 684 total = avg_stats(&runtime_cycles_stats[cpu]); 685 686 if (total) 687 ratio = avg / total * 100.0; 688 689 color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); 690 691 fprintf(output, " # "); 692 color_fprintf(output, color, "%6.2f%%", ratio); 693 fprintf(output, " frontend cycles idle "); 694 } 695 696 static void print_stalled_cycles_backend(int cpu, 697 struct perf_evsel *evsel 698 __maybe_unused, double avg) 699 { 700 double total, ratio = 0.0; 701 const char *color; 702 703 total = avg_stats(&runtime_cycles_stats[cpu]); 704 705 if (total) 706 ratio = avg / total * 100.0; 707 708 color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); 709 710 fprintf(output, " # "); 711 color_fprintf(output, color, "%6.2f%%", ratio); 712 fprintf(output, " backend cycles idle "); 713 } 714 715 static void print_branch_misses(int cpu, 716 struct perf_evsel *evsel __maybe_unused, 717 double avg) 718 { 719 double total, ratio = 0.0; 720 const char *color; 721 722 total = avg_stats(&runtime_branches_stats[cpu]); 723 724 if (total) 725 ratio = avg / total * 100.0; 726 727 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 728 729 fprintf(output, " # "); 730 color_fprintf(output, color, "%6.2f%%", ratio); 731 fprintf(output, " of all branches "); 732 } 733 734 static void print_l1_dcache_misses(int cpu, 735 struct perf_evsel *evsel __maybe_unused, 736 double avg) 737 { 738 double total, ratio = 0.0; 739 const char *color; 740 741 total = avg_stats(&runtime_l1_dcache_stats[cpu]); 742 743 if (total) 744 ratio = avg / total * 100.0; 745 746 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 747 748 fprintf(output, " # "); 749 color_fprintf(output, color, "%6.2f%%", ratio); 750 fprintf(output, " of all L1-dcache hits "); 751 } 752 753 static void print_l1_icache_misses(int cpu, 754 struct perf_evsel *evsel __maybe_unused, 755 double avg) 756 { 757 double total, ratio = 0.0; 758 const char *color; 759 760 total = avg_stats(&runtime_l1_icache_stats[cpu]); 761 762 if (total) 763 ratio = avg / total * 100.0; 764 765 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 766 767 fprintf(output, " # "); 768 color_fprintf(output, color, "%6.2f%%", ratio); 769 fprintf(output, " of all L1-icache hits "); 770 } 771 772 static void print_dtlb_cache_misses(int cpu, 773 struct perf_evsel *evsel __maybe_unused, 774 double avg) 775 { 776 double total, ratio = 0.0; 777 const char *color; 778 779 total = avg_stats(&runtime_dtlb_cache_stats[cpu]); 780 781 if (total) 782 ratio = avg / total * 100.0; 783 784 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 785 786 fprintf(output, " # "); 787 color_fprintf(output, color, "%6.2f%%", ratio); 788 fprintf(output, " of all dTLB cache hits "); 789 } 790 791 static void print_itlb_cache_misses(int cpu, 792 struct perf_evsel *evsel __maybe_unused, 793 double avg) 794 { 795 double total, ratio = 0.0; 796 const char *color; 797 798 total = avg_stats(&runtime_itlb_cache_stats[cpu]); 799 800 if (total) 801 ratio = avg / total * 100.0; 802 803 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 804 805 fprintf(output, " # "); 806 color_fprintf(output, color, "%6.2f%%", ratio); 807 fprintf(output, " of all iTLB cache hits "); 808 } 809 810 static void print_ll_cache_misses(int cpu, 811 struct perf_evsel *evsel __maybe_unused, 812 double avg) 813 { 814 double total, ratio = 0.0; 815 const char *color; 816 817 total = avg_stats(&runtime_ll_cache_stats[cpu]); 818 819 if (total) 820 ratio = avg / total * 100.0; 821 822 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 823 824 fprintf(output, " # "); 825 color_fprintf(output, color, "%6.2f%%", ratio); 826 fprintf(output, " of all LL-cache hits "); 827 } 828 829 static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) 830 { 831 double total, ratio = 0.0; 832 const char *fmt; 833 834 if (csv_output) 835 fmt = "%.0f%s%s"; 836 else if (big_num) 837 fmt = "%'18.0f%s%-25s"; 838 else 839 fmt = "%18.0f%s%-25s"; 840 841 aggr_printout(evsel, cpu, nr); 842 843 if (aggr_mode == AGGR_GLOBAL) 844 cpu = 0; 845 846 fprintf(output, fmt, avg, csv_sep, perf_evsel__name(evsel)); 847 848 if (evsel->cgrp) 849 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 850 851 if (csv_output || interval) 852 return; 853 854 if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { 855 total = avg_stats(&runtime_cycles_stats[cpu]); 856 if (total) 857 ratio = avg / total; 858 859 fprintf(output, " # %5.2f insns per cycle ", ratio); 860 861 total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]); 862 total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu])); 863 864 if (total && avg) { 865 ratio = total / avg; 866 fprintf(output, "\n # %5.2f stalled cycles per insn", ratio); 867 } 868 869 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && 870 runtime_branches_stats[cpu].n != 0) { 871 print_branch_misses(cpu, evsel, avg); 872 } else if ( 873 evsel->attr.type == PERF_TYPE_HW_CACHE && 874 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | 875 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 876 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && 877 runtime_l1_dcache_stats[cpu].n != 0) { 878 print_l1_dcache_misses(cpu, evsel, avg); 879 } else if ( 880 evsel->attr.type == PERF_TYPE_HW_CACHE && 881 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | 882 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 883 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && 884 runtime_l1_icache_stats[cpu].n != 0) { 885 print_l1_icache_misses(cpu, evsel, avg); 886 } else if ( 887 evsel->attr.type == PERF_TYPE_HW_CACHE && 888 evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | 889 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 890 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && 891 runtime_dtlb_cache_stats[cpu].n != 0) { 892 print_dtlb_cache_misses(cpu, evsel, avg); 893 } else if ( 894 evsel->attr.type == PERF_TYPE_HW_CACHE && 895 evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | 896 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 897 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && 898 runtime_itlb_cache_stats[cpu].n != 0) { 899 print_itlb_cache_misses(cpu, evsel, avg); 900 } else if ( 901 evsel->attr.type == PERF_TYPE_HW_CACHE && 902 evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | 903 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 904 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && 905 runtime_ll_cache_stats[cpu].n != 0) { 906 print_ll_cache_misses(cpu, evsel, avg); 907 } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && 908 runtime_cacherefs_stats[cpu].n != 0) { 909 total = avg_stats(&runtime_cacherefs_stats[cpu]); 910 911 if (total) 912 ratio = avg * 100 / total; 913 914 fprintf(output, " # %8.3f %% of all cache refs ", ratio); 915 916 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { 917 print_stalled_cycles_frontend(cpu, evsel, avg); 918 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { 919 print_stalled_cycles_backend(cpu, evsel, avg); 920 } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { 921 total = avg_stats(&runtime_nsecs_stats[cpu]); 922 923 if (total) 924 ratio = 1.0 * avg / total; 925 926 fprintf(output, " # %8.3f GHz ", ratio); 927 } else if (runtime_nsecs_stats[cpu].n != 0) { 928 char unit = 'M'; 929 930 total = avg_stats(&runtime_nsecs_stats[cpu]); 931 932 if (total) 933 ratio = 1000.0 * avg / total; 934 if (ratio < 0.001) { 935 ratio *= 1000; 936 unit = 'K'; 937 } 938 939 fprintf(output, " # %8.3f %c/sec ", ratio, unit); 940 } else { 941 fprintf(output, " "); 942 } 943 } 944 945 static void print_aggr(char *prefix) 946 { 947 struct perf_evsel *counter; 948 int cpu, cpu2, s, s2, id, nr; 949 u64 ena, run, val; 950 951 if (!(aggr_map || aggr_get_id)) 952 return; 953 954 for (s = 0; s < aggr_map->nr; s++) { 955 id = aggr_map->map[s]; 956 list_for_each_entry(counter, &evsel_list->entries, node) { 957 val = ena = run = 0; 958 nr = 0; 959 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 960 cpu2 = perf_evsel__cpus(counter)->map[cpu]; 961 s2 = aggr_get_id(evsel_list->cpus, cpu2); 962 if (s2 != id) 963 continue; 964 val += counter->counts->cpu[cpu].val; 965 ena += counter->counts->cpu[cpu].ena; 966 run += counter->counts->cpu[cpu].run; 967 nr++; 968 } 969 if (prefix) 970 fprintf(output, "%s", prefix); 971 972 if (run == 0 || ena == 0) { 973 aggr_printout(counter, id, nr); 974 975 fprintf(output, "%*s%s%*s", 976 csv_output ? 0 : 18, 977 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 978 csv_sep, 979 csv_output ? 0 : -24, 980 perf_evsel__name(counter)); 981 982 if (counter->cgrp) 983 fprintf(output, "%s%s", 984 csv_sep, counter->cgrp->name); 985 986 fputc('\n', output); 987 continue; 988 } 989 990 if (nsec_counter(counter)) 991 nsec_printout(id, nr, counter, val); 992 else 993 abs_printout(id, nr, counter, val); 994 995 if (!csv_output) { 996 print_noise(counter, 1.0); 997 998 if (run != ena) 999 fprintf(output, " (%.2f%%)", 1000 100.0 * run / ena); 1001 } 1002 fputc('\n', output); 1003 } 1004 } 1005 } 1006 1007 /* 1008 * Print out the results of a single counter: 1009 * aggregated counts in system-wide mode 1010 */ 1011 static void print_counter_aggr(struct perf_evsel *counter, char *prefix) 1012 { 1013 struct perf_stat *ps = counter->priv; 1014 double avg = avg_stats(&ps->res_stats[0]); 1015 int scaled = counter->counts->scaled; 1016 1017 if (prefix) 1018 fprintf(output, "%s", prefix); 1019 1020 if (scaled == -1) { 1021 fprintf(output, "%*s%s%*s", 1022 csv_output ? 0 : 18, 1023 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 1024 csv_sep, 1025 csv_output ? 0 : -24, 1026 perf_evsel__name(counter)); 1027 1028 if (counter->cgrp) 1029 fprintf(output, "%s%s", csv_sep, counter->cgrp->name); 1030 1031 fputc('\n', output); 1032 return; 1033 } 1034 1035 if (nsec_counter(counter)) 1036 nsec_printout(-1, 0, counter, avg); 1037 else 1038 abs_printout(-1, 0, counter, avg); 1039 1040 print_noise(counter, avg); 1041 1042 if (csv_output) { 1043 fputc('\n', output); 1044 return; 1045 } 1046 1047 if (scaled) { 1048 double avg_enabled, avg_running; 1049 1050 avg_enabled = avg_stats(&ps->res_stats[1]); 1051 avg_running = avg_stats(&ps->res_stats[2]); 1052 1053 fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled); 1054 } 1055 fprintf(output, "\n"); 1056 } 1057 1058 /* 1059 * Print out the results of a single counter: 1060 * does not use aggregated count in system-wide 1061 */ 1062 static void print_counter(struct perf_evsel *counter, char *prefix) 1063 { 1064 u64 ena, run, val; 1065 int cpu; 1066 1067 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1068 val = counter->counts->cpu[cpu].val; 1069 ena = counter->counts->cpu[cpu].ena; 1070 run = counter->counts->cpu[cpu].run; 1071 1072 if (prefix) 1073 fprintf(output, "%s", prefix); 1074 1075 if (run == 0 || ena == 0) { 1076 fprintf(output, "CPU%*d%s%*s%s%*s", 1077 csv_output ? 0 : -4, 1078 perf_evsel__cpus(counter)->map[cpu], csv_sep, 1079 csv_output ? 0 : 18, 1080 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 1081 csv_sep, 1082 csv_output ? 0 : -24, 1083 perf_evsel__name(counter)); 1084 1085 if (counter->cgrp) 1086 fprintf(output, "%s%s", 1087 csv_sep, counter->cgrp->name); 1088 1089 fputc('\n', output); 1090 continue; 1091 } 1092 1093 if (nsec_counter(counter)) 1094 nsec_printout(cpu, 0, counter, val); 1095 else 1096 abs_printout(cpu, 0, counter, val); 1097 1098 if (!csv_output) { 1099 print_noise(counter, 1.0); 1100 1101 if (run != ena) 1102 fprintf(output, " (%.2f%%)", 1103 100.0 * run / ena); 1104 } 1105 fputc('\n', output); 1106 } 1107 } 1108 1109 static void print_stat(int argc, const char **argv) 1110 { 1111 struct perf_evsel *counter; 1112 int i; 1113 1114 fflush(stdout); 1115 1116 if (!csv_output) { 1117 fprintf(output, "\n"); 1118 fprintf(output, " Performance counter stats for "); 1119 if (!perf_target__has_task(&target)) { 1120 fprintf(output, "\'%s", argv[0]); 1121 for (i = 1; i < argc; i++) 1122 fprintf(output, " %s", argv[i]); 1123 } else if (target.pid) 1124 fprintf(output, "process id \'%s", target.pid); 1125 else 1126 fprintf(output, "thread id \'%s", target.tid); 1127 1128 fprintf(output, "\'"); 1129 if (run_count > 1) 1130 fprintf(output, " (%d runs)", run_count); 1131 fprintf(output, ":\n\n"); 1132 } 1133 1134 switch (aggr_mode) { 1135 case AGGR_CORE: 1136 case AGGR_SOCKET: 1137 print_aggr(NULL); 1138 break; 1139 case AGGR_GLOBAL: 1140 list_for_each_entry(counter, &evsel_list->entries, node) 1141 print_counter_aggr(counter, NULL); 1142 break; 1143 case AGGR_NONE: 1144 list_for_each_entry(counter, &evsel_list->entries, node) 1145 print_counter(counter, NULL); 1146 break; 1147 default: 1148 break; 1149 } 1150 1151 if (!csv_output) { 1152 if (!null_run) 1153 fprintf(output, "\n"); 1154 fprintf(output, " %17.9f seconds time elapsed", 1155 avg_stats(&walltime_nsecs_stats)/1e9); 1156 if (run_count > 1) { 1157 fprintf(output, " "); 1158 print_noise_pct(stddev_stats(&walltime_nsecs_stats), 1159 avg_stats(&walltime_nsecs_stats)); 1160 } 1161 fprintf(output, "\n\n"); 1162 } 1163 } 1164 1165 static volatile int signr = -1; 1166 1167 static void skip_signal(int signo) 1168 { 1169 if ((child_pid == -1) || interval) 1170 done = 1; 1171 1172 signr = signo; 1173 /* 1174 * render child_pid harmless 1175 * won't send SIGTERM to a random 1176 * process in case of race condition 1177 * and fast PID recycling 1178 */ 1179 child_pid = -1; 1180 } 1181 1182 static void sig_atexit(void) 1183 { 1184 sigset_t set, oset; 1185 1186 /* 1187 * avoid race condition with SIGCHLD handler 1188 * in skip_signal() which is modifying child_pid 1189 * goal is to avoid send SIGTERM to a random 1190 * process 1191 */ 1192 sigemptyset(&set); 1193 sigaddset(&set, SIGCHLD); 1194 sigprocmask(SIG_BLOCK, &set, &oset); 1195 1196 if (child_pid != -1) 1197 kill(child_pid, SIGTERM); 1198 1199 sigprocmask(SIG_SETMASK, &oset, NULL); 1200 1201 if (signr == -1) 1202 return; 1203 1204 signal(signr, SIG_DFL); 1205 kill(getpid(), signr); 1206 } 1207 1208 static int stat__set_big_num(const struct option *opt __maybe_unused, 1209 const char *s __maybe_unused, int unset) 1210 { 1211 big_num_opt = unset ? 0 : 1; 1212 return 0; 1213 } 1214 1215 static int perf_stat_init_aggr_mode(void) 1216 { 1217 switch (aggr_mode) { 1218 case AGGR_SOCKET: 1219 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { 1220 perror("cannot build socket map"); 1221 return -1; 1222 } 1223 aggr_get_id = cpu_map__get_socket; 1224 break; 1225 case AGGR_CORE: 1226 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { 1227 perror("cannot build core map"); 1228 return -1; 1229 } 1230 aggr_get_id = cpu_map__get_core; 1231 break; 1232 case AGGR_NONE: 1233 case AGGR_GLOBAL: 1234 default: 1235 break; 1236 } 1237 return 0; 1238 } 1239 1240 1241 /* 1242 * Add default attributes, if there were no attributes specified or 1243 * if -d/--detailed, -d -d or -d -d -d is used: 1244 */ 1245 static int add_default_attributes(void) 1246 { 1247 struct perf_event_attr default_attrs[] = { 1248 1249 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 1250 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, 1251 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, 1252 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 1253 1254 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, 1255 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, 1256 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, 1257 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, 1258 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 1259 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, 1260 1261 }; 1262 1263 /* 1264 * Detailed stats (-d), covering the L1 and last level data caches: 1265 */ 1266 struct perf_event_attr detailed_attrs[] = { 1267 1268 { .type = PERF_TYPE_HW_CACHE, 1269 .config = 1270 PERF_COUNT_HW_CACHE_L1D << 0 | 1271 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1272 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1273 1274 { .type = PERF_TYPE_HW_CACHE, 1275 .config = 1276 PERF_COUNT_HW_CACHE_L1D << 0 | 1277 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1278 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1279 1280 { .type = PERF_TYPE_HW_CACHE, 1281 .config = 1282 PERF_COUNT_HW_CACHE_LL << 0 | 1283 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1284 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1285 1286 { .type = PERF_TYPE_HW_CACHE, 1287 .config = 1288 PERF_COUNT_HW_CACHE_LL << 0 | 1289 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1290 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1291 }; 1292 1293 /* 1294 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: 1295 */ 1296 struct perf_event_attr very_detailed_attrs[] = { 1297 1298 { .type = PERF_TYPE_HW_CACHE, 1299 .config = 1300 PERF_COUNT_HW_CACHE_L1I << 0 | 1301 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1302 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1303 1304 { .type = PERF_TYPE_HW_CACHE, 1305 .config = 1306 PERF_COUNT_HW_CACHE_L1I << 0 | 1307 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1308 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1309 1310 { .type = PERF_TYPE_HW_CACHE, 1311 .config = 1312 PERF_COUNT_HW_CACHE_DTLB << 0 | 1313 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1314 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1315 1316 { .type = PERF_TYPE_HW_CACHE, 1317 .config = 1318 PERF_COUNT_HW_CACHE_DTLB << 0 | 1319 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1320 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1321 1322 { .type = PERF_TYPE_HW_CACHE, 1323 .config = 1324 PERF_COUNT_HW_CACHE_ITLB << 0 | 1325 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1326 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1327 1328 { .type = PERF_TYPE_HW_CACHE, 1329 .config = 1330 PERF_COUNT_HW_CACHE_ITLB << 0 | 1331 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1332 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1333 1334 }; 1335 1336 /* 1337 * Very, very detailed stats (-d -d -d), adding prefetch events: 1338 */ 1339 struct perf_event_attr very_very_detailed_attrs[] = { 1340 1341 { .type = PERF_TYPE_HW_CACHE, 1342 .config = 1343 PERF_COUNT_HW_CACHE_L1D << 0 | 1344 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 1345 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1346 1347 { .type = PERF_TYPE_HW_CACHE, 1348 .config = 1349 PERF_COUNT_HW_CACHE_L1D << 0 | 1350 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 1351 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1352 }; 1353 1354 /* Set attrs if no event is selected and !null_run: */ 1355 if (null_run) 1356 return 0; 1357 1358 if (!evsel_list->nr_entries) { 1359 if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0) 1360 return -1; 1361 } 1362 1363 /* Detailed events get appended to the event list: */ 1364 1365 if (detailed_run < 1) 1366 return 0; 1367 1368 /* Append detailed run extra attributes: */ 1369 if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) 1370 return -1; 1371 1372 if (detailed_run < 2) 1373 return 0; 1374 1375 /* Append very detailed run extra attributes: */ 1376 if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) 1377 return -1; 1378 1379 if (detailed_run < 3) 1380 return 0; 1381 1382 /* Append very, very detailed run extra attributes: */ 1383 return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); 1384 } 1385 1386 int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) 1387 { 1388 bool append_file = false; 1389 int output_fd = 0; 1390 const char *output_name = NULL; 1391 const struct option options[] = { 1392 OPT_CALLBACK('e', "event", &evsel_list, "event", 1393 "event selector. use 'perf list' to list available events", 1394 parse_events_option), 1395 OPT_CALLBACK(0, "filter", &evsel_list, "filter", 1396 "event filter", parse_filter), 1397 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 1398 "child tasks do not inherit counters"), 1399 OPT_STRING('p', "pid", &target.pid, "pid", 1400 "stat events on existing process id"), 1401 OPT_STRING('t', "tid", &target.tid, "tid", 1402 "stat events on existing thread id"), 1403 OPT_BOOLEAN('a', "all-cpus", &target.system_wide, 1404 "system-wide collection from all CPUs"), 1405 OPT_BOOLEAN('g', "group", &group, 1406 "put the counters into a counter group"), 1407 OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"), 1408 OPT_INCR('v', "verbose", &verbose, 1409 "be more verbose (show counter open errors, etc)"), 1410 OPT_INTEGER('r', "repeat", &run_count, 1411 "repeat command and print average + stddev (max: 100, forever: 0)"), 1412 OPT_BOOLEAN('n', "null", &null_run, 1413 "null run - dont start any counters"), 1414 OPT_INCR('d', "detailed", &detailed_run, 1415 "detailed run - start a lot of events"), 1416 OPT_BOOLEAN('S', "sync", &sync_run, 1417 "call sync() before starting a run"), 1418 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 1419 "print large numbers with thousands\' separators", 1420 stat__set_big_num), 1421 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 1422 "list of cpus to monitor in system-wide"), 1423 OPT_SET_UINT('A', "no-aggr", &aggr_mode, 1424 "disable CPU count aggregation", AGGR_NONE), 1425 OPT_STRING('x', "field-separator", &csv_sep, "separator", 1426 "print counts with custom separator"), 1427 OPT_CALLBACK('G', "cgroup", &evsel_list, "name", 1428 "monitor event in cgroup name only", parse_cgroups), 1429 OPT_STRING('o', "output", &output_name, "file", "output file name"), 1430 OPT_BOOLEAN(0, "append", &append_file, "append to the output file"), 1431 OPT_INTEGER(0, "log-fd", &output_fd, 1432 "log output to fd, instead of stderr"), 1433 OPT_STRING(0, "pre", &pre_cmd, "command", 1434 "command to run prior to the measured command"), 1435 OPT_STRING(0, "post", &post_cmd, "command", 1436 "command to run after to the measured command"), 1437 OPT_UINTEGER('I', "interval-print", &interval, 1438 "print counts at regular interval in ms (>= 100)"), 1439 OPT_SET_UINT(0, "per-socket", &aggr_mode, 1440 "aggregate counts per processor socket", AGGR_SOCKET), 1441 OPT_SET_UINT(0, "per-core", &aggr_mode, 1442 "aggregate counts per physical processor core", AGGR_CORE), 1443 OPT_UINTEGER('D', "delay", &initial_delay, 1444 "ms to wait before starting measurement after program start"), 1445 OPT_END() 1446 }; 1447 const char * const stat_usage[] = { 1448 "perf stat [<options>] [<command>]", 1449 NULL 1450 }; 1451 int status = -ENOMEM, run_idx; 1452 const char *mode; 1453 1454 setlocale(LC_ALL, ""); 1455 1456 evsel_list = perf_evlist__new(); 1457 if (evsel_list == NULL) 1458 return -ENOMEM; 1459 1460 argc = parse_options(argc, argv, options, stat_usage, 1461 PARSE_OPT_STOP_AT_NON_OPTION); 1462 1463 output = stderr; 1464 if (output_name && strcmp(output_name, "-")) 1465 output = NULL; 1466 1467 if (output_name && output_fd) { 1468 fprintf(stderr, "cannot use both --output and --log-fd\n"); 1469 usage_with_options(stat_usage, options); 1470 } 1471 1472 if (output_fd < 0) { 1473 fprintf(stderr, "argument to --log-fd must be a > 0\n"); 1474 usage_with_options(stat_usage, options); 1475 } 1476 1477 if (!output) { 1478 struct timespec tm; 1479 mode = append_file ? "a" : "w"; 1480 1481 output = fopen(output_name, mode); 1482 if (!output) { 1483 perror("failed to create output file"); 1484 return -1; 1485 } 1486 clock_gettime(CLOCK_REALTIME, &tm); 1487 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); 1488 } else if (output_fd > 0) { 1489 mode = append_file ? "a" : "w"; 1490 output = fdopen(output_fd, mode); 1491 if (!output) { 1492 perror("Failed opening logfd"); 1493 return -errno; 1494 } 1495 } 1496 1497 if (csv_sep) { 1498 csv_output = true; 1499 if (!strcmp(csv_sep, "\\t")) 1500 csv_sep = "\t"; 1501 } else 1502 csv_sep = DEFAULT_SEPARATOR; 1503 1504 /* 1505 * let the spreadsheet do the pretty-printing 1506 */ 1507 if (csv_output) { 1508 /* User explicitly passed -B? */ 1509 if (big_num_opt == 1) { 1510 fprintf(stderr, "-B option not supported with -x\n"); 1511 usage_with_options(stat_usage, options); 1512 } else /* Nope, so disable big number formatting */ 1513 big_num = false; 1514 } else if (big_num_opt == 0) /* User passed --no-big-num */ 1515 big_num = false; 1516 1517 if (!argc && !perf_target__has_task(&target)) 1518 usage_with_options(stat_usage, options); 1519 if (run_count < 0) { 1520 usage_with_options(stat_usage, options); 1521 } else if (run_count == 0) { 1522 forever = true; 1523 run_count = 1; 1524 } 1525 1526 /* no_aggr, cgroup are for system-wide only */ 1527 if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) 1528 && !perf_target__has_cpu(&target)) { 1529 fprintf(stderr, "both cgroup and no-aggregation " 1530 "modes only available in system-wide mode\n"); 1531 1532 usage_with_options(stat_usage, options); 1533 return -1; 1534 } 1535 1536 if (add_default_attributes()) 1537 goto out; 1538 1539 perf_target__validate(&target); 1540 1541 if (perf_evlist__create_maps(evsel_list, &target) < 0) { 1542 if (perf_target__has_task(&target)) 1543 pr_err("Problems finding threads of monitor\n"); 1544 if (perf_target__has_cpu(&target)) 1545 perror("failed to parse CPUs map"); 1546 1547 usage_with_options(stat_usage, options); 1548 return -1; 1549 } 1550 if (interval && interval < 100) { 1551 pr_err("print interval must be >= 100ms\n"); 1552 usage_with_options(stat_usage, options); 1553 return -1; 1554 } 1555 1556 if (perf_evlist__alloc_stats(evsel_list, interval)) 1557 goto out_free_maps; 1558 1559 if (perf_stat_init_aggr_mode()) 1560 goto out; 1561 1562 /* 1563 * We dont want to block the signals - that would cause 1564 * child tasks to inherit that and Ctrl-C would not work. 1565 * What we want is for Ctrl-C to work in the exec()-ed 1566 * task, but being ignored by perf stat itself: 1567 */ 1568 atexit(sig_atexit); 1569 if (!forever) 1570 signal(SIGINT, skip_signal); 1571 signal(SIGCHLD, skip_signal); 1572 signal(SIGALRM, skip_signal); 1573 signal(SIGABRT, skip_signal); 1574 1575 status = 0; 1576 for (run_idx = 0; forever || run_idx < run_count; run_idx++) { 1577 if (run_count != 1 && verbose) 1578 fprintf(output, "[ perf stat: executing run #%d ... ]\n", 1579 run_idx + 1); 1580 1581 status = run_perf_stat(argc, argv); 1582 if (forever && status != -1) { 1583 print_stat(argc, argv); 1584 perf_stat__reset_stats(evsel_list); 1585 } 1586 } 1587 1588 if (!forever && status != -1 && !interval) 1589 print_stat(argc, argv); 1590 1591 perf_evlist__free_stats(evsel_list); 1592 out_free_maps: 1593 perf_evlist__delete_maps(evsel_list); 1594 out: 1595 perf_evlist__delete(evsel_list); 1596 return status; 1597 } 1598