/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counters summary
 * overview about any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ./hackbench 10

  Time: 0.118

  Performance counter stats for './hackbench 10':

       1708.761321 task-clock                #   11.037 CPUs utilized
            41,190 context-switches          #    0.024 M/sec
             6,735 CPU-migrations            #    0.004 M/sec
            17,318 page-faults               #    0.010 M/sec
     5,205,202,243 cycles                    #    3.046 GHz
     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
     2,603,501,247 instructions              #    0.50  insns per cycle
                                             #    1.48  stalled cycles per insn
       484,357,498 branches                  #  283.455 M/sec
         6,388,934 branch-misses             #    1.32% of all branches

        0.154822978  seconds time elapsed

 *
 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include "perf.h"
#include "builtin.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/color.h"
#include "util/stat.h"
#include "util/header.h"
#include "util/cpumap.h"
#include "util/thread.h"
#include "util/thread_map.h"

#include <stdlib.h>
#include <sys/prctl.h>
#include <locale.h>

#define DEFAULT_SEPARATOR	" "
#define CNTR_NOT_SUPPORTED	"<not supported>"
#define CNTR_NOT_COUNTED	"<not counted>"

static void print_stat(int argc, const char **argv);
static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
static void print_counter(struct perf_evsel *counter, char *prefix);
static void print_aggr(char *prefix);

static struct perf_evlist	*evsel_list;

static struct perf_target	target = {
	.uid	= UINT_MAX,
};

enum aggr_mode {
	AGGR_NONE,
	AGGR_GLOBAL,
	AGGR_SOCKET,
	AGGR_CORE,
};

static int			run_count			=  1;
static bool			no_inherit			= false;
static bool			scale				=  true;
static enum aggr_mode		aggr_mode			= AGGR_GLOBAL;
static volatile pid_t		child_pid			= -1;
static bool			null_run			=  false;
static int			detailed_run			=  0;
static bool			big_num				=  true;
static int			big_num_opt			=  -1;
static const char		*csv_sep			= NULL;
static bool			csv_output			= false;
static bool			group				= false;
static FILE			*output				= NULL;
static const char		*pre_cmd			= NULL;
static const char		*post_cmd			= NULL;
static bool			sync_run			= false;
static unsigned int		interval			= 0;
static unsigned int		initial_delay			= 0;
static bool			forever				= false;
static struct timespec		ref_time;
static struct cpu_map		*aggr_map;
static int			(*aggr_get_id)(struct cpu_map *m, int cpu);

static volatile int done = 0;

struct perf_stat {
	struct stats	  res_stats[3];
};
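
/*
 * Compute r = a - b for timespecs, borrowing one second when the
 * nanosecond field would go negative. For example, a = {2, 100000000}
 * minus b = {1, 900000000} yields r = {0, 200000000}, i.e. 0.2s.
 */
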
static inline void diff_timespec(struct timespec *r, struct timespec *a,
				 struct timespec *b)
{
	r->tv_sec = a->tv_sec - b->tv_sec;
	if (a->tv_nsec < b->tv_nsec) {
		r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
		r->tv_sec--;
	} else {
		r->tv_nsec = a->tv_nsec - b->tv_nsec;
	}
}

static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
{
	return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
}

static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
{
	return perf_evsel__cpus(evsel)->nr;
}

static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
{
	memset(evsel->priv, 0, sizeof(struct perf_stat));
}

static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
{
	evsel->priv = zalloc(sizeof(struct perf_stat));
	return evsel->priv == NULL ? -ENOMEM : 0;
}

static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
{
	free(evsel->priv);
	evsel->priv = NULL;
}

static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
{
	void *addr;
	size_t sz;

	sz = sizeof(*evsel->counts) +
	     (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));

	addr = zalloc(sz);
	if (!addr)
		return -ENOMEM;

	evsel->prev_raw_counts = addr;

	return 0;
}

static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
{
	free(evsel->prev_raw_counts);
	evsel->prev_raw_counts = NULL;
}

static void perf_evlist__free_stats(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	list_for_each_entry(evsel, &evlist->entries, node) {
		perf_evsel__free_stat_priv(evsel);
		perf_evsel__free_counts(evsel);
		perf_evsel__free_prev_raw_counts(evsel);
	}
}

static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
{
	struct perf_evsel *evsel;

	list_for_each_entry(evsel, &evlist->entries, node) {
		if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
		    perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 ||
		    (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0))
			goto out_free;
	}

	return 0;

out_free:
	perf_evlist__free_stats(evlist);
	return -1;
}
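
/*
 * "Shadow" stats: remember the most recent reading of reference events
 * (task clock, cycles, branches, cache accesses, ...) so that the
 * printout code below can turn a raw count into derived metrics such
 * as IPC, GHz and miss percentages.
 */
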
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
static struct stats runtime_branches_stats[MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
static struct stats walltime_nsecs_stats;

static void perf_stat__reset_stats(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	list_for_each_entry(evsel, &evlist->entries, node) {
		perf_evsel__reset_stat_priv(evsel);
		perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
	}

	memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
	memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
	memset(runtime_stalled_cycles_front_stats, 0,
	       sizeof(runtime_stalled_cycles_front_stats));
	memset(runtime_stalled_cycles_back_stats, 0,
	       sizeof(runtime_stalled_cycles_back_stats));
	memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
	memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
	memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
	memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
	memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
	memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
	memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}

static int create_perf_stat_counter(struct perf_evsel *evsel)
{
	struct perf_event_attr *attr = &evsel->attr;

	if (scale)
		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
				    PERF_FORMAT_TOTAL_TIME_RUNNING;

	attr->inherit = !no_inherit;

	if (perf_target__has_cpu(&target))
		return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));

	if (!perf_target__has_task(&target) &&
	    perf_evsel__is_group_leader(evsel)) {
		attr->disabled = 1;
		if (!initial_delay)
			attr->enable_on_exec = 1;
	}

	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
}

/*
 * Does the counter have nsecs as a unit?
 */
static inline int nsec_counter(struct perf_evsel *evsel)
{
	if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
	    perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
		return 1;

	return 0;
}

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
{
	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
		update_stats(&runtime_nsecs_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_stats(&runtime_cycles_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_stats(&runtime_branches_stats[0], count[0]);
	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_stats(&runtime_cacherefs_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_stats(&runtime_l1_dcache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_stats(&runtime_l1_icache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_stats(&runtime_ll_cache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_stats(&runtime_dtlb_cache_stats[0], count[0]);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_stats(&runtime_itlb_cache_stats[0], count[0]);
}
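
/*
 * A read with scaling enabled yields a triple: count[0] is the raw
 * value, count[1] time_enabled and count[2] time_running (see the
 * PERF_FORMAT_TOTAL_TIME_* flags set in create_perf_stat_counter()).
 * res_stats[] accumulates all three across repeated runs.
 */
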
/*
 * Read out the results of a single counter:
 * aggregate counts across CPUs in system-wide mode
 */
static int read_counter_aggr(struct perf_evsel *counter)
{
	struct perf_stat *ps = counter->priv;
	u64 *count = counter->counts->aggr.values;
	int i;

	if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter),
			       thread_map__nr(evsel_list->threads), scale) < 0)
		return -1;

	for (i = 0; i < 3; i++)
		update_stats(&ps->res_stats[i], count[i]);

	if (verbose) {
		fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
			perf_evsel__name(counter), count[0], count[1], count[2]);
	}

	/*
	 * Save the full runtime - to allow normalization during printout:
	 */
	update_shadow_stats(counter, count);

	return 0;
}

/*
 * Read out the results of a single counter:
 * do not aggregate counts across CPUs in system-wide mode
 */
static int read_counter(struct perf_evsel *counter)
{
	u64 *count;
	int cpu;

	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
		if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
			return -1;

		count = counter->counts->cpu[cpu].values;

		update_shadow_stats(counter, count);
	}

	return 0;
}

static void print_interval(void)
{
	static int num_print_interval;
	struct perf_evsel *counter;
	struct perf_stat *ps;
	struct timespec ts, rs;
	char prefix[64];

	if (aggr_mode == AGGR_GLOBAL) {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			ps = counter->priv;
			memset(ps->res_stats, 0, sizeof(ps->res_stats));
			read_counter_aggr(counter);
		}
	} else {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			ps = counter->priv;
			memset(ps->res_stats, 0, sizeof(ps->res_stats));
			read_counter(counter);
		}
	}

	clock_gettime(CLOCK_MONOTONIC, &ts);
	diff_timespec(&rs, &ts, &ref_time);
	sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);

	if (num_print_interval == 0 && !csv_output) {
		switch (aggr_mode) {
		case AGGR_SOCKET:
			fprintf(output, "#           time socket cpus             counts events\n");
			break;
		case AGGR_CORE:
			fprintf(output, "#           time core         cpus             counts events\n");
			break;
		case AGGR_NONE:
			fprintf(output, "#           time CPU                 counts events\n");
			break;
		case AGGR_GLOBAL:
		default:
			fprintf(output, "#           time             counts events\n");
		}
	}

	if (++num_print_interval == 25)
		num_print_interval = 0;

	switch (aggr_mode) {
	case AGGR_CORE:
	case AGGR_SOCKET:
		print_aggr(prefix);
		break;
	case AGGR_NONE:
		list_for_each_entry(counter, &evsel_list->entries, node)
			print_counter(counter, prefix);
		break;
	case AGGR_GLOBAL:
	default:
		list_for_each_entry(counter, &evsel_list->entries, node)
			print_counter_aggr(counter, prefix);
	}

	fflush(output);
}

static void handle_initial_delay(void)
{
	struct perf_evsel *counter;

	if (initial_delay) {
		const int ncpus = cpu_map__nr(evsel_list->cpus),
			nthreads = thread_map__nr(evsel_list->threads);

		usleep(initial_delay * 1000);
		list_for_each_entry(counter, &evsel_list->entries, node)
			perf_evsel__enable(counter, ncpus, nthreads);
	}
}
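
/*
 * Measurement body: fork the workload (if any), open one counter per
 * event, start the clock, then either poll at -I intervals or wait for
 * the child / Ctrl-C, and finally read all counters back.
 */
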
static int __run_perf_stat(int argc, const char **argv)
{
	char msg[512];
	unsigned long long t0, t1;
	struct perf_evsel *counter;
	struct timespec ts;
	int status = 0;
	const bool forks = (argc > 0);

	if (interval) {
		ts.tv_sec = interval / 1000;
		ts.tv_nsec = (interval % 1000) * 1000000;
	} else {
		ts.tv_sec = 1;
		ts.tv_nsec = 0;
	}

	if (forks) {
		if (perf_evlist__prepare_workload(evsel_list, &target, argv,
						  false, false) < 0) {
			perror("failed to prepare workload");
			return -1;
		}
	}

	if (group)
		perf_evlist__set_leader(evsel_list);

	list_for_each_entry(counter, &evsel_list->entries, node) {
		if (create_perf_stat_counter(counter) < 0) {
			/*
			 * PPC returns ENXIO for HW counters until 2.6.37
			 * (behavior changed with commit b0a873e).
			 */
			if (errno == EINVAL || errno == ENOSYS ||
			    errno == ENOENT || errno == EOPNOTSUPP ||
			    errno == ENXIO) {
				if (verbose)
					ui__warning("%s event is not supported by the kernel.\n",
						    perf_evsel__name(counter));
				counter->supported = false;
				continue;
			}

			perf_evsel__open_strerror(counter, &target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);

			if (child_pid != -1)
				kill(child_pid, SIGTERM);

			return -1;
		}
		counter->supported = true;
	}

	if (perf_evlist__apply_filters(evsel_list)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		return -1;
	}

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();
	clock_gettime(CLOCK_MONOTONIC, &ref_time);

	if (forks) {
		perf_evlist__start_workload(evsel_list);
		handle_initial_delay();

		if (interval) {
			while (!waitpid(child_pid, &status, WNOHANG)) {
				nanosleep(&ts, NULL);
				print_interval();
			}
		}
		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), argv[0]);
	} else {
		handle_initial_delay();
		while (!done) {
			nanosleep(&ts, NULL);
			if (interval)
				print_interval();
		}
	}

	t1 = rdclock();

	update_stats(&walltime_nsecs_stats, t1 - t0);

	if (aggr_mode == AGGR_GLOBAL) {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			read_counter_aggr(counter);
			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
					     thread_map__nr(evsel_list->threads));
		}
	} else {
		list_for_each_entry(counter, &evsel_list->entries, node) {
			read_counter(counter);
			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
		}
	}

	return WEXITSTATUS(status);
}

static int run_perf_stat(int argc __maybe_unused, const char **argv)
{
	int ret;

	if (pre_cmd) {
		ret = system(pre_cmd);
		if (ret)
			return ret;
	}

	if (sync_run)
		sync();

	ret = __run_perf_stat(argc, argv);
	if (ret)
		return ret;

	if (post_cmd) {
		ret = system(post_cmd);
		if (ret)
			return ret;
	}

	return ret;
}

static void print_noise_pct(double total, double avg)
{
	double pct = rel_stddev_stats(total, avg);

	if (csv_output)
		fprintf(output, "%s%.2f%%", csv_sep, pct);
	else if (pct)
		fprintf(output, "  ( +-%6.2f%% )", pct);
}

static void print_noise(struct perf_evsel *evsel, double avg)
{
	struct perf_stat *ps;

	if (run_count == 1)
		return;

	ps = evsel->priv;
	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
}
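
/*
 * Print the per-line identification prefix, e.g. "CPU0" in per-CPU
 * mode, "S0" in per-socket mode, or "S0-C1" followed by the number of
 * aggregated CPUs in per-core mode.
 */
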
static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
{
	switch (aggr_mode) {
	case AGGR_CORE:
		fprintf(output, "S%d-C%*d%s%*d%s",
			cpu_map__id_to_socket(id),
			csv_output ? 0 : -8,
			cpu_map__id_to_cpu(id),
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
		break;
	case AGGR_SOCKET:
		fprintf(output, "S%*d%s%*d%s",
			csv_output ? 0 : -5,
			id,
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
		break;
	case AGGR_NONE:
		fprintf(output, "CPU%*d%s",
			csv_output ? 0 : -4,
			perf_evsel__cpus(evsel)->map[id], csv_sep);
		break;
	case AGGR_GLOBAL:
	default:
		break;
	}
}

static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
{
	double msecs = avg / 1e6;
	const char *fmt = csv_output ? "%.6f%s%s" : "%18.6f%s%-25s";

	aggr_printout(evsel, cpu, nr);

	fprintf(output, fmt, msecs, csv_sep, perf_evsel__name(evsel));

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

	if (csv_output || interval)
		return;

	if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
		fprintf(output, " # %8.3f CPUs utilized          ",
			avg / avg_stats(&walltime_nsecs_stats));
	else
		fprintf(output, "                                   ");
}

/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES] 	= { 20.0, 10.0,  5.0 },
	};
	const char *color = PERF_COLOR_NORMAL;

	if (ratio > grc_table[type][0])
		color = PERF_COLOR_RED;
	else if (ratio > grc_table[type][1])
		color = PERF_COLOR_MAGENTA;
	else if (ratio > grc_table[type][2])
		color = PERF_COLOR_YELLOW;

	return color;
}
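
/*
 * With the table above, a stalled-cycles-frontend ratio over 50% is
 * printed red, over 30% magenta, over 10% yellow, and anything lower
 * in the normal color.
 */
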
static void print_stalled_cycles_frontend(int cpu,
					  struct perf_evsel *evsel
					  __maybe_unused, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_cycles_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " frontend cycles idle   ");
}

static void print_stalled_cycles_backend(int cpu,
					 struct perf_evsel *evsel
					 __maybe_unused, double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_cycles_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " backend  cycles idle   ");
}

static void print_branch_misses(int cpu,
				struct perf_evsel *evsel __maybe_unused,
				double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_branches_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all branches        ");
}

static void print_l1_dcache_misses(int cpu,
				   struct perf_evsel *evsel __maybe_unused,
				   double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_l1_dcache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all L1-dcache hits  ");
}

static void print_l1_icache_misses(int cpu,
				   struct perf_evsel *evsel __maybe_unused,
				   double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_l1_icache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all L1-icache hits  ");
}

static void print_dtlb_cache_misses(int cpu,
				    struct perf_evsel *evsel __maybe_unused,
				    double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_dtlb_cache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all dTLB cache hits ");
}

static void print_itlb_cache_misses(int cpu,
				    struct perf_evsel *evsel __maybe_unused,
				    double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_itlb_cache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all iTLB cache hits ");
}

static void print_ll_cache_misses(int cpu,
				  struct perf_evsel *evsel __maybe_unused,
				  double avg)
{
	double total, ratio = 0.0;
	const char *color;

	total = avg_stats(&runtime_ll_cache_stats[cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	fprintf(output, " #  ");
	color_fprintf(output, color, "%6.2f%%", ratio);
	fprintf(output, " of all LL-cache hits   ");
}
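
/*
 * Print a raw event count and, where a matching reference count was
 * recorded in the shadow stats, a derived metric in the comment
 * column: insns per cycle, stall and miss ratios, GHz for cycles, or
 * a generic M/sec rate.
 */
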
static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
{
	double total, ratio = 0.0;
	const char *fmt;

	if (csv_output)
		fmt = "%.0f%s%s";
	else if (big_num)
		fmt = "%'18.0f%s%-25s";
	else
		fmt = "%18.0f%s%-25s";

	aggr_printout(evsel, cpu, nr);

	if (aggr_mode == AGGR_GLOBAL)
		cpu = 0;

	fprintf(output, fmt, avg, csv_sep, perf_evsel__name(evsel));

	if (evsel->cgrp)
		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

	if (csv_output || interval)
		return;

	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		total = avg_stats(&runtime_cycles_stats[cpu]);
		if (total)
			ratio = avg / total;

		fprintf(output, " #   %5.2f  insns per cycle        ", ratio);

		total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));

		if (total && avg) {
			ratio = total / avg;
			fprintf(output, "\n                                             #   %5.2f  stalled cycles per insn", ratio);
		}

	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
			runtime_branches_stats[cpu].n != 0) {
		print_branch_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == (PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_l1_dcache_stats[cpu].n != 0) {
		print_l1_dcache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == (PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_l1_icache_stats[cpu].n != 0) {
		print_l1_icache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == (PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_dtlb_cache_stats[cpu].n != 0) {
		print_dtlb_cache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == (PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_itlb_cache_stats[cpu].n != 0) {
		print_itlb_cache_misses(cpu, evsel, avg);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == (PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
			runtime_ll_cache_stats[cpu].n != 0) {
		print_ll_cache_misses(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
			runtime_cacherefs_stats[cpu].n != 0) {
		total = avg_stats(&runtime_cacherefs_stats[cpu]);

		if (total)
			ratio = avg * 100 / total;

		fprintf(output, " # %8.3f %% of all cache refs    ", ratio);

	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(cpu, evsel, avg);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total)
			ratio = 1.0 * avg / total;

		fprintf(output, " # %8.3f GHz                    ", ratio);
	} else if (runtime_nsecs_stats[cpu].n != 0) {
		char unit = 'M';

		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total)
			ratio = 1000.0 * avg / total;
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}

		fprintf(output, " # %8.3f %c/sec                  ", ratio, unit);
	} else {
		fprintf(output, "                                   ");
	}
}
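
/*
 * Per-socket/per-core printout: for each aggregation id, sum val,
 * time_enabled and time_running over all CPUs mapping to that id and
 * print one line per event.
 */
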
static void print_aggr(char *prefix)
{
	struct perf_evsel *counter;
	int cpu, cpu2, s, s2, id, nr;
	u64 ena, run, val;

	if (!(aggr_map || aggr_get_id))
		return;

	for (s = 0; s < aggr_map->nr; s++) {
		id = aggr_map->map[s];
		list_for_each_entry(counter, &evsel_list->entries, node) {
			val = ena = run = 0;
			nr = 0;
			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
				cpu2 = perf_evsel__cpus(counter)->map[cpu];
				s2 = aggr_get_id(evsel_list->cpus, cpu2);
				if (s2 != id)
					continue;
				val += counter->counts->cpu[cpu].val;
				ena += counter->counts->cpu[cpu].ena;
				run += counter->counts->cpu[cpu].run;
				nr++;
			}
			if (prefix)
				fprintf(output, "%s", prefix);

			if (run == 0 || ena == 0) {
				aggr_printout(counter, id, nr);

				fprintf(output, "%*s%s%*s",
					csv_output ? 0 : 18,
					counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
					csv_sep,
					csv_output ? 0 : -24,
					perf_evsel__name(counter));

				if (counter->cgrp)
					fprintf(output, "%s%s",
						csv_sep, counter->cgrp->name);

				fputc('\n', output);
				continue;
			}

			if (nsec_counter(counter))
				nsec_printout(id, nr, counter, val);
			else
				abs_printout(id, nr, counter, val);

			if (!csv_output) {
				print_noise(counter, 1.0);

				if (run != ena)
					fprintf(output, "  (%.2f%%)",
						100.0 * run / ena);
			}
			fputc('\n', output);
		}
	}
}

/*
 * Print out the results of a single counter:
 * aggregated counts in system-wide mode
 */
static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
{
	struct perf_stat *ps = counter->priv;
	double avg = avg_stats(&ps->res_stats[0]);
	int scaled = counter->counts->scaled;

	if (prefix)
		fprintf(output, "%s", prefix);

	if (scaled == -1) {
		fprintf(output, "%*s%s%*s",
			csv_output ? 0 : 18,
			counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
			csv_sep,
			csv_output ? 0 : -24,
			perf_evsel__name(counter));

		if (counter->cgrp)
			fprintf(output, "%s%s", csv_sep, counter->cgrp->name);

		fputc('\n', output);
		return;
	}

	if (nsec_counter(counter))
		nsec_printout(-1, 0, counter, avg);
	else
		abs_printout(-1, 0, counter, avg);

	print_noise(counter, avg);

	if (csv_output) {
		fputc('\n', output);
		return;
	}

	if (scaled) {
		double avg_enabled, avg_running;

		avg_enabled = avg_stats(&ps->res_stats[1]);
		avg_running = avg_stats(&ps->res_stats[2]);

		fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled);
	}
	fprintf(output, "\n");
}
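
/*
 * The trailing "[xx.xx%]" printed above is the multiplexing ratio:
 * the time the counter was actually running relative to the time it
 * was enabled.
 */
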
/*
 * Print out the results of a single counter:
 * does not use aggregated count in system-wide
 */
static void print_counter(struct perf_evsel *counter, char *prefix)
{
	u64 ena, run, val;
	int cpu;

	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
		val = counter->counts->cpu[cpu].val;
		ena = counter->counts->cpu[cpu].ena;
		run = counter->counts->cpu[cpu].run;

		if (prefix)
			fprintf(output, "%s", prefix);

		if (run == 0 || ena == 0) {
			fprintf(output, "CPU%*d%s%*s%s%*s",
				csv_output ? 0 : -4,
				perf_evsel__cpus(counter)->map[cpu], csv_sep,
				csv_output ? 0 : 18,
				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
				csv_sep,
				csv_output ? 0 : -24,
				perf_evsel__name(counter));

			if (counter->cgrp)
				fprintf(output, "%s%s",
					csv_sep, counter->cgrp->name);

			fputc('\n', output);
			continue;
		}

		if (nsec_counter(counter))
			nsec_printout(cpu, 0, counter, val);
		else
			abs_printout(cpu, 0, counter, val);

		if (!csv_output) {
			print_noise(counter, 1.0);

			if (run != ena)
				fprintf(output, "  (%.2f%%)",
					100.0 * run / ena);
		}
		fputc('\n', output);
	}
}

static void print_stat(int argc, const char **argv)
{
	struct perf_evsel *counter;
	int i;

	fflush(stdout);

	if (!csv_output) {
		fprintf(output, "\n");
		fprintf(output, " Performance counter stats for ");
		if (!perf_target__has_task(&target)) {
			fprintf(output, "\'%s", argv[0]);
			for (i = 1; i < argc; i++)
				fprintf(output, " %s", argv[i]);
		} else if (target.pid)
			fprintf(output, "process id \'%s", target.pid);
		else
			fprintf(output, "thread id \'%s", target.tid);

		fprintf(output, "\'");
		if (run_count > 1)
			fprintf(output, " (%d runs)", run_count);
		fprintf(output, ":\n\n");
	}

	switch (aggr_mode) {
	case AGGR_CORE:
	case AGGR_SOCKET:
		print_aggr(NULL);
		break;
	case AGGR_GLOBAL:
		list_for_each_entry(counter, &evsel_list->entries, node)
			print_counter_aggr(counter, NULL);
		break;
	case AGGR_NONE:
		list_for_each_entry(counter, &evsel_list->entries, node)
			print_counter(counter, NULL);
		break;
	default:
		break;
	}

	if (!csv_output) {
		if (!null_run)
			fprintf(output, "\n");
		fprintf(output, " %17.9f seconds time elapsed",
				avg_stats(&walltime_nsecs_stats)/1e9);
		if (run_count > 1) {
			fprintf(output, "                                        ");
			print_noise_pct(stddev_stats(&walltime_nsecs_stats),
					avg_stats(&walltime_nsecs_stats));
		}
		fprintf(output, "\n\n");
	}
}

static volatile int signr = -1;

static void skip_signal(int signo)
{
	if ((child_pid == -1) || interval)
		done = 1;

	signr = signo;
	/*
	 * Render child_pid harmless: make sure we won't send SIGTERM
	 * to a random process in case of a race condition combined
	 * with fast PID recycling.
	 */
	child_pid = -1;
}

static void sig_atexit(void)
{
	sigset_t set, oset;

	/*
	 * Avoid a race with the SIGCHLD handler in skip_signal(),
	 * which modifies child_pid: the goal is to avoid sending
	 * SIGTERM to a random process.
	 */
	sigemptyset(&set);
	sigaddset(&set, SIGCHLD);
	sigprocmask(SIG_BLOCK, &set, &oset);

	if (child_pid != -1)
		kill(child_pid, SIGTERM);

	sigprocmask(SIG_SETMASK, &oset, NULL);

	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}
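
/*
 * big_num_opt is a tristate: -1 when the user said nothing (keep the
 * locale-aware default), 1 for -B, 0 for --no-big-num.
 */
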
static int stat__set_big_num(const struct option *opt __maybe_unused,
			     const char *s __maybe_unused, int unset)
{
	big_num_opt = unset ? 0 : 1;
	return 0;
}

static int perf_stat_init_aggr_mode(void)
{
	switch (aggr_mode) {
	case AGGR_SOCKET:
		if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
			perror("cannot build socket map");
			return -1;
		}
		aggr_get_id = cpu_map__get_socket;
		break;
	case AGGR_CORE:
		if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
			perror("cannot build core map");
			return -1;
		}
		aggr_get_id = cpu_map__get_core;
		break;
	case AGGR_NONE:
	case AGGR_GLOBAL:
	default:
		break;
	}
	return 0;
}
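
/*
 * The HW_CACHE event configs used below follow the perf_event ABI
 * encoding (cache id) | (op << 8) | (result << 16); e.g. L1D read
 * misses are PERF_COUNT_HW_CACHE_L1D |
 * (PERF_COUNT_HW_CACHE_OP_READ << 8) |
 * (PERF_COUNT_HW_CACHE_RESULT_MISS << 16).
 */
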
/*
 * Add default attributes, if there were no attributes specified or
 * if -d/--detailed, -d -d or -d -d -d is used:
 */
static int add_default_attributes(void)
{
	struct perf_event_attr default_attrs[] = {

	{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
	{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
	{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
	{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },

	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },

	};

	/*
	 * Detailed stats (-d), covering the L1 and last level data caches:
	 */
	struct perf_event_attr detailed_attrs[] = {

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_L1D << 0 |
		(PERF_COUNT_HW_CACHE_OP_READ << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_L1D << 0 |
		(PERF_COUNT_HW_CACHE_OP_READ << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_LL << 0 |
		(PERF_COUNT_HW_CACHE_OP_READ << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_LL << 0 |
		(PERF_COUNT_HW_CACHE_OP_READ << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	};

	/*
	 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
	 */
	struct perf_event_attr very_detailed_attrs[] = {

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_L1I << 0 |
		(PERF_COUNT_HW_CACHE_OP_READ << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_L1I << 0 |
		(PERF_COUNT_HW_CACHE_OP_READ << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_DTLB << 0 |
		(PERF_COUNT_HW_CACHE_OP_READ << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_DTLB << 0 |
		(PERF_COUNT_HW_CACHE_OP_READ << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_ITLB << 0 |
		(PERF_COUNT_HW_CACHE_OP_READ << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_ITLB << 0 |
		(PERF_COUNT_HW_CACHE_OP_READ << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },

	};

	/*
	 * Very, very detailed stats (-d -d -d), adding prefetch events:
	 */
	struct perf_event_attr very_very_detailed_attrs[] = {

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_L1D << 0 |
		(PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },

	{ .type = PERF_TYPE_HW_CACHE,
	  .config =
		PERF_COUNT_HW_CACHE_L1D << 0 |
		(PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
		(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	};

	/* Set attrs if no event is selected and !null_run: */
	if (null_run)
		return 0;

	if (!evsel_list->nr_entries) {
		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
			return -1;
	}

	/* Detailed events get appended to the event list: */

	if (detailed_run < 1)
		return 0;

	/* Append detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
		return -1;

	if (detailed_run < 2)
		return 0;

	/* Append very detailed run extra attributes: */
	if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
		return -1;

	if (detailed_run < 3)
		return 0;

	/* Append very, very detailed run extra attributes: */
	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
}
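
/*
 * Entry point: parse the options, route output (stderr by default, a
 * file with --output, an inherited fd with --log-fd), validate the
 * target, then run the workload run_count times (or forever with
 * -r 0), printing stats after each run or at -I intervals.
 */
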
int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
{
	bool append_file = false;
	int output_fd = 0;
	const char *output_name	= NULL;
	const struct option options[] = {
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "stat events on existing process id"),
	OPT_STRING('t', "tid", &target.tid, "tid",
		   "stat events on existing thread id"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('g', "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	OPT_INTEGER('r', "repeat", &run_count,
		    "repeat command and print average + stddev (max: 100, forever: 0)"),
	OPT_BOOLEAN('n', "null", &null_run,
		    "null run - don't start any counters"),
	OPT_INCR('d', "detailed", &detailed_run,
		 "detailed run - start a lot of events"),
	OPT_BOOLEAN('S', "sync", &sync_run,
		    "call sync() before starting a run"),
	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
			   "print large numbers with thousands\' separators",
			   stat__set_big_num),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		   "list of cpus to monitor in system-wide"),
	OPT_SET_UINT('A', "no-aggr", &aggr_mode,
		     "disable CPU count aggregation", AGGR_NONE),
	OPT_STRING('x', "field-separator", &csv_sep, "separator",
		   "print counts with custom separator"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only", parse_cgroups),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
	OPT_INTEGER(0, "log-fd", &output_fd,
		    "log output to fd, instead of stderr"),
	OPT_STRING(0, "pre", &pre_cmd, "command",
		   "command to run prior to the measured command"),
	OPT_STRING(0, "post", &post_cmd, "command",
		   "command to run after the measured command"),
	OPT_UINTEGER('I', "interval-print", &interval,
		     "print counts at regular interval in ms (>= 100)"),
	OPT_SET_UINT(0, "per-socket", &aggr_mode,
		     "aggregate counts per processor socket", AGGR_SOCKET),
	OPT_SET_UINT(0, "per-core", &aggr_mode,
		     "aggregate counts per physical processor core", AGGR_CORE),
	OPT_UINTEGER('D', "delay", &initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_END()
	};
	const char * const stat_usage[] = {
		"perf stat [<options>] [<command>]",
		NULL
	};
	int status = -ENOMEM, run_idx;
	const char *mode;

	setlocale(LC_ALL, "");

	evsel_list = perf_evlist__new();
	if (evsel_list == NULL)
		return -ENOMEM;

	argc = parse_options(argc, argv, options, stat_usage,
		PARSE_OPT_STOP_AT_NON_OPTION);

	output = stderr;
	if (output_name && strcmp(output_name, "-"))
		output = NULL;

	if (output_name && output_fd) {
		fprintf(stderr, "cannot use both --output and --log-fd\n");
		usage_with_options(stat_usage, options);
	}

	if (output_fd < 0) {
		fprintf(stderr, "argument to --log-fd must be > 0\n");
		usage_with_options(stat_usage, options);
	}

	if (!output) {
		struct timespec tm;
		mode = append_file ? "a" : "w";

		output = fopen(output_name, mode);
		if (!output) {
			perror("failed to create output file");
			return -1;
		}
		clock_gettime(CLOCK_REALTIME, &tm);
		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
	} else if (output_fd > 0) {
		mode = append_file ? "a" : "w";
		output = fdopen(output_fd, mode);
		if (!output) {
			perror("Failed opening logfd");
			return -errno;
		}
	}
"a" : "w"; 1479 1480 output = fopen(output_name, mode); 1481 if (!output) { 1482 perror("failed to create output file"); 1483 return -1; 1484 } 1485 clock_gettime(CLOCK_REALTIME, &tm); 1486 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); 1487 } else if (output_fd > 0) { 1488 mode = append_file ? "a" : "w"; 1489 output = fdopen(output_fd, mode); 1490 if (!output) { 1491 perror("Failed opening logfd"); 1492 return -errno; 1493 } 1494 } 1495 1496 if (csv_sep) { 1497 csv_output = true; 1498 if (!strcmp(csv_sep, "\\t")) 1499 csv_sep = "\t"; 1500 } else 1501 csv_sep = DEFAULT_SEPARATOR; 1502 1503 /* 1504 * let the spreadsheet do the pretty-printing 1505 */ 1506 if (csv_output) { 1507 /* User explicitly passed -B? */ 1508 if (big_num_opt == 1) { 1509 fprintf(stderr, "-B option not supported with -x\n"); 1510 usage_with_options(stat_usage, options); 1511 } else /* Nope, so disable big number formatting */ 1512 big_num = false; 1513 } else if (big_num_opt == 0) /* User passed --no-big-num */ 1514 big_num = false; 1515 1516 if (!argc && !perf_target__has_task(&target)) 1517 usage_with_options(stat_usage, options); 1518 if (run_count < 0) { 1519 usage_with_options(stat_usage, options); 1520 } else if (run_count == 0) { 1521 forever = true; 1522 run_count = 1; 1523 } 1524 1525 /* no_aggr, cgroup are for system-wide only */ 1526 if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) 1527 && !perf_target__has_cpu(&target)) { 1528 fprintf(stderr, "both cgroup and no-aggregation " 1529 "modes only available in system-wide mode\n"); 1530 1531 usage_with_options(stat_usage, options); 1532 return -1; 1533 } 1534 1535 if (add_default_attributes()) 1536 goto out; 1537 1538 perf_target__validate(&target); 1539 1540 if (perf_evlist__create_maps(evsel_list, &target) < 0) { 1541 if (perf_target__has_task(&target)) 1542 pr_err("Problems finding threads of monitor\n"); 1543 if (perf_target__has_cpu(&target)) 1544 perror("failed to parse CPUs map"); 1545 1546 usage_with_options(stat_usage, options); 1547 return -1; 1548 } 1549 if (interval && interval < 100) { 1550 pr_err("print interval must be >= 100ms\n"); 1551 usage_with_options(stat_usage, options); 1552 return -1; 1553 } 1554 1555 if (perf_evlist__alloc_stats(evsel_list, interval)) 1556 goto out_free_maps; 1557 1558 if (perf_stat_init_aggr_mode()) 1559 goto out; 1560 1561 /* 1562 * We dont want to block the signals - that would cause 1563 * child tasks to inherit that and Ctrl-C would not work. 1564 * What we want is for Ctrl-C to work in the exec()-ed 1565 * task, but being ignored by perf stat itself: 1566 */ 1567 atexit(sig_atexit); 1568 if (!forever) 1569 signal(SIGINT, skip_signal); 1570 signal(SIGCHLD, skip_signal); 1571 signal(SIGALRM, skip_signal); 1572 signal(SIGABRT, skip_signal); 1573 1574 status = 0; 1575 for (run_idx = 0; forever || run_idx < run_count; run_idx++) { 1576 if (run_count != 1 && verbose) 1577 fprintf(output, "[ perf stat: executing run #%d ... ]\n", 1578 run_idx + 1); 1579 1580 status = run_perf_stat(argc, argv); 1581 if (forever && status != -1) { 1582 print_stat(argc, argv); 1583 perf_stat__reset_stats(evsel_list); 1584 } 1585 } 1586 1587 if (!forever && status != -1 && !interval) 1588 print_stat(argc, argv); 1589 1590 perf_evlist__free_stats(evsel_list); 1591 out_free_maps: 1592 perf_evlist__delete_maps(evsel_list); 1593 out: 1594 perf_evlist__delete(evsel_list); 1595 return status; 1596 } 1597