1 /* 2 * builtin-stat.c 3 * 4 * Builtin stat command: Give a precise performance counters summary 5 * overview about any workload, CPU or specific PID. 6 * 7 * Sample output: 8 9 $ perf stat ./hackbench 10 10 11 Time: 0.118 12 13 Performance counter stats for './hackbench 10': 14 15 1708.761321 task-clock # 11.037 CPUs utilized 16 41,190 context-switches # 0.024 M/sec 17 6,735 CPU-migrations # 0.004 M/sec 18 17,318 page-faults # 0.010 M/sec 19 5,205,202,243 cycles # 3.046 GHz 20 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle 21 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle 22 2,603,501,247 instructions # 0.50 insns per cycle 23 # 1.48 stalled cycles per insn 24 484,357,498 branches # 283.455 M/sec 25 6,388,934 branch-misses # 1.32% of all branches 26 27 0.154822978 seconds time elapsed 28 29 * 30 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 31 * 32 * Improvements and fixes by: 33 * 34 * Arjan van de Ven <arjan@linux.intel.com> 35 * Yanmin Zhang <yanmin.zhang@intel.com> 36 * Wu Fengguang <fengguang.wu@intel.com> 37 * Mike Galbraith <efault@gmx.de> 38 * Paul Mackerras <paulus@samba.org> 39 * Jaswinder Singh Rajput <jaswinder@kernel.org> 40 * 41 * Released under the GPL v2. (and only v2, not any later version) 42 */ 43 44 #include "perf.h" 45 #include "builtin.h" 46 #include "util/cgroup.h" 47 #include "util/util.h" 48 #include "util/parse-options.h" 49 #include "util/parse-events.h" 50 #include "util/pmu.h" 51 #include "util/event.h" 52 #include "util/evlist.h" 53 #include "util/evsel.h" 54 #include "util/debug.h" 55 #include "util/color.h" 56 #include "util/stat.h" 57 #include "util/header.h" 58 #include "util/cpumap.h" 59 #include "util/thread.h" 60 #include "util/thread_map.h" 61 62 #include <stdlib.h> 63 #include <sys/prctl.h> 64 #include <locale.h> 65 66 #define DEFAULT_SEPARATOR " " 67 #define CNTR_NOT_SUPPORTED "<not supported>" 68 #define CNTR_NOT_COUNTED "<not counted>" 69 70 static void print_stat(int argc, const char **argv); 71 static void print_counter_aggr(struct perf_evsel *counter, char *prefix); 72 static void print_counter(struct perf_evsel *counter, char *prefix); 73 static void print_aggr(char *prefix); 74 75 /* Default events used for perf stat -T */ 76 static const char * const transaction_attrs[] = { 77 "task-clock", 78 "{" 79 "instructions," 80 "cycles," 81 "cpu/cycles-t/," 82 "cpu/tx-start/," 83 "cpu/el-start/," 84 "cpu/cycles-ct/" 85 "}" 86 }; 87 88 /* More limited version when the CPU does not have all events. */ 89 static const char * const transaction_limited_attrs[] = { 90 "task-clock", 91 "{" 92 "instructions," 93 "cycles," 94 "cpu/cycles-t/," 95 "cpu/tx-start/" 96 "}" 97 }; 98 99 /* must match transaction_attrs and the beginning limited_attrs */ 100 enum { 101 T_TASK_CLOCK, 102 T_INSTRUCTIONS, 103 T_CYCLES, 104 T_CYCLES_IN_TX, 105 T_TRANSACTION_START, 106 T_ELISION_START, 107 T_CYCLES_IN_TX_CP, 108 }; 109 110 static struct perf_evlist *evsel_list; 111 112 static struct target target = { 113 .uid = UINT_MAX, 114 }; 115 116 enum aggr_mode { 117 AGGR_NONE, 118 AGGR_GLOBAL, 119 AGGR_SOCKET, 120 AGGR_CORE, 121 }; 122 123 static int run_count = 1; 124 static bool no_inherit = false; 125 static bool scale = true; 126 static enum aggr_mode aggr_mode = AGGR_GLOBAL; 127 static volatile pid_t child_pid = -1; 128 static bool null_run = false; 129 static int detailed_run = 0; 130 static bool transaction_run; 131 static bool big_num = true; 132 static int big_num_opt = -1; 133 static const char *csv_sep = NULL; 134 static bool csv_output = false; 135 static bool group = false; 136 static FILE *output = NULL; 137 static const char *pre_cmd = NULL; 138 static const char *post_cmd = NULL; 139 static bool sync_run = false; 140 static unsigned int interval = 0; 141 static unsigned int initial_delay = 0; 142 static unsigned int unit_width = 4; /* strlen("unit") */ 143 static bool forever = false; 144 static struct timespec ref_time; 145 static struct cpu_map *aggr_map; 146 static int (*aggr_get_id)(struct cpu_map *m, int cpu); 147 148 static volatile int done = 0; 149 150 struct perf_stat { 151 struct stats res_stats[3]; 152 }; 153 154 static inline void diff_timespec(struct timespec *r, struct timespec *a, 155 struct timespec *b) 156 { 157 r->tv_sec = a->tv_sec - b->tv_sec; 158 if (a->tv_nsec < b->tv_nsec) { 159 r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec; 160 r->tv_sec--; 161 } else { 162 r->tv_nsec = a->tv_nsec - b->tv_nsec ; 163 } 164 } 165 166 static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel) 167 { 168 return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus; 169 } 170 171 static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel) 172 { 173 return perf_evsel__cpus(evsel)->nr; 174 } 175 176 static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) 177 { 178 int i; 179 struct perf_stat *ps = evsel->priv; 180 181 for (i = 0; i < 3; i++) 182 init_stats(&ps->res_stats[i]); 183 } 184 185 static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) 186 { 187 evsel->priv = zalloc(sizeof(struct perf_stat)); 188 if (evsel->priv == NULL) 189 return -ENOMEM; 190 perf_evsel__reset_stat_priv(evsel); 191 return 0; 192 } 193 194 static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) 195 { 196 zfree(&evsel->priv); 197 } 198 199 static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel) 200 { 201 void *addr; 202 size_t sz; 203 204 sz = sizeof(*evsel->counts) + 205 (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values)); 206 207 addr = zalloc(sz); 208 if (!addr) 209 return -ENOMEM; 210 211 evsel->prev_raw_counts = addr; 212 213 return 0; 214 } 215 216 static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) 217 { 218 zfree(&evsel->prev_raw_counts); 219 } 220 221 static void perf_evlist__free_stats(struct perf_evlist *evlist) 222 { 223 struct perf_evsel *evsel; 224 225 evlist__for_each(evlist, evsel) { 226 perf_evsel__free_stat_priv(evsel); 227 perf_evsel__free_counts(evsel); 228 perf_evsel__free_prev_raw_counts(evsel); 229 } 230 } 231 232 static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw) 233 { 234 struct perf_evsel *evsel; 235 236 evlist__for_each(evlist, evsel) { 237 if (perf_evsel__alloc_stat_priv(evsel) < 0 || 238 perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 || 239 (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0)) 240 goto out_free; 241 } 242 243 return 0; 244 245 out_free: 246 perf_evlist__free_stats(evlist); 247 return -1; 248 } 249 250 static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; 251 static struct stats runtime_cycles_stats[MAX_NR_CPUS]; 252 static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS]; 253 static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS]; 254 static struct stats runtime_branches_stats[MAX_NR_CPUS]; 255 static struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; 256 static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; 257 static struct stats runtime_l1_icache_stats[MAX_NR_CPUS]; 258 static struct stats runtime_ll_cache_stats[MAX_NR_CPUS]; 259 static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS]; 260 static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; 261 static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS]; 262 static struct stats walltime_nsecs_stats; 263 static struct stats runtime_transaction_stats[MAX_NR_CPUS]; 264 static struct stats runtime_elision_stats[MAX_NR_CPUS]; 265 266 static void perf_stat__reset_stats(struct perf_evlist *evlist) 267 { 268 struct perf_evsel *evsel; 269 270 evlist__for_each(evlist, evsel) { 271 perf_evsel__reset_stat_priv(evsel); 272 perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel)); 273 } 274 275 memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); 276 memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); 277 memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); 278 memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); 279 memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); 280 memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); 281 memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats)); 282 memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); 283 memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); 284 memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); 285 memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); 286 memset(runtime_cycles_in_tx_stats, 0, 287 sizeof(runtime_cycles_in_tx_stats)); 288 memset(runtime_transaction_stats, 0, 289 sizeof(runtime_transaction_stats)); 290 memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); 291 memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); 292 } 293 294 static int create_perf_stat_counter(struct perf_evsel *evsel) 295 { 296 struct perf_event_attr *attr = &evsel->attr; 297 298 if (scale) 299 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 300 PERF_FORMAT_TOTAL_TIME_RUNNING; 301 302 attr->inherit = !no_inherit; 303 304 if (target__has_cpu(&target)) 305 return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); 306 307 if (!target__has_task(&target) && perf_evsel__is_group_leader(evsel)) { 308 attr->disabled = 1; 309 if (!initial_delay) 310 attr->enable_on_exec = 1; 311 } 312 313 return perf_evsel__open_per_thread(evsel, evsel_list->threads); 314 } 315 316 /* 317 * Does the counter have nsecs as a unit? 318 */ 319 static inline int nsec_counter(struct perf_evsel *evsel) 320 { 321 if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || 322 perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 323 return 1; 324 325 return 0; 326 } 327 328 static struct perf_evsel *nth_evsel(int n) 329 { 330 static struct perf_evsel **array; 331 static int array_len; 332 struct perf_evsel *ev; 333 int j; 334 335 /* Assumes this only called when evsel_list does not change anymore. */ 336 if (!array) { 337 evlist__for_each(evsel_list, ev) 338 array_len++; 339 array = malloc(array_len * sizeof(void *)); 340 if (!array) 341 exit(ENOMEM); 342 j = 0; 343 evlist__for_each(evsel_list, ev) 344 array[j++] = ev; 345 } 346 if (n < array_len) 347 return array[n]; 348 return NULL; 349 } 350 351 /* 352 * Update various tracking values we maintain to print 353 * more semantic information such as miss/hit ratios, 354 * instruction rates, etc: 355 */ 356 static void update_shadow_stats(struct perf_evsel *counter, u64 *count) 357 { 358 if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) 359 update_stats(&runtime_nsecs_stats[0], count[0]); 360 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) 361 update_stats(&runtime_cycles_stats[0], count[0]); 362 else if (transaction_run && 363 perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX))) 364 update_stats(&runtime_cycles_in_tx_stats[0], count[0]); 365 else if (transaction_run && 366 perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START))) 367 update_stats(&runtime_transaction_stats[0], count[0]); 368 else if (transaction_run && 369 perf_evsel__cmp(counter, nth_evsel(T_ELISION_START))) 370 update_stats(&runtime_elision_stats[0], count[0]); 371 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) 372 update_stats(&runtime_stalled_cycles_front_stats[0], count[0]); 373 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) 374 update_stats(&runtime_stalled_cycles_back_stats[0], count[0]); 375 else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) 376 update_stats(&runtime_branches_stats[0], count[0]); 377 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) 378 update_stats(&runtime_cacherefs_stats[0], count[0]); 379 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) 380 update_stats(&runtime_l1_dcache_stats[0], count[0]); 381 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) 382 update_stats(&runtime_l1_icache_stats[0], count[0]); 383 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) 384 update_stats(&runtime_ll_cache_stats[0], count[0]); 385 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) 386 update_stats(&runtime_dtlb_cache_stats[0], count[0]); 387 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) 388 update_stats(&runtime_itlb_cache_stats[0], count[0]); 389 } 390 391 /* 392 * Read out the results of a single counter: 393 * aggregate counts across CPUs in system-wide mode 394 */ 395 static int read_counter_aggr(struct perf_evsel *counter) 396 { 397 struct perf_stat *ps = counter->priv; 398 u64 *count = counter->counts->aggr.values; 399 int i; 400 401 if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter), 402 thread_map__nr(evsel_list->threads), scale) < 0) 403 return -1; 404 405 for (i = 0; i < 3; i++) 406 update_stats(&ps->res_stats[i], count[i]); 407 408 if (verbose) { 409 fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", 410 perf_evsel__name(counter), count[0], count[1], count[2]); 411 } 412 413 /* 414 * Save the full runtime - to allow normalization during printout: 415 */ 416 update_shadow_stats(counter, count); 417 418 return 0; 419 } 420 421 /* 422 * Read out the results of a single counter: 423 * do not aggregate counts across CPUs in system-wide mode 424 */ 425 static int read_counter(struct perf_evsel *counter) 426 { 427 u64 *count; 428 int cpu; 429 430 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 431 if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0) 432 return -1; 433 434 count = counter->counts->cpu[cpu].values; 435 436 update_shadow_stats(counter, count); 437 } 438 439 return 0; 440 } 441 442 static void print_interval(void) 443 { 444 static int num_print_interval; 445 struct perf_evsel *counter; 446 struct perf_stat *ps; 447 struct timespec ts, rs; 448 char prefix[64]; 449 450 if (aggr_mode == AGGR_GLOBAL) { 451 evlist__for_each(evsel_list, counter) { 452 ps = counter->priv; 453 memset(ps->res_stats, 0, sizeof(ps->res_stats)); 454 read_counter_aggr(counter); 455 } 456 } else { 457 evlist__for_each(evsel_list, counter) { 458 ps = counter->priv; 459 memset(ps->res_stats, 0, sizeof(ps->res_stats)); 460 read_counter(counter); 461 } 462 } 463 464 clock_gettime(CLOCK_MONOTONIC, &ts); 465 diff_timespec(&rs, &ts, &ref_time); 466 sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep); 467 468 if (num_print_interval == 0 && !csv_output) { 469 switch (aggr_mode) { 470 case AGGR_SOCKET: 471 fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); 472 break; 473 case AGGR_CORE: 474 fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit"); 475 break; 476 case AGGR_NONE: 477 fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); 478 break; 479 case AGGR_GLOBAL: 480 default: 481 fprintf(output, "# time counts %*s events\n", unit_width, "unit"); 482 } 483 } 484 485 if (++num_print_interval == 25) 486 num_print_interval = 0; 487 488 switch (aggr_mode) { 489 case AGGR_CORE: 490 case AGGR_SOCKET: 491 print_aggr(prefix); 492 break; 493 case AGGR_NONE: 494 evlist__for_each(evsel_list, counter) 495 print_counter(counter, prefix); 496 break; 497 case AGGR_GLOBAL: 498 default: 499 evlist__for_each(evsel_list, counter) 500 print_counter_aggr(counter, prefix); 501 } 502 503 fflush(output); 504 } 505 506 static void handle_initial_delay(void) 507 { 508 struct perf_evsel *counter; 509 510 if (initial_delay) { 511 const int ncpus = cpu_map__nr(evsel_list->cpus), 512 nthreads = thread_map__nr(evsel_list->threads); 513 514 usleep(initial_delay * 1000); 515 evlist__for_each(evsel_list, counter) 516 perf_evsel__enable(counter, ncpus, nthreads); 517 } 518 } 519 520 static volatile int workload_exec_errno; 521 522 /* 523 * perf_evlist__prepare_workload will send a SIGUSR1 524 * if the fork fails, since we asked by setting its 525 * want_signal to true. 526 */ 527 static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info, 528 void *ucontext __maybe_unused) 529 { 530 workload_exec_errno = info->si_value.sival_int; 531 } 532 533 static int __run_perf_stat(int argc, const char **argv) 534 { 535 char msg[512]; 536 unsigned long long t0, t1; 537 struct perf_evsel *counter; 538 struct timespec ts; 539 size_t l; 540 int status = 0; 541 const bool forks = (argc > 0); 542 543 if (interval) { 544 ts.tv_sec = interval / 1000; 545 ts.tv_nsec = (interval % 1000) * 1000000; 546 } else { 547 ts.tv_sec = 1; 548 ts.tv_nsec = 0; 549 } 550 551 if (forks) { 552 if (perf_evlist__prepare_workload(evsel_list, &target, argv, false, 553 workload_exec_failed_signal) < 0) { 554 perror("failed to prepare workload"); 555 return -1; 556 } 557 child_pid = evsel_list->workload.pid; 558 } 559 560 if (group) 561 perf_evlist__set_leader(evsel_list); 562 563 evlist__for_each(evsel_list, counter) { 564 if (create_perf_stat_counter(counter) < 0) { 565 /* 566 * PPC returns ENXIO for HW counters until 2.6.37 567 * (behavior changed with commit b0a873e). 568 */ 569 if (errno == EINVAL || errno == ENOSYS || 570 errno == ENOENT || errno == EOPNOTSUPP || 571 errno == ENXIO) { 572 if (verbose) 573 ui__warning("%s event is not supported by the kernel.\n", 574 perf_evsel__name(counter)); 575 counter->supported = false; 576 continue; 577 } 578 579 perf_evsel__open_strerror(counter, &target, 580 errno, msg, sizeof(msg)); 581 ui__error("%s\n", msg); 582 583 if (child_pid != -1) 584 kill(child_pid, SIGTERM); 585 586 return -1; 587 } 588 counter->supported = true; 589 590 l = strlen(counter->unit); 591 if (l > unit_width) 592 unit_width = l; 593 } 594 595 if (perf_evlist__apply_filters(evsel_list)) { 596 error("failed to set filter with %d (%s)\n", errno, 597 strerror_r(errno, msg, sizeof(msg))); 598 return -1; 599 } 600 601 /* 602 * Enable counters and exec the command: 603 */ 604 t0 = rdclock(); 605 clock_gettime(CLOCK_MONOTONIC, &ref_time); 606 607 if (forks) { 608 perf_evlist__start_workload(evsel_list); 609 handle_initial_delay(); 610 611 if (interval) { 612 while (!waitpid(child_pid, &status, WNOHANG)) { 613 nanosleep(&ts, NULL); 614 print_interval(); 615 } 616 } 617 wait(&status); 618 619 if (workload_exec_errno) { 620 const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg)); 621 pr_err("Workload failed: %s\n", emsg); 622 return -1; 623 } 624 625 if (WIFSIGNALED(status)) 626 psignal(WTERMSIG(status), argv[0]); 627 } else { 628 handle_initial_delay(); 629 while (!done) { 630 nanosleep(&ts, NULL); 631 if (interval) 632 print_interval(); 633 } 634 } 635 636 t1 = rdclock(); 637 638 update_stats(&walltime_nsecs_stats, t1 - t0); 639 640 if (aggr_mode == AGGR_GLOBAL) { 641 evlist__for_each(evsel_list, counter) { 642 read_counter_aggr(counter); 643 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 644 thread_map__nr(evsel_list->threads)); 645 } 646 } else { 647 evlist__for_each(evsel_list, counter) { 648 read_counter(counter); 649 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1); 650 } 651 } 652 653 return WEXITSTATUS(status); 654 } 655 656 static int run_perf_stat(int argc, const char **argv) 657 { 658 int ret; 659 660 if (pre_cmd) { 661 ret = system(pre_cmd); 662 if (ret) 663 return ret; 664 } 665 666 if (sync_run) 667 sync(); 668 669 ret = __run_perf_stat(argc, argv); 670 if (ret) 671 return ret; 672 673 if (post_cmd) { 674 ret = system(post_cmd); 675 if (ret) 676 return ret; 677 } 678 679 return ret; 680 } 681 682 static void print_noise_pct(double total, double avg) 683 { 684 double pct = rel_stddev_stats(total, avg); 685 686 if (csv_output) 687 fprintf(output, "%s%.2f%%", csv_sep, pct); 688 else if (pct) 689 fprintf(output, " ( +-%6.2f%% )", pct); 690 } 691 692 static void print_noise(struct perf_evsel *evsel, double avg) 693 { 694 struct perf_stat *ps; 695 696 if (run_count == 1) 697 return; 698 699 ps = evsel->priv; 700 print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); 701 } 702 703 static void aggr_printout(struct perf_evsel *evsel, int id, int nr) 704 { 705 switch (aggr_mode) { 706 case AGGR_CORE: 707 fprintf(output, "S%d-C%*d%s%*d%s", 708 cpu_map__id_to_socket(id), 709 csv_output ? 0 : -8, 710 cpu_map__id_to_cpu(id), 711 csv_sep, 712 csv_output ? 0 : 4, 713 nr, 714 csv_sep); 715 break; 716 case AGGR_SOCKET: 717 fprintf(output, "S%*d%s%*d%s", 718 csv_output ? 0 : -5, 719 id, 720 csv_sep, 721 csv_output ? 0 : 4, 722 nr, 723 csv_sep); 724 break; 725 case AGGR_NONE: 726 fprintf(output, "CPU%*d%s", 727 csv_output ? 0 : -4, 728 perf_evsel__cpus(evsel)->map[id], csv_sep); 729 break; 730 case AGGR_GLOBAL: 731 default: 732 break; 733 } 734 } 735 736 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) 737 { 738 double msecs = avg / 1e6; 739 const char *fmt_v, *fmt_n; 740 char name[25]; 741 742 fmt_v = csv_output ? "%.6f%s" : "%18.6f%s"; 743 fmt_n = csv_output ? "%s" : "%-25s"; 744 745 aggr_printout(evsel, id, nr); 746 747 scnprintf(name, sizeof(name), "%s%s", 748 perf_evsel__name(evsel), csv_output ? "" : " (msec)"); 749 750 fprintf(output, fmt_v, msecs, csv_sep); 751 752 if (csv_output) 753 fprintf(output, "%s%s", evsel->unit, csv_sep); 754 else 755 fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep); 756 757 fprintf(output, fmt_n, name); 758 759 if (evsel->cgrp) 760 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 761 762 if (csv_output || interval) 763 return; 764 765 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 766 fprintf(output, " # %8.3f CPUs utilized ", 767 avg / avg_stats(&walltime_nsecs_stats)); 768 else 769 fprintf(output, " "); 770 } 771 772 /* used for get_ratio_color() */ 773 enum grc_type { 774 GRC_STALLED_CYCLES_FE, 775 GRC_STALLED_CYCLES_BE, 776 GRC_CACHE_MISSES, 777 GRC_MAX_NR 778 }; 779 780 static const char *get_ratio_color(enum grc_type type, double ratio) 781 { 782 static const double grc_table[GRC_MAX_NR][3] = { 783 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 }, 784 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 }, 785 [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 }, 786 }; 787 const char *color = PERF_COLOR_NORMAL; 788 789 if (ratio > grc_table[type][0]) 790 color = PERF_COLOR_RED; 791 else if (ratio > grc_table[type][1]) 792 color = PERF_COLOR_MAGENTA; 793 else if (ratio > grc_table[type][2]) 794 color = PERF_COLOR_YELLOW; 795 796 return color; 797 } 798 799 static void print_stalled_cycles_frontend(int cpu, 800 struct perf_evsel *evsel 801 __maybe_unused, double avg) 802 { 803 double total, ratio = 0.0; 804 const char *color; 805 806 total = avg_stats(&runtime_cycles_stats[cpu]); 807 808 if (total) 809 ratio = avg / total * 100.0; 810 811 color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); 812 813 fprintf(output, " # "); 814 color_fprintf(output, color, "%6.2f%%", ratio); 815 fprintf(output, " frontend cycles idle "); 816 } 817 818 static void print_stalled_cycles_backend(int cpu, 819 struct perf_evsel *evsel 820 __maybe_unused, double avg) 821 { 822 double total, ratio = 0.0; 823 const char *color; 824 825 total = avg_stats(&runtime_cycles_stats[cpu]); 826 827 if (total) 828 ratio = avg / total * 100.0; 829 830 color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); 831 832 fprintf(output, " # "); 833 color_fprintf(output, color, "%6.2f%%", ratio); 834 fprintf(output, " backend cycles idle "); 835 } 836 837 static void print_branch_misses(int cpu, 838 struct perf_evsel *evsel __maybe_unused, 839 double avg) 840 { 841 double total, ratio = 0.0; 842 const char *color; 843 844 total = avg_stats(&runtime_branches_stats[cpu]); 845 846 if (total) 847 ratio = avg / total * 100.0; 848 849 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 850 851 fprintf(output, " # "); 852 color_fprintf(output, color, "%6.2f%%", ratio); 853 fprintf(output, " of all branches "); 854 } 855 856 static void print_l1_dcache_misses(int cpu, 857 struct perf_evsel *evsel __maybe_unused, 858 double avg) 859 { 860 double total, ratio = 0.0; 861 const char *color; 862 863 total = avg_stats(&runtime_l1_dcache_stats[cpu]); 864 865 if (total) 866 ratio = avg / total * 100.0; 867 868 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 869 870 fprintf(output, " # "); 871 color_fprintf(output, color, "%6.2f%%", ratio); 872 fprintf(output, " of all L1-dcache hits "); 873 } 874 875 static void print_l1_icache_misses(int cpu, 876 struct perf_evsel *evsel __maybe_unused, 877 double avg) 878 { 879 double total, ratio = 0.0; 880 const char *color; 881 882 total = avg_stats(&runtime_l1_icache_stats[cpu]); 883 884 if (total) 885 ratio = avg / total * 100.0; 886 887 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 888 889 fprintf(output, " # "); 890 color_fprintf(output, color, "%6.2f%%", ratio); 891 fprintf(output, " of all L1-icache hits "); 892 } 893 894 static void print_dtlb_cache_misses(int cpu, 895 struct perf_evsel *evsel __maybe_unused, 896 double avg) 897 { 898 double total, ratio = 0.0; 899 const char *color; 900 901 total = avg_stats(&runtime_dtlb_cache_stats[cpu]); 902 903 if (total) 904 ratio = avg / total * 100.0; 905 906 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 907 908 fprintf(output, " # "); 909 color_fprintf(output, color, "%6.2f%%", ratio); 910 fprintf(output, " of all dTLB cache hits "); 911 } 912 913 static void print_itlb_cache_misses(int cpu, 914 struct perf_evsel *evsel __maybe_unused, 915 double avg) 916 { 917 double total, ratio = 0.0; 918 const char *color; 919 920 total = avg_stats(&runtime_itlb_cache_stats[cpu]); 921 922 if (total) 923 ratio = avg / total * 100.0; 924 925 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 926 927 fprintf(output, " # "); 928 color_fprintf(output, color, "%6.2f%%", ratio); 929 fprintf(output, " of all iTLB cache hits "); 930 } 931 932 static void print_ll_cache_misses(int cpu, 933 struct perf_evsel *evsel __maybe_unused, 934 double avg) 935 { 936 double total, ratio = 0.0; 937 const char *color; 938 939 total = avg_stats(&runtime_ll_cache_stats[cpu]); 940 941 if (total) 942 ratio = avg / total * 100.0; 943 944 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 945 946 fprintf(output, " # "); 947 color_fprintf(output, color, "%6.2f%%", ratio); 948 fprintf(output, " of all LL-cache hits "); 949 } 950 951 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) 952 { 953 double total, ratio = 0.0, total2; 954 double sc = evsel->scale; 955 const char *fmt; 956 int cpu = cpu_map__id_to_cpu(id); 957 958 if (csv_output) { 959 fmt = sc != 1.0 ? "%.2f%s" : "%.0f%s"; 960 } else { 961 if (big_num) 962 fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s"; 963 else 964 fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s"; 965 } 966 967 aggr_printout(evsel, id, nr); 968 969 if (aggr_mode == AGGR_GLOBAL) 970 cpu = 0; 971 972 fprintf(output, fmt, avg, csv_sep); 973 974 if (evsel->unit) 975 fprintf(output, "%-*s%s", 976 csv_output ? 0 : unit_width, 977 evsel->unit, csv_sep); 978 979 fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); 980 981 if (evsel->cgrp) 982 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); 983 984 if (csv_output || interval) 985 return; 986 987 if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { 988 total = avg_stats(&runtime_cycles_stats[cpu]); 989 if (total) { 990 ratio = avg / total; 991 fprintf(output, " # %5.2f insns per cycle ", ratio); 992 } 993 total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]); 994 total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu])); 995 996 if (total && avg) { 997 ratio = total / avg; 998 fprintf(output, "\n"); 999 if (aggr_mode == AGGR_NONE) 1000 fprintf(output, " "); 1001 fprintf(output, " # %5.2f stalled cycles per insn", ratio); 1002 } 1003 1004 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && 1005 runtime_branches_stats[cpu].n != 0) { 1006 print_branch_misses(cpu, evsel, avg); 1007 } else if ( 1008 evsel->attr.type == PERF_TYPE_HW_CACHE && 1009 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | 1010 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 1011 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && 1012 runtime_l1_dcache_stats[cpu].n != 0) { 1013 print_l1_dcache_misses(cpu, evsel, avg); 1014 } else if ( 1015 evsel->attr.type == PERF_TYPE_HW_CACHE && 1016 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | 1017 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 1018 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && 1019 runtime_l1_icache_stats[cpu].n != 0) { 1020 print_l1_icache_misses(cpu, evsel, avg); 1021 } else if ( 1022 evsel->attr.type == PERF_TYPE_HW_CACHE && 1023 evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | 1024 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 1025 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && 1026 runtime_dtlb_cache_stats[cpu].n != 0) { 1027 print_dtlb_cache_misses(cpu, evsel, avg); 1028 } else if ( 1029 evsel->attr.type == PERF_TYPE_HW_CACHE && 1030 evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | 1031 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 1032 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && 1033 runtime_itlb_cache_stats[cpu].n != 0) { 1034 print_itlb_cache_misses(cpu, evsel, avg); 1035 } else if ( 1036 evsel->attr.type == PERF_TYPE_HW_CACHE && 1037 evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | 1038 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 1039 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && 1040 runtime_ll_cache_stats[cpu].n != 0) { 1041 print_ll_cache_misses(cpu, evsel, avg); 1042 } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && 1043 runtime_cacherefs_stats[cpu].n != 0) { 1044 total = avg_stats(&runtime_cacherefs_stats[cpu]); 1045 1046 if (total) 1047 ratio = avg * 100 / total; 1048 1049 fprintf(output, " # %8.3f %% of all cache refs ", ratio); 1050 1051 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { 1052 print_stalled_cycles_frontend(cpu, evsel, avg); 1053 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { 1054 print_stalled_cycles_backend(cpu, evsel, avg); 1055 } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { 1056 total = avg_stats(&runtime_nsecs_stats[cpu]); 1057 1058 if (total) { 1059 ratio = avg / total; 1060 fprintf(output, " # %8.3f GHz ", ratio); 1061 } 1062 } else if (transaction_run && 1063 perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) { 1064 total = avg_stats(&runtime_cycles_stats[cpu]); 1065 if (total) 1066 fprintf(output, 1067 " # %5.2f%% transactional cycles ", 1068 100.0 * (avg / total)); 1069 } else if (transaction_run && 1070 perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX_CP))) { 1071 total = avg_stats(&runtime_cycles_stats[cpu]); 1072 total2 = avg_stats(&runtime_cycles_in_tx_stats[cpu]); 1073 if (total2 < avg) 1074 total2 = avg; 1075 if (total) 1076 fprintf(output, 1077 " # %5.2f%% aborted cycles ", 1078 100.0 * ((total2-avg) / total)); 1079 } else if (transaction_run && 1080 perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) && 1081 avg > 0 && 1082 runtime_cycles_in_tx_stats[cpu].n != 0) { 1083 total = avg_stats(&runtime_cycles_in_tx_stats[cpu]); 1084 1085 if (total) 1086 ratio = total / avg; 1087 1088 fprintf(output, " # %8.0f cycles / transaction ", ratio); 1089 } else if (transaction_run && 1090 perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) && 1091 avg > 0 && 1092 runtime_cycles_in_tx_stats[cpu].n != 0) { 1093 total = avg_stats(&runtime_cycles_in_tx_stats[cpu]); 1094 1095 if (total) 1096 ratio = total / avg; 1097 1098 fprintf(output, " # %8.0f cycles / elision ", ratio); 1099 } else if (runtime_nsecs_stats[cpu].n != 0) { 1100 char unit = 'M'; 1101 1102 total = avg_stats(&runtime_nsecs_stats[cpu]); 1103 1104 if (total) 1105 ratio = 1000.0 * avg / total; 1106 if (ratio < 0.001) { 1107 ratio *= 1000; 1108 unit = 'K'; 1109 } 1110 1111 fprintf(output, " # %8.3f %c/sec ", ratio, unit); 1112 } else { 1113 fprintf(output, " "); 1114 } 1115 } 1116 1117 static void print_aggr(char *prefix) 1118 { 1119 struct perf_evsel *counter; 1120 int cpu, cpu2, s, s2, id, nr; 1121 double uval; 1122 u64 ena, run, val; 1123 1124 if (!(aggr_map || aggr_get_id)) 1125 return; 1126 1127 for (s = 0; s < aggr_map->nr; s++) { 1128 id = aggr_map->map[s]; 1129 evlist__for_each(evsel_list, counter) { 1130 val = ena = run = 0; 1131 nr = 0; 1132 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1133 cpu2 = perf_evsel__cpus(counter)->map[cpu]; 1134 s2 = aggr_get_id(evsel_list->cpus, cpu2); 1135 if (s2 != id) 1136 continue; 1137 val += counter->counts->cpu[cpu].val; 1138 ena += counter->counts->cpu[cpu].ena; 1139 run += counter->counts->cpu[cpu].run; 1140 nr++; 1141 } 1142 if (prefix) 1143 fprintf(output, "%s", prefix); 1144 1145 if (run == 0 || ena == 0) { 1146 aggr_printout(counter, id, nr); 1147 1148 fprintf(output, "%*s%s", 1149 csv_output ? 0 : 18, 1150 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 1151 csv_sep); 1152 1153 fprintf(output, "%-*s%s", 1154 csv_output ? 0 : unit_width, 1155 counter->unit, csv_sep); 1156 1157 fprintf(output, "%*s", 1158 csv_output ? 0 : -25, 1159 perf_evsel__name(counter)); 1160 1161 if (counter->cgrp) 1162 fprintf(output, "%s%s", 1163 csv_sep, counter->cgrp->name); 1164 1165 fputc('\n', output); 1166 continue; 1167 } 1168 uval = val * counter->scale; 1169 1170 if (nsec_counter(counter)) 1171 nsec_printout(id, nr, counter, uval); 1172 else 1173 abs_printout(id, nr, counter, uval); 1174 1175 if (!csv_output) { 1176 print_noise(counter, 1.0); 1177 1178 if (run != ena) 1179 fprintf(output, " (%.2f%%)", 1180 100.0 * run / ena); 1181 } 1182 fputc('\n', output); 1183 } 1184 } 1185 } 1186 1187 /* 1188 * Print out the results of a single counter: 1189 * aggregated counts in system-wide mode 1190 */ 1191 static void print_counter_aggr(struct perf_evsel *counter, char *prefix) 1192 { 1193 struct perf_stat *ps = counter->priv; 1194 double avg = avg_stats(&ps->res_stats[0]); 1195 int scaled = counter->counts->scaled; 1196 double uval; 1197 1198 if (prefix) 1199 fprintf(output, "%s", prefix); 1200 1201 if (scaled == -1) { 1202 fprintf(output, "%*s%s", 1203 csv_output ? 0 : 18, 1204 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 1205 csv_sep); 1206 fprintf(output, "%-*s%s", 1207 csv_output ? 0 : unit_width, 1208 counter->unit, csv_sep); 1209 fprintf(output, "%*s", 1210 csv_output ? 0 : -25, 1211 perf_evsel__name(counter)); 1212 1213 if (counter->cgrp) 1214 fprintf(output, "%s%s", csv_sep, counter->cgrp->name); 1215 1216 fputc('\n', output); 1217 return; 1218 } 1219 1220 uval = avg * counter->scale; 1221 1222 if (nsec_counter(counter)) 1223 nsec_printout(-1, 0, counter, uval); 1224 else 1225 abs_printout(-1, 0, counter, uval); 1226 1227 print_noise(counter, avg); 1228 1229 if (csv_output) { 1230 fputc('\n', output); 1231 return; 1232 } 1233 1234 if (scaled) { 1235 double avg_enabled, avg_running; 1236 1237 avg_enabled = avg_stats(&ps->res_stats[1]); 1238 avg_running = avg_stats(&ps->res_stats[2]); 1239 1240 fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled); 1241 } 1242 fprintf(output, "\n"); 1243 } 1244 1245 /* 1246 * Print out the results of a single counter: 1247 * does not use aggregated count in system-wide 1248 */ 1249 static void print_counter(struct perf_evsel *counter, char *prefix) 1250 { 1251 u64 ena, run, val; 1252 double uval; 1253 int cpu; 1254 1255 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 1256 val = counter->counts->cpu[cpu].val; 1257 ena = counter->counts->cpu[cpu].ena; 1258 run = counter->counts->cpu[cpu].run; 1259 1260 if (prefix) 1261 fprintf(output, "%s", prefix); 1262 1263 if (run == 0 || ena == 0) { 1264 fprintf(output, "CPU%*d%s%*s%s", 1265 csv_output ? 0 : -4, 1266 perf_evsel__cpus(counter)->map[cpu], csv_sep, 1267 csv_output ? 0 : 18, 1268 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, 1269 csv_sep); 1270 1271 fprintf(output, "%-*s%s", 1272 csv_output ? 0 : unit_width, 1273 counter->unit, csv_sep); 1274 1275 fprintf(output, "%*s", 1276 csv_output ? 0 : -25, 1277 perf_evsel__name(counter)); 1278 1279 if (counter->cgrp) 1280 fprintf(output, "%s%s", 1281 csv_sep, counter->cgrp->name); 1282 1283 fputc('\n', output); 1284 continue; 1285 } 1286 1287 uval = val * counter->scale; 1288 1289 if (nsec_counter(counter)) 1290 nsec_printout(cpu, 0, counter, uval); 1291 else 1292 abs_printout(cpu, 0, counter, uval); 1293 1294 if (!csv_output) { 1295 print_noise(counter, 1.0); 1296 1297 if (run != ena) 1298 fprintf(output, " (%.2f%%)", 1299 100.0 * run / ena); 1300 } 1301 fputc('\n', output); 1302 } 1303 } 1304 1305 static void print_stat(int argc, const char **argv) 1306 { 1307 struct perf_evsel *counter; 1308 int i; 1309 1310 fflush(stdout); 1311 1312 if (!csv_output) { 1313 fprintf(output, "\n"); 1314 fprintf(output, " Performance counter stats for "); 1315 if (target.system_wide) 1316 fprintf(output, "\'system wide"); 1317 else if (target.cpu_list) 1318 fprintf(output, "\'CPU(s) %s", target.cpu_list); 1319 else if (!target__has_task(&target)) { 1320 fprintf(output, "\'%s", argv[0]); 1321 for (i = 1; i < argc; i++) 1322 fprintf(output, " %s", argv[i]); 1323 } else if (target.pid) 1324 fprintf(output, "process id \'%s", target.pid); 1325 else 1326 fprintf(output, "thread id \'%s", target.tid); 1327 1328 fprintf(output, "\'"); 1329 if (run_count > 1) 1330 fprintf(output, " (%d runs)", run_count); 1331 fprintf(output, ":\n\n"); 1332 } 1333 1334 switch (aggr_mode) { 1335 case AGGR_CORE: 1336 case AGGR_SOCKET: 1337 print_aggr(NULL); 1338 break; 1339 case AGGR_GLOBAL: 1340 evlist__for_each(evsel_list, counter) 1341 print_counter_aggr(counter, NULL); 1342 break; 1343 case AGGR_NONE: 1344 evlist__for_each(evsel_list, counter) 1345 print_counter(counter, NULL); 1346 break; 1347 default: 1348 break; 1349 } 1350 1351 if (!csv_output) { 1352 if (!null_run) 1353 fprintf(output, "\n"); 1354 fprintf(output, " %17.9f seconds time elapsed", 1355 avg_stats(&walltime_nsecs_stats)/1e9); 1356 if (run_count > 1) { 1357 fprintf(output, " "); 1358 print_noise_pct(stddev_stats(&walltime_nsecs_stats), 1359 avg_stats(&walltime_nsecs_stats)); 1360 } 1361 fprintf(output, "\n\n"); 1362 } 1363 } 1364 1365 static volatile int signr = -1; 1366 1367 static void skip_signal(int signo) 1368 { 1369 if ((child_pid == -1) || interval) 1370 done = 1; 1371 1372 signr = signo; 1373 /* 1374 * render child_pid harmless 1375 * won't send SIGTERM to a random 1376 * process in case of race condition 1377 * and fast PID recycling 1378 */ 1379 child_pid = -1; 1380 } 1381 1382 static void sig_atexit(void) 1383 { 1384 sigset_t set, oset; 1385 1386 /* 1387 * avoid race condition with SIGCHLD handler 1388 * in skip_signal() which is modifying child_pid 1389 * goal is to avoid send SIGTERM to a random 1390 * process 1391 */ 1392 sigemptyset(&set); 1393 sigaddset(&set, SIGCHLD); 1394 sigprocmask(SIG_BLOCK, &set, &oset); 1395 1396 if (child_pid != -1) 1397 kill(child_pid, SIGTERM); 1398 1399 sigprocmask(SIG_SETMASK, &oset, NULL); 1400 1401 if (signr == -1) 1402 return; 1403 1404 signal(signr, SIG_DFL); 1405 kill(getpid(), signr); 1406 } 1407 1408 static int stat__set_big_num(const struct option *opt __maybe_unused, 1409 const char *s __maybe_unused, int unset) 1410 { 1411 big_num_opt = unset ? 0 : 1; 1412 return 0; 1413 } 1414 1415 static int perf_stat_init_aggr_mode(void) 1416 { 1417 switch (aggr_mode) { 1418 case AGGR_SOCKET: 1419 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { 1420 perror("cannot build socket map"); 1421 return -1; 1422 } 1423 aggr_get_id = cpu_map__get_socket; 1424 break; 1425 case AGGR_CORE: 1426 if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { 1427 perror("cannot build core map"); 1428 return -1; 1429 } 1430 aggr_get_id = cpu_map__get_core; 1431 break; 1432 case AGGR_NONE: 1433 case AGGR_GLOBAL: 1434 default: 1435 break; 1436 } 1437 return 0; 1438 } 1439 1440 static int setup_events(const char * const *attrs, unsigned len) 1441 { 1442 unsigned i; 1443 1444 for (i = 0; i < len; i++) { 1445 if (parse_events(evsel_list, attrs[i])) 1446 return -1; 1447 } 1448 return 0; 1449 } 1450 1451 /* 1452 * Add default attributes, if there were no attributes specified or 1453 * if -d/--detailed, -d -d or -d -d -d is used: 1454 */ 1455 static int add_default_attributes(void) 1456 { 1457 struct perf_event_attr default_attrs[] = { 1458 1459 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 1460 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, 1461 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, 1462 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, 1463 1464 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, 1465 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, 1466 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, 1467 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, 1468 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, 1469 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, 1470 1471 }; 1472 1473 /* 1474 * Detailed stats (-d), covering the L1 and last level data caches: 1475 */ 1476 struct perf_event_attr detailed_attrs[] = { 1477 1478 { .type = PERF_TYPE_HW_CACHE, 1479 .config = 1480 PERF_COUNT_HW_CACHE_L1D << 0 | 1481 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1482 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1483 1484 { .type = PERF_TYPE_HW_CACHE, 1485 .config = 1486 PERF_COUNT_HW_CACHE_L1D << 0 | 1487 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1488 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1489 1490 { .type = PERF_TYPE_HW_CACHE, 1491 .config = 1492 PERF_COUNT_HW_CACHE_LL << 0 | 1493 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1494 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1495 1496 { .type = PERF_TYPE_HW_CACHE, 1497 .config = 1498 PERF_COUNT_HW_CACHE_LL << 0 | 1499 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1500 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1501 }; 1502 1503 /* 1504 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: 1505 */ 1506 struct perf_event_attr very_detailed_attrs[] = { 1507 1508 { .type = PERF_TYPE_HW_CACHE, 1509 .config = 1510 PERF_COUNT_HW_CACHE_L1I << 0 | 1511 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1512 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1513 1514 { .type = PERF_TYPE_HW_CACHE, 1515 .config = 1516 PERF_COUNT_HW_CACHE_L1I << 0 | 1517 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1518 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1519 1520 { .type = PERF_TYPE_HW_CACHE, 1521 .config = 1522 PERF_COUNT_HW_CACHE_DTLB << 0 | 1523 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1524 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1525 1526 { .type = PERF_TYPE_HW_CACHE, 1527 .config = 1528 PERF_COUNT_HW_CACHE_DTLB << 0 | 1529 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1530 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1531 1532 { .type = PERF_TYPE_HW_CACHE, 1533 .config = 1534 PERF_COUNT_HW_CACHE_ITLB << 0 | 1535 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1536 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1537 1538 { .type = PERF_TYPE_HW_CACHE, 1539 .config = 1540 PERF_COUNT_HW_CACHE_ITLB << 0 | 1541 (PERF_COUNT_HW_CACHE_OP_READ << 8) | 1542 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1543 1544 }; 1545 1546 /* 1547 * Very, very detailed stats (-d -d -d), adding prefetch events: 1548 */ 1549 struct perf_event_attr very_very_detailed_attrs[] = { 1550 1551 { .type = PERF_TYPE_HW_CACHE, 1552 .config = 1553 PERF_COUNT_HW_CACHE_L1D << 0 | 1554 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 1555 (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, 1556 1557 { .type = PERF_TYPE_HW_CACHE, 1558 .config = 1559 PERF_COUNT_HW_CACHE_L1D << 0 | 1560 (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | 1561 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, 1562 }; 1563 1564 /* Set attrs if no event is selected and !null_run: */ 1565 if (null_run) 1566 return 0; 1567 1568 if (transaction_run) { 1569 int err; 1570 if (pmu_have_event("cpu", "cycles-ct") && 1571 pmu_have_event("cpu", "el-start")) 1572 err = setup_events(transaction_attrs, 1573 ARRAY_SIZE(transaction_attrs)); 1574 else 1575 err = setup_events(transaction_limited_attrs, 1576 ARRAY_SIZE(transaction_limited_attrs)); 1577 if (err < 0) { 1578 fprintf(stderr, "Cannot set up transaction events\n"); 1579 return -1; 1580 } 1581 return 0; 1582 } 1583 1584 if (!evsel_list->nr_entries) { 1585 if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0) 1586 return -1; 1587 } 1588 1589 /* Detailed events get appended to the event list: */ 1590 1591 if (detailed_run < 1) 1592 return 0; 1593 1594 /* Append detailed run extra attributes: */ 1595 if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) 1596 return -1; 1597 1598 if (detailed_run < 2) 1599 return 0; 1600 1601 /* Append very detailed run extra attributes: */ 1602 if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) 1603 return -1; 1604 1605 if (detailed_run < 3) 1606 return 0; 1607 1608 /* Append very, very detailed run extra attributes: */ 1609 return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); 1610 } 1611 1612 int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) 1613 { 1614 bool append_file = false; 1615 int output_fd = 0; 1616 const char *output_name = NULL; 1617 const struct option options[] = { 1618 OPT_BOOLEAN('T', "transaction", &transaction_run, 1619 "hardware transaction statistics"), 1620 OPT_CALLBACK('e', "event", &evsel_list, "event", 1621 "event selector. use 'perf list' to list available events", 1622 parse_events_option), 1623 OPT_CALLBACK(0, "filter", &evsel_list, "filter", 1624 "event filter", parse_filter), 1625 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 1626 "child tasks do not inherit counters"), 1627 OPT_STRING('p', "pid", &target.pid, "pid", 1628 "stat events on existing process id"), 1629 OPT_STRING('t', "tid", &target.tid, "tid", 1630 "stat events on existing thread id"), 1631 OPT_BOOLEAN('a', "all-cpus", &target.system_wide, 1632 "system-wide collection from all CPUs"), 1633 OPT_BOOLEAN('g', "group", &group, 1634 "put the counters into a counter group"), 1635 OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"), 1636 OPT_INCR('v', "verbose", &verbose, 1637 "be more verbose (show counter open errors, etc)"), 1638 OPT_INTEGER('r', "repeat", &run_count, 1639 "repeat command and print average + stddev (max: 100, forever: 0)"), 1640 OPT_BOOLEAN('n', "null", &null_run, 1641 "null run - dont start any counters"), 1642 OPT_INCR('d', "detailed", &detailed_run, 1643 "detailed run - start a lot of events"), 1644 OPT_BOOLEAN('S', "sync", &sync_run, 1645 "call sync() before starting a run"), 1646 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 1647 "print large numbers with thousands\' separators", 1648 stat__set_big_num), 1649 OPT_STRING('C', "cpu", &target.cpu_list, "cpu", 1650 "list of cpus to monitor in system-wide"), 1651 OPT_SET_UINT('A', "no-aggr", &aggr_mode, 1652 "disable CPU count aggregation", AGGR_NONE), 1653 OPT_STRING('x', "field-separator", &csv_sep, "separator", 1654 "print counts with custom separator"), 1655 OPT_CALLBACK('G', "cgroup", &evsel_list, "name", 1656 "monitor event in cgroup name only", parse_cgroups), 1657 OPT_STRING('o', "output", &output_name, "file", "output file name"), 1658 OPT_BOOLEAN(0, "append", &append_file, "append to the output file"), 1659 OPT_INTEGER(0, "log-fd", &output_fd, 1660 "log output to fd, instead of stderr"), 1661 OPT_STRING(0, "pre", &pre_cmd, "command", 1662 "command to run prior to the measured command"), 1663 OPT_STRING(0, "post", &post_cmd, "command", 1664 "command to run after to the measured command"), 1665 OPT_UINTEGER('I', "interval-print", &interval, 1666 "print counts at regular interval in ms (>= 100)"), 1667 OPT_SET_UINT(0, "per-socket", &aggr_mode, 1668 "aggregate counts per processor socket", AGGR_SOCKET), 1669 OPT_SET_UINT(0, "per-core", &aggr_mode, 1670 "aggregate counts per physical processor core", AGGR_CORE), 1671 OPT_UINTEGER('D', "delay", &initial_delay, 1672 "ms to wait before starting measurement after program start"), 1673 OPT_END() 1674 }; 1675 const char * const stat_usage[] = { 1676 "perf stat [<options>] [<command>]", 1677 NULL 1678 }; 1679 int status = -EINVAL, run_idx; 1680 const char *mode; 1681 1682 setlocale(LC_ALL, ""); 1683 1684 evsel_list = perf_evlist__new(); 1685 if (evsel_list == NULL) 1686 return -ENOMEM; 1687 1688 argc = parse_options(argc, argv, options, stat_usage, 1689 PARSE_OPT_STOP_AT_NON_OPTION); 1690 1691 output = stderr; 1692 if (output_name && strcmp(output_name, "-")) 1693 output = NULL; 1694 1695 if (output_name && output_fd) { 1696 fprintf(stderr, "cannot use both --output and --log-fd\n"); 1697 parse_options_usage(stat_usage, options, "o", 1); 1698 parse_options_usage(NULL, options, "log-fd", 0); 1699 goto out; 1700 } 1701 1702 if (output_fd < 0) { 1703 fprintf(stderr, "argument to --log-fd must be a > 0\n"); 1704 parse_options_usage(stat_usage, options, "log-fd", 0); 1705 goto out; 1706 } 1707 1708 if (!output) { 1709 struct timespec tm; 1710 mode = append_file ? "a" : "w"; 1711 1712 output = fopen(output_name, mode); 1713 if (!output) { 1714 perror("failed to create output file"); 1715 return -1; 1716 } 1717 clock_gettime(CLOCK_REALTIME, &tm); 1718 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); 1719 } else if (output_fd > 0) { 1720 mode = append_file ? "a" : "w"; 1721 output = fdopen(output_fd, mode); 1722 if (!output) { 1723 perror("Failed opening logfd"); 1724 return -errno; 1725 } 1726 } 1727 1728 if (csv_sep) { 1729 csv_output = true; 1730 if (!strcmp(csv_sep, "\\t")) 1731 csv_sep = "\t"; 1732 } else 1733 csv_sep = DEFAULT_SEPARATOR; 1734 1735 /* 1736 * let the spreadsheet do the pretty-printing 1737 */ 1738 if (csv_output) { 1739 /* User explicitly passed -B? */ 1740 if (big_num_opt == 1) { 1741 fprintf(stderr, "-B option not supported with -x\n"); 1742 parse_options_usage(stat_usage, options, "B", 1); 1743 parse_options_usage(NULL, options, "x", 1); 1744 goto out; 1745 } else /* Nope, so disable big number formatting */ 1746 big_num = false; 1747 } else if (big_num_opt == 0) /* User passed --no-big-num */ 1748 big_num = false; 1749 1750 if (!argc && target__none(&target)) 1751 usage_with_options(stat_usage, options); 1752 1753 if (run_count < 0) { 1754 pr_err("Run count must be a positive number\n"); 1755 parse_options_usage(stat_usage, options, "r", 1); 1756 goto out; 1757 } else if (run_count == 0) { 1758 forever = true; 1759 run_count = 1; 1760 } 1761 1762 /* no_aggr, cgroup are for system-wide only */ 1763 if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) && 1764 !target__has_cpu(&target)) { 1765 fprintf(stderr, "both cgroup and no-aggregation " 1766 "modes only available in system-wide mode\n"); 1767 1768 parse_options_usage(stat_usage, options, "G", 1); 1769 parse_options_usage(NULL, options, "A", 1); 1770 parse_options_usage(NULL, options, "a", 1); 1771 goto out; 1772 } 1773 1774 if (add_default_attributes()) 1775 goto out; 1776 1777 target__validate(&target); 1778 1779 if (perf_evlist__create_maps(evsel_list, &target) < 0) { 1780 if (target__has_task(&target)) { 1781 pr_err("Problems finding threads of monitor\n"); 1782 parse_options_usage(stat_usage, options, "p", 1); 1783 parse_options_usage(NULL, options, "t", 1); 1784 } else if (target__has_cpu(&target)) { 1785 perror("failed to parse CPUs map"); 1786 parse_options_usage(stat_usage, options, "C", 1); 1787 parse_options_usage(NULL, options, "a", 1); 1788 } 1789 goto out; 1790 } 1791 if (interval && interval < 100) { 1792 pr_err("print interval must be >= 100ms\n"); 1793 parse_options_usage(stat_usage, options, "I", 1); 1794 goto out; 1795 } 1796 1797 if (perf_evlist__alloc_stats(evsel_list, interval)) 1798 goto out; 1799 1800 if (perf_stat_init_aggr_mode()) 1801 goto out; 1802 1803 /* 1804 * We dont want to block the signals - that would cause 1805 * child tasks to inherit that and Ctrl-C would not work. 1806 * What we want is for Ctrl-C to work in the exec()-ed 1807 * task, but being ignored by perf stat itself: 1808 */ 1809 atexit(sig_atexit); 1810 if (!forever) 1811 signal(SIGINT, skip_signal); 1812 signal(SIGCHLD, skip_signal); 1813 signal(SIGALRM, skip_signal); 1814 signal(SIGABRT, skip_signal); 1815 1816 status = 0; 1817 for (run_idx = 0; forever || run_idx < run_count; run_idx++) { 1818 if (run_count != 1 && verbose) 1819 fprintf(output, "[ perf stat: executing run #%d ... ]\n", 1820 run_idx + 1); 1821 1822 status = run_perf_stat(argc, argv); 1823 if (forever && status != -1) { 1824 print_stat(argc, argv); 1825 perf_stat__reset_stats(evsel_list); 1826 } 1827 } 1828 1829 if (!forever && status != -1 && !interval) 1830 print_stat(argc, argv); 1831 1832 perf_evlist__free_stats(evsel_list); 1833 out: 1834 perf_evlist__delete(evsel_list); 1835 return status; 1836 } 1837