// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include "evsel.h"
#include "stat.h"
#include "color.h"
#include "pmu.h"
#include "rblist.h"
#include "evlist.h"
#include "expr.h"
#include "metricgroup.h"

/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */
static bool have_frontend_stalled;

struct runtime_stat rt_stat;
struct stats walltime_nsecs_stats;

struct saved_value {
	struct rb_node rb_node;
	struct perf_evsel *evsel;
	enum stat_type type;
	int ctx;
	int cpu;
	struct runtime_stat *stat;
	struct stats stats;
};
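
/*
 * Illustrative lookup keys (see saved_value_cmp() below): a generic-metric
 * referenced event is keyed as { .evsel = counter, .cpu = cpu } with
 * type/ctx/stat left 0/NULL, while a per-thread shadow stat is keyed as
 * { .evsel = NULL, .cpu = cpu, .type = STAT_CYCLES, .ctx = ctx, .stat = st }.
 */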

static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
{
	struct saved_value *a = container_of(rb_node,
					     struct saved_value,
					     rb_node);
	const struct saved_value *b = entry;

	if (a->cpu != b->cpu)
		return a->cpu - b->cpu;

	/*
	 * Previously the rbtree was used to link generic metrics.
	 * The keys were evsel/cpu. Now the rbtree is extended to support
	 * per-thread shadow stats. For shadow stats case, the keys
	 * are cpu/type/ctx/stat (evsel is NULL). For generic metrics
	 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
	 */
	if (a->type != b->type)
		return a->type - b->type;

	if (a->ctx != b->ctx)
		return a->ctx - b->ctx;

	if (a->evsel == NULL && b->evsel == NULL) {
		if (a->stat == b->stat)
			return 0;

		if ((char *)a->stat < (char *)b->stat)
			return -1;

		return 1;
	}

	if (a->evsel == b->evsel)
		return 0;
	if ((char *)a->evsel < (char *)b->evsel)
		return -1;
	return +1;
}

static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
				       const void *entry)
{
	struct saved_value *nd = malloc(sizeof(struct saved_value));

	if (!nd)
		return NULL;
	memcpy(nd, entry, sizeof(struct saved_value));
	return &nd->rb_node;
}

static void saved_value_delete(struct rblist *rblist __maybe_unused,
			       struct rb_node *rb_node)
{
	struct saved_value *v;

	BUG_ON(!rb_node);
	v = container_of(rb_node, struct saved_value, rb_node);
	free(v);
}

static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
					      int cpu,
					      bool create,
					      enum stat_type type,
					      int ctx,
					      struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *nd;
	struct saved_value dm = {
		.cpu = cpu,
		.evsel = evsel,
		.type = type,
		.ctx = ctx,
		.stat = st,
	};

	rblist = &st->value_list;

	nd = rblist__find(rblist, &dm);
	if (nd)
		return container_of(nd, struct saved_value, rb_node);
	if (create) {
		rblist__add_node(rblist, &dm);
		nd = rblist__find(rblist, &dm);
		if (nd)
			return container_of(nd, struct saved_value, rb_node);
	}
	return NULL;
}

void runtime_stat__init(struct runtime_stat *st)
{
	struct rblist *rblist = &st->value_list;

	rblist__init(rblist);
	rblist->node_cmp = saved_value_cmp;
	rblist->node_new = saved_value_new;
	rblist->node_delete = saved_value_delete;
}

void runtime_stat__exit(struct runtime_stat *st)
{
	rblist__exit(&st->value_list);
}

void perf_stat__init_shadow_stats(void)
{
	have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
	runtime_stat__init(&rt_stat);
}

static int evsel_context(struct perf_evsel *evsel)
{
	int ctx = 0;

	if (evsel->attr.exclude_kernel)
		ctx |= CTX_BIT_KERNEL;
	if (evsel->attr.exclude_user)
		ctx |= CTX_BIT_USER;
	if (evsel->attr.exclude_hv)
		ctx |= CTX_BIT_HV;
	if (evsel->attr.exclude_host)
		ctx |= CTX_BIT_HOST;
	if (evsel->attr.exclude_idle)
		ctx |= CTX_BIT_IDLE;

	return ctx;
}
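
/*
 * For example (illustrative): "cycles:u" and "cycles:k" end up with different
 * ctx values because of their exclude_* bits, so their shadow stats are kept
 * apart and ratios are only formed from counts of the same context.
 */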

static void reset_stat(struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *pos, *next;

	rblist = &st->value_list;
	next = rb_first(&rblist->entries);
	while (next) {
		pos = next;
		next = rb_next(pos);
		memset(&container_of(pos, struct saved_value, rb_node)->stats,
		       0,
		       sizeof(struct stats));
	}
}

void perf_stat__reset_shadow_stats(void)
{
	reset_stat(&rt_stat);
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}

void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
{
	reset_stat(st);
}

static void update_runtime_stat(struct runtime_stat *st,
				enum stat_type type,
				int ctx, int cpu, u64 count)
{
	struct saved_value *v = saved_value_lookup(NULL, cpu, true,
						   type, ctx, st);

	if (v)
		update_stats(&v->stats, count);
}

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
				    int cpu, struct runtime_stat *st)
{
	int ctx = evsel_context(counter);

	count *= counter->scale;

	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) ||
	    perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK))
		update_runtime_stat(st, STAT_NSECS, 0, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
		update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
		update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, ELISION_START))
		update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
		update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, SMI_NUM))
		update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, APERF))
		update_runtime_stat(st, STAT_APERF, ctx, cpu, count);

	if (counter->collect_stat) {
		struct saved_value *v = saved_value_lookup(counter, cpu, true,
							   STAT_NONE, 0, st);
		update_stats(&v->stats, count);
	}
}

/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES]	= { 20.0, 10.0, 5.0 },
	};
	const char *color = PERF_COLOR_NORMAL;

	if (ratio > grc_table[type][0])
		color = PERF_COLOR_RED;
	else if (ratio > grc_table[type][1])
		color = PERF_COLOR_MAGENTA;
	else if (ratio > grc_table[type][2])
		color = PERF_COLOR_YELLOW;

	return color;
}

static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list,
						const char *name)
{
	struct perf_evsel *c2;

	evlist__for_each_entry (evsel_list, c2) {
		if (!strcasecmp(c2->name, name))
			return c2;
	}
	return NULL;
}

/* Mark MetricExpr target events and link events using them to them. */
void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
{
	struct perf_evsel *counter, *leader, **metric_events, *oc;
	bool found;
	const char **metric_names;
	int i;
	int num_metric_names;

	evlist__for_each_entry(evsel_list, counter) {
		bool invalid = false;

		leader = counter->leader;
		if (!counter->metric_expr)
			continue;
		metric_events = counter->metric_events;
		if (!metric_events) {
			if (expr__find_other(counter->metric_expr, counter->name,
					     &metric_names, &num_metric_names) < 0)
				continue;

			metric_events = calloc(sizeof(struct perf_evsel *),
					       num_metric_names + 1);
			if (!metric_events)
				return;
			counter->metric_events = metric_events;
		}

		for (i = 0; i < num_metric_names; i++) {
			found = false;
			if (leader) {
				/* Search in group */
				for_each_group_member (oc, leader) {
					if (!strcasecmp(oc->name, metric_names[i])) {
						found = true;
						break;
					}
				}
			}
			if (!found) {
				/* Search ignoring groups */
				oc = perf_stat__find_event(evsel_list, metric_names[i]);
			}
			if (!oc) {
				/* Deduping one is good enough to handle duplicated PMUs. */
				static char *printed;

				/*
				 * Adding events automatically would be difficult, because
				 * it would risk creating groups that are not schedulable.
				 * perf stat doesn't understand all the scheduling constraints
				 * of events. So we ask the user instead to add the missing
				 * events.
				 */
				if (!printed || strcasecmp(printed, metric_names[i])) {
					fprintf(stderr,
						"Add %s event to groups to get metric expression for %s\n",
						metric_names[i],
						counter->name);
					printed = strdup(metric_names[i]);
				}
				invalid = true;
				continue;
			}
			metric_events[i] = oc;
			oc->collect_stat = true;
		}
		metric_events[i] = NULL;
		free(metric_names);
		if (invalid) {
			free(metric_events);
			counter->metric_events = NULL;
			counter->metric_expr = NULL;
		}
	}
}

static double runtime_stat_avg(struct runtime_stat *st,
			       enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return avg_stats(&v->stats);
}

static double runtime_stat_n(struct runtime_stat *st,
			     enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return v->stats.n;
}

static void print_stalled_cycles_frontend(int cpu,
					  struct perf_evsel *evsel, double avg,
					  struct perf_stat_output_ctx *out,
					  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

	if (ratio)
		out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
				  ratio);
	else
		out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
}

static void print_stalled_cycles_backend(int cpu,
					 struct perf_evsel *evsel, double avg,
					 struct perf_stat_output_ctx *out,
					 struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

	out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
}

static void print_branch_misses(int cpu,
				struct perf_evsel *evsel,
				double avg,
				struct perf_stat_output_ctx *out,
				struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
}

static void print_l1_dcache_misses(int cpu,
				   struct perf_evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
}

static void print_l1_icache_misses(int cpu,
				   struct perf_evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
}

static void print_dtlb_cache_misses(int cpu,
				    struct perf_evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
}

static void print_itlb_cache_misses(int cpu,
				    struct perf_evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
}

static void print_ll_cache_misses(int cpu,
				  struct perf_evsel *evsel,
				  double avg,
				  struct perf_stat_output_ctx *out,
				  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
}

/*
 * High level "TopDown" CPU core pipe line bottleneck break down.
 *
 * Basic concept following
 * Yasin, A Top Down Method for Performance analysis and Counter architecture
 * ISPASS14
 *
 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
 *
 * Frontend -> Backend -> Retiring
 * BadSpeculation in addition means out of order execution that is thrown away
 * (for example branch mispredictions)
 * Frontend is instruction decoding.
 * Backend is execution, like computation and accessing data in memory
 * Retiring is good execution that is not directly bottlenecked
 *
 * The formulas are computed in slots.
 * A slot is an entry in the pipeline each for the pipeline width
 * (for example a 4-wide pipeline has 4 slots for each cycle)
 *
 * Formulas:
 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
 *			TotalSlots
 * Retiring = SlotsRetired / TotalSlots
 * FrontendBound = FetchBubbles / TotalSlots
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
 *
 * The kernel provides the mapping to the low level CPU events and any scaling
 * needed for the CPU pipeline width, for example:
 *
 * TotalSlots = Cycles * 4
 *
 * The scaling factor is communicated in the sysfs unit.
 *
 * In some cases the CPU may not be able to measure all the formulas due to
 * missing events. In this case multiple formulas are combined, as possible.
 *
 * Full TopDown supports more levels to sub-divide each area: for example
 * BackendBound into computing bound and memory bound. For now we only
 * support Level 1 TopDown.
 */
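
/*
 * Worked example with made-up numbers, assuming a 4-wide pipeline
 * (TotalSlots = Cycles * 4):
 *
 *   Cycles = 1000 -> TotalSlots = 4000
 *   SlotsIssued = 3000, SlotsRetired = 2400,
 *   RecoveryBubbles = 200, FetchBubbles = 600
 *
 *   BadSpeculation = ((3000 - 2400) + 200) / 4000 = 0.20
 *   Retiring       = 2400 / 4000                  = 0.60
 *   FrontendBound  =  600 / 4000                  = 0.15
 *   BackendBound   = 1.0 - 0.20 - 0.60 - 0.15     = 0.05
 */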

static double sanitize_val(double x)
{
	if (x < 0 && x >= -0.02)
		return 0.0;
	return x;
}

static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
{
	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
}

static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
{
	double bad_spec = 0;
	double total_slots;
	double total;

	total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
		runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
		runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);

	total_slots = td_total_slots(ctx, cpu, st);
	if (total_slots)
		bad_spec = total / total_slots;
	return sanitize_val(bad_spec);
}

static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
{
	double retiring = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
					    ctx, cpu);

	if (total_slots)
		retiring = ret_slots / total_slots;
	return retiring;
}

static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double fe_bound = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
					    ctx, cpu);

	if (total_slots)
		fe_bound = fetch_bub / total_slots;
	return fe_bound;
}

static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double sum = (td_fe_bound(ctx, cpu, st) +
		      td_bad_spec(ctx, cpu, st) +
		      td_retiring(ctx, cpu, st));
	if (sum == 0)
		return 0;
	return sanitize_val(1.0 - sum);
}

static void print_smi_cost(int cpu, struct perf_evsel *evsel,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	double smi_num, aperf, cycles, cost = 0.0;
	int ctx = evsel_context(evsel);
	const char *color = NULL;

	smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
	aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
	cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if ((cycles == 0) || (aperf == 0))
		return;

	if (smi_num)
		cost = (aperf - cycles) / aperf * 100.00;

	if (cost > 10)
		color = PERF_COLOR_RED;
	out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
	out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}
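
/*
 * Illustrative example (made-up metric): for a counter whose MetricExpr is
 * "instructions / cycles", generic_metric() below registers the counter's own
 * average under its name, registers the saved average of each referenced
 * event ("cycles" here), and prints whatever the expression parser returns.
 */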

static void generic_metric(const char *metric_expr,
			   struct perf_evsel **metric_events,
			   char *name,
			   const char *metric_name,
			   double avg,
			   int cpu,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	print_metric_t print_metric = out->print_metric;
	struct parse_ctx pctx;
	double ratio;
	int i;
	void *ctxp = out->ctx;

	expr__ctx_init(&pctx);
	expr__add_id(&pctx, name, avg);
	for (i = 0; metric_events[i]; i++) {
		struct saved_value *v;
		struct stats *stats;
		double scale;

		if (!strcmp(metric_events[i]->name, "duration_time")) {
			stats = &walltime_nsecs_stats;
			scale = 1e-9;
		} else {
			v = saved_value_lookup(metric_events[i], cpu, false,
					       STAT_NONE, 0, st);
			if (!v)
				break;
			stats = &v->stats;
			scale = 1.0;
		}
		expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats)*scale);
	}
	if (!metric_events[i]) {
		const char *p = metric_expr;

		if (expr__parse(&ratio, &pctx, &p) == 0)
			print_metric(ctxp, NULL, "%8.1f",
				metric_name ?
				metric_name :
				out->force_header ?  name : "",
				ratio);
		else
			print_metric(ctxp, NULL, NULL,
				     out->force_header ?
				     (metric_name ? metric_name : name) : "", 0);
	} else
		print_metric(ctxp, NULL, NULL, "", 0);
}

void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
				   double avg, int cpu,
				   struct perf_stat_output_ctx *out,
				   struct rblist *metric_events,
				   struct runtime_stat *st)
{
	void *ctxp = out->ctx;
	print_metric_t print_metric = out->print_metric;
	double total, ratio = 0.0, total2;
	const char *color = NULL;
	int ctx = evsel_context(evsel);
	struct metric_event *me;
	int num = 1;

	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(ctxp, NULL, "%7.2f ",
					"insn per cycle", ratio);
		} else {
			print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
		}

		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
					 ctx, cpu);

		total = max(total, runtime_stat_avg(st,
						    STAT_STALLED_CYCLES_BACK,
						    ctx, cpu));

		if (total && avg) {
			out->new_line(ctxp);
			ratio = total / avg;
			print_metric(ctxp, NULL, "%7.2f ",
					"stalled cycles per insn",
					ratio);
		} else if (have_frontend_stalled) {
			print_metric(ctxp, NULL, NULL,
				     "stalled cycles per insn", 0);
		}
	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
		if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
			print_branch_misses(cpu, evsel, avg, out, st);
		else
			print_metric(ctxp, NULL, NULL, "of all branches", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
			print_l1_dcache_misses(cpu, evsel, avg, out, st);
		else
			print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
			print_l1_icache_misses(cpu, evsel, avg, out, st);
		else
			print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
			print_dtlb_cache_misses(cpu, evsel, avg, out, st);
		else
			print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
			print_itlb_cache_misses(cpu, evsel, avg, out, st);
		else
			print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
			print_ll_cache_misses(cpu, evsel, avg, out, st);
		else
			print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
		total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);

		if (total)
			ratio = avg * 100 / total;

		if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
			print_metric(ctxp, NULL, "%8.3f %%",
				     "of all cache refs", ratio);
		else
			print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(cpu, evsel, avg, out, st);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(cpu, evsel, avg, out, st);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
		} else {
			print_metric(ctxp, NULL, NULL, "Ghz", 0);
		}
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total)
			print_metric(ctxp, NULL,
				"%7.2f%%", "transactional cycles",
				100.0 * (avg / total));
		else
			print_metric(ctxp, NULL, NULL, "transactional cycles",
				     0);
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);

		if (total2 < avg)
			total2 = avg;
		if (total)
			print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
				100.0 * ((total2-avg) / total));
		else
			print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
					 ctx, cpu);

		if (avg)
			ratio = total / avg;

		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
			print_metric(ctxp, NULL, "%8.0f",
				     "cycles / transaction", ratio);
		else
			print_metric(ctxp, NULL, NULL, "cycles / transaction",
				      0);
	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
					 ctx, cpu);

		if (avg)
			ratio = total / avg;

		print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
	} else if (perf_evsel__is_clock(evsel)) {
		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
			print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
				     avg / (ratio * evsel->scale));
		else
			print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
		double fe_bound = td_fe_bound(ctx, cpu, st);

		if (fe_bound > 0.2)
			color = PERF_COLOR_RED;
		print_metric(ctxp, color, "%8.1f%%", "frontend bound",
				fe_bound * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
		double retiring = td_retiring(ctx, cpu, st);

		if (retiring > 0.7)
			color = PERF_COLOR_GREEN;
		print_metric(ctxp, color, "%8.1f%%", "retiring",
				retiring * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
		double bad_spec = td_bad_spec(ctx, cpu, st);

		if (bad_spec > 0.1)
			color = PERF_COLOR_RED;
		print_metric(ctxp, color, "%8.1f%%", "bad speculation",
				bad_spec * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
		double be_bound = td_be_bound(ctx, cpu, st);
		const char *name = "backend bound";
		static int have_recovery_bubbles = -1;

		/* In case the CPU does not support topdown-recovery-bubbles */
		if (have_recovery_bubbles < 0)
			have_recovery_bubbles = pmu_have_event("cpu",
					"topdown-recovery-bubbles");
		if (!have_recovery_bubbles)
			name = "backend bound/bad spec";

		if (be_bound > 0.2)
			color = PERF_COLOR_RED;
		if (td_total_slots(ctx, cpu, st) > 0)
			print_metric(ctxp, color, "%8.1f%%", name,
					be_bound * 100.);
		else
			print_metric(ctxp, NULL, NULL, name, 0);
	} else if (evsel->metric_expr) {
		generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name,
				evsel->metric_name, avg, cpu, out, st);
	} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
		char unit = 'M';
		char unit_buf[10];

		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total)
			ratio = 1000.0 * avg / total;
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}
		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
		print_smi_cost(cpu, evsel, out, st);
	} else {
		num = 0;
	}

	if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
		struct metric_expr *mexp;

		list_for_each_entry (mexp, &me->head, nd) {
			if (num++ > 0)
				out->new_line(ctxp);
			generic_metric(mexp->metric_expr, mexp->metric_events,
					evsel->name, mexp->metric_name,
					avg, cpu, out, st);
		}
	}
	if (num == 0)
		print_metric(ctxp, NULL, NULL, NULL, 0);
}