1 // SPDX-License-Identifier: GPL-2.0 2 #include <stdio.h> 3 #include "evsel.h" 4 #include "stat.h" 5 #include "color.h" 6 #include "pmu.h" 7 #include "rblist.h" 8 #include "evlist.h" 9 #include "expr.h" 10 #include "metricgroup.h" 11 12 /* 13 * AGGR_GLOBAL: Use CPU 0 14 * AGGR_SOCKET: Use first CPU of socket 15 * AGGR_CORE: Use first CPU of core 16 * AGGR_NONE: Use matching CPU 17 * AGGR_THREAD: Not supported? 18 */ 19 static bool have_frontend_stalled; 20 21 struct runtime_stat rt_stat; 22 struct stats walltime_nsecs_stats; 23 24 struct saved_value { 25 struct rb_node rb_node; 26 struct perf_evsel *evsel; 27 enum stat_type type; 28 int ctx; 29 int cpu; 30 struct runtime_stat *stat; 31 struct stats stats; 32 }; 33 34 static int saved_value_cmp(struct rb_node *rb_node, const void *entry) 35 { 36 struct saved_value *a = container_of(rb_node, 37 struct saved_value, 38 rb_node); 39 const struct saved_value *b = entry; 40 41 if (a->cpu != b->cpu) 42 return a->cpu - b->cpu; 43 44 /* 45 * Previously the rbtree was used to link generic metrics. 46 * The keys were evsel/cpu. Now the rbtree is extended to support 47 * per-thread shadow stats. For shadow stats case, the keys 48 * are cpu/type/ctx/stat (evsel is NULL). For generic metrics 49 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL). 50 */ 51 if (a->type != b->type) 52 return a->type - b->type; 53 54 if (a->ctx != b->ctx) 55 return a->ctx - b->ctx; 56 57 if (a->evsel == NULL && b->evsel == NULL) { 58 if (a->stat == b->stat) 59 return 0; 60 61 if ((char *)a->stat < (char *)b->stat) 62 return -1; 63 64 return 1; 65 } 66 67 if (a->evsel == b->evsel) 68 return 0; 69 if ((char *)a->evsel < (char *)b->evsel) 70 return -1; 71 return +1; 72 } 73 74 static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused, 75 const void *entry) 76 { 77 struct saved_value *nd = malloc(sizeof(struct saved_value)); 78 79 if (!nd) 80 return NULL; 81 memcpy(nd, entry, sizeof(struct saved_value)); 82 return &nd->rb_node; 83 } 84 85 static void saved_value_delete(struct rblist *rblist __maybe_unused, 86 struct rb_node *rb_node) 87 { 88 struct saved_value *v; 89 90 BUG_ON(!rb_node); 91 v = container_of(rb_node, struct saved_value, rb_node); 92 free(v); 93 } 94 95 static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, 96 int cpu, 97 bool create, 98 enum stat_type type, 99 int ctx, 100 struct runtime_stat *st) 101 { 102 struct rblist *rblist; 103 struct rb_node *nd; 104 struct saved_value dm = { 105 .cpu = cpu, 106 .evsel = evsel, 107 .type = type, 108 .ctx = ctx, 109 .stat = st, 110 }; 111 112 rblist = &st->value_list; 113 114 nd = rblist__find(rblist, &dm); 115 if (nd) 116 return container_of(nd, struct saved_value, rb_node); 117 if (create) { 118 rblist__add_node(rblist, &dm); 119 nd = rblist__find(rblist, &dm); 120 if (nd) 121 return container_of(nd, struct saved_value, rb_node); 122 } 123 return NULL; 124 } 125 126 void runtime_stat__init(struct runtime_stat *st) 127 { 128 struct rblist *rblist = &st->value_list; 129 130 rblist__init(rblist); 131 rblist->node_cmp = saved_value_cmp; 132 rblist->node_new = saved_value_new; 133 rblist->node_delete = saved_value_delete; 134 } 135 136 void runtime_stat__exit(struct runtime_stat *st) 137 { 138 rblist__exit(&st->value_list); 139 } 140 141 void perf_stat__init_shadow_stats(void) 142 { 143 have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); 144 runtime_stat__init(&rt_stat); 145 } 146 147 static int evsel_context(struct perf_evsel *evsel) 148 { 149 int ctx = 0; 150 151 if (evsel->attr.exclude_kernel) 152 ctx |= CTX_BIT_KERNEL; 153 if (evsel->attr.exclude_user) 154 ctx |= CTX_BIT_USER; 155 if (evsel->attr.exclude_hv) 156 ctx |= CTX_BIT_HV; 157 if (evsel->attr.exclude_host) 158 ctx |= CTX_BIT_HOST; 159 if (evsel->attr.exclude_idle) 160 ctx |= CTX_BIT_IDLE; 161 162 return ctx; 163 } 164 165 static void reset_stat(struct runtime_stat *st) 166 { 167 struct rblist *rblist; 168 struct rb_node *pos, *next; 169 170 rblist = &st->value_list; 171 next = rb_first_cached(&rblist->entries); 172 while (next) { 173 pos = next; 174 next = rb_next(pos); 175 memset(&container_of(pos, struct saved_value, rb_node)->stats, 176 0, 177 sizeof(struct stats)); 178 } 179 } 180 181 void perf_stat__reset_shadow_stats(void) 182 { 183 reset_stat(&rt_stat); 184 memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); 185 } 186 187 void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) 188 { 189 reset_stat(st); 190 } 191 192 static void update_runtime_stat(struct runtime_stat *st, 193 enum stat_type type, 194 int ctx, int cpu, u64 count) 195 { 196 struct saved_value *v = saved_value_lookup(NULL, cpu, true, 197 type, ctx, st); 198 199 if (v) 200 update_stats(&v->stats, count); 201 } 202 203 /* 204 * Update various tracking values we maintain to print 205 * more semantic information such as miss/hit ratios, 206 * instruction rates, etc: 207 */ 208 void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, 209 int cpu, struct runtime_stat *st) 210 { 211 int ctx = evsel_context(counter); 212 u64 count_ns = count; 213 214 count *= counter->scale; 215 216 if (perf_evsel__is_clock(counter)) 217 update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns); 218 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) 219 update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); 220 else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) 221 update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count); 222 else if (perf_stat_evsel__is(counter, TRANSACTION_START)) 223 update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count); 224 else if (perf_stat_evsel__is(counter, ELISION_START)) 225 update_runtime_stat(st, STAT_ELISION, ctx, cpu, count); 226 else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) 227 update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS, 228 ctx, cpu, count); 229 else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) 230 update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED, 231 ctx, cpu, count); 232 else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) 233 update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED, 234 ctx, cpu, count); 235 else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) 236 update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES, 237 ctx, cpu, count); 238 else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) 239 update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, 240 ctx, cpu, count); 241 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) 242 update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, 243 ctx, cpu, count); 244 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) 245 update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, 246 ctx, cpu, count); 247 else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) 248 update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count); 249 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) 250 update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count); 251 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) 252 update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count); 253 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) 254 update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count); 255 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) 256 update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count); 257 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) 258 update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count); 259 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) 260 update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count); 261 else if (perf_stat_evsel__is(counter, SMI_NUM)) 262 update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count); 263 else if (perf_stat_evsel__is(counter, APERF)) 264 update_runtime_stat(st, STAT_APERF, ctx, cpu, count); 265 266 if (counter->collect_stat) { 267 struct saved_value *v = saved_value_lookup(counter, cpu, true, 268 STAT_NONE, 0, st); 269 update_stats(&v->stats, count); 270 } 271 } 272 273 /* used for get_ratio_color() */ 274 enum grc_type { 275 GRC_STALLED_CYCLES_FE, 276 GRC_STALLED_CYCLES_BE, 277 GRC_CACHE_MISSES, 278 GRC_MAX_NR 279 }; 280 281 static const char *get_ratio_color(enum grc_type type, double ratio) 282 { 283 static const double grc_table[GRC_MAX_NR][3] = { 284 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 }, 285 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 }, 286 [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 }, 287 }; 288 const char *color = PERF_COLOR_NORMAL; 289 290 if (ratio > grc_table[type][0]) 291 color = PERF_COLOR_RED; 292 else if (ratio > grc_table[type][1]) 293 color = PERF_COLOR_MAGENTA; 294 else if (ratio > grc_table[type][2]) 295 color = PERF_COLOR_YELLOW; 296 297 return color; 298 } 299 300 static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list, 301 const char *name) 302 { 303 struct perf_evsel *c2; 304 305 evlist__for_each_entry (evsel_list, c2) { 306 if (!strcasecmp(c2->name, name)) 307 return c2; 308 } 309 return NULL; 310 } 311 312 /* Mark MetricExpr target events and link events using them to them. */ 313 void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list) 314 { 315 struct perf_evsel *counter, *leader, **metric_events, *oc; 316 bool found; 317 const char **metric_names; 318 int i; 319 int num_metric_names; 320 321 evlist__for_each_entry(evsel_list, counter) { 322 bool invalid = false; 323 324 leader = counter->leader; 325 if (!counter->metric_expr) 326 continue; 327 metric_events = counter->metric_events; 328 if (!metric_events) { 329 if (expr__find_other(counter->metric_expr, counter->name, 330 &metric_names, &num_metric_names) < 0) 331 continue; 332 333 metric_events = calloc(sizeof(struct perf_evsel *), 334 num_metric_names + 1); 335 if (!metric_events) 336 return; 337 counter->metric_events = metric_events; 338 } 339 340 for (i = 0; i < num_metric_names; i++) { 341 found = false; 342 if (leader) { 343 /* Search in group */ 344 for_each_group_member (oc, leader) { 345 if (!strcasecmp(oc->name, metric_names[i])) { 346 found = true; 347 break; 348 } 349 } 350 } 351 if (!found) { 352 /* Search ignoring groups */ 353 oc = perf_stat__find_event(evsel_list, metric_names[i]); 354 } 355 if (!oc) { 356 /* Deduping one is good enough to handle duplicated PMUs. */ 357 static char *printed; 358 359 /* 360 * Adding events automatically would be difficult, because 361 * it would risk creating groups that are not schedulable. 362 * perf stat doesn't understand all the scheduling constraints 363 * of events. So we ask the user instead to add the missing 364 * events. 365 */ 366 if (!printed || strcasecmp(printed, metric_names[i])) { 367 fprintf(stderr, 368 "Add %s event to groups to get metric expression for %s\n", 369 metric_names[i], 370 counter->name); 371 printed = strdup(metric_names[i]); 372 } 373 invalid = true; 374 continue; 375 } 376 metric_events[i] = oc; 377 oc->collect_stat = true; 378 } 379 metric_events[i] = NULL; 380 free(metric_names); 381 if (invalid) { 382 free(metric_events); 383 counter->metric_events = NULL; 384 counter->metric_expr = NULL; 385 } 386 } 387 } 388 389 static double runtime_stat_avg(struct runtime_stat *st, 390 enum stat_type type, int ctx, int cpu) 391 { 392 struct saved_value *v; 393 394 v = saved_value_lookup(NULL, cpu, false, type, ctx, st); 395 if (!v) 396 return 0.0; 397 398 return avg_stats(&v->stats); 399 } 400 401 static double runtime_stat_n(struct runtime_stat *st, 402 enum stat_type type, int ctx, int cpu) 403 { 404 struct saved_value *v; 405 406 v = saved_value_lookup(NULL, cpu, false, type, ctx, st); 407 if (!v) 408 return 0.0; 409 410 return v->stats.n; 411 } 412 413 static void print_stalled_cycles_frontend(struct perf_stat_config *config, 414 int cpu, 415 struct perf_evsel *evsel, double avg, 416 struct perf_stat_output_ctx *out, 417 struct runtime_stat *st) 418 { 419 double total, ratio = 0.0; 420 const char *color; 421 int ctx = evsel_context(evsel); 422 423 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 424 425 if (total) 426 ratio = avg / total * 100.0; 427 428 color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); 429 430 if (ratio) 431 out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle", 432 ratio); 433 else 434 out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0); 435 } 436 437 static void print_stalled_cycles_backend(struct perf_stat_config *config, 438 int cpu, 439 struct perf_evsel *evsel, double avg, 440 struct perf_stat_output_ctx *out, 441 struct runtime_stat *st) 442 { 443 double total, ratio = 0.0; 444 const char *color; 445 int ctx = evsel_context(evsel); 446 447 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 448 449 if (total) 450 ratio = avg / total * 100.0; 451 452 color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); 453 454 out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio); 455 } 456 457 static void print_branch_misses(struct perf_stat_config *config, 458 int cpu, 459 struct perf_evsel *evsel, 460 double avg, 461 struct perf_stat_output_ctx *out, 462 struct runtime_stat *st) 463 { 464 double total, ratio = 0.0; 465 const char *color; 466 int ctx = evsel_context(evsel); 467 468 total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu); 469 470 if (total) 471 ratio = avg / total * 100.0; 472 473 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 474 475 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio); 476 } 477 478 static void print_l1_dcache_misses(struct perf_stat_config *config, 479 int cpu, 480 struct perf_evsel *evsel, 481 double avg, 482 struct perf_stat_output_ctx *out, 483 struct runtime_stat *st) 484 485 { 486 double total, ratio = 0.0; 487 const char *color; 488 int ctx = evsel_context(evsel); 489 490 total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu); 491 492 if (total) 493 ratio = avg / total * 100.0; 494 495 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 496 497 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio); 498 } 499 500 static void print_l1_icache_misses(struct perf_stat_config *config, 501 int cpu, 502 struct perf_evsel *evsel, 503 double avg, 504 struct perf_stat_output_ctx *out, 505 struct runtime_stat *st) 506 507 { 508 double total, ratio = 0.0; 509 const char *color; 510 int ctx = evsel_context(evsel); 511 512 total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu); 513 514 if (total) 515 ratio = avg / total * 100.0; 516 517 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 518 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio); 519 } 520 521 static void print_dtlb_cache_misses(struct perf_stat_config *config, 522 int cpu, 523 struct perf_evsel *evsel, 524 double avg, 525 struct perf_stat_output_ctx *out, 526 struct runtime_stat *st) 527 { 528 double total, ratio = 0.0; 529 const char *color; 530 int ctx = evsel_context(evsel); 531 532 total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu); 533 534 if (total) 535 ratio = avg / total * 100.0; 536 537 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 538 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio); 539 } 540 541 static void print_itlb_cache_misses(struct perf_stat_config *config, 542 int cpu, 543 struct perf_evsel *evsel, 544 double avg, 545 struct perf_stat_output_ctx *out, 546 struct runtime_stat *st) 547 { 548 double total, ratio = 0.0; 549 const char *color; 550 int ctx = evsel_context(evsel); 551 552 total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu); 553 554 if (total) 555 ratio = avg / total * 100.0; 556 557 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 558 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio); 559 } 560 561 static void print_ll_cache_misses(struct perf_stat_config *config, 562 int cpu, 563 struct perf_evsel *evsel, 564 double avg, 565 struct perf_stat_output_ctx *out, 566 struct runtime_stat *st) 567 { 568 double total, ratio = 0.0; 569 const char *color; 570 int ctx = evsel_context(evsel); 571 572 total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu); 573 574 if (total) 575 ratio = avg / total * 100.0; 576 577 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 578 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); 579 } 580 581 /* 582 * High level "TopDown" CPU core pipe line bottleneck break down. 583 * 584 * Basic concept following 585 * Yasin, A Top Down Method for Performance analysis and Counter architecture 586 * ISPASS14 587 * 588 * The CPU pipeline is divided into 4 areas that can be bottlenecks: 589 * 590 * Frontend -> Backend -> Retiring 591 * BadSpeculation in addition means out of order execution that is thrown away 592 * (for example branch mispredictions) 593 * Frontend is instruction decoding. 594 * Backend is execution, like computation and accessing data in memory 595 * Retiring is good execution that is not directly bottlenecked 596 * 597 * The formulas are computed in slots. 598 * A slot is an entry in the pipeline each for the pipeline width 599 * (for example a 4-wide pipeline has 4 slots for each cycle) 600 * 601 * Formulas: 602 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) / 603 * TotalSlots 604 * Retiring = SlotsRetired / TotalSlots 605 * FrontendBound = FetchBubbles / TotalSlots 606 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound 607 * 608 * The kernel provides the mapping to the low level CPU events and any scaling 609 * needed for the CPU pipeline width, for example: 610 * 611 * TotalSlots = Cycles * 4 612 * 613 * The scaling factor is communicated in the sysfs unit. 614 * 615 * In some cases the CPU may not be able to measure all the formulas due to 616 * missing events. In this case multiple formulas are combined, as possible. 617 * 618 * Full TopDown supports more levels to sub-divide each area: for example 619 * BackendBound into computing bound and memory bound. For now we only 620 * support Level 1 TopDown. 621 */ 622 623 static double sanitize_val(double x) 624 { 625 if (x < 0 && x >= -0.02) 626 return 0.0; 627 return x; 628 } 629 630 static double td_total_slots(int ctx, int cpu, struct runtime_stat *st) 631 { 632 return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu); 633 } 634 635 static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st) 636 { 637 double bad_spec = 0; 638 double total_slots; 639 double total; 640 641 total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) - 642 runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) + 643 runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu); 644 645 total_slots = td_total_slots(ctx, cpu, st); 646 if (total_slots) 647 bad_spec = total / total_slots; 648 return sanitize_val(bad_spec); 649 } 650 651 static double td_retiring(int ctx, int cpu, struct runtime_stat *st) 652 { 653 double retiring = 0; 654 double total_slots = td_total_slots(ctx, cpu, st); 655 double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, 656 ctx, cpu); 657 658 if (total_slots) 659 retiring = ret_slots / total_slots; 660 return retiring; 661 } 662 663 static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st) 664 { 665 double fe_bound = 0; 666 double total_slots = td_total_slots(ctx, cpu, st); 667 double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES, 668 ctx, cpu); 669 670 if (total_slots) 671 fe_bound = fetch_bub / total_slots; 672 return fe_bound; 673 } 674 675 static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) 676 { 677 double sum = (td_fe_bound(ctx, cpu, st) + 678 td_bad_spec(ctx, cpu, st) + 679 td_retiring(ctx, cpu, st)); 680 if (sum == 0) 681 return 0; 682 return sanitize_val(1.0 - sum); 683 } 684 685 static void print_smi_cost(struct perf_stat_config *config, 686 int cpu, struct perf_evsel *evsel, 687 struct perf_stat_output_ctx *out, 688 struct runtime_stat *st) 689 { 690 double smi_num, aperf, cycles, cost = 0.0; 691 int ctx = evsel_context(evsel); 692 const char *color = NULL; 693 694 smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu); 695 aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu); 696 cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 697 698 if ((cycles == 0) || (aperf == 0)) 699 return; 700 701 if (smi_num) 702 cost = (aperf - cycles) / aperf * 100.00; 703 704 if (cost > 10) 705 color = PERF_COLOR_RED; 706 out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost); 707 out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num); 708 } 709 710 static void generic_metric(struct perf_stat_config *config, 711 const char *metric_expr, 712 struct perf_evsel **metric_events, 713 char *name, 714 const char *metric_name, 715 double avg, 716 int cpu, 717 struct perf_stat_output_ctx *out, 718 struct runtime_stat *st) 719 { 720 print_metric_t print_metric = out->print_metric; 721 struct parse_ctx pctx; 722 double ratio; 723 int i; 724 void *ctxp = out->ctx; 725 726 expr__ctx_init(&pctx); 727 expr__add_id(&pctx, name, avg); 728 for (i = 0; metric_events[i]; i++) { 729 struct saved_value *v; 730 struct stats *stats; 731 double scale; 732 733 if (!strcmp(metric_events[i]->name, "duration_time")) { 734 stats = &walltime_nsecs_stats; 735 scale = 1e-9; 736 } else { 737 v = saved_value_lookup(metric_events[i], cpu, false, 738 STAT_NONE, 0, st); 739 if (!v) 740 break; 741 stats = &v->stats; 742 scale = 1.0; 743 } 744 expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats)*scale); 745 } 746 if (!metric_events[i]) { 747 const char *p = metric_expr; 748 749 if (expr__parse(&ratio, &pctx, &p) == 0) 750 print_metric(config, ctxp, NULL, "%8.1f", 751 metric_name ? 752 metric_name : 753 out->force_header ? name : "", 754 ratio); 755 else 756 print_metric(config, ctxp, NULL, NULL, 757 out->force_header ? 758 (metric_name ? metric_name : name) : "", 0); 759 } else 760 print_metric(config, ctxp, NULL, NULL, "", 0); 761 } 762 763 void perf_stat__print_shadow_stats(struct perf_stat_config *config, 764 struct perf_evsel *evsel, 765 double avg, int cpu, 766 struct perf_stat_output_ctx *out, 767 struct rblist *metric_events, 768 struct runtime_stat *st) 769 { 770 void *ctxp = out->ctx; 771 print_metric_t print_metric = out->print_metric; 772 double total, ratio = 0.0, total2; 773 const char *color = NULL; 774 int ctx = evsel_context(evsel); 775 struct metric_event *me; 776 int num = 1; 777 778 if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { 779 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 780 781 if (total) { 782 ratio = avg / total; 783 print_metric(config, ctxp, NULL, "%7.2f ", 784 "insn per cycle", ratio); 785 } else { 786 print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); 787 } 788 789 total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, 790 ctx, cpu); 791 792 total = max(total, runtime_stat_avg(st, 793 STAT_STALLED_CYCLES_BACK, 794 ctx, cpu)); 795 796 if (total && avg) { 797 out->new_line(config, ctxp); 798 ratio = total / avg; 799 print_metric(config, ctxp, NULL, "%7.2f ", 800 "stalled cycles per insn", 801 ratio); 802 } else if (have_frontend_stalled) { 803 print_metric(config, ctxp, NULL, NULL, 804 "stalled cycles per insn", 0); 805 } 806 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { 807 if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) 808 print_branch_misses(config, cpu, evsel, avg, out, st); 809 else 810 print_metric(config, ctxp, NULL, NULL, "of all branches", 0); 811 } else if ( 812 evsel->attr.type == PERF_TYPE_HW_CACHE && 813 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | 814 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 815 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { 816 817 if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0) 818 print_l1_dcache_misses(config, cpu, evsel, avg, out, st); 819 else 820 print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0); 821 } else if ( 822 evsel->attr.type == PERF_TYPE_HW_CACHE && 823 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | 824 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 825 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { 826 827 if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0) 828 print_l1_icache_misses(config, cpu, evsel, avg, out, st); 829 else 830 print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0); 831 } else if ( 832 evsel->attr.type == PERF_TYPE_HW_CACHE && 833 evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | 834 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 835 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { 836 837 if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0) 838 print_dtlb_cache_misses(config, cpu, evsel, avg, out, st); 839 else 840 print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0); 841 } else if ( 842 evsel->attr.type == PERF_TYPE_HW_CACHE && 843 evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | 844 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 845 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { 846 847 if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0) 848 print_itlb_cache_misses(config, cpu, evsel, avg, out, st); 849 else 850 print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0); 851 } else if ( 852 evsel->attr.type == PERF_TYPE_HW_CACHE && 853 evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | 854 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 855 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { 856 857 if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0) 858 print_ll_cache_misses(config, cpu, evsel, avg, out, st); 859 else 860 print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0); 861 } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { 862 total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); 863 864 if (total) 865 ratio = avg * 100 / total; 866 867 if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0) 868 print_metric(config, ctxp, NULL, "%8.3f %%", 869 "of all cache refs", ratio); 870 else 871 print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0); 872 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { 873 print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st); 874 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { 875 print_stalled_cycles_backend(config, cpu, evsel, avg, out, st); 876 } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { 877 total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); 878 879 if (total) { 880 ratio = avg / total; 881 print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio); 882 } else { 883 print_metric(config, ctxp, NULL, NULL, "Ghz", 0); 884 } 885 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { 886 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 887 888 if (total) 889 print_metric(config, ctxp, NULL, 890 "%7.2f%%", "transactional cycles", 891 100.0 * (avg / total)); 892 else 893 print_metric(config, ctxp, NULL, NULL, "transactional cycles", 894 0); 895 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { 896 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 897 total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu); 898 899 if (total2 < avg) 900 total2 = avg; 901 if (total) 902 print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles", 903 100.0 * ((total2-avg) / total)); 904 else 905 print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0); 906 } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { 907 total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, 908 ctx, cpu); 909 910 if (avg) 911 ratio = total / avg; 912 913 if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0) 914 print_metric(config, ctxp, NULL, "%8.0f", 915 "cycles / transaction", ratio); 916 else 917 print_metric(config, ctxp, NULL, NULL, "cycles / transaction", 918 0); 919 } else if (perf_stat_evsel__is(evsel, ELISION_START)) { 920 total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, 921 ctx, cpu); 922 923 if (avg) 924 ratio = total / avg; 925 926 print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio); 927 } else if (perf_evsel__is_clock(evsel)) { 928 if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) 929 print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized", 930 avg / (ratio * evsel->scale)); 931 else 932 print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); 933 } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { 934 double fe_bound = td_fe_bound(ctx, cpu, st); 935 936 if (fe_bound > 0.2) 937 color = PERF_COLOR_RED; 938 print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", 939 fe_bound * 100.); 940 } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { 941 double retiring = td_retiring(ctx, cpu, st); 942 943 if (retiring > 0.7) 944 color = PERF_COLOR_GREEN; 945 print_metric(config, ctxp, color, "%8.1f%%", "retiring", 946 retiring * 100.); 947 } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { 948 double bad_spec = td_bad_spec(ctx, cpu, st); 949 950 if (bad_spec > 0.1) 951 color = PERF_COLOR_RED; 952 print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", 953 bad_spec * 100.); 954 } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { 955 double be_bound = td_be_bound(ctx, cpu, st); 956 const char *name = "backend bound"; 957 static int have_recovery_bubbles = -1; 958 959 /* In case the CPU does not support topdown-recovery-bubbles */ 960 if (have_recovery_bubbles < 0) 961 have_recovery_bubbles = pmu_have_event("cpu", 962 "topdown-recovery-bubbles"); 963 if (!have_recovery_bubbles) 964 name = "backend bound/bad spec"; 965 966 if (be_bound > 0.2) 967 color = PERF_COLOR_RED; 968 if (td_total_slots(ctx, cpu, st) > 0) 969 print_metric(config, ctxp, color, "%8.1f%%", name, 970 be_bound * 100.); 971 else 972 print_metric(config, ctxp, NULL, NULL, name, 0); 973 } else if (evsel->metric_expr) { 974 generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name, 975 evsel->metric_name, avg, cpu, out, st); 976 } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { 977 char unit = 'M'; 978 char unit_buf[10]; 979 980 total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); 981 982 if (total) 983 ratio = 1000.0 * avg / total; 984 if (ratio < 0.001) { 985 ratio *= 1000; 986 unit = 'K'; 987 } 988 snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); 989 print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); 990 } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { 991 print_smi_cost(config, cpu, evsel, out, st); 992 } else { 993 num = 0; 994 } 995 996 if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) { 997 struct metric_expr *mexp; 998 999 list_for_each_entry (mexp, &me->head, nd) { 1000 if (num++ > 0) 1001 out->new_line(config, ctxp); 1002 generic_metric(config, mexp->metric_expr, mexp->metric_events, 1003 evsel->name, mexp->metric_name, 1004 avg, cpu, out, st); 1005 } 1006 } 1007 if (num == 0) 1008 print_metric(config, ctxp, NULL, NULL, NULL, 0); 1009 } 1010