1 // SPDX-License-Identifier: GPL-2.0 2 #include <stdio.h> 3 #include "evsel.h" 4 #include "stat.h" 5 #include "color.h" 6 #include "pmu.h" 7 #include "rblist.h" 8 #include "evlist.h" 9 #include "expr.h" 10 #include "metricgroup.h" 11 #include <linux/zalloc.h> 12 13 /* 14 * AGGR_GLOBAL: Use CPU 0 15 * AGGR_SOCKET: Use first CPU of socket 16 * AGGR_DIE: Use first CPU of die 17 * AGGR_CORE: Use first CPU of core 18 * AGGR_NONE: Use matching CPU 19 * AGGR_THREAD: Not supported? 20 */ 21 22 struct runtime_stat rt_stat; 23 struct stats walltime_nsecs_stats; 24 25 struct saved_value { 26 struct rb_node rb_node; 27 struct evsel *evsel; 28 enum stat_type type; 29 int ctx; 30 int cpu; 31 struct runtime_stat *stat; 32 struct stats stats; 33 u64 metric_total; 34 int metric_other; 35 }; 36 37 static int saved_value_cmp(struct rb_node *rb_node, const void *entry) 38 { 39 struct saved_value *a = container_of(rb_node, 40 struct saved_value, 41 rb_node); 42 const struct saved_value *b = entry; 43 44 if (a->cpu != b->cpu) 45 return a->cpu - b->cpu; 46 47 /* 48 * Previously the rbtree was used to link generic metrics. 49 * The keys were evsel/cpu. Now the rbtree is extended to support 50 * per-thread shadow stats. For shadow stats case, the keys 51 * are cpu/type/ctx/stat (evsel is NULL). For generic metrics 52 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL). 53 */ 54 if (a->type != b->type) 55 return a->type - b->type; 56 57 if (a->ctx != b->ctx) 58 return a->ctx - b->ctx; 59 60 if (a->evsel == NULL && b->evsel == NULL) { 61 if (a->stat == b->stat) 62 return 0; 63 64 if ((char *)a->stat < (char *)b->stat) 65 return -1; 66 67 return 1; 68 } 69 70 if (a->evsel == b->evsel) 71 return 0; 72 if ((char *)a->evsel < (char *)b->evsel) 73 return -1; 74 return +1; 75 } 76 77 static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused, 78 const void *entry) 79 { 80 struct saved_value *nd = malloc(sizeof(struct saved_value)); 81 82 if (!nd) 83 return NULL; 84 memcpy(nd, entry, sizeof(struct saved_value)); 85 return &nd->rb_node; 86 } 87 88 static void saved_value_delete(struct rblist *rblist __maybe_unused, 89 struct rb_node *rb_node) 90 { 91 struct saved_value *v; 92 93 BUG_ON(!rb_node); 94 v = container_of(rb_node, struct saved_value, rb_node); 95 free(v); 96 } 97 98 static struct saved_value *saved_value_lookup(struct evsel *evsel, 99 int cpu, 100 bool create, 101 enum stat_type type, 102 int ctx, 103 struct runtime_stat *st) 104 { 105 struct rblist *rblist; 106 struct rb_node *nd; 107 struct saved_value dm = { 108 .cpu = cpu, 109 .evsel = evsel, 110 .type = type, 111 .ctx = ctx, 112 .stat = st, 113 }; 114 115 rblist = &st->value_list; 116 117 nd = rblist__find(rblist, &dm); 118 if (nd) 119 return container_of(nd, struct saved_value, rb_node); 120 if (create) { 121 rblist__add_node(rblist, &dm); 122 nd = rblist__find(rblist, &dm); 123 if (nd) 124 return container_of(nd, struct saved_value, rb_node); 125 } 126 return NULL; 127 } 128 129 void runtime_stat__init(struct runtime_stat *st) 130 { 131 struct rblist *rblist = &st->value_list; 132 133 rblist__init(rblist); 134 rblist->node_cmp = saved_value_cmp; 135 rblist->node_new = saved_value_new; 136 rblist->node_delete = saved_value_delete; 137 } 138 139 void runtime_stat__exit(struct runtime_stat *st) 140 { 141 rblist__exit(&st->value_list); 142 } 143 144 void perf_stat__init_shadow_stats(void) 145 { 146 runtime_stat__init(&rt_stat); 147 } 148 149 static int evsel_context(struct evsel *evsel) 150 { 151 int ctx = 0; 152 153 if (evsel->core.attr.exclude_kernel) 154 ctx |= CTX_BIT_KERNEL; 155 if (evsel->core.attr.exclude_user) 156 ctx |= CTX_BIT_USER; 157 if (evsel->core.attr.exclude_hv) 158 ctx |= CTX_BIT_HV; 159 if (evsel->core.attr.exclude_host) 160 ctx |= CTX_BIT_HOST; 161 if (evsel->core.attr.exclude_idle) 162 ctx |= CTX_BIT_IDLE; 163 164 return ctx; 165 } 166 167 static void reset_stat(struct runtime_stat *st) 168 { 169 struct rblist *rblist; 170 struct rb_node *pos, *next; 171 172 rblist = &st->value_list; 173 next = rb_first_cached(&rblist->entries); 174 while (next) { 175 pos = next; 176 next = rb_next(pos); 177 memset(&container_of(pos, struct saved_value, rb_node)->stats, 178 0, 179 sizeof(struct stats)); 180 } 181 } 182 183 void perf_stat__reset_shadow_stats(void) 184 { 185 reset_stat(&rt_stat); 186 memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); 187 } 188 189 void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) 190 { 191 reset_stat(st); 192 } 193 194 static void update_runtime_stat(struct runtime_stat *st, 195 enum stat_type type, 196 int ctx, int cpu, u64 count) 197 { 198 struct saved_value *v = saved_value_lookup(NULL, cpu, true, 199 type, ctx, st); 200 201 if (v) 202 update_stats(&v->stats, count); 203 } 204 205 /* 206 * Update various tracking values we maintain to print 207 * more semantic information such as miss/hit ratios, 208 * instruction rates, etc: 209 */ 210 void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, 211 int cpu, struct runtime_stat *st) 212 { 213 int ctx = evsel_context(counter); 214 u64 count_ns = count; 215 struct saved_value *v; 216 217 count *= counter->scale; 218 219 if (evsel__is_clock(counter)) 220 update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns); 221 else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) 222 update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); 223 else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) 224 update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count); 225 else if (perf_stat_evsel__is(counter, TRANSACTION_START)) 226 update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count); 227 else if (perf_stat_evsel__is(counter, ELISION_START)) 228 update_runtime_stat(st, STAT_ELISION, ctx, cpu, count); 229 else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) 230 update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS, 231 ctx, cpu, count); 232 else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) 233 update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED, 234 ctx, cpu, count); 235 else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) 236 update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED, 237 ctx, cpu, count); 238 else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) 239 update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES, 240 ctx, cpu, count); 241 else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) 242 update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, 243 ctx, cpu, count); 244 else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) 245 update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, 246 ctx, cpu, count); 247 else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) 248 update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, 249 ctx, cpu, count); 250 else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) 251 update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count); 252 else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) 253 update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count); 254 else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) 255 update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count); 256 else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) 257 update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count); 258 else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL)) 259 update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count); 260 else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) 261 update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count); 262 else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) 263 update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count); 264 else if (perf_stat_evsel__is(counter, SMI_NUM)) 265 update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count); 266 else if (perf_stat_evsel__is(counter, APERF)) 267 update_runtime_stat(st, STAT_APERF, ctx, cpu, count); 268 269 if (counter->collect_stat) { 270 v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st); 271 update_stats(&v->stats, count); 272 if (counter->metric_leader) 273 v->metric_total += count; 274 } else if (counter->metric_leader) { 275 v = saved_value_lookup(counter->metric_leader, 276 cpu, true, STAT_NONE, 0, st); 277 v->metric_total += count; 278 v->metric_other++; 279 } 280 } 281 282 /* used for get_ratio_color() */ 283 enum grc_type { 284 GRC_STALLED_CYCLES_FE, 285 GRC_STALLED_CYCLES_BE, 286 GRC_CACHE_MISSES, 287 GRC_MAX_NR 288 }; 289 290 static const char *get_ratio_color(enum grc_type type, double ratio) 291 { 292 static const double grc_table[GRC_MAX_NR][3] = { 293 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 }, 294 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 }, 295 [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 }, 296 }; 297 const char *color = PERF_COLOR_NORMAL; 298 299 if (ratio > grc_table[type][0]) 300 color = PERF_COLOR_RED; 301 else if (ratio > grc_table[type][1]) 302 color = PERF_COLOR_MAGENTA; 303 else if (ratio > grc_table[type][2]) 304 color = PERF_COLOR_YELLOW; 305 306 return color; 307 } 308 309 static struct evsel *perf_stat__find_event(struct evlist *evsel_list, 310 const char *name) 311 { 312 struct evsel *c2; 313 314 evlist__for_each_entry (evsel_list, c2) { 315 if (!strcasecmp(c2->name, name) && !c2->collect_stat) 316 return c2; 317 } 318 return NULL; 319 } 320 321 /* Mark MetricExpr target events and link events using them to them. */ 322 void perf_stat__collect_metric_expr(struct evlist *evsel_list) 323 { 324 struct evsel *counter, *leader, **metric_events, *oc; 325 bool found; 326 struct expr_parse_ctx ctx; 327 struct hashmap_entry *cur; 328 size_t bkt; 329 int i; 330 331 expr__ctx_init(&ctx); 332 evlist__for_each_entry(evsel_list, counter) { 333 bool invalid = false; 334 335 leader = counter->leader; 336 if (!counter->metric_expr) 337 continue; 338 339 expr__ctx_clear(&ctx); 340 metric_events = counter->metric_events; 341 if (!metric_events) { 342 if (expr__find_other(counter->metric_expr, 343 counter->name, 344 &ctx, 1) < 0) 345 continue; 346 347 metric_events = calloc(sizeof(struct evsel *), 348 hashmap__size(&ctx.ids) + 1); 349 if (!metric_events) { 350 expr__ctx_clear(&ctx); 351 return; 352 } 353 counter->metric_events = metric_events; 354 } 355 356 i = 0; 357 hashmap__for_each_entry((&ctx.ids), cur, bkt) { 358 const char *metric_name = (const char *)cur->key; 359 360 found = false; 361 if (leader) { 362 /* Search in group */ 363 for_each_group_member (oc, leader) { 364 if (!strcasecmp(oc->name, 365 metric_name) && 366 !oc->collect_stat) { 367 found = true; 368 break; 369 } 370 } 371 } 372 if (!found) { 373 /* Search ignoring groups */ 374 oc = perf_stat__find_event(evsel_list, 375 metric_name); 376 } 377 if (!oc) { 378 /* Deduping one is good enough to handle duplicated PMUs. */ 379 static char *printed; 380 381 /* 382 * Adding events automatically would be difficult, because 383 * it would risk creating groups that are not schedulable. 384 * perf stat doesn't understand all the scheduling constraints 385 * of events. So we ask the user instead to add the missing 386 * events. 387 */ 388 if (!printed || 389 strcasecmp(printed, metric_name)) { 390 fprintf(stderr, 391 "Add %s event to groups to get metric expression for %s\n", 392 metric_name, 393 counter->name); 394 printed = strdup(metric_name); 395 } 396 invalid = true; 397 continue; 398 } 399 metric_events[i++] = oc; 400 oc->collect_stat = true; 401 } 402 metric_events[i] = NULL; 403 if (invalid) { 404 free(metric_events); 405 counter->metric_events = NULL; 406 counter->metric_expr = NULL; 407 } 408 } 409 expr__ctx_clear(&ctx); 410 } 411 412 static double runtime_stat_avg(struct runtime_stat *st, 413 enum stat_type type, int ctx, int cpu) 414 { 415 struct saved_value *v; 416 417 v = saved_value_lookup(NULL, cpu, false, type, ctx, st); 418 if (!v) 419 return 0.0; 420 421 return avg_stats(&v->stats); 422 } 423 424 static double runtime_stat_n(struct runtime_stat *st, 425 enum stat_type type, int ctx, int cpu) 426 { 427 struct saved_value *v; 428 429 v = saved_value_lookup(NULL, cpu, false, type, ctx, st); 430 if (!v) 431 return 0.0; 432 433 return v->stats.n; 434 } 435 436 static void print_stalled_cycles_frontend(struct perf_stat_config *config, 437 int cpu, 438 struct evsel *evsel, double avg, 439 struct perf_stat_output_ctx *out, 440 struct runtime_stat *st) 441 { 442 double total, ratio = 0.0; 443 const char *color; 444 int ctx = evsel_context(evsel); 445 446 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 447 448 if (total) 449 ratio = avg / total * 100.0; 450 451 color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); 452 453 if (ratio) 454 out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle", 455 ratio); 456 else 457 out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0); 458 } 459 460 static void print_stalled_cycles_backend(struct perf_stat_config *config, 461 int cpu, 462 struct evsel *evsel, double avg, 463 struct perf_stat_output_ctx *out, 464 struct runtime_stat *st) 465 { 466 double total, ratio = 0.0; 467 const char *color; 468 int ctx = evsel_context(evsel); 469 470 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 471 472 if (total) 473 ratio = avg / total * 100.0; 474 475 color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); 476 477 out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio); 478 } 479 480 static void print_branch_misses(struct perf_stat_config *config, 481 int cpu, 482 struct evsel *evsel, 483 double avg, 484 struct perf_stat_output_ctx *out, 485 struct runtime_stat *st) 486 { 487 double total, ratio = 0.0; 488 const char *color; 489 int ctx = evsel_context(evsel); 490 491 total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu); 492 493 if (total) 494 ratio = avg / total * 100.0; 495 496 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 497 498 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio); 499 } 500 501 static void print_l1_dcache_misses(struct perf_stat_config *config, 502 int cpu, 503 struct evsel *evsel, 504 double avg, 505 struct perf_stat_output_ctx *out, 506 struct runtime_stat *st) 507 508 { 509 double total, ratio = 0.0; 510 const char *color; 511 int ctx = evsel_context(evsel); 512 513 total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu); 514 515 if (total) 516 ratio = avg / total * 100.0; 517 518 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 519 520 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio); 521 } 522 523 static void print_l1_icache_misses(struct perf_stat_config *config, 524 int cpu, 525 struct evsel *evsel, 526 double avg, 527 struct perf_stat_output_ctx *out, 528 struct runtime_stat *st) 529 530 { 531 double total, ratio = 0.0; 532 const char *color; 533 int ctx = evsel_context(evsel); 534 535 total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu); 536 537 if (total) 538 ratio = avg / total * 100.0; 539 540 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 541 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio); 542 } 543 544 static void print_dtlb_cache_misses(struct perf_stat_config *config, 545 int cpu, 546 struct evsel *evsel, 547 double avg, 548 struct perf_stat_output_ctx *out, 549 struct runtime_stat *st) 550 { 551 double total, ratio = 0.0; 552 const char *color; 553 int ctx = evsel_context(evsel); 554 555 total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu); 556 557 if (total) 558 ratio = avg / total * 100.0; 559 560 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 561 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio); 562 } 563 564 static void print_itlb_cache_misses(struct perf_stat_config *config, 565 int cpu, 566 struct evsel *evsel, 567 double avg, 568 struct perf_stat_output_ctx *out, 569 struct runtime_stat *st) 570 { 571 double total, ratio = 0.0; 572 const char *color; 573 int ctx = evsel_context(evsel); 574 575 total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu); 576 577 if (total) 578 ratio = avg / total * 100.0; 579 580 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 581 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio); 582 } 583 584 static void print_ll_cache_misses(struct perf_stat_config *config, 585 int cpu, 586 struct evsel *evsel, 587 double avg, 588 struct perf_stat_output_ctx *out, 589 struct runtime_stat *st) 590 { 591 double total, ratio = 0.0; 592 const char *color; 593 int ctx = evsel_context(evsel); 594 595 total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu); 596 597 if (total) 598 ratio = avg / total * 100.0; 599 600 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 601 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); 602 } 603 604 /* 605 * High level "TopDown" CPU core pipe line bottleneck break down. 606 * 607 * Basic concept following 608 * Yasin, A Top Down Method for Performance analysis and Counter architecture 609 * ISPASS14 610 * 611 * The CPU pipeline is divided into 4 areas that can be bottlenecks: 612 * 613 * Frontend -> Backend -> Retiring 614 * BadSpeculation in addition means out of order execution that is thrown away 615 * (for example branch mispredictions) 616 * Frontend is instruction decoding. 617 * Backend is execution, like computation and accessing data in memory 618 * Retiring is good execution that is not directly bottlenecked 619 * 620 * The formulas are computed in slots. 621 * A slot is an entry in the pipeline each for the pipeline width 622 * (for example a 4-wide pipeline has 4 slots for each cycle) 623 * 624 * Formulas: 625 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) / 626 * TotalSlots 627 * Retiring = SlotsRetired / TotalSlots 628 * FrontendBound = FetchBubbles / TotalSlots 629 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound 630 * 631 * The kernel provides the mapping to the low level CPU events and any scaling 632 * needed for the CPU pipeline width, for example: 633 * 634 * TotalSlots = Cycles * 4 635 * 636 * The scaling factor is communicated in the sysfs unit. 637 * 638 * In some cases the CPU may not be able to measure all the formulas due to 639 * missing events. In this case multiple formulas are combined, as possible. 640 * 641 * Full TopDown supports more levels to sub-divide each area: for example 642 * BackendBound into computing bound and memory bound. For now we only 643 * support Level 1 TopDown. 644 */ 645 646 static double sanitize_val(double x) 647 { 648 if (x < 0 && x >= -0.02) 649 return 0.0; 650 return x; 651 } 652 653 static double td_total_slots(int ctx, int cpu, struct runtime_stat *st) 654 { 655 return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu); 656 } 657 658 static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st) 659 { 660 double bad_spec = 0; 661 double total_slots; 662 double total; 663 664 total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) - 665 runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) + 666 runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu); 667 668 total_slots = td_total_slots(ctx, cpu, st); 669 if (total_slots) 670 bad_spec = total / total_slots; 671 return sanitize_val(bad_spec); 672 } 673 674 static double td_retiring(int ctx, int cpu, struct runtime_stat *st) 675 { 676 double retiring = 0; 677 double total_slots = td_total_slots(ctx, cpu, st); 678 double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, 679 ctx, cpu); 680 681 if (total_slots) 682 retiring = ret_slots / total_slots; 683 return retiring; 684 } 685 686 static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st) 687 { 688 double fe_bound = 0; 689 double total_slots = td_total_slots(ctx, cpu, st); 690 double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES, 691 ctx, cpu); 692 693 if (total_slots) 694 fe_bound = fetch_bub / total_slots; 695 return fe_bound; 696 } 697 698 static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) 699 { 700 double sum = (td_fe_bound(ctx, cpu, st) + 701 td_bad_spec(ctx, cpu, st) + 702 td_retiring(ctx, cpu, st)); 703 if (sum == 0) 704 return 0; 705 return sanitize_val(1.0 - sum); 706 } 707 708 static void print_smi_cost(struct perf_stat_config *config, 709 int cpu, struct evsel *evsel, 710 struct perf_stat_output_ctx *out, 711 struct runtime_stat *st) 712 { 713 double smi_num, aperf, cycles, cost = 0.0; 714 int ctx = evsel_context(evsel); 715 const char *color = NULL; 716 717 smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu); 718 aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu); 719 cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 720 721 if ((cycles == 0) || (aperf == 0)) 722 return; 723 724 if (smi_num) 725 cost = (aperf - cycles) / aperf * 100.00; 726 727 if (cost > 10) 728 color = PERF_COLOR_RED; 729 out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost); 730 out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num); 731 } 732 733 static void generic_metric(struct perf_stat_config *config, 734 const char *metric_expr, 735 struct evsel **metric_events, 736 char *name, 737 const char *metric_name, 738 const char *metric_unit, 739 int runtime, 740 int cpu, 741 struct perf_stat_output_ctx *out, 742 struct runtime_stat *st) 743 { 744 print_metric_t print_metric = out->print_metric; 745 struct expr_parse_ctx pctx; 746 double ratio, scale; 747 int i; 748 void *ctxp = out->ctx; 749 char *n, *pn; 750 751 expr__ctx_init(&pctx); 752 for (i = 0; metric_events[i]; i++) { 753 struct saved_value *v; 754 struct stats *stats; 755 u64 metric_total = 0; 756 757 if (!strcmp(metric_events[i]->name, "duration_time")) { 758 stats = &walltime_nsecs_stats; 759 scale = 1e-9; 760 } else { 761 v = saved_value_lookup(metric_events[i], cpu, false, 762 STAT_NONE, 0, st); 763 if (!v) 764 break; 765 stats = &v->stats; 766 scale = 1.0; 767 768 if (v->metric_other) 769 metric_total = v->metric_total; 770 } 771 772 n = strdup(metric_events[i]->name); 773 if (!n) 774 return; 775 /* 776 * This display code with --no-merge adds [cpu] postfixes. 777 * These are not supported by the parser. Remove everything 778 * after the space. 779 */ 780 pn = strchr(n, ' '); 781 if (pn) 782 *pn = 0; 783 784 if (metric_total) 785 expr__add_id(&pctx, n, metric_total); 786 else 787 expr__add_id(&pctx, n, avg_stats(stats)*scale); 788 } 789 790 if (!metric_events[i]) { 791 if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) { 792 char *unit; 793 char metric_bf[64]; 794 795 if (metric_unit && metric_name) { 796 if (perf_pmu__convert_scale(metric_unit, 797 &unit, &scale) >= 0) { 798 ratio *= scale; 799 } 800 if (strstr(metric_expr, "?")) 801 scnprintf(metric_bf, sizeof(metric_bf), 802 "%s %s_%d", unit, metric_name, runtime); 803 else 804 scnprintf(metric_bf, sizeof(metric_bf), 805 "%s %s", unit, metric_name); 806 807 print_metric(config, ctxp, NULL, "%8.1f", 808 metric_bf, ratio); 809 } else { 810 print_metric(config, ctxp, NULL, "%8.2f", 811 metric_name ? 812 metric_name : 813 out->force_header ? name : "", 814 ratio); 815 } 816 } else { 817 print_metric(config, ctxp, NULL, NULL, 818 out->force_header ? 819 (metric_name ? metric_name : name) : "", 0); 820 } 821 } else { 822 print_metric(config, ctxp, NULL, NULL, 823 out->force_header ? 824 (metric_name ? metric_name : name) : "", 0); 825 } 826 827 expr__ctx_clear(&pctx); 828 } 829 830 void perf_stat__print_shadow_stats(struct perf_stat_config *config, 831 struct evsel *evsel, 832 double avg, int cpu, 833 struct perf_stat_output_ctx *out, 834 struct rblist *metric_events, 835 struct runtime_stat *st) 836 { 837 void *ctxp = out->ctx; 838 print_metric_t print_metric = out->print_metric; 839 double total, ratio = 0.0, total2; 840 const char *color = NULL; 841 int ctx = evsel_context(evsel); 842 struct metric_event *me; 843 int num = 1; 844 845 if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { 846 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 847 848 if (total) { 849 ratio = avg / total; 850 print_metric(config, ctxp, NULL, "%7.2f ", 851 "insn per cycle", ratio); 852 } else { 853 print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); 854 } 855 856 total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, 857 ctx, cpu); 858 859 total = max(total, runtime_stat_avg(st, 860 STAT_STALLED_CYCLES_BACK, 861 ctx, cpu)); 862 863 if (total && avg) { 864 out->new_line(config, ctxp); 865 ratio = total / avg; 866 print_metric(config, ctxp, NULL, "%7.2f ", 867 "stalled cycles per insn", 868 ratio); 869 } 870 } else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { 871 if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) 872 print_branch_misses(config, cpu, evsel, avg, out, st); 873 else 874 print_metric(config, ctxp, NULL, NULL, "of all branches", 0); 875 } else if ( 876 evsel->core.attr.type == PERF_TYPE_HW_CACHE && 877 evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_L1D | 878 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 879 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { 880 881 if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0) 882 print_l1_dcache_misses(config, cpu, evsel, avg, out, st); 883 else 884 print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0); 885 } else if ( 886 evsel->core.attr.type == PERF_TYPE_HW_CACHE && 887 evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_L1I | 888 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 889 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { 890 891 if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0) 892 print_l1_icache_misses(config, cpu, evsel, avg, out, st); 893 else 894 print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0); 895 } else if ( 896 evsel->core.attr.type == PERF_TYPE_HW_CACHE && 897 evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_DTLB | 898 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 899 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { 900 901 if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0) 902 print_dtlb_cache_misses(config, cpu, evsel, avg, out, st); 903 else 904 print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0); 905 } else if ( 906 evsel->core.attr.type == PERF_TYPE_HW_CACHE && 907 evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_ITLB | 908 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 909 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { 910 911 if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0) 912 print_itlb_cache_misses(config, cpu, evsel, avg, out, st); 913 else 914 print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0); 915 } else if ( 916 evsel->core.attr.type == PERF_TYPE_HW_CACHE && 917 evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_LL | 918 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 919 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { 920 921 if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0) 922 print_ll_cache_misses(config, cpu, evsel, avg, out, st); 923 else 924 print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0); 925 } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { 926 total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); 927 928 if (total) 929 ratio = avg * 100 / total; 930 931 if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0) 932 print_metric(config, ctxp, NULL, "%8.3f %%", 933 "of all cache refs", ratio); 934 else 935 print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0); 936 } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { 937 print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st); 938 } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { 939 print_stalled_cycles_backend(config, cpu, evsel, avg, out, st); 940 } else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { 941 total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); 942 943 if (total) { 944 ratio = avg / total; 945 print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio); 946 } else { 947 print_metric(config, ctxp, NULL, NULL, "Ghz", 0); 948 } 949 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { 950 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 951 952 if (total) 953 print_metric(config, ctxp, NULL, 954 "%7.2f%%", "transactional cycles", 955 100.0 * (avg / total)); 956 else 957 print_metric(config, ctxp, NULL, NULL, "transactional cycles", 958 0); 959 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { 960 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 961 total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu); 962 963 if (total2 < avg) 964 total2 = avg; 965 if (total) 966 print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles", 967 100.0 * ((total2-avg) / total)); 968 else 969 print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0); 970 } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { 971 total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, 972 ctx, cpu); 973 974 if (avg) 975 ratio = total / avg; 976 977 if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0) 978 print_metric(config, ctxp, NULL, "%8.0f", 979 "cycles / transaction", ratio); 980 else 981 print_metric(config, ctxp, NULL, NULL, "cycles / transaction", 982 0); 983 } else if (perf_stat_evsel__is(evsel, ELISION_START)) { 984 total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, 985 ctx, cpu); 986 987 if (avg) 988 ratio = total / avg; 989 990 print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio); 991 } else if (evsel__is_clock(evsel)) { 992 if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) 993 print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized", 994 avg / (ratio * evsel->scale)); 995 else 996 print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); 997 } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { 998 double fe_bound = td_fe_bound(ctx, cpu, st); 999 1000 if (fe_bound > 0.2) 1001 color = PERF_COLOR_RED; 1002 print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", 1003 fe_bound * 100.); 1004 } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { 1005 double retiring = td_retiring(ctx, cpu, st); 1006 1007 if (retiring > 0.7) 1008 color = PERF_COLOR_GREEN; 1009 print_metric(config, ctxp, color, "%8.1f%%", "retiring", 1010 retiring * 100.); 1011 } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { 1012 double bad_spec = td_bad_spec(ctx, cpu, st); 1013 1014 if (bad_spec > 0.1) 1015 color = PERF_COLOR_RED; 1016 print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", 1017 bad_spec * 100.); 1018 } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { 1019 double be_bound = td_be_bound(ctx, cpu, st); 1020 const char *name = "backend bound"; 1021 static int have_recovery_bubbles = -1; 1022 1023 /* In case the CPU does not support topdown-recovery-bubbles */ 1024 if (have_recovery_bubbles < 0) 1025 have_recovery_bubbles = pmu_have_event("cpu", 1026 "topdown-recovery-bubbles"); 1027 if (!have_recovery_bubbles) 1028 name = "backend bound/bad spec"; 1029 1030 if (be_bound > 0.2) 1031 color = PERF_COLOR_RED; 1032 if (td_total_slots(ctx, cpu, st) > 0) 1033 print_metric(config, ctxp, color, "%8.1f%%", name, 1034 be_bound * 100.); 1035 else 1036 print_metric(config, ctxp, NULL, NULL, name, 0); 1037 } else if (evsel->metric_expr) { 1038 generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name, 1039 evsel->metric_name, NULL, 1, cpu, out, st); 1040 } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { 1041 char unit = 'M'; 1042 char unit_buf[10]; 1043 1044 total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); 1045 1046 if (total) 1047 ratio = 1000.0 * avg / total; 1048 if (ratio < 0.001) { 1049 ratio *= 1000; 1050 unit = 'K'; 1051 } 1052 snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); 1053 print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); 1054 } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { 1055 print_smi_cost(config, cpu, evsel, out, st); 1056 } else { 1057 num = 0; 1058 } 1059 1060 if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) { 1061 struct metric_expr *mexp; 1062 1063 list_for_each_entry (mexp, &me->head, nd) { 1064 if (num++ > 0) 1065 out->new_line(config, ctxp); 1066 generic_metric(config, mexp->metric_expr, mexp->metric_events, 1067 evsel->name, mexp->metric_name, 1068 mexp->metric_unit, mexp->runtime, cpu, out, st); 1069 } 1070 } 1071 if (num == 0) 1072 print_metric(config, ctxp, NULL, NULL, NULL, 0); 1073 } 1074