1 // SPDX-License-Identifier: GPL-2.0 2 #include <stdio.h> 3 #include "evsel.h" 4 #include "stat.h" 5 #include "color.h" 6 #include "pmu.h" 7 #include "rblist.h" 8 #include "evlist.h" 9 #include "expr.h" 10 #include "metricgroup.h" 11 #include <linux/zalloc.h> 12 13 /* 14 * AGGR_GLOBAL: Use CPU 0 15 * AGGR_SOCKET: Use first CPU of socket 16 * AGGR_DIE: Use first CPU of die 17 * AGGR_CORE: Use first CPU of core 18 * AGGR_NONE: Use matching CPU 19 * AGGR_THREAD: Not supported? 20 */ 21 22 struct runtime_stat rt_stat; 23 struct stats walltime_nsecs_stats; 24 25 struct saved_value { 26 struct rb_node rb_node; 27 struct evsel *evsel; 28 enum stat_type type; 29 int ctx; 30 int cpu; 31 struct runtime_stat *stat; 32 struct stats stats; 33 u64 metric_total; 34 int metric_other; 35 }; 36 37 static int saved_value_cmp(struct rb_node *rb_node, const void *entry) 38 { 39 struct saved_value *a = container_of(rb_node, 40 struct saved_value, 41 rb_node); 42 const struct saved_value *b = entry; 43 44 if (a->cpu != b->cpu) 45 return a->cpu - b->cpu; 46 47 /* 48 * Previously the rbtree was used to link generic metrics. 49 * The keys were evsel/cpu. Now the rbtree is extended to support 50 * per-thread shadow stats. For shadow stats case, the keys 51 * are cpu/type/ctx/stat (evsel is NULL). For generic metrics 52 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL). 
53 */ 54 if (a->type != b->type) 55 return a->type - b->type; 56 57 if (a->ctx != b->ctx) 58 return a->ctx - b->ctx; 59 60 if (a->evsel == NULL && b->evsel == NULL) { 61 if (a->stat == b->stat) 62 return 0; 63 64 if ((char *)a->stat < (char *)b->stat) 65 return -1; 66 67 return 1; 68 } 69 70 if (a->evsel == b->evsel) 71 return 0; 72 if ((char *)a->evsel < (char *)b->evsel) 73 return -1; 74 return +1; 75 } 76 77 static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused, 78 const void *entry) 79 { 80 struct saved_value *nd = malloc(sizeof(struct saved_value)); 81 82 if (!nd) 83 return NULL; 84 memcpy(nd, entry, sizeof(struct saved_value)); 85 return &nd->rb_node; 86 } 87 88 static void saved_value_delete(struct rblist *rblist __maybe_unused, 89 struct rb_node *rb_node) 90 { 91 struct saved_value *v; 92 93 BUG_ON(!rb_node); 94 v = container_of(rb_node, struct saved_value, rb_node); 95 free(v); 96 } 97 98 static struct saved_value *saved_value_lookup(struct evsel *evsel, 99 int cpu, 100 bool create, 101 enum stat_type type, 102 int ctx, 103 struct runtime_stat *st) 104 { 105 struct rblist *rblist; 106 struct rb_node *nd; 107 struct saved_value dm = { 108 .cpu = cpu, 109 .evsel = evsel, 110 .type = type, 111 .ctx = ctx, 112 .stat = st, 113 }; 114 115 rblist = &st->value_list; 116 117 nd = rblist__find(rblist, &dm); 118 if (nd) 119 return container_of(nd, struct saved_value, rb_node); 120 if (create) { 121 rblist__add_node(rblist, &dm); 122 nd = rblist__find(rblist, &dm); 123 if (nd) 124 return container_of(nd, struct saved_value, rb_node); 125 } 126 return NULL; 127 } 128 129 void runtime_stat__init(struct runtime_stat *st) 130 { 131 struct rblist *rblist = &st->value_list; 132 133 rblist__init(rblist); 134 rblist->node_cmp = saved_value_cmp; 135 rblist->node_new = saved_value_new; 136 rblist->node_delete = saved_value_delete; 137 } 138 139 void runtime_stat__exit(struct runtime_stat *st) 140 { 141 rblist__exit(&st->value_list); 142 } 143 144 void 
perf_stat__init_shadow_stats(void) 145 { 146 runtime_stat__init(&rt_stat); 147 } 148 149 static int evsel_context(struct evsel *evsel) 150 { 151 int ctx = 0; 152 153 if (evsel->core.attr.exclude_kernel) 154 ctx |= CTX_BIT_KERNEL; 155 if (evsel->core.attr.exclude_user) 156 ctx |= CTX_BIT_USER; 157 if (evsel->core.attr.exclude_hv) 158 ctx |= CTX_BIT_HV; 159 if (evsel->core.attr.exclude_host) 160 ctx |= CTX_BIT_HOST; 161 if (evsel->core.attr.exclude_idle) 162 ctx |= CTX_BIT_IDLE; 163 164 return ctx; 165 } 166 167 static void reset_stat(struct runtime_stat *st) 168 { 169 struct rblist *rblist; 170 struct rb_node *pos, *next; 171 172 rblist = &st->value_list; 173 next = rb_first_cached(&rblist->entries); 174 while (next) { 175 pos = next; 176 next = rb_next(pos); 177 memset(&container_of(pos, struct saved_value, rb_node)->stats, 178 0, 179 sizeof(struct stats)); 180 } 181 } 182 183 void perf_stat__reset_shadow_stats(void) 184 { 185 reset_stat(&rt_stat); 186 memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); 187 } 188 189 void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) 190 { 191 reset_stat(st); 192 } 193 194 static void update_runtime_stat(struct runtime_stat *st, 195 enum stat_type type, 196 int ctx, int cpu, u64 count) 197 { 198 struct saved_value *v = saved_value_lookup(NULL, cpu, true, 199 type, ctx, st); 200 201 if (v) 202 update_stats(&v->stats, count); 203 } 204 205 /* 206 * Update various tracking values we maintain to print 207 * more semantic information such as miss/hit ratios, 208 * instruction rates, etc: 209 */ 210 void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, 211 int cpu, struct runtime_stat *st) 212 { 213 int ctx = evsel_context(counter); 214 u64 count_ns = count; 215 struct saved_value *v; 216 217 count *= counter->scale; 218 219 if (evsel__is_clock(counter)) 220 update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns); 221 else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) 222 
update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); 223 else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) 224 update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count); 225 else if (perf_stat_evsel__is(counter, TRANSACTION_START)) 226 update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count); 227 else if (perf_stat_evsel__is(counter, ELISION_START)) 228 update_runtime_stat(st, STAT_ELISION, ctx, cpu, count); 229 else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) 230 update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS, 231 ctx, cpu, count); 232 else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) 233 update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED, 234 ctx, cpu, count); 235 else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) 236 update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED, 237 ctx, cpu, count); 238 else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) 239 update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES, 240 ctx, cpu, count); 241 else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) 242 update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, 243 ctx, cpu, count); 244 else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) 245 update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, 246 ctx, cpu, count); 247 else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) 248 update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, 249 ctx, cpu, count); 250 else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) 251 update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count); 252 else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) 253 update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count); 254 else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) 255 update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count); 256 else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) 257 update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count); 258 else if (evsel__match(counter, HW_CACHE, 
HW_CACHE_LL)) 259 update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count); 260 else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) 261 update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count); 262 else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) 263 update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count); 264 else if (perf_stat_evsel__is(counter, SMI_NUM)) 265 update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count); 266 else if (perf_stat_evsel__is(counter, APERF)) 267 update_runtime_stat(st, STAT_APERF, ctx, cpu, count); 268 269 if (counter->collect_stat) { 270 v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st); 271 update_stats(&v->stats, count); 272 if (counter->metric_leader) 273 v->metric_total += count; 274 } else if (counter->metric_leader) { 275 v = saved_value_lookup(counter->metric_leader, 276 cpu, true, STAT_NONE, 0, st); 277 v->metric_total += count; 278 v->metric_other++; 279 } 280 } 281 282 /* used for get_ratio_color() */ 283 enum grc_type { 284 GRC_STALLED_CYCLES_FE, 285 GRC_STALLED_CYCLES_BE, 286 GRC_CACHE_MISSES, 287 GRC_MAX_NR 288 }; 289 290 static const char *get_ratio_color(enum grc_type type, double ratio) 291 { 292 static const double grc_table[GRC_MAX_NR][3] = { 293 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 }, 294 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 }, 295 [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 }, 296 }; 297 const char *color = PERF_COLOR_NORMAL; 298 299 if (ratio > grc_table[type][0]) 300 color = PERF_COLOR_RED; 301 else if (ratio > grc_table[type][1]) 302 color = PERF_COLOR_MAGENTA; 303 else if (ratio > grc_table[type][2]) 304 color = PERF_COLOR_YELLOW; 305 306 return color; 307 } 308 309 static struct evsel *perf_stat__find_event(struct evlist *evsel_list, 310 const char *name) 311 { 312 struct evsel *c2; 313 314 evlist__for_each_entry (evsel_list, c2) { 315 if (!strcasecmp(c2->name, name) && !c2->collect_stat) 316 return c2; 317 } 318 return NULL; 319 } 320 321 /* Mark MetricExpr target events and 
link events using them to them. */ 322 void perf_stat__collect_metric_expr(struct evlist *evsel_list) 323 { 324 struct evsel *counter, *leader, **metric_events, *oc; 325 bool found; 326 struct expr_parse_ctx ctx; 327 struct hashmap_entry *cur; 328 size_t bkt; 329 int i; 330 331 expr__ctx_init(&ctx); 332 evlist__for_each_entry(evsel_list, counter) { 333 bool invalid = false; 334 335 leader = counter->leader; 336 if (!counter->metric_expr) 337 continue; 338 339 expr__ctx_clear(&ctx); 340 metric_events = counter->metric_events; 341 if (!metric_events) { 342 if (expr__find_other(counter->metric_expr, 343 counter->name, 344 &ctx, 1) < 0) 345 continue; 346 347 metric_events = calloc(sizeof(struct evsel *), 348 hashmap__size(&ctx.ids) + 1); 349 if (!metric_events) { 350 expr__ctx_clear(&ctx); 351 return; 352 } 353 counter->metric_events = metric_events; 354 } 355 356 i = 0; 357 hashmap__for_each_entry((&ctx.ids), cur, bkt) { 358 const char *metric_name = (const char *)cur->key; 359 360 found = false; 361 if (leader) { 362 /* Search in group */ 363 for_each_group_member (oc, leader) { 364 if (!strcasecmp(oc->name, 365 metric_name) && 366 !oc->collect_stat) { 367 found = true; 368 break; 369 } 370 } 371 } 372 if (!found) { 373 /* Search ignoring groups */ 374 oc = perf_stat__find_event(evsel_list, 375 metric_name); 376 } 377 if (!oc) { 378 /* Deduping one is good enough to handle duplicated PMUs. */ 379 static char *printed; 380 381 /* 382 * Adding events automatically would be difficult, because 383 * it would risk creating groups that are not schedulable. 384 * perf stat doesn't understand all the scheduling constraints 385 * of events. So we ask the user instead to add the missing 386 * events. 
387 */ 388 if (!printed || 389 strcasecmp(printed, metric_name)) { 390 fprintf(stderr, 391 "Add %s event to groups to get metric expression for %s\n", 392 metric_name, 393 counter->name); 394 printed = strdup(metric_name); 395 } 396 invalid = true; 397 continue; 398 } 399 metric_events[i++] = oc; 400 oc->collect_stat = true; 401 } 402 metric_events[i] = NULL; 403 if (invalid) { 404 free(metric_events); 405 counter->metric_events = NULL; 406 counter->metric_expr = NULL; 407 } 408 } 409 expr__ctx_clear(&ctx); 410 } 411 412 static double runtime_stat_avg(struct runtime_stat *st, 413 enum stat_type type, int ctx, int cpu) 414 { 415 struct saved_value *v; 416 417 v = saved_value_lookup(NULL, cpu, false, type, ctx, st); 418 if (!v) 419 return 0.0; 420 421 return avg_stats(&v->stats); 422 } 423 424 static double runtime_stat_n(struct runtime_stat *st, 425 enum stat_type type, int ctx, int cpu) 426 { 427 struct saved_value *v; 428 429 v = saved_value_lookup(NULL, cpu, false, type, ctx, st); 430 if (!v) 431 return 0.0; 432 433 return v->stats.n; 434 } 435 436 static void print_stalled_cycles_frontend(struct perf_stat_config *config, 437 int cpu, 438 struct evsel *evsel, double avg, 439 struct perf_stat_output_ctx *out, 440 struct runtime_stat *st) 441 { 442 double total, ratio = 0.0; 443 const char *color; 444 int ctx = evsel_context(evsel); 445 446 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 447 448 if (total) 449 ratio = avg / total * 100.0; 450 451 color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); 452 453 if (ratio) 454 out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle", 455 ratio); 456 else 457 out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0); 458 } 459 460 static void print_stalled_cycles_backend(struct perf_stat_config *config, 461 int cpu, 462 struct evsel *evsel, double avg, 463 struct perf_stat_output_ctx *out, 464 struct runtime_stat *st) 465 { 466 double total, ratio = 0.0; 467 const char *color; 
468 int ctx = evsel_context(evsel); 469 470 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 471 472 if (total) 473 ratio = avg / total * 100.0; 474 475 color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); 476 477 out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio); 478 } 479 480 static void print_branch_misses(struct perf_stat_config *config, 481 int cpu, 482 struct evsel *evsel, 483 double avg, 484 struct perf_stat_output_ctx *out, 485 struct runtime_stat *st) 486 { 487 double total, ratio = 0.0; 488 const char *color; 489 int ctx = evsel_context(evsel); 490 491 total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu); 492 493 if (total) 494 ratio = avg / total * 100.0; 495 496 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 497 498 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio); 499 } 500 501 static void print_l1_dcache_misses(struct perf_stat_config *config, 502 int cpu, 503 struct evsel *evsel, 504 double avg, 505 struct perf_stat_output_ctx *out, 506 struct runtime_stat *st) 507 508 { 509 double total, ratio = 0.0; 510 const char *color; 511 int ctx = evsel_context(evsel); 512 513 total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu); 514 515 if (total) 516 ratio = avg / total * 100.0; 517 518 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 519 520 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache accesses", ratio); 521 } 522 523 static void print_l1_icache_misses(struct perf_stat_config *config, 524 int cpu, 525 struct evsel *evsel, 526 double avg, 527 struct perf_stat_output_ctx *out, 528 struct runtime_stat *st) 529 530 { 531 double total, ratio = 0.0; 532 const char *color; 533 int ctx = evsel_context(evsel); 534 535 total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu); 536 537 if (total) 538 ratio = avg / total * 100.0; 539 540 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 541 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache 
accesses", ratio); 542 } 543 544 static void print_dtlb_cache_misses(struct perf_stat_config *config, 545 int cpu, 546 struct evsel *evsel, 547 double avg, 548 struct perf_stat_output_ctx *out, 549 struct runtime_stat *st) 550 { 551 double total, ratio = 0.0; 552 const char *color; 553 int ctx = evsel_context(evsel); 554 555 total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu); 556 557 if (total) 558 ratio = avg / total * 100.0; 559 560 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 561 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache accesses", ratio); 562 } 563 564 static void print_itlb_cache_misses(struct perf_stat_config *config, 565 int cpu, 566 struct evsel *evsel, 567 double avg, 568 struct perf_stat_output_ctx *out, 569 struct runtime_stat *st) 570 { 571 double total, ratio = 0.0; 572 const char *color; 573 int ctx = evsel_context(evsel); 574 575 total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu); 576 577 if (total) 578 ratio = avg / total * 100.0; 579 580 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 581 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache accesses", ratio); 582 } 583 584 static void print_ll_cache_misses(struct perf_stat_config *config, 585 int cpu, 586 struct evsel *evsel, 587 double avg, 588 struct perf_stat_output_ctx *out, 589 struct runtime_stat *st) 590 { 591 double total, ratio = 0.0; 592 const char *color; 593 int ctx = evsel_context(evsel); 594 595 total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu); 596 597 if (total) 598 ratio = avg / total * 100.0; 599 600 color = get_ratio_color(GRC_CACHE_MISSES, ratio); 601 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache accesses", ratio); 602 } 603 604 /* 605 * High level "TopDown" CPU core pipe line bottleneck break down. 
 *
 * Basic concept following
 * Yasin, A Top Down Method for Performance analysis and Counter architecture
 * ISPASS14
 *
 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
 *
 * Frontend -> Backend -> Retiring
 * BadSpeculation in addition means out of order execution that is thrown away
 * (for example branch mispredictions)
 * Frontend is instruction decoding.
 * Backend is execution, like computation and accessing data in memory
 * Retiring is good execution that is not directly bottlenecked
 *
 * The formulas are computed in slots.
 * A slot is one pipeline entry; each cycle has as many slots as the pipeline
 * is wide (for example a 4-wide pipeline has 4 slots for each cycle)
 *
 * Formulas:
 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
 *			TotalSlots
 * Retiring = SlotsRetired / TotalSlots
 * FrontendBound = FetchBubbles / TotalSlots
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
 *
 * The kernel provides the mapping to the low level CPU events and any scaling
 * needed for the CPU pipeline width, for example:
 *
 * TotalSlots = Cycles * 4
 *
 * The scaling factor is communicated in the sysfs unit.
 *
 * In some cases the CPU may not be able to measure all the formulas due to
 * missing events. In this case multiple formulas are combined, as possible.
 *
 * Full TopDown supports more levels to sub-divide each area: for example
 * BackendBound into computing bound and memory bound. For now we only
 * support Level 1 TopDown.
644 */ 645 646 static double sanitize_val(double x) 647 { 648 if (x < 0 && x >= -0.02) 649 return 0.0; 650 return x; 651 } 652 653 static double td_total_slots(int ctx, int cpu, struct runtime_stat *st) 654 { 655 return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu); 656 } 657 658 static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st) 659 { 660 double bad_spec = 0; 661 double total_slots; 662 double total; 663 664 total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) - 665 runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) + 666 runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu); 667 668 total_slots = td_total_slots(ctx, cpu, st); 669 if (total_slots) 670 bad_spec = total / total_slots; 671 return sanitize_val(bad_spec); 672 } 673 674 static double td_retiring(int ctx, int cpu, struct runtime_stat *st) 675 { 676 double retiring = 0; 677 double total_slots = td_total_slots(ctx, cpu, st); 678 double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, 679 ctx, cpu); 680 681 if (total_slots) 682 retiring = ret_slots / total_slots; 683 return retiring; 684 } 685 686 static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st) 687 { 688 double fe_bound = 0; 689 double total_slots = td_total_slots(ctx, cpu, st); 690 double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES, 691 ctx, cpu); 692 693 if (total_slots) 694 fe_bound = fetch_bub / total_slots; 695 return fe_bound; 696 } 697 698 static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) 699 { 700 double sum = (td_fe_bound(ctx, cpu, st) + 701 td_bad_spec(ctx, cpu, st) + 702 td_retiring(ctx, cpu, st)); 703 if (sum == 0) 704 return 0; 705 return sanitize_val(1.0 - sum); 706 } 707 708 static void print_smi_cost(struct perf_stat_config *config, 709 int cpu, struct evsel *evsel, 710 struct perf_stat_output_ctx *out, 711 struct runtime_stat *st) 712 { 713 double smi_num, aperf, cycles, cost = 0.0; 714 int ctx = 
evsel_context(evsel); 715 const char *color = NULL; 716 717 smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu); 718 aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu); 719 cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 720 721 if ((cycles == 0) || (aperf == 0)) 722 return; 723 724 if (smi_num) 725 cost = (aperf - cycles) / aperf * 100.00; 726 727 if (cost > 10) 728 color = PERF_COLOR_RED; 729 out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost); 730 out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num); 731 } 732 733 static int prepare_metric(struct evsel **metric_events, 734 struct metric_ref *metric_refs, 735 struct expr_parse_ctx *pctx, 736 int cpu, 737 struct runtime_stat *st) 738 { 739 double scale; 740 char *n, *pn; 741 int i, j, ret; 742 743 expr__ctx_init(pctx); 744 for (i = 0; metric_events[i]; i++) { 745 struct saved_value *v; 746 struct stats *stats; 747 u64 metric_total = 0; 748 749 if (!strcmp(metric_events[i]->name, "duration_time")) { 750 stats = &walltime_nsecs_stats; 751 scale = 1e-9; 752 } else { 753 v = saved_value_lookup(metric_events[i], cpu, false, 754 STAT_NONE, 0, st); 755 if (!v) 756 break; 757 stats = &v->stats; 758 scale = 1.0; 759 760 if (v->metric_other) 761 metric_total = v->metric_total; 762 } 763 764 n = strdup(metric_events[i]->name); 765 if (!n) 766 return -ENOMEM; 767 /* 768 * This display code with --no-merge adds [cpu] postfixes. 769 * These are not supported by the parser. Remove everything 770 * after the space. 
771 */ 772 pn = strchr(n, ' '); 773 if (pn) 774 *pn = 0; 775 776 if (metric_total) 777 expr__add_id_val(pctx, n, metric_total); 778 else 779 expr__add_id_val(pctx, n, avg_stats(stats)*scale); 780 } 781 782 for (j = 0; metric_refs && metric_refs[j].metric_name; j++) { 783 ret = expr__add_ref(pctx, &metric_refs[j]); 784 if (ret) 785 return ret; 786 } 787 788 return i; 789 } 790 791 static void generic_metric(struct perf_stat_config *config, 792 const char *metric_expr, 793 struct evsel **metric_events, 794 struct metric_ref *metric_refs, 795 char *name, 796 const char *metric_name, 797 const char *metric_unit, 798 int runtime, 799 int cpu, 800 struct perf_stat_output_ctx *out, 801 struct runtime_stat *st) 802 { 803 print_metric_t print_metric = out->print_metric; 804 struct expr_parse_ctx pctx; 805 double ratio, scale; 806 int i; 807 void *ctxp = out->ctx; 808 809 i = prepare_metric(metric_events, metric_refs, &pctx, cpu, st); 810 if (i < 0) 811 return; 812 813 if (!metric_events[i]) { 814 if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) { 815 char *unit; 816 char metric_bf[64]; 817 818 if (metric_unit && metric_name) { 819 if (perf_pmu__convert_scale(metric_unit, 820 &unit, &scale) >= 0) { 821 ratio *= scale; 822 } 823 if (strstr(metric_expr, "?")) 824 scnprintf(metric_bf, sizeof(metric_bf), 825 "%s %s_%d", unit, metric_name, runtime); 826 else 827 scnprintf(metric_bf, sizeof(metric_bf), 828 "%s %s", unit, metric_name); 829 830 print_metric(config, ctxp, NULL, "%8.1f", 831 metric_bf, ratio); 832 } else { 833 print_metric(config, ctxp, NULL, "%8.2f", 834 metric_name ? 835 metric_name : 836 out->force_header ? name : "", 837 ratio); 838 } 839 } else { 840 print_metric(config, ctxp, NULL, NULL, 841 out->force_header ? 842 (metric_name ? metric_name : name) : "", 0); 843 } 844 } else { 845 print_metric(config, ctxp, NULL, NULL, 846 out->force_header ? 847 (metric_name ? 
metric_name : name) : "", 0); 848 } 849 850 expr__ctx_clear(&pctx); 851 } 852 853 double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st) 854 { 855 struct expr_parse_ctx pctx; 856 double ratio = 0.0; 857 858 if (prepare_metric(mexp->metric_events, mexp->metric_refs, &pctx, cpu, st) < 0) 859 goto out; 860 861 if (expr__parse(&ratio, &pctx, mexp->metric_expr, 1)) 862 ratio = 0.0; 863 864 out: 865 expr__ctx_clear(&pctx); 866 return ratio; 867 } 868 869 void perf_stat__print_shadow_stats(struct perf_stat_config *config, 870 struct evsel *evsel, 871 double avg, int cpu, 872 struct perf_stat_output_ctx *out, 873 struct rblist *metric_events, 874 struct runtime_stat *st) 875 { 876 void *ctxp = out->ctx; 877 print_metric_t print_metric = out->print_metric; 878 double total, ratio = 0.0, total2; 879 const char *color = NULL; 880 int ctx = evsel_context(evsel); 881 struct metric_event *me; 882 int num = 1; 883 884 if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { 885 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 886 887 if (total) { 888 ratio = avg / total; 889 print_metric(config, ctxp, NULL, "%7.2f ", 890 "insn per cycle", ratio); 891 } else { 892 print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); 893 } 894 895 total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, 896 ctx, cpu); 897 898 total = max(total, runtime_stat_avg(st, 899 STAT_STALLED_CYCLES_BACK, 900 ctx, cpu)); 901 902 if (total && avg) { 903 out->new_line(config, ctxp); 904 ratio = total / avg; 905 print_metric(config, ctxp, NULL, "%7.2f ", 906 "stalled cycles per insn", 907 ratio); 908 } 909 } else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { 910 if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) 911 print_branch_misses(config, cpu, evsel, avg, out, st); 912 else 913 print_metric(config, ctxp, NULL, NULL, "of all branches", 0); 914 } else if ( 915 evsel->core.attr.type == PERF_TYPE_HW_CACHE && 916 evsel->core.attr.config == ( 
PERF_COUNT_HW_CACHE_L1D | 917 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 918 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { 919 920 if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0) 921 print_l1_dcache_misses(config, cpu, evsel, avg, out, st); 922 else 923 print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0); 924 } else if ( 925 evsel->core.attr.type == PERF_TYPE_HW_CACHE && 926 evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_L1I | 927 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 928 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { 929 930 if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0) 931 print_l1_icache_misses(config, cpu, evsel, avg, out, st); 932 else 933 print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0); 934 } else if ( 935 evsel->core.attr.type == PERF_TYPE_HW_CACHE && 936 evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_DTLB | 937 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 938 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { 939 940 if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0) 941 print_dtlb_cache_misses(config, cpu, evsel, avg, out, st); 942 else 943 print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0); 944 } else if ( 945 evsel->core.attr.type == PERF_TYPE_HW_CACHE && 946 evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_ITLB | 947 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 948 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { 949 950 if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0) 951 print_itlb_cache_misses(config, cpu, evsel, avg, out, st); 952 else 953 print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0); 954 } else if ( 955 evsel->core.attr.type == PERF_TYPE_HW_CACHE && 956 evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_LL | 957 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 958 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { 959 960 if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0) 961 print_ll_cache_misses(config, cpu, evsel, avg, out, st); 962 else 963 print_metric(config, 
ctxp, NULL, NULL, "of all LL-cache accesses", 0); 964 } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { 965 total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); 966 967 if (total) 968 ratio = avg * 100 / total; 969 970 if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0) 971 print_metric(config, ctxp, NULL, "%8.3f %%", 972 "of all cache refs", ratio); 973 else 974 print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0); 975 } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { 976 print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st); 977 } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { 978 print_stalled_cycles_backend(config, cpu, evsel, avg, out, st); 979 } else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { 980 total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); 981 982 if (total) { 983 ratio = avg / total; 984 print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio); 985 } else { 986 print_metric(config, ctxp, NULL, NULL, "Ghz", 0); 987 } 988 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { 989 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 990 991 if (total) 992 print_metric(config, ctxp, NULL, 993 "%7.2f%%", "transactional cycles", 994 100.0 * (avg / total)); 995 else 996 print_metric(config, ctxp, NULL, NULL, "transactional cycles", 997 0); 998 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { 999 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); 1000 total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu); 1001 1002 if (total2 < avg) 1003 total2 = avg; 1004 if (total) 1005 print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles", 1006 100.0 * ((total2-avg) / total)); 1007 else 1008 print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0); 1009 } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { 1010 total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, 1011 ctx, cpu); 1012 1013 if (avg) 1014 ratio = total / avg; 1015 1016 if (runtime_stat_n(st, 
STAT_CYCLES_IN_TX, ctx, cpu) != 0) 1017 print_metric(config, ctxp, NULL, "%8.0f", 1018 "cycles / transaction", ratio); 1019 else 1020 print_metric(config, ctxp, NULL, NULL, "cycles / transaction", 1021 0); 1022 } else if (perf_stat_evsel__is(evsel, ELISION_START)) { 1023 total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, 1024 ctx, cpu); 1025 1026 if (avg) 1027 ratio = total / avg; 1028 1029 print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio); 1030 } else if (evsel__is_clock(evsel)) { 1031 if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) 1032 print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized", 1033 avg / (ratio * evsel->scale)); 1034 else 1035 print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); 1036 } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { 1037 double fe_bound = td_fe_bound(ctx, cpu, st); 1038 1039 if (fe_bound > 0.2) 1040 color = PERF_COLOR_RED; 1041 print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", 1042 fe_bound * 100.); 1043 } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { 1044 double retiring = td_retiring(ctx, cpu, st); 1045 1046 if (retiring > 0.7) 1047 color = PERF_COLOR_GREEN; 1048 print_metric(config, ctxp, color, "%8.1f%%", "retiring", 1049 retiring * 100.); 1050 } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { 1051 double bad_spec = td_bad_spec(ctx, cpu, st); 1052 1053 if (bad_spec > 0.1) 1054 color = PERF_COLOR_RED; 1055 print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", 1056 bad_spec * 100.); 1057 } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { 1058 double be_bound = td_be_bound(ctx, cpu, st); 1059 const char *name = "backend bound"; 1060 static int have_recovery_bubbles = -1; 1061 1062 /* In case the CPU does not support topdown-recovery-bubbles */ 1063 if (have_recovery_bubbles < 0) 1064 have_recovery_bubbles = pmu_have_event("cpu", 1065 "topdown-recovery-bubbles"); 1066 if (!have_recovery_bubbles) 1067 name = "backend 
bound/bad spec"; 1068 1069 if (be_bound > 0.2) 1070 color = PERF_COLOR_RED; 1071 if (td_total_slots(ctx, cpu, st) > 0) 1072 print_metric(config, ctxp, color, "%8.1f%%", name, 1073 be_bound * 100.); 1074 else 1075 print_metric(config, ctxp, NULL, NULL, name, 0); 1076 } else if (evsel->metric_expr) { 1077 generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, 1078 evsel->name, evsel->metric_name, NULL, 1, cpu, out, st); 1079 } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { 1080 char unit = 'M'; 1081 char unit_buf[10]; 1082 1083 total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); 1084 1085 if (total) 1086 ratio = 1000.0 * avg / total; 1087 if (ratio < 0.001) { 1088 ratio *= 1000; 1089 unit = 'K'; 1090 } 1091 snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); 1092 print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); 1093 } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { 1094 print_smi_cost(config, cpu, evsel, out, st); 1095 } else { 1096 num = 0; 1097 } 1098 1099 if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) { 1100 struct metric_expr *mexp; 1101 1102 list_for_each_entry (mexp, &me->head, nd) { 1103 if (num++ > 0) 1104 out->new_line(config, ctxp); 1105 generic_metric(config, mexp->metric_expr, mexp->metric_events, 1106 mexp->metric_refs, evsel->name, mexp->metric_name, 1107 mexp->metric_unit, mexp->runtime, cpu, out, st); 1108 } 1109 } 1110 if (num == 0) 1111 print_metric(config, ctxp, NULL, NULL, NULL, 0); 1112 } 1113