// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include "evsel.h"
#include "stat.h"
#include "color.h"
#include "pmu.h"
#include "rblist.h"
#include "evlist.h"
#include "expr.h"
#include "metricgroup.h"
#include <linux/zalloc.h>

/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_DIE: Use first CPU of die
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */

struct runtime_stat rt_stat;
struct stats walltime_nsecs_stats;

struct saved_value {
	struct rb_node rb_node;
	struct evsel *evsel;
	enum stat_type type;
	int ctx;
	int cpu;
	struct runtime_stat *stat;
	struct stats stats;
	u64 metric_total;
	int metric_other;
};

static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
{
	struct saved_value *a = container_of(rb_node,
					     struct saved_value,
					     rb_node);
	const struct saved_value *b = entry;

	if (a->cpu != b->cpu)
		return a->cpu - b->cpu;

	/*
	 * Previously the rbtree was used to link generic metrics.
	 * The keys were evsel/cpu. Now the rbtree is extended to support
	 * per-thread shadow stats. For shadow stats case, the keys
	 * are cpu/type/ctx/stat (evsel is NULL). For generic metrics
	 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
	 */
	if (a->type != b->type)
		return a->type - b->type;

	if (a->ctx != b->ctx)
		return a->ctx - b->ctx;

	if (a->evsel == NULL && b->evsel == NULL) {
		if (a->stat == b->stat)
			return 0;

		if ((char *)a->stat < (char *)b->stat)
			return -1;

		return 1;
	}

	if (a->evsel == b->evsel)
		return 0;
	if ((char *)a->evsel < (char *)b->evsel)
		return -1;
	return +1;
}

static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
				       const void *entry)
{
	struct saved_value *nd = malloc(sizeof(struct saved_value));

	if (!nd)
		return NULL;
	memcpy(nd, entry, sizeof(struct saved_value));
	return &nd->rb_node;
}

static void saved_value_delete(struct rblist *rblist __maybe_unused,
			       struct rb_node *rb_node)
{
	struct saved_value *v;

	BUG_ON(!rb_node);
	v = container_of(rb_node, struct saved_value, rb_node);
	free(v);
}

static struct saved_value *saved_value_lookup(struct evsel *evsel,
					      int cpu,
					      bool create,
					      enum stat_type type,
					      int ctx,
					      struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *nd;
	struct saved_value dm = {
		.cpu = cpu,
		.evsel = evsel,
		.type = type,
		.ctx = ctx,
		.stat = st,
	};

	rblist = &st->value_list;

	nd = rblist__find(rblist, &dm);
	if (nd)
		return container_of(nd, struct saved_value, rb_node);
	if (create) {
		rblist__add_node(rblist, &dm);
		nd = rblist__find(rblist, &dm);
		if (nd)
			return container_of(nd, struct saved_value, rb_node);
	}
	return NULL;
}

void runtime_stat__init(struct runtime_stat *st)
{
	struct rblist *rblist = &st->value_list;

	rblist__init(rblist);
	rblist->node_cmp = saved_value_cmp;
	rblist->node_new = saved_value_new;
	rblist->node_delete = saved_value_delete;
}

void runtime_stat__exit(struct runtime_stat *st)
{
	rblist__exit(&st->value_list);
}

void perf_stat__init_shadow_stats(void)
{
	runtime_stat__init(&rt_stat);
}

static int evsel_context(struct evsel *evsel)
{
	int ctx = 0;

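	/*
	 * Fold the exclusion bits into a small context id so that counts
	 * taken with different exclude_* settings end up in separate
	 * saved_value slots and are never averaged together.
	 */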
	if (evsel->core.attr.exclude_kernel)
		ctx |= CTX_BIT_KERNEL;
	if (evsel->core.attr.exclude_user)
		ctx |= CTX_BIT_USER;
	if (evsel->core.attr.exclude_hv)
		ctx |= CTX_BIT_HV;
	if (evsel->core.attr.exclude_host)
		ctx |= CTX_BIT_HOST;
	if (evsel->core.attr.exclude_idle)
		ctx |= CTX_BIT_IDLE;

	return ctx;
}

static void reset_stat(struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *pos, *next;

	rblist = &st->value_list;
	next = rb_first_cached(&rblist->entries);
	while (next) {
		pos = next;
		next = rb_next(pos);
		memset(&container_of(pos, struct saved_value, rb_node)->stats,
		       0,
		       sizeof(struct stats));
	}
}

void perf_stat__reset_shadow_stats(void)
{
	reset_stat(&rt_stat);
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}

void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
{
	reset_stat(st);
}

static void update_runtime_stat(struct runtime_stat *st,
				enum stat_type type,
				int ctx, int cpu, u64 count)
{
	struct saved_value *v = saved_value_lookup(NULL, cpu, true,
						   type, ctx, st);

	if (v)
		update_stats(&v->stats, count);
}

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
				    int cpu, struct runtime_stat *st)
{
	int ctx = evsel_context(counter);
	u64 count_ns = count;
	struct saved_value *v;

	count *= counter->scale;

	if (perf_evsel__is_clock(counter))
		update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
		update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
		update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, ELISION_START))
		update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
		update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
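	/* The per-level cache and TLB counts below feed the miss-ratio helpers. */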
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, SMI_NUM))
		update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, APERF))
		update_runtime_stat(st, STAT_APERF, ctx, cpu, count);

	if (counter->collect_stat) {
		v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st);
		update_stats(&v->stats, count);
		if (counter->metric_leader)
			v->metric_total += count;
	} else if (counter->metric_leader) {
		v = saved_value_lookup(counter->metric_leader,
				       cpu, true, STAT_NONE, 0, st);
		v->metric_total += count;
		v->metric_other++;
	}
}

/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES]	= { 20.0, 10.0, 5.0 },
	};
	const char *color = PERF_COLOR_NORMAL;

	if (ratio > grc_table[type][0])
		color = PERF_COLOR_RED;
	else if (ratio > grc_table[type][1])
		color = PERF_COLOR_MAGENTA;
	else if (ratio > grc_table[type][2])
		color = PERF_COLOR_YELLOW;

	return color;
}

static struct evsel *perf_stat__find_event(struct evlist *evsel_list,
					   const char *name)
{
	struct evsel *c2;

	evlist__for_each_entry (evsel_list, c2) {
		if (!strcasecmp(c2->name, name) && !c2->collect_stat)
			return c2;
	}
	return NULL;
}

/* Mark MetricExpr target events and link events using them to them. */
void perf_stat__collect_metric_expr(struct evlist *evsel_list)
{
	struct evsel *counter, *leader, **metric_events, *oc;
	bool found;
	const char **metric_names;
	int i;
	int num_metric_names;

	evlist__for_each_entry(evsel_list, counter) {
		bool invalid = false;

		leader = counter->leader;
		if (!counter->metric_expr)
			continue;
		metric_events = counter->metric_events;
		if (!metric_events) {
			if (expr__find_other(counter->metric_expr, counter->name,
					     &metric_names, &num_metric_names) < 0)
				continue;

			metric_events = calloc(sizeof(struct evsel *),
					       num_metric_names + 1);
			if (!metric_events)
				return;
			counter->metric_events = metric_events;
		}

		for (i = 0; i < num_metric_names; i++) {
			found = false;
			if (leader) {
				/* Search in group */
				for_each_group_member (oc, leader) {
					if (!strcasecmp(oc->name, metric_names[i]) &&
					    !oc->collect_stat) {
						found = true;
						break;
					}
				}
			}
			if (!found) {
				/* Search ignoring groups */
				oc = perf_stat__find_event(evsel_list, metric_names[i]);
			}
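			/*
			 * Referenced event not found anywhere: warn the user
			 * and invalidate this metric expression.
			 */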
			if (!oc) {
				/* Deduping one is good enough to handle duplicated PMUs. */
				static char *printed;

				/*
				 * Adding events automatically would be difficult, because
				 * it would risk creating groups that are not schedulable.
				 * perf stat doesn't understand all the scheduling constraints
				 * of events. So we ask the user instead to add the missing
				 * events.
				 */
				if (!printed || strcasecmp(printed, metric_names[i])) {
					fprintf(stderr,
						"Add %s event to groups to get metric expression for %s\n",
						metric_names[i],
						counter->name);
					printed = strdup(metric_names[i]);
				}
				invalid = true;
				continue;
			}
			metric_events[i] = oc;
			oc->collect_stat = true;
		}
		metric_events[i] = NULL;
		free(metric_names);
		if (invalid) {
			free(metric_events);
			counter->metric_events = NULL;
			counter->metric_expr = NULL;
		}
	}
}

static double runtime_stat_avg(struct runtime_stat *st,
			       enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return avg_stats(&v->stats);
}

static double runtime_stat_n(struct runtime_stat *st,
			     enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return v->stats.n;
}

static void print_stalled_cycles_frontend(struct perf_stat_config *config,
					  int cpu,
					  struct evsel *evsel, double avg,
					  struct perf_stat_output_ctx *out,
					  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

	if (ratio)
		out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
				  ratio);
	else
		out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
}

static void print_stalled_cycles_backend(struct perf_stat_config *config,
					 int cpu,
					 struct evsel *evsel, double avg,
					 struct perf_stat_output_ctx *out,
					 struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
}

static void print_branch_misses(struct perf_stat_config *config,
				int cpu,
				struct evsel *evsel,
				double avg,
				struct perf_stat_output_ctx *out,
				struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
}

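/*
 * Each print_*_misses() helper below divides the miss count for one cache
 * level by the matching reference count recorded in
 * perf_stat__update_shadow_stats() and colours the ratio by severity.
 */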
static void print_l1_dcache_misses(struct perf_stat_config *config,
				   int cpu,
				   struct evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
}

static void print_l1_icache_misses(struct perf_stat_config *config,
				   int cpu,
				   struct evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
}

static void print_dtlb_cache_misses(struct perf_stat_config *config,
				    int cpu,
				    struct evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
}

static void print_itlb_cache_misses(struct perf_stat_config *config,
				    int cpu,
				    struct evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
}

static void print_ll_cache_misses(struct perf_stat_config *config,
				  int cpu,
				  struct evsel *evsel,
				  double avg,
				  struct perf_stat_output_ctx *out,
				  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
}

/*
 * High level "TopDown" CPU core pipeline bottleneck breakdown.
 *
 * Basic concept following
 * Yasin, A Top Down Method for Performance analysis and Counter architecture
 * ISPASS14
 *
 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
 *
 * Frontend -> Backend -> Retiring
 * BadSpeculation in addition means out of order execution that is thrown away
 * (for example branch mispredictions)
 * Frontend is instruction decoding.
 * Backend is execution, like computation and accessing data in memory
 * Retiring is good execution that is not directly bottlenecked
 *
 * The formulas are computed in slots.
 * A slot is an entry in the pipeline each for the pipeline width
 * (for example a 4-wide pipeline has 4 slots for each cycle)
 *
 * Formulas:
 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
 *			TotalSlots
 * Retiring = SlotsRetired / TotalSlots
 * FrontendBound = FetchBubbles / TotalSlots
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
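 *
 * A worked example with hypothetical numbers on a 4-wide machine:
 *	Cycles = 1000, so TotalSlots = 4000
 *	SlotsIssued = 3000, SlotsRetired = 2400, RecoveryBubbles = 200,
 *	FetchBubbles = 400
 *	BadSpeculation = ((3000 - 2400) + 200) / 4000 = 0.20
 *	Retiring       = 2400 / 4000                  = 0.60
 *	FrontendBound  =  400 / 4000                  = 0.10
 *	BackendBound   = 1.0 - 0.20 - 0.60 - 0.10     = 0.10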
 *
 * The kernel provides the mapping to the low level CPU events and any scaling
 * needed for the CPU pipeline width, for example:
 *
 * TotalSlots = Cycles * 4
 *
 * The scaling factor is communicated in the sysfs unit.
 *
 * In some cases the CPU may not be able to measure all the formulas due to
 * missing events. In this case multiple formulas are combined where possible.
 *
 * Full TopDown supports more levels to sub-divide each area: for example
 * BackendBound into computing bound and memory bound. For now we only
 * support Level 1 TopDown.
 */

static double sanitize_val(double x)
{
	if (x < 0 && x >= -0.02)
		return 0.0;
	return x;
}

static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
{
	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
}

static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
{
	double bad_spec = 0;
	double total_slots;
	double total;

	total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
		runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
		runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);

	total_slots = td_total_slots(ctx, cpu, st);
	if (total_slots)
		bad_spec = total / total_slots;
	return sanitize_val(bad_spec);
}

static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
{
	double retiring = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
					    ctx, cpu);

	if (total_slots)
		retiring = ret_slots / total_slots;
	return retiring;
}

static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double fe_bound = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
					    ctx, cpu);

	if (total_slots)
		fe_bound = fetch_bub / total_slots;
	return fe_bound;
}

static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double sum = (td_fe_bound(ctx, cpu, st) +
		      td_bad_spec(ctx, cpu, st) +
		      td_retiring(ctx, cpu, st));
	if (sum == 0)
		return 0;
	return sanitize_val(1.0 - sum);
}

static void print_smi_cost(struct perf_stat_config *config,
			   int cpu, struct evsel *evsel,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	double smi_num, aperf, cycles, cost = 0.0;
	int ctx = evsel_context(evsel);
	const char *color = NULL;

	smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
	aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
	cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if ((cycles == 0) || (aperf == 0))
		return;

	if (smi_num)
		cost = (aperf - cycles) / aperf * 100.00;

	if (cost > 10)
		color = PERF_COLOR_RED;
	out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
	out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}

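/*
 * Evaluate one MetricExpr: the leader event's own average goes into the
 * expression context first, followed by the averages (or the combined total
 * when several events were merged into one leader) of every other event the
 * expression refers to; the expression is then parsed and the result printed.
 */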
static void generic_metric(struct perf_stat_config *config,
			   const char *metric_expr,
			   struct evsel **metric_events,
			   char *name,
			   const char *metric_name,
			   const char *metric_unit,
			   double avg,
			   int cpu,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	print_metric_t print_metric = out->print_metric;
	struct parse_ctx pctx;
	double ratio, scale;
	int i;
	void *ctxp = out->ctx;
	char *n, *pn;

	expr__ctx_init(&pctx);
	/* Must be first id entry */
	expr__add_id(&pctx, name, avg);
	for (i = 0; metric_events[i]; i++) {
		struct saved_value *v;
		struct stats *stats;
		u64 metric_total = 0;

		if (!strcmp(metric_events[i]->name, "duration_time")) {
			stats = &walltime_nsecs_stats;
			scale = 1e-9;
		} else {
			v = saved_value_lookup(metric_events[i], cpu, false,
					       STAT_NONE, 0, st);
			if (!v)
				break;
			stats = &v->stats;
			scale = 1.0;

			if (v->metric_other)
				metric_total = v->metric_total;
		}

		n = strdup(metric_events[i]->name);
		if (!n)
			return;
		/*
		 * This display code with --no-merge adds [cpu] postfixes.
		 * These are not supported by the parser. Remove everything
		 * after the space.
		 */
		pn = strchr(n, ' ');
		if (pn)
			*pn = 0;

		if (metric_total)
			expr__add_id(&pctx, n, metric_total);
		else
			expr__add_id(&pctx, n, avg_stats(stats)*scale);
	}

	if (!metric_events[i]) {
		const char *p = metric_expr;

		if (expr__parse(&ratio, &pctx, &p) == 0) {
			char *unit;
			char metric_bf[64];

			if (metric_unit && metric_name) {
				if (perf_pmu__convert_scale(metric_unit,
					&unit, &scale) >= 0) {
					ratio *= scale;
				}

				scnprintf(metric_bf, sizeof(metric_bf),
					  "%s %s", unit, metric_name);
				print_metric(config, ctxp, NULL, "%8.1f",
					     metric_bf, ratio);
			} else {
				print_metric(config, ctxp, NULL, "%8.1f",
					     metric_name ?
					     metric_name :
					     out->force_header ?  name : "",
					     ratio);
			}
		} else {
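			/* The expression failed to parse: print an empty metric. */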
			print_metric(config, ctxp, NULL, NULL,
				     out->force_header ?
				     (metric_name ? metric_name : name) : "", 0);
		}
	} else
		print_metric(config, ctxp, NULL, NULL, "", 0);

	for (i = 1; i < pctx.num_ids; i++)
		zfree(&pctx.ids[i].name);
}

void perf_stat__print_shadow_stats(struct perf_stat_config *config,
				   struct evsel *evsel,
				   double avg, int cpu,
				   struct perf_stat_output_ctx *out,
				   struct rblist *metric_events,
				   struct runtime_stat *st)
{
	void *ctxp = out->ctx;
	print_metric_t print_metric = out->print_metric;
	double total, ratio = 0.0, total2;
	const char *color = NULL;
	int ctx = evsel_context(evsel);
	struct metric_event *me;
	int num = 1;

	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%7.2f ",
				     "insn per cycle", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
		}

		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
					 ctx, cpu);

		total = max(total, runtime_stat_avg(st,
						    STAT_STALLED_CYCLES_BACK,
						    ctx, cpu));

		if (total && avg) {
			out->new_line(config, ctxp);
			ratio = total / avg;
			print_metric(config, ctxp, NULL, "%7.2f ",
				     "stalled cycles per insn",
				     ratio);
		}
	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
		if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
			print_branch_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
			print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
			print_l1_icache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
			print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
			print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
			print_ll_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
		total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);

		if (total)
			ratio = avg * 100 / total;

		if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
			print_metric(config, ctxp, NULL, "%8.3f %%",
				     "of all cache refs", ratio);
		else
			print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "GHz", 0);
		}
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total)
			print_metric(config, ctxp, NULL,
				     "%7.2f%%", "transactional cycles",
				     100.0 * (avg / total));
		else
			print_metric(config, ctxp, NULL, NULL, "transactional cycles",
				     0);
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);

		if (total2 < avg)
			total2 = avg;
		if (total)
			print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
				100.0 * ((total2-avg) / total));
		else
			print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
					 ctx, cpu);

		if (avg)
			ratio = total / avg;

		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
			print_metric(config, ctxp, NULL, "%8.0f",
				     "cycles / transaction", ratio);
		else
			print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
				     0);
	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
					 ctx, cpu);

		if (avg)
			ratio = total / avg;

		print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
	} else if (perf_evsel__is_clock(evsel)) {
		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
			print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
				     avg / (ratio * evsel->scale));
		else
			print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
		double fe_bound = td_fe_bound(ctx, cpu, st);

		if (fe_bound > 0.2)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
				fe_bound * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
		double retiring = td_retiring(ctx, cpu, st);

		if (retiring > 0.7)
			color = PERF_COLOR_GREEN;
		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
				retiring * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
		double bad_spec = td_bad_spec(ctx, cpu, st);

		if (bad_spec > 0.1)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
				bad_spec * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
		double be_bound = td_be_bound(ctx, cpu, st);
		const char *name = "backend bound";
		static int have_recovery_bubbles = -1;

		/* In case the CPU does not support topdown-recovery-bubbles */
		if (have_recovery_bubbles < 0)
			have_recovery_bubbles = pmu_have_event("cpu",
					"topdown-recovery-bubbles");
		if (!have_recovery_bubbles)
			name = "backend bound/bad spec";

		if (be_bound > 0.2)
			color = PERF_COLOR_RED;
		if (td_total_slots(ctx, cpu, st) > 0)
			print_metric(config, ctxp, color, "%8.1f%%", name,
					be_bound * 100.);
		else
			print_metric(config, ctxp, NULL, NULL, name, 0);
	} else if (evsel->metric_expr) {
		generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name,
				evsel->metric_name, NULL, avg, cpu, out, st);
	} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
		char unit = 'M';
		char unit_buf[10];

		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total)
			ratio = 1000.0 * avg / total;
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}
		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
		print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
		print_smi_cost(config, cpu, evsel, out, st);
	} else {
		num = 0;
	}

	if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
		struct metric_expr *mexp;

		list_for_each_entry (mexp, &me->head, nd) {
			if (num++ > 0)
				out->new_line(config, ctxp);
			generic_metric(config, mexp->metric_expr, mexp->metric_events,
					evsel->name, mexp->metric_name,
					mexp->metric_unit, avg, cpu, out, st);
		}
	}
	if (num == 0)
		print_metric(config, ctxp, NULL, NULL, NULL, 0);
}